@kodelyth/voice-call 2026.5.39 → 2026.5.42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/README.md +167 -0
  2. package/api.ts +16 -0
  3. package/cli-metadata.ts +10 -0
  4. package/config-api.ts +12 -0
  5. package/dist/api.js +2 -0
  6. package/dist/cli-metadata.js +12 -0
  7. package/dist/config-DAwbG2aw.js +621 -0
  8. package/dist/config-compat-BYfJ5ueI.js +129 -0
  9. package/dist/guarded-json-api-xAIbFPZh.js +591 -0
  10. package/dist/index.js +1341 -0
  11. package/dist/mock-jtSdKDQN.js +135 -0
  12. package/dist/plivo-L-JTeuEc.js +392 -0
  13. package/dist/realtime-handler-5pSItXxX.js +1227 -0
  14. package/dist/realtime-transcription.runtime-CAbQKwCN.js +2 -0
  15. package/dist/realtime-voice.runtime-vCpCAutg.js +2 -0
  16. package/dist/response-generator-B-MjbtsM.js +199 -0
  17. package/dist/runtime-api.js +6 -0
  18. package/dist/runtime-entry-ohPMJR46.js +3435 -0
  19. package/dist/runtime-entry.js +2 -0
  20. package/dist/setup-api.js +37 -0
  21. package/dist/telnyx-BWr9EZ4x.js +278 -0
  22. package/dist/twilio-D9B0zY1k.js +679 -0
  23. package/index.test.ts +1075 -0
  24. package/index.ts +863 -0
  25. package/klaw.plugin.json +30 -133
  26. package/package.json +3 -3
  27. package/runtime-api.ts +20 -0
  28. package/runtime-entry.ts +1 -0
  29. package/setup-api.ts +47 -0
  30. package/src/allowlist.test.ts +18 -0
  31. package/src/allowlist.ts +19 -0
  32. package/src/cli.test.ts +12 -0
  33. package/src/cli.ts +866 -0
  34. package/src/config-compat.test.ts +130 -0
  35. package/src/config-compat.ts +227 -0
  36. package/src/config.test.ts +542 -0
  37. package/src/config.ts +883 -0
  38. package/src/core-bridge.ts +14 -0
  39. package/src/deep-merge.test.ts +40 -0
  40. package/src/deep-merge.ts +23 -0
  41. package/src/gateway-continue-operation.ts +200 -0
  42. package/src/http-headers.test.ts +16 -0
  43. package/src/http-headers.ts +15 -0
  44. package/src/manager/context.ts +50 -0
  45. package/src/manager/events.test.ts +578 -0
  46. package/src/manager/events.ts +332 -0
  47. package/src/manager/lifecycle.ts +53 -0
  48. package/src/manager/lookup.test.ts +52 -0
  49. package/src/manager/lookup.ts +35 -0
  50. package/src/manager/outbound.test.ts +629 -0
  51. package/src/manager/outbound.ts +508 -0
  52. package/src/manager/state.ts +48 -0
  53. package/src/manager/store.ts +107 -0
  54. package/src/manager/timers.test.ts +127 -0
  55. package/src/manager/timers.ts +113 -0
  56. package/src/manager/twiml.test.ts +13 -0
  57. package/src/manager/twiml.ts +17 -0
  58. package/src/manager.closed-loop.test.ts +259 -0
  59. package/src/manager.inbound-allowlist.test.ts +183 -0
  60. package/src/manager.notify.test.ts +390 -0
  61. package/src/manager.restore.test.ts +310 -0
  62. package/src/manager.test-harness.ts +127 -0
  63. package/src/manager.ts +441 -0
  64. package/src/media-stream.test.ts +953 -0
  65. package/src/media-stream.ts +876 -0
  66. package/src/providers/base.ts +99 -0
  67. package/src/providers/mock.test.ts +86 -0
  68. package/src/providers/mock.ts +185 -0
  69. package/src/providers/plivo.test.ts +93 -0
  70. package/src/providers/plivo.ts +601 -0
  71. package/src/providers/shared/call-status.test.ts +24 -0
  72. package/src/providers/shared/call-status.ts +24 -0
  73. package/src/providers/shared/guarded-json-api.test.ts +127 -0
  74. package/src/providers/shared/guarded-json-api.ts +49 -0
  75. package/src/providers/telnyx.test.ts +489 -0
  76. package/src/providers/telnyx.ts +419 -0
  77. package/src/providers/twilio/api.test.ts +184 -0
  78. package/src/providers/twilio/api.ts +100 -0
  79. package/src/providers/twilio/twiml-policy.test.ts +84 -0
  80. package/src/providers/twilio/twiml-policy.ts +87 -0
  81. package/src/providers/twilio/webhook.ts +34 -0
  82. package/src/providers/twilio.test.ts +607 -0
  83. package/src/providers/twilio.ts +861 -0
  84. package/src/providers/twilio.types.ts +17 -0
  85. package/src/realtime-agent-context.test.ts +101 -0
  86. package/src/realtime-agent-context.ts +149 -0
  87. package/src/realtime-defaults.ts +3 -0
  88. package/src/realtime-fast-context.test.ts +74 -0
  89. package/src/realtime-fast-context.ts +27 -0
  90. package/src/realtime-transcription.runtime.ts +4 -0
  91. package/src/realtime-voice.runtime.ts +5 -0
  92. package/src/response-generator.test.ts +385 -0
  93. package/src/response-generator.ts +348 -0
  94. package/src/response-model.test.ts +71 -0
  95. package/src/response-model.ts +23 -0
  96. package/src/runtime.test.ts +625 -0
  97. package/src/runtime.ts +528 -0
  98. package/src/telephony-audio.test.ts +61 -0
  99. package/src/telephony-audio.ts +12 -0
  100. package/src/telephony-tts.test.ts +196 -0
  101. package/src/telephony-tts.ts +235 -0
  102. package/src/test-fixtures.ts +82 -0
  103. package/src/tts-provider-voice.test.ts +34 -0
  104. package/src/tts-provider-voice.ts +21 -0
  105. package/src/tunnel.test.ts +173 -0
  106. package/src/tunnel.ts +314 -0
  107. package/src/types.ts +311 -0
  108. package/src/utils.test.ts +17 -0
  109. package/src/utils.ts +14 -0
  110. package/src/voice-mapping.test.ts +32 -0
  111. package/src/voice-mapping.ts +65 -0
  112. package/src/webhook/realtime-audio-pacer.test.ts +146 -0
  113. package/src/webhook/realtime-audio-pacer.ts +204 -0
  114. package/src/webhook/realtime-handler.test.ts +1450 -0
  115. package/src/webhook/realtime-handler.ts +1382 -0
  116. package/src/webhook/stale-call-reaper.test.ts +89 -0
  117. package/src/webhook/stale-call-reaper.ts +38 -0
  118. package/src/webhook/stream-frame-adapter.test.ts +187 -0
  119. package/src/webhook/stream-frame-adapter.ts +219 -0
  120. package/src/webhook/tailscale.test.ts +216 -0
  121. package/src/webhook/tailscale.ts +129 -0
  122. package/src/webhook-exposure.test.ts +33 -0
  123. package/src/webhook-exposure.ts +84 -0
  124. package/src/webhook-security.test.ts +813 -0
  125. package/src/webhook-security.ts +982 -0
  126. package/src/webhook.hangup-once.lifecycle.test.ts +179 -0
  127. package/src/webhook.test.ts +1615 -0
  128. package/src/webhook.ts +933 -0
  129. package/src/webhook.types.ts +5 -0
  130. package/src/websocket-test-support.ts +72 -0
  131. package/tsconfig.json +16 -0
  132. package/api.js +0 -7
  133. package/cli-metadata.js +0 -7
  134. package/index.js +0 -7
  135. package/runtime-api.js +0 -7
  136. package/runtime-entry.js +0 -7
  137. package/setup-api.js +0 -7
@@ -0,0 +1,3435 @@
1
+ import { isBlockedHostnameOrIp, isRequestBodyLimitError, readRequestBodyWithLimit, requestBodyErrorToText } from "./runtime-api.js";
2
+ import "./api.js";
3
+ import { a as resolveVoiceCallEffectiveConfig, c as deepMergeDefined, i as resolveVoiceCallConfig, n as normalizeVoiceCallConfig, o as resolveVoiceCallSessionKey, r as resolveTwilioAuthToken, s as validateProviderConfig } from "./config-DAwbG2aw.js";
4
+ import { formatErrorMessage } from "klaw/plugin-sdk/error-runtime";
5
+ import { isLoopbackHost } from "klaw/plugin-sdk/gateway-runtime";
6
+ import { normalizeLowercaseStringOrEmpty, normalizeOptionalLowercaseString, normalizeOptionalString } from "klaw/plugin-sdk/string-coerce-runtime";
7
+ import { REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME, buildRealtimeVoiceAgentConsultPolicyInstructions, consultRealtimeVoiceAgent, convertPcmToMulaw8k, createTalkSessionController, recordTalkObservabilityEvent, resolveRealtimeVoiceAgentConsultTools, resolveRealtimeVoiceAgentConsultToolsAllow, resolveRealtimeVoiceFastContextConsult } from "klaw/plugin-sdk/realtime-voice";
8
+ import { z } from "zod";
9
+ import fs from "node:fs";
10
+ import os from "node:os";
11
+ import path from "node:path";
12
+ import crypto from "node:crypto";
13
+ import { appendRegularFile, privateFileStore, privateFileStoreSync, root } from "klaw/plugin-sdk/security-runtime";
14
+ import { parseTtsDirectives } from "klaw/plugin-sdk/speech";
15
+ import { spawn } from "node:child_process";
16
+ import http from "node:http";
17
+ import { URL as URL$1 } from "node:url";
18
+ import { resolveConfiguredCapabilityProvider } from "klaw/plugin-sdk/provider-selection-runtime";
19
+ import { WEBHOOK_BODY_READ_DEFAULTS, createWebhookInFlightLimiter } from "klaw/plugin-sdk/webhook-ingress";
20
+ import { WebSocket as WebSocket$1, WebSocketServer } from "ws";
21
+ //#region extensions/voice-call/src/allowlist.ts
22
/**
 * Reduce a phone number to its digits.
 *
 * @param {string | null | undefined} input - Raw phone number text.
 * @returns {string} `input` with every non-digit removed, or "" when `input` is falsy.
 */
function normalizePhoneNumber(input) {
	return input ? input.replace(/\D/g, "") : "";
}
26
/**
 * Check whether an already-normalized caller number appears in an allowlist.
 *
 * @param {string} normalizedFrom - Caller number, already reduced to digits.
 * @param {string[] | null | undefined} allowFrom - Allowlist entries in any format.
 * @returns {boolean} True when some entry normalizes to a non-empty value equal
 *   to `normalizedFrom`; false for an empty caller or a missing allowlist.
 */
function isAllowlistedCaller(normalizedFrom, allowFrom) {
	if (!normalizedFrom) return false;
	for (const entry of allowFrom ?? []) {
		const candidate = normalizePhoneNumber(entry);
		// Entries that normalize to "" never match, so they cannot allow everyone.
		if (candidate !== "" && candidate === normalizedFrom) return true;
	}
	return false;
}
33
+ //#endregion
34
+ //#region extensions/voice-call/src/types.ts
35
/** Provider identifiers accepted in a call record. */
const ProviderNameSchema = z.enum([
	"telnyx",
	"twilio",
	"plivo",
	"mock"
]);
/** Call states that do not end a call, in the order CallStateSchema lists them. */
const IN_PROGRESS_CALL_STATES = [
	"initiated",
	"ringing",
	"answered",
	"active",
	"speaking",
	"listening"
];
/** Call states that end a call; the same values serve as end reasons. */
const TERMINAL_CALL_STATES = [
	"completed",
	"hangup-user",
	"hangup-bot",
	"timeout",
	"error",
	"failed",
	"no-answer",
	"busy",
	"voicemail"
];
/** Every state a call record may hold: in-progress states followed by terminal ones. */
const CallStateSchema = z.enum([...IN_PROGRESS_CALL_STATES, ...TERMINAL_CALL_STATES]);
/** Membership set used to test whether a state is terminal. */
const TerminalStates = new Set(TERMINAL_CALL_STATES);
/** Reasons a call can end with. */
const EndReasonSchema = z.enum([...TERMINAL_CALL_STATES]);
80
/** Fields shared by every normalized call event. */
const BaseEventSchema = z.object({
	id: z.string(),
	dedupeKey: z.string().optional(),
	callId: z.string(),
	providerCallId: z.string().optional(),
	timestamp: z.number(),
	turnToken: z.string().optional(),
	direction: z.enum(["inbound", "outbound"]).optional(),
	from: z.string().optional(),
	to: z.string().optional()
});
/**
 * Build one event variant: the base fields plus a literal `type` tag and any
 * variant-specific fields.
 */
const defineCallEventVariant = (type, extraFields = {}) => BaseEventSchema.extend({
	type: z.literal(type),
	...extraFields
});
// The resulting union schema is not bound to a name here; the statement is
// kept so the same schemas are still constructed at module load.
z.discriminatedUnion("type", [
	defineCallEventVariant("call.initiated"),
	defineCallEventVariant("call.ringing"),
	defineCallEventVariant("call.answered"),
	defineCallEventVariant("call.active"),
	defineCallEventVariant("call.speaking", { text: z.string() }),
	defineCallEventVariant("call.speech", {
		transcript: z.string(),
		isFinal: z.boolean(),
		confidence: z.number().min(0).max(1).optional()
	}),
	defineCallEventVariant("call.silence", { durationMs: z.number() }),
	defineCallEventVariant("call.dtmf", { digits: z.string() }),
	defineCallEventVariant("call.ended", { reason: EndReasonSchema }),
	defineCallEventVariant("call.error", {
		error: z.string(),
		retryable: z.boolean().optional()
	})
]);
124
/** Direction of a call relative to this system. */
const CallDirectionSchema = z.enum(["outbound", "inbound"]);
/** Who said a transcript line. */
const TranscriptSpeakerSchema = z.enum(["bot", "user"]);
/** One transcript line; `isFinal` defaults to true when absent. */
const TranscriptEntrySchema = z.object({
	timestamp: z.number(),
	speaker: TranscriptSpeakerSchema,
	text: z.string(),
	isFinal: z.boolean().default(true)
});
/**
 * Persisted call record. `transcript` and `processedEventIds` default to empty
 * arrays when absent from the parsed input.
 */
const CallRecordSchema = z.object({
	// Identity
	callId: z.string(),
	providerCallId: z.string().optional(),
	provider: ProviderNameSchema,
	direction: CallDirectionSchema,
	state: CallStateSchema,
	// Parties and session
	from: z.string(),
	to: z.string(),
	sessionKey: z.string().optional(),
	// Timing and outcome
	startedAt: z.number(),
	answeredAt: z.number().optional(),
	endedAt: z.number().optional(),
	endReason: EndReasonSchema.optional(),
	// Accumulated data
	transcript: z.array(TranscriptEntrySchema).default([]),
	processedEventIds: z.array(z.string()).default([]),
	metadata: z.record(z.string(), z.unknown()).optional()
});
148
+ //#endregion
149
+ //#region extensions/voice-call/src/manager/state.ts
150
/** States a live conversation may switch between in either direction. */
const ConversationStates = new Set(["speaking", "listening"]);
/** Non-terminal states in forward order; transitions may only move to a later entry. */
const StateOrder = [
	"initiated",
	"ringing",
	"answered",
	"active",
	"speaking",
	"listening"
];
/**
 * Move `call.state` to `newState` when the transition is allowed; otherwise
 * leave the call untouched.
 *
 * Allowed transitions, once the call is not already terminal and the state
 * actually changes:
 * - to any terminal state,
 * - between the two conversation states,
 * - forward along `StateOrder`.
 *
 * @param {{ state: string }} call - Call record, mutated in place.
 * @param {string} newState - Requested state.
 * @returns {void}
 */
function transitionState(call, newState) {
	const current = call.state;
	if (current === newState) return;
	if (TerminalStates.has(current)) return;
	const endsCall = TerminalStates.has(newState);
	const swapsConversationState = ConversationStates.has(current) && ConversationStates.has(newState);
	const movesForward = StateOrder.indexOf(newState) > StateOrder.indexOf(current);
	if (endsCall || swapsConversationState || movesForward) call.state = newState;
}
172
/**
 * Append a final transcript line, stamped with the current time, to a call.
 *
 * @param {{ transcript: object[] }} call - Call record, mutated in place.
 * @param {string} speaker - Who spoke.
 * @param {string} text - What was said.
 * @returns {void}
 */
function addTranscriptEntry(call, speaker, text) {
	call.transcript.push({
		timestamp: Date.now(),
		speaker,
		text,
		isFinal: true
	});
}
181
+ //#endregion
182
+ //#region extensions/voice-call/src/manager/store.ts
183
/** Append operations that have been started and have not yet settled. */
const pendingPersistWrites = /* @__PURE__ */ new Set();
/**
 * Append a JSON snapshot of `call` as one line to `calls.jsonl` in `storePath`.
 *
 * Fire-and-forget: the write is not awaited. A failed write is logged and not
 * rethrown. The in-flight promise stays in `pendingPersistWrites` until it
 * settles.
 *
 * @param {string} storePath - Directory holding the call log.
 * @param {object} call - Call record to serialize.
 * @returns {void}
 */
function persistCallRecord(storePath, call) {
	const filePath = path.join(storePath, "calls.jsonl");
	const content = `${JSON.stringify(call)}\n`;
	const appended = appendRegularFile({
		filePath,
		content,
		rejectSymlinkParents: true
	});
	const logFailure = (err) => {
		console.error("[voice-call] Failed to persist call record:", err);
	};
	// Track the fully chained promise so the cleanup removes the same object
	// that was added to the set.
	const tracked = appended.catch(logFailure).finally(() => {
		pendingPersistWrites.delete(tracked);
	});
	pendingPersistWrites.add(tracked);
}
196
/**
 * Rebuild in-memory call state from the `calls.jsonl` log in `storePath`.
 *
 * The log holds one JSON call snapshot per line. For each call id the last
 * valid snapshot wins. Lines that are blank, unparseable, or fail schema
 * validation are skipped silently.
 *
 * @param {string} storePath - Directory holding the call log.
 * @returns {{
 *   activeCalls: Map<string, object>,
 *   providerCallIdMap: Map<string, string>,
 *   processedEventIds: Set<string>,
 *   rejectedProviderCallIds: Set<string>
 * }} Calls whose latest state is not terminal, their provider-id index, the
 *   processed event ids of every call (terminal ones included), and an empty
 *   rejected-id set (this function never adds to it).
 */
function loadActiveCallsFromStore(storePath) {
	const restored = {
		activeCalls: /* @__PURE__ */ new Map(),
		providerCallIdMap: /* @__PURE__ */ new Map(),
		processedEventIds: /* @__PURE__ */ new Set(),
		rejectedProviderCallIds: /* @__PURE__ */ new Set()
	};
	const logPath = path.join(storePath, "calls.jsonl");
	const raw = privateFileStoreSync(storePath).readTextIfExists(path.basename(logPath));
	if (raw === null) return restored;
	// Later snapshots overwrite earlier ones for the same call id.
	const latestByCallId = /* @__PURE__ */ new Map();
	for (const rawLine of raw.split("\n")) {
		if (!rawLine.trim()) continue;
		let record;
		try {
			record = CallRecordSchema.parse(JSON.parse(rawLine));
		} catch {
			continue;
		}
		latestByCallId.set(record.callId, record);
	}
	latestByCallId.forEach((record, callId) => {
		// Event ids are collected before the terminal check, so ended calls
		// still contribute theirs.
		for (const eventId of record.processedEventIds) restored.processedEventIds.add(eventId);
		if (TerminalStates.has(record.state)) return;
		restored.activeCalls.set(callId, record);
		if (record.providerCallId) restored.providerCallIdMap.set(record.providerCallId, callId);
	});
	return restored;
}
231
/**
 * Read the most recent call snapshots from the `calls.jsonl` log in `storePath`.
 *
 * @param {string} storePath - Directory holding the call log.
 * @param {number} [limit=50] - Maximum number of trailing log lines to read.
 *   Zero, negative, or non-numeric limits yield an empty result.
 * @returns {Promise<object[]>} Parsed records from the last `limit` non-blank
 *   lines, oldest first. Lines that fail JSON parsing or schema validation are
 *   skipped, so fewer than `limit` records may be returned.
 */
async function getCallHistoryFromStore(storePath, limit = 50) {
	// `slice(-0)` is `slice(0)` and would return the whole log, and a negative
	// limit would flip the slice to "skip from the start"; reject both up front.
	const maxEntries = Math.trunc(limit);
	if (!(maxEntries > 0)) return [];
	const logPath = path.join(storePath, "calls.jsonl");
	const content = await privateFileStore(storePath).readTextIfExists(path.basename(logPath));
	if (content === null) return [];
	// Drop blank and whitespace-only lines so they do not use up the limit.
	const lines = content.split("\n").filter((line) => line.trim() !== "");
	const calls = [];
	for (const line of lines.slice(-maxEntries)) {
		try {
			calls.push(CallRecordSchema.parse(JSON.parse(line)));
		} catch {
			// Best effort: a corrupt or outdated line must not hide the rest of the history.
		}
	}
	return calls;
}
243
+ //#endregion
244
+ //#region extensions/voice-call/src/manager/timers.ts
245
/**
 * Cancel and forget the max-duration timer registered for a call, if any.
 *
 * @param {{ maxDurationTimers: Map<string, any> }} ctx - Holder of the timer map.
 * @param {string} callId - Call whose timer should be cleared.
 * @returns {void}
 */
function clearMaxDurationTimer(ctx, callId) {
	const pending = ctx.maxDurationTimers.get(callId);
	if (!pending) return;
	clearTimeout(pending);
	ctx.maxDurationTimers.delete(callId);
}
252
/**
 * Arm (or re-arm) the max-duration timer for a call.
 *
 * Any existing timer for the call is cleared first. When the timer fires and
 * the call is still active and non-terminal, its `endReason` is set to
 * "timeout", the record is persisted, and `onTimeout(callId)` is awaited.
 *
 * @param {object} params
 * @param {object} params.ctx - Provides `maxDurationTimers`, `activeCalls`,
 *   `storePath` and `config.maxDurationSeconds`.
 * @param {string} params.callId - Call to guard.
 * @param {number} [params.timeoutMs] - Delay override in milliseconds;
 *   defaults to `config.maxDurationSeconds * 1000`.
 * @param {(callId: string) => unknown} params.onTimeout - Invoked when the limit is hit.
 * @returns {void}
 */
function startMaxDurationTimer(params) {
	const { ctx, callId } = params;
	clearMaxDurationTimer(ctx, callId);
	const maxDurationMs = params.timeoutMs ?? ctx.config.maxDurationSeconds * 1e3;
	console.log(`[voice-call] Starting max duration timer (${Math.ceil(maxDurationMs / 1e3)}s) for call ${callId}`);
	const timer = setTimeout(async () => {
		ctx.maxDurationTimers.delete(callId);
		const call = ctx.activeCalls.get(callId);
		if (!call || TerminalStates.has(call.state)) return;
		// Nothing awaits a timer callback, so a rejection escaping here would be
		// an unhandled rejection. Log it instead.
		try {
			console.log(`[voice-call] Max duration reached (${Math.ceil(maxDurationMs / 1e3)}s), ending call ${callId}`);
			call.endReason = "timeout";
			persistCallRecord(ctx.storePath, call);
			await params.onTimeout(callId);
		} catch (err) {
			console.error(`[voice-call] Max duration timeout handling failed for call ${callId}:`, err);
		}
	}, maxDurationMs);
	ctx.maxDurationTimers.set(callId, timer);
}
268
/**
 * Drop the pending transcript waiter for a call without settling its promise.
 *
 * @param {{ transcriptWaiters: Map<string, { timeout: any }> }} ctx - Holder of the waiter map.
 * @param {string} callId - Call whose waiter should be removed.
 * @returns {void}
 */
function clearTranscriptWaiter(ctx, callId) {
	const pending = ctx.transcriptWaiters.get(callId);
	if (pending) {
		clearTimeout(pending.timeout);
		ctx.transcriptWaiters.delete(callId);
	}
}
274
/**
 * Fail the pending transcript waiter for a call, if one exists.
 *
 * @param {{ transcriptWaiters: Map<string, object> }} ctx - Holder of the waiter map.
 * @param {string} callId - Call whose waiter should be rejected.
 * @param {string} reason - Message for the rejection error.
 * @returns {void}
 */
function rejectTranscriptWaiter(ctx, callId, reason) {
	const pending = ctx.transcriptWaiters.get(callId);
	if (pending) {
		// Remove the waiter before rejecting it.
		clearTranscriptWaiter(ctx, callId);
		pending.reject(new Error(reason));
	}
}
280
/**
 * Deliver a final transcript to the pending waiter for a call.
 *
 * @param {{ transcriptWaiters: Map<string, object> }} ctx - Holder of the waiter map.
 * @param {string} callId - Call the transcript belongs to.
 * @param {string} transcript - Final transcript text.
 * @param {string} [turnToken] - Turn token that came with the transcript.
 * @returns {boolean} True when a waiter was resolved; false when none is
 *   pending or the waiter expects a different turn token.
 */
function resolveTranscriptWaiter(ctx, callId, transcript, turnToken) {
	const pending = ctx.transcriptWaiters.get(callId);
	if (!pending) return false;
	// A waiter without a token accepts any transcript; one with a token only
	// accepts the matching turn.
	const expectsToken = Boolean(pending.turnToken);
	if (expectsToken && pending.turnToken !== turnToken) return false;
	clearTranscriptWaiter(ctx, callId);
	pending.resolve(transcript);
	return true;
}
288
/**
 * Register a waiter for the next final transcript of a call.
 *
 * Only one waiter per call may be pending at a time.
 *
 * @param {object} ctx - Provides `transcriptWaiters` and `config.transcriptTimeoutMs`.
 * @param {string} callId - Call to wait on.
 * @param {string} [turnToken] - Stored on the waiter.
 * @returns {Promise<string>} Settled by whoever calls the stored `resolve` or
 *   `reject`. Rejects immediately when a waiter is already pending, and after
 *   `config.transcriptTimeoutMs` when nothing settles it first.
 */
function waitForFinalTranscript(ctx, callId, turnToken) {
	if (ctx.transcriptWaiters.has(callId)) {
		return Promise.reject(/* @__PURE__ */ new Error("Already waiting for transcript"));
	}
	const timeoutMs = ctx.config.transcriptTimeoutMs;
	return new Promise((resolve, reject) => {
		const expire = () => {
			ctx.transcriptWaiters.delete(callId);
			reject(/* @__PURE__ */ new Error(`Timed out waiting for transcript after ${timeoutMs}ms`));
		};
		const timeout = setTimeout(expire, timeoutMs);
		ctx.transcriptWaiters.set(callId, {
			resolve,
			reject,
			timeout,
			turnToken
		});
	});
}
304
+ //#endregion
305
+ //#region extensions/voice-call/src/manager/lifecycle.ts
306
/**
 * Remove a call's provider-id index entry, but only when that entry still
 * points at this call.
 *
 * @param {Map<string, string>} providerCallIdMap - Provider call id to call id index.
 * @param {{ callId: string, providerCallId?: string }} call - Call being removed.
 * @returns {void}
 */
function removeProviderCallMapping(providerCallIdMap, call) {
	const { providerCallId, callId } = call;
	const ownsMapping = Boolean(providerCallId) && providerCallIdMap.get(providerCallId) === callId;
	if (ownsMapping) providerCallIdMap.delete(providerCallId);
}
310
/**
 * Mark a call as ended and remove it from all in-memory tracking.
 *
 * Steps, in order: stamp `endedAt` and `endReason`, move the state to the end
 * reason, persist the record, clear the max-duration timer and reject any
 * transcript waiter (each only when the context has that map), then remove the
 * call from the active map and the provider-id index.
 *
 * @param {object} params
 * @param {object} params.ctx - Manager context.
 * @param {object} params.call - Call record, mutated in place.
 * @param {string} params.endReason - Terminal reason; also used as the new state.
 * @param {number} [params.endedAt] - End timestamp; defaults to the current time.
 * @param {string} [params.transcriptRejectReason] - Message for a rejected
 *   waiter; defaults to `Call ended: <endReason>`.
 * @returns {void}
 */
function finalizeCall(params) {
	const { ctx, call, endReason } = params;
	const { callId } = call;
	const waiterRejectReason = params.transcriptRejectReason ?? `Call ended: ${endReason}`;
	call.endedAt = params.endedAt ?? Date.now();
	call.endReason = endReason;
	transitionState(call, endReason);
	persistCallRecord(ctx.storePath, call);
	const { maxDurationTimers, transcriptWaiters } = ctx;
	// The helpers only get the map they need.
	if (maxDurationTimers) clearMaxDurationTimer({ maxDurationTimers }, callId);
	if (transcriptWaiters) rejectTranscriptWaiter({ transcriptWaiters }, callId, waiterRejectReason);
	ctx.activeCalls.delete(callId);
	removeProviderCallMapping(ctx.providerCallIdMap, call);
}
321
+ //#endregion
322
+ //#region extensions/voice-call/src/manager/lookup.ts
323
/**
 * Find an active call by its provider-assigned id.
 *
 * The provider-id index is consulted first. When it has an entry, the active
 * map's value for that call id is returned as-is, which may be undefined.
 * Only when the index has no entry are the active calls scanned.
 *
 * @param {object} params
 * @param {Map<string, object>} params.activeCalls - Active calls by call id.
 * @param {Map<string, string>} params.providerCallIdMap - Provider call id to call id index.
 * @param {string} params.providerCallId - Provider call id to look up.
 * @returns {object | undefined} The matching call, if any.
 */
function getCallByProviderCallId(params) {
	const { activeCalls, providerCallIdMap, providerCallId } = params;
	const indexedCallId = providerCallIdMap.get(providerCallId);
	if (indexedCallId) return activeCalls.get(indexedCallId);
	return [...activeCalls.values()].find((call) => call.providerCallId === providerCallId);
}
328
/**
 * Find an active call by an id that may be either the internal call id or the
 * provider call id. The internal id is tried first.
 *
 * @param {object} params
 * @param {Map<string, object>} params.activeCalls - Active calls by call id.
 * @param {Map<string, string>} params.providerCallIdMap - Provider call id to call id index.
 * @param {string} params.callIdOrProviderCallId - Id of either kind.
 * @returns {object | undefined} The matching call, if any.
 */
function findCall(params) {
	const { activeCalls, providerCallIdMap, callIdOrProviderCallId } = params;
	return activeCalls.get(callIdOrProviderCallId) || getCallByProviderCallId({
		activeCalls,
		providerCallIdMap,
		providerCallId: callIdOrProviderCallId
	});
}
337
+ //#endregion
338
+ //#region extensions/voice-call/src/tts-provider-voice.ts
339
/**
 * Pick the voice configured on a TTS provider config object.
 *
 * @param {unknown} providerConfig - Provider-specific TTS config.
 * @returns {string | undefined} The normalized `voice`, else the normalized
 *   `voiceId`; undefined when `providerConfig` is not an object.
 */
function resolveProviderVoiceSetting(providerConfig) {
	const isObjectLike = Boolean(providerConfig) && typeof providerConfig === "object";
	if (!isObjectLike) return void 0;
	const preferred = normalizeOptionalString(providerConfig.voice);
	if (preferred !== null && preferred !== void 0) return preferred;
	return normalizeOptionalString(providerConfig.voiceId);
}
344
/**
 * Resolve the voice configured for the currently selected TTS provider.
 *
 * @param {{ tts?: { provider?: string, providers?: Record<string, unknown> } }} config
 * @returns {string | undefined} The selected provider's voice setting, or
 *   undefined when no provider is selected.
 */
function resolvePreferredTtsVoice(config) {
	const tts = config.tts;
	const selectedProvider = tts?.provider;
	return selectedProvider ? resolveProviderVoiceSetting(tts?.providers?.[selectedProvider]) : void 0;
}
349
+ //#endregion
350
+ //#region extensions/voice-call/src/voice-mapping.ts
351
/**
 * Escape the five XML special characters (& < > " ') so `text` can be placed
 * in TwiML or other XML output.
 *
 * @param {string} text - Raw text.
 * @returns {string} Text with XML entities substituted.
 */
function escapeXml(text) {
	// "&" must go first so the ampersands of later entities are not re-escaped.
	const substitutions = [
		[/&/g, "&amp;"],
		[/</g, "&lt;"],
		[/>/g, "&gt;"],
		[/"/g, "&quot;"],
		[/'/g, "&apos;"]
	];
	return substitutions.reduce((escaped, [pattern, entity]) => escaped.replace(pattern, entity), text);
}
357
/**
 * Map of OpenAI voice names to similar Twilio Polly voices.
 */
const OPENAI_TO_POLLY_MAP = {
	alloy: "Polly.Joanna",
	echo: "Polly.Matthew",
	fable: "Polly.Amy",
	onyx: "Polly.Brian",
	nova: "Polly.Salli",
	shimmer: "Polly.Kimberly"
};
/**
 * Default Polly voice when no mapping is found.
 */
const DEFAULT_POLLY_VOICE = "Polly.Joanna";
/**
 * Map OpenAI voice names to Twilio Polly equivalents.
 * Voices already prefixed with "Polly." or "Google." are returned unchanged.
 *
 * @param voice - OpenAI voice name (alloy, echo, etc.) or Polly voice name
 * @returns Polly voice name suitable for Twilio TwiML; the default voice when
 *   `voice` is empty or has no mapping
 */
function mapVoiceToPolly(voice) {
	if (!voice) return DEFAULT_POLLY_VOICE;
	if (voice.startsWith("Polly.") || voice.startsWith("Google.")) return voice;
	const key = normalizeLowercaseStringOrEmpty(voice);
	// Own-property check: a plain `map[key]` lookup would also hit inherited
	// members such as "constructor" or "__proto__" and return a non-string.
	return Object.prototype.hasOwnProperty.call(OPENAI_TO_POLLY_MAP, key) ? OPENAI_TO_POLLY_MAP[key] : DEFAULT_POLLY_VOICE;
}
384
+ //#endregion
385
+ //#region extensions/voice-call/src/manager/twiml.ts
386
/**
 * Build TwiML that speaks `message` with the given voice and then hangs up.
 *
 * Both the message and the voice are XML-escaped. The voice may come straight
 * from configuration, so it must not be able to close the attribute or inject
 * markup.
 *
 * @param {string} message - Text to speak.
 * @param {string} voice - Voice name for the `<Say>` verb.
 * @returns {string} TwiML document.
 */
function generateNotifyTwiml(message, voice) {
	// String() keeps the earlier rendering for a missing voice ("undefined")
	// instead of throwing inside escapeXml.
	const safeVoice = escapeXml(String(voice));
	return `<?xml version="1.0" encoding="UTF-8"?>
<Response>
<Say voice="${safeVoice}">${escapeXml(message)}</Say>
<Hangup/>
</Response>`;
}
393
/**
 * Build TwiML that plays DTMF digits and then redirects the call, via POST,
 * to the webhook URL. Both values are XML-escaped.
 *
 * @param {string} digits - DTMF sequence to play.
 * @param {string} webhookUrl - URL the call is redirected to afterwards.
 * @returns {string} TwiML document.
 */
function generateDtmfRedirectTwiml(digits, webhookUrl) {
	const safeDigits = escapeXml(digits);
	const safeWebhookUrl = escapeXml(webhookUrl);
	return `<?xml version="1.0" encoding="UTF-8"?>
<Response>
<Play digits="${safeDigits}" />
<Redirect method="POST">${safeWebhookUrl}</Redirect>
</Response>`;
}
400
+ //#endregion
401
+ //#region extensions/voice-call/src/manager/outbound.ts
402
/**
 * Classify a call id as usable, ended, or unavailable.
 *
 * @param {object} ctx - Provides `activeCalls` and `provider`.
 * @param {string} callId - Internal call id.
 * @returns {{ kind: "error", error: string }
 *   | { kind: "ended", call: object }
 *   | { kind: "ok", call: object, providerCallId: string, provider: object }}
 *   "error" when the call is unknown, or when there is no provider or no
 *   provider call id; "ended" when the call is in a terminal state; "ok" otherwise.
 */
function lookupConnectedCall(ctx, callId) {
	const unavailable = (error) => ({
		kind: "error",
		error
	});
	const call = ctx.activeCalls.get(callId);
	if (!call) return unavailable("Call not found");
	const { provider } = ctx;
	const { providerCallId } = call;
	if (!provider || !providerCallId) return unavailable("Call not connected");
	if (TerminalStates.has(call.state)) {
		return {
			kind: "ended",
			call
		};
	}
	return {
		kind: "ok",
		call,
		providerCallId,
		provider
	};
}
423
/**
 * Resolve a call that can accept provider commands, or describe why it cannot.
 *
 * @param {object} ctx - Manager context.
 * @param {string} callId - Internal call id.
 * @returns {{ ok: false, error: string }
 *   | { ok: true, call: object, providerCallId: string, provider: object }}
 */
function requireConnectedCall(ctx, callId) {
	const lookup = lookupConnectedCall(ctx, callId);
	switch (lookup.kind) {
		case "error": return {
			ok: false,
			error: lookup.error
		};
		case "ended": return {
			ok: false,
			error: "Call has ended"
		};
		default: {
			const { call, providerCallId, provider } = lookup;
			return {
				ok: true,
				call,
				providerCallId,
				provider
			};
		}
	}
}
440
/**
 * Validate a DTMF sequence.
 *
 * @param {string} digits - Candidate sequence.
 * @returns {string | null} null when every character is one of 0-9, *, #,
 *   comma, w/W or p/P and the sequence is non-empty; otherwise an error message.
 */
function validateDtmfDigits(digits) {
	if (/^[0-9*#wWpP,]+$/.test(digits)) return null;
	return "digits may only contain digits, *, #, comma, w, p";
}
443
/**
 * Place an outbound call through the configured provider.
 *
 * Validation failures return before any state is created. Otherwise a call
 * record is registered and persisted, the provider is asked to dial, and the
 * provider call id is indexed. If the provider request throws, the call is
 * finalized as "failed".
 *
 * @param {object} ctx - Manager context (config, provider, webhookUrl, call maps, storePath).
 * @param {string} to - Destination number.
 * @param {string} [sessionKey] - Explicit session key passed to session-key resolution.
 * @param {string | { message?: string, mode?: string, dtmfSequence?: string, requesterSessionKey?: string }} [options]
 *   A bare string is treated as the initial message.
 * @returns {Promise<{ callId: string, success: boolean, error?: string }>}
 *   `callId` is "" when the call was rejected before a record was created.
 */
async function initiateCall(ctx, to, sessionKey, options) {
	// Result for requests rejected before a call record exists.
	const rejected = (error) => ({
		callId: "",
		success: false,
		error
	});
	const opts = typeof options === "string" ? { message: options } : options ?? {};
	const { message: initialMessage, dtmfSequence } = opts;
	const mode = opts.mode ?? ctx.config.outbound.defaultMode;
	const requesterSessionKey = opts.requesterSessionKey?.trim();
	if (dtmfSequence) {
		const validationError = validateDtmfDigits(dtmfSequence);
		if (validationError) return rejected(validationError);
		if (mode !== "conversation") return rejected("dtmfSequence requires conversation mode");
	}
	if (!ctx.provider) return rejected("Provider not initialized");
	if (!ctx.webhookUrl) return rejected("Webhook URL not configured");
	if (ctx.activeCalls.size >= ctx.config.maxConcurrentCalls) {
		return rejected(`Maximum concurrent calls (${ctx.config.maxConcurrentCalls}) reached`);
	}
	const callId = crypto.randomUUID();
	// The mock provider gets a placeholder caller id when none is configured.
	const from = ctx.config.fromNumber || (ctx.provider?.name === "mock" ? "+15550000000" : void 0);
	if (!from) return rejected("fromNumber not configured");
	const metadata = {};
	if (initialMessage) metadata.initialMessage = initialMessage;
	metadata.mode = mode;
	if (requesterSessionKey) metadata.requesterSessionKey = requesterSessionKey;
	const callRecord = {
		callId,
		provider: ctx.provider.name,
		direction: "outbound",
		state: "initiated",
		from,
		to,
		sessionKey: resolveVoiceCallSessionKey({
			config: ctx.config,
			callId,
			phone: to,
			explicitSessionKey: sessionKey
		}),
		startedAt: Date.now(),
		transcript: [],
		processedEventIds: [],
		metadata
	};
	ctx.activeCalls.set(callId, callRecord);
	persistCallRecord(ctx.storePath, callRecord);
	try {
		let inlineTwiml;
		let preConnectTwiml;
		const isNotifyWithMessage = mode === "notify" && initialMessage;
		if (isNotifyWithMessage) {
			const pollyVoice = mapVoiceToPolly(resolvePreferredTtsVoice(ctx.config));
			inlineTwiml = generateNotifyTwiml(initialMessage, pollyVoice);
			console.log(`[voice-call] Using inline TwiML for notify mode (voice: ${pollyVoice})`);
		} else if (dtmfSequence) {
			preConnectTwiml = generateDtmfRedirectTwiml(dtmfSequence, ctx.webhookUrl);
			console.log(`[voice-call] Using pre-connect DTMF TwiML for call ${callId} (digits=${dtmfSequence.length}, initialMessage=${initialMessage ? "yes" : "no"})`);
		}
		// A stream session is issued only for realtime-enabled Telnyx calls when an issuer exists.
		const wantsStreamSession = ctx.config.realtime?.enabled && ctx.provider.name === "telnyx" && ctx.streamSessionIssuer;
		const streamSession = wantsStreamSession ? ctx.streamSessionIssuer({
			providerName: "telnyx",
			callId,
			from,
			to,
			direction: "outbound"
		}) : void 0;
		const request = {
			callId,
			from,
			to,
			webhookUrl: ctx.webhookUrl,
			inlineTwiml,
			preConnectTwiml
		};
		if (streamSession) {
			request.streamUrl = streamSession.streamUrl;
			request.streamAuthToken = streamSession.token;
		}
		const result = await ctx.provider.initiateCall(request);
		callRecord.providerCallId = result.providerCallId;
		ctx.providerCallIdMap.set(result.providerCallId, callId);
		persistCallRecord(ctx.storePath, callRecord);
		console.log(`[voice-call] Outbound call initiated: callId=${callId} providerCallId=${result.providerCallId} mode=${mode} preConnectDtmf=${preConnectTwiml ? "yes" : "no"} initialMessage=${initialMessage ? "yes" : "no"}`);
		return {
			callId,
			success: true
		};
	} catch (err) {
		finalizeCall({
			ctx,
			call: callRecord,
			endReason: "failed"
		});
		return {
			callId,
			success: false,
			error: formatErrorMessage(err)
		};
	}
}
559
/**
 * Speak `text` on a connected call and record it in the transcript.
 *
 * The call is moved to "speaking" and persisted before playback. The voice is
 * resolved from the effective config for the call's number route
 * (`metadata.numberRouteKey` when it is a string, otherwise `call.to`). If
 * anything throws, the call is moved to "listening", persisted, and the error
 * is returned rather than thrown.
 *
 * @param {object} ctx - Manager context.
 * @param {string} callId - Internal call id.
 * @param {string} text - Text to speak.
 * @returns {Promise<{ success: true } | { success: false, error: string }>}
 */
async function speak(ctx, callId, text) {
	const connected = requireConnectedCall(ctx, callId);
	if (!connected.ok) {
		return {
			success: false,
			error: connected.error
		};
	}
	const { call, providerCallId, provider } = connected;
	const saveCall = () => persistCallRecord(ctx.storePath, call);
	try {
		transitionState(call, "speaking");
		saveCall();
		const routeOverride = call.metadata?.numberRouteKey;
		const numberRouteKey = typeof routeOverride === "string" ? call.metadata.numberRouteKey : call.to;
		const effective = resolveVoiceCallEffectiveConfig(ctx.config, numberRouteKey);
		const voice = resolvePreferredTtsVoice(effective.config);
		await provider.playTts({
			callId,
			providerCallId,
			text,
			voice
		});
		addTranscriptEntry(call, "bot", text);
		saveCall();
		return { success: true };
	} catch (err) {
		transitionState(call, "listening");
		saveCall();
		return {
			success: false,
			error: formatErrorMessage(err)
		};
	}
}
589
/**
 * Decide whether the manager should call `startListening` after the initial
 * message has been spoken.
 *
 * @param {object} ctx - Provides `provider` and `config.streaming.enabled`.
 * @returns {boolean} False only for a Twilio provider with streaming enabled
 *   whose `isConversationStreamConnectEnabled()` returns exactly true.
 */
function shouldStartListeningAfterInitialMessage(ctx) {
	const { provider } = ctx;
	const isTwilioStreaming = provider?.name === "twilio" && Boolean(ctx.config.streaming.enabled);
	if (!isTwilioStreaming) return true;
	return provider.isConversationStreamConnectEnabled?.() !== true;
}
594
/**
 * Send DTMF digits on a connected call.
 *
 * @param {object} ctx - Manager context.
 * @param {string} callId - Internal call id.
 * @param {string} digits - DTMF sequence; validated before anything else.
 * @returns {Promise<{ success: true } | { success: false, error: string }>}
 *   Fails for invalid digits, an unavailable call, a provider without
 *   `sendDtmf`, or a provider error (returned, not thrown).
 */
async function sendDtmf(ctx, callId, digits) {
	const failure = (error) => ({
		success: false,
		error
	});
	const validationError = validateDtmfDigits(digits);
	if (validationError) return failure(validationError);
	const connected = requireConnectedCall(ctx, callId);
	if (!connected.ok) return failure(connected.error);
	const { provider, providerCallId } = connected;
	if (!provider.sendDtmf) return failure(`${provider.name} does not support outbound DTMF`);
	try {
		await provider.sendDtmf({
			callId,
			providerCallId,
			digits
		});
	} catch (err) {
		return failure(formatErrorMessage(err));
	}
	return { success: true };
}
623
/**
 * Speak the initial message stored on a call's metadata, once.
 *
 * Does nothing when the call is unknown, has no initial message, or already
 * has one in flight. After a successful playback the message is removed from
 * the metadata and the record is persisted. Then:
 * - in "notify" mode, a hangup is scheduled after
 *   `config.outbound.notifyHangupDelaySec` seconds;
 * - in "conversation" mode, the call moves to "listening" and the provider
 *   starts listening, unless `shouldStartListeningAfterInitialMessage` says no.
 *
 * @param {object} ctx - Manager context.
 * @param {string} providerCallId - Provider call id of the answered call.
 * @returns {Promise<void>}
 */
async function speakInitialMessage(ctx, providerCallId) {
	const call = getCallByProviderCallId({
		activeCalls: ctx.activeCalls,
		providerCallIdMap: ctx.providerCallIdMap,
		providerCallId
	});
	if (!call) {
		console.warn(`[voice-call] speakInitialMessage: no call found for ${providerCallId}`);
		return;
	}
	const initialMessage = call.metadata?.initialMessage;
	const mode = call.metadata?.mode ?? "conversation";
	if (!initialMessage) {
		console.log(`[voice-call] speakInitialMessage: no initial message for ${call.callId}`);
		return;
	}
	// Guard against being invoked twice for the same call while playback is in progress.
	if (ctx.initialMessageInFlight.has(call.callId)) {
		console.log(`[voice-call] speakInitialMessage: initial message already in flight for ${call.callId}`);
		return;
	}
	ctx.initialMessageInFlight.add(call.callId);
	try {
		console.log(`[voice-call] Speaking initial message for call ${call.callId} (mode: ${mode})`);
		const result = await speak(ctx, call.callId, initialMessage);
		if (!result.success) {
			console.warn(`[voice-call] Failed to speak initial message: ${result.error}`);
			return;
		}
		if (call.metadata) {
			delete call.metadata.initialMessage;
			persistCallRecord(ctx.storePath, call);
		}
		if (mode === "notify") {
			const delaySec = ctx.config.outbound.notifyHangupDelaySec;
			console.log(`[voice-call] Notify mode: auto-hangup in ${delaySec}s for call ${call.callId}`);
			setTimeout(async () => {
				// Nothing awaits a timer callback, so a rejection from endCall
				// would be an unhandled rejection. Log it instead.
				try {
					const currentCall = ctx.activeCalls.get(call.callId);
					if (currentCall && !TerminalStates.has(currentCall.state)) {
						console.log(`[voice-call] Notify mode: hanging up call ${call.callId}`);
						await endCall(ctx, call.callId);
					}
				} catch (err) {
					console.error(`[voice-call] Notify mode: failed to hang up call ${call.callId}:`, err);
				}
			}, delaySec * 1e3);
		} else if (mode === "conversation" && ctx.provider && shouldStartListeningAfterInitialMessage(ctx)) {
			transitionState(call, "listening");
			persistCallRecord(ctx.storePath, call);
			await ctx.provider.startListening({
				callId: call.callId,
				providerCallId
			});
		}
	} finally {
		ctx.initialMessageInFlight.delete(call.callId);
	}
}
677
/**
 * Run one conversational turn on a connected call: speak `prompt`, switch the
 * call to "listening", wait for the caller's final transcript, then stop
 * listening and record turn latency in `call.metadata`.
 *
 * Only one turn may be in flight per call; a second request while a turn or a
 * transcript waiter is active is rejected with "Already waiting for transcript".
 *
 * @param ctx Manager context (reads `activeTurnCalls`, `transcriptWaiters`, `storePath`).
 * @param callId Internal call id.
 * @param prompt Text to speak before listening.
 * @returns `{ success: true, transcript }` on success, otherwise
 *   `{ success: false, error }`. Errors thrown by the provider or while waiting
 *   are converted into the failure shape instead of propagating.
 */
async function continueCall(ctx, callId, prompt) {
  const connected = requireConnectedCall(ctx, callId);
  if (!connected.ok) {
    return { success: false, error: connected.error };
  }
  const { call, providerCallId, provider } = connected;
  if (ctx.activeTurnCalls.has(callId) || ctx.transcriptWaiters.has(callId)) {
    return { success: false, error: "Already waiting for transcript" };
  }
  ctx.activeTurnCalls.add(callId);
  const turnStartedAt = Date.now();
  // Only Twilio turns carry a token; other providers pass `undefined`.
  const turnToken = provider.name === "twilio" ? crypto.randomUUID() : void 0;
  try {
    // speak() reports failure through its result object rather than throwing.
    // Listening still proceeds (best effort), but the failure is no longer silent.
    const spoken = await speak(ctx, callId, prompt);
    if (spoken?.success === false) {
      console.warn(`[voice-call] continueCall: failed to speak prompt for ${callId}: ${spoken.error}`);
    }
    transitionState(call, "listening");
    persistCallRecord(ctx.storePath, call);
    const listenStartedAt = Date.now();
    await provider.startListening({ callId, providerCallId, turnToken });
    const transcript = await waitForFinalTranscript(ctx, callId, turnToken);
    const transcriptReceivedAt = Date.now();
    await provider.stopListening({ callId, providerCallId });

    const lastTurnLatencyMs = transcriptReceivedAt - turnStartedAt;
    const lastTurnListenWaitMs = transcriptReceivedAt - listenStartedAt;
    const previousTurns = call.metadata && typeof call.metadata.turnCount === "number" ? call.metadata.turnCount : 0;
    call.metadata = {
      ...call.metadata,
      turnCount: previousTurns + 1,
      lastTurnLatencyMs,
      lastTurnListenWaitMs,
      lastTurnCompletedAt: transcriptReceivedAt
    };
    persistCallRecord(ctx.storePath, call);
    console.log(`[voice-call] continueCall latency call=${call.callId} totalMs=${lastTurnLatencyMs} listenWaitMs=${lastTurnListenWaitMs}`);
    return { success: true, transcript };
  } catch (err) {
    return { success: false, error: formatErrorMessage(err) };
  } finally {
    // Always release the turn slot and any pending waiter, on success or failure.
    ctx.activeTurnCalls.delete(callId);
    clearTranscriptWaiter(ctx, callId);
  }
}
733
/**
 * Hang up a call through its provider and finalize the call record.
 *
 * @param ctx Manager context.
 * @param callId Internal call id.
 * @param options Optional `{ reason }`; the reason defaults to "hangup-bot".
 * @returns `{ success: true }` when the call was hung up (or had already
 *   ended), otherwise `{ success: false, error }`.
 */
async function endCall(ctx, callId, options) {
  const found = lookupConnectedCall(ctx, callId);
  switch (found.kind) {
    case "error":
      return { success: false, error: found.error };
    case "ended":
      // Already over: ending it again is reported as success.
      return { success: true };
  }
  const hangupReason = options?.reason ?? "hangup-bot";
  try {
    await found.provider.hangupCall({
      callId,
      providerCallId: found.providerCallId,
      reason: hangupReason
    });
    finalizeCall({ ctx, call: found.call, endReason: hangupReason });
  } catch (err) {
    return { success: false, error: formatErrorMessage(err) };
  }
  return { success: true };
}
761
+ //#endregion
762
+ //#region extensions/voice-call/src/manager/events.ts
763
/**
 * Decide whether an inbound call may be accepted under `config.inboundPolicy`.
 *
 * - "disabled": always rejected.
 * - "open": always accepted.
 * - "allowlist" / "pairing": accepted only when the normalized caller number
 *   is allowlisted in `config.allowFrom`; a missing caller id is rejected.
 * - anything else: rejected.
 *
 * @param config Voice-call config (`inboundPolicy`, `allowFrom`).
 * @param from Caller number as reported by the provider.
 * @returns `true` to accept the call, `false` to reject it.
 */
function shouldAcceptInbound(config, from) {
  const policy = config.inboundPolicy;
  if (policy === "disabled") {
    console.log("[voice-call] Inbound call rejected: policy is disabled");
    return false;
  }
  if (policy === "open") {
    console.log("[voice-call] Inbound call accepted: policy is open");
    return true;
  }
  if (policy !== "allowlist" && policy !== "pairing") return false;

  const callerNumber = normalizePhoneNumber(from);
  if (!callerNumber) {
    console.log("[voice-call] Inbound call rejected: missing caller ID");
    return false;
  }
  const isAllowed = isAllowlistedCaller(callerNumber, config.allowFrom);
  const verdict = isAllowed ? "accepted" : "rejected";
  const membership = isAllowed ? "is in" : "not in";
  console.log(`[voice-call] Inbound call ${verdict}: ${from} ${membership} allowlist`);
  return isAllowed;
}
786
/**
 * Create, register and persist a call record for a call first seen via a
 * provider webhook.
 *
 * The record starts in state "ringing". Inbound calls get an initial greeting
 * (`inboundGreeting` from the effective config, or a built-in default);
 * outbound calls get none.
 *
 * @param params `{ ctx, providerCallId, direction, from, to }`.
 * @returns The newly created call record.
 */
function createWebhookCall(params) {
  const { ctx, providerCallId, direction, from, to } = params;
  const isInbound = direction === "inbound";
  const callId = crypto.randomUUID();
  // Per-number routing only applies to inbound calls (keyed by the dialed number).
  const effective = resolveVoiceCallEffectiveConfig(ctx.config, isInbound ? to : void 0);
  const effectiveConfig = effective.config;

  const metadata = {
    initialMessage: isInbound ? effectiveConfig.inboundGreeting || "Hello! How can I help you today?" : void 0
  };
  if (effective.numberRouteKey) metadata.numberRouteKey = effective.numberRouteKey;

  const callRecord = {
    callId,
    providerCallId,
    provider: ctx.provider?.name || "twilio",
    direction,
    state: "ringing",
    from,
    to,
    sessionKey: resolveVoiceCallSessionKey({
      config: effectiveConfig,
      callId,
      // The session is keyed by the remote party's number.
      phone: direction === "outbound" ? to : from
    }),
    startedAt: Date.now(),
    transcript: [],
    processedEventIds: [],
    metadata
  };

  ctx.activeCalls.set(callId, callRecord);
  ctx.providerCallIdMap.set(providerCallId, callId);
  persistCallRecord(ctx.storePath, callRecord);
  console.log(`[voice-call] Created ${direction} call record: ${callId} from ${from}`);
  return callRecord;
}
817
/**
 * Persist a terminal record for an inbound call rejected by inbound policy.
 *
 * The record is written already ended (state and end reason "hangup-bot") and
 * carries the rejecting event's dedupe key in `processedEventIds`. It is only
 * persisted; it is not added to the in-memory active-call maps.
 *
 * @param params `{ ctx, event, providerCallId, dedupeKey }`.
 */
function persistRejectedInboundCall(params) {
  const { ctx, event, providerCallId, dedupeKey } = params;
  const rejectedAt = Date.now();
  persistCallRecord(ctx.storePath, {
    callId: event.callId || providerCallId,
    providerCallId,
    provider: ctx.provider?.name || "twilio",
    direction: "inbound",
    state: "hangup-bot",
    from: event.from || "unknown",
    to: event.to || ctx.config.fromNumber || "unknown",
    startedAt: event.timestamp || rejectedAt,
    endedAt: rejectedAt,
    endReason: "hangup-bot",
    transcript: [],
    processedEventIds: [dedupeKey],
    metadata: { rejectionReason: "inbound-policy" }
  });
}
837
/**
 * Apply one normalized provider event to call state.
 *
 * Flow:
 * 1. Drop events whose dedupe key was already processed.
 * 2. Resolve the call. For an unknown call carrying a provider call id and a
 *    direction, either reject it (inbound policy) or create a new record.
 * 3. Re-key the call if the provider reports a different provider call id.
 * 4. Mark the event processed (retryable `call.error` events are not marked,
 *    so they can be delivered again).
 * 5. Apply the state transition for the event type and persist the record.
 *
 * `call.ended` and non-retryable `call.error` finalize the call and return
 * without the trailing persist.
 *
 * @param ctx Manager context.
 * @param event Normalized event; `event.callId` is overwritten when a new
 *   call record is created for it.
 */
function processEvent(ctx, event) {
  const eventKey = event.dedupeKey || event.id;
  if (ctx.processedEventIds.has(eventKey)) return;

  let call = findCall({
    activeCalls: ctx.activeCalls,
    providerCallIdMap: ctx.providerCallIdMap,
    callIdOrProviderCallId: event.callId
  });
  const { providerCallId } = event;
  const direction = event.direction === "inbound" || event.direction === "outbound" ? event.direction : void 0;

  if (!call && providerCallId && direction) {
    if (direction === "inbound" && !shouldAcceptInbound(ctx.config, event.from)) {
      if (!ctx.provider) {
        // Nothing can hang up; the event is left unmarked.
        console.warn(`[voice-call] Inbound call rejected by policy but no provider to hang up (providerCallId: ${providerCallId}, from: ${event.from}); call will time out on provider side.`);
        return;
      }
      ctx.processedEventIds.add(eventKey);
      // Only the first rejection for a provider call triggers persistence and hangup.
      if (ctx.rejectedProviderCallIds.has(providerCallId)) return;
      ctx.rejectedProviderCallIds.add(providerCallId);
      const rejectedCallId = event.callId ?? providerCallId;
      persistRejectedInboundCall({
        ctx,
        event,
        dedupeKey: eventKey,
        providerCallId
      });
      console.log(`[voice-call] Rejecting inbound call by policy: ${providerCallId}`);
      ctx.provider.hangupCall({
        callId: rejectedCallId,
        providerCallId,
        reason: "hangup-bot"
      }).catch((err) => {
        // Forget the rejection so a later event for this call can retry the hangup.
        ctx.rejectedProviderCallIds.delete(providerCallId);
        const message = formatErrorMessage(err);
        console.warn(`[voice-call] Failed to reject inbound call ${providerCallId}:`, message);
      });
      return;
    }
    call = createWebhookCall({
      ctx,
      providerCallId,
      direction,
      from: event.from || "unknown",
      to: event.to || ctx.config.fromNumber || "unknown"
    });
    event.callId = call.callId;
  }
  if (!call) return;

  if (providerCallId && providerCallId !== call.providerCallId) {
    const staleProviderCallId = call.providerCallId;
    call.providerCallId = providerCallId;
    ctx.providerCallIdMap.set(providerCallId, call.callId);
    // Drop the old mapping only if it still points at this call.
    if (staleProviderCallId && ctx.providerCallIdMap.get(staleProviderCallId) === call.callId) {
      ctx.providerCallIdMap.delete(staleProviderCallId);
    }
  }

  const isRetryableError = event.type === "call.error" && event.retryable;
  if (!isRetryableError) {
    ctx.processedEventIds.add(eventKey);
    call.processedEventIds.push(eventKey);
  }

  switch (event.type) {
    case "call.initiated": {
      transitionState(call, "initiated");
      if (call.direction === "inbound" && call.providerCallId && ctx.provider?.answerCall) {
        // A stream session is issued up front only for Telnyx with realtime enabled.
        const wantsStreamSession = ctx.config.realtime?.enabled && ctx.provider.name === "telnyx" && ctx.streamSessionIssuer;
        const streamSession = wantsStreamSession ? ctx.streamSessionIssuer({
          providerName: "telnyx",
          callId: call.callId,
          from: call.from,
          to: call.to,
          direction: "inbound"
        }) : void 0;
        const streamFields = streamSession ? {
          streamUrl: streamSession.streamUrl,
          streamAuthToken: streamSession.token
        } : {};
        ctx.provider.answerCall({
          callId: call.callId,
          providerCallId: call.providerCallId,
          ...streamFields
        }).catch((err) => {
          const message = formatErrorMessage(err);
          console.warn(`[voice-call] Failed to answer inbound call ${call.providerCallId}:`, message);
        });
      }
      break;
    }
    case "call.ringing":
      transitionState(call, "ringing");
      break;
    case "call.answered":
      call.answeredAt = event.timestamp;
      transitionState(call, "answered");
      startMaxDurationTimer({
        ctx,
        callId: call.callId,
        onTimeout: async (timedOutCallId) => {
          await endCall(ctx, timedOutCallId, { reason: "timeout" });
        }
      });
      ctx.onCallAnswered?.(call);
      break;
    case "call.active":
      transitionState(call, "active");
      break;
    case "call.speaking":
      transitionState(call, "speaking");
      break;
    case "call.speech":
      if (event.isFinal) {
        const hadWaiter = ctx.transcriptWaiters.has(call.callId);
        const resolved = resolveTranscriptWaiter(ctx, call.callId, event.transcript, event.turnToken);
        if (hadWaiter && !resolved) {
          // The waiter exists but did not accept this transcript: skip the
          // transcript entry and the state change, but still persist below.
          console.warn(`[voice-call] Ignoring speech event with mismatched turn token for ${call.callId}`);
          break;
        }
        addTranscriptEntry(call, "user", event.transcript);
      }
      transitionState(call, "listening");
      break;
    case "call.silence":
    case "call.dtmf":
      break;
    case "call.ended":
      finalizeCall({
        ctx,
        call,
        endReason: event.reason,
        endedAt: event.timestamp
      });
      return;
    case "call.error":
      if (!event.retryable) {
        finalizeCall({
          ctx,
          call,
          endReason: "error",
          endedAt: event.timestamp,
          transcriptRejectReason: `Call error: ${event.error}`
        });
        return;
      }
      break;
  }
  persistCallRecord(ctx.storePath, call);
}
980
+ //#endregion
981
+ //#region extensions/voice-call/src/utils.ts
982
/**
 * Resolve a user-supplied path to an absolute path, expanding a leading `~`
 * (alone, or followed by `/` or `\`) to the current user's home directory.
 *
 * `~name/...` forms are not expanded; they resolve like any relative path.
 *
 * @param input Raw path string; surrounding whitespace is ignored.
 * @returns The absolute path, or the empty string when `input` is blank.
 */
function resolveUserPath(input) {
  const trimmed = input.trim();
  if (!trimmed) return trimmed;
  // A replacer function is used so `$`-sequences in the home directory path
  // (`$&`, `$1`, `$$`, ...) are inserted literally instead of being
  // interpreted as replacement patterns.
  const expanded = trimmed.replace(/^~(?=$|[\\/])/, () => os.homedir());
  return path.resolve(expanded);
}
991
+ //#endregion
992
+ //#region extensions/voice-call/src/manager.ts
993
/**
 * Mark a restored call record as ended, in place: stamps `endedAt` with the
 * current time and uses `endReason` as both the end reason and the state.
 *
 * @param call Call record to mutate.
 * @param endReason Reason value, also written to `call.state`.
 */
function markRestoredCallSkipped(call, endReason) {
  Object.assign(call, {
    endedAt: Date.now(),
    endReason,
    state: endReason
  });
}
998
/**
 * Increment the tally for a provider status in `counts`.
 *
 * @param counts Map from status label to count; mutated in place.
 * @param status Provider status; when `normalizeOptionalString` yields no
 *   value for it, the tally goes under "terminal".
 */
function incrementRestoreStatusCount(counts, status) {
  const bucket = normalizeOptionalString(status) ?? "terminal";
  const seenSoFar = counts.get(bucket) ?? 0;
  counts.set(bucket, seenSoFar + 1);
}
1002
/**
 * Resolve the directory used to store call records.
 *
 * Precedence: the explicit `storePath` argument, then `config.store`, then the
 * default `~/.klaw/voice-calls`. Blank or whitespace-only values count as unset.
 *
 * The previous implementation probed the filesystem for the default directory,
 * but with a single candidate and an identical fallback the probe could never
 * change the result, so it has been removed.
 *
 * @param config Voice-call config (reads `config.store`).
 * @param storePath Optional override path.
 * @returns Absolute store directory path.
 */
function resolveDefaultStoreBase(config, storePath) {
  const override = storePath?.trim() || config.store?.trim();
  if (override) return resolveUserPath(override);
  return resolveUserPath(path.join(os.homedir(), ".klaw", "voice-calls"));
}
1014
/**
 * Manages voice calls: state ownership and delegation to manager helper modules.
 *
 * The instance owns the mutable call state (active calls, provider-id index,
 * processed/rejected event ids, turn and timer bookkeeping) and hands it to
 * the module-level helpers through the object built by `getContext()`.
 */
var CallManager = class {
  /**
   * @param config Voice-call configuration.
   * @param storePath Optional store directory override (see `resolveDefaultStoreBase`).
   */
  constructor(config, storePath) {
    this.activeCalls = new Map();
    this.providerCallIdMap = new Map();
    this.processedEventIds = new Set();
    this.rejectedProviderCallIds = new Set();
    this.provider = null;
    this.webhookUrl = null;
    this.activeTurnCalls = new Set();
    this.transcriptWaiters = new Map();
    this.maxDurationTimers = new Map();
    this.initialMessageInFlight = new Set();
    this.config = config;
    this.storePath = resolveDefaultStoreBase(config, storePath);
  }

  /**
   * Initialize the call manager with a provider.
   * Verifies persisted calls with the provider and restarts timers.
   *
   * Restored calls that were answered and have already exceeded
   * `maxDurationSeconds` are dropped from the in-memory maps; the others get a
   * max-duration timer for the remaining time.
   */
  async initialize(provider, webhookUrl) {
    this.provider = provider;
    this.webhookUrl = webhookUrl;
    fs.mkdirSync(this.storePath, { recursive: true });

    const persisted = loadActiveCallsFromStore(this.storePath);
    this.processedEventIds = persisted.processedEventIds;
    this.rejectedProviderCallIds = persisted.rejectedProviderCallIds;

    const verified = await this.verifyRestoredCalls(provider, persisted.activeCalls);
    this.activeCalls = verified;
    this.providerCallIdMap = new Map();
    for (const [callId, call] of verified) {
      if (call.providerCallId) this.providerCallIdMap.set(call.providerCallId, callId);
    }

    let skippedAlreadyElapsedTimers = 0;
    for (const [callId, call] of verified) {
      if (!call.answeredAt || TerminalStates.has(call.state)) continue;
      const elapsed = Date.now() - call.answeredAt;
      const maxDurationMs = this.config.maxDurationSeconds * 1e3;
      if (elapsed >= maxDurationMs) {
        verified.delete(callId);
        if (call.providerCallId) this.providerCallIdMap.delete(call.providerCallId);
        skippedAlreadyElapsedTimers += 1;
        continue;
      }
      startMaxDurationTimer({
        ctx: this.getContext(),
        callId,
        timeoutMs: maxDurationMs - elapsed,
        onTimeout: async (id) => {
          await endCall(this.getContext(), id, { reason: "timeout" });
        }
      });
      console.log(`[voice-call] Restarted max-duration timer for restored call ${callId}`);
    }
    if (skippedAlreadyElapsedTimers > 0) console.log(`[voice-call] Skipped ${skippedAlreadyElapsedTimers} restored call(s) whose max-duration timer already elapsed`);
    if (verified.size > 0) console.log(`[voice-call] Restored ${verified.size} active call(s) from store`);
  }

  /**
   * Verify persisted calls with the provider before restoring.
   * Calls without providerCallId or older than maxDurationSeconds are skipped.
   * Transient provider errors keep the call (rely on timer fallback).
   *
   * @returns Map of call id to call record for the calls that are kept.
   */
  async verifyRestoredCalls(provider, candidates) {
    if (candidates.size === 0) return new Map();
    const maxAgeMs = this.config.maxDurationSeconds * 1e3;
    const now = Date.now();
    const verified = new Map();
    const verifyTasks = [];
    let skippedNoProviderCallId = 0;
    let skippedOlderThanMaxDuration = 0;
    const skippedTerminalStatuses = new Map();
    let keptVerifiedActive = 0;
    let keptUnknownProviderStatus = 0;
    let keptVerificationFailures = 0;

    for (const [callId, call] of candidates) {
      if (!call.providerCallId) {
        skippedNoProviderCallId += 1;
        continue;
      }
      if (now - call.startedAt > maxAgeMs) {
        // Too old to still be valid: record it as timed out and ask the
        // provider to hang up; a failed hangup is only logged.
        skippedOlderThanMaxDuration += 1;
        markRestoredCallSkipped(call, "timeout");
        persistCallRecord(this.storePath, call);
        await provider.hangupCall({
          callId,
          providerCallId: call.providerCallId,
          reason: "timeout"
        }).catch((err) => {
          console.warn(`[voice-call] Failed to hang up expired restored call ${callId}:`, err instanceof Error ? err.message : String(err));
        });
        continue;
      }
      // Status lookups are started here and awaited together after the loop.
      verifyTasks.push(provider.getCallStatus({ providerCallId: call.providerCallId }).then((result) => {
        if (result.isTerminal) {
          incrementRestoreStatusCount(skippedTerminalStatuses, result.status);
          markRestoredCallSkipped(call, "completed");
          persistCallRecord(this.storePath, call);
        } else if (result.isUnknown) {
          keptUnknownProviderStatus += 1;
          verified.set(callId, call);
        } else {
          keptVerifiedActive += 1;
          verified.set(callId, call);
        }
      }).catch(() => {
        // Verification failed: keep the call and rely on the max-duration timer.
        keptVerificationFailures += 1;
        verified.set(callId, call);
      }));
    }
    await Promise.allSettled(verifyTasks);

    if (skippedNoProviderCallId > 0) console.log(`[voice-call] Skipped ${skippedNoProviderCallId} restored call(s) with no providerCallId`);
    if (skippedOlderThanMaxDuration > 0) console.log(`[voice-call] Skipped ${skippedOlderThanMaxDuration} restored call(s) older than maxDurationSeconds`);
    for (const [status, count] of [...skippedTerminalStatuses].toSorted(([a], [b]) => a.localeCompare(b))) console.log(`[voice-call] Skipped ${count} restored call(s) with provider status: ${status}`);
    if (keptVerifiedActive > 0) console.log(`[voice-call] Kept ${keptVerifiedActive} restored call(s) confirmed active by provider`);
    if (keptUnknownProviderStatus > 0) console.log(`[voice-call] Kept ${keptUnknownProviderStatus} restored call(s) with unknown provider status (relying on timer)`);
    if (keptVerificationFailures > 0) console.log(`[voice-call] Kept ${keptVerificationFailures} restored call(s) after verification failure (relying on timer)`);
    return verified;
  }

  /**
   * Get the current provider.
   */
  getProvider() {
    return this.provider;
  }

  /**
   * Initiate an outbound call.
   */
  async initiateCall(to, sessionKey, options) {
    return initiateCall(this.getContext(), to, sessionKey, options);
  }

  /**
   * Speak to user in an active call.
   */
  async speak(callId, text) {
    return speak(this.getContext(), callId, text);
  }

  /**
   * Send DTMF digits to an active call.
   */
  async sendDtmf(callId, digits) {
    return sendDtmf(this.getContext(), callId, digits);
  }

  /**
   * Speak the initial message for a call (called when media stream connects).
   */
  async speakInitialMessage(providerCallId) {
    return speakInitialMessage(this.getContext(), providerCallId);
  }

  /**
   * Continue call: speak prompt, then wait for user's final transcript.
   */
  async continueCall(callId, prompt) {
    return continueCall(this.getContext(), callId, prompt);
  }

  /**
   * End an active call.
   *
   * @param callId Internal call id.
   * @param options Optional `{ reason }` forwarded to the module-level
   *   `endCall`; when omitted that helper applies its own default reason.
   */
  async endCall(callId, options) {
    return endCall(this.getContext(), callId, options);
  }

  /**
   * Build the context object passed to the manager helper functions.
   * A new object is created on every call; it references this instance's
   * current collections.
   */
  getContext() {
    return {
      activeCalls: this.activeCalls,
      providerCallIdMap: this.providerCallIdMap,
      processedEventIds: this.processedEventIds,
      rejectedProviderCallIds: this.rejectedProviderCallIds,
      provider: this.provider,
      config: this.config,
      storePath: this.storePath,
      webhookUrl: this.webhookUrl,
      activeTurnCalls: this.activeTurnCalls,
      transcriptWaiters: this.transcriptWaiters,
      maxDurationTimers: this.maxDurationTimers,
      initialMessageInFlight: this.initialMessageInFlight,
      onCallAnswered: (call) => {
        this.maybeSpeakInitialMessageOnAnswered(call);
      },
      streamSessionIssuer: this.streamSessionIssuer
    };
  }

  /**
   * Process a webhook event.
   */
  processEvent(event) {
    processEvent(this.getContext(), event);
  }

  /**
   * Whether the conversation-mode initial message should wait for the media
   * stream to connect: true only for a Twilio provider with streaming enabled
   * whose `isConversationStreamConnectEnabled()` returns a truthy value.
   */
  shouldDeferConversationInitialMessageUntilStreamConnect() {
    if (!this.provider || this.provider.name !== "twilio" || !this.config.streaming.enabled) return false;
    const streamAwareProvider = this.provider;
    if (typeof streamAwareProvider.isConversationStreamConnectEnabled !== "function") return false;
    return streamAwareProvider.isConversationStreamConnectEnabled();
  }

  /**
   * Speak the call's initial message right after it is answered, unless the
   * message is absent, the mode is neither "conversation" nor "notify", or
   * (conversation mode) realtime is enabled or the message is deferred until
   * stream connect. Failures are logged, never thrown.
   */
  maybeSpeakInitialMessageOnAnswered(call) {
    if (!(normalizeOptionalString(call.metadata?.initialMessage) ?? "")) return;
    const mode = call.metadata?.mode ?? "conversation";
    if (mode === "conversation") {
      if (this.config.realtime.enabled) return;
      if (this.shouldDeferConversationInitialMessageUntilStreamConnect()) return;
    } else if (mode !== "notify") return;
    if (!this.provider || !call.providerCallId) return;
    this.speakInitialMessage(call.providerCallId).catch((err) => {
      console.warn(`[voice-call] Failed to speak initial message for call ${call.callId}: ${formatErrorMessage(err)}`);
    });
  }

  /**
   * Get an active call by ID.
   */
  getCall(callId) {
    return this.activeCalls.get(callId);
  }

  /**
   * Get an active call by provider call ID (e.g., Twilio CallSid).
   */
  getCallByProviderCallId(providerCallId) {
    return getCallByProviderCallId({
      activeCalls: this.activeCalls,
      providerCallIdMap: this.providerCallIdMap,
      providerCallId
    });
  }

  /**
   * Get all active calls.
   */
  getActiveCalls() {
    return Array.from(this.activeCalls.values());
  }

  /**
   * Get call history (from persisted logs).
   */
  async getCallHistory(limit = 50) {
    return getCallHistoryFromStore(this.storePath, limit);
  }
};
1251
+ //#endregion
1252
+ //#region extensions/voice-call/src/realtime-agent-context.ts
1253
/**
 * Trim a value if it is a non-blank string.
 *
 * @param value Any value.
 * @returns The trimmed string, or `undefined` for non-strings and for strings
 *   that are empty after trimming.
 */
function normalizeString(value) {
  if (typeof value !== "string") return void 0;
  const trimmed = value.trim();
  return trimmed ? trimmed : void 0;
}
1256
/**
 * Read the configured agent entries from `cfg.agents.list`.
 *
 * @param cfg Core configuration object.
 * @returns The object-typed entries of the list (null and primitive entries
 *   are dropped), or an empty array when the list is missing or not an array.
 */
function readAgentEntries(cfg) {
  const list = cfg.agents?.list;
  if (!Array.isArray(list)) return [];
  return list.filter((entry) => entry !== null && typeof entry === "object");
}
1260
/**
 * Resolve the system prompt override for an agent.
 *
 * The override on the first agent entry whose normalized id equals `agentId`
 * wins; otherwise `cfg.agents.defaults.systemPromptOverride` is used.
 *
 * @param cfg Core configuration object.
 * @param agentId Agent id to look up.
 * @returns The normalized override text, or `undefined` when none is set.
 */
function resolveAgentSystemPromptOverride(cfg, agentId) {
  const agentEntry = readAgentEntries(cfg).find((candidate) => normalizeString(candidate.id) === agentId);
  const perAgent = normalizeString(agentEntry?.systemPromptOverride);
  return perAgent ?? normalizeString(cfg.agents?.defaults?.systemPromptOverride);
}
1263
/**
 * Cap `text` at `maxChars` characters.
 *
 * Text within the limit is returned unchanged. Longer text is cut to
 * `maxChars - 32` characters (trailing whitespace removed) and suffixed with a
 * "[truncated]" marker line. When `maxChars` is too small to hold even the
 * marker, the text is hard-cut to `maxChars` so the result never exceeds the
 * requested limit.
 *
 * @param text Text to limit.
 * @param maxChars Maximum length of the returned string.
 * @returns The original or truncated text.
 */
function limitText(text, maxChars) {
  if (text.length <= maxChars) return text;
  const marker = "\n[truncated]";
  // The marker alone would overshoot the limit: fall back to a plain cut.
  if (maxChars < marker.length) return text.slice(0, Math.max(0, maxChars));
  return `${text.slice(0, Math.max(0, maxChars - 32)).trimEnd()}${marker}`;
}
1267
/**
 * Read workspace files into `### <file>` sections, within a shared character budget.
 *
 * Files are read in order. Each section's body is limited to the remaining
 * budget minus the file name length and 16 characters of header allowance.
 * Files whose body budget is not positive are skipped without being read, and
 * reading stops once the budget is used up, so no content-free
 * "[truncated]" sections are produced. Missing, unreadable or blank files are
 * skipped.
 *
 * @param params `{ workspaceDir, files, maxChars }`.
 * @returns Array of section strings; empty when the workspace root cannot be opened.
 */
async function readWorkspaceVoiceContextFiles(params) {
  const sections = [];
  const workspaceRoot = await root(params.workspaceDir).catch(() => null);
  if (!workspaceRoot) return sections;

  let remaining = params.maxChars;
  for (const file of params.files) {
    // The budget never grows, so nothing further can fit.
    if (remaining <= 0) break;
    const bodyBudget = remaining - file.length - 16;
    // No room for content from this file; a later file with a shorter name may still fit.
    if (bodyBudget <= 0) continue;
    const trimmed = (await workspaceRoot.readText(file).catch(() => void 0))?.trim();
    if (!trimmed) continue;
    const section = `### ${file}\n${limitText(trimmed, bodyBudget)}`;
    sections.push(section);
    remaining -= section.length;
  }
  return sections;
}
1282
/**
 * Assemble the instruction text for a realtime voice session.
 *
 * The result is the non-empty parts of: the base instructions, the consult
 * policy guidance, and (when `config.realtime.agentContext.enabled`) an agent
 * context capsule, joined by blank lines. The capsule optionally includes the
 * configured identity, the system prompt override and workspace file
 * sections, and is capped at `agentContext.maxChars`.
 *
 * @param params `{ config, coreConfig, agentRuntime, baseInstructions }`.
 * @returns The combined instruction string.
 */
async function buildRealtimeVoiceInstructions(params) {
  const { config, coreConfig, agentRuntime } = params;
  const sections = [params.baseInstructions];
  const joinSections = () => sections.filter(Boolean).join("\n\n");

  const consultGuidance = buildRealtimeVoiceAgentConsultPolicyInstructions(config.realtime);
  if (consultGuidance) sections.push(consultGuidance);

  const contextConfig = config.realtime.agentContext;
  if (!contextConfig.enabled) return joinSections();

  const agentId = config.agentId ?? "main";
  const capsule = [
    "Klaw agent voice context:",
    `- Agent id: ${agentId}`,
    "- Use this context to match the Klaw agent's personality and standing preferences on fast voice turns.",
    "- Treat this as compact context only; call klaw_agent_consult when the caller needs the full agent brain, tools, memory, or workspace state."
  ];

  if (contextConfig.includeIdentity) {
    const identity = agentRuntime.resolveAgentIdentity(coreConfig, agentId);
    // Label / identity-field pairs, in output order.
    const identityFields = [
      ["Name", "name"],
      ["Emoji", "emoji"],
      ["Vibe", "vibe"],
      ["Theme", "theme"],
      ["Creature/persona", "creature"]
    ];
    const identityLines = [];
    for (const [label, field] of identityFields) {
      const value = normalizeString(identity?.[field]);
      if (value) identityLines.push(`- ${label}: ${value}`);
    }
    if (identityLines.length > 0) capsule.push(`Configured identity:\n${identityLines.join("\n")}`);
  }

  if (contextConfig.includeSystemPrompt) {
    const systemPrompt = resolveAgentSystemPromptOverride(coreConfig, agentId);
    if (systemPrompt) capsule.push(`Configured system prompt override:\n${systemPrompt}`);
  }

  if (contextConfig.includeWorkspaceFiles) {
    const fileSections = await readWorkspaceVoiceContextFiles({
      workspaceDir: agentRuntime.resolveAgentWorkspaceDir(coreConfig, agentId),
      files: contextConfig.files,
      maxChars: contextConfig.maxChars
    });
    if (fileSections.length > 0) capsule.push(`Workspace voice context:\n${fileSections.join("\n\n")}`);
  }

  sections.push(limitText(capsule.join("\n\n"), contextConfig.maxChars));
  return joinSections();
}
1322
+ //#endregion
1323
+ //#region extensions/voice-call/src/realtime-fast-context.ts
1324
/**
 * Resolve a fast-context consult for a voice call by delegating to
 * `resolveRealtimeVoiceFastContextConsult` with voice-call labels attached.
 *
 * @param params Consult parameters; a `labels` property on it is overridden.
 * @returns Whatever the delegate resolves to.
 */
async function resolveRealtimeFastContextConsult(params) {
  const labels = {
    audienceLabel: "caller",
    contextName: "Klaw memory or session context"
  };
  const outcome = await resolveRealtimeVoiceFastContextConsult({ ...params, labels });
  return outcome;
}
1333
+ //#endregion
1334
+ //#region extensions/voice-call/src/response-model.ts
1335
/**
 * Resolve the model used for voice responses.
 *
 * Uses `voiceConfig.responseModel` when set, otherwise
 * "<default provider>/<default model>" from the agent runtime defaults. The
 * reference is split at its first "/"; a reference without a slash is treated
 * as a bare model name on the default provider.
 *
 * @param params `{ voiceConfig, agentRuntime }`.
 * @returns `{ modelRef, provider, model }`.
 */
function resolveVoiceResponseModel(params) {
  const configuredRef = params.voiceConfig.responseModel;
  const modelRef = configuredRef ?? `${params.agentRuntime.defaults.provider}/${params.agentRuntime.defaults.model}`;
  const separatorAt = modelRef.indexOf("/");
  if (separatorAt === -1) {
    return {
      modelRef,
      provider: params.agentRuntime.defaults.provider,
      model: modelRef
    };
  }
  return {
    modelRef,
    provider: modelRef.slice(0, separatorAt),
    model: modelRef.slice(separatorAt + 1)
  };
}
1344
+ //#endregion
1345
+ //#region extensions/voice-call/src/telephony-audio.ts
1346
/**
 * Chunk audio buffer into 20ms frames for streaming (8kHz mono mu-law).
 *
 * Each chunk is produced with `subarray`, so chunks are views onto `audio`
 * rather than copies. The last chunk may be shorter than `chunkSize`.
 *
 * @param audio Buffer or typed array to split.
 * @param chunkSize Chunk length in elements; must be greater than zero.
 * @returns A generator yielding consecutive chunks.
 * @throws {RangeError} When `chunkSize` is not a positive number; a zero or
 *   negative step would otherwise never advance and iterate forever.
 */
function chunkAudio(audio, chunkSize = 160) {
  if (!(chunkSize > 0)) {
    throw new RangeError(`chunkSize must be a positive number, got ${chunkSize}`);
  }
  return (function* () {
    for (let offset = 0; offset < audio.length; offset += chunkSize) {
      yield audio.subarray(offset, Math.min(offset + chunkSize, audio.length));
    }
  })();
}
1354
+ //#endregion
1355
+ //#region extensions/voice-call/src/telephony-tts.ts
1356
/** Synthesis timeout (ms) used when the TTS config does not set `timeoutMs`. */
const TELEPHONY_DEFAULT_TTS_TIMEOUT_MS = 8e3;

/**
 * Create the telephony TTS provider used to synthesize speech for calls.
 *
 * @param params `{ coreConfig, ttsOverride, runtime, logger }`. `ttsOverride`
 *   is merged over `coreConfig.messages.tts`; `runtime.textToSpeechTelephony`
 *   performs the synthesis; `logger` is optional.
 * @returns `{ synthesisTimeoutMs, synthesizeForTelephony }`, where
 *   `synthesizeForTelephony(text)` resolves to the result of converting the
 *   synthesized audio with `convertPcmToMulaw8k`, and rejects with an `Error`
 *   when synthesis fails or returns no audio or sample rate.
 */
function createTelephonyTtsProvider(params) {
  const { coreConfig, ttsOverride, runtime, logger } = params;
  const mergedConfig = applyTtsOverride(coreConfig, ttsOverride);
  const ttsConfig = mergedConfig.messages?.tts;
  const modelOverrides = resolveTelephonyModelOverridePolicy(readTelephonyModelOverrides(ttsConfig));
  const providerConfigs = collectTelephonyProviderConfigs(ttsConfig);
  const activeProvider = normalizeProviderId(ttsConfig?.provider);

  const synthesizeForTelephony = async (text) => {
    const directives = parseTtsDirectives(text, modelOverrides, {
      cfg: mergedConfig,
      providerConfigs,
      preferredProviderId: activeProvider
    });
    if (directives.warnings.length > 0) {
      logger?.warn?.(`[voice-call] Ignored telephony TTS directive overrides (${directives.warnings.join("; ")})`);
    }
    // With directives present, prefer the explicit TTS text, then the cleaned
    // text; without directives the input is passed through untouched.
    const cleanText = directives.hasDirective ? directives.ttsText?.trim() || directives.cleanedText.trim() : text;
    const result = await runtime.textToSpeechTelephony({
      text: cleanText,
      cfg: mergedConfig,
      overrides: directives.overrides
    });
    if (!result.success || !result.audioBuffer || !result.sampleRate) {
      throw new Error(result.error ?? "TTS conversion failed");
    }
    if (result.fallbackFrom && result.provider && result.fallbackFrom !== result.provider) {
      const attemptedChain = result.attemptedProviders && result.attemptedProviders.length > 0 ? result.attemptedProviders.join(" -> ") : `${result.fallbackFrom} -> ${result.provider}`;
      logger?.warn?.(`[voice-call] Telephony TTS fallback used from=${result.fallbackFrom} to=${result.provider} attempts=${attemptedChain}`);
    }
    return convertPcmToMulaw8k(result.audioBuffer, result.sampleRate);
  };

  return {
    synthesisTimeoutMs: ttsConfig?.timeoutMs ?? TELEPHONY_DEFAULT_TTS_TIMEOUT_MS,
    synthesizeForTelephony
  };
}
1388
/**
 * Return a config whose `messages.tts` is the base TTS config merged with `override`.
 *
 * The input config is not mutated. When there is no override, or the merge
 * yields nothing, the original `coreConfig` object is returned as-is.
 *
 * @param coreConfig Core configuration object.
 * @param override Optional TTS override.
 * @returns `coreConfig` itself or a shallow copy with the merged TTS config.
 */
function applyTtsOverride(coreConfig, override) {
  if (!override) return coreConfig;
  const mergedTts = mergeTtsConfig(coreConfig.messages?.tts, override);
  if (!mergedTts) return coreConfig;
  const messages = { ...coreConfig.messages, tts: mergedTts };
  return { ...coreConfig, messages };
}
1401
/**
 * Merge a TTS override onto a base TTS config.
 *
 * @param base Base TTS config, if any.
 * @param override Override TTS config, if any.
 * @returns The deep merge (via `deepMergeDefined`) when both are present,
 *   whichever one is present when only one is, and `undefined` when neither is.
 */
function mergeTtsConfig(base, override) {
  if (base && override) return deepMergeDefined(base, override);
  if (override) return override;
  if (base) return base;
  return void 0;
}
1407
/**
 * Resolve which TTS directive overrides are allowed on telephony calls.
 *
 * Overrides are enabled unless `overrides.enabled` is falsy (missing counts as
 * enabled). When disabled every flag is `false`. When enabled each flag
 * defaults to `true`, except `allowProvider`, which defaults to `false`.
 *
 * @param overrides Optional `modelOverrides` config object.
 * @returns Policy object with `enabled` and the individual `allow*` flags.
 */
function resolveTelephonyModelOverridePolicy(overrides) {
  // Default for each flag when overrides are enabled and the flag is unset.
  const flagDefaults = {
    allowText: true,
    allowProvider: false,
    allowVoice: true,
    allowModelId: true,
    allowVoiceSettings: true,
    allowNormalization: true,
    allowSeed: true
  };
  const enabled = Boolean(overrides?.enabled ?? true);
  const policy = { enabled };
  for (const [flag, fallback] of Object.entries(flagDefaults)) {
    policy[flag] = enabled ? overrides?.[flag] ?? fallback : false;
  }
  return policy;
}
1430
/**
 * Read `modelOverrides` from a TTS config when it is a plain (non-array) object.
 *
 * @param ttsConfig TTS config, possibly undefined.
 * @returns The `modelOverrides` object, or `undefined` when it is missing,
 *   not an object, or an array.
 */
function readTelephonyModelOverrides(ttsConfig) {
  const candidate = ttsConfig?.modelOverrides;
  const isRecord = Boolean(candidate) && typeof candidate === "object" && !Array.isArray(candidate);
  return isRecord ? candidate : void 0;
}
1434
/**
 * Normalize a provider id: trimmed and lower-cased.
 *
 * @param value Any value.
 * @returns The normalized id, or `undefined` for non-strings and blank strings.
 */
function normalizeProviderId(value) {
  if (typeof value !== "string") return void 0;
  const id = value.trim().toLowerCase();
  return id === "" ? void 0 : id;
}
1437
/**
 * Coerce a value to a provider config object.
 *
 * @param value Any value.
 * @returns `value` itself when it is a non-null, non-array object; otherwise
 *   a new empty object.
 */
function asProviderConfig(value) {
  if (!value || typeof value !== "object" || Array.isArray(value)) return {};
  return value;
}
1440
/**
 * Collect per-provider TTS configs keyed by normalized provider id.
 *
 * Two sources are read, in this order of precedence:
 * 1. entries of `ttsConfig.providers` (always recorded, coerced to objects);
 * 2. object-valued top-level keys of `ttsConfig` that are not reserved
 *    settings names; these only fill ids not already recorded.
 *
 * @param ttsConfig TTS config, possibly undefined.
 * @returns Object mapping provider id to its config object.
 */
function collectTelephonyProviderConfigs(ttsConfig) {
  if (!ttsConfig) return {};
  const isRecord = (candidate) => typeof candidate === "object" && candidate !== null && !Array.isArray(candidate);
  const idFor = (rawId) => normalizeProviderId(rawId) ?? rawId;
  const configs = {};

  const declaredProviders = isRecord(ttsConfig.providers) ? ttsConfig.providers : {};
  Object.keys(declaredProviders).forEach((providerId) => {
    configs[idFor(providerId)] = asProviderConfig(declaredProviders[providerId]);
  });

  // Top-level keys that are TTS settings rather than provider sections.
  const reservedKeys = new Set([
    "auto",
    "enabled",
    "maxTextLength",
    "mode",
    "modelOverrides",
    "persona",
    "personas",
    "prefsPath",
    "provider",
    "providers",
    "summaryModel",
    "timeoutMs"
  ]);
  Object.keys(ttsConfig).forEach((key) => {
    const value = ttsConfig[key];
    if (reservedKeys.has(key) || !isRecord(value)) return;
    configs[idFor(key)] ??= asProviderConfig(value);
  });
  return configs;
}
1469
+ //#endregion
1470
+ //#region extensions/voice-call/src/webhook/tailscale.ts
1471
/**
 * Run the `tailscale` CLI with the given arguments and capture its stdout.
 *
 * The returned promise never rejects. Spawn failures and timeouts are
 * reported as `{ code: -1, stdout: "" }`.
 *
 * @param {string[]} args - Arguments passed to the `tailscale` binary.
 * @param {number} [timeoutMs=2500] - Time after which the child is killed
 *   with SIGKILL.
 * @returns {Promise<{code: number, stdout: string}>} Exit code (or -1) and
 *   captured stdout.
 */
function runTailscaleCommand(args, timeoutMs = 2500) {
  return new Promise((resolve) => {
    const proc = spawn("tailscale", args, { stdio: [
      "ignore",
      "pipe",
      "pipe"
    ] });
    let stdout = "";
    let settled = false;
    let timer;
    const finish = (result) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      resolve(result);
    };
    // Decode through the stream so a multi-byte character split across two
    // chunks is not corrupted by per-chunk Buffer-to-string coercion.
    proc.stdout.setEncoding("utf8");
    proc.stdout.on("data", (chunk) => {
      stdout += chunk;
    });
    // stderr is piped but unused; drain it so a chatty child cannot block on
    // a full pipe and stall until the timeout fires.
    proc.stderr.resume();
    timer = setTimeout(() => {
      proc.kill("SIGKILL");
      finish({
        code: -1,
        stdout: ""
      });
    }, timeoutMs);
    proc.on("error", () => {
      finish({
        code: -1,
        stdout: ""
      });
    });
    proc.on("close", (code) => {
      finish({
        code: code ?? -1,
        stdout
      });
    });
  });
}
1511
/**
 * Query `tailscale status --json` for this node's DNS name and id.
 *
 * @returns {Promise<{dnsName: string|null, nodeId: string|null}|null>}
 *   The `Self.DNSName` (trailing dot removed) and `Self.ID` fields, each
 *   `null` when missing or empty; `null` overall when the command exits
 *   non-zero or its output cannot be read as the expected JSON.
 */
async function getTailscaleSelfInfo() {
  const result = await runTailscaleCommand(["status", "--json"]);
  if (result.code !== 0) return null;
  try {
    // Destructuring throws on a `null` payload just like a property read
    // would, so malformed output still lands in the catch below.
    const { Self: self } = JSON.parse(result.stdout);
    const dnsName = self?.DNSName?.replace(/\.$/, "") || null;
    const nodeId = self?.ID || null;
    return { dnsName, nodeId };
  } catch {
    return null;
  }
}
1524
/**
 * Resolve this node's Tailscale DNS name.
 *
 * @returns {Promise<string|null>} The DNS name reported by
 *   `getTailscaleSelfInfo`, or `null` when unavailable.
 */
async function getTailscaleDnsName() {
  const info = await getTailscaleSelfInfo();
  return info?.dnsName ?? null;
}
1527
/**
 * Register a Tailscale serve/funnel route that forwards to a local URL.
 *
 * @param {{mode: string, path: string, localUrl: string}} opts - Tailscale
 *   subcommand to run, the path to expose, and the local target URL.
 * @returns {Promise<string|null>} The public `https://` URL when the
 *   command exits with code 0, otherwise `null`.
 */
async function setupTailscaleExposureRoute(opts) {
  const { mode, path, localUrl } = opts;
  const dnsName = await getTailscaleDnsName();
  if (!dnsName) {
    console.warn("[voice-call] Could not get Tailscale DNS name");
    return null;
  }
  const result = await runTailscaleCommand([mode, "--bg", "--yes", "--set-path", path, localUrl]);
  if (result.code !== 0) {
    console.warn(`[voice-call] Tailscale ${mode} failed`);
    return null;
  }
  const publicUrl = `https://${dnsName}${path}`;
  console.log(`[voice-call] Tailscale ${mode} active: ${publicUrl}`);
  return publicUrl;
}
1549
/**
 * Remove a previously registered Tailscale serve/funnel route.
 *
 * The command result is ignored.
 *
 * @param {{mode: string, path: string}} opts - Tailscale subcommand and the
 *   path to turn off.
 * @returns {Promise<void>}
 */
async function cleanupTailscaleExposureRoute(opts) {
  const offArgs = [opts.mode, "off", opts.path];
  await runTailscaleCommand(offArgs);
}
1556
/**
 * Expose the local webhook server through Tailscale according to config.
 *
 * @param {object} config - Reads `tailscale.mode`, `tailscale.path`,
 *   `serve.port` and `serve.path`.
 * @returns {Promise<string|null>} The public URL, or `null` when Tailscale
 *   exposure is off or could not be set up.
 */
async function setupTailscaleExposure(config) {
  const { tailscale, serve } = config;
  if (tailscale.mode === "off") return null;
  // Any mode other than "funnel" is treated as "serve".
  const mode = tailscale.mode === "funnel" ? "funnel" : "serve";
  return setupTailscaleExposureRoute({
    mode,
    path: tailscale.path,
    localUrl: `http://127.0.0.1:${serve.port}${serve.path}`
  });
}
1566
/**
 * Tear down the Tailscale exposure described by config, if any.
 *
 * @param {object} config - Reads `tailscale.mode` and `tailscale.path`.
 * @returns {Promise<void>}
 */
async function cleanupTailscaleExposure(config) {
  const { mode, path } = config.tailscale;
  if (mode === "off") return;
  // Any mode other than "funnel" is treated as "serve".
  const effectiveMode = mode === "funnel" ? "funnel" : "serve";
  await cleanupTailscaleExposureRoute({ mode: effectiveMode, path });
}
1573
+ //#endregion
1574
+ //#region extensions/voice-call/src/tunnel.ts
1575
/**
 * Start an ngrok tunnel to expose the local webhook server.
 *
 * Uses the ngrok CLI which must be installed: https://ngrok.com/download
 *
 * The promise resolves once ngrok logs a tunnel URL, and rejects when
 * startup takes longer than 30 seconds, the binary cannot be spawned,
 * ngrok prints an `ERR_NGROK` message on stderr, or the process exits
 * before reporting a URL. On the timeout and `ERR_NGROK` paths the child
 * is terminated so a failed start does not leave a stray process.
 *
 * @param {{port: number, path: string, authToken?: string, domain?: string}} config
 *   Local port to expose, webhook path appended to the public URL, and
 *   optional ngrok auth token / reserved domain.
 * @returns {Promise<{publicUrl: string, provider: "ngrok", stop: () => Promise<void>}>}
 *
 * @example
 * const tunnel = await startNgrokTunnel({ port: 3334, path: '/voice/webhook' });
 * console.log('Public URL:', tunnel.publicUrl);
 * // Later: await tunnel.stop();
 */
async function startNgrokTunnel(config) {
  if (config.authToken) await runNgrokCommand([
    "config",
    "add-authtoken",
    config.authToken
  ]);
  const args = [
    "http",
    String(config.port),
    "--log",
    "stdout",
    "--log-format",
    "json"
  ];
  if (config.domain) args.push("--domain", config.domain);
  return new Promise((resolve, reject) => {
    const proc = spawn("ngrok", args, { stdio: [
      "ignore",
      "pipe",
      "pipe"
    ] });
    let settled = false;
    let outputBuffer = "";
    let timeout;

    // Reject at most once; optionally terminate the child so a failed
    // start does not leak a running ngrok process.
    const fail = (error, killChild) => {
      if (settled) return;
      settled = true;
      clearTimeout(timeout);
      if (killChild) proc.kill("SIGTERM");
      reject(error);
    };

    // Terminate ngrok and wait for it to close, giving up after 2 seconds.
    const stop = async () => {
      // Already gone: nothing to signal and no "close" event left to wait for.
      if (proc.exitCode !== null || proc.signalCode !== null) return;
      await new Promise((done) => {
        const fallback = setTimeout(done, 2e3);
        proc.once("close", () => {
          clearTimeout(fallback);
          done();
        });
        proc.kill("SIGTERM");
      });
    };

    const processLine = (line) => {
      let log;
      try {
        log = JSON.parse(line);
      } catch {
        // Non-JSON output is expected noise; skip it.
        return;
      }
      if (log === null || typeof log !== "object" || !log.url) return;
      // Accept either the explicit "started tunnel" message or any entry
      // that pairs a local address with a public URL.
      if (log.msg !== "started tunnel" && !log.addr) return;
      settled = true;
      clearTimeout(timeout);
      const fullUrl = log.url + config.path;
      console.log(`[voice-call] ngrok tunnel active: ${fullUrl}`);
      resolve({
        publicUrl: fullUrl,
        provider: "ngrok",
        stop
      });
    };

    timeout = setTimeout(() => {
      fail(/* @__PURE__ */ new Error("ngrok startup timed out (30s)"), true);
    }, 3e4);

    proc.stdout.on("data", (data) => {
      // Keep the listener attached so the pipe stays drained, but stop
      // parsing once the outcome is decided.
      if (settled) return;
      outputBuffer += data.toString();
      const lines = outputBuffer.split("\n");
      outputBuffer = lines.pop() || "";
      for (const line of lines) {
        if (settled) break;
        if (line.trim()) processLine(line);
      }
    });
    proc.stderr.on("data", (data) => {
      const msg = data.toString();
      if (msg.includes("ERR_NGROK")) fail(/* @__PURE__ */ new Error(`ngrok error: ${msg}`), true);
    });
    proc.on("error", (err) => {
      fail(/* @__PURE__ */ new Error(`Failed to start ngrok: ${err.message}`), false);
    });
    proc.on("close", (code) => {
      fail(/* @__PURE__ */ new Error(`ngrok exited unexpectedly with code ${code}`), false);
    });
  });
}
1672
/**
 * Run an ngrok command and wait for completion.
 *
 * @param {string[]} args - Arguments passed to the `ngrok` binary.
 * @returns {Promise<string>} Captured stdout when the command exits with
 *   code 0. Rejects with the captured stderr (or stdout) on a non-zero
 *   exit, or with the spawn error when the process cannot be started.
 */
async function runNgrokCommand(args) {
  const child = spawn("ngrok", args, { stdio: ["ignore", "pipe", "pipe"] });
  const captured = { stdout: "", stderr: "" };
  for (const channel of ["stdout", "stderr"]) {
    child[channel].on("data", (chunk) => {
      captured[channel] += chunk.toString();
    });
  }
  return new Promise((resolve, reject) => {
    child.on("close", (exitCode) => {
      if (exitCode !== 0) {
        reject(/* @__PURE__ */ new Error(`ngrok command failed: ${captured.stderr || captured.stdout}`));
        return;
      }
      resolve(captured.stdout);
    });
    child.on("error", reject);
  });
}
1697
/**
 * Start a Tailscale serve/funnel tunnel.
 *
 * @param {{mode: string, port: number, path: string}} config - Tailscale
 *   subcommand, local port, and webhook path (a leading "/" is added when
 *   missing).
 * @returns {Promise<{publicUrl: string, provider: string, stop: () => Promise<void>}>}
 *   Resolves when the tailscale command exits with code 0. Rejects when the
 *   DNS name cannot be determined, the command takes longer than 10
 *   seconds, exits non-zero, or cannot be spawned.
 */
async function startTailscaleTunnel(config) {
  const { mode, port } = config;
  const dnsName = await getTailscaleDnsName();
  if (!dnsName) throw new Error("Could not get Tailscale DNS name. Is Tailscale running?");
  const path = config.path.startsWith("/") ? config.path : `/${config.path}`;
  const cliArgs = [mode, "--bg", "--yes", "--set-path", path, `http://127.0.0.1:${port}${path}`];

  return new Promise((resolve, reject) => {
    const child = spawn("tailscale", cliArgs, { stdio: ["ignore", "pipe", "pipe"] });

    const watchdog = setTimeout(() => {
      child.kill("SIGKILL");
      reject(/* @__PURE__ */ new Error(`Tailscale ${mode} timed out`));
    }, 1e4);

    child.on("close", (exitCode) => {
      clearTimeout(watchdog);
      if (exitCode !== 0) {
        reject(/* @__PURE__ */ new Error(`Tailscale ${mode} failed with code ${exitCode}`));
        return;
      }
      const publicUrl = `https://${dnsName}${path}`;
      console.log(`[voice-call] Tailscale ${mode} active: ${publicUrl}`);
      resolve({
        publicUrl,
        provider: `tailscale-${mode}`,
        stop: async () => {
          await stopTailscaleTunnel(mode, path);
        }
      });
    });

    child.on("error", (spawnError) => {
      clearTimeout(watchdog);
      reject(spawnError);
    });
  });
}
1742
/**
 * Stop a Tailscale serve/funnel tunnel.
 *
 * Best-effort: the returned promise always resolves, whether the command
 * succeeds, fails, cannot be spawned, or is killed after 5 seconds.
 *
 * @param {string} mode - Tailscale subcommand ("serve" or "funnel").
 * @param {string} path - Path that was exposed and should be turned off.
 * @returns {Promise<void>}
 */
async function stopTailscaleTunnel(mode, path) {
  return new Promise((resolve) => {
    const proc = spawn("tailscale", [
      mode,
      "off",
      path
    ], { stdio: "ignore" });
    let timeout;
    const done = () => {
      clearTimeout(timeout);
      resolve();
    };
    timeout = setTimeout(() => {
      proc.kill("SIGKILL");
      done();
    }, 5e3);
    proc.on("close", done);
    // Without this listener a missing `tailscale` binary surfaces as an
    // unhandled "error" event and takes the whole process down.
    proc.on("error", done);
  });
}
1762
/**
 * Start a tunnel based on configuration.
 *
 * @param {object} config - Reads `provider`, `port`, `path`, and for ngrok
 *   also `ngrokAuthToken` and `ngrokDomain`.
 * @returns {Promise<object|null>} The started tunnel, or `null` when
 *   `provider` is not "ngrok", "tailscale-serve" or "tailscale-funnel".
 */
async function startTunnel(config) {
  const { provider, port, path } = config;
  if (provider === "ngrok") {
    return startNgrokTunnel({
      port,
      path,
      authToken: config.ngrokAuthToken,
      domain: config.ngrokDomain
    });
  }
  if (provider === "tailscale-serve") {
    return startTailscaleTunnel({ mode: "serve", port, path });
  }
  if (provider === "tailscale-funnel") {
    return startTailscaleTunnel({ mode: "funnel", port, path });
  }
  return null;
}
1786
+ //#endregion
1787
+ //#region extensions/voice-call/src/webhook-exposure.ts
1788
/**
 * Tell whether a telephony provider needs a publicly reachable webhook.
 *
 * @param {string|undefined} providerName - Configured provider name.
 * @returns {boolean} `true` for "twilio", "telnyx" and "plivo".
 */
function providerRequiresPublicWebhook(providerName) {
  switch (providerName) {
    case "twilio":
    case "telnyx":
    case "plivo":
      return true;
    default:
      return false;
  }
}
1791
/**
 * Tell whether a hostname is classified as blocked by
 * `isBlockedHostnameOrIp`, which this module treats as local-only.
 *
 * @param {string} hostname - Hostname or IP taken from a webhook URL.
 * @returns {*} The result of `isBlockedHostnameOrIp(hostname)`.
 */
function isLocalOnlyWebhookHost(hostname) {
  const blocked = isBlockedHostnameOrIp(hostname);
  return blocked;
}
1794
/**
 * Tell whether a webhook URL points at a local-only host.
 *
 * @param {string} webhookUrl - Candidate public webhook URL.
 * @returns {boolean} Result of the host check; `false` when the URL cannot
 *   be parsed or the check throws.
 */
function isProviderUnreachableWebhookUrl(webhookUrl) {
  try {
    const { hostname } = new URL(webhookUrl);
    return isLocalOnlyWebhookHost(hostname);
  } catch {
    return false;
  }
}
1801
/**
 * Summarize whether the configured provider can reach the webhook server.
 *
 * Checks, in order: the mock provider (always fine), an explicit
 * `publicUrl` (rejected when it is local/private), a configured tunnel,
 * and Tailscale exposure.
 *
 * @param {object} config - Reads `provider`, `publicUrl`,
 *   `tunnel.provider` and `tailscale.mode`.
 * @returns {{ok: boolean, configured: boolean, message: string}}
 */
function resolveWebhookExposureStatus(config) {
  const report = (ok, configured, message) => ({ ok, configured, message });
  const { provider, publicUrl } = config;

  if (provider === "mock") {
    return report(true, true, "Mock provider does not need a public webhook");
  }
  if (publicUrl) {
    return isProviderUnreachableWebhookUrl(publicUrl)
      ? report(false, true, `Public webhook URL is local/private and cannot be reached by ${provider ?? "the provider"}: ${publicUrl}`)
      : report(true, true, `Public webhook URL configured: ${publicUrl}`);
  }

  const tunnelProvider = config.tunnel?.provider;
  if (tunnelProvider && tunnelProvider !== "none") {
    return report(true, true, "Webhook exposure configured through tunnel");
  }

  const tailscaleMode = config.tailscale?.mode;
  if (tailscaleMode && tailscaleMode !== "off") {
    return report(true, true, "Webhook exposure configured through Tailscale");
  }

  return report(false, false, "Set publicUrl or configure tunnel/tailscale so the provider can reach webhooks");
}
1835
+ //#endregion
1836
+ //#region extensions/voice-call/src/http-headers.ts
1837
/**
 * Look up a header value by name.
 *
 * The normalized name is tried as a direct key first; when that yields
 * `null`/`undefined`, every key is normalized and compared instead.
 *
 * @param {object} headers - Header map; values may be strings or arrays.
 * @param {string} name - Header name to look up.
 * @returns {*} The first element when the value is an array, otherwise the
 *   value itself (`undefined` when no key matches).
 */
function getHeader(headers, name) {
  const wanted = normalizeLowercaseStringOrEmpty(name);
  let found = headers[wanted];
  if (found === null || found === undefined) {
    const match = Object.entries(headers).find(
      ([key]) => normalizeLowercaseStringOrEmpty(key) === wanted
    );
    found = match === undefined ? undefined : match[1];
  }
  return Array.isArray(found) ? found[0] : found;
}
1843
+ //#endregion
1844
+ //#region extensions/voice-call/src/media-stream.ts
1845
// Default time (ms) a connection may wait for its "start" event before being closed.
const DEFAULT_PRE_START_TIMEOUT_MS = 5000;
// Default cap on connections that have not yet sent "start".
const DEFAULT_MAX_PENDING_CONNECTIONS = 32;
// Default cap on not-yet-started connections from a single IP.
const DEFAULT_MAX_PENDING_CONNECTIONS_PER_IP = 4;
// Default cap on open plus in-flight media stream connections.
const DEFAULT_MAX_CONNECTIONS = 128;
// Largest inbound WebSocket message accepted: 64 KiB.
const MAX_INBOUND_MESSAGE_BYTES = 65536;
// Outbound buffer size above which a stream is closed for backpressure: 1 MiB.
const MAX_WS_BUFFERED_BYTES = 1048576;
// Maximum characters of a WebSocket close reason written to the log.
const CLOSE_REASON_LOG_MAX_CHARS = 120;
1852
/**
 * Make a string safe to log on one line.
 *
 * Control characters become spaces, whitespace runs collapse to a single
 * space, and the result is trimmed. Text longer than `maxChars` is cut to
 * `maxChars` characters followed by "...".
 *
 * @param {string} value - Raw text.
 * @param {number} maxChars - Maximum length before truncation.
 * @returns {string} Sanitized, possibly truncated text.
 */
function sanitizeLogText(value, maxChars) {
  const withoutControls = value.replace(/\p{Cc}/gu, " ");
  const collapsed = withoutControls.replace(/\s+/g, " ").trim();
  return collapsed.length > maxChars ? `${collapsed.slice(0, maxChars)}...` : collapsed;
}
1857
/**
 * Convert WebSocket message data into a single Buffer.
 *
 * @param {Buffer|Buffer[]|ArrayBuffer} data - Raw message payload.
 * @returns {Buffer} The Buffer itself, the concatenation of an array of
 *   chunks, or a Buffer created from any other input.
 */
function normalizeWsMessageData(data) {
  if (Array.isArray(data)) return Buffer.concat(data);
  return Buffer.isBuffer(data) ? data : Buffer.from(data);
}
1862
/**
 * Parse a Twilio media stream WebSocket message as JSON.
 *
 * @param {Buffer|Buffer[]|ArrayBuffer} data - Raw message payload.
 * @returns {*} The parsed JSON value.
 * @throws {Error} When the payload is not valid JSON; the parse error is
 *   attached as `cause`.
 */
function parseTwilioMediaMessage(data) {
  const payload = normalizeWsMessageData(data);
  let parsed;
  try {
    parsed = JSON.parse(payload.toString("utf8"));
  } catch (cause) {
    throw new Error("Twilio media stream message was malformed JSON", { cause });
  }
  return parsed;
}
1870
+ /**
1871
+ * Manages WebSocket connections for Twilio media streams.
1872
+ */
1873
+ var MediaStreamHandler = class {
1874
+ constructor(config) {
1875
+ this.wss = null;
1876
+ this.sessions = /* @__PURE__ */ new Map();
1877
+ this.pendingConnections = /* @__PURE__ */ new Map();
1878
+ this.pendingByIp = /* @__PURE__ */ new Map();
1879
+ this.inflightUpgrades = 0;
1880
+ this.ttsQueues = /* @__PURE__ */ new Map();
1881
+ this.ttsPlaying = /* @__PURE__ */ new Map();
1882
+ this.ttsActiveControllers = /* @__PURE__ */ new Map();
1883
+ this.config = config;
1884
+ this.preStartTimeoutMs = config.preStartTimeoutMs ?? DEFAULT_PRE_START_TIMEOUT_MS;
1885
+ this.maxPendingConnections = config.maxPendingConnections ?? DEFAULT_MAX_PENDING_CONNECTIONS;
1886
+ this.maxPendingConnectionsPerIp = config.maxPendingConnectionsPerIp ?? DEFAULT_MAX_PENDING_CONNECTIONS_PER_IP;
1887
+ this.maxConnections = config.maxConnections ?? DEFAULT_MAX_CONNECTIONS;
1888
+ }
1889
+ /**
1890
+ * Handle WebSocket upgrade for media stream connections.
1891
+ */
1892
+ handleUpgrade(request, socket, head) {
1893
+ if (!this.wss) {
1894
+ this.wss = new WebSocketServer({
1895
+ noServer: true,
1896
+ maxPayload: MAX_INBOUND_MESSAGE_BYTES
1897
+ });
1898
+ this.wss.on("connection", (ws, req) => this.handleConnection(ws, req));
1899
+ }
1900
+ if (this.getCurrentConnectionCount() >= this.maxConnections) {
1901
+ this.rejectUpgrade(socket, 503, "Too many media stream connections");
1902
+ return;
1903
+ }
1904
+ this.inflightUpgrades += 1;
1905
+ let released = false;
1906
+ const releaseUpgradeReservation = () => {
1907
+ if (released) return;
1908
+ released = true;
1909
+ this.inflightUpgrades = Math.max(0, this.inflightUpgrades - 1);
1910
+ };
1911
+ const handleUpgradeAbort = () => {
1912
+ socket.removeListener("error", handleUpgradeAbort);
1913
+ socket.removeListener("close", handleUpgradeAbort);
1914
+ releaseUpgradeReservation();
1915
+ };
1916
+ socket.once("error", handleUpgradeAbort);
1917
+ socket.once("close", handleUpgradeAbort);
1918
+ try {
1919
+ this.wss.handleUpgrade(request, socket, head, (ws) => {
1920
+ socket.removeListener("error", handleUpgradeAbort);
1921
+ socket.removeListener("close", handleUpgradeAbort);
1922
+ releaseUpgradeReservation();
1923
+ this.wss?.emit("connection", ws, request);
1924
+ });
1925
+ } catch (error) {
1926
+ socket.removeListener("error", handleUpgradeAbort);
1927
+ socket.removeListener("close", handleUpgradeAbort);
1928
+ releaseUpgradeReservation();
1929
+ throw error;
1930
+ }
1931
+ }
1932
+ /**
1933
+ * Handle new WebSocket connection from Twilio.
1934
+ */
1935
+ async handleConnection(ws, _request) {
1936
+ let session = null;
1937
+ const streamToken = this.getStreamToken(_request);
1938
+ const ip = this.getClientIp(_request);
1939
+ if (!this.registerPendingConnection(ws, ip)) {
1940
+ ws.close(1013, "Too many pending media stream connections");
1941
+ return;
1942
+ }
1943
+ ws.on("message", async (data) => {
1944
+ try {
1945
+ const message = parseTwilioMediaMessage(data);
1946
+ switch (message.event) {
1947
+ case "connected":
1948
+ console.log("[MediaStream] Twilio connected");
1949
+ break;
1950
+ case "start":
1951
+ session = this.handleStart(ws, message, streamToken);
1952
+ if (session) this.clearPendingConnection(ws);
1953
+ break;
1954
+ case "media":
1955
+ if (session && message.media?.payload) {
1956
+ const audioBuffer = Buffer.from(message.media.payload, "base64");
1957
+ const turnId = this.ensureActiveTurn(session);
1958
+ this.emitTalkEvent(session, {
1959
+ type: "input.audio.delta",
1960
+ turnId,
1961
+ payload: {
1962
+ callId: session.callId,
1963
+ streamSid: session.streamSid,
1964
+ bytes: audioBuffer.byteLength
1965
+ }
1966
+ });
1967
+ session.sttSession.sendAudio(audioBuffer);
1968
+ }
1969
+ break;
1970
+ case "stop":
1971
+ if (session) {
1972
+ this.handleStop(session);
1973
+ session = null;
1974
+ }
1975
+ break;
1976
+ case "clear":
1977
+ case "mark": break;
1978
+ }
1979
+ } catch (error) {
1980
+ console.error("[MediaStream] Error processing message:", error);
1981
+ }
1982
+ });
1983
+ ws.on("close", (code, reason) => {
1984
+ const reasonText = sanitizeLogText(Buffer.isBuffer(reason) ? reason.toString("utf8") : String(reason || ""), CLOSE_REASON_LOG_MAX_CHARS);
1985
+ console.log(`[MediaStream] WebSocket closed (code: ${code}, reason: ${reasonText || "none"})`);
1986
+ this.clearPendingConnection(ws);
1987
+ if (session) this.handleStop(session);
1988
+ });
1989
+ ws.on("error", (error) => {
1990
+ console.error("[MediaStream] WebSocket error:", error);
1991
+ });
1992
+ }
1993
+ /**
1994
+ * Handle stream start event.
1995
+ */
1996
+ handleStart(ws, message, streamToken) {
1997
+ const streamSid = message.streamSid || "";
1998
+ const callSid = message.start?.callSid || "";
1999
+ const effectiveToken = message.start?.customParameters?.token ?? streamToken;
2000
+ console.log(`[MediaStream] Stream started: ${streamSid} (call: ${callSid})`);
2001
+ if (!callSid) {
2002
+ console.warn("[MediaStream] Missing callSid; closing stream");
2003
+ ws.close(1008, "Missing callSid");
2004
+ return null;
2005
+ }
2006
+ if (this.config.shouldAcceptStream && !this.config.shouldAcceptStream({
2007
+ callId: callSid,
2008
+ streamSid,
2009
+ token: effectiveToken
2010
+ })) {
2011
+ console.warn(`[MediaStream] Rejecting stream for unknown call: ${callSid}`);
2012
+ ws.close(1008, "Unknown call");
2013
+ return null;
2014
+ }
2015
+ const session = {
2016
+ callId: callSid,
2017
+ streamSid,
2018
+ ws,
2019
+ sttSession: this.config.transcriptionProvider.createSession({
2020
+ cfg: this.config.cfg,
2021
+ providerConfig: this.config.providerConfig,
2022
+ onPartial: (partial) => {
2023
+ const session = this.sessions.get(streamSid);
2024
+ if (session) this.emitTalkEvent(session, {
2025
+ type: "transcript.delta",
2026
+ turnId: this.ensureActiveTurn(session),
2027
+ payload: {
2028
+ callId: callSid,
2029
+ streamSid,
2030
+ text: partial,
2031
+ role: "user"
2032
+ }
2033
+ });
2034
+ this.config.onPartialTranscript?.(callSid, partial);
2035
+ },
2036
+ onTranscript: (transcript) => {
2037
+ const session = this.sessions.get(streamSid);
2038
+ if (session) {
2039
+ const turnId = this.ensureActiveTurn(session);
2040
+ this.emitTalkEvent(session, {
2041
+ type: "input.audio.committed",
2042
+ turnId,
2043
+ final: true,
2044
+ payload: {
2045
+ callId: callSid,
2046
+ streamSid
2047
+ }
2048
+ });
2049
+ this.emitTalkEvent(session, {
2050
+ type: "transcript.done",
2051
+ turnId,
2052
+ final: true,
2053
+ payload: {
2054
+ callId: callSid,
2055
+ streamSid,
2056
+ text: transcript,
2057
+ role: "user"
2058
+ }
2059
+ });
2060
+ }
2061
+ this.config.onTranscript?.(callSid, transcript);
2062
+ },
2063
+ onSpeechStart: () => {
2064
+ const session = this.sessions.get(streamSid);
2065
+ if (session) this.ensureActiveTurn(session);
2066
+ this.config.onSpeechStart?.(callSid);
2067
+ },
2068
+ onError: (error) => {
2069
+ console.warn("[MediaStream] Transcription session error:", error.message);
2070
+ const session = this.sessions.get(streamSid);
2071
+ if (session) this.emitTalkEvent(session, {
2072
+ type: "session.error",
2073
+ final: true,
2074
+ payload: {
2075
+ callId: callSid,
2076
+ streamSid,
2077
+ error: error.message
2078
+ }
2079
+ });
2080
+ }
2081
+ }),
2082
+ talk: this.createTalkEvents(callSid, streamSid)
2083
+ };
2084
+ this.sessions.set(streamSid, session);
2085
+ this.config.onConnect?.(callSid, streamSid);
2086
+ this.emitTalkEvent(session, {
2087
+ type: "session.started",
2088
+ payload: {
2089
+ callId: callSid,
2090
+ streamSid,
2091
+ provider: this.config.transcriptionProvider.id
2092
+ }
2093
+ });
2094
+ this.connectTranscriptionAndNotify(session);
2095
+ return session;
2096
+ }
2097
+ async connectTranscriptionAndNotify(session) {
2098
+ try {
2099
+ await session.sttSession.connect();
2100
+ } catch (error) {
2101
+ console.warn("[MediaStream] STT connection failed; closing media stream:", error instanceof Error ? error.message : String(error));
2102
+ this.emitTalkEvent(session, {
2103
+ type: "session.error",
2104
+ final: true,
2105
+ payload: {
2106
+ callId: session.callId,
2107
+ streamSid: session.streamSid,
2108
+ error: error instanceof Error ? error.message : String(error)
2109
+ }
2110
+ });
2111
+ if (this.sessions.get(session.streamSid) === session && session.ws.readyState === WebSocket$1.OPEN) session.ws.close(1011, "STT connection failed");
2112
+ else session.sttSession.close();
2113
+ return;
2114
+ }
2115
+ if (this.sessions.get(session.streamSid) !== session || session.ws.readyState !== WebSocket$1.OPEN) {
2116
+ session.sttSession.close();
2117
+ return;
2118
+ }
2119
+ this.emitTalkEvent(session, {
2120
+ type: "session.ready",
2121
+ payload: {
2122
+ callId: session.callId,
2123
+ streamSid: session.streamSid
2124
+ }
2125
+ });
2126
+ this.config.onTranscriptionReady?.(session.callId, session.streamSid);
2127
+ }
2128
+ /**
2129
+ * Handle stream stop event.
2130
+ */
2131
+ handleStop(session) {
2132
+ console.log(`[MediaStream] Stream stopped: ${session.streamSid}`);
2133
+ this.clearTtsState(session.streamSid);
2134
+ session.sttSession.close();
2135
+ this.sessions.delete(session.streamSid);
2136
+ this.emitTalkEvent(session, {
2137
+ type: "session.closed",
2138
+ final: true,
2139
+ payload: {
2140
+ callId: session.callId,
2141
+ streamSid: session.streamSid
2142
+ }
2143
+ });
2144
+ this.config.onDisconnect?.(session.callId, session.streamSid);
2145
+ }
2146
+ getStreamToken(request) {
2147
+ if (!request.url || !request.headers.host) return;
2148
+ try {
2149
+ return new URL(request.url, `http://${request.headers.host}`).searchParams.get("token") ?? void 0;
2150
+ } catch {
2151
+ return;
2152
+ }
2153
+ }
2154
+ getClientIp(request) {
2155
+ const resolvedIp = this.config.resolveClientIp?.(request)?.trim();
2156
+ if (resolvedIp) return resolvedIp;
2157
+ return request.socket.remoteAddress || "unknown";
2158
+ }
2159
+ getCurrentConnectionCount() {
2160
+ return this.wss ? this.wss.clients.size + this.inflightUpgrades : this.inflightUpgrades;
2161
+ }
2162
+ registerPendingConnection(ws, ip) {
2163
+ if (this.pendingConnections.size >= this.maxPendingConnections) {
2164
+ console.warn("[MediaStream] Rejecting connection: pending connection limit reached");
2165
+ return false;
2166
+ }
2167
+ const pendingForIp = this.pendingByIp.get(ip) ?? 0;
2168
+ if (pendingForIp >= this.maxPendingConnectionsPerIp) {
2169
+ console.warn(`[MediaStream] Rejecting connection: pending per-IP limit reached (${ip})`);
2170
+ return false;
2171
+ }
2172
+ const timeout = setTimeout(() => {
2173
+ if (!this.pendingConnections.has(ws)) return;
2174
+ console.warn(`[MediaStream] Closing pre-start idle connection after ${this.preStartTimeoutMs}ms (${ip})`);
2175
+ ws.close(1008, "Start timeout");
2176
+ }, this.preStartTimeoutMs);
2177
+ timeout.unref?.();
2178
+ this.pendingConnections.set(ws, {
2179
+ ip,
2180
+ timeout
2181
+ });
2182
+ this.pendingByIp.set(ip, pendingForIp + 1);
2183
+ return true;
2184
+ }
2185
+ clearPendingConnection(ws) {
2186
+ const pending = this.pendingConnections.get(ws);
2187
+ if (!pending) return;
2188
+ clearTimeout(pending.timeout);
2189
+ this.pendingConnections.delete(ws);
2190
+ const current = this.pendingByIp.get(pending.ip) ?? 0;
2191
+ if (current <= 1) {
2192
+ this.pendingByIp.delete(pending.ip);
2193
+ return;
2194
+ }
2195
+ this.pendingByIp.set(pending.ip, current - 1);
2196
+ }
2197
+ rejectUpgrade(socket, statusCode, message) {
2198
+ const statusText = statusCode === 429 ? "Too Many Requests" : "Service Unavailable";
2199
+ const body = `${message}\n`;
2200
+ socket.write(`HTTP/1.1 ${statusCode} ${statusText}\r\nConnection: close\r
2201
+ Content-Type: text/plain; charset=utf-8\r
2202
+ Content-Length: ${Buffer.byteLength(body)}\r\n\r
2203
+ ` + body);
2204
+ socket.destroy();
2205
+ }
2206
+ /**
2207
+ * Get an active session with an open WebSocket, or undefined if unavailable.
2208
+ */
2209
+ getOpenSession(streamSid) {
2210
+ const session = this.sessions.get(streamSid);
2211
+ return session?.ws.readyState === WebSocket$1.OPEN ? session : void 0;
2212
+ }
2213
+ /**
2214
+ * Send a message to a stream's WebSocket if available.
2215
+ */
2216
+ sendToStream(streamSid, message) {
2217
+ const session = this.sessions.get(streamSid);
2218
+ if (!session) return {
2219
+ sent: false,
2220
+ bufferedBeforeBytes: 0,
2221
+ bufferedAfterBytes: 0
2222
+ };
2223
+ const readyState = session.ws.readyState;
2224
+ const bufferedBeforeBytes = session.ws.bufferedAmount;
2225
+ if (readyState !== WebSocket$1.OPEN) return {
2226
+ sent: false,
2227
+ readyState,
2228
+ bufferedBeforeBytes,
2229
+ bufferedAfterBytes: session.ws.bufferedAmount
2230
+ };
2231
+ if (bufferedBeforeBytes > MAX_WS_BUFFERED_BYTES) {
2232
+ try {
2233
+ session.ws.close(1013, "Backpressure: send buffer exceeded");
2234
+ } catch {}
2235
+ return {
2236
+ sent: false,
2237
+ readyState,
2238
+ bufferedBeforeBytes,
2239
+ bufferedAfterBytes: session.ws.bufferedAmount
2240
+ };
2241
+ }
2242
+ try {
2243
+ session.ws.send(JSON.stringify(message));
2244
+ const bufferedAfterBytes = session.ws.bufferedAmount;
2245
+ if (bufferedAfterBytes > MAX_WS_BUFFERED_BYTES) {
2246
+ try {
2247
+ session.ws.close(1013, "Backpressure: send buffer exceeded");
2248
+ } catch {}
2249
+ return {
2250
+ sent: false,
2251
+ readyState,
2252
+ bufferedBeforeBytes,
2253
+ bufferedAfterBytes
2254
+ };
2255
+ }
2256
+ return {
2257
+ sent: true,
2258
+ readyState,
2259
+ bufferedBeforeBytes,
2260
+ bufferedAfterBytes
2261
+ };
2262
+ } catch {
2263
+ return {
2264
+ sent: false,
2265
+ readyState,
2266
+ bufferedBeforeBytes,
2267
+ bufferedAfterBytes: session.ws.bufferedAmount
2268
+ };
2269
+ }
2270
+ }
2271
+ /**
2272
+ * Send audio to a specific stream (for TTS playback).
2273
+ * Audio should be mu-law encoded at 8kHz mono.
2274
+ */
2275
+ sendAudio(streamSid, muLawAudio) {
2276
+ const session = this.getOpenSession(streamSid);
2277
+ if (session) this.emitTalkEvent(session, {
2278
+ type: "output.audio.delta",
2279
+ turnId: this.ensureActiveTurn(session),
2280
+ payload: {
2281
+ callId: session.callId,
2282
+ streamSid,
2283
+ bytes: muLawAudio.byteLength
2284
+ }
2285
+ });
2286
+ return this.sendToStream(streamSid, {
2287
+ event: "media",
2288
+ streamSid,
2289
+ media: { payload: muLawAudio.toString("base64") }
2290
+ });
2291
+ }
2292
+ /**
2293
+ * Send a mark event to track audio playback position.
2294
+ */
2295
+ sendMark(streamSid, name) {
2296
+ return this.sendToStream(streamSid, {
2297
+ event: "mark",
2298
+ streamSid,
2299
+ mark: { name }
2300
+ });
2301
+ }
2302
+ /**
2303
+ * Clear audio buffer (interrupt playback).
2304
+ */
2305
+ clearAudio(streamSid) {
2306
+ return this.sendToStream(streamSid, {
2307
+ event: "clear",
2308
+ streamSid
2309
+ });
2310
+ }
2311
+ /**
2312
+ * Queue a TTS operation for sequential playback.
2313
+ * Only one TTS operation plays at a time per stream to prevent overlap.
2314
+ */
2315
+ async queueTts(streamSid, playFn) {
2316
+ const queue = this.getTtsQueue(streamSid);
2317
+ let resolveEntry;
2318
+ let rejectEntry;
2319
+ const promise = new Promise((resolve, reject) => {
2320
+ resolveEntry = resolve;
2321
+ rejectEntry = reject;
2322
+ });
2323
+ queue.push({
2324
+ playFn,
2325
+ controller: new AbortController(),
2326
+ resolve: resolveEntry,
2327
+ reject: rejectEntry
2328
+ });
2329
+ if (!this.ttsPlaying.get(streamSid)) this.processQueue(streamSid);
2330
+ return promise;
2331
+ }
2332
+ /**
2333
+ * Clear TTS queue and interrupt current playback (barge-in).
2334
+ */
2335
+ clearTtsQueue(streamSid, _reason = "unspecified") {
2336
+ const queue = this.getTtsQueue(streamSid);
2337
+ this.resolveQueuedTtsEntries(queue);
2338
+ this.ttsActiveControllers.get(streamSid)?.abort();
2339
+ const session = this.sessions.get(streamSid);
2340
+ if (session?.talk.activeTurnId) {
2341
+ const cancelled = session.talk.cancelTurn({ payload: {
2342
+ callId: session.callId,
2343
+ streamSid,
2344
+ reason: _reason
2345
+ } });
2346
+ if (cancelled.ok) this.config.onTalkEvent?.(session.callId, session.streamSid, cancelled.event);
2347
+ }
2348
+ this.clearAudio(streamSid);
2349
+ }
2350
+ /**
2351
+ * Get active session by call ID.
2352
+ */
2353
+ getSessionByCallId(callId) {
2354
+ return [...this.sessions.values()].find((session) => session.callId === callId);
2355
+ }
2356
+ /**
2357
+ * Close all sessions.
2358
+ */
2359
+ closeAll() {
2360
+ for (const session of this.sessions.values()) {
2361
+ this.clearTtsState(session.streamSid);
2362
+ session.sttSession.close();
2363
+ session.ws.close();
2364
+ }
2365
+ this.sessions.clear();
2366
+ }
2367
+ getTtsQueue(streamSid) {
2368
+ const existing = this.ttsQueues.get(streamSid);
2369
+ if (existing) return existing;
2370
+ const queue = [];
2371
+ this.ttsQueues.set(streamSid, queue);
2372
+ return queue;
2373
+ }
2374
+ /**
2375
+ * Process the TTS queue for a stream.
2376
+ * Uses iterative approach to avoid stack accumulation from recursion.
2377
+ */
2378
+ async processQueue(streamSid) {
2379
+ this.ttsPlaying.set(streamSid, true);
2380
+ while (true) {
2381
+ const queue = this.ttsQueues.get(streamSid);
2382
+ if (!queue || queue.length === 0) {
2383
+ this.ttsPlaying.set(streamSid, false);
2384
+ this.ttsActiveControllers.delete(streamSid);
2385
+ return;
2386
+ }
2387
+ const entry = queue.shift();
2388
+ this.ttsActiveControllers.set(streamSid, entry.controller);
2389
+ const session = this.sessions.get(streamSid);
2390
+ let playbackTurnId;
2391
+ try {
2392
+ if (session) {
2393
+ playbackTurnId = this.ensureActiveTurn(session);
2394
+ this.emitTalkEvent(session, {
2395
+ type: "output.audio.started",
2396
+ turnId: playbackTurnId,
2397
+ payload: {
2398
+ callId: session.callId,
2399
+ streamSid
2400
+ }
2401
+ });
2402
+ }
2403
+ await entry.playFn(entry.controller.signal);
2404
+ if (entry.controller.signal.aborted) {
2405
+ entry.resolve();
2406
+ continue;
2407
+ }
2408
+ if (session) {
2409
+ const turnId = playbackTurnId ?? this.ensureActiveTurn(session);
2410
+ this.emitTalkEvent(session, {
2411
+ type: "output.audio.done",
2412
+ turnId,
2413
+ final: true,
2414
+ payload: {
2415
+ callId: session.callId,
2416
+ streamSid
2417
+ }
2418
+ });
2419
+ if (session.talk.activeTurnId) {
2420
+ const ended = session.talk.endTurn({ payload: {
2421
+ callId: session.callId,
2422
+ streamSid
2423
+ } });
2424
+ if (ended.ok) this.config.onTalkEvent?.(session.callId, session.streamSid, ended.event);
2425
+ }
2426
+ }
2427
+ entry.resolve();
2428
+ } catch (error) {
2429
+ if (entry.controller.signal.aborted) entry.resolve();
2430
+ else {
2431
+ console.error("[MediaStream] TTS playback error:", error);
2432
+ entry.reject(error);
2433
+ }
2434
+ } finally {
2435
+ if (this.ttsActiveControllers.get(streamSid) === entry.controller) this.ttsActiveControllers.delete(streamSid);
2436
+ }
2437
+ }
2438
+ }
2439
+ createTalkEvents(callId, streamSid) {
2440
+ return createTalkSessionController({
2441
+ sessionId: `voice-call:${callId}:${streamSid}`,
2442
+ mode: "stt-tts",
2443
+ transport: "gateway-relay",
2444
+ brain: "agent-consult",
2445
+ provider: this.config.transcriptionProvider.id,
2446
+ turnIdPrefix: `${streamSid}:turn`
2447
+ }, { onEvent: recordTalkObservabilityEvent });
2448
+ }
2449
+ emitTalkEvent(session, input) {
2450
+ const event = session.talk.emit(input);
2451
+ this.config.onTalkEvent?.(session.callId, session.streamSid, event);
2452
+ }
2453
+ ensureActiveTurn(session) {
2454
+ const turn = session.talk.ensureTurn({ payload: {
2455
+ callId: session.callId,
2456
+ streamSid: session.streamSid
2457
+ } });
2458
+ if (turn.event) this.config.onTalkEvent?.(session.callId, session.streamSid, turn.event);
2459
+ return turn.turnId;
2460
+ }
2461
+ clearTtsState(streamSid) {
2462
+ const queue = this.ttsQueues.get(streamSid);
2463
+ if (queue) this.resolveQueuedTtsEntries(queue);
2464
+ this.ttsActiveControllers.get(streamSid)?.abort();
2465
+ this.ttsActiveControllers.delete(streamSid);
2466
+ this.ttsPlaying.delete(streamSid);
2467
+ this.ttsQueues.delete(streamSid);
2468
+ }
2469
+ resolveQueuedTtsEntries(queue) {
2470
+ const pending = queue.splice(0);
2471
+ for (const entry of pending) {
2472
+ entry.controller.abort();
2473
+ entry.resolve();
2474
+ }
2475
+ }
2476
+ };
2477
+ //#endregion
2478
+ //#region extensions/voice-call/src/providers/shared/call-status.ts
2479
/**
 * Provider call statuses that mean the call is over, mapped to the end reason
 * reported for the call. Only own keys of this table count as terminal.
 */
const TERMINAL_PROVIDER_STATUS_TO_END_REASON = {
  completed: "completed",
  failed: "failed",
  busy: "busy",
  "no-answer": "no-answer",
  canceled: "hangup-bot"
};
/**
 * Normalize a provider status for table lookup.
 *
 * @param status - Raw status value from the provider.
 * @returns The result of `normalizeOptionalLowercaseString`, or `"unknown"` when that is empty or missing.
 */
function normalizeProviderStatus(status) {
  const normalized = normalizeOptionalLowercaseString(status);
  return normalized && normalized.length > 0 ? normalized : "unknown";
}
/**
 * Translate a provider status into an end reason.
 *
 * @param status - Raw status value from the provider.
 * @returns The mapped end reason, or `null` when the status is not terminal.
 */
function mapProviderStatusToEndReason(status) {
  const key = normalizeProviderStatus(status);
  // Own-key check: a status such as "constructor" or "toString" must not
  // resolve to an inherited Object.prototype member and be treated as terminal.
  if (!Object.hasOwn(TERMINAL_PROVIDER_STATUS_TO_END_REASON, key)) return null;
  return TERMINAL_PROVIDER_STATUS_TO_END_REASON[key] ?? null;
}
/**
 * Report whether a provider status means the call has ended.
 *
 * @param status - Raw status value from the provider.
 * @returns `true` when the status maps to an end reason.
 */
function isProviderStatusTerminal(status) {
  return mapProviderStatusToEndReason(status) !== null;
}
2496
+ //#endregion
2497
+ //#region extensions/voice-call/src/webhook/stale-call-reaper.ts
2498
/** How often the reaper scans the active calls, in milliseconds. */
const CHECK_INTERVAL_MS = 3e4;
/**
 * Start a periodic sweep that ends calls which were never answered and have
 * been active for longer than the configured age.
 *
 * Calls that have `answeredAt` set, or whose state is in `TerminalStates`, are skipped.
 *
 * @param params.manager - Provides `getActiveCalls()` and `endCall(callId)`.
 * @param params.staleCallReaperSeconds - Maximum age in seconds; a missing or non-positive value disables the reaper.
 * @returns A function that stops the sweep, or `null` when the reaper is disabled.
 */
function startStaleCallReaper(params) {
  const maxAgeSeconds = params.staleCallReaperSeconds;
  if (!maxAgeSeconds || maxAgeSeconds <= 0) return null;
  const maxAgeMs = maxAgeSeconds * 1e3;
  const interval = setInterval(() => {
    const now = Date.now();
    for (const call of params.manager.getActiveCalls()) {
      if (call.answeredAt || TerminalStates.has(call.state)) continue;
      const age = now - call.startedAt;
      if (age > maxAgeMs) {
        console.log(`[voice-call] Reaping stale call ${call.callId} (age: ${Math.round(age / 1e3)}s, state: ${call.state})`);
        // Fire and forget: a failed hang-up is only logged so the sweep keeps going.
        params.manager.endCall(call.callId).catch((err) => {
          console.warn(`[voice-call] Reaper failed to end call ${call.callId}:`, err);
        });
      }
    }
  }, CHECK_INTERVAL_MS);
  // Housekeeping only: the sweep must not keep the process alive on its own,
  // matching the disconnect-grace timer used elsewhere in this file.
  interval.unref?.();
  return () => {
    clearInterval(interval);
  };
}
2520
+ //#endregion
2521
+ //#region extensions/voice-call/src/webhook.ts
2522
// Body-read limits applied before a webhook has been authenticated.
const {
  maxBytes: MAX_WEBHOOK_BODY_BYTES,
  timeoutMs: WEBHOOK_BODY_TIMEOUT_MS
} = WEBHOOK_BODY_READ_DEFAULTS.preAuth;
// In-flight limiter key shared by all requests that report no remote address.
const MISSING_REMOTE_ADDRESS_IN_FLIGHT_KEY = "__voice_call_no_remote__";
// Delay between a media-stream disconnect and the automatic hang-up.
const STREAM_DISCONNECT_HANGUP_GRACE_MS = 2e3;
// Maximum number of transcript characters written to a log line.
const TRANSCRIPT_LOG_MAX_CHARS = 200;
// Cached dynamic-import promises, filled on first use by the loaders below.
let realtimeTranscriptionRuntimePromise;
let responseGeneratorModulePromise;
2529
/**
 * Lazily import the realtime transcription runtime chunk.
 * The import promise is cached, so the chunk is requested at most once.
 *
 * @returns The cached import promise.
 */
function loadRealtimeTranscriptionRuntime() {
  return (realtimeTranscriptionRuntimePromise ??= import("./realtime-transcription.runtime-CAbQKwCN.js"));
}
2533
/**
 * Lazily import the response generator chunk.
 * The import promise is cached, so the chunk is requested at most once.
 *
 * @returns The cached import promise.
 */
function loadResponseGeneratorModule() {
  return (responseGeneratorModulePromise ??= import("./response-generator-B-MjbtsM.js"));
}
2537
/**
 * Make a transcript safe and compact for a single log line.
 *
 * Control characters become spaces, whitespace runs collapse to one space, the
 * result is trimmed, and anything longer than `TRANSCRIPT_LOG_MAX_CHARS` UTF-16
 * units is cut and suffixed with `...`.
 *
 * @param value - Transcript text; `null`/`undefined` are treated as an empty string.
 * @returns The sanitized, possibly truncated text.
 */
function sanitizeTranscriptForLog(value) {
  // A logging helper should never throw on a missing transcript.
  const sanitized = String(value ?? "").replace(/\p{Cc}/gu, " ").replace(/\s+/g, " ").trim();
  if (sanitized.length <= TRANSCRIPT_LOG_MAX_CHARS) return sanitized;
  let end = TRANSCRIPT_LOG_MAX_CHARS;
  // Do not cut between the two halves of a surrogate pair: drop the dangling
  // high surrogate instead of logging a lone, invalid code unit.
  const lastUnit = sanitized.charCodeAt(end - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
  return `${sanitized.slice(0, end)}...`;
}
2542
/**
 * Record a talk event in a call's metadata.
 *
 * Replaces `call.metadata` with a copy that keeps the existing fields, updates
 * `lastTalkEventAt` / `lastTalkEventType`, and stores the ten most recent event
 * summaries in `recentTalkEvents`. Existing entries that are not plain
 * (non-array) objects are dropped.
 *
 * @param call - Call record whose `metadata` is replaced.
 * @param event - Talk event providing `timestamp`, `type`, `sessionId` and `turnId`.
 */
function appendRecentTalkEventMetadata(call, event) {
  const previous = call.metadata ?? {};
  const history = [];
  if (Array.isArray(previous.recentTalkEvents)) {
    for (const entry of previous.recentTalkEvents) {
      const isPlainRecord = Boolean(entry) && typeof entry === "object" && !Array.isArray(entry);
      if (isPlainRecord) history.push(entry);
    }
  }
  const { timestamp, type, sessionId, turnId } = event;
  history.push({
    at: timestamp,
    type,
    sessionId,
    turnId
  });
  call.metadata = {
    ...previous,
    lastTalkEventAt: timestamp,
    lastTalkEventType: type,
    recentTalkEvents: history.slice(-10)
  };
}
2558
/**
 * Parse a request target into a URL object, resolved against `http://localhost`.
 *
 * @param requestUrl - Request URL/path; `null`/`undefined` fall back to "/".
 * @returns The parsed URL.
 */
function buildRequestUrl(requestUrl) {
  const target = requestUrl ?? "/";
  const base = "http://localhost";
  return new URL$1(target, base);
}
2561
/**
 * Bring an IP address string into a comparable form.
 *
 * Trims the value, strips one pair of surrounding square brackets, lowercases
 * it, and reduces an IPv4-mapped IPv6 address (`::ffff:a.b.c.d`) to its
 * dotted IPv4 part.
 *
 * @param value - Address text; may be missing.
 * @returns The normalized address, or `undefined` for missing/blank input.
 */
function normalizeProxyIp(value) {
  const trimmed = value?.trim();
  if (!trimmed) return undefined;
  const isBracketed = trimmed.startsWith("[") && trimmed.endsWith("]");
  const lowered = (isBracketed ? trimmed.slice(1, -1) : trimmed).toLowerCase();
  const mappedPrefix = "::ffff:";
  if (!lowered.startsWith(mappedPrefix)) return lowered;
  const tail = lowered.slice(mappedPrefix.length);
  const looksLikeDottedIpv4 = /^\d{1,3}(?:\.\d{1,3}){3}$/.test(tail);
  return looksLikeDottedIpv4 ? tail : lowered;
}
2571
/**
 * Derive the client address from forwarding headers.
 *
 * With an `X-Forwarded-For` list: when no trusted proxies are configured the
 * first hop is returned; otherwise the list is walked from the right and the
 * first hop that is not a trusted proxy is returned, falling back to the first
 * hop when every hop is trusted. Without a usable list, the trimmed
 * `X-Real-IP` header is returned.
 *
 * @param request - Request whose `headers` are inspected.
 * @param trustedProxyIPs - Proxy addresses whose hops should be skipped.
 * @returns The resolved address, or `undefined` when no header provides one.
 */
function resolveForwardedClientIp(request, trustedProxyIPs) {
  const trusted = new Set();
  for (const ip of trustedProxyIPs) {
    const normalized = normalizeProxyIp(ip);
    if (normalized) trusted.add(normalized);
  }
  const forwardedFor = getHeader(request.headers, "x-forwarded-for");
  const hops = forwardedFor
    ? forwardedFor.split(",").map((part) => part.trim()).filter(Boolean)
    : [];
  if (hops.length === 0) {
    return getHeader(request.headers, "x-real-ip")?.trim() || void 0;
  }
  if (trusted.size === 0) return hops[0];
  // Right-most hop that is not one of our own proxies.
  for (const hop of [...hops].reverse()) {
    const isTrusted = trusted.has(normalizeProxyIp(hop) ?? "");
    if (!isTrusted) return hop;
  }
  return hops[0];
}
2587
/**
 * Turn a provider's parsed webhook result into the HTTP response to send.
 *
 * @param parsed - Parse result; `statusCode`, `providerResponseHeaders` and `providerResponseBody` are read.
 * @returns Response payload with status 200 and body "OK" as defaults.
 */
function normalizeWebhookResponse(parsed) {
  const statusCode = parsed.statusCode ?? 200;
  const headers = parsed.providerResponseHeaders;
  const body = parsed.providerResponseBody ?? "OK";
  return { statusCode, headers, body };
}
2594
/**
 * Build the webhook response that makes the provider reject a call:
 * an HTTP 200 carrying a TwiML `<Reject>` document.
 *
 * @returns Response payload with status, headers and XML body.
 */
function buildRealtimeRejectedTwiML() {
  const rejectDocument = '<?xml version="1.0" encoding="UTF-8"?><Response><Reject reason="rejected" /></Response>';
  return {
    statusCode: 200,
    headers: { "Content-Type": "text/xml" },
    body: rejectDocument
  };
}
2601
/**
 * HTTP server for receiving voice call webhooks from providers.
 *
 * Besides plain webhook POSTs it accepts WebSocket upgrades for the
 * media-stream handler (when streaming is enabled) and for an optional
 * realtime voice handler.
 */
var VoiceCallWebhookServer = class {
  /**
   * @param config - Voice-call config; stored after `normalizeVoiceCallConfig`.
   * @param manager - Call manager used to look up, update and end calls.
   * @param provider - Telephony provider used for webhook verification and parsing.
   * @param coreConfig - Optional core config (stored as `null` when absent).
   * @param fullConfig - Optional full config, preferred over `coreConfig` where both are consulted.
   * @param agentRuntime - Optional agent runtime, required for auto-responses.
   * @param logger - Optional logger; defaults to the matching `console` methods.
   */
  constructor(config, manager, provider, coreConfig, fullConfig, agentRuntime, logger) {
    this.server = null;
    this.listeningUrl = null;
    this.startPromise = null;
    this.stopStaleCallReaper = null;
    this.webhookInFlightLimiter = createWebhookInFlightLimiter();
    this.mediaStreamHandler = null;
    // providerCallId -> timer of the pending auto-hang-up after a stream disconnect.
    this.pendingDisconnectHangups = /* @__PURE__ */ new Map();
    this.realtimeHandler = null;
    this.config = normalizeVoiceCallConfig(config);
    this.manager = manager;
    this.provider = provider;
    this.coreConfig = coreConfig ?? null;
    this.fullConfig = fullConfig ?? null;
    this.agentRuntime = agentRuntime ?? null;
    this.logger = logger ?? {
      info: console.log,
      warn: console.warn,
      error: console.error,
      debug: console.debug
    };
  }
  /**
   * Get the media stream handler (for wiring to provider).
   * @returns The handler, or `null` while streaming has not been initialized.
   */
  getMediaStreamHandler() {
    return this.mediaStreamHandler;
  }
  /** @returns The realtime voice handler, or `null` when none has been set. */
  getRealtimeHandler() {
    return this.realtimeHandler;
  }
  /**
   * Ask the realtime handler to speak on a call.
   *
   * @param callId - Call to speak on.
   * @param instructions - Passed through to the handler's `speak`.
   * @returns The handler's result, or a `{ success: false, error }` object when no handler is set.
   */
  speakRealtime(callId, instructions) {
    if (!this.realtimeHandler) return {
      success: false,
      error: "Realtime voice handler is not configured"
    };
    return this.realtimeHandler.speak(callId, instructions);
  }
  /** Install (or replace) the realtime voice handler. */
  setRealtimeHandler(handler) {
    this.realtimeHandler = handler;
  }
  /** Cancel and forget the pending auto-hang-up timer for a provider call, if there is one. */
  clearPendingDisconnectHangup(providerCallId) {
    const existing = this.pendingDisconnectHangups.get(providerCallId);
    if (!existing) return;
    clearTimeout(existing);
    this.pendingDisconnectHangups.delete(providerCallId);
  }
  /**
   * Resolve the client address of a media-stream request.
   * Forwarding headers are only honoured when `trustForwardingHeaders` is on
   * and the socket's remote address is one of the configured trusted proxies.
   *
   * @param request - Incoming upgrade request.
   * @returns The forwarded client address when trusted, otherwise the socket's remote address.
   */
  resolveMediaStreamClientIp(request) {
    const remoteIp = request.socket.remoteAddress ?? void 0;
    const trustedProxyIPs = this.config.webhookSecurity.trustedProxyIPs.filter(Boolean);
    const normalizedTrustedProxyIps = new Set(trustedProxyIPs.map((ip) => normalizeProxyIp(ip)).filter((ip) => Boolean(ip)));
    const normalizedRemoteIp = normalizeProxyIp(remoteIp);
    const fromTrustedProxy = normalizedTrustedProxyIps.size > 0 && normalizedRemoteIp !== void 0 && normalizedTrustedProxyIps.has(normalizedRemoteIp);
    if (this.config.webhookSecurity.trustForwardingHeaders && fromTrustedProxy) {
      const forwardedIp = resolveForwardedClientIp(request, trustedProxyIPs);
      if (forwardedIp) return forwardedIp;
    }
    return remoteIp;
  }
  /**
   * Decide whether caller speech should be ignored.
   * True only for an outbound call in the "speaking" state, in conversation
   * mode (the default when no mode is set), that has a non-empty initial message.
   */
  shouldSuppressBargeInForInitialMessage(call) {
    if (!call || call.direction !== "outbound") return false;
    if (call.state !== "speaking") return false;
    if ((call.metadata?.mode ?? "conversation") !== "conversation") return false;
    return (normalizeOptionalString(call.metadata?.initialMessage) ?? "").length > 0;
  }
  /**
   * Initialize media streaming with the selected realtime transcription provider.
   * When no usable provider can be resolved a warning is logged and the media
   * stream handler stays unset.
   */
  async initializeMediaStreaming() {
    const streaming = this.config.streaming;
    const pluginConfig = this.fullConfig ?? this.coreConfig;
    const { getRealtimeTranscriptionProvider, listRealtimeTranscriptionProviders } = await loadRealtimeTranscriptionRuntime();
    const resolution = resolveConfiguredCapabilityProvider({
      configuredProviderId: streaming.provider,
      providerConfigs: streaming.providers,
      cfg: pluginConfig,
      cfgForResolve: pluginConfig ?? {},
      getConfiguredProvider: (providerId) => getRealtimeTranscriptionProvider(providerId, pluginConfig),
      listProviders: () => listRealtimeTranscriptionProviders(pluginConfig),
      resolveProviderConfig: ({ provider, cfg, rawConfig }) => provider.resolveConfig?.({
        cfg,
        rawConfig
      }) ?? rawConfig,
      isProviderConfigured: ({ provider, cfg, providerConfig }) => provider.isConfigured({
        cfg,
        providerConfig
      })
    });
    if (!resolution.ok && resolution.code === "missing-configured-provider") {
      console.warn(`[voice-call] Streaming enabled but realtime transcription provider "${resolution.configuredProviderId}" is not registered`);
      return;
    }
    if (!resolution.ok && resolution.code === "no-registered-provider") {
      console.warn("[voice-call] Streaming enabled but no realtime transcription provider is registered");
      return;
    }
    if (!resolution.ok) {
      console.warn(`[voice-call] Streaming enabled but provider "${resolution.provider?.id}" is not configured`);
      return;
    }
    const streamConfig = {
      transcriptionProvider: resolution.provider,
      providerConfig: resolution.providerConfig,
      cfg: this.fullConfig ?? this.coreConfig ?? void 0,
      preStartTimeoutMs: streaming.preStartTimeoutMs,
      maxPendingConnections: streaming.maxPendingConnections,
      maxPendingConnectionsPerIp: streaming.maxPendingConnectionsPerIp,
      maxConnections: streaming.maxConnections,
      resolveClientIp: (request) => this.resolveMediaStreamClientIp(request),
      // Only streams for calls the manager knows are accepted; Twilio streams must also carry a valid token.
      shouldAcceptStream: ({ callId, token }) => {
        if (!this.manager.getCallByProviderCallId(callId)) return false;
        if (this.provider.name === "twilio") {
          if (!this.provider.isValidStreamToken(callId, token)) {
            console.warn(`[voice-call] Rejecting media stream: invalid token for ${callId}`);
            return false;
          }
        }
        return true;
      },
      onTranscript: (providerCallId, transcript) => {
        const safeTranscript = sanitizeTranscriptForLog(transcript);
        console.log(`[voice-call] Transcript for ${providerCallId}: ${safeTranscript} (chars=${transcript.length})`);
        const call = this.manager.getCallByProviderCallId(providerCallId);
        if (!call) {
          console.warn(`[voice-call] No active call found for provider ID: ${providerCallId}`);
          return;
        }
        if (this.shouldSuppressBargeInForInitialMessage(call)) {
          console.log(`[voice-call] Ignoring barge transcript while initial message is still playing (${providerCallId})`);
          return;
        }
        // The caller spoke: drop any speech still queued for playback.
        if (this.provider.name === "twilio") this.provider.clearTtsQueue(providerCallId);
        const event = {
          id: `stream-transcript-${Date.now()}`,
          type: "call.speech",
          callId: call.callId,
          providerCallId,
          timestamp: Date.now(),
          transcript,
          isFinal: true
        };
        this.manager.processEvent(event);
        const callMode = call.metadata?.mode;
        if (call.direction === "inbound" || callMode === "conversation") this.handleInboundResponse(call.callId, transcript).catch((err) => {
          console.warn(`[voice-call] Failed to auto-respond:`, err);
        });
      },
      onSpeechStart: (providerCallId) => {
        if (this.provider.name !== "twilio") return;
        const call = this.manager.getCallByProviderCallId(providerCallId);
        if (this.shouldSuppressBargeInForInitialMessage(call)) return;
        this.provider.clearTtsQueue(providerCallId);
      },
      onPartialTranscript: (callId, partial) => {
        const safePartial = sanitizeTranscriptForLog(partial);
        console.log(`[voice-call] Partial for ${callId}: ${safePartial} (chars=${partial.length})`);
      },
      onTalkEvent: (providerCallId, _streamSid, event) => {
        const call = this.manager.getCallByProviderCallId(providerCallId);
        if (call) appendRecentTalkEventMetadata(call, event);
      },
      onConnect: (callId, streamSid) => {
        console.log(`[voice-call] Media stream connected: ${callId} -> ${streamSid}`);
        // A reconnect within the grace period cancels the pending hang-up.
        this.clearPendingDisconnectHangup(callId);
        if (this.provider.name === "twilio") this.provider.registerCallStream(callId, streamSid);
      },
      onTranscriptionReady: (callId) => {
        this.manager.speakInitialMessage(callId).catch((err) => {
          console.warn(`[voice-call] Failed to speak initial message:`, err);
        });
      },
      onDisconnect: (callId, streamSid) => {
        console.log(`[voice-call] Media stream disconnected: ${callId} (${streamSid})`);
        if (this.provider.name === "twilio") this.provider.unregisterCallStream(callId, streamSid);
        this.clearPendingDisconnectHangup(callId);
        // End the call after a short grace period unless it is gone or (Twilio) a stream is registered again.
        const timer = setTimeout(() => {
          this.pendingDisconnectHangups.delete(callId);
          const disconnectedCall = this.manager.getCallByProviderCallId(callId);
          if (!disconnectedCall) return;
          if (this.provider.name === "twilio") {
            if (this.provider.hasRegisteredStream(callId)) return;
          }
          console.log(`[voice-call] Auto-ending call ${disconnectedCall.callId} after stream disconnect grace`);
          this.manager.endCall(disconnectedCall.callId).catch((err) => {
            console.warn(`[voice-call] Failed to auto-end call ${disconnectedCall.callId}:`, err);
          });
        }, STREAM_DISCONNECT_HANGUP_GRACE_MS);
        timer.unref?.();
        this.pendingDisconnectHangups.set(callId, timer);
      }
    };
    this.mediaStreamHandler = new MediaStreamHandler(streamConfig);
    console.log("[voice-call] Media streaming initialized");
  }
  /**
   * Start the webhook server.
   * Idempotent: returns immediately if the server is already listening, and
   * returns the pending promise while a start is in progress.
   *
   * @returns The URL the webhook endpoint is listening on.
   */
  async start() {
    const { port, bind, path: webhookPath } = this.config.serve;
    const streamPath = this.config.streaming.streamPath;
    if (this.server?.listening) return this.listeningUrl ?? this.resolveListeningUrl(bind, webhookPath);
    if (this.config.streaming.enabled && !this.mediaStreamHandler) await this.initializeMediaStreaming();
    if (this.startPromise) return this.startPromise;
    this.startPromise = new Promise((resolve, reject) => {
      this.server = http.createServer((req, res) => {
        this.handleRequest(req, res, webhookPath).catch((err) => {
          console.error("[voice-call] Webhook error:", err);
          res.statusCode = 500;
          res.end("Internal Server Error");
        });
      });
      if (this.realtimeHandler || this.mediaStreamHandler) this.server.on("upgrade", (request, socket, head) => {
        if (this.realtimeHandler && this.isRealtimeWebSocketUpgrade(request)) {
          this.realtimeHandler.handleWebSocketUpgrade(request, socket, head);
          return;
        }
        if (this.getUpgradePathname(request) === streamPath && this.mediaStreamHandler) this.mediaStreamHandler.handleUpgrade(request, socket, head);
        else socket.destroy();
      });
      this.server.on("error", (err) => {
        this.server = null;
        this.listeningUrl = null;
        this.startPromise = null;
        reject(err);
      });
      this.server.listen(port, bind, () => {
        const url = this.resolveListeningUrl(bind, webhookPath);
        this.listeningUrl = url;
        this.startPromise = null;
        this.logger.info(`[voice-call] Webhook server listening on ${url}`);
        if (this.mediaStreamHandler) {
          const address = this.server?.address();
          const actualPort = address && typeof address === "object" ? address.port : this.config.serve.port;
          // IPv6 literals need brackets to form a valid ws:// URL.
          this.logger.info(`[voice-call] Media stream WebSocket on ws://${this.formatHostForUrl(bind)}:${actualPort}${streamPath}`);
        }
        resolve(url);
        this.stopStaleCallReaper = startStaleCallReaper({
          manager: this.manager,
          staleCallReaperSeconds: this.config.staleCallReaperSeconds
        });
      });
    });
    return this.startPromise;
  }
  /**
   * Stop the webhook server.
   * Cancels pending disconnect hang-ups, clears the in-flight limiter, stops
   * the stale-call reaper and closes the HTTP server if there is one.
   */
  async stop() {
    for (const timer of this.pendingDisconnectHangups.values()) clearTimeout(timer);
    this.pendingDisconnectHangups.clear();
    this.webhookInFlightLimiter.clear();
    this.startPromise = null;
    if (this.stopStaleCallReaper) {
      this.stopStaleCallReaper();
      this.stopStaleCallReaper = null;
    }
    return new Promise((resolve) => {
      if (this.server) this.server.close(() => {
        this.server = null;
        this.listeningUrl = null;
        resolve();
      });
      else {
        this.listeningUrl = null;
        resolve();
      }
    });
  }
  /**
   * Wrap an IPv6 literal in square brackets so it can be embedded in a URL.
   * Hosts without a colon, already-bracketed hosts and non-string values are
   * returned unchanged.
   */
  formatHostForUrl(host) {
    if (typeof host !== "string") return host;
    return host.includes(":") && !host.startsWith("[") ? `[${host}]` : host;
  }
  /**
   * Build the webhook URL from the server's bound address.
   * Falls back to the configured bind host and port while the server reports
   * no address object.
   *
   * @param bind - Configured bind host.
   * @param webhookPath - Configured webhook path, appended verbatim.
   * @returns The `http://host:port/path` URL; IPv6 hosts are bracketed in both branches.
   */
  resolveListeningUrl(bind, webhookPath) {
    const address = this.server?.address();
    if (address && typeof address === "object") {
      const host = address.address && address.address.length > 0 ? address.address : bind;
      return `http://${this.formatHostForUrl(host)}:${address.port}${webhookPath}`;
    }
    return `http://${this.formatHostForUrl(bind)}:${this.config.serve.port}${webhookPath}`;
  }
  /** @returns The pathname of an upgrade request, or `null` when its URL cannot be parsed. */
  getUpgradePathname(request) {
    try {
      return buildRequestUrl(request.url).pathname;
    } catch {
      return null;
    }
  }
  /**
   * Canonicalize a path for comparison: trimmed, with a leading slash, and
   * without a trailing slash (except for the root path "/").
   */
  normalizeWebhookPathForMatch(pathname) {
    const trimmed = pathname.trim();
    if (!trimmed) return "/";
    const prefixed = trimmed.startsWith("/") ? trimmed : `/${trimmed}`;
    if (prefixed === "/") return prefixed;
    return prefixed.endsWith("/") ? prefixed.slice(0, -1) : prefixed;
  }
  /** @returns Whether the request path equals the configured path after normalization. */
  isWebhookPathMatch(requestPath, configuredPath) {
    return this.normalizeWebhookPathForMatch(requestPath) === this.normalizeWebhookPathForMatch(configuredPath);
  }
  /**
   * Handle incoming HTTP request: run the webhook pipeline and write its result.
   */
  async handleRequest(req, res, webhookPath) {
    const payload = await this.runWebhookPipeline(req, webhookPath);
    this.writeWebhookResponse(res, payload);
  }
  /**
   * Route, gate, verify and process one webhook request.
   *
   * Order: hold-music shortcut, path and method checks, signature-header gate,
   * in-flight limit, bounded body read, provider verification, provider
   * initial TwiML, realtime TwiML, and finally event parsing (side effects are
   * skipped for replays).
   *
   * @param req - Incoming HTTP request.
   * @param webhookPath - Configured webhook path.
   * @returns The response payload (`statusCode`, optional `headers`, `body`).
   */
  async runWebhookPipeline(req, webhookPath) {
    const url = buildRequestUrl(req.url);
    if (url.pathname === "/voice/hold-music") return {
      statusCode: 200,
      headers: { "Content-Type": "text/xml" },
      body: `<?xml version="1.0" encoding="UTF-8"?>
<Response>
<Say voice="alice">All agents are currently busy. Please hold.</Say>
<Play loop="0">https://s3.amazonaws.com/com.twilio.music.classical/BusyStrings.mp3</Play>
</Response>`
    };
    if (!this.isWebhookPathMatch(url.pathname, webhookPath)) return {
      statusCode: 404,
      body: "Not Found"
    };
    if (req.method !== "POST") return {
      statusCode: 405,
      body: "Method Not Allowed"
    };
    // Cheap header check first, so unsigned requests are rejected before the body is read.
    const headerGate = this.verifyPreAuthWebhookHeaders(req.headers);
    if (!headerGate.ok) {
      console.warn(`[voice-call] Webhook rejected before body read: ${headerGate.reason}`);
      return {
        statusCode: 401,
        body: "Unauthorized"
      };
    }
    const remoteAddress = req.socket.remoteAddress;
    if (!remoteAddress) console.warn(`[voice-call] Webhook accepted with no remote address; using shared fallback in-flight key`);
    const inFlightKey = remoteAddress || MISSING_REMOTE_ADDRESS_IN_FLIGHT_KEY;
    if (!this.webhookInFlightLimiter.tryAcquire(inFlightKey)) {
      console.warn(`[voice-call] Webhook rejected before body read: too many in-flight requests`);
      return {
        statusCode: 429,
        body: "Too Many Requests"
      };
    }
    try {
      let body = "";
      try {
        body = await this.readBody(req, MAX_WEBHOOK_BODY_BYTES, WEBHOOK_BODY_TIMEOUT_MS);
      } catch (err) {
        if (isRequestBodyLimitError(err, "PAYLOAD_TOO_LARGE")) return {
          statusCode: 413,
          body: "Payload Too Large"
        };
        if (isRequestBodyLimitError(err, "REQUEST_BODY_TIMEOUT")) return {
          statusCode: 408,
          body: requestBodyErrorToText("REQUEST_BODY_TIMEOUT")
        };
        throw err;
      }
      const ctx = {
        headers: req.headers,
        rawBody: body,
        url: url.toString(),
        method: "POST",
        query: Object.fromEntries(url.searchParams),
        remoteAddress: req.socket.remoteAddress ?? void 0
      };
      const verification = this.provider.verifyWebhook(ctx);
      if (!verification.ok) {
        console.warn(`[voice-call] Webhook verification failed: ${verification.reason}`);
        return {
          statusCode: 401,
          body: "Unauthorized"
        };
      }
      if (!verification.verifiedRequestKey) {
        console.warn("[voice-call] Webhook verification succeeded without request identity key");
        return {
          statusCode: 401,
          body: "Unauthorized"
        };
      }
      const initialTwiML = this.provider.consumeInitialTwiML?.(ctx);
      if (initialTwiML !== void 0 && initialTwiML !== null) {
        const params = new URLSearchParams(ctx.rawBody);
        console.log(`[voice-call] Serving provider initial TwiML before realtime handling (callSid=${params.get("CallSid") ?? "unknown"}, direction=${params.get("Direction") ?? "unknown"})`);
        return {
          statusCode: 200,
          headers: { "Content-Type": "application/xml" },
          body: initialTwiML
        };
      }
      // Non-null only when a realtime handler is set (see getRealtimeTwimlParams).
      const realtimeParams = this.getRealtimeTwimlParams(ctx);
      if (realtimeParams) {
        const direction = realtimeParams.get("Direction");
        if ((!direction || direction === "inbound") && !this.shouldAcceptRealtimeInboundRequest(realtimeParams)) {
          console.log("[voice-call] Realtime inbound call rejected before stream setup");
          return buildRealtimeRejectedTwiML();
        }
        console.log(`[voice-call] Serving realtime TwiML for Twilio call ${realtimeParams.get("CallSid") ?? "unknown"} (direction=${direction ?? "unknown"})`);
        return this.realtimeHandler.buildTwiMLPayload(req, realtimeParams);
      }
      const parsed = this.provider.parseWebhookEvent(ctx, { verifiedRequestKey: verification.verifiedRequestKey });
      if (verification.isReplay) console.warn("[voice-call] Replay detected; skipping event side effects");
      else this.processParsedEvents(parsed.events);
      return normalizeWebhookResponse(parsed);
    } finally {
      this.webhookInFlightLimiter.release(inFlightKey);
    }
  }
  /**
   * Check that the provider's signature headers are present (their values are
   * not validated here). Always passes when signature verification is
   * disabled or the provider is not one of telnyx/twilio/plivo.
   *
   * @param headers - Request headers.
   * @returns `{ ok: true }` or `{ ok: false, reason }`.
   */
  verifyPreAuthWebhookHeaders(headers) {
    if (this.config.skipSignatureVerification) return { ok: true };
    switch (this.provider.name) {
      case "telnyx": {
        const signature = getHeader(headers, "telnyx-signature-ed25519");
        const timestamp = getHeader(headers, "telnyx-timestamp");
        if (signature && timestamp) return { ok: true };
        return {
          ok: false,
          reason: "missing Telnyx signature or timestamp header"
        };
      }
      case "twilio":
        if (getHeader(headers, "x-twilio-signature")) return { ok: true };
        return {
          ok: false,
          reason: "missing X-Twilio-Signature header"
        };
      case "plivo": {
        const hasV3 = Boolean(getHeader(headers, "x-plivo-signature-v3")) && Boolean(getHeader(headers, "x-plivo-signature-v3-nonce"));
        const hasV2 = Boolean(getHeader(headers, "x-plivo-signature-v2")) && Boolean(getHeader(headers, "x-plivo-signature-v2-nonce"));
        if (hasV3 || hasV2) return { ok: true };
        return {
          ok: false,
          reason: "missing Plivo signature headers"
        };
      }
      default: return { ok: true };
    }
  }
  /** @returns Whether the upgrade request's path starts with the realtime handler's stream path pattern. */
  isRealtimeWebSocketUpgrade(req) {
    try {
      const pathname = buildRequestUrl(req.url).pathname;
      const pattern = this.realtimeHandler?.getStreamPathPattern();
      return Boolean(pattern && pathname.startsWith(pattern));
    } catch {
      return false;
    }
  }
  /**
   * Decide whether a webhook should be answered with realtime TwiML.
   *
   * @param ctx - Webhook context (`rawBody` is parsed as form data, `query` is consulted).
   * @returns The parsed form parameters, or `null` when there is no realtime
   *   handler, the provider is not Twilio, the direction is unexpected, the
   *   request is a status callback or reports a terminal status, or it carries
   *   `SpeechResult`/`Digits`.
   */
  getRealtimeTwimlParams(ctx) {
    if (!this.realtimeHandler || this.provider.name !== "twilio") return null;
    const params = new URLSearchParams(ctx.rawBody);
    const direction = params.get("Direction");
    if (!(!direction || direction === "inbound" || direction.startsWith("outbound"))) return null;
    if (ctx.query?.type === "status") return null;
    const callStatus = params.get("CallStatus");
    if (callStatus && isProviderStatusTerminal(callStatus)) return null;
    return !params.get("SpeechResult") && !params.get("Digits") ? params : null;
  }
  /**
   * Apply the inbound policy to a realtime call: "open" accepts everyone,
   * "allowlist" and "pairing" accept allowlisted callers, anything else rejects.
   */
  shouldAcceptRealtimeInboundRequest(params) {
    switch (this.config.inboundPolicy) {
      case "open": return true;
      case "allowlist":
      case "pairing": return isAllowlistedCaller(normalizePhoneNumber(params.get("From") ?? void 0), this.config.allowFrom);
      default: return false;
    }
  }
  /** Feed each parsed event to the manager; a failing event is logged and does not stop the rest. */
  processParsedEvents(events) {
    for (const event of events) try {
      this.manager.processEvent(event);
    } catch (err) {
      console.error(`[voice-call] Error processing event ${event.type}:`, err);
    }
  }
  /** Write status, optional headers and body of a pipeline payload to the response and end it. */
  writeWebhookResponse(res, payload) {
    res.statusCode = payload.statusCode;
    if (payload.headers) for (const [key, value] of Object.entries(payload.headers)) res.setHeader(key, value);
    res.end(payload.body);
  }
  /**
   * Read request body as string with timeout protection.
   *
   * @param req - Incoming request.
   * @param maxBytes - Size limit passed to `readRequestBodyWithLimit`.
   * @param timeoutMs - Read timeout; defaults to `WEBHOOK_BODY_TIMEOUT_MS`.
   */
  readBody(req, maxBytes, timeoutMs = WEBHOOK_BODY_TIMEOUT_MS) {
    return readRequestBodyWithLimit(req, {
      maxBytes,
      timeoutMs
    });
  }
  /**
   * Handle auto-response for inbound calls using the agent system.
   * Skips (with a warning) when the call, the core config or the agent runtime
   * is missing. Generation errors are logged, never thrown.
   *
   * @param callId - Internal call id.
   * @param userMessage - Caller transcript to respond to.
   */
  async handleInboundResponse(callId, userMessage) {
    // Caller speech is untrusted: log it sanitized and bounded, like the other transcript logs.
    console.log(`[voice-call] Auto-responding to inbound call ${callId}: "${sanitizeTranscriptForLog(userMessage)}"`);
    const call = this.manager.getCall(callId);
    if (!call) {
      console.warn(`[voice-call] Call ${callId} not found for auto-response`);
      return;
    }
    if (!this.coreConfig) {
      console.warn("[voice-call] Core config missing; skipping auto-response");
      return;
    }
    if (!this.agentRuntime) {
      console.warn("[voice-call] Agent runtime missing; skipping auto-response");
      return;
    }
    try {
      const { generateVoiceResponse } = await loadResponseGeneratorModule();
      const numberRouteKey = typeof call.metadata?.numberRouteKey === "string" ? call.metadata.numberRouteKey : call.to;
      const effectiveConfig = resolveVoiceCallEffectiveConfig(this.config, numberRouteKey).config;
      const result = await generateVoiceResponse({
        voiceConfig: effectiveConfig,
        coreConfig: this.coreConfig,
        agentRuntime: this.agentRuntime,
        callId,
        sessionKey: call.sessionKey,
        from: call.from,
        transcript: call.transcript,
        userMessage
      });
      if (result.error) {
        console.error(`[voice-call] Response generation error: ${result.error}`);
        return;
      }
      if (result.text) {
        // Only the log line is sanitized; the full text is still spoken.
        console.log(`[voice-call] AI response: "${sanitizeTranscriptForLog(result.text)}"`);
        await this.manager.speak(callId, result.text);
      }
    } catch (err) {
      console.error(`[voice-call] Auto-response error:`, err);
    }
  }
};
3134
+ //#endregion
3135
+ //#region extensions/voice-call/src/runtime.ts
3136
// System prompt for agent consults delegated from the live phone voice bridge.
// The sentences are concatenated with single spaces into one line.
const REALTIME_VOICE_CONSULT_SYSTEM_PROMPT =
  "You are the configured Klaw agent receiving delegated requests from a live phone voice bridge." +
  " Act on behalf of the caller using the normal available tools when the caller asks you to do work." +
  " Prioritize completing the user's request and returning a fast, speakable result over exhaustive investigation." +
  " For tool-backed status checks, prefer one or two bounded read-only queries before answering." +
  " Do not print secret values or dump environment variables; only check whether required configuration is present." +
  " Be accurate, brief, and speakable.";
// Cached dynamic-import promises, filled on first use by the loaders below.
let telnyxProviderPromise;
let twilioProviderPromise;
let plivoProviderPromise;
let mockProviderPromise;
let realtimeVoiceRuntimePromise;
let realtimeHandlerPromise;
3150
/**
 * Lazily import the Telnyx provider chunk; the import promise is cached after the first call.
 *
 * @returns The cached import promise.
 */
function loadTelnyxProvider() {
  return (telnyxProviderPromise ??= import("./telnyx-BWr9EZ4x.js"));
}
3154
/**
 * Lazily import the Twilio provider chunk; the import promise is cached after the first call.
 *
 * @returns The cached import promise.
 */
function loadTwilioProvider() {
  return (twilioProviderPromise ??= import("./twilio-D9B0zY1k.js"));
}
3158
/**
 * Lazily imports the Plivo provider chunk.
 *
 * The import promise is stored in `plivoProviderPromise`, so every later
 * call returns that same promise instead of importing again.
 *
 * @returns {Promise<object>} Promise for the imported module namespace.
 */
function loadPlivoProvider() {
  if (plivoProviderPromise == null) {
    plivoProviderPromise = import("./plivo-L-JTeuEc.js");
  }
  return plivoProviderPromise;
}
3162
/**
 * Lazily imports the mock provider chunk.
 *
 * The import promise is stored in `mockProviderPromise`, so every later
 * call returns that same promise instead of importing again.
 *
 * @returns {Promise<object>} Promise for the imported module namespace.
 */
function loadMockProvider() {
  if (mockProviderPromise == null) {
    mockProviderPromise = import("./mock-jtSdKDQN.js");
  }
  return mockProviderPromise;
}
3166
/**
 * Lazily imports the realtime voice runtime chunk.
 *
 * The import promise is stored in `realtimeVoiceRuntimePromise`, so every
 * later call returns that same promise instead of importing again.
 *
 * @returns {Promise<object>} Promise for the imported module namespace.
 */
function loadRealtimeVoiceRuntime() {
  if (realtimeVoiceRuntimePromise == null) {
    realtimeVoiceRuntimePromise = import("./realtime-voice.runtime-vCpCAutg.js");
  }
  return realtimeVoiceRuntimePromise;
}
3170
/**
 * Lazily imports the realtime call handler chunk.
 *
 * The import promise is stored in `realtimeHandlerPromise`, so every later
 * call returns that same promise instead of importing again.
 *
 * @returns {Promise<object>} Promise for the imported module namespace.
 */
function loadRealtimeHandler() {
  if (realtimeHandlerPromise == null) {
    realtimeHandlerPromise = import("./realtime-handler-5pSItXxX.js");
  }
  return realtimeHandlerPromise;
}
3174
/**
 * Picks the session key used when a call consults the agent.
 *
 * A truthy `call.sessionKey` is returned as-is. Otherwise the key is derived
 * through `resolveVoiceCallSessionKey` from the call's config, call id and a
 * phone number: `call.to` for outbound calls, `call.from` for anything else.
 *
 * @param {object} call - Call record; reads `sessionKey`, `direction`, `to`,
 *   `from`, `config` and `callId`.
 * @returns {*} The existing session key or the derived one.
 */
function resolveVoiceCallConsultSessionKey(call) {
  const existingKey = call.sessionKey;
  if (existingKey) {
    return existingKey;
  }
  let phone;
  if (call.direction === "outbound") {
    phone = call.to;
  } else {
    phone = call.from;
  }
  const { config, callId } = call;
  return resolveVoiceCallSessionKey({ config, callId, phone });
}
3183
/**
 * Converts a call transcript into `{ role, text }` turns for an agent consult.
 *
 * Entries spoken by `"bot"` become `"assistant"` turns; every other speaker
 * becomes a `"user"` turn. If the handler context carries a non-empty
 * (trimmed) `partialUserTranscript` whose text differs from the last turn's
 * text, it is appended as a final `"user"` turn.
 *
 * @param {object} call - Call record; `call.transcript` may be null/undefined.
 * @param {object} [context] - Optional handler context with `partialUserTranscript`.
 * @returns {Array<{role: string, text: *}>} Newly built list of turns.
 */
function mapVoiceCallConsultTranscript(call, context) {
  const toTurn = (entry) => {
    const role = entry.speaker === "bot" ? "assistant" : "user";
    return { role, text: entry.text };
  };
  const turns = (call.transcript ?? []).map(toTurn);
  const pendingText = context?.partialUserTranscript?.trim();
  if (!pendingText) {
    return turns;
  }
  const lastTurn = turns.at(-1);
  if (lastTurn?.text !== pendingText) {
    turns.push({ role: "user", text: pendingText });
  }
  return turns;
}
3195
/**
 * Creates the teardown controller for the resources a voice-call runtime owns:
 * an optional tunnel, the Tailscale exposure and the webhook server.
 *
 * @param {object} params
 * @param {object} params.config - Passed to `cleanupTailscaleExposure`.
 * @param {object} params.webhookServer - Object exposing an async `stop()`.
 * @returns {{setTunnelResult: function(*): void, stop: function(object=): Promise<void>}}
 *   `setTunnelResult(result)` records the tunnel handle to stop later.
 *   `stop(opts)` tears everything down once; repeat calls are no-ops.
 *   With `opts.suppressErrors` true, step failures are ignored. Otherwise
 *   `stop` rejects after all steps have run: with the original error when one
 *   step failed, or with an `AggregateError` when several failed.
 */
function createRuntimeResourceLifecycle(params) {
  let tunnelResult = null;
  let stopped = false;
  return {
    setTunnelResult: (result) => {
      tunnelResult = result;
    },
    stop: async (opts) => {
      if (stopped) return;
      // Latched before any step runs, so a second call can never repeat the teardown.
      stopped = true;
      const suppressErrors = opts?.suppressErrors ?? false;
      const steps = [
        async () => {
          if (tunnelResult) await tunnelResult.stop();
        },
        async () => {
          await cleanupTailscaleExposure(params.config);
        },
        async () => {
          await params.webhookServer.stop();
        }
      ];
      // Every step must run even if an earlier one fails: because `stopped` is
      // already latched, a step skipped here would never be retried and its
      // resource (for example the listening webhook server) would leak.
      const failures = [];
      for (const step of steps) {
        try {
          await step();
        } catch (err) {
          failures.push(err);
        }
      }
      if (suppressErrors || failures.length === 0) return;
      // A single failure is rethrown unchanged so callers see the same error as before.
      if (failures.length === 1) throw failures[0];
      throw new AggregateError(failures, "[voice-call] Multiple runtime resources failed to stop");
    }
  };
}
3225
/**
 * Instantiates the telephony provider selected by `config.provider`.
 *
 * The provider implementation is loaded on demand through the matching
 * `load*Provider()` helper and constructed from the relevant config section.
 *
 * @param {object} config - Voice-call configuration; reads `provider`,
 *   `tunnel`, `serve`, `publicUrl`, `skipSignatureVerification`,
 *   `webhookSecurity`, `streaming`, `ringTimeoutMs` and the per-provider
 *   sections (`telnyx`, `twilio`, `plivo`).
 * @returns {Promise<object>} The constructed provider instance.
 * @throws {Error} When `config.provider` is not telnyx, twilio, plivo or mock.
 */
async function resolveProvider(config) {
  // Only true for an ngrok tunnel in front of a loopback bind with the bypass flag set.
  const usesNgrokTunnel = config.tunnel?.provider === "ngrok";
  const allowNgrokFreeTierLoopbackBypass =
    usesNgrokTunnel &&
    isLoopbackHost(config.serve?.bind ?? "") &&
    (config.tunnel?.allowNgrokFreeTierLoopbackBypass ?? false);
  const providerName = config.provider;

  if (providerName === "telnyx") {
    const telnyxModule = await loadTelnyxProvider();
    const credentials = {
      apiKey: config.telnyx?.apiKey,
      connectionId: config.telnyx?.connectionId,
      publicKey: config.telnyx?.publicKey
    };
    const options = { skipVerification: config.skipSignatureVerification };
    return new telnyxModule.TelnyxProvider(credentials, options);
  }

  if (providerName === "twilio") {
    const twilioModule = await loadTwilioProvider();
    const credentials = {
      accountSid: config.twilio?.accountSid,
      authToken: resolveTwilioAuthToken(config)
    };
    let streamPath;
    if (config.streaming?.enabled) {
      streamPath = config.streaming.streamPath;
    }
    const options = {
      allowNgrokFreeTierLoopbackBypass,
      publicUrl: config.publicUrl,
      skipVerification: config.skipSignatureVerification,
      streamPath,
      webhookSecurity: config.webhookSecurity
    };
    return new twilioModule.TwilioProvider(credentials, options);
  }

  if (providerName === "plivo") {
    const plivoModule = await loadPlivoProvider();
    const credentials = {
      authId: config.plivo?.authId,
      authToken: config.plivo?.authToken
    };
    // Whole seconds, never below one.
    const wholeRingSeconds = Math.floor(config.ringTimeoutMs / 1e3);
    const options = {
      publicUrl: config.publicUrl,
      skipVerification: config.skipSignatureVerification,
      ringTimeoutSec: Math.max(1, wholeRingSeconds),
      webhookSecurity: config.webhookSecurity
    };
    return new plivoModule.PlivoProvider(credentials, options);
  }

  if (providerName === "mock") {
    const mockModule = await loadMockProvider();
    return new mockModule.MockProvider();
  }

  throw new Error(`Unsupported voice-call provider: ${String(providerName)}`);
}
3268
/**
 * Resolves the configured realtime voice provider.
 *
 * Loads the realtime voice runtime on demand and delegates to its
 * `resolveConfiguredRealtimeVoiceProvider`, passing the provider id and
 * provider configs from `params.config.realtime` plus the full config.
 *
 * @param {object} params
 * @param {object} params.config - Voice-call config with a `realtime` section.
 * @param {*} params.fullConfig - Forwarded as `cfg`.
 * @returns {Promise<*>} Whatever the runtime resolver returns.
 */
async function resolveRealtimeProvider(params) {
  const runtimeModule = await loadRealtimeVoiceRuntime();
  const resolveConfigured = runtimeModule.resolveConfiguredRealtimeVoiceProvider;
  const realtimeSection = params.config.realtime;
  const request = {
    configuredProviderId: realtimeSection.provider,
    providerConfigs: realtimeSection.providers,
    cfg: params.fullConfig
  };
  return resolveConfigured(request);
}
3276
/**
 * Builds and starts the voice-call runtime.
 *
 * Sequence: resolve and validate the config, create the telephony provider,
 * the call manager and the webhook server, optionally attach a realtime call
 * handler (with an agent-consult tool handler), start the webhook server,
 * work out the public webhook URL (configured URL, then tunnel, then
 * Tailscale), wire streaming/TTS for Twilio, and initialize the manager.
 *
 * @param {object} params
 * @param {*} params.config - Raw plugin config, passed to `resolveVoiceCallConfig`.
 * @param {*} params.coreConfig - Core config; used as `cfg` when `fullConfig` is nullish.
 * @param {*} [params.fullConfig] - Full config, preferred over `coreConfig` for `cfg`.
 * @param {object} params.agentRuntime - Agent runtime forwarded to the server and consult calls.
 * @param {object} [params.ttsRuntime] - TTS runtime; telephony TTS is wired only when
 *   `ttsRuntime.textToSpeechTelephony` is truthy.
 * @param {object} [params.logger] - Logger with info/warn/error/debug; defaults to console.
 * @returns {Promise<{config, provider, manager, webhookServer, webhookUrl, publicUrl, stop}>}
 *   The running runtime; `stop()` tears down via the resource lifecycle.
 * @throws {Error} When the plugin is disabled, the provider config is invalid,
 *   or the provider needs a public webhook but only a local-only URL is available.
 *   Errors thrown after the webhook server has started trigger a best-effort
 *   teardown before being rethrown.
 */
async function createVoiceCallRuntime(params) {
  const { config: rawConfig, coreConfig, fullConfig, agentRuntime, ttsRuntime, logger } = params;
  // Fall back to console methods when no logger was supplied.
  const log = logger ?? {
    info: console.log,
    warn: console.warn,
    error: console.error,
    debug: console.debug
  };
  const config = resolveVoiceCallConfig(rawConfig);
  const cfg = fullConfig ?? coreConfig;
  if (!config.enabled) throw new Error("Voice call disabled. Enable the plugin entry in config.");
  if (config.skipSignatureVerification) log.warn("[voice-call] SECURITY WARNING: skipSignatureVerification=true disables webhook signature verification (development only). Do not use in production.");
  const validation = validateProviderConfig(config);
  if (!validation.valid) throw new Error(`Invalid voice-call config: ${validation.errors.join("; ")}`);
  const provider = await resolveProvider(config);
  const manager = new CallManager(config);
  // Realtime provider is resolved only when realtime mode is enabled; null otherwise.
  const realtimeProvider = config.realtime.enabled ? await resolveRealtimeProvider({
    config,
    fullConfig: cfg
  }) : null;
  const webhookServer = new VoiceCallWebhookServer(config, manager, provider, coreConfig, fullConfig ?? coreConfig, agentRuntime, log);
  if (realtimeProvider) {
    const { RealtimeCallHandler } = await loadRealtimeHandler();
    const realtimeInstructions = await buildRealtimeVoiceInstructions({
      baseInstructions: config.realtime.instructions,
      config,
      coreConfig,
      agentRuntime
    });
    // Handler receives the realtime config with instructions and tools overridden.
    const realtimeHandler = new RealtimeCallHandler({
      ...config.realtime,
      instructions: realtimeInstructions,
      tools: resolveRealtimeVoiceAgentConsultTools(config.realtime.toolPolicy, config.realtime.tools)
    }, manager, provider, realtimeProvider.provider, realtimeProvider.providerConfig, config.serve.path, cfg);
    // The consult tool handler is registered unless the tool policy is "none".
    if (config.realtime.toolPolicy !== "none") realtimeHandler.registerToolHandler(REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME, async (args, callId, handlerContext) => {
      const call = manager.getCall(callId);
      if (!call) return { error: `Call "${callId}" not found` };
      // Effective config is resolved per call: by the stored numberRouteKey
      // metadata when it is a string, otherwise by the call's `to` value.
      const effectiveConfig = resolveVoiceCallEffectiveConfig(config, typeof call.metadata?.numberRouteKey === "string" ? call.metadata.numberRouteKey : call.to).config;
      const agentId = effectiveConfig.agentId ?? "main";
      const sessionKey = resolveVoiceCallConsultSessionKey({
        ...call,
        config: effectiveConfig
      });
      const requesterSessionKey = typeof call.metadata?.requesterSessionKey === "string" ? call.metadata.requesterSessionKey : void 0;
      // Fast-context path first; when it reports `handled`, its result is the answer.
      const fastContext = await resolveRealtimeFastContextConsult({
        cfg,
        agentId,
        sessionKey,
        config: effectiveConfig.realtime.fastContext,
        args,
        logger: log
      });
      if (fastContext.handled) return fastContext.result;
      const { provider: agentProvider, model } = resolveVoiceResponseModel({
        voiceConfig: effectiveConfig,
        agentRuntime
      });
      // Explicit consult thinking level wins; otherwise ask the agent runtime for its default.
      const thinkLevel = effectiveConfig.realtime.consultThinkingLevel ?? agentRuntime.resolveThinkingDefault({
        cfg,
        provider: agentProvider,
        model
      });
      return await consultRealtimeVoiceAgent({
        cfg,
        agentRuntime,
        logger: log,
        agentId,
        sessionKey,
        messageProvider: "voice",
        lane: "voice",
        runIdPrefix: `voice-realtime-consult:${callId}`,
        args,
        transcript: mapVoiceCallConsultTranscript(call, handlerContext),
        surface: "a live phone call",
        userLabel: "Caller",
        assistantLabel: "Agent",
        questionSourceLabel: "caller",
        provider: agentProvider,
        model,
        thinkLevel,
        fastMode: effectiveConfig.realtime.consultFastMode,
        timeoutMs: effectiveConfig.responseTimeoutMs,
        spawnedBy: requesterSessionKey,
        // "fork" context mode is requested only when a requester session key exists.
        contextMode: requesterSessionKey ? "fork" : void 0,
        toolsAllow: resolveRealtimeVoiceAgentConsultToolsAllow(effectiveConfig.realtime.toolPolicy),
        extraSystemPrompt: REALTIME_VOICE_CONSULT_SYSTEM_PROMPT
      });
    });
    webhookServer.setRealtimeHandler(realtimeHandler);
  }
  const lifecycle = createRuntimeResourceLifecycle({
    config,
    webhookServer
  });
  const localUrl = await webhookServer.start();
  // From here on the server is running, so any failure must tear resources down (see catch).
  try {
    let publicUrl = config.publicUrl ?? null;
    // No configured public URL: try the configured tunnel. A tunnel failure is
    // logged and startup continues without it.
    if (!publicUrl && config.tunnel?.provider && config.tunnel.provider !== "none") try {
      const nextTunnelResult = await startTunnel({
        provider: config.tunnel.provider,
        port: config.serve.port,
        path: config.serve.path,
        ngrokAuthToken: config.tunnel.ngrokAuthToken,
        ngrokDomain: config.tunnel.ngrokDomain
      });
      lifecycle.setTunnelResult(nextTunnelResult);
      publicUrl = nextTunnelResult?.publicUrl ?? null;
    } catch (err) {
      log.error(`[voice-call] Tunnel setup failed: ${formatErrorMessage(err)}`);
    }
    // Still no public URL: try Tailscale exposure unless its mode is "off".
    if (!publicUrl && config.tailscale?.mode !== "off") publicUrl = await setupTailscaleExposure(config);
    const webhookUrl = publicUrl ?? localUrl;
    if (providerRequiresPublicWebhook(provider.name) && isProviderUnreachableWebhookUrl(webhookUrl)) throw new Error(`[voice-call] ${provider.name} requires a publicly reachable webhook URL. Refusing to use local-only webhook ${webhookUrl}. Set plugins.entries.voice-call.config.publicUrl or enable tunnel/tailscale exposure.`);
    if (publicUrl) provider.setPublicUrl?.(publicUrl);
    if (publicUrl && realtimeProvider) webhookServer.getRealtimeHandler()?.setPublicUrl(publicUrl);
    const realtimeHandler = webhookServer.getRealtimeHandler();
    // Let the manager issue stream sessions through the realtime handler when one is attached.
    if (realtimeHandler) manager.streamSessionIssuer = (request) => realtimeHandler.issueStreamSession(request);
    if (provider.name === "twilio" && config.streaming?.enabled) {
      const twilioProvider = provider;
      // Telephony TTS is optional: a setup failure only produces a warning.
      if (ttsRuntime?.textToSpeechTelephony) try {
        const ttsProvider = createTelephonyTtsProvider({
          coreConfig,
          ttsOverride: config.tts,
          runtime: ttsRuntime,
          logger: log
        });
        twilioProvider.setTTSProvider(ttsProvider);
        log.info("[voice-call] Telephony TTS provider configured");
      } catch (err) {
        log.warn(`[voice-call] Failed to initialize telephony TTS: ${formatErrorMessage(err)}`);
      }
      else log.warn("[voice-call] Telephony TTS unavailable; streaming TTS disabled");
      const mediaHandler = webhookServer.getMediaStreamHandler();
      if (mediaHandler) {
        twilioProvider.setMediaStreamHandler(mediaHandler);
        log.info("[voice-call] Media stream handler wired to provider");
      }
    }
    if (realtimeProvider) log.info(`[voice-call] Realtime voice provider: ${realtimeProvider.provider.id}`);
    await manager.initialize(provider, webhookUrl);
    const stop = async () => await lifecycle.stop();
    log.info("[voice-call] Runtime initialized");
    log.info(`[voice-call] Webhook URL: ${webhookUrl}`);
    // NOTE(review): webhookUrl is `publicUrl ?? localUrl`, so whenever publicUrl is
    // truthy the two are equal and this log looks unreachable — confirm intent.
    if (publicUrl && publicUrl !== webhookUrl) log.info(`[voice-call] Public URL: ${publicUrl}`);
    return {
      config,
      provider,
      manager,
      webhookServer,
      webhookUrl,
      publicUrl,
      stop
    };
  } catch (err) {
    // Best-effort teardown (errors suppressed), then surface the original failure.
    await lifecycle.stop({ suppressErrors: true });
    throw err;
  }
}
3434
+ //#endregion
3435
+ export { getHeader as a, getTailscaleSelfInfo as c, chunkAudio as d, resolveVoiceResponseModel as f, mapVoiceToPolly as h, normalizeProviderStatus as i, setupTailscaleExposureRoute as l, escapeXml as m, isProviderStatusTerminal as n, resolveWebhookExposureStatus as o, resolveUserPath as p, mapProviderStatusToEndReason as r, cleanupTailscaleExposureRoute as s, createVoiceCallRuntime as t, TELEPHONY_DEFAULT_TTS_TIMEOUT_MS as u };