@kodelyth/voice-call 2026.5.42 → 2026.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. package/package.json +16 -4
  2. package/api.ts +0 -16
  3. package/cli-metadata.ts +0 -10
  4. package/config-api.ts +0 -12
  5. package/index.test.ts +0 -1075
  6. package/index.ts +0 -863
  7. package/runtime-api.ts +0 -20
  8. package/runtime-entry.ts +0 -1
  9. package/setup-api.ts +0 -47
  10. package/src/allowlist.test.ts +0 -18
  11. package/src/allowlist.ts +0 -19
  12. package/src/cli.test.ts +0 -12
  13. package/src/cli.ts +0 -866
  14. package/src/config-compat.test.ts +0 -130
  15. package/src/config-compat.ts +0 -227
  16. package/src/config.test.ts +0 -542
  17. package/src/config.ts +0 -883
  18. package/src/core-bridge.ts +0 -14
  19. package/src/deep-merge.test.ts +0 -40
  20. package/src/deep-merge.ts +0 -23
  21. package/src/gateway-continue-operation.ts +0 -200
  22. package/src/http-headers.test.ts +0 -16
  23. package/src/http-headers.ts +0 -15
  24. package/src/manager/context.ts +0 -50
  25. package/src/manager/events.test.ts +0 -578
  26. package/src/manager/events.ts +0 -332
  27. package/src/manager/lifecycle.ts +0 -53
  28. package/src/manager/lookup.test.ts +0 -52
  29. package/src/manager/lookup.ts +0 -35
  30. package/src/manager/outbound.test.ts +0 -629
  31. package/src/manager/outbound.ts +0 -508
  32. package/src/manager/state.ts +0 -48
  33. package/src/manager/store.ts +0 -107
  34. package/src/manager/timers.test.ts +0 -127
  35. package/src/manager/timers.ts +0 -113
  36. package/src/manager/twiml.test.ts +0 -13
  37. package/src/manager/twiml.ts +0 -17
  38. package/src/manager.closed-loop.test.ts +0 -259
  39. package/src/manager.inbound-allowlist.test.ts +0 -183
  40. package/src/manager.notify.test.ts +0 -390
  41. package/src/manager.restore.test.ts +0 -310
  42. package/src/manager.test-harness.ts +0 -127
  43. package/src/manager.ts +0 -441
  44. package/src/media-stream.test.ts +0 -953
  45. package/src/media-stream.ts +0 -876
  46. package/src/providers/base.ts +0 -99
  47. package/src/providers/mock.test.ts +0 -86
  48. package/src/providers/mock.ts +0 -185
  49. package/src/providers/plivo.test.ts +0 -93
  50. package/src/providers/plivo.ts +0 -601
  51. package/src/providers/shared/call-status.test.ts +0 -24
  52. package/src/providers/shared/call-status.ts +0 -24
  53. package/src/providers/shared/guarded-json-api.test.ts +0 -127
  54. package/src/providers/shared/guarded-json-api.ts +0 -49
  55. package/src/providers/telnyx.test.ts +0 -489
  56. package/src/providers/telnyx.ts +0 -419
  57. package/src/providers/twilio/api.test.ts +0 -184
  58. package/src/providers/twilio/api.ts +0 -100
  59. package/src/providers/twilio/twiml-policy.test.ts +0 -84
  60. package/src/providers/twilio/twiml-policy.ts +0 -87
  61. package/src/providers/twilio/webhook.ts +0 -34
  62. package/src/providers/twilio.test.ts +0 -607
  63. package/src/providers/twilio.ts +0 -861
  64. package/src/providers/twilio.types.ts +0 -17
  65. package/src/realtime-agent-context.test.ts +0 -101
  66. package/src/realtime-agent-context.ts +0 -149
  67. package/src/realtime-defaults.ts +0 -3
  68. package/src/realtime-fast-context.test.ts +0 -74
  69. package/src/realtime-fast-context.ts +0 -27
  70. package/src/realtime-transcription.runtime.ts +0 -4
  71. package/src/realtime-voice.runtime.ts +0 -5
  72. package/src/response-generator.test.ts +0 -385
  73. package/src/response-generator.ts +0 -348
  74. package/src/response-model.test.ts +0 -71
  75. package/src/response-model.ts +0 -23
  76. package/src/runtime.test.ts +0 -625
  77. package/src/runtime.ts +0 -528
  78. package/src/telephony-audio.test.ts +0 -61
  79. package/src/telephony-audio.ts +0 -12
  80. package/src/telephony-tts.test.ts +0 -196
  81. package/src/telephony-tts.ts +0 -235
  82. package/src/test-fixtures.ts +0 -82
  83. package/src/tts-provider-voice.test.ts +0 -34
  84. package/src/tts-provider-voice.ts +0 -21
  85. package/src/tunnel.test.ts +0 -173
  86. package/src/tunnel.ts +0 -314
  87. package/src/types.ts +0 -311
  88. package/src/utils.test.ts +0 -17
  89. package/src/utils.ts +0 -14
  90. package/src/voice-mapping.test.ts +0 -32
  91. package/src/voice-mapping.ts +0 -65
  92. package/src/webhook/realtime-audio-pacer.test.ts +0 -146
  93. package/src/webhook/realtime-audio-pacer.ts +0 -204
  94. package/src/webhook/realtime-handler.test.ts +0 -1450
  95. package/src/webhook/realtime-handler.ts +0 -1382
  96. package/src/webhook/stale-call-reaper.test.ts +0 -89
  97. package/src/webhook/stale-call-reaper.ts +0 -38
  98. package/src/webhook/stream-frame-adapter.test.ts +0 -187
  99. package/src/webhook/stream-frame-adapter.ts +0 -219
  100. package/src/webhook/tailscale.test.ts +0 -216
  101. package/src/webhook/tailscale.ts +0 -129
  102. package/src/webhook-exposure.test.ts +0 -33
  103. package/src/webhook-exposure.ts +0 -84
  104. package/src/webhook-security.test.ts +0 -813
  105. package/src/webhook-security.ts +0 -982
  106. package/src/webhook.hangup-once.lifecycle.test.ts +0 -179
  107. package/src/webhook.test.ts +0 -1615
  108. package/src/webhook.ts +0 -933
  109. package/src/webhook.types.ts +0 -5
  110. package/src/websocket-test-support.ts +0 -72
  111. package/tsconfig.json +0 -16
@@ -1,1382 +0,0 @@
1
- import { randomUUID } from "node:crypto";
2
- import http from "node:http";
3
- import type { Duplex } from "node:stream";
4
- import type { KlawConfig } from "klaw/plugin-sdk/config-contracts";
5
- import { formatErrorMessage } from "klaw/plugin-sdk/error-runtime";
6
- import {
7
- buildRealtimeVoiceAgentConsultWorkingResponse,
8
- createTalkSessionController,
9
- createRealtimeVoiceBridgeSession,
10
- REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME,
11
- recordTalkObservabilityEvent,
12
- type RealtimeVoiceBridgeSession,
13
- type RealtimeVoiceProviderConfig,
14
- type RealtimeVoiceProviderPlugin,
15
- type TalkEvent,
16
- type TalkEventInput,
17
- type TalkSessionController,
18
- } from "klaw/plugin-sdk/realtime-voice";
19
- import WebSocket, { WebSocketServer } from "ws";
20
- import type { VoiceCallRealtimeConfig } from "../config.js";
21
- import type { CallManager } from "../manager.js";
22
- import type { VoiceCallProvider } from "../providers/base.js";
23
- import type { CallRecord, NormalizedEvent } from "../types.js";
24
- import type { WebhookResponsePayload } from "../webhook.types.js";
25
- import { RealtimeAudioPacer, RealtimeMulawSpeechStartDetector } from "./realtime-audio-pacer.js";
26
- import {
27
- type StreamFrameAdapter,
28
- TelnyxStreamFrameAdapter,
29
- TwilioStreamFrameAdapter,
30
- } from "./stream-frame-adapter.js";
31
-
32
- export type ToolHandlerContext = {
33
- partialUserTranscript?: string;
34
- };
35
- export type ToolHandlerFn = (
36
- args: unknown,
37
- callId: string,
38
- context: ToolHandlerContext,
39
- ) => Promise<unknown>;
40
-
41
- const STREAM_TOKEN_TTL_MS = 30_000;
42
- const DEFAULT_HOST = "localhost:8443";
43
- const MAX_REALTIME_MESSAGE_BYTES = 256 * 1024;
44
- const MAX_REALTIME_WS_BUFFERED_BYTES = 1024 * 1024;
45
- const FORCED_CONSULT_FALLBACK_DELAY_MS = 200;
46
- const FORCED_CONSULT_NATIVE_DEDUPE_MS = 2_000;
47
- const FORCED_CONSULT_RESULT_MAX_CHARS = 1800;
48
- const CONSULT_TRANSCRIPT_SETTLE_MS = 350;
49
- const CONSULT_TRANSCRIPT_SETTLE_MAX_MS = 1_000;
50
- const MAX_PARTIAL_USER_TRANSCRIPT_CHARS = 1_200;
51
- const RECENT_FINAL_USER_TRANSCRIPT_TTL_MS = 2_000;
52
- const BARGE_IN_REQUIRED_LOUD_CHUNKS = 2;
53
-
54
- function normalizePath(pathname: string): string {
55
- const trimmed = pathname.trim();
56
- if (!trimmed) {
57
- return "/";
58
- }
59
- const prefixed = trimmed.startsWith("/") ? trimmed : `/${trimmed}`;
60
- if (prefixed === "/") {
61
- return prefixed;
62
- }
63
- return prefixed.endsWith("/") ? prefixed.slice(0, -1) : prefixed;
64
- }
65
-
66
- function buildGreetingInstructions(
67
- baseInstructions: string | undefined,
68
- greeting: string | undefined,
69
- ): string | undefined {
70
- const trimmedGreeting = greeting?.trim();
71
- if (!trimmedGreeting) {
72
- return undefined;
73
- }
74
- const intro =
75
- "Start the call by greeting the caller naturally. Include this greeting in your first spoken reply:";
76
- return baseInstructions
77
- ? `${baseInstructions}\n\n${intro} "${trimmedGreeting}"`
78
- : `${intro} "${trimmedGreeting}"`;
79
- }
80
-
81
- function readSpeakableToolResultText(result: unknown): string | undefined {
82
- if (typeof result === "string") {
83
- return result.trim() || undefined;
84
- }
85
- if (!result || typeof result !== "object" || Array.isArray(result)) {
86
- return undefined;
87
- }
88
- const text = (result as { text?: unknown }).text;
89
- if (typeof text === "string" && text.trim()) {
90
- return text.trim();
91
- }
92
- const output = (result as { output?: unknown }).output;
93
- return typeof output === "string" && output.trim() ? output.trim() : undefined;
94
- }
95
-
96
- function readConsultArgText(args: unknown, key: string): string | undefined {
97
- if (!args || typeof args !== "object" || Array.isArray(args)) {
98
- return undefined;
99
- }
100
- const value = (args as Record<string, unknown>)[key];
101
- return typeof value === "string" && value.trim() ? value.trim() : undefined;
102
- }
103
-
104
- function readConsultQuestionText(args: unknown): string | undefined {
105
- return (
106
- readConsultArgText(args, "question") ??
107
- readConsultArgText(args, "prompt") ??
108
- readConsultArgText(args, "query") ??
109
- readConsultArgText(args, "task")
110
- );
111
- }
112
-
113
- function normalizeTranscriptText(text: string): string {
114
- return text.replace(/\s+/g, " ").trim();
115
- }
116
-
117
- function findTextOverlap(base: string, next: string): number {
118
- const max = Math.min(base.length, next.length);
119
- for (let size = max; size > 0; size -= 1) {
120
- if (base.slice(-size) === next.slice(0, size)) {
121
- return size;
122
- }
123
- }
124
- return 0;
125
- }
126
-
127
- function shouldInsertTranscriptSpace(base: string, next: string): boolean {
128
- if (!base || !next) {
129
- return false;
130
- }
131
- const last = base.at(-1);
132
- if (
133
- /\s$/.test(base) ||
134
- last === "(" ||
135
- last === "[" ||
136
- last === "{" ||
137
- last === '"' ||
138
- last === "'" ||
139
- /^[\s,.;:!?)]/.test(next)
140
- ) {
141
- return false;
142
- }
143
- return true;
144
- }
145
-
146
- function appendTranscriptText(base: string | undefined, fragment: string): string {
147
- const next = normalizeTranscriptText(fragment);
148
- if (!next) {
149
- return base ?? "";
150
- }
151
- const current = normalizeTranscriptText(base ?? "");
152
- if (!current) {
153
- return next;
154
- }
155
- const currentLower = current.toLowerCase();
156
- const nextLower = next.toLowerCase();
157
- if (currentLower === nextLower || currentLower.endsWith(nextLower)) {
158
- return current;
159
- }
160
- if (nextLower.startsWith(currentLower)) {
161
- return next;
162
- }
163
- const overlap = findTextOverlap(currentLower, nextLower);
164
- if (overlap >= 6 || (overlap >= 3 && next.length <= 12)) {
165
- return `${current}${next.slice(overlap)}`.trim();
166
- }
167
- const separator = shouldInsertTranscriptSpace(current, next) ? " " : "";
168
- return `${current}${separator}${next}`.trim();
169
- }
170
-
171
- function limitPartialUserTranscript(text: string): string {
172
- if (text.length <= MAX_PARTIAL_USER_TRANSCRIPT_CHARS) {
173
- return text;
174
- }
175
- const tail = text.slice(-MAX_PARTIAL_USER_TRANSCRIPT_CHARS);
176
- return tail.replace(/^\S+\s+/, "").trimStart() || tail.trimStart();
177
- }
178
-
179
- function withFallbackConsultQuestion(args: unknown, fallback: string | undefined): unknown {
180
- const providerQuestion = readConsultQuestionText(args);
181
- const question = fallback?.trim();
182
- if (providerQuestion) {
183
- if (
184
- question &&
185
- providerQuestion.length <= 40 &&
186
- question.length >= providerQuestion.length + 8
187
- ) {
188
- const context = readConsultArgText(args, "context");
189
- const fallbackContext = `Realtime provider supplied a shorter consult question: ${providerQuestion}`;
190
- return args && typeof args === "object" && !Array.isArray(args)
191
- ? {
192
- ...args,
193
- question,
194
- context: context ? `${context}\n\n${fallbackContext}` : fallbackContext,
195
- }
196
- : { question, context: fallbackContext };
197
- }
198
- return args;
199
- }
200
- if (!question) {
201
- return args;
202
- }
203
- return args && typeof args === "object" && !Array.isArray(args)
204
- ? { ...args, question }
205
- : { question };
206
- }
207
-
208
- function buildForcedConsultSpeechPrompt(result: string): string {
209
- const trimmed = result.trim();
210
- const bounded =
211
- trimmed.length <= FORCED_CONSULT_RESULT_MAX_CHARS
212
- ? trimmed
213
- : `${trimmed.slice(0, FORCED_CONSULT_RESULT_MAX_CHARS - 16).trimEnd()} [truncated]`;
214
- return [
215
- "Internal Klaw consult result is ready.",
216
- "Do not call tools for this internal result.",
217
- "Speak the following answer to the caller now, briefly and naturally:",
218
- bounded,
219
- ].join("\n");
220
- }
221
-
222
- type PendingStreamToken = {
223
- expiry: number;
224
- from?: string;
225
- to?: string;
226
- direction?: "inbound" | "outbound";
227
- providerName?: "twilio" | "telnyx";
228
- callId?: string;
229
- };
230
-
231
- export type StreamSessionRequest = {
232
- providerName?: "twilio" | "telnyx";
233
- callId?: string;
234
- from?: string;
235
- to?: string;
236
- direction?: "inbound" | "outbound";
237
- };
238
-
239
- export type StreamSession = {
240
- token: string;
241
- streamUrl: string;
242
- };
243
-
244
- type CallRegistration = {
245
- callId: string;
246
- initialGreetingInstructions?: string;
247
- };
248
-
249
- type ActiveRealtimeVoiceBridge = RealtimeVoiceBridgeSession;
250
-
251
- type RealtimeSpeakResult = {
252
- success: boolean;
253
- error?: string;
254
- };
255
-
256
- type ForcedConsultState = {
257
- promise: Promise<unknown>;
258
- sendSpeechPrompt: boolean;
259
- completedAt?: number;
260
- };
261
-
262
- type NativeConsultState = {
263
- startedAt: number;
264
- promise: Promise<unknown>;
265
- partialUserTranscript?: string;
266
- };
267
-
268
- type TelephonyCloseReason = "completed" | "error";
269
-
270
- function appendRecentTalkEventMetadata(
271
- call: CallRecord | null | undefined,
272
- event: TalkEvent,
273
- ): void {
274
- if (!call) {
275
- return;
276
- }
277
- const metadata = call.metadata ?? {};
278
- const previous = Array.isArray(metadata.recentTalkEvents) ? metadata.recentTalkEvents : [];
279
- metadata.lastTalkEventAt = event.timestamp;
280
- metadata.lastTalkEventType = event.type;
281
- metadata.recentTalkEvents = [
282
- ...previous,
283
- {
284
- id: event.id,
285
- brain: event.brain,
286
- mode: event.mode,
287
- provider: event.provider,
288
- seq: event.seq,
289
- sessionId: event.sessionId,
290
- timestamp: event.timestamp,
291
- transport: event.transport,
292
- type: event.type,
293
- ...(event.turnId ? { turnId: event.turnId } : {}),
294
- ...(event.final !== undefined ? { final: event.final } : {}),
295
- },
296
- ].slice(-12);
297
- call.metadata = metadata;
298
- }
299
-
300
- export class RealtimeCallHandler {
301
- private readonly toolHandlers = new Map<string, ToolHandlerFn>();
302
- private readonly pendingStreamTokens = new Map<string, PendingStreamToken>();
303
- private readonly activeBridgesByCallId = new Map<string, ActiveRealtimeVoiceBridge>();
304
- private readonly activeTelephonyClosersByCallId = new Map<
305
- string,
306
- (reason: TelephonyCloseReason) => void
307
- >();
308
- private readonly partialUserTranscriptsByCallId = new Map<string, string>();
309
- private readonly partialUserTranscriptUpdatedAtByCallId = new Map<string, number>();
310
- private readonly recentFinalUserTranscriptsByCallId = new Map<string, string>();
311
- private readonly recentFinalUserTranscriptTimersByCallId = new Map<
312
- string,
313
- ReturnType<typeof setTimeout>
314
- >();
315
- private readonly forcedConsultTimersByCallId = new Map<string, ReturnType<typeof setTimeout>>();
316
- private readonly forcedConsultInFlightByCallId = new Set<string>();
317
- private readonly forcedConsultsByCallId = new Map<string, ForcedConsultState>();
318
- private readonly lastProviderConsultAtByCallId = new Map<string, number>();
319
- private readonly nativeConsultsInFlightByCallId = new Map<string, NativeConsultState>();
320
- private publicOrigin: string | null = null;
321
- private publicPathPrefix = "";
322
-
323
- constructor(
324
- private readonly config: VoiceCallRealtimeConfig,
325
- private readonly manager: CallManager,
326
- private readonly provider: VoiceCallProvider,
327
- private readonly realtimeProvider: RealtimeVoiceProviderPlugin,
328
- private readonly providerConfig: RealtimeVoiceProviderConfig,
329
- private readonly servePath: string,
330
- private readonly coreConfig?: KlawConfig,
331
- ) {}
332
-
333
- setPublicUrl(url: string): void {
334
- try {
335
- const parsed = new URL(url);
336
- this.publicOrigin = parsed.host;
337
- const normalizedServePath = normalizePath(this.servePath);
338
- const normalizedPublicPath = normalizePath(parsed.pathname);
339
- const idx = normalizedPublicPath.indexOf(normalizedServePath);
340
- this.publicPathPrefix = idx > 0 ? normalizedPublicPath.slice(0, idx) : "";
341
- } catch {
342
- this.publicOrigin = null;
343
- this.publicPathPrefix = "";
344
- }
345
- }
346
-
347
- getStreamPathPattern(): string {
348
- return `${this.publicPathPrefix}${normalizePath(this.config.streamPath ?? "/voice/stream/realtime")}`;
349
- }
350
-
351
- buildTwiMLPayload(req: http.IncomingMessage, params?: URLSearchParams): WebhookResponsePayload {
352
- const rawDirection = params?.get("Direction");
353
- const previousOrigin = this.publicOrigin;
354
- if (!previousOrigin) {
355
- this.publicOrigin = req.headers.host ?? DEFAULT_HOST;
356
- }
357
- try {
358
- const { streamUrl } = this.issueStreamSession({
359
- providerName: "twilio",
360
- from: params?.get("From") ?? undefined,
361
- to: params?.get("To") ?? undefined,
362
- direction: rawDirection?.startsWith("outbound") ? "outbound" : "inbound",
363
- });
364
- const twiml = `<?xml version="1.0" encoding="UTF-8"?>
365
- <Response>
366
- <Connect>
367
- <Stream url="${streamUrl}" />
368
- </Connect>
369
- </Response>`;
370
- return {
371
- statusCode: 200,
372
- headers: { "Content-Type": "text/xml" },
373
- body: twiml,
374
- };
375
- } finally {
376
- this.publicOrigin = previousOrigin;
377
- }
378
- }
379
-
380
- handleWebSocketUpgrade(request: http.IncomingMessage, socket: Duplex, head: Buffer): void {
381
- const url = new URL(request.url ?? "/", "wss://localhost");
382
- const token = url.pathname.split("/").pop() ?? null;
383
- const callerMeta = token ? this.consumeStreamToken(token) : null;
384
- if (!callerMeta) {
385
- socket.write("HTTP/1.1 401 Unauthorized\r\n\r\n");
386
- socket.destroy();
387
- return;
388
- }
389
-
390
- const providerName = callerMeta.providerName ?? "twilio";
391
- const adapter: StreamFrameAdapter =
392
- providerName === "telnyx" ? new TelnyxStreamFrameAdapter() : new TwilioStreamFrameAdapter();
393
-
394
- const wss = new WebSocketServer({
395
- noServer: true,
396
- // Reject oversized realtime frames before JSON parsing or bridge setup runs.
397
- maxPayload: MAX_REALTIME_MESSAGE_BYTES,
398
- });
399
- wss.handleUpgrade(request, socket, head, (ws) => {
400
- let bridge: ActiveRealtimeVoiceBridge | null = null;
401
- let initialized = false;
402
- let activeCallSid = "unknown";
403
- let stopReceived = false;
404
- let lastMediaTimestamp: number | undefined;
405
- let lastMediaGapWarnAt = 0;
406
-
407
- ws.on("message", (data: Buffer) => {
408
- try {
409
- const frame = adapter.parseInbound(data.toString());
410
- if (frame.kind === "ignored") {
411
- return;
412
- }
413
- if (frame.kind === "start") {
414
- if (initialized) {
415
- return;
416
- }
417
- initialized = true;
418
- activeCallSid = frame.providerCallId;
419
- const nextBridge = this.handleCall(
420
- frame.streamId,
421
- frame.providerCallId,
422
- ws,
423
- callerMeta,
424
- adapter,
425
- );
426
- if (!nextBridge) {
427
- return;
428
- }
429
- bridge = nextBridge;
430
- return;
431
- }
432
- if (!bridge) {
433
- return;
434
- }
435
- if (frame.kind === "media") {
436
- const audio = Buffer.from(frame.payloadBase64, "base64");
437
- bridge.sendAudio(audio);
438
- if (frame.timestampMs !== undefined) {
439
- if (lastMediaTimestamp !== undefined) {
440
- const gapMs = frame.timestampMs - lastMediaTimestamp;
441
- const now = Date.now();
442
- if ((gapMs > 120 || gapMs < 0) && now - lastMediaGapWarnAt > 5_000) {
443
- lastMediaGapWarnAt = now;
444
- console.warn(
445
- `[voice-call] realtime media timestamp gap providerCallId=${activeCallSid} gapMs=${gapMs} timestamp=${frame.timestampMs}`,
446
- );
447
- }
448
- }
449
- lastMediaTimestamp = frame.timestampMs;
450
- bridge.setMediaTimestamp(frame.timestampMs);
451
- }
452
- return;
453
- }
454
- if (frame.kind === "mark") {
455
- bridge.acknowledgeMark();
456
- return;
457
- }
458
- if (frame.kind === "error") {
459
- console.error(
460
- `[voice-call] realtime WS error frame providerCallId=${activeCallSid} code=${frame.code ?? "?"} title=${frame.title ?? ""} detail=${frame.detail ?? ""}`,
461
- );
462
- return;
463
- }
464
- if (frame.kind === "stop") {
465
- stopReceived = true;
466
- this.closeTelephonyBridge(activeCallSid, bridge, "completed");
467
- }
468
- } catch (error) {
469
- console.error("[voice-call] realtime WS parse failed:", error);
470
- }
471
- });
472
-
473
- ws.on("close", (code) => {
474
- const reason = stopReceived || code === 1000 || code === 1005 ? "completed" : "error";
475
- this.closeTelephonyBridge(activeCallSid, bridge, reason);
476
- });
477
-
478
- ws.on("error", (error) => {
479
- console.error("[voice-call] realtime WS error:", error);
480
- });
481
- });
482
- }
483
-
484
- registerToolHandler(name: string, fn: ToolHandlerFn): void {
485
- this.toolHandlers.set(name, fn);
486
- }
487
-
488
- speak(callId: string, instructions: string): RealtimeSpeakResult {
489
- const bridge = this.activeBridgesByCallId.get(callId);
490
- if (!bridge) {
491
- return { success: false, error: "No active realtime bridge for call" };
492
- }
493
- try {
494
- bridge.triggerGreeting(instructions);
495
- return { success: true };
496
- } catch (error) {
497
- return { success: false, error: formatErrorMessage(error) };
498
- }
499
- }
500
-
501
- issueStreamSession(request: StreamSessionRequest = {}): StreamSession {
502
- const token = this.issueStreamToken({
503
- providerName: request.providerName ?? "twilio",
504
- callId: request.callId,
505
- from: request.from,
506
- to: request.to,
507
- direction: request.direction,
508
- });
509
- const host = this.publicOrigin || DEFAULT_HOST;
510
- const streamUrl = `wss://${host}${this.getStreamPathPattern()}/${token}`;
511
- return { token, streamUrl };
512
- }
513
-
514
- private issueStreamToken(meta: Omit<PendingStreamToken, "expiry"> = {}): string {
515
- const token = randomUUID();
516
- this.pendingStreamTokens.set(token, { expiry: Date.now() + STREAM_TOKEN_TTL_MS, ...meta });
517
- for (const [candidate, entry] of this.pendingStreamTokens) {
518
- if (Date.now() > entry.expiry) {
519
- this.pendingStreamTokens.delete(candidate);
520
- }
521
- }
522
- return token;
523
- }
524
-
525
- private consumeStreamToken(token: string): Omit<PendingStreamToken, "expiry"> | null {
526
- const entry = this.pendingStreamTokens.get(token);
527
- if (!entry) {
528
- return null;
529
- }
530
- this.pendingStreamTokens.delete(token);
531
- if (Date.now() > entry.expiry) {
532
- return null;
533
- }
534
- return {
535
- from: entry.from,
536
- to: entry.to,
537
- direction: entry.direction,
538
- providerName: entry.providerName,
539
- callId: entry.callId,
540
- };
541
- }
542
-
543
- private handleCall(
544
- streamSid: string,
545
- callSid: string,
546
- ws: WebSocket,
547
- callerMeta: Omit<PendingStreamToken, "expiry">,
548
- adapter: StreamFrameAdapter,
549
- ): ActiveRealtimeVoiceBridge | null {
550
- const registration = this.registerCallInManager(callSid, callerMeta);
551
- if (!registration) {
552
- ws.close(1008, "Caller rejected by policy");
553
- return null;
554
- }
555
-
556
- const { callId, initialGreetingInstructions } = registration;
557
- const callRecord = this.manager.getCallByProviderCallId(callSid);
558
- const talk: TalkSessionController = createTalkSessionController(
559
- {
560
- sessionId: `voice-call:${callId}:realtime`,
561
- mode: "realtime",
562
- transport: "gateway-relay",
563
- brain: "agent-consult",
564
- provider: this.realtimeProvider.id,
565
- },
566
- { onEvent: recordTalkObservabilityEvent },
567
- );
568
- const rememberTalkEvent = (event: TalkEvent | undefined): TalkEvent | undefined => {
569
- if (event) {
570
- appendRecentTalkEventMetadata(callRecord, event);
571
- }
572
- return event;
573
- };
574
- const emitTalkEvent = (input: TalkEventInput): TalkEvent => {
575
- return rememberTalkEvent(talk.emit(input)) as TalkEvent;
576
- };
577
- const ensureTalkTurn = (): string => {
578
- const turn = talk.ensureTurn({
579
- payload: { callId, providerCallId: callSid },
580
- });
581
- rememberTalkEvent(turn.event);
582
- return turn.turnId;
583
- };
584
- const endTalkTurn = (reason = "completed"): void => {
585
- const ended = talk.endTurn({
586
- payload: { callId, providerCallId: callSid, reason },
587
- });
588
- if (ended.ok) {
589
- rememberTalkEvent(ended.event);
590
- }
591
- };
592
- const finishOutputAudio = (reason: string): void => {
593
- rememberTalkEvent(
594
- talk.finishOutputAudio({
595
- payload: { callId, providerCallId: callSid, reason },
596
- }),
597
- );
598
- };
599
- emitTalkEvent({
600
- type: "session.started",
601
- payload: { callId, providerCallId: callSid, streamSid },
602
- });
603
- console.log(
604
- `[voice-call] Realtime bridge starting for call ${callId} (providerCallId=${callSid}, initialGreeting=${initialGreetingInstructions ? "queued" : "absent"})`,
605
- );
606
- let callEndEmitted = false;
607
- const emitCallEnd = (reason: "completed" | "error") => {
608
- if (callEndEmitted) {
609
- return;
610
- }
611
- callEndEmitted = true;
612
- this.endCallInManager(callSid, callId, reason);
613
- };
614
-
615
- const sendString = (message: string): boolean => {
616
- if (ws.readyState !== WebSocket.OPEN) {
617
- return false;
618
- }
619
- if (ws.bufferedAmount > MAX_REALTIME_WS_BUFFERED_BYTES) {
620
- console.warn(
621
- `[voice-call] realtime outbound websocket backpressure before send callId=${callId} providerCallId=${callSid} bufferedBytes=${ws.bufferedAmount}`,
622
- );
623
- ws.close(1013, "Backpressure: send buffer exceeded");
624
- return false;
625
- }
626
- ws.send(message);
627
- if (ws.bufferedAmount > MAX_REALTIME_WS_BUFFERED_BYTES) {
628
- console.warn(
629
- `[voice-call] realtime outbound websocket backpressure after send callId=${callId} providerCallId=${callSid} bufferedBytes=${ws.bufferedAmount}`,
630
- );
631
- ws.close(1013, "Backpressure: send buffer exceeded");
632
- return false;
633
- }
634
- return true;
635
- };
636
- const audioPacer = new RealtimeAudioPacer({
637
- send: sendString,
638
- serializer: {
639
- media: (payload) => adapter.serializeMedia(payload),
640
- clear: () => adapter.serializeClear(),
641
- mark: (name) => adapter.serializeMark(name),
642
- },
643
- onBackpressure: () => {
644
- console.warn(
645
- `[voice-call] realtime paced audio backpressure callId=${callId} providerCallId=${callSid}`,
646
- );
647
- if (ws.readyState === WebSocket.OPEN) {
648
- ws.close(1013, "Backpressure: paced audio queue exceeded");
649
- }
650
- },
651
- });
652
- const speechDetector = new RealtimeMulawSpeechStartDetector({
653
- requiredLoudChunks: BARGE_IN_REQUIRED_LOUD_CHUNKS,
654
- });
655
- const session = createRealtimeVoiceBridgeSession({
656
- provider: this.realtimeProvider,
657
- cfg: this.coreConfig,
658
- providerConfig: this.providerConfig,
659
- instructions: this.config.instructions,
660
- tools: this.config.tools,
661
- initialGreetingInstructions,
662
- triggerGreetingOnReady: Boolean(initialGreetingInstructions),
663
- audioSink: {
664
- isOpen: () => ws.readyState === WebSocket.OPEN,
665
- sendAudio: (muLaw) => {
666
- const turnId = ensureTalkTurn();
667
- rememberTalkEvent(
668
- talk.startOutputAudio({
669
- turnId,
670
- payload: { callId, providerCallId: callSid },
671
- }).event,
672
- );
673
- emitTalkEvent({
674
- type: "output.audio.delta",
675
- turnId,
676
- payload: { byteLength: muLaw.length },
677
- });
678
- audioPacer.sendAudio(muLaw);
679
- },
680
- clearAudio: () => {
681
- const clearedBytes = audioPacer.clearAudio();
682
- console.log(
683
- `[voice-call] realtime outbound audio clear requested callId=${callId} providerCallId=${callSid} queuedBytes=${clearedBytes}`,
684
- );
685
- finishOutputAudio("clear");
686
- },
687
- sendMark: (markName) => {
688
- audioPacer.sendMark(markName);
689
- },
690
- },
691
- onTranscript: (role, text, isFinal) => {
692
- const turnId = ensureTalkTurn();
693
- const eventType =
694
- role === "assistant"
695
- ? isFinal
696
- ? "output.text.done"
697
- : "output.text.delta"
698
- : isFinal
699
- ? "transcript.done"
700
- : "transcript.delta";
701
- const payload = role === "assistant" ? { text } : { role, text };
702
- emitTalkEvent({
703
- type: eventType,
704
- turnId,
705
- payload,
706
- final: isFinal,
707
- });
708
- if (role === "user" && isFinal) {
709
- emitTalkEvent({
710
- type: "input.audio.committed",
711
- turnId,
712
- payload: { callId, providerCallId: callSid },
713
- final: true,
714
- });
715
- }
716
- if (!isFinal) {
717
- if (role === "user" && text.trim()) {
718
- const transcript = this.recordPartialUserTranscript(callId, text);
719
- console.log(
720
- `[voice-call] realtime input transcript callId=${callId} providerCallId=${callSid} final=false chars=${text.trim().length} aggregateChars=${transcript.length}`,
721
- );
722
- }
723
- return;
724
- }
725
- if (role === "user") {
726
- const transcript = this.recordPartialUserTranscript(callId, text);
727
- this.clearPartialUserTranscript(callId);
728
- this.setRecentFinalUserTranscript(callId, transcript);
729
- console.log(
730
- `[voice-call] realtime input transcript callId=${callId} providerCallId=${callSid} final=true chars=${text.trim().length} aggregateChars=${transcript.length}`,
731
- );
732
- const event: NormalizedEvent = {
733
- id: `realtime-speech-${callSid}-${Date.now()}`,
734
- type: "call.speech",
735
- callId,
736
- providerCallId: callSid,
737
- timestamp: Date.now(),
738
- transcript,
739
- isFinal: true,
740
- };
741
- this.manager.processEvent(event);
742
- this.scheduleForcedAgentConsult({
743
- session,
744
- callId,
745
- callSid,
746
- transcript,
747
- clearAudio: () => {
748
- const clearedBytes = audioPacer.clearAudio();
749
- console.log(
750
- `[voice-call] realtime forced consult cleared outbound audio callId=${callId} providerCallId=${callSid} queuedBytes=${clearedBytes}`,
751
- );
752
- },
753
- });
754
- return;
755
- }
756
- this.manager.processEvent({
757
- id: `realtime-bot-${callSid}-${Date.now()}`,
758
- type: "call.speaking",
759
- callId,
760
- providerCallId: callSid,
761
- timestamp: Date.now(),
762
- text,
763
- });
764
- },
765
- onToolCall: (toolEvent, session) => {
766
- const turnId = ensureTalkTurn();
767
- emitTalkEvent({
768
- type: "tool.call",
769
- turnId,
770
- itemId: toolEvent.itemId,
771
- callId: toolEvent.callId,
772
- payload: { name: toolEvent.name, args: toolEvent.args },
773
- });
774
- console.log(
775
- `[voice-call] realtime tool call received callId=${callId} providerCallId=${callSid} tool=${toolEvent.name}`,
776
- );
777
- void this.executeToolCall(
778
- session,
779
- callId,
780
- toolEvent.callId || toolEvent.itemId,
781
- toolEvent.name,
782
- toolEvent.args,
783
- turnId,
784
- emitTalkEvent,
785
- );
786
- },
787
- onEvent: (event) => {
788
- if (event.type === "input_audio_buffer.speech_started") {
789
- ensureTalkTurn();
790
- return;
791
- }
792
- if (event.type === "input_audio_buffer.speech_stopped") {
793
- const turnId = talk.activeTurnId;
794
- if (!turnId) {
795
- return;
796
- }
797
- emitTalkEvent({
798
- type: "input.audio.committed",
799
- turnId,
800
- payload: { callId, providerCallId: callSid, source: event.type },
801
- final: true,
802
- });
803
- return;
804
- }
805
- if (event.type === "response.done") {
806
- finishOutputAudio("response.done");
807
- endTalkTurn("response.done");
808
- return;
809
- }
810
- if (event.type === "error") {
811
- emitTalkEvent({
812
- type: "session.error",
813
- payload: { message: event.detail ?? "Realtime provider error" },
814
- final: true,
815
- });
816
- }
817
- },
818
- onReady: () => {
819
- emitTalkEvent({
820
- type: "session.ready",
821
- payload: { callId, providerCallId: callSid },
822
- });
823
- },
824
- onError: (error) => {
825
- console.error("[voice-call] realtime voice error:", error.message);
826
- emitTalkEvent({
827
- type: "session.error",
828
- payload: { message: error.message },
829
- final: true,
830
- });
831
- },
832
- onClose: (reason) => {
833
- this.activeBridgesByCallId.delete(callId);
834
- this.activeBridgesByCallId.delete(callSid);
835
- this.activeTelephonyClosersByCallId.delete(callId);
836
- this.activeTelephonyClosersByCallId.delete(callSid);
837
- this.clearUserTranscriptState(callId);
838
- finishOutputAudio(reason);
839
- emitTalkEvent({
840
- type: "session.closed",
841
- payload: { reason },
842
- final: true,
843
- });
844
- if (reason !== "error") {
845
- return;
846
- }
847
- emitCallEnd("error");
848
- if (ws.readyState === WebSocket.OPEN) {
849
- ws.close(1011, "Bridge disconnected");
850
- }
851
- void this.provider
852
- .hangupCall({ callId, providerCallId: callSid, reason: "error" })
853
- .catch((error: unknown) => {
854
- console.warn(
855
- `[voice-call] Failed to hang up realtime call ${callSid}: ${formatErrorMessage(
856
- error,
857
- )}`,
858
- );
859
- });
860
- },
861
- });
862
- const closeTelephony = (reason: TelephonyCloseReason) => {
863
- emitCallEnd(reason);
864
- session.close();
865
- };
866
- this.activeBridgesByCallId.set(callId, session);
867
- this.activeBridgesByCallId.set(callSid, session);
868
- this.activeTelephonyClosersByCallId.set(callId, closeTelephony);
869
- this.activeTelephonyClosersByCallId.set(callSid, closeTelephony);
870
- const sendAudioToSession = session.sendAudio.bind(session);
871
- session.sendAudio = (audio) => {
872
- if (speechDetector.accept(audio)) {
873
- const interruptedTurnId = ensureTalkTurn();
874
- const clearedBytes = audioPacer.clearAudio();
875
- console.log(
876
- `[voice-call] realtime outbound audio cleared by barge-in callId=${callId} providerCallId=${callSid} queuedBytes=${clearedBytes}`,
877
- );
878
- finishOutputAudio("barge-in");
879
- const cancelled = talk.cancelTurn({
880
- turnId: interruptedTurnId,
881
- payload: { callId, providerCallId: callSid, reason: "barge-in" },
882
- });
883
- if (cancelled.ok) {
884
- rememberTalkEvent(cancelled.event);
885
- }
886
- }
887
- emitTalkEvent({
888
- type: "input.audio.delta",
889
- turnId: ensureTalkTurn(),
890
- payload: { byteLength: audio.length },
891
- });
892
- sendAudioToSession(audio);
893
- };
894
- const closeSession = session.close.bind(session);
895
- session.close = () => {
896
- this.activeBridgesByCallId.delete(callId);
897
- this.activeBridgesByCallId.delete(callSid);
898
- this.activeTelephonyClosersByCallId.delete(callId);
899
- this.activeTelephonyClosersByCallId.delete(callSid);
900
- this.clearUserTranscriptState(callId);
901
- this.clearForcedConsultState(callId);
902
- audioPacer.close();
903
- closeSession();
904
- };
905
-
906
- session.connect().catch((error: Error) => {
907
- console.error("[voice-call] Failed to connect realtime bridge:", error);
908
- session.close();
909
- emitCallEnd("error");
910
- ws.close(1011, "Failed to connect");
911
- });
912
-
913
- return session;
914
- }
915
-
916
/**
 * Folds a new piece of user transcript text into the running partial
 * transcript kept for `callId` and stamps the time of the update.
 *
 * The stored value is whatever `appendTranscriptText` followed by
 * `limitPartialUserTranscript` produce from the previous value and `text`.
 *
 * @param callId - Call whose partial transcript is updated.
 * @param text - Newly received transcript text.
 * @returns The aggregate partial transcript now stored for the call.
 */
private recordPartialUserTranscript(callId: string, text: string): string {
  const previous = this.partialUserTranscriptsByCallId.get(callId);
  const appended = appendTranscriptText(previous, text);
  const aggregate = limitPartialUserTranscript(appended);
  this.partialUserTranscriptsByCallId.set(callId, aggregate);
  // The update time is read by waitForConsultTranscriptSettle to detect quiet periods.
  this.partialUserTranscriptUpdatedAtByCallId.set(callId, Date.now());
  return aggregate;
}
923
-
924
/**
 * Forgets the partial user transcript for `callId` together with the
 * timestamp of its last update.
 *
 * @param callId - Call whose partial transcript state is dropped.
 */
private clearPartialUserTranscript(callId: string): void {
  // Transcript text first, then its "last updated" marker.
  this.partialUserTranscriptsByCallId.delete(callId);
  this.partialUserTranscriptUpdatedAtByCallId.delete(callId);
}
928
-
929
/**
 * Remembers `text` as the most recent final user transcript for `callId` and
 * arms a timer that forgets it after RECENT_FINAL_USER_TRANSCRIPT_TTL_MS.
 *
 * @param callId - Call the transcript belongs to.
 * @param text - Final transcript text to remember.
 */
private setRecentFinalUserTranscript(callId: string, text: string): void {
  // Drop any earlier entry and cancel its pending expiry before storing the new one.
  this.clearRecentFinalUserTranscript(callId);
  this.recentFinalUserTranscriptsByCallId.set(callId, text);

  const expire = (): void => {
    // Evict only the value this timer was armed for; a different value stays put.
    const stillCurrent = this.recentFinalUserTranscriptsByCallId.get(callId) === text;
    if (stillCurrent) {
      this.recentFinalUserTranscriptsByCallId.delete(callId);
    }
    this.recentFinalUserTranscriptTimersByCallId.delete(callId);
  };

  const expiryTimer = setTimeout(expire, RECENT_FINAL_USER_TRANSCRIPT_TTL_MS);
  // Where the timer handle supports it, do not let this timer keep the process alive.
  expiryTimer.unref?.();
  this.recentFinalUserTranscriptTimersByCallId.set(callId, expiryTimer);
}
941
-
942
/**
 * Cancels the expiry timer (if one is pending) for the recent final user
 * transcript of `callId` and removes the stored transcript.
 *
 * @param callId - Call whose recent final transcript is dropped.
 */
private clearRecentFinalUserTranscript(callId: string): void {
  const pendingExpiry = this.recentFinalUserTranscriptTimersByCallId.get(callId);
  if (pendingExpiry) {
    clearTimeout(pendingExpiry);
    this.recentFinalUserTranscriptTimersByCallId.delete(callId);
  }
  // The transcript itself is removed whether or not a timer was pending.
  this.recentFinalUserTranscriptsByCallId.delete(callId);
}
950
-
951
/**
 * Clears all user transcript state tracked for `callId`: the partial
 * transcript and the recent final transcript (including its expiry timer).
 *
 * @param callId - Call whose transcript state is reset.
 */
private clearUserTranscriptState(callId: string): void {
  this.clearPartialUserTranscript(callId);
  this.clearRecentFinalUserTranscript(callId);
}
955
-
956
/**
 * Picks the user transcript text to hand to tool handlers as context.
 *
 * The partial transcript wins whenever one is stored (even an empty string);
 * otherwise the recent final transcript is used.
 *
 * @param callId - Call to look up.
 * @returns The chosen transcript, or `undefined` when neither is stored.
 */
private resolveUserTranscriptContext(callId: string): string | undefined {
  const partial = this.partialUserTranscriptsByCallId.get(callId);
  if (partial != null) {
    return partial;
  }
  return this.recentFinalUserTranscriptsByCallId.get(callId);
}
962
-
963
/**
 * Marks `consumed` transcript text as handled so it is not offered again as
 * context for a later consult.
 *
 * The partial transcript and the recent final transcript are processed
 * independently:
 * - partial: removed when it equals the consumed text; when it merely starts
 *   with the consumed text (case-insensitive) only that prefix is stripped and
 *   the remainder is kept;
 * - recent final: removed when it equals or starts with (case-insensitive)
 *   the consumed text.
 *
 * Previously the method returned early when no partial transcript was stored
 * or when the partial equalled the consumed text, which made the recent-final
 * clean-up unreachable in exactly the case where `resolveUserTranscriptContext`
 * had fallen back to the recent final transcript.
 *
 * @param callId - Call whose transcript state is updated.
 * @param consumed - Text that was used; blank or `undefined` is a no-op.
 */
private consumePartialUserTranscript(callId: string, consumed: string | undefined): void {
  const text = consumed?.trim();
  if (!text) {
    return;
  }
  const loweredText = text.toLowerCase();

  const current = this.partialUserTranscriptsByCallId.get(callId);
  if (current) {
    if (current === text) {
      this.clearPartialUserTranscript(callId);
    } else if (current.toLowerCase().startsWith(loweredText)) {
      // Keep whatever the caller said after the consumed prefix.
      const remaining = current.slice(text.length).trimStart();
      if (remaining) {
        this.partialUserTranscriptsByCallId.set(callId, remaining);
      } else {
        this.clearPartialUserTranscript(callId);
      }
    }
  }

  // Always examine the recent final transcript, even when no partial exists.
  const recent = this.recentFinalUserTranscriptsByCallId.get(callId);
  if (recent && (recent === text || recent.toLowerCase().startsWith(loweredText))) {
    this.clearRecentFinalUserTranscript(callId);
  }
}
992
-
993
/**
 * Waits until the partial user transcript for `callId` has stopped changing.
 *
 * Resolves as soon as one of these holds:
 * - no partial-transcript update time is recorded for the call;
 * - the last update is at least CONSULT_TRANSCRIPT_SETTLE_MS old;
 * - CONSULT_TRANSCRIPT_SETTLE_MAX_MS have elapsed since `startedAt`.
 *
 * @param callId - Call whose partial transcript is observed.
 * @param startedAt - Epoch milliseconds from which the maximum wait is measured.
 */
private async waitForConsultTranscriptSettle(callId: string, startedAt: number): Promise<void> {
  const hardDeadline = startedAt + CONSULT_TRANSCRIPT_SETTLE_MAX_MS;
  for (;;) {
    const lastUpdateAt = this.partialUserTranscriptUpdatedAtByCallId.get(callId);
    if (!lastUpdateAt) {
      return;
    }
    const current = Date.now();
    const idleMs = current - lastUpdateAt;
    const settled = idleMs >= CONSULT_TRANSCRIPT_SETTLE_MS || current >= hardDeadline;
    if (settled) {
      return;
    }
    // Sleep until the quiet window would complete or the deadline hits, whichever is first,
    // then re-check because new partial text may have arrived meanwhile.
    const sleepMs = Math.min(CONSULT_TRANSCRIPT_SETTLE_MS - idleMs, hardDeadline - current);
    await new Promise((resolve) => setTimeout(resolve, sleepMs));
  }
}
1010
-
1011
/**
 * Drops every piece of forced-consult bookkeeping held for `callId`: the
 * pending fallback timer, the in-flight marker, the forced consult state and
 * the time of the last provider-initiated consult.
 *
 * @param callId - Call whose forced-consult state is reset.
 */
private clearForcedConsultState(callId: string): void {
  const pendingFallback = this.forcedConsultTimersByCallId.get(callId);
  if (pendingFallback) {
    clearTimeout(pendingFallback);
    this.forcedConsultTimersByCallId.delete(callId);
  }

  this.forcedConsultInFlightByCallId.delete(callId);
  this.forcedConsultsByCallId.delete(callId);
  this.lastProviderConsultAtByCallId.delete(callId);
}
1021
-
1022
/**
 * Closes the telephony side of a call.
 *
 * When a closer is registered under `callIdOrSid` it is invoked with `reason`
 * and nothing else happens; otherwise the supplied `bridge`, if any, is
 * closed directly.
 *
 * @param callIdOrSid - Key under which the telephony closer is looked up.
 * @param bridge - Fallback bridge to close when no closer is registered.
 * @param reason - Close reason forwarded to the registered closer.
 */
private closeTelephonyBridge(
  callIdOrSid: string,
  bridge: ActiveRealtimeVoiceBridge | null,
  reason: TelephonyCloseReason,
): void {
  const registeredCloser = this.activeTelephonyClosersByCallId.get(callIdOrSid);
  if (!registeredCloser) {
    bridge?.close();
    return;
  }
  registeredCloser(reason);
}
1034
-
1035
/**
 * Arms a delayed fallback that forces an agent consult for a final user
 * transcript.
 *
 * Nothing is scheduled unless `config.consultPolicy` is `"always"`, the
 * trimmed transcript is non-empty and a handler is registered for
 * REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME. A previously pending fallback timer
 * for the call is cancelled and replaced.
 *
 * When the timer fires after FORCED_CONSULT_FALLBACK_DELAY_MS the consult is
 * skipped if a forced consult is already in flight for the call or if a
 * provider consult was recorded within the last 2 seconds.
 *
 * @param params - Session, call identifiers, transcript and an audio-clearing callback.
 */
private scheduleForcedAgentConsult(params: {
  session: ActiveRealtimeVoiceBridge;
  callId: string;
  callSid: string;
  transcript: string;
  clearAudio: () => void;
}): void {
  const { callId } = params;

  if (this.config.consultPolicy !== "always") {
    return;
  }
  const question = params.transcript.trim();
  if (!question) {
    return;
  }
  const handler = this.toolHandlers.get(REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME);
  if (!handler) {
    return;
  }

  const pendingFallback = this.forcedConsultTimersByCallId.get(callId);
  if (pendingFallback) {
    clearTimeout(pendingFallback);
  }

  const fireFallback = (): void => {
    this.forcedConsultTimersByCallId.delete(callId);
    if (this.forcedConsultInFlightByCallId.has(callId)) {
      return;
    }
    const lastProviderConsultAt = this.lastProviderConsultAtByCallId.get(callId) ?? 0;
    const sinceProviderConsultMs = Date.now() - lastProviderConsultAt;
    if (sinceProviderConsultMs < 2_000) {
      return;
    }
    void this.runForcedAgentConsult({
      ...params,
      question,
      handler,
    });
  };

  const fallbackTimer = setTimeout(fireFallback, FORCED_CONSULT_FALLBACK_DELAY_MS);
  this.forcedConsultTimersByCallId.set(callId, fallbackTimer);
}
1074
-
1075
/**
 * Runs the agent consult handler on behalf of the realtime provider and, when
 * it yields speakable text, prompts the session to speak it.
 *
 * Bookkeeping performed here:
 * - the call is marked as having a forced consult in flight for the duration;
 * - a ForcedConsultState is published in `forcedConsultsByCallId`;
 *   executeToolCall reads it and sets `sendSpeechPrompt = false` when it takes
 *   over delivery of the result;
 * - the state is removed again FORCED_CONSULT_NATIVE_DEDUPE_MS after completion,
 *   provided it is still the one registered for the call.
 *
 * Handler failures are logged and swallowed; the method never rejects because
 * of them.
 *
 * @param params - Session, call identifiers, trimmed question, audio-clearing
 *   callback and the consult handler to invoke.
 */
private async runForcedAgentConsult(params: {
  session: ActiveRealtimeVoiceBridge;
  callId: string;
  callSid: string;
  question: string;
  clearAudio: () => void;
  handler: ToolHandlerFn;
}): Promise<void> {
  const { callId, callSid, question } = params;

  this.forcedConsultInFlightByCallId.add(callId);
  const startedAt = Date.now();
  console.log(
    `[voice-call] realtime forced agent consult starting callId=${callId} providerCallId=${callSid} chars=${question.length}`,
  );
  params.clearAudio();

  // Promise.resolve().then(...) defers the handler call and turns a synchronous throw into a rejection.
  const consultPromise = Promise.resolve().then(() =>
    params.handler(
      {
        question,
        context:
          "The realtime provider produced a final user transcript without invoking klaw_agent_consult, so Klaw is forcing the consult because consultPolicy is always.",
      },
      callId,
      {},
    ),
  );
  const state: ForcedConsultState = {
    sendSpeechPrompt: true,
    promise: consultPromise,
  };
  this.forcedConsultsByCallId.set(callId, state);

  try {
    const result = await state.promise;
    state.completedAt = Date.now();
    const speakable = readSpeakableToolResultText(result);
    if (!speakable) {
      console.warn(
        `[voice-call] realtime forced agent consult returned no speakable text callId=${callId} providerCallId=${callSid}`,
      );
      return;
    }
    // The flag is false when a provider tool call already claimed this result.
    if (state.sendSpeechPrompt) {
      params.clearAudio();
      params.session.sendUserMessage(buildForcedConsultSpeechPrompt(speakable));
    }
    console.log(
      `[voice-call] realtime forced agent consult completed callId=${callId} providerCallId=${callSid} elapsedMs=${Date.now() - startedAt}`,
    );
    this.consumePartialUserTranscript(callId, question);
  } catch (error) {
    console.warn(
      `[voice-call] realtime forced agent consult failed callId=${callId} providerCallId=${callSid} error=${formatErrorMessage(error)}`,
    );
  } finally {
    this.forcedConsultInFlightByCallId.delete(callId);
    const dropState = (): void => {
      // Leave a newer consult's state alone.
      if (this.forcedConsultsByCallId.get(callId) === state) {
        this.forcedConsultsByCallId.delete(callId);
      }
    };
    const cleanupTimer = setTimeout(dropState, FORCED_CONSULT_NATIVE_DEDUPE_MS);
    cleanupTimer.unref?.();
  }
}
1136
-
1137
/**
 * Registers a realtime call with the call manager and reports it as answered.
 *
 * The call record is obtained through resolveRealtimeCall; when that yields
 * nothing the registration is abandoned. Any `initialMessage` found in the
 * record's metadata is extracted as the greeting and then deleted from the
 * metadata before the `call.answered` event is processed.
 *
 * @param callSid - Provider call identifier.
 * @param callerMeta - Optional caller details; `direction` defaults to `"inbound"`.
 * @returns The manager call id plus greeting instructions, or `null` when no
 *   call record could be resolved.
 */
private registerCallInManager(
  callSid: string,
  callerMeta: Omit<PendingStreamToken, "expiry"> = {},
): CallRegistration | null {
  const baseFields: {
    providerCallId: string;
    timestamp: number;
    direction: "inbound" | "outbound";
    from?: string;
    to?: string;
  } = {
    providerCallId: callSid,
    timestamp: Date.now(),
    direction: callerMeta.direction ?? "inbound",
  };
  // from/to are only included when they carry a value.
  if (callerMeta.from) {
    baseFields.from = callerMeta.from;
  }
  if (callerMeta.to) {
    baseFields.to = callerMeta.to;
  }

  const callRecord = this.resolveRealtimeCall(callSid, callerMeta, baseFields);
  if (!callRecord) {
    return null;
  }

  const initialGreeting = this.extractInitialGreeting(callRecord);
  const greetingStatus = initialGreeting ? "queued" : "absent";
  console.log(`[voice-call] Realtime call ${callRecord.callId} initial greeting ${greetingStatus}`);
  if (callRecord.metadata) {
    delete callRecord.metadata.initialMessage;
  }

  this.manager.processEvent({
    id: `realtime-answered-${callSid}`,
    callId: callRecord.callId,
    type: "call.answered",
    ...baseFields,
  });

  const initialGreetingInstructions = buildGreetingInstructions(
    this.config.instructions,
    initialGreeting,
  );
  return { callId: callRecord.callId, initialGreetingInstructions };
}
1178
-
1179
/**
 * Finds or creates the manager call record for a realtime call.
 *
 * When `callerMeta.callId` is present, the existing record is returned only
 * if its `providerCallId` matches `callSid`; no new record is created in that
 * case. Otherwise a `call.initiated` event keyed by `callSid` is processed and
 * the record is looked up by provider call id.
 *
 * @param callSid - Provider call identifier.
 * @param callerMeta - Caller details, possibly carrying a known manager call id.
 * @param baseFields - Fields spread into the `call.initiated` event.
 * @returns The resolved call record, or `null` when none is available.
 */
private resolveRealtimeCall(
  callSid: string,
  callerMeta: Omit<PendingStreamToken, "expiry">,
  baseFields: {
    providerCallId: string;
    timestamp: number;
    direction: "inbound" | "outbound";
    from?: string;
    to?: string;
  },
): CallRecord | null {
  const knownCallId = callerMeta.callId;
  if (knownCallId) {
    const existing = this.manager.getCall(knownCallId);
    if (existing?.providerCallId === callSid) {
      return existing;
    }
    return null;
  }

  this.manager.processEvent({
    id: `realtime-initiated-${callSid}`,
    callId: callSid,
    type: "call.initiated",
    ...baseFields,
  });

  const created = this.manager.getCallByProviderCallId(callSid);
  return created ?? null;
}
1204
-
1205
/**
 * Reads the initial greeting stored in a call record's metadata.
 *
 * @param call - Call record to inspect.
 * @returns `metadata.initialMessage` when it is a string, otherwise `undefined`.
 */
private extractInitialGreeting(call: CallRecord): string | undefined {
  const candidate = call.metadata?.initialMessage;
  if (typeof candidate !== "string") {
    return undefined;
  }
  return candidate;
}
1210
-
1211
/**
 * Tells the call manager that a realtime call has ended.
 *
 * @param callSid - Provider call identifier.
 * @param callId - Manager call identifier.
 * @param reason - Why the call ended.
 */
private endCallInManager(callSid: string, callId: string, reason: "completed" | "error"): void {
  // The event id embeds the current time in addition to the provider call id.
  const eventId = `realtime-ended-${callSid}-${Date.now()}`;
  this.manager.processEvent({
    id: eventId,
    type: "call.ended",
    callId,
    providerCallId: callSid,
    timestamp: Date.now(),
    reason,
  });
}
1221
-
1222
/**
 * Executes a tool call requested by the realtime provider and delivers the
 * result to the bridge and, when `emitTalkEvent` is supplied, as a talk event.
 *
 * Agent consult calls (REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME) are de-duplicated:
 * - the call cancels any pending forced-consult fallback timer and records the
 *   time of this provider consult;
 * - if a forced consult exists for the call, its result is reused (or an
 *   `already_delivered` notice is returned when it has already completed);
 * - if a native consult is already in flight, its promise is shared;
 * - otherwise a new native consult is started after waiting for the partial
 *   transcript to settle.
 *
 * All other tools run their handler directly. Handler failures, whether a
 * rejected promise or a synchronous throw, are converted into an
 * `{ error }` result so the bridge always receives a final tool result and
 * this fire-and-forget method does not reject because of the handler.
 *
 * @param bridge - Active realtime bridge that receives tool results.
 * @param callId - Manager call identifier.
 * @param bridgeCallId - Identifier of the tool call on the bridge.
 * @param name - Tool name used to look up the handler.
 * @param args - Raw tool arguments from the provider.
 * @param turnId - Talk turn the tool call belongs to.
 * @param emitTalkEvent - Optional sink for tool progress/result/error talk events.
 */
private async executeToolCall(
  bridge: ActiveRealtimeVoiceBridge,
  callId: string,
  bridgeCallId: string,
  name: string,
  args: unknown,
  turnId: string,
  emitTalkEvent?: (input: TalkEventInput) => TalkEvent,
): Promise<void> {
  const handler = this.toolHandlers.get(name);
  const startedAt = Date.now();

  // A result counts as an error when it is a non-array object carrying an "error" key.
  const hasResultError = (result: unknown): boolean => {
    return Boolean(
      result && typeof result === "object" && !Array.isArray(result) && "error" in result,
    );
  };
  const emitFinalToolEvent = (result: unknown): void => {
    emitTalkEvent?.({
      type: hasResultError(result) ? "tool.error" : "tool.result",
      turnId,
      callId: bridgeCallId,
      payload: { name, result },
      final: true,
    });
  };
  const submitFinalToolResult = (result: unknown): void => {
    bridge.submitToolResult(bridgeCallId, result);
    emitFinalToolEvent(result);
  };
  // Logs the completion line and reports whether the result was successful.
  const logCompletion = (result: unknown): boolean => {
    const failed = hasResultError(result);
    const error = failed
      ? formatErrorMessage((result as { error?: unknown }).error ?? "unknown")
      : undefined;
    console.log(
      `[voice-call] realtime tool call completed callId=${callId} tool=${name} status=${failed ? "error" : "ok"} elapsedMs=${Date.now() - startedAt}${error ? ` error=${error}` : ""}`,
    );
    return !failed;
  };
  // Sends an interim "working" result for consult calls when the bridge supports
  // continuation and fast context is disabled.
  const submitWorkingResponse = () => {
    if (
      handler &&
      name === REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME &&
      bridge.bridge.supportsToolResultContinuation &&
      !this.config.fastContext.enabled
    ) {
      emitTalkEvent?.({
        type: "tool.progress",
        turnId,
        callId: bridgeCallId,
        payload: { name, status: "working" },
      });
      bridge.submitToolResult(
        bridgeCallId,
        buildRealtimeVoiceAgentConsultWorkingResponse("caller"),
        { willContinue: true },
      );
    }
  };

  if (name === REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME) {
    // The provider asked for a consult itself: the forced fallback is no longer needed.
    this.lastProviderConsultAtByCallId.set(callId, Date.now());
    const timer = this.forcedConsultTimersByCallId.get(callId);
    if (timer) {
      clearTimeout(timer);
      this.forcedConsultTimersByCallId.delete(callId);
    }

    const forcedConsult = this.forcedConsultsByCallId.get(callId);
    if (forcedConsult) {
      if (forcedConsult.completedAt) {
        submitFinalToolResult({
          status: "already_delivered",
          message: "Klaw already delivered this consult result internally. Do not repeat it.",
        });
        return;
      }
      // Take over delivery: the forced consult must not also prompt the session to speak.
      forcedConsult.sendSpeechPrompt = false;
      const result = await forcedConsult.promise.catch((error: unknown) => ({
        error: formatErrorMessage(error),
      }));
      submitFinalToolResult(result);
      return;
    }

    const existingNativeConsult = this.nativeConsultsInFlightByCallId.get(callId);
    if (existingNativeConsult) {
      console.log(
        `[voice-call] realtime tool call sharing in-flight agent consult callId=${callId} ageMs=${Date.now() - existingNativeConsult.startedAt}`,
      );
      submitWorkingResponse();
      submitFinalToolResult(await existingNativeConsult.promise);
      return;
    }

    submitWorkingResponse();
    const state: NativeConsultState = {
      startedAt,
      promise: Promise.resolve(),
    };
    // The promise never rejects: failures are mapped to an { error } result so that
    // concurrent tool calls sharing it can submit the same outcome.
    state.promise = (async () => {
      await this.waitForConsultTranscriptSettle(callId, startedAt);
      const context = {
        partialUserTranscript: this.resolveUserTranscriptContext(callId),
      };
      state.partialUserTranscript = context.partialUserTranscript;
      const handlerArgs = withFallbackConsultQuestion(args, context.partialUserTranscript);
      console.log(
        `[voice-call] realtime tool call executing callId=${callId} tool=${name} hasHandler=${Boolean(handler)}`,
      );
      return !handler
        ? { error: `Tool "${name}" not available` }
        : await handler(handlerArgs, callId, context);
    })().catch((error: unknown) => ({
      error: formatErrorMessage(error),
    }));
    this.nativeConsultsInFlightByCallId.set(callId, state);
    try {
      const result = await state.promise;
      const succeeded = logCompletion(result);
      submitFinalToolResult(result);
      if (succeeded) {
        this.consumePartialUserTranscript(callId, state.partialUserTranscript);
      }
    } finally {
      if (this.nativeConsultsInFlightByCallId.get(callId) === state) {
        this.nativeConsultsInFlightByCallId.delete(callId);
      }
    }
    return;
  }

  // Non-consult tools: run the handler directly.
  console.log(
    `[voice-call] realtime tool call executing callId=${callId} tool=${name} hasHandler=${Boolean(handler)}`,
  );
  const context = {
    partialUserTranscript: this.resolveUserTranscriptContext(callId),
  };
  let result: unknown;
  if (!handler) {
    result = { error: `Tool "${name}" not available` };
  } else {
    try {
      // try/catch (rather than .catch) also covers a handler that throws before returning a promise.
      result = await handler(args, callId, context);
    } catch (error: unknown) {
      result = { error: formatErrorMessage(error) };
    }
  }
  logCompletion(result);
  submitFinalToolResult(result);
}
1382
- }