@openclaw/voice-call 2026.5.7 → 2026.5.9-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1052 @@
1
+ import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
2
+ import { REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME, buildRealtimeVoiceAgentConsultWorkingResponse, createRealtimeVoiceBridgeSession, createTalkSessionController, recordTalkObservabilityEvent } from "openclaw/plugin-sdk/realtime-voice";
3
+ import { randomUUID } from "node:crypto";
4
+ import WebSocket, { WebSocketServer } from "ws";
5
+ //#region extensions/voice-call/src/webhook/realtime-audio-pacer.ts
6
// Telephony audio here is treated as one mu-law byte per sample (see the
// duration maths in the pacer), so byte counts and sample counts coincide.
const TELEPHONY_SAMPLE_RATE = 8000;
// 160 bytes at 8000 bytes per second is 20 ms of audio.
const TELEPHONY_CHUNK_BYTES = 160;
const TELEPHONY_CHUNK_MS = 20;
// Normalised RMS level (0..1) at or above which a chunk counts as "loud".
const DEFAULT_SPEECH_RMS_THRESHOLD = 0.035;
// Consecutive loud chunks needed before speech is reported as started.
const DEFAULT_REQUIRED_LOUD_CHUNKS = 4;
// Consecutive quiet chunks needed before the detector re-arms.
const DEFAULT_REQUIRED_QUIET_CHUNKS = 12;
// Cap on queued outbound audio: 120 seconds' worth of bytes.
const DEFAULT_MAX_QUEUED_AUDIO_BYTES = TELEPHONY_SAMPLE_RATE * 120;
// Divisor used to normalise decoded 16-bit samples.
const PCM16_MAX_AMPLITUDE = 32768;
// Lookup table: mu-law byte value -> decoded linear sample.
const MULAW_LINEAR_SAMPLES = Int16Array.from({ length: 256 }, (_, code) => decodeMulawSample(code));
16
/**
 * Queues outbound mu-law audio in fixed-size chunks and releases them one at a
 * time, waiting each chunk's playback duration before sending the next item.
 *
 * `params` supplies:
 *  - `streamSid`: identifier echoed on every outgoing message;
 *  - `sendJson(message)`: transport callback, returns falsy when the send failed;
 *  - `maxQueuedAudioBytes` (optional): queue cap, defaults to
 *    DEFAULT_MAX_QUEUED_AUDIO_BYTES;
 *  - `onBackpressure` (optional): invoked after the pacer closes itself because
 *    the cap would be exceeded.
 */
var RealtimeTwilioAudioPacer = class {
  constructor(params) {
    this.params = params;
    this.queue = [];
    this.timer = null;
    this.queuedAudioBytes = 0;
    this.closed = false;
  }

  /** Splits `muLaw` into chunks, enqueues them and starts the pump if idle. */
  sendAudio(muLaw) {
    if (this.closed || muLaw.length === 0) return;
    const byteLimit = this.params.maxQueuedAudioBytes ?? DEFAULT_MAX_QUEUED_AUDIO_BYTES;
    let offset = 0;
    while (offset < muLaw.length) {
      // Copy the slice so later mutation of the caller's buffer cannot affect queued audio.
      const chunk = Buffer.from(muLaw.subarray(offset, offset + TELEPHONY_CHUNK_BYTES));
      if (this.queuedAudioBytes + chunk.length > byteLimit) {
        this.failBackpressure();
        return;
      }
      const durationMs = Math.max(1, Math.round(chunk.length / TELEPHONY_SAMPLE_RATE * 1e3));
      this.queue.push({
        type: "audio",
        chunk,
        durationMs
      });
      this.queuedAudioBytes += chunk.length;
      offset += TELEPHONY_CHUNK_BYTES;
    }
    this.ensurePump();
  }

  /** Enqueues a named mark behind any audio already queued. */
  sendMark(name) {
    if (this.closed || !name) return;
    this.queue.push({
      type: "mark",
      name
    });
    this.ensurePump();
  }

  /**
   * Drops everything queued, emits a "clear" message and returns how many
   * audio bytes were discarded (0 when the pacer is already closed).
   */
  clearAudio() {
    if (this.closed) return 0;
    const droppedBytes = this.queuedAudioBytes;
    this.clearTimer();
    this.queue = [];
    this.queuedAudioBytes = 0;
    this.params.sendJson({
      event: "clear",
      streamSid: this.params.streamSid
    });
    return droppedBytes;
  }

  /** Permanently stops the pacer and discards queued items. */
  close() {
    this.closed = true;
    this.clearTimer();
    this.queue = [];
    this.queuedAudioBytes = 0;
  }

  clearTimer() {
    if (this.timer) {
      clearTimeout(this.timer);
      this.timer = null;
    }
  }

  /** Starts pumping immediately unless a pump is already scheduled. */
  ensurePump() {
    if (this.timer) return;
    this.pump();
  }

  /** Closes the pacer, then notifies the owner that the queue cap was hit. */
  failBackpressure() {
    this.close();
    this.params.onBackpressure?.();
  }

  /** Sends the next queued item and schedules the following one, if any. */
  pump() {
    this.timer = null;
    if (this.closed) return;
    const item = this.queue.shift();
    if (!item) return;
    let delayMs = 0;
    let sent;
    if (item.type === "audio") {
      this.queuedAudioBytes = Math.max(0, this.queuedAudioBytes - item.chunk.length);
      sent = this.params.sendJson({
        event: "media",
        streamSid: this.params.streamSid,
        media: { payload: item.chunk.toString("base64") }
      });
      delayMs = item.durationMs || TELEPHONY_CHUNK_MS;
    } else {
      // Marks carry no audio, so nothing is waited for after them.
      sent = this.params.sendJson({
        event: "mark",
        streamSid: this.params.streamSid,
        mark: { name: item.name }
      });
    }
    if (!sent) {
      // The transport refused the message: abandon whatever is still queued.
      this.queue = [];
      this.queuedAudioBytes = 0;
      return;
    }
    if (this.queue.length > 0) this.timer = setTimeout(() => this.pump(), delayMs);
  }
};
108
/**
 * Root-mean-square level of a mu-law buffer, with each decoded sample
 * normalised by PCM16_MAX_AMPLITUDE. Returns 0 for an empty buffer.
 */
function calculateMulawRms(muLaw) {
  const sampleCount = muLaw.length;
  if (sampleCount === 0) return 0;
  let sumOfSquares = 0;
  for (let index = 0; index < sampleCount; index += 1) {
    const code = muLaw[index] ?? 0;
    const linear = MULAW_LINEAR_SAMPLES[code] ?? 0;
    const level = linear / PCM16_MAX_AMPLITUDE;
    sumOfSquares += level * level;
  }
  return Math.sqrt(sumOfSquares / sampleCount);
}
117
/**
 * Reports the start of speech in a stream of mu-law chunks.
 *
 * `accept` returns true exactly once per speech run: on the chunk that brings
 * the count of consecutive loud chunks up to the required number while the
 * detector is not already in the speaking state. Enough consecutive quiet
 * chunks put it back into the not-speaking state.
 *
 * Optional `params`: `rmsThreshold`, `requiredLoudChunks`, `requiredQuietChunks`.
 */
var RealtimeMulawSpeechStartDetector = class {
  constructor(params = {}) {
    this.params = params;
    this.loudChunks = 0;
    // Starts at the default quiet requirement, i.e. as if silence had already been observed.
    this.quietChunks = DEFAULT_REQUIRED_QUIET_CHUNKS;
    this.speaking = false;
  }

  accept(muLaw) {
    const rms = calculateMulawRms(muLaw);
    const threshold = this.params.rmsThreshold ?? DEFAULT_SPEECH_RMS_THRESHOLD;
    if (!(rms >= threshold)) {
      this.loudChunks = 0;
      this.quietChunks += 1;
      const quietNeeded = this.params.requiredQuietChunks ?? DEFAULT_REQUIRED_QUIET_CHUNKS;
      if (this.quietChunks >= quietNeeded) this.speaking = false;
      return false;
    }
    this.quietChunks = 0;
    this.loudChunks += 1;
    const loudNeeded = this.params.requiredLoudChunks ?? DEFAULT_REQUIRED_LOUD_CHUNKS;
    if (this.speaking || !(this.loudChunks >= loudNeeded)) return false;
    this.speaking = true;
    return true;
  }
};
142
/**
 * Decodes one mu-law byte into a signed linear sample.
 *
 * The byte is bit-inverted, then split into sign (bit 7), exponent (bits 4-6)
 * and mantissa (bits 0-3); 0x84 (132) is the bias added before shifting and
 * removed afterwards.
 */
function decodeMulawSample(value) {
  const inverted = ~value & 0xff;
  const exponent = (inverted >> 4) & 0x07;
  const mantissa = inverted & 0x0f;
  const magnitude = (((mantissa << 3) + 0x84) << exponent) - 0x84;
  return (inverted & 0x80) !== 0 ? -magnitude : magnitude;
}
150
+ //#endregion
151
+ //#region extensions/voice-call/src/webhook/realtime-handler.ts
152
// Lifetime of a one-time stream token, in milliseconds.
const STREAM_TOKEN_TTL_MS = 30000;
// Host used in the stream URL when neither a public origin nor a Host header is available.
const DEFAULT_HOST = "localhost:8443";
// Largest inbound WebSocket message accepted (256 KiB).
const MAX_REALTIME_MESSAGE_BYTES = 262144;
// Outbound WebSocket buffer size above which the socket is closed (1 MiB).
const MAX_REALTIME_WS_BUFFERED_BYTES = 1048576;
// Timing and size limits for forced agent consults.
const FORCED_CONSULT_FALLBACK_DELAY_MS = 200;
const FORCED_CONSULT_NATIVE_DEDUPE_MS = 2000;
const FORCED_CONSULT_RESULT_MAX_CHARS = 1800;
// Quiet period, and overall cap, when waiting for partial transcripts to settle.
const CONSULT_TRANSCRIPT_SETTLE_MS = 350;
const CONSULT_TRANSCRIPT_SETTLE_MAX_MS = 1000;
// Upper bound on the aggregated partial user transcript kept per call.
const MAX_PARTIAL_USER_TRANSCRIPT_CHARS = 1200;
// How long the most recent final user transcript stays available.
const RECENT_FINAL_USER_TRANSCRIPT_TTL_MS = 2000;
// Loud-chunk requirement used for barge-in detection.
const BARGE_IN_REQUIRED_LOUD_CHUNKS = 2;
164
/**
 * Normalises a URL path: trims whitespace, guarantees a leading "/", and
 * removes one trailing "/" unless the path is just "/". Blank input yields "/".
 */
function normalizePath(pathname) {
  const trimmed = pathname.trim();
  if (trimmed === "") return "/";
  const rooted = trimmed.startsWith("/") ? trimmed : "/" + trimmed;
  if (rooted.length > 1 && rooted.endsWith("/")) return rooted.slice(0, -1);
  return rooted;
}
171
/**
 * Builds the instruction text asking the model to open the call with
 * `greeting`. Returns undefined when the greeting is missing or blank;
 * otherwise the greeting directive, appended to `baseInstructions` (separated
 * by a blank line) when those are non-empty.
 */
function buildGreetingInstructions(baseInstructions, greeting) {
  const spokenGreeting = greeting?.trim();
  if (!spokenGreeting) return;
  const intro = "Start the call by greeting the caller naturally. Include this greeting in your first spoken reply:";
  const directive = `${intro} "${spokenGreeting}"`;
  if (!baseInstructions) return directive;
  return `${baseInstructions}\n\n${directive}`;
}
177
+ function readSpeakableToolResultText(result) {
178
+ if (typeof result === "string") return result.trim() || void 0;
179
+ if (!result || typeof result !== "object" || Array.isArray(result)) return;
180
+ const text = result.text;
181
+ if (typeof text === "string" && text.trim()) return text.trim();
182
+ const output = result.output;
183
+ return typeof output === "string" && output.trim() ? output.trim() : void 0;
184
+ }
185
+ function readConsultArgText(args, key) {
186
+ if (!args || typeof args !== "object" || Array.isArray(args)) return;
187
+ const value = args[key];
188
+ return typeof value === "string" && value.trim() ? value.trim() : void 0;
189
+ }
190
/**
 * Returns the consult question from `args`, taking the first non-blank string
 * found under "question", "prompt", "query" or "task" (checked in that order).
 */
function readConsultQuestionText(args) {
  for (const key of ["question", "prompt", "query", "task"]) {
    const text = readConsultArgText(args, key);
    if (text !== void 0 && text !== null) return text;
  }
  return void 0;
}
193
/** Collapses every whitespace run to a single space and trims both ends. */
function normalizeTranscriptText(text) {
  return text.split(/\s+/).filter((word) => word !== "").join(" ");
}
196
/**
 * Length of the longest suffix of `base` that is also a prefix of `next`
 * (0 when there is none).
 */
function findTextOverlap(base, next) {
  let size = Math.min(base.length, next.length);
  while (size > 0) {
    if (base.endsWith(next.slice(0, size))) return size;
    size -= 1;
  }
  return 0;
}
201
/**
 * Decides whether a space belongs between `base` and `next` when joining
 * transcript fragments. No space is inserted when either side is empty, when
 * `base` already ends in whitespace or an opening bracket/quote, or when
 * `next` starts with whitespace or closing punctuation.
 */
function shouldInsertTranscriptSpace(base, next) {
  if (!base || !next) return false;
  const lastChar = base.at(-1);
  const baseEndsOpen = /\s$/.test(base) || ["(", "[", "{", "\"", "'"].includes(lastChar);
  if (baseEndsOpen) return false;
  return !/^[\s,.;:!?)]/.test(next);
}
207
/**
 * Merges a new transcript fragment into the accumulated transcript.
 *
 * Both sides are whitespace-normalised, then compared case-insensitively:
 *  - a blank fragment leaves `base` as is ("" when `base` is nullish);
 *  - a fragment the transcript already ends with is ignored;
 *  - a fragment that starts with the whole transcript replaces it;
 *  - an overlap between the transcript's tail and the fragment's head is
 *    stitched together when it is at least 6 characters, or at least 3
 *    characters with a fragment of at most 12 characters;
 *  - otherwise the fragment is appended, with a space when appropriate.
 */
function appendTranscriptText(base, fragment) {
  const addition = normalizeTranscriptText(fragment);
  if (!addition) return base ?? "";
  const existing = normalizeTranscriptText(base ?? "");
  if (!existing) return addition;
  const existingLower = existing.toLowerCase();
  const additionLower = addition.toLowerCase();
  // Equal strings also satisfy endsWith, so this covers exact repeats too.
  if (existingLower.endsWith(additionLower)) return existing;
  if (additionLower.startsWith(existingLower)) return addition;
  const overlap = findTextOverlap(existingLower, additionLower);
  const trustOverlap = overlap >= 6 || (overlap >= 3 && addition.length <= 12);
  if (trustOverlap) return (existing + addition.slice(overlap)).trim();
  const separator = shouldInsertTranscriptSpace(existing, addition) ? " " : "";
  return (existing + separator + addition).trim();
}
220
/**
 * Keeps only the last MAX_PARTIAL_USER_TRANSCRIPT_CHARS characters of `text`.
 * When truncation happens, the leading run of non-whitespace characters (a
 * possibly cut-off word) is dropped too, unless that would leave nothing.
 */
function limitPartialUserTranscript(text) {
  if (text.length <= MAX_PARTIAL_USER_TRANSCRIPT_CHARS) return text;
  const tail = text.slice(-MAX_PARTIAL_USER_TRANSCRIPT_CHARS);
  const withoutLeadingWord = tail.replace(/^\S+\s+/, "").trimStart();
  if (withoutLeadingWord) return withoutLeadingWord;
  return tail.trimStart();
}
225
/**
 * Ensures consult arguments carry a usable question.
 *
 *  - No provider question: the trimmed `fallback` (if any) is added as
 *    `question`; without a fallback `args` is returned untouched.
 *  - Provider question present: it is kept, unless it is at most 40 characters
 *    and the fallback is at least 8 characters longer. In that case the
 *    fallback becomes `question` and the provider's wording is recorded in
 *    `context` (appended after any existing context).
 *
 * `args` is never mutated; a new object is returned whenever fields change.
 */
function withFallbackConsultQuestion(args, fallback) {
  const providerQuestion = readConsultQuestionText(args);
  const question = fallback?.trim();
  const argsIsPlainObject = Boolean(args) && typeof args === "object" && !Array.isArray(args);

  if (!providerQuestion) {
    if (!question) return args;
    return argsIsPlainObject ? { ...args, question } : { question };
  }

  const fallbackIsRicher = Boolean(question)
    && providerQuestion.length <= 40
    && question.length >= providerQuestion.length + 8;
  if (!fallbackIsRicher) return args;

  const context = readConsultArgText(args, "context");
  const fallbackContext = `Realtime provider supplied a shorter consult question: ${providerQuestion}`;
  if (!argsIsPlainObject) {
    return {
      question,
      context: fallbackContext
    };
  }
  return {
    ...args,
    question,
    context: context ? `${context}\n\n${fallbackContext}` : fallbackContext
  };
}
249
/**
 * Builds the prompt that tells the realtime model to speak a consult result.
 * The trimmed result is included as is when it fits within
 * FORCED_CONSULT_RESULT_MAX_CHARS; otherwise it is cut short and suffixed with
 * " [truncated]".
 */
function buildForcedConsultSpeechPrompt(result) {
  const answer = result.trim();
  let spoken = answer;
  if (answer.length > FORCED_CONSULT_RESULT_MAX_CHARS) {
    const head = answer.slice(0, FORCED_CONSULT_RESULT_MAX_CHARS - 16).trimEnd();
    spoken = `${head} [truncated]`;
  }
  const lines = [
    "Internal OpenClaw consult result is ready.",
    "Do not call tools for this internal result.",
    "Speak the following answer to the caller now, briefly and naturally:",
    spoken
  ];
  return lines.join("\n");
}
258
+ function appendRecentTalkEventMetadata(call, event) {
259
+ if (!call) return;
260
+ const metadata = call.metadata ?? {};
261
+ const previous = Array.isArray(metadata.recentTalkEvents) ? metadata.recentTalkEvents : [];
262
+ metadata.lastTalkEventAt = event.timestamp;
263
+ metadata.lastTalkEventType = event.type;
264
+ metadata.recentTalkEvents = [...previous, {
265
+ id: event.id,
266
+ brain: event.brain,
267
+ mode: event.mode,
268
+ provider: event.provider,
269
+ seq: event.seq,
270
+ sessionId: event.sessionId,
271
+ timestamp: event.timestamp,
272
+ transport: event.transport,
273
+ type: event.type,
274
+ ...event.turnId ? { turnId: event.turnId } : {},
275
+ ...event.final !== void 0 ? { final: event.final } : {}
276
+ }].slice(-12);
277
+ call.metadata = metadata;
278
+ }
279
+ var RealtimeCallHandler = class {
280
+ constructor(config, manager, provider, realtimeProvider, providerConfig, servePath) {
281
+ this.config = config;
282
+ this.manager = manager;
283
+ this.provider = provider;
284
+ this.realtimeProvider = realtimeProvider;
285
+ this.providerConfig = providerConfig;
286
+ this.servePath = servePath;
287
+ this.toolHandlers = /* @__PURE__ */ new Map();
288
+ this.pendingStreamTokens = /* @__PURE__ */ new Map();
289
+ this.activeBridgesByCallId = /* @__PURE__ */ new Map();
290
+ this.activeTelephonyClosersByCallId = /* @__PURE__ */ new Map();
291
+ this.partialUserTranscriptsByCallId = /* @__PURE__ */ new Map();
292
+ this.partialUserTranscriptUpdatedAtByCallId = /* @__PURE__ */ new Map();
293
+ this.recentFinalUserTranscriptsByCallId = /* @__PURE__ */ new Map();
294
+ this.recentFinalUserTranscriptTimersByCallId = /* @__PURE__ */ new Map();
295
+ this.forcedConsultTimersByCallId = /* @__PURE__ */ new Map();
296
+ this.forcedConsultInFlightByCallId = /* @__PURE__ */ new Set();
297
+ this.forcedConsultsByCallId = /* @__PURE__ */ new Map();
298
+ this.lastProviderConsultAtByCallId = /* @__PURE__ */ new Map();
299
+ this.nativeConsultsInFlightByCallId = /* @__PURE__ */ new Map();
300
+ this.publicOrigin = null;
301
+ this.publicPathPrefix = "";
302
+ }
303
+ setPublicUrl(url) {
304
+ try {
305
+ const parsed = new URL(url);
306
+ this.publicOrigin = parsed.host;
307
+ const normalizedServePath = normalizePath(this.servePath);
308
+ const normalizedPublicPath = normalizePath(parsed.pathname);
309
+ const idx = normalizedPublicPath.indexOf(normalizedServePath);
310
+ this.publicPathPrefix = idx > 0 ? normalizedPublicPath.slice(0, idx) : "";
311
+ } catch {
312
+ this.publicOrigin = null;
313
+ this.publicPathPrefix = "";
314
+ }
315
+ }
316
+ getStreamPathPattern() {
317
+ return `${this.publicPathPrefix}${normalizePath(this.config.streamPath ?? "/voice/stream/realtime")}`;
318
+ }
319
+ buildTwiMLPayload(req, params) {
320
+ const host = this.publicOrigin || req.headers.host || DEFAULT_HOST;
321
+ const rawDirection = params?.get("Direction");
322
+ const token = this.issueStreamToken({
323
+ from: params?.get("From") ?? void 0,
324
+ to: params?.get("To") ?? void 0,
325
+ direction: rawDirection?.startsWith("outbound") ? "outbound" : "inbound"
326
+ });
327
+ return {
328
+ statusCode: 200,
329
+ headers: { "Content-Type": "text/xml" },
330
+ body: `<?xml version="1.0" encoding="UTF-8"?>
331
+ <Response>
332
+ <Connect>
333
+ <Stream url="${`wss://${host}${this.getStreamPathPattern()}/${token}`}" />
334
+ </Connect>
335
+ </Response>`
336
+ };
337
+ }
338
+ handleWebSocketUpgrade(request, socket, head) {
339
+ const token = new URL(request.url ?? "/", "wss://localhost").pathname.split("/").pop() ?? null;
340
+ const callerMeta = token ? this.consumeStreamToken(token) : null;
341
+ if (!callerMeta) {
342
+ socket.write("HTTP/1.1 401 Unauthorized\r\n\r\n");
343
+ socket.destroy();
344
+ return;
345
+ }
346
+ new WebSocketServer({
347
+ noServer: true,
348
+ maxPayload: MAX_REALTIME_MESSAGE_BYTES
349
+ }).handleUpgrade(request, socket, head, (ws) => {
350
+ let bridge = null;
351
+ let initialized = false;
352
+ let activeCallSid = "unknown";
353
+ let stopReceived = false;
354
+ let lastMediaTimestamp;
355
+ let lastMediaGapWarnAt = 0;
356
+ ws.on("message", (data) => {
357
+ try {
358
+ const msg = JSON.parse(data.toString());
359
+ if (!initialized && msg.event === "start") {
360
+ initialized = true;
361
+ const startData = typeof msg.start === "object" && msg.start !== null ? msg.start : void 0;
362
+ const streamSid = typeof startData?.streamSid === "string" ? startData.streamSid : "unknown";
363
+ const callSid = typeof startData?.callSid === "string" ? startData.callSid : "unknown";
364
+ activeCallSid = callSid;
365
+ const nextBridge = this.handleCall(streamSid, callSid, ws, callerMeta);
366
+ if (!nextBridge) return;
367
+ bridge = nextBridge;
368
+ return;
369
+ }
370
+ if (!bridge) return;
371
+ const mediaData = typeof msg.media === "object" && msg.media !== null ? msg.media : void 0;
372
+ if (msg.event === "media" && typeof mediaData?.payload === "string") {
373
+ const audio = Buffer.from(mediaData.payload, "base64");
374
+ bridge.sendAudio(audio);
375
+ const mediaTimestamp = typeof mediaData.timestamp === "number" ? mediaData.timestamp : typeof mediaData.timestamp === "string" ? Number.parseInt(mediaData.timestamp, 10) : NaN;
376
+ if (Number.isFinite(mediaTimestamp)) {
377
+ if (lastMediaTimestamp !== void 0) {
378
+ const gapMs = mediaTimestamp - lastMediaTimestamp;
379
+ const now = Date.now();
380
+ if ((gapMs > 120 || gapMs < 0) && now - lastMediaGapWarnAt > 5e3) {
381
+ lastMediaGapWarnAt = now;
382
+ console.warn(`[voice-call] realtime media timestamp gap providerCallId=${activeCallSid} gapMs=${gapMs} timestamp=${mediaTimestamp}`);
383
+ }
384
+ }
385
+ lastMediaTimestamp = mediaTimestamp;
386
+ bridge.setMediaTimestamp(mediaTimestamp);
387
+ }
388
+ return;
389
+ }
390
+ if (msg.event === "mark") {
391
+ bridge.acknowledgeMark();
392
+ return;
393
+ }
394
+ if (msg.event === "stop") {
395
+ stopReceived = true;
396
+ this.closeTelephonyBridge(activeCallSid, bridge, "completed");
397
+ }
398
+ } catch (error) {
399
+ console.error("[voice-call] realtime WS parse failed:", error);
400
+ }
401
+ });
402
+ ws.on("close", (code) => {
403
+ const reason = stopReceived || code === 1e3 || code === 1005 ? "completed" : "error";
404
+ this.closeTelephonyBridge(activeCallSid, bridge, reason);
405
+ });
406
+ ws.on("error", (error) => {
407
+ console.error("[voice-call] realtime WS error:", error);
408
+ });
409
+ });
410
+ }
411
+ registerToolHandler(name, fn) {
412
+ this.toolHandlers.set(name, fn);
413
+ }
414
+ speak(callId, instructions) {
415
+ const bridge = this.activeBridgesByCallId.get(callId);
416
+ if (!bridge) return {
417
+ success: false,
418
+ error: "No active realtime bridge for call"
419
+ };
420
+ try {
421
+ bridge.triggerGreeting(instructions);
422
+ return { success: true };
423
+ } catch (error) {
424
+ return {
425
+ success: false,
426
+ error: formatErrorMessage(error)
427
+ };
428
+ }
429
+ }
430
+ issueStreamToken(meta = {}) {
431
+ const token = randomUUID();
432
+ this.pendingStreamTokens.set(token, {
433
+ expiry: Date.now() + STREAM_TOKEN_TTL_MS,
434
+ ...meta
435
+ });
436
+ for (const [candidate, entry] of this.pendingStreamTokens) if (Date.now() > entry.expiry) this.pendingStreamTokens.delete(candidate);
437
+ return token;
438
+ }
439
+ consumeStreamToken(token) {
440
+ const entry = this.pendingStreamTokens.get(token);
441
+ if (!entry) return null;
442
+ this.pendingStreamTokens.delete(token);
443
+ if (Date.now() > entry.expiry) return null;
444
+ return {
445
+ from: entry.from,
446
+ to: entry.to,
447
+ direction: entry.direction
448
+ };
449
+ }
450
+ handleCall(streamSid, callSid, ws, callerMeta) {
451
+ const registration = this.registerCallInManager(callSid, callerMeta);
452
+ if (!registration) {
453
+ ws.close(1008, "Caller rejected by policy");
454
+ return null;
455
+ }
456
+ const { callId, initialGreetingInstructions } = registration;
457
+ const callRecord = this.manager.getCallByProviderCallId(callSid);
458
+ const talk = createTalkSessionController({
459
+ sessionId: `voice-call:${callId}:realtime`,
460
+ mode: "realtime",
461
+ transport: "gateway-relay",
462
+ brain: "agent-consult",
463
+ provider: this.realtimeProvider.id
464
+ }, { onEvent: recordTalkObservabilityEvent });
465
+ const rememberTalkEvent = (event) => {
466
+ if (event) appendRecentTalkEventMetadata(callRecord, event);
467
+ return event;
468
+ };
469
+ const emitTalkEvent = (input) => {
470
+ return rememberTalkEvent(talk.emit(input));
471
+ };
472
+ const ensureTalkTurn = () => {
473
+ const turn = talk.ensureTurn({ payload: {
474
+ callId,
475
+ providerCallId: callSid
476
+ } });
477
+ rememberTalkEvent(turn.event);
478
+ return turn.turnId;
479
+ };
480
+ const endTalkTurn = (reason = "completed") => {
481
+ const ended = talk.endTurn({ payload: {
482
+ callId,
483
+ providerCallId: callSid,
484
+ reason
485
+ } });
486
+ if (ended.ok) rememberTalkEvent(ended.event);
487
+ };
488
+ const finishOutputAudio = (reason) => {
489
+ rememberTalkEvent(talk.finishOutputAudio({ payload: {
490
+ callId,
491
+ providerCallId: callSid,
492
+ reason
493
+ } }));
494
+ };
495
+ emitTalkEvent({
496
+ type: "session.started",
497
+ payload: {
498
+ callId,
499
+ providerCallId: callSid,
500
+ streamSid
501
+ }
502
+ });
503
+ console.log(`[voice-call] Realtime bridge starting for call ${callId} (providerCallId=${callSid}, initialGreeting=${initialGreetingInstructions ? "queued" : "absent"})`);
504
+ let callEndEmitted = false;
505
+ const emitCallEnd = (reason) => {
506
+ if (callEndEmitted) return;
507
+ callEndEmitted = true;
508
+ this.endCallInManager(callSid, callId, reason);
509
+ };
510
+ const sendJson = (message) => {
511
+ if (ws.readyState !== WebSocket.OPEN) return false;
512
+ if (ws.bufferedAmount > MAX_REALTIME_WS_BUFFERED_BYTES) {
513
+ console.warn(`[voice-call] realtime outbound websocket backpressure before send callId=${callId} providerCallId=${callSid} bufferedBytes=${ws.bufferedAmount}`);
514
+ ws.close(1013, "Backpressure: send buffer exceeded");
515
+ return false;
516
+ }
517
+ ws.send(JSON.stringify(message));
518
+ if (ws.bufferedAmount > MAX_REALTIME_WS_BUFFERED_BYTES) {
519
+ console.warn(`[voice-call] realtime outbound websocket backpressure after send callId=${callId} providerCallId=${callSid} bufferedBytes=${ws.bufferedAmount}`);
520
+ ws.close(1013, "Backpressure: send buffer exceeded");
521
+ return false;
522
+ }
523
+ return true;
524
+ };
525
+ const audioPacer = new RealtimeTwilioAudioPacer({
526
+ streamSid,
527
+ sendJson,
528
+ onBackpressure: () => {
529
+ console.warn(`[voice-call] realtime paced audio backpressure callId=${callId} providerCallId=${callSid}`);
530
+ if (ws.readyState === WebSocket.OPEN) ws.close(1013, "Backpressure: paced audio queue exceeded");
531
+ }
532
+ });
533
+ const speechDetector = new RealtimeMulawSpeechStartDetector({ requiredLoudChunks: BARGE_IN_REQUIRED_LOUD_CHUNKS });
534
+ const session = createRealtimeVoiceBridgeSession({
535
+ provider: this.realtimeProvider,
536
+ providerConfig: this.providerConfig,
537
+ instructions: this.config.instructions,
538
+ tools: this.config.tools,
539
+ initialGreetingInstructions,
540
+ triggerGreetingOnReady: Boolean(initialGreetingInstructions),
541
+ audioSink: {
542
+ isOpen: () => ws.readyState === WebSocket.OPEN,
543
+ sendAudio: (muLaw) => {
544
+ const turnId = ensureTalkTurn();
545
+ rememberTalkEvent(talk.startOutputAudio({
546
+ turnId,
547
+ payload: {
548
+ callId,
549
+ providerCallId: callSid
550
+ }
551
+ }).event);
552
+ emitTalkEvent({
553
+ type: "output.audio.delta",
554
+ turnId,
555
+ payload: { byteLength: muLaw.length }
556
+ });
557
+ audioPacer.sendAudio(muLaw);
558
+ },
559
+ clearAudio: () => {
560
+ const clearedBytes = audioPacer.clearAudio();
561
+ console.log(`[voice-call] realtime outbound audio clear requested callId=${callId} providerCallId=${callSid} queuedBytes=${clearedBytes}`);
562
+ finishOutputAudio("clear");
563
+ },
564
+ sendMark: (markName) => {
565
+ audioPacer.sendMark(markName);
566
+ }
567
+ },
568
+ onTranscript: (role, text, isFinal) => {
569
+ const turnId = ensureTalkTurn();
570
+ emitTalkEvent({
571
+ type: role === "assistant" ? isFinal ? "output.text.done" : "output.text.delta" : isFinal ? "transcript.done" : "transcript.delta",
572
+ turnId,
573
+ payload: role === "assistant" ? { text } : {
574
+ role,
575
+ text
576
+ },
577
+ final: isFinal
578
+ });
579
+ if (role === "user" && isFinal) emitTalkEvent({
580
+ type: "input.audio.committed",
581
+ turnId,
582
+ payload: {
583
+ callId,
584
+ providerCallId: callSid
585
+ },
586
+ final: true
587
+ });
588
+ if (!isFinal) {
589
+ if (role === "user" && text.trim()) {
590
+ const transcript = this.recordPartialUserTranscript(callId, text);
591
+ console.log(`[voice-call] realtime input transcript callId=${callId} providerCallId=${callSid} final=false chars=${text.trim().length} aggregateChars=${transcript.length}`);
592
+ }
593
+ return;
594
+ }
595
+ if (role === "user") {
596
+ const transcript = this.recordPartialUserTranscript(callId, text);
597
+ this.clearPartialUserTranscript(callId);
598
+ this.setRecentFinalUserTranscript(callId, transcript);
599
+ console.log(`[voice-call] realtime input transcript callId=${callId} providerCallId=${callSid} final=true chars=${text.trim().length} aggregateChars=${transcript.length}`);
600
+ const event = {
601
+ id: `realtime-speech-${callSid}-${Date.now()}`,
602
+ type: "call.speech",
603
+ callId,
604
+ providerCallId: callSid,
605
+ timestamp: Date.now(),
606
+ transcript,
607
+ isFinal: true
608
+ };
609
+ this.manager.processEvent(event);
610
+ this.scheduleForcedAgentConsult({
611
+ session,
612
+ callId,
613
+ callSid,
614
+ transcript,
615
+ clearAudio: () => {
616
+ const clearedBytes = audioPacer.clearAudio();
617
+ console.log(`[voice-call] realtime forced consult cleared outbound audio callId=${callId} providerCallId=${callSid} queuedBytes=${clearedBytes}`);
618
+ }
619
+ });
620
+ return;
621
+ }
622
+ this.manager.processEvent({
623
+ id: `realtime-bot-${callSid}-${Date.now()}`,
624
+ type: "call.speaking",
625
+ callId,
626
+ providerCallId: callSid,
627
+ timestamp: Date.now(),
628
+ text
629
+ });
630
+ },
631
+ onToolCall: (toolEvent, session) => {
632
+ const turnId = ensureTalkTurn();
633
+ emitTalkEvent({
634
+ type: "tool.call",
635
+ turnId,
636
+ itemId: toolEvent.itemId,
637
+ callId: toolEvent.callId,
638
+ payload: {
639
+ name: toolEvent.name,
640
+ args: toolEvent.args
641
+ }
642
+ });
643
+ console.log(`[voice-call] realtime tool call received callId=${callId} providerCallId=${callSid} tool=${toolEvent.name}`);
644
+ this.executeToolCall(session, callId, toolEvent.callId || toolEvent.itemId, toolEvent.name, toolEvent.args, turnId, emitTalkEvent);
645
+ },
646
+ onEvent: (event) => {
647
+ if (event.type === "input_audio_buffer.speech_started") {
648
+ ensureTalkTurn();
649
+ return;
650
+ }
651
+ if (event.type === "input_audio_buffer.speech_stopped") {
652
+ const turnId = talk.activeTurnId;
653
+ if (!turnId) return;
654
+ emitTalkEvent({
655
+ type: "input.audio.committed",
656
+ turnId,
657
+ payload: {
658
+ callId,
659
+ providerCallId: callSid,
660
+ source: event.type
661
+ },
662
+ final: true
663
+ });
664
+ return;
665
+ }
666
+ if (event.type === "response.done") {
667
+ finishOutputAudio("response.done");
668
+ endTalkTurn("response.done");
669
+ return;
670
+ }
671
+ if (event.type === "error") emitTalkEvent({
672
+ type: "session.error",
673
+ payload: { message: event.detail ?? "Realtime provider error" },
674
+ final: true
675
+ });
676
+ },
677
+ onReady: () => {
678
+ emitTalkEvent({
679
+ type: "session.ready",
680
+ payload: {
681
+ callId,
682
+ providerCallId: callSid
683
+ }
684
+ });
685
+ },
686
+ onError: (error) => {
687
+ console.error("[voice-call] realtime voice error:", error.message);
688
+ emitTalkEvent({
689
+ type: "session.error",
690
+ payload: { message: error.message },
691
+ final: true
692
+ });
693
+ },
694
+ onClose: (reason) => {
695
+ this.activeBridgesByCallId.delete(callId);
696
+ this.activeBridgesByCallId.delete(callSid);
697
+ this.activeTelephonyClosersByCallId.delete(callId);
698
+ this.activeTelephonyClosersByCallId.delete(callSid);
699
+ this.clearUserTranscriptState(callId);
700
+ finishOutputAudio(reason);
701
+ emitTalkEvent({
702
+ type: "session.closed",
703
+ payload: { reason },
704
+ final: true
705
+ });
706
+ if (reason !== "error") return;
707
+ emitCallEnd("error");
708
+ if (ws.readyState === WebSocket.OPEN) ws.close(1011, "Bridge disconnected");
709
+ this.provider.hangupCall({
710
+ callId,
711
+ providerCallId: callSid,
712
+ reason: "error"
713
+ }).catch((error) => {
714
+ console.warn(`[voice-call] Failed to hang up realtime call ${callSid}: ${formatErrorMessage(error)}`);
715
+ });
716
+ }
717
+ });
718
+ const closeTelephony = (reason) => {
719
+ emitCallEnd(reason);
720
+ session.close();
721
+ };
722
+ this.activeBridgesByCallId.set(callId, session);
723
+ this.activeBridgesByCallId.set(callSid, session);
724
+ this.activeTelephonyClosersByCallId.set(callId, closeTelephony);
725
+ this.activeTelephonyClosersByCallId.set(callSid, closeTelephony);
726
+ const sendAudioToSession = session.sendAudio.bind(session);
727
+ session.sendAudio = (audio) => {
728
+ if (speechDetector.accept(audio)) {
729
+ const interruptedTurnId = ensureTalkTurn();
730
+ const clearedBytes = audioPacer.clearAudio();
731
+ console.log(`[voice-call] realtime outbound audio cleared by barge-in callId=${callId} providerCallId=${callSid} queuedBytes=${clearedBytes}`);
732
+ finishOutputAudio("barge-in");
733
+ const cancelled = talk.cancelTurn({
734
+ turnId: interruptedTurnId,
735
+ payload: {
736
+ callId,
737
+ providerCallId: callSid,
738
+ reason: "barge-in"
739
+ }
740
+ });
741
+ if (cancelled.ok) rememberTalkEvent(cancelled.event);
742
+ }
743
+ emitTalkEvent({
744
+ type: "input.audio.delta",
745
+ turnId: ensureTalkTurn(),
746
+ payload: { byteLength: audio.length }
747
+ });
748
+ sendAudioToSession(audio);
749
+ };
750
+ const closeSession = session.close.bind(session);
751
+ session.close = () => {
752
+ this.activeBridgesByCallId.delete(callId);
753
+ this.activeBridgesByCallId.delete(callSid);
754
+ this.activeTelephonyClosersByCallId.delete(callId);
755
+ this.activeTelephonyClosersByCallId.delete(callSid);
756
+ this.clearUserTranscriptState(callId);
757
+ this.clearForcedConsultState(callId);
758
+ audioPacer.close();
759
+ closeSession();
760
+ };
761
+ session.connect().catch((error) => {
762
+ console.error("[voice-call] Failed to connect realtime bridge:", error);
763
+ session.close();
764
+ emitCallEnd("error");
765
+ ws.close(1011, "Failed to connect");
766
+ });
767
+ return session;
768
+ }
769
+ recordPartialUserTranscript(callId, text) {
770
+ const next = limitPartialUserTranscript(appendTranscriptText(this.partialUserTranscriptsByCallId.get(callId), text));
771
+ this.partialUserTranscriptsByCallId.set(callId, next);
772
+ this.partialUserTranscriptUpdatedAtByCallId.set(callId, Date.now());
773
+ return next;
774
+ }
775
+ clearPartialUserTranscript(callId) {
776
+ this.partialUserTranscriptsByCallId.delete(callId);
777
+ this.partialUserTranscriptUpdatedAtByCallId.delete(callId);
778
+ }
779
+ setRecentFinalUserTranscript(callId, text) {
780
+ this.clearRecentFinalUserTranscript(callId);
781
+ this.recentFinalUserTranscriptsByCallId.set(callId, text);
782
+ const timer = setTimeout(() => {
783
+ if (this.recentFinalUserTranscriptsByCallId.get(callId) === text) this.recentFinalUserTranscriptsByCallId.delete(callId);
784
+ this.recentFinalUserTranscriptTimersByCallId.delete(callId);
785
+ }, RECENT_FINAL_USER_TRANSCRIPT_TTL_MS);
786
+ timer.unref?.();
787
+ this.recentFinalUserTranscriptTimersByCallId.set(callId, timer);
788
+ }
789
+ clearRecentFinalUserTranscript(callId) {
790
+ const timer = this.recentFinalUserTranscriptTimersByCallId.get(callId);
791
+ if (timer) {
792
+ clearTimeout(timer);
793
+ this.recentFinalUserTranscriptTimersByCallId.delete(callId);
794
+ }
795
+ this.recentFinalUserTranscriptsByCallId.delete(callId);
796
+ }
797
+ clearUserTranscriptState(callId) {
798
+ this.clearPartialUserTranscript(callId);
799
+ this.clearRecentFinalUserTranscript(callId);
800
+ }
801
+ resolveUserTranscriptContext(callId) {
802
+ return this.partialUserTranscriptsByCallId.get(callId) ?? this.recentFinalUserTranscriptsByCallId.get(callId);
803
+ }
804
+ consumePartialUserTranscript(callId, consumed) {
805
+ const text = consumed?.trim();
806
+ if (!text) return;
807
+ const current = this.partialUserTranscriptsByCallId.get(callId);
808
+ if (!current) return;
809
+ if (current === text) {
810
+ this.clearPartialUserTranscript(callId);
811
+ return;
812
+ }
813
+ if (current.toLowerCase().startsWith(text.toLowerCase())) {
814
+ const remaining = current.slice(text.length).trimStart();
815
+ if (remaining) this.partialUserTranscriptsByCallId.set(callId, remaining);
816
+ else this.clearPartialUserTranscript(callId);
817
+ }
818
+ const recent = this.recentFinalUserTranscriptsByCallId.get(callId);
819
+ if (!recent) return;
820
+ if (recent === text || recent.toLowerCase().startsWith(text.toLowerCase())) this.clearRecentFinalUserTranscript(callId);
821
+ }
822
+ async waitForConsultTranscriptSettle(callId, startedAt) {
823
+ const deadline = startedAt + CONSULT_TRANSCRIPT_SETTLE_MAX_MS;
824
+ while (true) {
825
+ const updatedAt = this.partialUserTranscriptUpdatedAtByCallId.get(callId);
826
+ if (!updatedAt) return;
827
+ const now = Date.now();
828
+ const quietFor = now - updatedAt;
829
+ if (quietFor >= CONSULT_TRANSCRIPT_SETTLE_MS || now >= deadline) return;
830
+ await new Promise((resolve) => setTimeout(resolve, Math.min(CONSULT_TRANSCRIPT_SETTLE_MS - quietFor, deadline - now)));
831
+ }
832
+ }
833
+ clearForcedConsultState(callId) {
834
+ const timer = this.forcedConsultTimersByCallId.get(callId);
835
+ if (timer) {
836
+ clearTimeout(timer);
837
+ this.forcedConsultTimersByCallId.delete(callId);
838
+ }
839
+ this.forcedConsultInFlightByCallId.delete(callId);
840
+ this.forcedConsultsByCallId.delete(callId);
841
+ this.lastProviderConsultAtByCallId.delete(callId);
842
+ }
843
+ closeTelephonyBridge(callIdOrSid, bridge, reason) {
844
+ const closer = this.activeTelephonyClosersByCallId.get(callIdOrSid);
845
+ if (closer) {
846
+ closer(reason);
847
+ return;
848
+ }
849
+ bridge?.close();
850
+ }
851
+ scheduleForcedAgentConsult(params) {
852
+ if (this.config.consultPolicy !== "always") return;
853
+ const question = params.transcript.trim();
854
+ if (!question) return;
855
+ const handler = this.toolHandlers.get(REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME);
856
+ if (!handler) return;
857
+ const existingTimer = this.forcedConsultTimersByCallId.get(params.callId);
858
+ if (existingTimer) clearTimeout(existingTimer);
859
+ const timer = setTimeout(() => {
860
+ this.forcedConsultTimersByCallId.delete(params.callId);
861
+ if (this.forcedConsultInFlightByCallId.has(params.callId)) return;
862
+ const lastProviderConsultAt = this.lastProviderConsultAtByCallId.get(params.callId) ?? 0;
863
+ if (Date.now() - lastProviderConsultAt < 2e3) return;
864
+ this.runForcedAgentConsult({
865
+ ...params,
866
+ question,
867
+ handler
868
+ });
869
+ }, FORCED_CONSULT_FALLBACK_DELAY_MS);
870
+ this.forcedConsultTimersByCallId.set(params.callId, timer);
871
+ }
872
+ async runForcedAgentConsult(params) {
873
+ this.forcedConsultInFlightByCallId.add(params.callId);
874
+ const startedAt = Date.now();
875
+ console.log(`[voice-call] realtime forced agent consult starting callId=${params.callId} providerCallId=${params.callSid} chars=${params.question.length}`);
876
+ params.clearAudio();
877
+ const state = {
878
+ sendSpeechPrompt: true,
879
+ promise: Promise.resolve().then(() => params.handler({
880
+ question: params.question,
881
+ context: "The realtime provider produced a final user transcript without invoking openclaw_agent_consult, so OpenClaw is forcing the consult because consultPolicy is always."
882
+ }, params.callId, {}))
883
+ };
884
+ this.forcedConsultsByCallId.set(params.callId, state);
885
+ try {
886
+ const result = await state.promise;
887
+ state.completedAt = Date.now();
888
+ const text = readSpeakableToolResultText(result);
889
+ if (!text) {
890
+ console.warn(`[voice-call] realtime forced agent consult returned no speakable text callId=${params.callId} providerCallId=${params.callSid}`);
891
+ return;
892
+ }
893
+ if (state.sendSpeechPrompt) {
894
+ params.clearAudio();
895
+ params.session.sendUserMessage(buildForcedConsultSpeechPrompt(text));
896
+ }
897
+ console.log(`[voice-call] realtime forced agent consult completed callId=${params.callId} providerCallId=${params.callSid} elapsedMs=${Date.now() - startedAt}`);
898
+ this.consumePartialUserTranscript(params.callId, params.question);
899
+ } catch (error) {
900
+ console.warn(`[voice-call] realtime forced agent consult failed callId=${params.callId} providerCallId=${params.callSid} error=${formatErrorMessage(error)}`);
901
+ } finally {
902
+ this.forcedConsultInFlightByCallId.delete(params.callId);
903
+ setTimeout(() => {
904
+ if (this.forcedConsultsByCallId.get(params.callId) === state) this.forcedConsultsByCallId.delete(params.callId);
905
+ }, FORCED_CONSULT_NATIVE_DEDUPE_MS).unref?.();
906
+ }
907
+ }
908
+ registerCallInManager(callSid, callerMeta = {}) {
909
+ const baseFields = {
910
+ providerCallId: callSid,
911
+ timestamp: Date.now(),
912
+ direction: callerMeta.direction ?? "inbound",
913
+ ...callerMeta.from ? { from: callerMeta.from } : {},
914
+ ...callerMeta.to ? { to: callerMeta.to } : {}
915
+ };
916
+ this.manager.processEvent({
917
+ id: `realtime-initiated-${callSid}`,
918
+ callId: callSid,
919
+ type: "call.initiated",
920
+ ...baseFields
921
+ });
922
+ const callRecord = this.manager.getCallByProviderCallId(callSid);
923
+ if (!callRecord) return null;
924
+ const initialGreeting = this.extractInitialGreeting(callRecord);
925
+ console.log(`[voice-call] Realtime call ${callRecord.callId} initial greeting ${initialGreeting ? "queued" : "absent"}`);
926
+ if (callRecord.metadata) delete callRecord.metadata.initialMessage;
927
+ this.manager.processEvent({
928
+ id: `realtime-answered-${callSid}`,
929
+ callId: callSid,
930
+ type: "call.answered",
931
+ ...baseFields
932
+ });
933
+ return {
934
+ callId: callRecord.callId,
935
+ initialGreetingInstructions: buildGreetingInstructions(this.config.instructions, initialGreeting)
936
+ };
937
+ }
938
+ extractInitialGreeting(call) {
939
+ return typeof call.metadata?.initialMessage === "string" ? call.metadata.initialMessage : void 0;
940
+ }
941
+ endCallInManager(callSid, callId, reason) {
942
+ this.manager.processEvent({
943
+ id: `realtime-ended-${callSid}-${Date.now()}`,
944
+ type: "call.ended",
945
+ callId,
946
+ providerCallId: callSid,
947
+ timestamp: Date.now(),
948
+ reason
949
+ });
950
+ }
951
+ async executeToolCall(bridge, callId, bridgeCallId, name, args, turnId, emitTalkEvent) {
952
+ const handler = this.toolHandlers.get(name);
953
+ const startedAt = Date.now();
954
+ const hasResultError = (result) => {
955
+ return Boolean(result && typeof result === "object" && !Array.isArray(result) && "error" in result);
956
+ };
957
+ const emitFinalToolEvent = (result) => {
958
+ emitTalkEvent?.({
959
+ type: hasResultError(result) ? "tool.error" : "tool.result",
960
+ turnId,
961
+ callId: bridgeCallId,
962
+ payload: {
963
+ name,
964
+ result
965
+ },
966
+ final: true
967
+ });
968
+ };
969
+ const submitFinalToolResult = (result) => {
970
+ bridge.submitToolResult(bridgeCallId, result);
971
+ emitFinalToolEvent(result);
972
+ };
973
+ const submitWorkingResponse = () => {
974
+ if (handler && name === REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME && bridge.bridge.supportsToolResultContinuation && !this.config.fastContext.enabled) {
975
+ emitTalkEvent?.({
976
+ type: "tool.progress",
977
+ turnId,
978
+ callId: bridgeCallId,
979
+ payload: {
980
+ name,
981
+ status: "working"
982
+ }
983
+ });
984
+ bridge.submitToolResult(bridgeCallId, buildRealtimeVoiceAgentConsultWorkingResponse("caller"), { willContinue: true });
985
+ }
986
+ };
987
+ if (name === REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME) {
988
+ this.lastProviderConsultAtByCallId.set(callId, Date.now());
989
+ const timer = this.forcedConsultTimersByCallId.get(callId);
990
+ if (timer) {
991
+ clearTimeout(timer);
992
+ this.forcedConsultTimersByCallId.delete(callId);
993
+ }
994
+ const forcedConsult = this.forcedConsultsByCallId.get(callId);
995
+ if (forcedConsult) {
996
+ if (forcedConsult.completedAt) {
997
+ submitFinalToolResult({
998
+ status: "already_delivered",
999
+ message: "OpenClaw already delivered this consult result internally. Do not repeat it."
1000
+ });
1001
+ return;
1002
+ }
1003
+ forcedConsult.sendSpeechPrompt = false;
1004
+ submitFinalToolResult(await forcedConsult.promise.catch((error) => ({ error: formatErrorMessage(error) })));
1005
+ return;
1006
+ }
1007
+ const existingNativeConsult = this.nativeConsultsInFlightByCallId.get(callId);
1008
+ if (existingNativeConsult) {
1009
+ console.log(`[voice-call] realtime tool call sharing in-flight agent consult callId=${callId} ageMs=${Date.now() - existingNativeConsult.startedAt}`);
1010
+ submitWorkingResponse();
1011
+ submitFinalToolResult(await existingNativeConsult.promise);
1012
+ return;
1013
+ }
1014
+ submitWorkingResponse();
1015
+ const state = {
1016
+ startedAt,
1017
+ promise: Promise.resolve()
1018
+ };
1019
+ state.promise = (async () => {
1020
+ await this.waitForConsultTranscriptSettle(callId, startedAt);
1021
+ const context = { partialUserTranscript: this.resolveUserTranscriptContext(callId) };
1022
+ state.partialUserTranscript = context.partialUserTranscript;
1023
+ const handlerArgs = withFallbackConsultQuestion(args, context.partialUserTranscript);
1024
+ console.log(`[voice-call] realtime tool call executing callId=${callId} tool=${name} hasHandler=${Boolean(handler)}`);
1025
+ return !handler ? { error: `Tool "${name}" not available` } : await handler(handlerArgs, callId, context);
1026
+ })().catch((error) => ({ error: formatErrorMessage(error) }));
1027
+ this.nativeConsultsInFlightByCallId.set(callId, state);
1028
+ try {
1029
+ const result = await state.promise;
1030
+ const status = result && typeof result === "object" && !Array.isArray(result) && "error" in result ? "error" : "ok";
1031
+ const error = status === "error" && result && typeof result === "object" && !Array.isArray(result) ? formatErrorMessage(result.error ?? "unknown") : void 0;
1032
+ console.log(`[voice-call] realtime tool call completed callId=${callId} tool=${name} status=${status} elapsedMs=${Date.now() - startedAt}${error ? ` error=${error}` : ""}`);
1033
+ submitFinalToolResult(result);
1034
+ if (status === "ok") this.consumePartialUserTranscript(callId, state.partialUserTranscript);
1035
+ } finally {
1036
+ if (this.nativeConsultsInFlightByCallId.get(callId) === state) this.nativeConsultsInFlightByCallId.delete(callId);
1037
+ }
1038
+ return;
1039
+ }
1040
+ console.log(`[voice-call] realtime tool call executing callId=${callId} tool=${name} hasHandler=${Boolean(handler)}`);
1041
+ const context = { partialUserTranscript: this.resolveUserTranscriptContext(callId) };
1042
+ const handlerArgs = name === REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME ? withFallbackConsultQuestion(args, context.partialUserTranscript) : args;
1043
+ const result = !handler ? { error: `Tool "${name}" not available` } : await handler(handlerArgs, callId, context).catch((error) => ({ error: formatErrorMessage(error) }));
1044
+ const status = result && typeof result === "object" && !Array.isArray(result) && "error" in result ? "error" : "ok";
1045
+ const error = status === "error" && result && typeof result === "object" && !Array.isArray(result) ? formatErrorMessage(result.error ?? "unknown") : void 0;
1046
+ console.log(`[voice-call] realtime tool call completed callId=${callId} tool=${name} status=${status} elapsedMs=${Date.now() - startedAt}${error ? ` error=${error}` : ""}`);
1047
+ submitFinalToolResult(result);
1048
+ if (name === REALTIME_VOICE_AGENT_CONSULT_TOOL_NAME && status === "ok") this.consumePartialUserTranscript(callId, context.partialUserTranscript);
1049
+ }
1050
+ };
1051
+ //#endregion
1052
+ export { RealtimeCallHandler };