getpatter 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,11 @@
1
+ import {
2
+ OpenAIRealtime2Adapter,
3
+ OpenAIRealtimeAdapter,
4
+ createResampler16kTo8k,
5
+ createResampler8kTo16k,
6
+ mulawToPcm16,
7
+ pcm16ToMulaw
8
+ } from "./chunk-CL2U3YET.mjs";
1
9
  import {
2
10
  getLogger
3
11
  } from "./chunk-MVOQFAEO.mjs";
@@ -21,1259 +29,9 @@ import express from "express";
21
29
  import { createServer } from "http";
22
30
  import { WebSocketServer } from "ws";
23
31
 
24
- // src/providers/openai-realtime.ts
25
- init_esm_shims();
26
- import WebSocket from "ws";
27
- var OpenAIRealtimeAudioFormat = {
28
- G711_ULAW: "g711_ulaw",
29
- G711_ALAW: "g711_alaw",
30
- PCM16: "pcm16"
31
- };
32
- var OpenAIRealtimeModel = {
33
- GPT_REALTIME: "gpt-realtime",
34
- GPT_REALTIME_2: "gpt-realtime-2",
35
- GPT_REALTIME_MINI: "gpt-realtime-mini",
36
- GPT_4O_REALTIME_PREVIEW: "gpt-4o-realtime-preview",
37
- GPT_4O_MINI_REALTIME_PREVIEW: "gpt-4o-mini-realtime-preview"
38
- };
39
- var OpenAIVoice = {
40
- ALLOY: "alloy",
41
- ASH: "ash",
42
- BALLAD: "ballad",
43
- CORAL: "coral",
44
- ECHO: "echo",
45
- FABLE: "fable",
46
- NOVA: "nova",
47
- ONYX: "onyx",
48
- SAGE: "sage",
49
- SHIMMER: "shimmer",
50
- VERSE: "verse"
51
- };
52
- var OpenAITranscriptionModel = {
53
- WHISPER_1: "whisper-1",
54
- GPT_4O_TRANSCRIBE: "gpt-4o-transcribe",
55
- GPT_4O_MINI_TRANSCRIBE: "gpt-4o-mini-transcribe",
56
- GPT_REALTIME_WHISPER: "gpt-realtime-whisper"
57
- };
58
- var OpenAIRealtimeVADType = {
59
- SERVER_VAD: "server_vad",
60
- SEMANTIC_VAD: "semantic_vad"
61
- };
62
- var OpenAIRealtimeAdapter = class {
63
- constructor(apiKey, model = OpenAIRealtimeModel.GPT_REALTIME_MINI, voice = OpenAIVoice.ALLOY, instructions = "", tools, audioFormat = OpenAIRealtimeAudioFormat.G711_ULAW, options = {}) {
64
- this.apiKey = apiKey;
65
- this.model = model;
66
- this.voice = voice;
67
- this.instructions = instructions;
68
- this.tools = tools;
69
- this.audioFormat = audioFormat;
70
- this.options = options;
71
- }
72
- apiKey;
73
- model;
74
- voice;
75
- instructions;
76
- tools;
77
- audioFormat;
78
- // Fields exposed `protected` (not `private`) so a subclass can implement
79
- // alternate transports — e.g. `OpenAIRealtime2Adapter` overrides
80
- // `connect()` to speak the GA Realtime API while reusing the rest of
81
- // the runtime (audio dispatch, barge-in, heartbeat).
82
- ws = null;
83
- eventCallbacks = /* @__PURE__ */ new Set();
84
- messageListenerAttached = false;
85
- heartbeat = null;
86
- // Track the in-flight assistant item id so we can truncate cleanly on
87
- // barge-in (see ``cancelResponse``) — matches the Python adapter.
88
- currentResponseItemId = null;
89
- currentResponseAudioMs = 0;
90
- // Wall-clock timestamp (Date.now()) of the first ``response.audio.delta``
91
- // received since the current response item started. ``cancelResponse``
92
- // uses this to bound ``audio_end_ms`` to what the caller could plausibly
93
- // have heard — generated audio frequently arrives 5-10x real-time, so
94
- // ``audio_end_ms`` driven purely by the per-chunk byte counter overshoots
95
- // reality and leaves phantom assistant text on the conversation. The
96
- // wall-clock cap corresponds to the maximum playback that real-time TTS
97
- // could have produced, which is what the user actually heard.
98
- currentResponseFirstAudioAt = null;
99
- options;
100
- /**
101
- * Build the production session.update body. Mirrors the body sent
102
- * inside `connect()` so warmup can apply identical configuration to
103
- * the upstream session and prime it without billing.
104
- */
105
- buildSessionConfig() {
106
- const config = {
107
- input_audio_format: this.audioFormat,
108
- output_audio_format: this.audioFormat,
109
- voice: this.voice,
110
- instructions: this.instructions || "You are a helpful voice assistant. Be concise.",
111
- turn_detection: {
112
- type: this.options.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
113
- threshold: 0.5,
114
- prefix_padding_ms: 300,
115
- silence_duration_ms: this.options.silenceDurationMs ?? 300
116
- },
117
- input_audio_transcription: {
118
- model: this.options.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
119
- }
120
- };
121
- if (this.options.temperature !== void 0) config.temperature = this.options.temperature;
122
- if (this.options.maxResponseOutputTokens !== void 0) {
123
- config.max_response_output_tokens = this.options.maxResponseOutputTokens;
124
- }
125
- if (this.options.modalities !== void 0) config.modalities = this.options.modalities;
126
- if (this.options.toolChoice !== void 0) config.tool_choice = this.options.toolChoice;
127
- if (this.options.reasoningEffort !== void 0) {
128
- config.reasoning = { effort: this.options.reasoningEffort };
129
- }
130
- if (this.tools?.length) {
131
- config.tools = this.tools.map((t) => {
132
- const def = {
133
- type: "function",
134
- name: t.name,
135
- description: t.description,
136
- parameters: t.parameters
137
- };
138
- if (t.strict === true) {
139
- def.strict = true;
140
- }
141
- return def;
142
- });
143
- }
144
- return config;
145
- }
146
- /**
147
- * Pre-call WebSocket warmup for the OpenAI Realtime endpoint.
148
- *
149
- * The canonical session-only warm step on the Realtime API: open the
150
- * WS, wait for `session.created`, send a single `session.update`
151
- * containing the same fields that the production `connect()` path
152
- * applies (`input_audio_format`, `output_audio_format`, `voice`,
153
- * `instructions`, `turn_detection`, `input_audio_transcription`,
154
- * plus any opt-in fields populated on the adapter), wait for the
155
- * matching `session.updated` ack, then close cleanly. This primes
156
- * the per-session state on the OpenAI side — DNS + TLS + auth
157
- * handshake + initial config exchange — without ever invoking the
158
- * model.
159
- *
160
- * Earlier revisions sent `response.create` with
161
- * `{"response": {"generate": false}}` to prime the inference path.
162
- * That field is NOT in the OpenAI Realtime API schema; the server
163
- * either ignores it (and bills tokens for a real model response) or
164
- * rejects the request with `invalid_request_error`. Both behaviours
165
- * are billing-unsafe or a no-op beyond TLS warm. The
166
- * `session.update` flow is documented and side-effect-free.
167
- *
168
- * Billing safety: `session.update` only mutates session
169
- * configuration. It does NOT invoke the model, does NOT consume any
170
- * audio buffer, and does NOT trigger token generation, so no
171
- * per-token cost is accrued. Best-effort: failures are logged at
172
- * debug level and never raised.
173
- */
174
- async warmup() {
175
- const url = `wss://api.openai.com/v1/realtime?model=${encodeURIComponent(this.model)}`;
176
- let ws = null;
177
- try {
178
- ws = await new Promise((resolve, reject) => {
179
- const sock = new WebSocket(url, {
180
- headers: {
181
- Authorization: `Bearer ${this.apiKey}`,
182
- "OpenAI-Beta": "realtime=v1"
183
- }
184
- });
185
- const timer = setTimeout(() => {
186
- try {
187
- sock.close();
188
- } catch {
189
- }
190
- reject(new Error("OpenAI Realtime warmup connect timeout"));
191
- }, 5e3);
192
- sock.once("open", () => {
193
- clearTimeout(timer);
194
- resolve(sock);
195
- });
196
- sock.once("error", (err) => {
197
- clearTimeout(timer);
198
- reject(err);
199
- });
200
- });
201
- const sessionCreated = await new Promise((resolve) => {
202
- const timer = setTimeout(() => resolve(false), 2e3);
203
- const onMsg = (raw) => {
204
- try {
205
- const data = JSON.parse(raw.toString());
206
- if (data.type === "session.created") {
207
- clearTimeout(timer);
208
- ws.off("message", onMsg);
209
- resolve(true);
210
- }
211
- } catch {
212
- }
213
- };
214
- ws.on("message", onMsg);
215
- });
216
- if (!sessionCreated) return;
217
- try {
218
- ws.send(JSON.stringify({ type: "session.update", session: this.buildSessionConfig() }));
219
- } catch {
220
- return;
221
- }
222
- await new Promise((resolve) => {
223
- const timer = setTimeout(() => resolve(), 1500);
224
- const onMsg = (raw) => {
225
- try {
226
- const data = JSON.parse(raw.toString());
227
- if (data.type === "session.updated") {
228
- clearTimeout(timer);
229
- ws.off("message", onMsg);
230
- resolve();
231
- }
232
- } catch {
233
- }
234
- };
235
- ws.on("message", onMsg);
236
- });
237
- } catch (err) {
238
- getLogger().debug(`OpenAI Realtime warmup failed (best-effort): ${String(err)}`);
239
- } finally {
240
- if (ws) {
241
- try {
242
- ws.close();
243
- } catch {
244
- }
245
- }
246
- }
247
- }
248
- /** Open the Realtime WebSocket and apply the session configuration. */
249
- async connect() {
250
- const url = `wss://api.openai.com/v1/realtime?model=${encodeURIComponent(this.model)}`;
251
- this.ws = new WebSocket(url, {
252
- headers: {
253
- Authorization: `Bearer ${this.apiKey}`,
254
- "OpenAI-Beta": "realtime=v1"
255
- }
256
- });
257
- await new Promise((resolve, reject) => {
258
- let sessionCreated = false;
259
- let settled = false;
260
- const ws = this.ws;
261
- const onSetupMessage = (raw) => {
262
- let msg;
263
- try {
264
- msg = JSON.parse(raw.toString());
265
- } catch (e) {
266
- getLogger().warn(`OpenAI Realtime: failed to parse message: ${String(e)}`);
267
- return;
268
- }
269
- if (msg.type === "session.created" && !sessionCreated) {
270
- sessionCreated = true;
271
- ws.send(JSON.stringify({ type: "session.update", session: this.buildSessionConfig() }));
272
- } else if (msg.type === "session.updated") {
273
- cleanup();
274
- resolve();
275
- }
276
- };
277
- const onSetupError = (err) => {
278
- cleanup();
279
- try {
280
- ws.close();
281
- } catch {
282
- }
283
- reject(err);
284
- };
285
- const cleanup = () => {
286
- if (settled) return;
287
- settled = true;
288
- clearTimeout(timer);
289
- ws.off("message", onSetupMessage);
290
- ws.off("error", onSetupError);
291
- };
292
- const timer = setTimeout(() => {
293
- cleanup();
294
- try {
295
- ws.close();
296
- } catch {
297
- }
298
- reject(new Error("OpenAI Realtime connect timeout"));
299
- }, 15e3);
300
- ws.on("message", onSetupMessage);
301
- ws.on("error", onSetupError);
302
- });
303
- this.armHeartbeatAndListener();
304
- }
305
- /**
306
- * Adopt a pre-opened, already-`session.updated` Realtime WebSocket
307
- * produced by the prewarm pipeline (see `Patter.parkProviderConnections`).
308
- * Skips the fresh `new WebSocket()` + `session.created` /
309
- * `session.update` round-trip — saves ~250-450 ms on first turn.
310
- *
311
- * Caller MUST verify `ws.readyState === OPEN` before calling and MUST
312
- * have already received `session.updated` on the parked socket. If
313
- * the parked WS died between park and adopt, fall back to `connect()`.
314
- */
315
- adoptWebSocket(ws) {
316
- this.ws = ws;
317
- this.armHeartbeatAndListener();
318
- }
319
- armHeartbeatAndListener() {
320
- this.heartbeat = setInterval(() => {
321
- try {
322
- this.ws?.ping();
323
- } catch {
324
- }
325
- }, 2e4);
326
- this.ensureMessageListener();
327
- }
328
- /**
329
- * Open a fresh Realtime WS, exchange `session.created` /
330
- * `session.update` / `session.updated` (so the upstream session is
331
- * fully primed), and return the OPEN socket WITHOUT arming the
332
- * heartbeat / message listener. Used by the prewarm pipeline to park
333
- * a Realtime connection during ringing; the live consumer adopts it
334
- * via {@link adoptWebSocket}.
335
- *
336
- * Bounded by 8 s. Throws on timeout / handshake failure — callers
337
- * (the prewarm pipeline) treat any error as a cache miss and the
338
- * call falls through to the cold `connect()` path.
339
- *
340
- * Billing safety: `session.update` does not invoke the model. No
341
- * tokens are billed.
342
- */
343
- async openParkedConnection() {
344
- const url = `wss://api.openai.com/v1/realtime?model=${encodeURIComponent(this.model)}`;
345
- const ws = new WebSocket(url, {
346
- headers: {
347
- Authorization: `Bearer ${this.apiKey}`,
348
- "OpenAI-Beta": "realtime=v1"
349
- }
350
- });
351
- await new Promise((resolve, reject) => {
352
- let sessionCreated = false;
353
- let settled = false;
354
- const onMessage = (raw) => {
355
- let msg;
356
- try {
357
- msg = JSON.parse(raw.toString());
358
- } catch {
359
- return;
360
- }
361
- if (msg.type === "session.created" && !sessionCreated) {
362
- sessionCreated = true;
363
- try {
364
- ws.send(JSON.stringify({ type: "session.update", session: this.buildSessionConfig() }));
365
- } catch (err) {
366
- cleanup();
367
- reject(err instanceof Error ? err : new Error(String(err)));
368
- }
369
- } else if (msg.type === "session.updated") {
370
- cleanup();
371
- resolve();
372
- }
373
- };
374
- const onError = (err) => {
375
- cleanup();
376
- reject(err);
377
- };
378
- const cleanup = () => {
379
- if (settled) return;
380
- settled = true;
381
- clearTimeout(timer);
382
- ws.off("message", onMessage);
383
- ws.off("error", onError);
384
- };
385
- const timer = setTimeout(() => {
386
- cleanup();
387
- reject(new Error("OpenAI Realtime park connect timeout"));
388
- }, 8e3);
389
- ws.on("message", onMessage);
390
- ws.on("error", onError);
391
- });
392
- return ws;
393
- }
394
- /** Append a base64-encoded audio chunk to the realtime input buffer. */
395
- sendAudio(mulawAudio) {
396
- if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
397
- this.ws.send(JSON.stringify({ type: "input_audio_buffer.append", audio: mulawAudio.toString("base64") }));
398
- }
399
- /**
400
- * Register a listener for parsed realtime events.
401
- *
402
- * Previously every call attached a new ``ws.on('message')`` handler,
403
- * which leaked listeners across retries and multi-consumer hooks. We now
404
- * route all traffic through a single persistent handler that fans out to
405
- * a Set of callbacks. Use {@link offEvent} to remove one.
406
- */
407
- onEvent(callback) {
408
- this.eventCallbacks.add(callback);
409
- this.ensureMessageListener();
410
- }
411
- /** Remove a previously registered {@link onEvent} callback. */
412
- offEvent(callback) {
413
- this.eventCallbacks.delete(callback);
414
- }
415
- ensureMessageListener() {
416
- if (this.messageListenerAttached || !this.ws) return;
417
- this.messageListenerAttached = true;
418
- const ws = this.ws;
419
- const dispatch = (type, payload) => {
420
- for (const cb of this.eventCallbacks) {
421
- void Promise.resolve(cb(type, payload)).catch(
422
- (err) => getLogger().error("onEvent callback error:", err)
423
- );
424
- }
425
- };
426
- ws.on("message", (raw) => {
427
- let data;
428
- try {
429
- data = JSON.parse(raw.toString());
430
- } catch (e) {
431
- getLogger().warn(`OpenAI Realtime: failed to parse event message: ${String(e)}`);
432
- return;
433
- }
434
- const t = data.type;
435
- if (t === "response.audio.delta") {
436
- const buf = Buffer.from(data.delta ?? "", "base64");
437
- this.currentResponseAudioMs += estimateAudioMs(buf, this.audioFormat);
438
- if (this.currentResponseFirstAudioAt === null) {
439
- this.currentResponseFirstAudioAt = Date.now();
440
- }
441
- dispatch("audio", buf);
442
- } else if (t === "response.audio_transcript.delta") {
443
- dispatch("transcript_output", data.delta);
444
- } else if (t === "response.content_part.added" || t === "response.output_item.added") {
445
- const itemId = data.item?.id ?? data.item_id ?? null;
446
- if (itemId) {
447
- this.currentResponseItemId = itemId;
448
- this.currentResponseAudioMs = 0;
449
- this.currentResponseFirstAudioAt = null;
450
- }
451
- } else if (t === "input_audio_buffer.speech_started") {
452
- dispatch("speech_started", null);
453
- } else if (t === "input_audio_buffer.speech_stopped") {
454
- dispatch("speech_stopped", null);
455
- } else if (t === "conversation.item.input_audio_transcription.completed") {
456
- dispatch("transcript_input", data.transcript);
457
- } else if (t === "response.function_call_arguments.done") {
458
- dispatch("function_call", { call_id: data.call_id, name: data.name, arguments: data.arguments });
459
- } else if (t === "response.done") {
460
- this.currentResponseItemId = null;
461
- this.currentResponseAudioMs = 0;
462
- this.currentResponseFirstAudioAt = null;
463
- dispatch("response_done", data.response ?? null);
464
- } else if (t === "error") {
465
- dispatch("error", data.error);
466
- }
467
- });
468
- ws.on("close", (code, reason) => {
469
- if (code !== 1e3) {
470
- dispatch("error", {
471
- type: "connection_closed",
472
- code,
473
- reason: reason?.toString() ?? ""
474
- });
475
- }
476
- });
477
- ws.on("error", (err) => {
478
- dispatch("error", { type: "socket_error", message: err?.message ?? String(err) });
479
- });
480
- }
481
- /** Truncate the in-flight assistant turn and cancel the active response.
482
- *
483
- * ``audio_end_ms`` MUST reflect what the caller actually heard, not what
484
- * the server generated. OpenAI streams audio at 5-10x real-time, so the
485
- * byte-derived counter overstates playback whenever the consumer cleared
486
- * its playout buffer (e.g. ``send_clear``) before the audio reached the
487
- * speaker. We bound the truncate point by wall-clock time since the first
488
- * chunk of this response — that's the physical maximum a 1x real-time
489
- * playback could have produced. Without this cap, OpenAI keeps the full
490
- * generated assistant text on the transcript, and the model replays /
491
- * resumes from it on the next turn — manifesting as re-greetings and
492
- * mid-sentence fragments after a barge-in storm.
493
- */
494
- cancelResponse() {
495
- if (!this.ws) return;
496
- if (this.currentResponseItemId) {
497
- let audioEndMs = this.currentResponseAudioMs;
498
- if (this.currentResponseFirstAudioAt !== null) {
499
- const elapsedMs = Date.now() - this.currentResponseFirstAudioAt;
500
- audioEndMs = Math.min(audioEndMs, Math.max(elapsedMs, 0));
501
- }
502
- try {
503
- this.ws.send(JSON.stringify({
504
- type: "conversation.item.truncate",
505
- item_id: this.currentResponseItemId,
506
- content_index: 0,
507
- audio_end_ms: audioEndMs
508
- }));
509
- } catch (err) {
510
- getLogger().debug?.(`conversation.item.truncate failed: ${String(err)}`);
511
- }
512
- }
513
- this.ws.send(JSON.stringify({ type: "response.cancel" }));
514
- this.currentResponseItemId = null;
515
- this.currentResponseAudioMs = 0;
516
- this.currentResponseFirstAudioAt = null;
517
- }
518
- /** Inject a user text turn and request a new response. */
519
- async sendText(text) {
520
- this.ws?.send(JSON.stringify({
521
- type: "conversation.item.create",
522
- item: { type: "message", role: "user", content: [{ type: "input_text", text }] }
523
- }));
524
- this.ws?.send(JSON.stringify({ type: "response.create" }));
525
- }
526
- /**
527
- * Make the AI speak ``text`` as its opening line.
528
- *
529
- * Triggers ``response.create`` with explicit ``instructions`` that force
530
- * the model to render ``text`` verbatim as its first audio utterance.
531
- * This is the correct semantics for ``Agent.firstMessage`` per its
532
- * docstring ("What the AI says when the callee answers").
533
- *
534
- * Without this, ``sendText(firstMessage)`` would inject ``text`` as
535
- * ``role: user`` and the AI would *reply* to its own greeting, producing
536
- * role-confused openings (e.g. a receptionist agent responding "I'd like
537
- * to schedule a haircut" because it took its own first_message as a
538
- * customer cue).
539
- */
540
- async sendFirstMessage(text) {
541
- this.ws?.send(JSON.stringify({
542
- type: "response.create",
543
- response: {
544
- modalities: ["audio", "text"],
545
- instructions: `Say exactly the following sentence as your first turn and nothing else: "${text}"`
546
- }
547
- }));
548
- }
549
- /** Submit a tool/function-call result and request the next response. */
550
- async sendFunctionResult(callId, result) {
551
- this.ws?.send(JSON.stringify({
552
- type: "conversation.item.create",
553
- item: { type: "function_call_output", call_id: callId, output: result }
554
- }));
555
- this.ws?.send(JSON.stringify({ type: "response.create" }));
556
- }
557
- /** Stop the heartbeat, drop listeners, and close the Realtime WebSocket. */
558
- close() {
559
- if (this.heartbeat) {
560
- clearInterval(this.heartbeat);
561
- this.heartbeat = null;
562
- }
563
- this.eventCallbacks.clear();
564
- this.messageListenerAttached = false;
565
- this.ws?.close();
566
- this.ws = null;
567
- }
568
- };
569
- function estimateAudioMs(chunk, format) {
570
- if (chunk.length === 0) return 0;
571
- if (format === OpenAIRealtimeAudioFormat.G711_ULAW || format === OpenAIRealtimeAudioFormat.G711_ALAW)
572
- return Math.floor(chunk.length / 8);
573
- if (format === OpenAIRealtimeAudioFormat.PCM16) {
574
- return Math.floor(chunk.length / 48);
575
- }
576
- return 0;
577
- }
578
-
579
- // src/providers/openai-realtime-2.ts
580
- init_esm_shims();
581
- import WebSocket2 from "ws";
582
-
583
- // src/audio/transcoding.ts
584
- init_esm_shims();
585
- var MULAW_TO_PCM16_TABLE = (() => {
586
- const table = new Int16Array(256);
587
- for (let i = 0; i < 256; i++) {
588
- const mu = ~i & 255;
589
- const sign = mu & 128 ? -1 : 1;
590
- const exponent = mu >> 4 & 7;
591
- const mantissa = mu & 15;
592
- const magnitude = (mantissa << 1 | 33) << exponent + 2;
593
- table[i] = sign * (magnitude - 132);
594
- }
595
- return table;
596
- })();
597
- var PCM16_TO_MULAW_TABLE = (() => {
598
- const BIAS = 132;
599
- const CLIP = 32635;
600
- const table = new Uint8Array(65536);
601
- for (let i = 0; i < 65536; i++) {
602
- let sample = i >= 32768 ? i - 65536 : i;
603
- const sign = sample < 0 ? 128 : 0;
604
- if (sample < 0) sample = -sample;
605
- if (sample > CLIP) sample = CLIP;
606
- sample += BIAS;
607
- let exponent = 7;
608
- const exponentMask = 16384;
609
- for (let shift = exponentMask; shift > 0 && (sample & shift) === 0; shift >>= 1) {
610
- exponent--;
611
- }
612
- const mantissa = sample >> exponent + 3 & 15;
613
- const mulaw = ~(sign | exponent << 4 | mantissa) & 255;
614
- table[i] = mulaw;
615
- }
616
- return table;
617
- })();
618
- function mulawToPcm16(mulawData) {
619
- const out = Buffer.alloc(mulawData.length * 2);
620
- for (let i = 0; i < mulawData.length; i++) {
621
- out.writeInt16LE(MULAW_TO_PCM16_TABLE[mulawData[i]], i * 2);
622
- }
623
- return out;
624
- }
625
- function pcm16ToMulaw(pcmData) {
626
- const sampleCount = Math.floor(pcmData.length / 2);
627
- const out = Buffer.alloc(sampleCount);
628
- for (let i = 0; i < sampleCount; i++) {
629
- const sample = pcmData.readInt16LE(i * 2);
630
- out[i] = PCM16_TO_MULAW_TABLE[sample + 65536 & 65535];
631
- }
632
- return out;
633
- }
634
- var PcmCarry = class {
635
- pending = null;
636
- /**
637
- * Prepend any carried odd byte, return the even-length prefix, and stash
638
- * any new trailing odd byte for the next call.
639
- *
640
- * Returns a zero-length buffer when no complete sample is yet available.
641
- */
642
- push(chunk) {
643
- const combined = this.pending !== null ? Buffer.concat([this.pending, chunk]) : chunk;
644
- this.pending = null;
645
- const alignedLen = combined.length & ~1;
646
- if (alignedLen < combined.length) {
647
- this.pending = combined.subarray(alignedLen);
648
- }
649
- return combined.subarray(0, alignedLen);
650
- }
651
- /**
652
- * Return any pending byte as a 1-byte buffer (rare in practice — only if
653
- * the entire stream had an odd byte count), then reset internal state.
654
- */
655
- flush() {
656
- if (this.pending === null) return Buffer.alloc(0);
657
- const out = this.pending;
658
- this.pending = null;
659
- return out;
660
- }
661
- /** Reset carry state without flushing. */
662
- reset() {
663
- this.pending = null;
664
- }
665
- };
666
- var StatefulResampler = class {
667
- srcRate;
668
- dstRate;
669
- // 16k→8k: 5-tap FIR state.
670
- // Extended sample buffer carries the 2 history samples that precede the
671
- // current chunk AND any "pending" input sample that did not yet generate
672
- // output (i.e. the odd sample when the chunk had an odd sample count).
673
- // `firPhase` = 0 means the next output is at input position 0 of the
674
- // current chunk; 1 means it starts at input position 1 (because the
675
- // previous chunk ended on an even-output boundary).
676
- firHistory = new Int16Array(2);
677
- // [s_{-2}, s_{-1}]
678
- firHistoryValid = false;
679
- // Pending sample carried from odd-count chunks (not the byte carry —
680
- // this is a complete Int16 sample that becomes the first input for the
681
- // next call).
682
- firPendingSample = null;
683
- // 8k→16k: last input sample deferred across chunk boundaries.
684
- upsampleLast = 0;
685
- upsampleHasHistory = false;
686
- // 24k→16k: fractional phase and last input sample across chunks.
687
- resample24Last = 0;
688
- resample24Phase = 0;
689
- resample24HasHistory = false;
690
- // Odd-byte alignment carry.
691
- carry = new PcmCarry();
692
- constructor(opts) {
693
- this.srcRate = opts.srcRate;
694
- this.dstRate = opts.dstRate;
695
- if (opts.channels !== void 0 && opts.channels !== 1) {
696
- throw new Error("StatefulResampler: only mono (channels=1) is supported");
697
- }
698
- const key = `${this.srcRate}->${this.dstRate}`;
699
- if (key !== "16000->8000" && key !== "8000->16000" && key !== "24000->16000" && key !== "24000->8000") {
700
- throw new Error(
701
- `StatefulResampler: unsupported conversion ${key}. Supported: 16000->8000, 8000->16000, 24000->16000, 24000->8000`
702
- );
703
- }
704
- }
705
- /**
706
- * Process a chunk of PCM16-LE samples.
707
- *
708
- * Handles odd-byte inputs via an internal carry buffer. Returns an even-byte-
709
- * aligned output buffer; may return a zero-length buffer if not enough
710
- * aligned input is available yet.
711
- */
712
- process(pcm) {
713
- const aligned = this.carry.push(pcm);
714
- if (aligned.length === 0) return Buffer.alloc(0);
715
- if (this.srcRate === 16e3 && this.dstRate === 8e3) {
716
- return this._downsample16kTo8k(aligned);
717
- }
718
- if (this.srcRate === 8e3 && this.dstRate === 16e3) {
719
- return this._upsample8kTo16k(aligned);
720
- }
721
- if (this.srcRate === 24e3 && this.dstRate === 8e3) {
722
- return this._resample24kTo8k(aligned);
723
- }
724
- return this._resample24kTo16k(aligned);
725
- }
726
- /**
727
- * Flush internal state and return any remaining output samples.
728
- *
729
- * For 8k→16k: the deferred last sample is emitted duplicated (matching
730
- * the stateless helper's end-of-stream behaviour).
731
- * For 16k→8k: any pending odd sample is processed with edge-replication.
732
- * Resets all state after flushing.
733
- */
734
- flush() {
735
- this.carry.flush();
736
- if (this.srcRate === 16e3 && this.dstRate === 8e3 && this.firPendingSample !== null) {
737
- const s = this.firPendingSample;
738
- const tmp = Buffer.alloc(4);
739
- tmp.writeInt16LE(s, 0);
740
- tmp.writeInt16LE(s, 2);
741
- const out = this._downsample16kTo8k(tmp);
742
- this.firPendingSample = null;
743
- return out;
744
- }
745
- if (this.srcRate === 8e3 && this.dstRate === 16e3 && this.upsampleHasHistory) {
746
- const out = Buffer.alloc(4);
747
- out.writeInt16LE(this.upsampleLast, 0);
748
- out.writeInt16LE(this.upsampleLast, 2);
749
- this.upsampleHasHistory = false;
750
- this.upsampleLast = 0;
751
- return out;
752
- }
753
- return Buffer.alloc(0);
754
- }
755
- /** Reset all carried state (e.g. at call boundaries). */
756
- reset() {
757
- this.firHistory = new Int16Array(2);
758
- this.firHistoryValid = false;
759
- this.firPendingSample = null;
760
- this.upsampleLast = 0;
761
- this.upsampleHasHistory = false;
762
- this.resample24Last = 0;
763
- this.resample24Phase = 0;
764
- this.resample24HasHistory = false;
765
- this.carry.reset();
766
- }
767
- // ---------------------------------------------------------------------------
768
- // Private: 16 kHz → 8 kHz
769
- // ---------------------------------------------------------------------------
770
- /**
771
- * 2:1 decimation with a 5-tap binomial FIR anti-alias filter.
772
- *
773
- * FIR coefficients: [1, 4, 6, 4, 1] / 16 (cutoff ~Fs/4 = 4 kHz).
774
- *
775
- * Cross-chunk state:
776
- * - `firHistory[0]` = s_{-2}, `firHistory[1]` = s_{-1} relative to the
777
- * virtual stream (seeded to first-sample on the very first call).
778
- * - `firPendingSample` = a lone input sample carried from a chunk whose
779
- * sample count was odd; it will become the first input of the next chunk.
780
- *
781
- * Decimation: outputs are at even positions (0, 2, 4 …) in the virtual
782
- * extended stream, so every 2 input samples yield 1 output. An odd-sample-
783
- * count chunk leaves 1 sample in `firPendingSample`; the next chunk
784
- * prepends it so the output cadence is unbroken.
785
- */
786
- _downsample16kTo8k(buf) {
787
- const newSampleCount = buf.length >> 1;
788
- const hasPending = this.firPendingSample !== null;
789
- const totalInput = newSampleCount + (hasPending ? 1 : 0);
790
- const input = new Int16Array(totalInput);
791
- if (hasPending) {
792
- input[0] = this.firPendingSample;
793
- for (let j = 0; j < newSampleCount; j++) input[j + 1] = buf.readInt16LE(j * 2);
794
- } else {
795
- for (let j = 0; j < newSampleCount; j++) input[j] = buf.readInt16LE(j * 2);
796
- }
797
- this.firPendingSample = null;
798
- if (totalInput === 0) return Buffer.alloc(0);
799
- if (!this.firHistoryValid) {
800
- this.firHistory[0] = 0;
801
- this.firHistory[1] = 0;
802
- this.firHistoryValid = true;
803
- }
804
- const extended = new Int16Array(totalInput + 2);
805
- extended[0] = this.firHistory[0];
806
- extended[1] = this.firHistory[1];
807
- for (let j = 0; j < totalInput; j++) extended[j + 2] = input[j];
808
- const outSamples = totalInput >> 1;
809
- const out = Buffer.alloc(outSamples * 2);
810
- for (let i = 0; i < outSamples; i++) {
811
- const c = 2 + i * 2;
812
- const sM2 = extended[c - 2];
813
- const sM1 = extended[c - 1];
814
- const s0 = extended[c];
815
- const sP1 = c + 1 < extended.length ? extended[c + 1] : extended[extended.length - 1];
816
- const sP2 = c + 2 < extended.length ? extended[c + 2] : extended[extended.length - 1];
817
- const filtered = sM2 + 4 * sM1 + 6 * s0 + 4 * sP1 + sP2 + 8 >> 4;
818
- out.writeInt16LE(Math.max(-32768, Math.min(32767, filtered)), i * 2);
819
- }
820
- if (totalInput % 2 === 1) {
821
- this.firPendingSample = input[totalInput - 1];
822
- }
823
- if (totalInput >= 2) {
824
- this.firHistory[0] = input[totalInput - 2];
825
- this.firHistory[1] = input[totalInput - 1];
826
- } else {
827
- this.firHistory[0] = this.firHistory[1];
828
- this.firHistory[1] = input[0];
829
- }
830
- return out;
831
- }
832
- // ---------------------------------------------------------------------------
833
- // Private: 8 kHz → 16 kHz
834
- // ---------------------------------------------------------------------------
835
- /**
836
- * 1:2 linear-interpolation upsampler.
837
- *
838
- * For the first chunk (no history): emits 2*(N-1) samples and defers the
839
- * last sample. For subsequent chunks (with history): emits the deferred
840
- * sample + its interpolated midpoint THEN 2*(N-1) samples from the new
841
- * chunk, deferring the new last sample. Total across K chunks + flush =
842
- * 2*total_input_samples (correct output length).
843
- *
844
- * Call flush() after the final chunk to emit the last deferred sample
845
- * pair (self-duplicate at end of stream).
846
- */
847
- _upsample8kTo16k(buf) {
848
- const sampleCount = buf.length >> 1;
849
- if (sampleCount === 0) return Buffer.alloc(0);
850
- const outArr = [];
851
- if (this.upsampleHasHistory) {
852
- const next = buf.readInt16LE(0);
853
- outArr.push(this.upsampleLast);
854
- outArr.push(Math.round((this.upsampleLast + next) / 2));
855
- }
856
- for (let i = 0; i < sampleCount - 1; i++) {
857
- const s0 = buf.readInt16LE(i * 2);
858
- const s1 = buf.readInt16LE((i + 1) * 2);
859
- outArr.push(s0);
860
- outArr.push(Math.round((s0 + s1) / 2));
861
- }
862
- this.upsampleLast = buf.readInt16LE((sampleCount - 1) * 2);
863
- this.upsampleHasHistory = true;
864
- const outBuf = Buffer.alloc(outArr.length * 2);
865
- for (let j = 0; j < outArr.length; j++) outBuf.writeInt16LE(outArr[j], j * 2);
866
- return outBuf;
867
- }
868
- // ---------------------------------------------------------------------------
869
- // Private: 24 kHz → 16 kHz / 8 kHz
870
- // ---------------------------------------------------------------------------
871
- /**
872
- * 3:2 linear-interpolation decimator (ratio srcRate/dstRate = 1.5).
873
- *
874
- * `resample24Phase` tracks the fractional input position of the next output
875
- * sample relative to the START of the next chunk. Negative phase means the
876
- * next output straddles the previous/current chunk boundary; those are
877
- * handled using `resample24Last`.
878
- */
879
- _resample24kTo16k(buf) {
880
- return this._resample24kStep(buf, 24e3 / 16e3);
881
- }
882
- /** 3:1 decimation — collapses the 24k→16k→8k chain into a single step. */
883
- _resample24kTo8k(buf) {
884
- return this._resample24kStep(buf, 24e3 / 8e3);
885
- }
886
- /** Shared phase-stepping resampler used by 24→16 (step 1.5) and 24→8 (step 3). */
887
- _resample24kStep(buf, step) {
888
- const sampleCount = buf.length >> 1;
889
- if (sampleCount === 0) return Buffer.alloc(0);
890
- const outArr = [];
891
- let phase = this.resample24Phase;
892
- while (true) {
893
- const idx = Math.floor(phase);
894
- if (idx >= sampleCount) break;
895
- const frac = phase - idx;
896
- let s0;
897
- let s1;
898
- if (idx < 0) {
899
- s0 = this.resample24HasHistory ? this.resample24Last : 0;
900
- s1 = buf.readInt16LE(0);
901
- } else {
902
- s0 = buf.readInt16LE(idx * 2);
903
- s1 = idx + 1 < sampleCount ? buf.readInt16LE((idx + 1) * 2) : s0;
904
- }
905
- const interp = Math.round(s0 + (s1 - s0) * frac);
906
- outArr.push(Math.max(-32768, Math.min(32767, interp)));
907
- phase += step;
908
- }
909
- this.resample24Last = buf.readInt16LE((sampleCount - 1) * 2);
910
- this.resample24HasHistory = true;
911
- this.resample24Phase = phase - sampleCount;
912
- const outBuf = Buffer.alloc(outArr.length * 2);
913
- for (let j = 0; j < outArr.length; j++) outBuf.writeInt16LE(outArr[j], j * 2);
914
- return outBuf;
915
- }
916
- };
917
- function createResampler16kTo8k() {
918
- return new StatefulResampler({ srcRate: 16e3, dstRate: 8e3 });
919
- }
920
- function createResampler8kTo16k() {
921
- return new StatefulResampler({ srcRate: 8e3, dstRate: 16e3 });
922
- }
923
- function createResampler24kTo16k() {
924
- return new StatefulResampler({ srcRate: 24e3, dstRate: 16e3 });
925
- }
926
- function createResampler24kTo8k() {
927
- return new StatefulResampler({ srcRate: 24e3, dstRate: 8e3 });
928
- }
929
- var _warnedResample8kTo16k = false;
930
- var _warnedResample16kTo8k = false;
931
- var _warnedResample24kTo16k = false;
932
- function resample8kTo16k(pcm8k) {
933
- if (!_warnedResample8kTo16k) {
934
- _warnedResample8kTo16k = true;
935
- getLogger().warn(
936
- "[patter] resample8kTo16k() is deprecated. Use createResampler8kTo16k() (StatefulResampler) to eliminate chunk-boundary discontinuities."
937
- );
938
- }
939
- if (pcm8k.length === 0) return Buffer.alloc(0);
940
- const r = createResampler8kTo16k();
941
- const main = r.process(pcm8k);
942
- const tail = r.flush();
943
- return tail.length > 0 ? Buffer.concat([main, tail]) : main;
944
- }
945
- function resample16kTo8k(pcm16k) {
946
- if (!_warnedResample16kTo8k) {
947
- _warnedResample16kTo8k = true;
948
- getLogger().warn(
949
- "[patter] resample16kTo8k() is deprecated. Use createResampler16kTo8k() (StatefulResampler) to eliminate chunk-boundary discontinuities."
950
- );
951
- }
952
- if (pcm16k.length === 0) return Buffer.alloc(0);
953
- const r = createResampler16kTo8k();
954
- const out = r.process(pcm16k);
955
- const tail = r.flush();
956
- return tail.length > 0 ? Buffer.concat([out, tail]) : out;
957
- }
958
- function resample24kTo16k(pcm24k) {
959
- if (!_warnedResample24kTo16k) {
960
- _warnedResample24kTo16k = true;
961
- getLogger().warn(
962
- "[patter] resample24kTo16k() is deprecated. Use createResampler24kTo16k() (StatefulResampler) or OpenAITTS.resampleStreaming for anti-aliased resampling."
963
- );
964
- }
965
- if (pcm24k.length === 0) return Buffer.alloc(0);
966
- const sampleCount = Math.floor(pcm24k.length / 2);
967
- const outSamples = Math.floor(sampleCount * 2 / 3);
968
- const out = Buffer.alloc(outSamples * 2);
969
- for (let i = 0; i < outSamples; i++) {
970
- const pos = i * 1.5;
971
- const idx = Math.floor(pos);
972
- const frac = pos - idx;
973
- const s0 = pcm24k.readInt16LE(idx * 2);
974
- const s1 = idx + 1 < sampleCount ? pcm24k.readInt16LE((idx + 1) * 2) : s0;
975
- const interp = Math.round(s0 + (s1 - s0) * frac);
976
- out.writeInt16LE(Math.max(-32768, Math.min(32767, interp)), i * 2);
977
- }
978
- return out;
979
- }
980
-
981
- // src/providers/openai-realtime-2.ts
982
- var GA_TO_V1_EVENT_NAMES = {
983
- "response.output_audio.delta": "response.audio.delta",
984
- "response.output_audio.done": "response.audio.done",
985
- "response.output_audio_transcript.delta": "response.audio_transcript.delta",
986
- "response.output_audio_transcript.done": "response.audio_transcript.done"
987
- };
988
- var OpenAIRealtime2Adapter = class extends OpenAIRealtimeAdapter {
989
- /** Two-stage outbound resampler for 24 kHz → 8 kHz. Created lazily on
990
- * the first audio frame so each Realtime session has its own state.
991
- *
992
- * We chain `24k → 16k → 8k` instead of using the direct `24k → 8k`
993
- * variant of {@link StatefulResampler}: the direct path is a 3:1
994
- * decimation with linear interpolation only — no anti-alias filter
995
- * — so any energy above 4 kHz in the source aliases down into the
996
- * audible band and is heard as raspy/scratchy artefacts on speech.
997
- * `gpt-realtime-2` outputs voice with significant content above
998
- * 4 kHz. The second stage (16k → 8k) uses a 5-tap FIR anti-alias
999
- * filter which removes the offending band before decimation, and
1000
- * empirically (see commit message) the chain produces audibly
1001
- * cleaner output. The 24k → 16k step is still pure linear-interp
1002
- * but the inputs to it stay below the Nyquist of the 16 kHz stage,
1003
- * so it doesn't introduce new artefacts.
1004
- */
1005
- outboundResampler24To16 = null;
1006
- outboundResampler16To8 = null;
1007
- /** Last 8 kHz input sample carried across chunk boundaries for the
1008
- * direct 3× linear upsample (see `transcodeInboundMulaw8ToPcm24`).
1009
- * The carry guarantees the very first output of each chunk
1010
- * interpolates from the *real* preceding sample, not from the chunk's
1011
- * own first sample replicated — without it every 20 ms Twilio frame
1012
- * boundary becomes a small DC step that the GA server VAD interprets
1013
- * as constant low-energy noise, which never crosses the speech
1014
- * threshold. */
1015
- inbound8kCarry = null;
1016
- /** GA-shape `session.update` payload. See module-level docstring. */
1017
- buildGASessionConfig() {
1018
- const opts = this.options;
1019
- const fmt = { type: "audio/pcm", rate: 24e3 };
1020
- const config = {
1021
- type: "realtime",
1022
- output_modalities: opts.modalities ?? ["audio"],
1023
- audio: {
1024
- input: {
1025
- format: fmt,
1026
- transcription: {
1027
- model: opts.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
1028
- },
1029
- // Lower threshold (0.1 vs the 0.5 default) because the inbound
1030
- // audio is telephony-band (8 kHz) linearly upsampled to 24 kHz —
1031
- // the upper 4-12 kHz band is interpolation, not real harmonics,
1032
- // and the GA server VAD's default tuning was calibrated against
1033
- // studio-quality 24 kHz audio. A more permissive threshold
1034
- // recovers reliable speech detection on phone-band input.
1035
- turn_detection: {
1036
- type: opts.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
1037
- threshold: 0.1,
1038
- prefix_padding_ms: 300,
1039
- silence_duration_ms: opts.silenceDurationMs ?? 500
1040
- }
1041
- },
1042
- output: {
1043
- format: fmt,
1044
- voice: this.voice
1045
- }
1046
- },
1047
- instructions: this.instructions || "You are a helpful voice assistant. Be concise."
1048
- };
1049
- if (opts.temperature !== void 0) config.temperature = opts.temperature;
1050
- if (opts.maxResponseOutputTokens !== void 0) {
1051
- config.max_output_tokens = opts.maxResponseOutputTokens;
1052
- }
1053
- if (opts.toolChoice !== void 0) config.tool_choice = opts.toolChoice;
1054
- if (opts.reasoningEffort !== void 0) {
1055
- config.reasoning = { effort: opts.reasoningEffort };
1056
- }
1057
- if (this.tools?.length) {
1058
- config.tools = this.tools.map((t) => {
1059
- const def = {
1060
- type: "function",
1061
- name: t.name,
1062
- description: t.description,
1063
- parameters: t.parameters
1064
- };
1065
- if (t.strict === true) def.strict = true;
1066
- return def;
1067
- });
1068
- }
1069
- return config;
1070
- }
1071
- /**
1072
- * Open the Realtime WebSocket against the GA endpoint and apply the GA
1073
- * session configuration. Header `OpenAI-Beta: realtime=v1` is OMITTED
1074
- * (the GA endpoint rejects it). Wire shape uses nested `audio.{input,
1075
- * output}` + `output_modalities` + `session.type === "realtime"`.
1076
- */
1077
- async connect() {
1078
- const url = `wss://api.openai.com/v1/realtime?model=${encodeURIComponent(this.model)}`;
1079
- this.ws = new WebSocket2(url, {
1080
- headers: { Authorization: `Bearer ${this.apiKey}` }
1081
- });
1082
- const wsRef = this.ws;
1083
- const originalOn = wsRef.on.bind(this.ws);
1084
- wsRef.on = (event, handler) => {
1085
- if (event !== "message") return originalOn(event, handler);
1086
- const wrapped = (raw, ...rest) => {
1087
- try {
1088
- const text = typeof raw === "string" ? raw : raw.toString();
1089
- const parsed = JSON.parse(text);
1090
- const t = parsed.type;
1091
- if (t && t in GA_TO_V1_EVENT_NAMES) {
1092
- const newType = GA_TO_V1_EVENT_NAMES[t];
1093
- if (t === "response.output_audio.delta" && typeof parsed.delta === "string") {
1094
- const mulaw = this.transcodeOutboundPcm24ToMulaw8Buffer(parsed.delta);
1095
- const FRAME_BYTES = 160;
1096
- if (mulaw.length === 0) return;
1097
- for (let off = 0; off < mulaw.length; off += FRAME_BYTES) {
1098
- const slice = mulaw.subarray(off, Math.min(off + FRAME_BYTES, mulaw.length));
1099
- const frame = { ...parsed, type: newType, delta: slice.toString("base64") };
1100
- handler(Buffer.from(JSON.stringify(frame)), ...rest);
1101
- }
1102
- return;
1103
- }
1104
- parsed.type = newType;
1105
- handler(Buffer.from(JSON.stringify(parsed)), ...rest);
1106
- return;
1107
- }
1108
- } catch {
1109
- }
1110
- handler(raw, ...rest);
1111
- };
1112
- return originalOn(event, wrapped);
1113
- };
1114
- await new Promise((resolve, reject) => {
1115
- let sessionCreated = false;
1116
- let settled = false;
1117
- const ws = this.ws;
1118
- const onSetupMessage = (raw) => {
1119
- let msg;
1120
- try {
1121
- msg = JSON.parse(raw.toString());
1122
- } catch (e) {
1123
- getLogger().warn(`OpenAI Realtime 2: failed to parse message: ${String(e)}`);
1124
- return;
1125
- }
1126
- if (msg.type === "session.created" && !sessionCreated) {
1127
- sessionCreated = true;
1128
- ws.send(JSON.stringify({ type: "session.update", session: this.buildGASessionConfig() }));
1129
- } else if (msg.type === "session.updated") {
1130
- cleanup();
1131
- resolve();
1132
- } else if (msg.type === "error") {
1133
- cleanup();
1134
- try {
1135
- ws.close();
1136
- } catch {
1137
- }
1138
- reject(new Error(`OpenAI Realtime 2 setup error: ${msg.error?.message ?? JSON.stringify(msg)}`));
1139
- }
1140
- };
1141
- const onSetupError = (err) => {
1142
- cleanup();
1143
- try {
1144
- ws.close();
1145
- } catch {
1146
- }
1147
- reject(err);
1148
- };
1149
- const cleanup = () => {
1150
- if (settled) return;
1151
- settled = true;
1152
- clearTimeout(timer);
1153
- ws.off("message", onSetupMessage);
1154
- ws.off("error", onSetupError);
1155
- };
1156
- const timer = setTimeout(() => {
1157
- cleanup();
1158
- try {
1159
- ws.close();
1160
- } catch {
1161
- }
1162
- reject(new Error("OpenAI Realtime 2 connect timeout"));
1163
- }, 15e3);
1164
- ws.on("message", onSetupMessage);
1165
- ws.on("error", onSetupError);
1166
- });
1167
- this.armHeartbeatAndListener();
1168
- }
1169
- /**
1170
- * GA-API variant of {@link OpenAIRealtimeAdapter.sendFirstMessage}. Two
1171
- * differences from the v1 path:
1172
- *
1173
- * 1. The v1 implementation sends `response.modalities` which the GA
1174
- * endpoint rejects with `Unknown parameter: 'response.modalities'`.
1175
- * Use `output_modalities` to match the GA `session.update` shape.
1176
- *
1177
- * 2. The GA `response.create` does NOT inherit `audio.output.voice`
1178
- * from the session — it falls back to the server-side default
1179
- * (`marin`, female) when the field is omitted on the response
1180
- * itself. Session-level `voice: "alloy"` only affects subsequent
1181
- * server-VAD-triggered responses, NOT this explicit
1182
- * `response.create`. We re-inject the configured voice here so the
1183
- * first-message voice matches the rest of the call.
1184
- */
1185
- /**
1186
- * Override the parent `sendAudio` to transcode inbound carrier audio
1187
- * (mulaw 8 kHz from Twilio/Telnyx) into PCM-16 24 kHz before sending
1188
- * `input_audio_buffer.append`. The GA server's audio engine ignores
1189
- * mulaw frames (commit returns "buffer only has 0.00ms of audio") even
1190
- * though it accepts `audio/pcmu` at the protocol level.
1191
- */
1192
- sendAudio(mulawAudio) {
1193
- if (!this.ws || this.ws.readyState !== this.ws.OPEN) return;
1194
- const pcm24k = this.transcodeInboundMulaw8ToPcm24(mulawAudio);
1195
- this.ws.send(JSON.stringify({
1196
- type: "input_audio_buffer.append",
1197
- audio: pcm24k.toString("base64")
1198
- }));
1199
- }
1200
- /**
1201
- * mulaw 8 kHz Buffer → PCM-16-LE 24 kHz Buffer.
1202
- *
1203
- * Direct 3× linear-interpolation upsample with a one-sample carry
1204
- * across chunk boundaries. For every consecutive pair of 8 kHz
1205
- * samples `(s_a, s_b)` we emit three 24 kHz samples:
1206
- *
1207
- * out_0 = s_a
1208
- * out_1 = 2/3·s_a + 1/3·s_b
1209
- * out_2 = 1/3·s_a + 2/3·s_b
1210
- *
1211
- * The carry stores the last 8 kHz sample of the chunk so the next
1212
- * chunk can start by pairing `(carry, firstNewSample)` — that's what
1213
- * keeps the output rate exact (each input sample → 3 output samples)
1214
- * and eliminates the chunk-boundary DC step that confused the GA
1215
- * server VAD. The first chunk has no carry and so yields 3 fewer
1216
- * 24 kHz output samples (125 µs of audio); that's well below any audible
1217
- * artefact and well below the GA VAD's 300 ms prefix-padding window.
1218
- */
1219
- transcodeInboundMulaw8ToPcm24(mulaw) {
1220
- const pcm8 = mulawToPcm16(mulaw);
1221
- const samples8 = pcm8.length / 2;
1222
- if (samples8 === 0) return Buffer.alloc(0);
1223
- const GAIN = 2;
1224
- const inputs = [];
1225
- if (this.inbound8kCarry !== null) inputs.push(this.inbound8kCarry);
1226
- for (let i = 0; i < samples8; i++) {
1227
- const raw = pcm8.readInt16LE(i * 2) * GAIN;
1228
- inputs.push(Math.max(-32768, Math.min(32767, raw)));
1229
- }
1230
- this.inbound8kCarry = inputs[inputs.length - 1];
1231
- const numPairs = inputs.length - 1;
1232
- if (numPairs <= 0) return Buffer.alloc(0);
1233
- const out = Buffer.allocUnsafe(numPairs * 3 * 2);
1234
- for (let i = 0; i < numPairs; i++) {
1235
- const s0 = inputs[i];
1236
- const s1 = inputs[i + 1];
1237
- out.writeInt16LE(s0, i * 6);
1238
- out.writeInt16LE(Math.round((s0 * 2 + s1) / 3), i * 6 + 2);
1239
- out.writeInt16LE(Math.round((s0 + s1 * 2) / 3), i * 6 + 4);
1240
- }
1241
- return out;
1242
- }
1243
- /**
1244
- * Base64 PCM-16-LE 24 kHz → mulaw 8 kHz Buffer. Used by the WS
1245
- * translation shim on each `response.output_audio.delta`. The stateful
1246
- * resamplers (24k → 16k, then 16k → 8k) are created lazily and reused
1247
- * across all deltas in this session so their filter/phase state carries
1248
- * across chunk boundaries — without that, every chunk boundary produces a click.
1249
- */
1250
- transcodeOutboundPcm24ToMulaw8Buffer(deltaB64) {
1251
- if (!this.outboundResampler24To16) {
1252
- this.outboundResampler24To16 = new StatefulResampler({ srcRate: 24e3, dstRate: 16e3 });
1253
- this.outboundResampler16To8 = new StatefulResampler({ srcRate: 16e3, dstRate: 8e3 });
1254
- }
1255
- const pcm24 = Buffer.from(deltaB64, "base64");
1256
- const pcm16 = this.outboundResampler24To16.process(pcm24);
1257
- const pcm8 = this.outboundResampler16To8.process(pcm16);
1258
- if (pcm8.length === 0) return Buffer.alloc(0);
1259
- return pcm16ToMulaw(pcm8);
1260
- }
1261
- async sendFirstMessage(text) {
1262
- this.ws?.send(JSON.stringify({
1263
- type: "response.create",
1264
- response: {
1265
- output_modalities: ["audio"],
1266
- audio: { output: { voice: this.voice } },
1267
- reasoning: { effort: "minimal" },
1268
- instructions: `Say exactly the following sentence as your first turn and nothing else: "${text}"`
1269
- }
1270
- }));
1271
- }
1272
- };
1273
-
1274
32
  // src/providers/elevenlabs-convai.ts
1275
33
  init_esm_shims();
1276
- import WebSocket3 from "ws";
34
+ import WebSocket from "ws";
1277
35
  var ELEVENLABS_CONVAI_URL = "wss://api.elevenlabs.io/v1/convai/conversation";
1278
36
  var ELEVENLABS_SIGNED_URL = "https://api.elevenlabs.io/v1/convai/conversation/get-signed-url";
1279
37
  var AGENT_SILENCE_MS = 500;
@@ -1395,8 +153,8 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
1395
153
  wsUrl = this.agentId ? `${ELEVENLABS_CONVAI_URL}?agent_id=${encodeURIComponent(this.agentId)}` : ELEVENLABS_CONVAI_URL;
1396
154
  wsOptions = { headers: { "xi-api-key": this.apiKey } };
1397
155
  }
1398
- this.ws = new WebSocket3(wsUrl, wsOptions);
1399
- await new Promise((resolve, reject) => {
156
+ this.ws = new WebSocket(wsUrl, wsOptions);
157
+ await new Promise((resolve2, reject) => {
1400
158
  const timeout = setTimeout(
1401
159
  () => reject(new Error("ElevenLabs ConvAI connect timeout")),
1402
160
  15e3
@@ -1420,7 +178,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
1420
178
  conversation_config_override: override
1421
179
  };
1422
180
  this.ws.send(JSON.stringify(config));
1423
- resolve();
181
+ resolve2();
1424
182
  });
1425
183
  this.ws.once("error", (err) => {
1426
184
  clearTimeout(timeout);
@@ -1457,7 +215,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
1457
215
  }
1458
216
  respondToPing(eventId, delayMs) {
1459
217
  const send = () => {
1460
- if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
218
+ if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
1461
219
  try {
1462
220
  this.ws.send(JSON.stringify({ type: "pong", event_id: eventId }));
1463
221
  } catch (err) {
@@ -1554,7 +312,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
1554
312
  }
1555
313
  /** Send a caller-side audio chunk to ConvAI as a base64 `user_audio_chunk`. */
1556
314
  sendAudio(audioBytes) {
1557
- if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
315
+ if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
1558
316
  this.ws.send(
1559
317
  JSON.stringify({
1560
318
  user_audio_chunk: audioBytes.toString("base64")
@@ -1577,20 +335,20 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
1577
335
  return;
1578
336
  }
1579
337
  const ws = this.ws;
1580
- this.closePromise = new Promise((resolve) => {
1581
- if (ws.readyState === WebSocket3.CLOSED || ws.readyState === WebSocket3.CLOSING) {
1582
- resolve();
338
+ this.closePromise = new Promise((resolve2) => {
339
+ if (ws.readyState === WebSocket.CLOSED || ws.readyState === WebSocket.CLOSING) {
340
+ resolve2();
1583
341
  return;
1584
342
  }
1585
343
  const done = () => {
1586
- resolve();
344
+ resolve2();
1587
345
  };
1588
346
  ws.once("close", done);
1589
347
  ws.once("error", done);
1590
348
  try {
1591
349
  ws.close();
1592
350
  } catch {
1593
- resolve();
351
+ resolve2();
1594
352
  }
1595
353
  });
1596
354
  try {
@@ -1614,6 +372,8 @@ async function createTTS(agent) {
1614
372
 
1615
373
  // src/pricing.ts
1616
374
  init_esm_shims();
375
+ var PRICING_VERSION = "2026.3";
376
+ var PRICING_LAST_UPDATED = "2026-05-08";
1617
377
  var PricingUnit = {
1618
378
  MINUTE: "minute",
1619
379
  THOUSAND_CHARS: "1k_chars",
@@ -2063,8 +823,28 @@ function calculateTelephonyCost(provider2, durationSeconds, pricing) {
2063
823
  // src/dashboard/store.ts
2064
824
  init_esm_shims();
2065
825
  import { EventEmitter } from "events";
826
+ import * as fs2 from "fs";
827
+ import * as path2 from "path";
828
+
829
+ // src/version.ts
830
+ init_esm_shims();
2066
831
  import * as fs from "fs";
2067
832
  import * as path from "path";
833
+ function readVersion() {
834
+ try {
835
+ const pkgPath = path.resolve(__dirname, "..", "package.json");
836
+ const pkg = JSON.parse(fs.readFileSync(pkgPath, "utf8"));
837
+ return typeof pkg.version === "string" && pkg.version.length > 0 ? pkg.version : "";
838
+ } catch {
839
+ return "";
840
+ }
841
+ }
842
+ var VERSION = readVersion();
843
+
844
+ // src/dashboard/store.ts
845
+ function sdkVersion() {
846
+ return VERSION;
847
+ }
2068
848
  var MetricsStore = class extends EventEmitter {
2069
849
  maxCalls;
2070
850
  calls = [];
@@ -2347,15 +1127,15 @@ var MetricsStore = class extends EventEmitter {
2347
1127
  persistDeletedIds() {
2348
1128
  if (this.deletedIdsPath === null) return;
2349
1129
  try {
2350
- const dir = path.dirname(this.deletedIdsPath);
2351
- fs.mkdirSync(dir, { recursive: true });
1130
+ const dir = path2.dirname(this.deletedIdsPath);
1131
+ fs2.mkdirSync(dir, { recursive: true });
2352
1132
  const tmp = this.deletedIdsPath + ".tmp";
2353
1133
  const payload = {
2354
1134
  version: 1,
2355
1135
  deleted_call_ids: Array.from(this.deletedCallIds).sort()
2356
1136
  };
2357
- fs.writeFileSync(tmp, JSON.stringify(payload, null, 2), "utf8");
2358
- fs.renameSync(tmp, this.deletedIdsPath);
1137
+ fs2.writeFileSync(tmp, JSON.stringify(payload, null, 2), "utf8");
1138
+ fs2.renameSync(tmp, this.deletedIdsPath);
2359
1139
  } catch (err) {
2360
1140
  getLogger().debug(
2361
1141
  `MetricsStore.persistDeletedIds: ${String(err)}`
@@ -2388,7 +1168,8 @@ var MetricsStore = class extends EventEmitter {
2388
1168
  avg_duration: 0,
2389
1169
  avg_latency_ms: 0,
2390
1170
  cost_breakdown: { stt: 0, tts: 0, llm: 0, telephony: 0 },
2391
- active_calls: this.activeCalls.size
1171
+ active_calls: this.activeCalls.size,
1172
+ sdk_version: sdkVersion()
2392
1173
  };
2393
1174
  }
2394
1175
  let totalCost = 0;
@@ -2427,7 +1208,8 @@ var MetricsStore = class extends EventEmitter {
2427
1208
  llm: Math.round(costLlm * 1e6) / 1e6,
2428
1209
  telephony: Math.round(costTel * 1e6) / 1e6
2429
1210
  },
2430
- active_calls: this.activeCalls.size
1211
+ active_calls: this.activeCalls.size,
1212
+ sdk_version: sdkVersion()
2431
1213
  };
2432
1214
  }
2433
1215
  /**
@@ -2463,11 +1245,11 @@ var MetricsStore = class extends EventEmitter {
2463
1245
  */
2464
1246
  hydrate(logRoot) {
2465
1247
  if (!logRoot) return 0;
2466
- const deletedIdsPath = path.join(logRoot, ".deleted_call_ids.json");
1248
+ const deletedIdsPath = path2.join(logRoot, ".deleted_call_ids.json");
2467
1249
  this.deletedIdsPath = deletedIdsPath;
2468
- if (fs.existsSync(deletedIdsPath)) {
1250
+ if (fs2.existsSync(deletedIdsPath)) {
2469
1251
  try {
2470
- const raw = fs.readFileSync(deletedIdsPath, "utf8");
1252
+ const raw = fs2.readFileSync(deletedIdsPath, "utf8");
2471
1253
  const payload = JSON.parse(raw);
2472
1254
  const arr = Array.isArray(payload.deleted_call_ids) ? payload.deleted_call_ids : [];
2473
1255
  for (const cid of arr) {
@@ -2481,19 +1263,19 @@ var MetricsStore = class extends EventEmitter {
2481
1263
  );
2482
1264
  }
2483
1265
  }
2484
- const callsRoot = path.join(logRoot, "calls");
2485
- if (!fs.existsSync(callsRoot)) return 0;
1266
+ const callsRoot = path2.join(logRoot, "calls");
1267
+ if (!fs2.existsSync(callsRoot)) return 0;
2486
1268
  const collected = [];
2487
1269
  const seen = new Set(this.calls.map((c) => c.call_id));
2488
1270
  const walk = (dir, depth) => {
2489
1271
  let entries;
2490
1272
  try {
2491
- entries = fs.readdirSync(dir, { withFileTypes: true });
1273
+ entries = fs2.readdirSync(dir, { withFileTypes: true });
2492
1274
  } catch {
2493
1275
  return;
2494
1276
  }
2495
1277
  for (const entry of entries) {
2496
- const childPath = path.join(dir, entry.name);
1278
+ const childPath = path2.join(dir, entry.name);
2497
1279
  if (depth < 3) {
2498
1280
  if (entry.isDirectory() && /^\d+$/.test(entry.name)) {
2499
1281
  walk(childPath, depth + 1);
@@ -2501,10 +1283,10 @@ var MetricsStore = class extends EventEmitter {
2501
1283
  continue;
2502
1284
  }
2503
1285
  if (!entry.isDirectory()) continue;
2504
- const metadataPath = path.join(childPath, "metadata.json");
2505
- if (!fs.existsSync(metadataPath)) continue;
1286
+ const metadataPath = path2.join(childPath, "metadata.json");
1287
+ if (!fs2.existsSync(metadataPath)) continue;
2506
1288
  try {
2507
- const raw = fs.readFileSync(metadataPath, "utf8");
1289
+ const raw = fs2.readFileSync(metadataPath, "utf8");
2508
1290
  const meta = JSON.parse(raw);
2509
1291
  const callId = meta.call_id || entry.name;
2510
1292
  if (!callId || seen.has(callId)) continue;
@@ -2517,7 +1299,7 @@ var MetricsStore = class extends EventEmitter {
2517
1299
  }
2518
1300
  if (!record.transcript || record.transcript.length === 0) {
2519
1301
  const fromJsonl = loadTranscriptJsonl(
2520
- path.join(childPath, "transcript.jsonl")
1302
+ path2.join(childPath, "transcript.jsonl")
2521
1303
  );
2522
1304
  if (fromJsonl.length > 0) record.transcript = fromJsonl;
2523
1305
  }
@@ -2596,8 +1378,8 @@ function metadataToCallRecord(callId, meta) {
2596
1378
  }
2597
1379
  function loadTranscriptJsonl(filePath) {
2598
1380
  try {
2599
- if (!fs.existsSync(filePath)) return [];
2600
- const raw = fs.readFileSync(filePath, "utf8");
1381
+ if (!fs2.existsSync(filePath)) return [];
1382
+ const raw = fs2.readFileSync(filePath, "utf8");
2601
1383
  const lines = raw.split("\n").filter((l) => l.trim().length > 0);
2602
1384
  const out = [];
2603
1385
  for (const line of lines) {
@@ -2731,7 +1513,7 @@ function csvEscape(value) {
2731
1513
 
2732
1514
  // src/dashboard/ui.ts
2733
1515
  init_esm_shims();
2734
- import { readFileSync as readFileSync2 } from "fs";
1516
+ import { readFileSync as readFileSync3 } from "fs";
2735
1517
  import { join as join2, dirname as dirname2 } from "path";
2736
1518
  var FALLBACK_HTML = `<!doctype html>
2737
1519
  <html><head><meta charset="utf-8"><title>Patter dashboard</title></head>
@@ -2748,9 +1530,9 @@ function loadDashboardHtml() {
2748
1530
  join2(here, "dashboard", "ui.html"),
2749
1531
  join2(here, "..", "dashboard", "ui.html")
2750
1532
  ];
2751
- for (const path3 of candidates) {
1533
+ for (const path4 of candidates) {
2752
1534
  try {
2753
- return readFileSync2(path3, "utf8");
1535
+ return readFileSync3(path4, "utf8");
2754
1536
  } catch {
2755
1537
  }
2756
1538
  }
@@ -3047,8 +1829,8 @@ var RemoteMessageHandler = class {
3047
1829
  "WebSocket URL uses unencrypted ws:// \u2014 call transcripts and phone numbers will be sent in plaintext. Use wss:// in production."
3048
1830
  );
3049
1831
  }
3050
- const { WebSocket: WebSocket5 } = await import("ws");
3051
- const ws = new WebSocket5(url);
1832
+ const { WebSocket: WebSocket3 } = await import("ws");
1833
+ const ws = new WebSocket3(url);
3052
1834
  const chunks = [];
3053
1835
  let done = false;
3054
1836
  let error = null;
@@ -3102,10 +1884,10 @@ var RemoteMessageHandler = class {
3102
1884
  }
3103
1885
  });
3104
1886
  try {
3105
- await new Promise((resolve, reject) => {
1887
+ await new Promise((resolve2, reject) => {
3106
1888
  ws.on("open", () => {
3107
1889
  ws.send(JSON.stringify(data));
3108
- resolve();
1890
+ resolve2();
3109
1891
  });
3110
1892
  ws.on("error", (err) => {
3111
1893
  reject(err);
@@ -3115,11 +1897,11 @@ var RemoteMessageHandler = class {
3115
1897
  yield chunks.shift();
3116
1898
  }
3117
1899
  while (!done && !error) {
3118
- const text = await new Promise((resolve) => {
1900
+ const text = await new Promise((resolve2) => {
3119
1901
  if (chunks.length > 0) {
3120
- resolve(chunks.shift());
1902
+ resolve2(chunks.shift());
3121
1903
  } else {
3122
- resolveNext = resolve;
1904
+ resolveNext = resolve2;
3123
1905
  }
3124
1906
  });
3125
1907
  if (text === null) break;
@@ -3146,7 +1928,7 @@ init_esm_shims();
3146
1928
 
3147
1929
  // src/providers/deepgram-stt.ts
3148
1930
  init_esm_shims();
3149
- import WebSocket4 from "ws";
1931
+ import WebSocket2 from "ws";
3150
1932
 
3151
1933
  // src/errors.ts
3152
1934
  init_esm_shims();
@@ -3327,8 +2109,8 @@ var DeepgramSTT = class _DeepgramSTT {
3327
2109
  const url = `${DEEPGRAM_WS_URL}?${params.toString()}`;
3328
2110
  let ws = null;
3329
2111
  try {
3330
- ws = await new Promise((resolve, reject) => {
3331
- const sock = new WebSocket4(url, {
2112
+ ws = await new Promise((resolve2, reject) => {
2113
+ const sock = new WebSocket2(url, {
3332
2114
  headers: { Authorization: `Token ${this.apiKey}` }
3333
2115
  });
3334
2116
  const timer = setTimeout(() => {
@@ -3340,7 +2122,7 @@ var DeepgramSTT = class _DeepgramSTT {
3340
2122
  }, 5e3);
3341
2123
  sock.once("open", () => {
3342
2124
  clearTimeout(timer);
3343
- resolve(sock);
2125
+ resolve2(sock);
3344
2126
  });
3345
2127
  sock.once("error", (err) => {
3346
2128
  clearTimeout(timer);
@@ -3367,11 +2149,11 @@ var DeepgramSTT = class _DeepgramSTT {
3367
2149
  }
3368
2150
  async openSocket() {
3369
2151
  const url = this.buildUrl();
3370
- const ws = new WebSocket4(url, {
2152
+ const ws = new WebSocket2(url, {
3371
2153
  headers: { Authorization: `Token ${this.apiKey}` }
3372
2154
  });
3373
2155
  this.ws = ws;
3374
- await new Promise((resolve, reject) => {
2156
+ await new Promise((resolve2, reject) => {
3375
2157
  let settled = false;
3376
2158
  const settle = (fn) => {
3377
2159
  if (settled) return;
@@ -3383,7 +2165,7 @@ var DeepgramSTT = class _DeepgramSTT {
3383
2165
  () => settle(() => reject(new PatterConnectionError("Deepgram connect timeout"))),
3384
2166
  1e4
3385
2167
  );
3386
- ws.once("open", () => settle(resolve));
2168
+ ws.once("open", () => settle(resolve2));
3387
2169
  ws.once("error", (err) => settle(() => reject(err)));
3388
2170
  ws.once("unexpected-response", (_req, res) => {
3389
2171
  const status = res?.statusCode ?? 0;
@@ -3404,7 +2186,7 @@ var DeepgramSTT = class _DeepgramSTT {
3404
2186
  ws.on("close", (code, reason) => this.handleClose(code, reason.toString()));
3405
2187
  ws.on("error", (err) => this.handleError(err));
3406
2188
  this.keepaliveTimer = setInterval(() => {
3407
- if (this.ws && this.ws.readyState === WebSocket4.OPEN) {
2189
+ if (this.ws && this.ws.readyState === WebSocket2.OPEN) {
3408
2190
  try {
3409
2191
  this.ws.send(JSON.stringify({ type: "KeepAlive" }));
3410
2192
  } catch {
@@ -3523,7 +2305,7 @@ var DeepgramSTT = class _DeepgramSTT {
3523
2305
  }
3524
2306
  /** Send a binary audio chunk to Deepgram for transcription. */
3525
2307
  sendAudio(audio) {
3526
- if (!this.ws || this.ws.readyState !== WebSocket4.OPEN) {
2308
+ if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) {
3527
2309
  this.audioDroppedCount++;
3528
2310
  if (this.audioDroppedCount === 1 || this.audioDroppedCount % 50 === 0) {
3529
2311
  getLogger().info(
@@ -3572,7 +2354,7 @@ var DeepgramSTT = class _DeepgramSTT {
3572
2354
  */
3573
2355
  finalize() {
3574
2356
  const ws = this.ws;
3575
- if (!ws || ws.readyState !== WebSocket4.OPEN) {
2357
+ if (!ws || ws.readyState !== WebSocket2.OPEN) {
3576
2358
  getLogger().info(
3577
2359
  `[DIAG] DeepgramSTT.finalize SKIPPED (ws state=${ws?.readyState ?? "null"})`
3578
2360
  );
@@ -3593,7 +2375,7 @@ var DeepgramSTT = class _DeepgramSTT {
3593
2375
  if (!ws) return;
3594
2376
  this.ws = null;
3595
2377
  const sendSafe = (payload) => {
3596
- if (ws.readyState === WebSocket4.OPEN) {
2378
+ if (ws.readyState === WebSocket2.OPEN) {
3597
2379
  try {
3598
2380
  ws.send(payload);
3599
2381
  } catch {
@@ -3607,7 +2389,7 @@ var DeepgramSTT = class _DeepgramSTT {
3607
2389
  } catch {
3608
2390
  }
3609
2391
  };
3610
- if (ws.readyState !== WebSocket4.OPEN) {
2392
+ if (ws.readyState !== WebSocket2.OPEN) {
3611
2393
  finishClose();
3612
2394
  return;
3613
2395
  }
@@ -3676,6 +2458,21 @@ var CallMetricsAccumulator = class {
3676
2458
  _bargeinStoppedAt = null;
3677
2459
  _turnUserText = "";
3678
2460
  _turnSttAudioSeconds = 0;
2461
+ /**
2462
+ * Guard against the recordTurnInterrupted / recordTurnComplete race.
2463
+ *
2464
+ * A VAD-path barge-in fires ``recordTurnInterrupted`` synchronously
2465
+ * inside ``handleAudioAsync`` while the in-flight pipeline LLM stream
2466
+ * keeps unwinding on its own task. When the LLM stream eventually
2467
+ * exits, the existing pipeline path falls through to
2468
+ * ``recordTurnComplete``, which would push a second turn for the same
2469
+ * logical exchange (this time carrying ``user_text=''`` because the
2470
+ * field was already reset). ``_turnAlreadyClosed`` is flipped by
2471
+ * ``recordTurnInterrupted`` and read by ``recordTurnComplete`` so the
2472
+ * late ``recordTurnComplete`` becomes a no-op until the next
2473
+ * ``startTurn`` re-arms the accumulator.
2474
+ */
2475
+ _turnAlreadyClosed = false;
3679
2476
  // Cumulative usage counters
3680
2477
  _totalSttAudioSeconds = 0;
3681
2478
  _totalTtsCharacters = 0;
@@ -3773,6 +2570,7 @@ var CallMetricsAccumulator = class {
3773
2570
  this._bargeinStoppedAt = null;
3774
2571
  this._turnUserText = "";
3775
2572
  this._turnSttAudioSeconds = 0;
2573
+ this._turnAlreadyClosed = false;
3776
2574
  this._vadStoppedAt = null;
3777
2575
  this._sttFinalAt = null;
3778
2576
  this._turnCommittedAt = null;
@@ -3929,8 +2727,18 @@ var CallMetricsAccumulator = class {
3929
2727
  recordTtsStopped(ts) {
3930
2728
  this._bargeinStoppedAt = ts ?? hrTimeMs();
3931
2729
  }
3932
- /** Close the current turn cleanly and append a `TurnMetrics` record. */
2730
+ /**
2731
+ * Close the current turn cleanly and append a `TurnMetrics` record.
2732
+ *
2733
+ * Returns ``null`` when ``recordTurnInterrupted`` has already closed
2734
+ * the current turn — this protects against the VAD-barge-in /
2735
+ * pipeline-LLM race where both paths try to finalise the same logical
2736
+ * turn and the second would otherwise push a phantom entry with
2737
+ * ``user_text=''``. The caller treats ``null`` as "nothing to emit";
2738
+ * ``emitTurnMetrics`` is already null-safe.
2739
+ */
3933
2740
  recordTurnComplete(agentText) {
2741
+ if (this._turnAlreadyClosed) return null;
3934
2742
  const latency = this._computeTurnLatency();
3935
2743
  const turn = {
3936
2744
  turn_index: this._turns.length,
@@ -3943,13 +2751,23 @@ var CallMetricsAccumulator = class {
3943
2751
  };
3944
2752
  this._turns.push(turn);
3945
2753
  this._resetTurnState();
2754
+ this._turnAlreadyClosed = true;
3946
2755
  this._eventBus?.emit("turn_ended", { callId: this.callId, turn });
3947
2756
  this._eventBus?.emit("metrics_collected", { callId: this.callId, turn });
3948
2757
  return turn;
3949
2758
  }
3950
- /** Close the current turn as interrupted (barge-in) and return the recorded metrics. */
2759
+ /**
2760
+ * Close the current turn as interrupted (barge-in) and return the
2761
+ * recorded metrics. Returns ``null`` when no turn is open, OR when
2762
+ * ``recordTurnComplete`` has already finalised the current turn —
2763
+ * bidirectional parity with the guard at the top of
2764
+ * ``recordTurnComplete``. Prevents an out-of-order interruption (e.g.
2765
+ * a future refactor that reorders the bargein + LLM-unwind paths)
2766
+ * from overwriting a turn that the complete path already emitted.
2767
+ */
3951
2768
  recordTurnInterrupted() {
3952
2769
  if (this._turnStart === null) return null;
2770
+ if (this._turnAlreadyClosed) return null;
3953
2771
  const latency = this._computeTurnLatency();
3954
2772
  const turn = {
3955
2773
  turn_index: this._turns.length,
@@ -3964,6 +2782,7 @@ var CallMetricsAccumulator = class {
3964
2782
  this._eventBus?.emit("turn_ended", { callId: this.callId, turn });
3965
2783
  this._eventBus?.emit("metrics_collected", { callId: this.callId, turn });
3966
2784
  this._resetTurnState();
2785
+ this._turnAlreadyClosed = true;
3967
2786
  this._turnCommittedMono = null;
3968
2787
  this._endpointSignalAt = null;
3969
2788
  return turn;
@@ -5252,7 +4071,23 @@ var HALLUCINATIONS = /* @__PURE__ */ new Set([
5252
4071
  ".",
5253
4072
  "bye",
5254
4073
  "right",
5255
- "cool"
4074
+ "cool",
4075
+ // Whisper YouTube-caption hallucinations
4076
+ "thank you for watching",
4077
+ "thanks for watching",
4078
+ "thank you for watching!",
4079
+ "thanks for watching!",
4080
+ "thank you so much for watching",
4081
+ "thanks for listening",
4082
+ "please subscribe",
4083
+ "subscribe",
4084
+ "music",
4085
+ "[music]",
4086
+ "\u266A",
4087
+ "[no audio]",
4088
+ "[silence]",
4089
+ "[blank_audio]",
4090
+ "(silence)"
5256
4091
  ]);
5257
4092
  var StreamHandler = class _StreamHandler {
5258
4093
  deps;
@@ -5378,13 +4213,17 @@ var StreamHandler = class _StreamHandler {
5378
4213
  * Same as the AEC variant but for deployments where AEC is OFF
5379
4214
  * (default on PSTN — Twilio/Telnyx). Without an adaptive filter to
5380
4215
  * converge, the only justification for a gate is anti-flicker on
5381
- * micro-events (cough, click). 100 ms covers the first PSTN echo
5382
- * round-trip (~40-100 ms) while allowing barge-in from 100 ms into
5383
- * the agent's turn, covering nearly all of any response.
5384
- * Previously 250 ms, which blocked barge-in entirely on short (<500 ms)
5385
- * agent responses.
5386
- */
5387
- static MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_NO_AEC = 100;
4216
+ * micro-events (cough, click). Raised from 100 ms to 500 ms on 2026-05-19
4217
+ * after the 0.6.2 acceptance run showed a phantom VAD speech_start
4218
+ * firing on the very first inbound frame (~500 ms into the call,
4219
+ * which is past a 100 ms gate). The phantom barge-in cancelled the
4220
+ * prewarmed firstMessage, the user heard a clipped (scratchy)
4221
+ * audio fragment, and the SDK left ``_turnAlreadyClosed=true`` so
4222
+ * subsequent ``recordTurnComplete`` calls were no-ops. 500 ms
4223
+ * filters those phantoms while still letting a real interruption
4224
+ * land within half a second of agent onset.
4225
+ */
4226
+ static MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_NO_AEC = 500;
5388
4227
  /** Handle for the pending grace-period timer, so it can be cleared on cleanup. */
5389
4228
  graceTimer = null;
5390
4229
  /**
@@ -5424,30 +4263,12 @@ var StreamHandler = class _StreamHandler {
5424
4263
  * coexist without name collisions even when firstMessage finishes while
5425
4264
  * a Realtime turn is still streaming.
5426
4265
  */
5427
- firstMessageMarkCounter = 0;
5428
- /**
5429
- * Maximum unconfirmed Twilio marks while streaming firstMessage. Each
5430
- * chunk is 40 ms of audio at 16 kHz PCM16, so a window of 3 caps
5431
- * the in-flight queue at ~120 ms. This means a barge-in's
5432
- * ``sendClear`` has at most 120 ms of already-buffered audio to flush
5433
- * — vs. ~2-5 s with the previous burst-send code, which was the
5434
- * root cause of "firstMessage non interrompibile". Higher values
5435
- * smooth playback under jittery RTT (each mark echo adds ~150-250 ms
5436
- * RTT on PSTN) at the cost of longer barge-in latency; lower values
5437
- * risk under-buffering. 3 hit the smallest barge-in cap without
5438
- * audible gaps in 2026-05 acceptance.
5439
- */
5440
- static FIRST_MESSAGE_MARK_WINDOW = 3;
5441
- /**
5442
- * Per-chunk soft timeout (ms) while awaiting a mark echo. Twilio's
5443
- * mark echoes typically arrive within 100-250 ms of audio playback.
5444
- * Capping at 500 ms guards against carriers (or test doubles) that
5445
- * never echo — without it a stalled echo would deadlock the loop and
5446
- * the agent would freeze mid-utterance. On timeout we drop the
5447
- * waiter from the queue and continue: playout may glitch by one
5448
- * chunk but the call stays alive.
5449
- */
5450
- static MARK_AWAIT_TIMEOUT_MS = 500;
4266
+ // firstMessageMarkCounter / FIRST_MESSAGE_MARK_WINDOW /
4267
+ // MARK_AWAIT_TIMEOUT_MS were retired with the move to the Twilio-FIFO-
4268
+ // trusts model (sendPacedFirstMessageBytes no longer emits marks).
4269
+ // Marks are still consumed via ``onMark`` for any adapter that wants
4270
+ // to round-trip one, but the firstMessage path no longer back-pressures
4271
+ // on them.
5451
4272
  /**
5452
4273
  * Minimum drain window (ms) between a ``cancelSpeaking`` and the next
5453
4274
  * ``beginSpeaking``. 150 ms covers a typical PSTN jitter buffer drain
@@ -5512,6 +4333,14 @@ var StreamHandler = class _StreamHandler {
5512
4333
  } catch {
5513
4334
  }
5514
4335
  }
4336
+ const ttsCancelable = this.tts;
4337
+ if (typeof ttsCancelable?.cancelActiveStream === "function") {
4338
+ try {
4339
+ ttsCancelable.cancelActiveStream();
4340
+ } catch (err) {
4341
+ getLogger().debug(`TTS cancelActiveStream raised: ${String(err)}`);
4342
+ }
4343
+ }
5515
4344
  }
5516
4345
  /**
5517
4346
  * Resolve every entry in ``pendingMarks`` and empty the queue. Idempotent
@@ -5528,56 +4357,19 @@ var StreamHandler = class _StreamHandler {
5528
4357
  }
5529
4358
  this.pendingMarks.length = 0;
5530
4359
  }
4360
+ // Mark-based back-pressure (sendMarkAwaitable / waitForMarkWindow)
4361
+ // was removed when sendPacedFirstMessageBytes switched to the
4362
+ // Twilio-FIFO-trusts model — see that method's doc comment for
4363
+ // rationale. ``pendingMarks`` and ``onMark`` are still kept so an
4364
+ // adapter that wants to round-trip a mark for some other purpose can
4365
+ // still do so without breaking the firstMessage path.
5531
4366
  /**
5532
- * Push a Twilio ``mark`` event AFTER the corresponding audio chunk and
5533
- * return a promise that resolves when the mark is echoed back via
5534
- * ``onMark`` (or when ``cancelSpeaking`` drains the queue, or after
5535
- * ``MARK_AWAIT_TIMEOUT_MS``). Returns null on non-Twilio carriers; the
5536
- * caller is expected to fall back to time-based pacing in that case.
5537
- */
5538
- sendMarkAwaitable() {
5539
- if (this.deps.bridge.telephonyProvider !== "twilio") return null;
5540
- this.firstMessageMarkCounter += 1;
5541
- const markName = `fm_${this.firstMessageMarkCounter}`;
5542
- let resolve;
5543
- const promise = new Promise((r) => {
5544
- resolve = r;
5545
- });
5546
- this.pendingMarks.push({ name: markName, resolve, promise });
5547
- try {
5548
- this.deps.bridge.sendMark(this.ws, markName, this.streamSid);
5549
- } catch (err) {
5550
- getLogger().debug(`sendMark failed (${markName}): ${String(err)}`);
5551
- const idx = this.pendingMarks.findIndex((m) => m.name === markName);
5552
- if (idx >= 0) this.pendingMarks.splice(idx, 1);
5553
- return Promise.resolve();
5554
- }
5555
- return promise;
5556
- }
5557
- /**
5558
- * If the in-flight mark queue is at or above ``FIRST_MESSAGE_MARK_WINDOW``
5559
- * entries, wait for the oldest entry to clear (mark echoed, agent
5560
- * cancelled, or per-mark timeout). Repeats until the queue depth is
5561
- * within the window — under high RTT the carrier may have several
5562
- * marks queued and we want every loop iteration to be naturally back-
5563
- * pressured by playback.
5564
- */
5565
- async waitForMarkWindow() {
5566
- while (this.isSpeaking && this.pendingMarks.length >= _StreamHandler.FIRST_MESSAGE_MARK_WINDOW) {
5567
- const oldest = this.pendingMarks[0];
5568
- const timeout = new Promise(
5569
- (resolve) => setTimeout(resolve, _StreamHandler.MARK_AWAIT_TIMEOUT_MS)
5570
- );
5571
- await Promise.race([oldest.promise, timeout]);
5572
- if (this.pendingMarks[0] === oldest) {
5573
- this.pendingMarks.shift();
5574
- }
5575
- }
5576
- }
5577
- /**
5578
- * Bytes-per-millisecond for a 16 kHz PCM16 mono stream. Used by the
5579
- * non-Twilio firstMessage pacing path to translate chunk size into a
5580
- * playout-duration sleep. 16000 samples/sec × 2 bytes = 32 bytes/ms.
4367
+ * Bytes-per-millisecond for a 16 kHz PCM16 mono stream. Used by
4368
+ * ``sendPacedFirstMessageBytes`` to translate chunk size into a
4369
+ * playout-duration sleep so we never deliver faster than the carrier
4370
+ * can decode + play out (which manifested as severe crackling on the
4371
+ * HTTP-TTS path with client-side resampling). 16000 samples/sec × 2
4372
+ * bytes/sample = 32 bytes/ms.
5581
4373
  */
5582
4374
  static PCM16_16K_BYTES_PER_MS = 32;
5583
4375
  /** Cancel and clear the pending grace timer, if any. */
@@ -6015,7 +4807,7 @@ var StreamHandler = class _StreamHandler {
6015
4807
  if (activeVad && !this.vadDisabled) {
6016
4808
  try {
6017
4809
  const vadPromise = activeVad.processFrame(pcm16k, 16e3);
6018
- const timeoutPromise = new Promise((resolve) => setTimeout(() => resolve(null), 25));
4810
+ const timeoutPromise = new Promise((resolve2) => setTimeout(() => resolve2(null), 25));
6019
4811
  const evt = await Promise.race([vadPromise, timeoutPromise]);
6020
4812
  if (evt) {
6021
4813
  getLogger().info(
@@ -6151,9 +4943,21 @@ var StreamHandler = class _StreamHandler {
6151
4943
  /** Handle call stop / stream end. */
6152
4944
  /** Handle a carrier-emitted `stop` event signalling the call has ended. */
6153
4945
  async handleStop() {
4946
+ if (this.llmAbort !== null) {
4947
+ try {
4948
+ this.llmAbort.abort();
4949
+ } catch {
4950
+ }
4951
+ }
4952
+ const ttsCancelable = this.tts;
4953
+ if (typeof ttsCancelable?.cancelActiveStream === "function") {
4954
+ try {
4955
+ ttsCancelable.cancelActiveStream();
4956
+ } catch {
4957
+ }
4958
+ }
6154
4959
  this.clearPendingBargeIn();
6155
4960
  this.drainPendingMarks();
6156
- this.firstMessageMarkCounter = 0;
6157
4961
  this.clearGraceTimer();
6158
4962
  this.flushResamplers();
6159
4963
  await this.closeSttOnce();
@@ -6166,9 +4970,21 @@ var StreamHandler = class _StreamHandler {
6166
4970
  /** Handle WebSocket close event. */
6167
4971
  /** Tear down adapter, STT/TTS, and per-call state when the carrier WebSocket closes. */
6168
4972
  async handleWsClose() {
4973
+ if (this.llmAbort !== null) {
4974
+ try {
4975
+ this.llmAbort.abort();
4976
+ } catch {
4977
+ }
4978
+ }
4979
+ const ttsCancelable = this.tts;
4980
+ if (typeof ttsCancelable?.cancelActiveStream === "function") {
4981
+ try {
4982
+ ttsCancelable.cancelActiveStream();
4983
+ } catch {
4984
+ }
4985
+ }
6169
4986
  this.clearPendingBargeIn();
6170
4987
  this.drainPendingMarks();
6171
- this.firstMessageMarkCounter = 0;
6172
4988
  this.clearGraceTimer();
6173
4989
  this.flushResamplers();
6174
4990
  await this.closeSttOnce();
@@ -6207,13 +5023,39 @@ var StreamHandler = class _StreamHandler {
6207
5023
  * Maintains a 1-byte carry across calls so unaligned HTTP chunks from
6208
5024
  * streaming TTS providers never byte-swap the PCM16 samples downstream.
6209
5025
  */
6210
- encodePipelineAudio(pcm16k) {
6211
- const aligned = this.alignPcm16(pcm16k);
5026
+ encodePipelineAudio(audioChunk) {
5027
+ if (this.ttsOutputFormatNativeForCarrier === true) {
5028
+ return audioChunk.toString("base64");
5029
+ }
5030
+ const aligned = this.alignPcm16(audioChunk);
6212
5031
  if (aligned.length === 0) return "";
6213
5032
  const pcm8k = this.outboundResampler.process(aligned);
6214
5033
  const mulaw = pcm16ToMulaw(pcm8k);
6215
5034
  return mulaw.toString("base64");
6216
5035
  }
5036
+ /**
5037
+ * Cached result of ``isTtsOutputFormatNativeForCarrier()`` — settled
5038
+ * once at ``initPipeline`` time after ``setTelephonyCarrier`` has run
5039
+ * on the TTS adapter. Stable for the call lifetime: changes to the
5040
+ * adapter's output format mid-call would NOT flip this. ``true`` means
5041
+ * ``encodePipelineAudio`` can take the bypass path.
5042
+ */
5043
+ ttsOutputFormatNativeForCarrier = false;
5044
+ /**
5045
+ * Probe whether the TTS adapter is configured to emit bytes already in
5046
+ * the carrier's wire codec. Currently: Twilio expects ``ulaw_8000``,
5047
+ * Telnyx expects ``pcm_16000`` (no client transcode in either case if
5048
+ * matched). Anything else takes the resample-and-encode path.
5049
+ */
5050
+ isTtsOutputFormatNativeForCarrier() {
5051
+ if (!this.tts) return false;
5052
+ const fmt = this.tts.outputFormat;
5053
+ if (typeof fmt !== "string") return false;
5054
+ const carrier = this.deps.bridge.telephonyProvider;
5055
+ if (carrier === "twilio") return fmt === "ulaw_8000";
5056
+ if (carrier === "telnyx") return fmt === "pcm_16000";
5057
+ return false;
5058
+ }
6217
5059
  /**
6218
5060
  * Prepend any carry byte from the previous chunk, return the even-length
6219
5061
  * portion, and stash the final odd byte (if any) for the next call.
@@ -6224,17 +5066,11 @@ var StreamHandler = class _StreamHandler {
6224
5066
  this.ttsByteCarry = alignedLen < combined.length ? combined.subarray(alignedLen) : null;
6225
5067
  return combined.subarray(0, alignedLen);
6226
5068
  }
6227
- /**
6228
- * 40 ms @ 16 kHz mono PCM16 = 1280 bytes. Sized to mirror the smallest
6229
- * live-TTS chunk boundary so cancel granularity (mark/clear bookkeeping)
6230
- * is identical regardless of whether the firstMessage came from the
6231
- * prewarm cache or a live ``tts.synthesizeStream`` stream.
6232
- */
6233
- static PREWARM_CHUNK_BYTES = 1280;
6234
5069
  /**
6235
5070
  * Stream a cached firstMessage buffer in pacing-friendly chunks.
6236
5071
  *
6237
- * Splits ``prewarmBytes`` into ``PREWARM_CHUNK_BYTES`` slices and
5072
+ * Splits ``prewarmBytes`` into 20 ms slices (matching Twilio's PSTN
5073
+ * frame quantum) and
6238
5074
  * forwards each through ``deps.bridge.sendAudio`` exactly like the
6239
5075
  * live TTS path does — preserving Twilio mark/clear granularity. A
6240
5076
  * single multi-second sendAudio call would push the whole intro into
@@ -6250,7 +5086,7 @@ var StreamHandler = class _StreamHandler {
6250
5086
  return this.sendPacedFirstMessageBytes(prewarmBytes);
6251
5087
  }
6252
5088
  /**
6253
- * Iterate ``bytes`` as ``PREWARM_CHUNK_BYTES``-sized PCM16 slices and
5089
+ * Iterate ``bytes`` in 20 ms slices (Twilio PSTN frame quantum) and
6254
5090
  * forward each via ``deps.bridge.sendAudio`` with mark-gated pacing
6255
5091
  * (Twilio) or playout-time-based pacing (Telnyx). Caps the carrier-
6256
5092
  * side buffer at ``FIRST_MESSAGE_MARK_WINDOW`` chunks so a barge-in's
@@ -6267,30 +5103,20 @@ var StreamHandler = class _StreamHandler {
6267
5103
  */
6268
5104
  async sendPacedFirstMessageBytes(bytes) {
6269
5105
  if (this.pendingMarks.length > 0) this.drainPendingMarks();
6270
- this.firstMessageMarkCounter = 0;
6271
5106
  let firstChunkSent = false;
6272
- let initialFillComplete = false;
6273
- for (let i = 0; i < bytes.length; i += _StreamHandler.PREWARM_CHUNK_BYTES) {
6274
- if (!this.isSpeaking) break;
6275
- await this.waitForMarkWindow();
5107
+ const PSTN_FRAME_MS = 20;
5108
+ const bytesPerMs = this.ttsOutputFormatNativeForCarrier ? 8 : _StreamHandler.PCM16_16K_BYTES_PER_MS;
5109
+ const sliceBytes = bytesPerMs * PSTN_FRAME_MS;
5110
+ for (let i = 0; i < bytes.length; i += sliceBytes) {
6276
5111
  if (!this.isSpeaking) break;
6277
- const chunk = bytes.subarray(i, i + _StreamHandler.PREWARM_CHUNK_BYTES);
5112
+ const chunk = bytes.subarray(i, i + sliceBytes);
6278
5113
  if (!firstChunkSent) firstChunkSent = true;
6279
- if (this.aec) this.aec.pushFarEnd(chunk);
5114
+ if (this.aec && !this.ttsOutputFormatNativeForCarrier) {
5115
+ this.aec.pushFarEnd(chunk);
5116
+ }
6280
5117
  const encoded = this.encodePipelineAudio(chunk);
6281
5118
  this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
6282
5119
  this.markFirstAudioSent();
6283
- const markPromise = this.sendMarkAwaitable();
6284
- if (!initialFillComplete && this.pendingMarks.length >= _StreamHandler.FIRST_MESSAGE_MARK_WINDOW) {
6285
- initialFillComplete = true;
6286
- }
6287
- if (markPromise === null || initialFillComplete) {
6288
- const playoutMs = Math.max(
6289
- 1,
6290
- Math.floor(chunk.length / _StreamHandler.PCM16_16K_BYTES_PER_MS)
6291
- );
6292
- await new Promise((resolve) => setTimeout(resolve, playoutMs));
6293
- }
6294
5120
  }
6295
5121
  return firstChunkSent;
6296
5122
  }
@@ -6310,6 +5136,12 @@ var StreamHandler = class _StreamHandler {
6310
5136
  getLogger().debug(`TTS setTelephonyCarrier failed (${label}): ${String(e)}`);
6311
5137
  }
6312
5138
  }
5139
+ this.ttsOutputFormatNativeForCarrier = this.isTtsOutputFormatNativeForCarrier();
5140
+ if (this.ttsOutputFormatNativeForCarrier) {
5141
+ getLogger().debug(
5142
+ `TTS outputFormat matches ${this.deps.bridge.telephonyProvider} wire codec \u2014 bypassing client-side transcode`
5143
+ );
5144
+ }
6313
5145
  }
6314
5146
  if (!this.stt) {
6315
5147
  getLogger().debug(`Pipeline mode (${label}): no STT configured`);
@@ -6319,7 +5151,7 @@ var StreamHandler = class _StreamHandler {
6319
5151
  }
6320
5152
  if (!this.deps.agent.vad) {
6321
5153
  try {
6322
- const { SileroVAD } = await import("./silero-vad-NSEXI4XS.mjs");
5154
+ const { SileroVAD } = await import("./silero-vad-LNDFGIY7.mjs");
6323
5155
  this.autoVad = await SileroVAD.forPhoneCall();
6324
5156
  getLogger().info(
6325
5157
  `auto-VAD enabled (SileroVAD, phone preset). Pass agent.vad=\u2026 to override.`
@@ -7012,16 +5844,49 @@ var StreamHandler = class _StreamHandler {
7012
5844
  async initRealtimeAdapter(resolvedPrompt) {
7013
5845
  const label = this.deps.bridge.label;
7014
5846
  this.adapter = this.deps.buildAIAdapter(resolvedPrompt);
7015
- try {
7016
- await this.adapter.connect();
7017
- getLogger().debug(`AI adapter connected (${label})`);
7018
- } catch (e) {
7019
- getLogger().error(`AI adapter connect FAILED (${label}):`, e);
5847
+ let parked;
5848
+ if (typeof this.deps.popPrewarmedConnections === "function") {
7020
5849
  try {
7021
- await this.deps.bridge.endCall(this.callId, this.ws);
7022
- } catch {
5850
+ parked = this.deps.popPrewarmedConnections(this.callId);
5851
+ } catch (err) {
5852
+ getLogger().debug(`popPrewarmedConnections raised: ${String(err)}`);
5853
+ }
5854
+ }
5855
+ const parkedRealtimeWs = parked?.openaiRealtime;
5856
+ let adoptOk = false;
5857
+ if (parkedRealtimeWs !== void 0) {
5858
+ const adapterAny = this.adapter;
5859
+ const wsAlive = parkedRealtimeWs.readyState === 1;
5860
+ if (typeof adapterAny?.adoptWebSocket === "function" && wsAlive) {
5861
+ try {
5862
+ adapterAny.adoptWebSocket(parkedRealtimeWs);
5863
+ getLogger().info(
5864
+ `[CONNECT] callId=${this.callId} provider=openai_realtime source=adopted ms=0`
5865
+ );
5866
+ adoptOk = true;
5867
+ } catch (err) {
5868
+ getLogger().debug(`Realtime adoptWebSocket failed: ${String(err)}; falling back`);
5869
+ }
5870
+ }
5871
+ if (!adoptOk) {
5872
+ try {
5873
+ parkedRealtimeWs.close();
5874
+ } catch {
5875
+ }
5876
+ }
5877
+ }
5878
+ if (!adoptOk) {
5879
+ try {
5880
+ await this.adapter.connect();
5881
+ getLogger().debug(`AI adapter connected (${label})`);
5882
+ } catch (e) {
5883
+ getLogger().error(`AI adapter connect FAILED (${label}):`, e);
5884
+ try {
5885
+ await this.deps.bridge.endCall(this.callId, this.ws);
5886
+ } catch {
5887
+ }
5888
+ return;
7023
5889
  }
7024
- return;
7025
5890
  }
7026
5891
  if (this.deps.agent.firstMessage) {
7027
5892
  this.metricsAcc.startTurn();
@@ -7141,8 +6006,21 @@ var StreamHandler = class _StreamHandler {
7141
6006
  await this.emitUserSpeechEnded();
7142
6007
  }
7143
6008
  async onAdapterTranscriptInput(inputText) {
6009
+ const stripped = inputText.trim().toLowerCase();
6010
+ if (HALLUCINATIONS.has(stripped) || stripped === "") {
6011
+ getLogger().debug(
6012
+ `Realtime transcript_input dropped (likely Whisper hallucination on silence/echo): ${sanitizeLogValue(inputText.slice(0, 60))}`
6013
+ );
6014
+ this.userTranscriptPending = false;
6015
+ return;
6016
+ }
7144
6017
  getLogger().debug(`User (${this.deps.bridge.label}): ${sanitizeLogValue(inputText)}`);
7145
6018
  this.history.push({ role: "user", text: inputText, timestamp: Date.now() });
6019
+ if (this.adapter instanceof OpenAIRealtimeAdapter) {
6020
+ void this.adapter.requestResponse().catch(
6021
+ (err) => getLogger().debug(`Realtime requestResponse failed: ${String(err)}`)
6022
+ );
6023
+ }
7146
6024
  if (!this.metricsAcc.turnActive) {
7147
6025
  this.metricsAcc.startTurn();
7148
6026
  this.currentAgentText = "";
@@ -7294,6 +6172,18 @@ var StreamHandler = class _StreamHandler {
7294
6172
  await this.flushAssistantTurn(text);
7295
6173
  }
7296
6174
  async onAdapterSpeechInterrupt() {
6175
+ if (this.adapter instanceof OpenAIRealtimeAdapter) {
6176
+ const startedAt = this.adapter.currentResponseFirstAudioAt;
6177
+ if (startedAt !== null) {
6178
+ const elapsedMs = Date.now() - startedAt;
6179
+ if (elapsedMs < _StreamHandler.MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_NO_AEC) {
6180
+ getLogger().info(
6181
+ `Realtime barge-in suppressed (response < gate, ${elapsedMs}ms)`
6182
+ );
6183
+ return;
6184
+ }
6185
+ }
6186
+ }
7297
6187
  this.deps.bridge.sendClear(this.ws, this.streamSid);
7298
6188
  if (this.adapter instanceof OpenAIRealtimeAdapter) this.adapter.cancelResponse();
7299
6189
  this.metricsAcc.recordTurnInterrupted();
@@ -7529,31 +6419,31 @@ async function queryDeepgramCost(metricsAcc, deepgramKey, deepgramRequestId) {
7529
6419
  // src/services/call-log.ts
7530
6420
  init_esm_shims();
7531
6421
  import * as crypto3 from "crypto";
7532
- import * as fs2 from "fs";
6422
+ import * as fs3 from "fs";
7533
6423
  import { promises as fsp } from "fs";
7534
6424
  import * as os from "os";
7535
- import * as path2 from "path";
6425
+ import * as path3 from "path";
7536
6426
  var SCHEMA_VERSION = "1.0";
7537
6427
  var DEFAULT_RETENTION_DAYS = 30;
7538
6428
  function xdgDataHome() {
7539
- return process.env.XDG_DATA_HOME || path2.join(os.homedir(), ".local", "share");
6429
+ return process.env.XDG_DATA_HOME || path3.join(os.homedir(), ".local", "share");
7540
6430
  }
7541
6431
  function platformDefaultRoot() {
7542
6432
  if (process.platform === "darwin") {
7543
- return path2.join(os.homedir(), "Library", "Application Support", "patter");
6433
+ return path3.join(os.homedir(), "Library", "Application Support", "patter");
7544
6434
  }
7545
6435
  if (process.platform === "win32") {
7546
6436
  const localAppData = process.env.LOCALAPPDATA;
7547
- if (localAppData) return path2.join(localAppData, "patter");
7548
- return path2.join(os.homedir(), "AppData", "Local", "patter");
6437
+ if (localAppData) return path3.join(localAppData, "patter");
6438
+ return path3.join(os.homedir(), "AppData", "Local", "patter");
7549
6439
  }
7550
- return path2.join(xdgDataHome(), "patter");
6440
+ return path3.join(xdgDataHome(), "patter");
7551
6441
  }
7552
6442
  function resolveLogRoot(explicit) {
7553
6443
  const value = explicit ?? process.env.PATTER_LOG_DIR;
7554
6444
  if (!value) return null;
7555
6445
  if (value.trim().toLowerCase() === "auto") return platformDefaultRoot();
7556
- if (value.startsWith("~")) return path2.join(os.homedir(), value.slice(1));
6446
+ if (value.startsWith("~")) return path3.join(os.homedir(), value.slice(1));
7557
6447
  return value;
7558
6448
  }
7559
6449
  function retentionDays() {
@@ -7564,9 +6454,9 @@ function retentionDays() {
7564
6454
  return Math.max(0, parsed);
7565
6455
  }
7566
6456
  function redactMode() {
7567
- const raw = (process.env.PATTER_LOG_REDACT_PHONE || "mask").trim().toLowerCase();
6457
+ const raw = (process.env.PATTER_LOG_REDACT_PHONE || "full").trim().toLowerCase();
7568
6458
  if (raw === "full" || raw === "mask" || raw === "hash_only") return raw;
7569
- return "mask";
6459
+ return "full";
7570
6460
  }
7571
6461
  function redactPhone(raw) {
7572
6462
  if (!raw) return "";
@@ -7582,9 +6472,9 @@ function utcIso(tsSeconds) {
7582
6472
  return new Date(ms).toISOString();
7583
6473
  }
7584
6474
  async function atomicWriteJson(filePath, payload) {
7585
- const dir = path2.dirname(filePath);
6475
+ const dir = path3.dirname(filePath);
7586
6476
  await fsp.mkdir(dir, { recursive: true });
7587
- const tmp = path2.join(dir, `.tmp.${process.pid}.${crypto3.randomBytes(4).toString("hex")}.json`);
6477
+ const tmp = path3.join(dir, `.tmp.${process.pid}.${crypto3.randomBytes(4).toString("hex")}.json`);
7588
6478
  try {
7589
6479
  const handle = await fsp.open(tmp, "w");
7590
6480
  try {
@@ -7603,7 +6493,7 @@ async function atomicWriteJson(filePath, payload) {
7603
6493
  }
7604
6494
  }
7605
6495
  async function appendJsonl(filePath, record) {
7606
- await fsp.mkdir(path2.dirname(filePath), { recursive: true });
6496
+ await fsp.mkdir(path3.dirname(filePath), { recursive: true });
7607
6497
  await fsp.appendFile(filePath, JSON.stringify(record) + "\n", { encoding: "utf8" });
7608
6498
  }
7609
6499
  var CallLogger = class {
@@ -7613,9 +6503,9 @@ var CallLogger = class {
7613
6503
  this.root = null;
7614
6504
  return;
7615
6505
  }
7616
- const resolved = root.startsWith("~") ? path2.join(os.homedir(), root.slice(1)) : root;
6506
+ const resolved = root.startsWith("~") ? path3.join(os.homedir(), root.slice(1)) : root;
7617
6507
  try {
7618
- fs2.mkdirSync(resolved, { recursive: true });
6508
+ fs3.mkdirSync(resolved, { recursive: true });
7619
6509
  this.root = resolved;
7620
6510
  getLogger().info(`Call logs: ${resolved}`);
7621
6511
  } catch (err) {
@@ -7637,7 +6527,7 @@ var CallLogger = class {
7637
6527
  const month = String(dt.getUTCMonth() + 1).padStart(2, "0");
7638
6528
  const day = String(dt.getUTCDate()).padStart(2, "0");
7639
6529
  const safeId = sanitizeLogValue(callId, 64).replace(/\//g, "_") || "unknown";
7640
- return path2.join(this.root, "calls", year, month, day, safeId);
6530
+ return path3.join(this.root, "calls", year, month, day, safeId);
7641
6531
  }
7642
6532
  /** Write the initial `metadata.json` for a new call. */
7643
6533
  async logCallStart(callId, input = {}) {
@@ -7655,6 +6545,7 @@ var CallLogger = class {
7655
6545
  status: "in_progress",
7656
6546
  caller: redactPhone(input.caller ?? ""),
7657
6547
  callee: redactPhone(input.callee ?? ""),
6548
+ direction: input.direction || "inbound",
7658
6549
  telephony_provider: input.telephonyProvider ?? "",
7659
6550
  provider_mode: input.providerMode ?? "",
7660
6551
  agent: input.agent ?? {},
@@ -7664,7 +6555,7 @@ var CallLogger = class {
7664
6555
  error: null
7665
6556
  };
7666
6557
  try {
7667
- await atomicWriteJson(path2.join(dir, "metadata.json"), metadata);
6558
+ await atomicWriteJson(path3.join(dir, "metadata.json"), metadata);
7668
6559
  } catch (err) {
7669
6560
  getLogger().warn(`call_log write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`);
7670
6561
  }
@@ -7683,7 +6574,7 @@ var CallLogger = class {
7683
6574
  ...turn
7684
6575
  };
7685
6576
  try {
7686
- await appendJsonl(path2.join(dir, "transcript.jsonl"), record);
6577
+ await appendJsonl(path3.join(dir, "transcript.jsonl"), record);
7687
6578
  } catch (err) {
7688
6579
  getLogger().warn(
7689
6580
  `call_log turn write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`
@@ -7702,7 +6593,7 @@ var CallLogger = class {
7702
6593
  data: payload
7703
6594
  };
7704
6595
  try {
7705
- await appendJsonl(path2.join(dir, "events.jsonl"), record);
6596
+ await appendJsonl(path3.join(dir, "events.jsonl"), record);
7706
6597
  } catch (err) {
7707
6598
  getLogger().warn(
7708
6599
  `call_log event write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`
@@ -7714,7 +6605,7 @@ var CallLogger = class {
7714
6605
  if (!this.enabled) return;
7715
6606
  const dir = this.callDir(callId);
7716
6607
  if (dir === null) return;
7717
- const metadataPath = path2.join(dir, "metadata.json");
6608
+ const metadataPath = path3.join(dir, "metadata.json");
7718
6609
  let existing = {};
7719
6610
  try {
7720
6611
  existing = JSON.parse(await fsp.readFile(metadataPath, "utf8"));
@@ -7749,20 +6640,20 @@ var CallLogger = class {
7749
6640
  const days = retentionDays();
7750
6641
  if (days === 0) return;
7751
6642
  const cutoff = Date.now() / 1e3 - days * 86400;
7752
- const callsRoot = path2.join(this.root, "calls");
7753
- if (!fs2.existsSync(callsRoot)) return;
6643
+ const callsRoot = path3.join(this.root, "calls");
6644
+ if (!fs3.existsSync(callsRoot)) return;
7754
6645
  try {
7755
- for (const yearName of fs2.readdirSync(callsRoot)) {
6646
+ for (const yearName of fs3.readdirSync(callsRoot)) {
7756
6647
  if (!/^\d+$/.test(yearName)) continue;
7757
- const yearDir = path2.join(callsRoot, yearName);
7758
- if (!fs2.statSync(yearDir).isDirectory()) continue;
7759
- for (const monthName of fs2.readdirSync(yearDir)) {
6648
+ const yearDir = path3.join(callsRoot, yearName);
6649
+ if (!fs3.statSync(yearDir).isDirectory()) continue;
6650
+ for (const monthName of fs3.readdirSync(yearDir)) {
7760
6651
  if (!/^\d+$/.test(monthName)) continue;
7761
- const monthDir = path2.join(yearDir, monthName);
7762
- if (!fs2.statSync(monthDir).isDirectory()) continue;
7763
- for (const dayName of fs2.readdirSync(monthDir)) {
6652
+ const monthDir = path3.join(yearDir, monthName);
6653
+ if (!fs3.statSync(monthDir).isDirectory()) continue;
6654
+ for (const dayName of fs3.readdirSync(monthDir)) {
7764
6655
  if (!/^\d+$/.test(dayName)) continue;
7765
- const dayDir = path2.join(monthDir, dayName);
6656
+ const dayDir = path3.join(monthDir, dayName);
7766
6657
  const y = Number.parseInt(yearName, 10);
7767
6658
  const m = Number.parseInt(monthName, 10);
7768
6659
  const d = Number.parseInt(dayName, 10);
@@ -7772,12 +6663,12 @@ var CallLogger = class {
7772
6663
  }
7773
6664
  }
7774
6665
  try {
7775
- if (fs2.readdirSync(monthDir).length === 0) fs2.rmdirSync(monthDir);
6666
+ if (fs3.readdirSync(monthDir).length === 0) fs3.rmdirSync(monthDir);
7776
6667
  } catch {
7777
6668
  }
7778
6669
  }
7779
6670
  try {
7780
- if (fs2.readdirSync(yearDir).length === 0) fs2.rmdirSync(yearDir);
6671
+ if (fs3.readdirSync(yearDir).length === 0) fs3.rmdirSync(yearDir);
7781
6672
  } catch {
7782
6673
  }
7783
6674
  }
@@ -7788,19 +6679,19 @@ var CallLogger = class {
7788
6679
  };
7789
6680
  function rmTree(target) {
7790
6681
  try {
7791
- for (const child of fs2.readdirSync(target)) {
7792
- const childPath = path2.join(target, child);
7793
- const stat = fs2.lstatSync(childPath);
6682
+ for (const child of fs3.readdirSync(target)) {
6683
+ const childPath = path3.join(target, child);
6684
+ const stat = fs3.lstatSync(childPath);
7794
6685
  if (stat.isDirectory()) {
7795
6686
  rmTree(childPath);
7796
6687
  } else {
7797
6688
  try {
7798
- fs2.unlinkSync(childPath);
6689
+ fs3.unlinkSync(childPath);
7799
6690
  } catch {
7800
6691
  }
7801
6692
  }
7802
6693
  }
7803
- fs2.rmdirSync(target);
6694
+ fs3.rmdirSync(target);
7804
6695
  } catch {
7805
6696
  }
7806
6697
  }
@@ -8097,7 +6988,7 @@ var TELNYX_DTMF_ALLOWED = new Set("0123456789*#ABCDabcdwW");
8097
6988
  var TELNYX_DTMF_DURATION_MS = 250;
8098
6989
  async function sleep(ms) {
8099
6990
  if (ms <= 0) return;
8100
- await new Promise((resolve) => setTimeout(resolve, ms));
6991
+ await new Promise((resolve2) => setTimeout(resolve2, ms));
8101
6992
  }
8102
6993
  var TelnyxBridge = class {
8103
6994
  constructor(config) {
@@ -8700,7 +7591,7 @@ var EmbeddedServer = class {
8700
7591
  this.handleTwilioStream(ws, url);
8701
7592
  }
8702
7593
  });
8703
- await new Promise((resolve) => {
7594
+ await new Promise((resolve2) => {
8704
7595
  const bindHost = process.env.PATTER_BIND_HOST ?? "127.0.0.1";
8705
7596
  this.server.listen(port, bindHost, () => {
8706
7597
  getLogger().info(`Server on port ${port}`);
@@ -8722,7 +7613,7 @@ var EmbeddedServer = class {
8722
7613
  }
8723
7614
  console.log("\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n");
8724
7615
  }
8725
- resolve();
7616
+ resolve2();
8726
7617
  });
8727
7618
  });
8728
7619
  }
@@ -8765,7 +7656,7 @@ var EmbeddedServer = class {
8765
7656
  `Telnyx voicemail speak failed: ${speakResp.status} ${(await speakResp.text()).slice(0, 200)}`
8766
7657
  );
8767
7658
  }
8768
- await new Promise((resolve) => setTimeout(resolve, estimatedMs));
7659
+ await new Promise((resolve2) => setTimeout(resolve2, estimatedMs));
8769
7660
  await fetch(`https://api.telnyx.com/v2/calls/${encoded}/actions/hangup`, {
8770
7661
  method: "POST",
8771
7662
  headers,
@@ -8836,9 +7727,11 @@ var EmbeddedServer = class {
8836
7727
  const active = callId ? store.getActive(callId) : void 0;
8837
7728
  const resolvedCaller = dataCaller || active?.caller || "";
8838
7729
  const resolvedCallee = dataCallee || active?.callee || "";
7730
+ const resolvedDirection = (typeof data.direction === "string" ? data.direction : "") || active?.direction || "inbound";
8839
7731
  void logger.logCallStart(callId, {
8840
7732
  caller: resolvedCaller,
8841
7733
  callee: resolvedCallee,
7734
+ direction: resolvedDirection,
8842
7735
  telephonyProvider: bridge.telephonyProvider,
8843
7736
  providerMode: agent.provider ?? "",
8844
7737
  agent: agentSnapshot()
@@ -8996,8 +7889,8 @@ var EmbeddedServer = class {
8996
7889
  */
8997
7890
  async stop() {
8998
7891
  if (!this.server) return;
8999
- const httpClosePromise = new Promise((resolve) => {
9000
- this.server.close(() => resolve());
7892
+ const httpClosePromise = new Promise((resolve2) => {
7893
+ this.server.close(() => resolve2());
9001
7894
  });
9002
7895
  const isTelnyx = this.config.telephonyProvider === "telnyx";
9003
7896
  for (const [ws, callId] of this.activeCallIds) {
@@ -9017,15 +7910,15 @@ var EmbeddedServer = class {
9017
7910
  if (this.activeConnections.size > 0) {
9018
7911
  getLogger().info(`Waiting for ${this.activeConnections.size} active connection(s) to close...`);
9019
7912
  await Promise.race([
9020
- new Promise((resolve) => {
7913
+ new Promise((resolve2) => {
9021
7914
  const checkInterval = setInterval(() => {
9022
7915
  if (this.activeConnections.size === 0) {
9023
7916
  clearInterval(checkInterval);
9024
- resolve();
7917
+ resolve2();
9025
7918
  }
9026
7919
  }, 100);
9027
7920
  }),
9028
- new Promise((resolve) => setTimeout(resolve, GRACEFUL_SHUTDOWN_TIMEOUT_MS))
7921
+ new Promise((resolve2) => setTimeout(resolve2, GRACEFUL_SHUTDOWN_TIMEOUT_MS))
9029
7922
  ]);
9030
7923
  }
9031
7924
  if (this.activeConnections.size > 0) {
@@ -9782,7 +8675,7 @@ var TestSession = class {
9782
8675
  input: process.stdin,
9783
8676
  output: process.stdout
9784
8677
  });
9785
- const askQuestion = (prompt) => new Promise((resolve) => rl.question(prompt, resolve));
8678
+ const askQuestion = (prompt) => new Promise((resolve2) => rl.question(prompt, resolve2));
9786
8679
  try {
9787
8680
  while (!ended) {
9788
8681
  let userInput;
@@ -9881,26 +8774,17 @@ export {
9881
8774
  AuthenticationError,
9882
8775
  ProvisionError,
9883
8776
  RateLimitError,
9884
- OpenAIRealtimeAdapter,
9885
- mulawToPcm16,
9886
- pcm16ToMulaw,
9887
- PcmCarry,
9888
- StatefulResampler,
9889
- createResampler16kTo8k,
9890
- createResampler8kTo16k,
9891
- createResampler24kTo16k,
9892
- createResampler24kTo8k,
9893
- resample8kTo16k,
9894
- resample16kTo8k,
9895
- resample24kTo16k,
9896
- OpenAIRealtime2Adapter,
9897
8777
  ElevenLabsConvAIAdapter,
8778
+ PRICING_VERSION,
8779
+ PRICING_LAST_UPDATED,
8780
+ PricingUnit,
9898
8781
  DEFAULT_PRICING,
9899
8782
  mergePricing,
9900
8783
  calculateSttCost,
9901
8784
  calculateTtsCost,
9902
8785
  calculateRealtimeCost,
9903
8786
  calculateTelephonyCost,
8787
+ VERSION,
9904
8788
  MetricsStore,
9905
8789
  makeAuthMiddleware,
9906
8790
  callsToCsv,
@@ -9910,6 +8794,7 @@ export {
9910
8794
  RemoteMessageHandler,
9911
8795
  isRemoteUrl,
9912
8796
  isWebSocketUrl,
8797
+ DeepgramModel,
9913
8798
  DeepgramSTT,
9914
8799
  CallMetricsAccumulator,
9915
8800
  SPAN_CALL,