getpatter 0.6.1 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,11 @@
1
+ import {
2
+ OpenAIRealtime2Adapter,
3
+ OpenAIRealtimeAdapter,
4
+ createResampler16kTo8k,
5
+ createResampler8kTo16k,
6
+ mulawToPcm16,
7
+ pcm16ToMulaw
8
+ } from "./chunk-CL2U3YET.mjs";
1
9
  import {
2
10
  getLogger
3
11
  } from "./chunk-MVOQFAEO.mjs";
@@ -16,1264 +24,14 @@ init_esm_shims();
16
24
 
17
25
  // src/server.ts
18
26
  init_esm_shims();
19
- import crypto4 from "crypto";
27
+ import crypto5 from "crypto";
20
28
  import express from "express";
21
29
  import { createServer } from "http";
22
30
  import { WebSocketServer } from "ws";
23
31
 
24
- // src/providers/openai-realtime.ts
25
- init_esm_shims();
26
- import WebSocket from "ws";
27
// Wire-format identifiers accepted for `input_audio_format` / `output_audio_format`.
var OpenAIRealtimeAudioFormat = { G711_ULAW: "g711_ulaw", G711_ALAW: "g711_alaw", PCM16: "pcm16" };

// Model identifiers passed as the `model` query parameter of the Realtime URL.
var OpenAIRealtimeModel = {
  GPT_REALTIME: "gpt-realtime",
  GPT_REALTIME_2: "gpt-realtime-2",
  GPT_REALTIME_MINI: "gpt-realtime-mini",
  GPT_4O_REALTIME_PREVIEW: "gpt-4o-realtime-preview",
  GPT_4O_MINI_REALTIME_PREVIEW: "gpt-4o-mini-realtime-preview"
};

// Voice identifiers sent as the session `voice` field.
var OpenAIVoice = {
  ALLOY: "alloy",
  ASH: "ash",
  BALLAD: "ballad",
  CORAL: "coral",
  ECHO: "echo",
  FABLE: "fable",
  NOVA: "nova",
  ONYX: "onyx",
  SAGE: "sage",
  SHIMMER: "shimmer",
  VERSE: "verse"
};

// Model identifiers used for `input_audio_transcription.model`.
var OpenAITranscriptionModel = {
  WHISPER_1: "whisper-1",
  GPT_4O_TRANSCRIBE: "gpt-4o-transcribe",
  GPT_4O_MINI_TRANSCRIBE: "gpt-4o-mini-transcribe",
  GPT_REALTIME_WHISPER: "gpt-realtime-whisper"
};

// Values for `turn_detection.type`.
var OpenAIRealtimeVADType = { SERVER_VAD: "server_vad", SEMANTIC_VAD: "semantic_vad" };
62
/**
 * WebSocket adapter for the OpenAI Realtime API (beta `realtime=v1` protocol).
 *
 * Owns one upstream socket, applies the session configuration, translates
 * raw server events into a small set of adapter events (`audio`,
 * `transcript_output`, `transcript_input`, `speech_started`,
 * `speech_stopped`, `function_call`, `response_done`, `error`) and tracks
 * the in-flight assistant item so a barge-in can truncate it.
 */
var OpenAIRealtimeAdapter = class {
  /**
   * @param apiKey        Sent as the `Authorization: Bearer` header.
   * @param model         Realtime model id (URL query parameter).
   * @param voice         Session voice.
   * @param instructions  System instructions; a generic default is used when empty.
   * @param tools         Optional tool definitions (`name`, `description`, `parameters`, `strict`).
   * @param audioFormat   Used for both input and output audio.
   * @param options       Optional tuning: `vadType`, `silenceDurationMs`,
   *                      `inputAudioTranscriptionModel`, `temperature`,
   *                      `maxResponseOutputTokens`, `modalities`, `toolChoice`,
   *                      `reasoningEffort`.
   */
  constructor(apiKey, model = OpenAIRealtimeModel.GPT_REALTIME_MINI, voice = OpenAIVoice.ALLOY, instructions = "", tools, audioFormat = OpenAIRealtimeAudioFormat.G711_ULAW, options = {}) {
    this.apiKey = apiKey;
    this.model = model;
    this.voice = voice;
    this.instructions = instructions;
    this.tools = tools;
    this.audioFormat = audioFormat;
    this.options = options;
  }
  apiKey;
  model;
  voice;
  instructions;
  tools;
  audioFormat;
  // Fields exposed `protected` (not `private`) so a subclass can implement
  // alternate transports — e.g. `OpenAIRealtime2Adapter` overrides
  // `connect()` to speak the GA Realtime API while reusing the rest of
  // the runtime (audio dispatch, barge-in, heartbeat).
  ws = null;
  eventCallbacks = /* @__PURE__ */ new Set();
  messageListenerAttached = false;
  heartbeat = null;
  // In-flight assistant item id, so a barge-in can truncate it cleanly
  // (see `cancelResponse`).
  currentResponseItemId = null;
  // Milliseconds of audio received for the current item (byte-derived).
  currentResponseAudioMs = 0;
  // Wall-clock timestamp (Date.now()) of the first `response.audio.delta`
  // of the current item. `cancelResponse` uses it to cap `audio_end_ms` at
  // what real-time playback could have produced, because generated audio
  // arrives faster than it is played.
  currentResponseFirstAudioAt = null;
  options;
  /**
   * Build the `session.update` body. Shared by `connect()`, `warmup()` and
   * `openParkedConnection()` so every path applies identical configuration.
   *
   * @returns Plain object suitable for the `session` field of `session.update`.
   */
  buildSessionConfig() {
    const config = {
      input_audio_format: this.audioFormat,
      output_audio_format: this.audioFormat,
      voice: this.voice,
      instructions: this.instructions || "You are a helpful voice assistant. Be concise.",
      turn_detection: {
        type: this.options.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
        threshold: 0.5,
        prefix_padding_ms: 300,
        silence_duration_ms: this.options.silenceDurationMs ?? 300
      },
      input_audio_transcription: {
        model: this.options.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
      }
    };
    // Opt-in fields are only emitted when the caller set them.
    if (this.options.temperature !== void 0) config.temperature = this.options.temperature;
    if (this.options.maxResponseOutputTokens !== void 0) {
      config.max_response_output_tokens = this.options.maxResponseOutputTokens;
    }
    if (this.options.modalities !== void 0) config.modalities = this.options.modalities;
    if (this.options.toolChoice !== void 0) config.tool_choice = this.options.toolChoice;
    if (this.options.reasoningEffort !== void 0) {
      config.reasoning = { effort: this.options.reasoningEffort };
    }
    if (this.tools?.length) {
      config.tools = this.tools.map((t) => {
        const def = {
          type: "function",
          name: t.name,
          description: t.description,
          parameters: t.parameters
        };
        // `strict` is only forwarded when explicitly true.
        if (t.strict === true) {
          def.strict = true;
        }
        return def;
      });
    }
    return config;
  }
  /**
   * Best-effort pre-call warmup: open a socket, wait for `session.created`
   * (2 s), send one `session.update`, wait for `session.updated` (1.5 s),
   * then close. Only session configuration is exchanged — no
   * `response.create` is sent, so the model is never invoked.
   *
   * Never throws: failures are logged at debug level.
   */
  async warmup() {
    const url = `wss://api.openai.com/v1/realtime?model=${encodeURIComponent(this.model)}`;
    let ws = null;
    try {
      ws = await new Promise((resolve, reject) => {
        const sock = new WebSocket(url, {
          headers: {
            Authorization: `Bearer ${this.apiKey}`,
            "OpenAI-Beta": "realtime=v1"
          }
        });
        const timer = setTimeout(() => {
          try {
            sock.close();
          } catch {
          }
          reject(new Error("OpenAI Realtime warmup connect timeout"));
        }, 5e3);
        sock.once("open", () => {
          clearTimeout(timer);
          resolve(sock);
        });
        sock.once("error", (err) => {
          clearTimeout(timer);
          reject(err);
        });
      });
      const sessionCreated = await new Promise((resolve) => {
        const timer = setTimeout(() => resolve(false), 2e3);
        const onMsg = (raw) => {
          try {
            const data = JSON.parse(raw.toString());
            if (data.type === "session.created") {
              clearTimeout(timer);
              ws.off("message", onMsg);
              resolve(true);
            }
          } catch {
          }
        };
        ws.on("message", onMsg);
      });
      if (!sessionCreated) return;
      try {
        ws.send(JSON.stringify({ type: "session.update", session: this.buildSessionConfig() }));
      } catch {
        return;
      }
      await new Promise((resolve) => {
        const timer = setTimeout(() => resolve(), 1500);
        const onMsg = (raw) => {
          try {
            const data = JSON.parse(raw.toString());
            if (data.type === "session.updated") {
              clearTimeout(timer);
              ws.off("message", onMsg);
              resolve();
            }
          } catch {
          }
        };
        ws.on("message", onMsg);
      });
    } catch (err) {
      getLogger().debug(`OpenAI Realtime warmup failed (best-effort): ${String(err)}`);
    } finally {
      if (ws) {
        try {
          ws.close();
        } catch {
        }
      }
    }
  }
  /**
   * Open the Realtime WebSocket and apply the session configuration.
   *
   * Resolves once `session.updated` is received. Rejects on socket error,
   * on a failed `session.update` send, or after 15 s; in every failure case
   * the socket is closed.
   */
  async connect() {
    const url = `wss://api.openai.com/v1/realtime?model=${encodeURIComponent(this.model)}`;
    // A brand-new socket needs its own dispatch listener even if a previous
    // socket already had one attached.
    this.messageListenerAttached = false;
    this.ws = new WebSocket(url, {
      headers: {
        Authorization: `Bearer ${this.apiKey}`,
        "OpenAI-Beta": "realtime=v1"
      }
    });
    await new Promise((resolve, reject) => {
      let sessionCreated = false;
      let settled = false;
      const ws = this.ws;
      const fail = (err) => {
        if (settled) return;
        cleanup();
        // Closing a socket that is still connecting makes `ws` emit a late
        // 'error'; with the setup listener gone that would be an unhandled
        // 'error' event, so keep a no-op listener during teardown.
        ws.on("error", () => {
        });
        try {
          ws.close();
        } catch {
        }
        reject(err);
      };
      const onSetupMessage = (raw) => {
        let msg;
        try {
          msg = JSON.parse(raw.toString());
        } catch (e) {
          getLogger().warn(`OpenAI Realtime: failed to parse message: ${String(e)}`);
          return;
        }
        if (msg.type === "session.created" && !sessionCreated) {
          sessionCreated = true;
          try {
            ws.send(JSON.stringify({ type: "session.update", session: this.buildSessionConfig() }));
          } catch (err) {
            // A throw inside an event handler would otherwise escape the promise.
            fail(err instanceof Error ? err : new Error(String(err)));
          }
        } else if (msg.type === "session.updated") {
          cleanup();
          resolve();
        }
      };
      const onSetupError = (err) => fail(err);
      const cleanup = () => {
        if (settled) return;
        settled = true;
        clearTimeout(timer);
        ws.off("message", onSetupMessage);
        ws.off("error", onSetupError);
      };
      const timer = setTimeout(() => {
        fail(new Error("OpenAI Realtime connect timeout"));
      }, 15e3);
      ws.on("message", onSetupMessage);
      ws.on("error", onSetupError);
    });
    this.armHeartbeatAndListener();
  }
  /**
   * Adopt a pre-opened socket that has already completed the
   * `session.update` / `session.updated` exchange (see
   * `openParkedConnection`), skipping the handshake done by `connect()`.
   *
   * The caller must verify `ws.readyState === OPEN` first and fall back to
   * `connect()` if the parked socket died.
   */
  adoptWebSocket(ws) {
    // A different socket has no dispatch listener yet.
    if (this.ws !== ws) this.messageListenerAttached = false;
    this.ws = ws;
    this.armHeartbeatAndListener();
  }
  /** Start the 20 s ping heartbeat and attach the dispatch listener. */
  armHeartbeatAndListener() {
    // Re-arming must not leak the previous interval.
    if (this.heartbeat) {
      clearInterval(this.heartbeat);
    }
    this.heartbeat = setInterval(() => {
      try {
        this.ws?.ping();
      } catch {
      }
    }, 2e4);
    this.ensureMessageListener();
  }
  /**
   * Open a fresh socket, complete the `session.created` /
   * `session.update` / `session.updated` exchange and return the socket
   * WITHOUT arming the heartbeat or dispatch listener. The live consumer
   * takes it over via `adoptWebSocket`.
   *
   * Bounded by 8 s. Rejects on timeout, socket error or send failure, and
   * closes the socket in those cases so a failed park never leaks a
   * connection. Only `session.update` is sent; the model is not invoked.
   *
   * @returns The configured socket.
   */
  async openParkedConnection() {
    const url = `wss://api.openai.com/v1/realtime?model=${encodeURIComponent(this.model)}`;
    const ws = new WebSocket(url, {
      headers: {
        Authorization: `Bearer ${this.apiKey}`,
        "OpenAI-Beta": "realtime=v1"
      }
    });
    // No-op listener that outlives setup: an EventEmitter 'error' with no
    // listener throws, and a parked (or just-abandoned) socket may error
    // before anyone else is listening.
    const swallowLateError = () => {
    };
    await new Promise((resolve, reject) => {
      let settled = false;
      let sessionCreated = false;
      const fail = (err) => {
        if (settled) return;
        cleanup();
        try {
          ws.close();
        } catch {
        }
        reject(err);
      };
      const onMessage = (raw) => {
        let msg;
        try {
          msg = JSON.parse(raw.toString());
        } catch {
          return;
        }
        if (msg.type === "session.created" && !sessionCreated) {
          sessionCreated = true;
          try {
            ws.send(JSON.stringify({ type: "session.update", session: this.buildSessionConfig() }));
          } catch (err) {
            fail(err instanceof Error ? err : new Error(String(err)));
          }
        } else if (msg.type === "session.updated") {
          cleanup();
          resolve();
        }
      };
      const onError = (err) => fail(err);
      const cleanup = () => {
        if (settled) return;
        settled = true;
        clearTimeout(timer);
        ws.off("message", onMessage);
        ws.off("error", onError);
      };
      const timer = setTimeout(() => {
        fail(new Error("OpenAI Realtime park connect timeout"));
      }, 8e3);
      ws.on("error", swallowLateError);
      ws.on("message", onMessage);
      ws.on("error", onError);
    });
    return ws;
  }
  /** Append an audio chunk (sent base64-encoded) to the realtime input buffer. No-op unless the socket is OPEN. */
  sendAudio(mulawAudio) {
    if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
    this.ws.send(JSON.stringify({ type: "input_audio_buffer.append", audio: mulawAudio.toString("base64") }));
  }
  /**
   * Register a listener for parsed realtime events. All traffic goes
   * through one persistent socket handler that fans out to a Set of
   * callbacks; use `offEvent` to remove one.
   */
  onEvent(callback) {
    this.eventCallbacks.add(callback);
    this.ensureMessageListener();
  }
  /** Remove a callback previously registered with `onEvent`. */
  offEvent(callback) {
    this.eventCallbacks.delete(callback);
  }
  /** Attach the single message/close/error dispatcher to the current socket (once per socket). */
  ensureMessageListener() {
    if (this.messageListenerAttached || !this.ws) return;
    this.messageListenerAttached = true;
    const ws = this.ws;
    const dispatch = (type, payload) => {
      for (const cb of this.eventCallbacks) {
        // Callbacks may be sync or async; a failure in one must not affect the others.
        void Promise.resolve(cb(type, payload)).catch(
          (err) => getLogger().error("onEvent callback error:", err)
        );
      }
    };
    ws.on("message", (raw) => {
      let data;
      try {
        data = JSON.parse(raw.toString());
      } catch (e) {
        getLogger().warn(`OpenAI Realtime: failed to parse event message: ${String(e)}`);
        return;
      }
      const t = data.type;
      if (t === "response.audio.delta") {
        const buf = Buffer.from(data.delta ?? "", "base64");
        this.currentResponseAudioMs += estimateAudioMs(buf, this.audioFormat);
        if (this.currentResponseFirstAudioAt === null) {
          this.currentResponseFirstAudioAt = Date.now();
        }
        dispatch("audio", buf);
      } else if (t === "response.audio_transcript.delta") {
        dispatch("transcript_output", data.delta);
      } else if (t === "response.content_part.added" || t === "response.output_item.added") {
        // A new assistant item started: restart playback accounting.
        const itemId = data.item?.id ?? data.item_id ?? null;
        if (itemId) {
          this.currentResponseItemId = itemId;
          this.currentResponseAudioMs = 0;
          this.currentResponseFirstAudioAt = null;
        }
      } else if (t === "input_audio_buffer.speech_started") {
        dispatch("speech_started", null);
      } else if (t === "input_audio_buffer.speech_stopped") {
        dispatch("speech_stopped", null);
      } else if (t === "conversation.item.input_audio_transcription.completed") {
        dispatch("transcript_input", data.transcript);
      } else if (t === "response.function_call_arguments.done") {
        dispatch("function_call", { call_id: data.call_id, name: data.name, arguments: data.arguments });
      } else if (t === "response.done") {
        this.currentResponseItemId = null;
        this.currentResponseAudioMs = 0;
        this.currentResponseFirstAudioAt = null;
        dispatch("response_done", data.response ?? null);
      } else if (t === "error") {
        dispatch("error", data.error);
      }
    });
    ws.on("close", (code, reason) => {
      // Only abnormal closes (anything but 1000) are surfaced as errors.
      if (code !== 1e3) {
        dispatch("error", {
          type: "connection_closed",
          code,
          reason: reason?.toString() ?? ""
        });
      }
    });
    ws.on("error", (err) => {
      dispatch("error", { type: "socket_error", message: err?.message ?? String(err) });
    });
  }
  /**
   * Truncate the in-flight assistant item and cancel the active response.
   *
   * `audio_end_ms` is the smaller of the byte-derived audio duration and
   * the wall-clock time since the first audio chunk, so it never exceeds
   * what real-time playback could have delivered to the caller.
   *
   * The tracking state is always reset, even when sending
   * `response.cancel` throws (the error still propagates).
   */
  cancelResponse() {
    if (!this.ws) return;
    try {
      if (this.currentResponseItemId) {
        let audioEndMs = this.currentResponseAudioMs;
        if (this.currentResponseFirstAudioAt !== null) {
          const elapsedMs = Date.now() - this.currentResponseFirstAudioAt;
          audioEndMs = Math.min(audioEndMs, Math.max(elapsedMs, 0));
        }
        try {
          this.ws.send(JSON.stringify({
            type: "conversation.item.truncate",
            item_id: this.currentResponseItemId,
            content_index: 0,
            audio_end_ms: audioEndMs
          }));
        } catch (err) {
          // Truncation is best-effort; the cancel below still has to go out.
          getLogger().debug?.(`conversation.item.truncate failed: ${String(err)}`);
        }
      }
      this.ws.send(JSON.stringify({ type: "response.cancel" }));
    } finally {
      // Stale ids would make the next barge-in truncate the wrong item.
      this.currentResponseItemId = null;
      this.currentResponseAudioMs = 0;
      this.currentResponseFirstAudioAt = null;
    }
  }
  /** Inject a user text turn and request a new response. */
  async sendText(text) {
    this.ws?.send(JSON.stringify({
      type: "conversation.item.create",
      item: { type: "message", role: "user", content: [{ type: "input_text", text }] }
    }));
    this.ws?.send(JSON.stringify({ type: "response.create" }));
  }
  /**
   * Make the assistant speak `text` as its opening line by sending
   * `response.create` with per-response instructions, rather than
   * injecting `text` as a user turn (which the model would reply to).
   */
  async sendFirstMessage(text) {
    this.ws?.send(JSON.stringify({
      type: "response.create",
      response: {
        modalities: ["audio", "text"],
        instructions: `Say exactly the following sentence as your first turn and nothing else: "${text}"`
      }
    }));
  }
  /** Submit a tool/function-call result and request the next response. */
  async sendFunctionResult(callId, result) {
    this.ws?.send(JSON.stringify({
      type: "conversation.item.create",
      item: { type: "function_call_output", call_id: callId, output: result }
    }));
    this.ws?.send(JSON.stringify({ type: "response.create" }));
  }
  /** Stop the heartbeat, drop registered callbacks, and close the socket. */
  close() {
    if (this.heartbeat) {
      clearInterval(this.heartbeat);
      this.heartbeat = null;
    }
    this.eventCallbacks.clear();
    this.messageListenerAttached = false;
    this.ws?.close();
    this.ws = null;
  }
};
569
/**
 * Estimate the playback duration of an audio chunk in whole milliseconds.
 *
 * G.711 (u-law / a-law) chunks are counted at 8 bytes per millisecond and
 * PCM16 chunks at 48 bytes per millisecond (presumably 8 kHz 8-bit and
 * 24 kHz 16-bit mono respectively — confirm against the session format).
 * The result is rounded down; empty chunks and unknown formats yield 0.
 *
 * @param chunk  Raw audio bytes (anything with a `length`).
 * @param format One of the `OpenAIRealtimeAudioFormat` values.
 * @returns Whole milliseconds of audio in `chunk`.
 */
function estimateAudioMs(chunk, format) {
  const byteCount = chunk.length;
  if (byteCount === 0) return 0;
  switch (format) {
    case OpenAIRealtimeAudioFormat.G711_ULAW:
    case OpenAIRealtimeAudioFormat.G711_ALAW:
      return Math.floor(byteCount / 8);
    case OpenAIRealtimeAudioFormat.PCM16:
      return Math.floor(byteCount / 48);
    default:
      return 0;
  }
}
578
-
579
- // src/providers/openai-realtime-2.ts
580
- init_esm_shims();
581
- import WebSocket2 from "ws";
582
-
583
- // src/audio/transcoding.ts
584
- init_esm_shims();
585
// 256-entry lookup: u-law byte -> signed 16-bit linear PCM sample.
// Each byte is bit-inverted, then split into sign (bit 7), exponent
// (bits 4-6) and mantissa (bits 0-3); the magnitude is rebuilt with the
// 33 / 132 bias constants used by the matching encoder table.
var MULAW_TO_PCM16_TABLE = Int16Array.from({ length: 256 }, (_, code) => {
  const inverted = ~code & 0xff;
  const isNegative = (inverted & 0x80) !== 0;
  const exponent = (inverted >> 4) & 0x07;
  const mantissa = inverted & 0x0f;
  const magnitude = (((mantissa << 1) | 33) << (exponent + 2)) - 132;
  return isNegative ? -magnitude : magnitude;
});
597
// 65536-entry lookup: 16-bit PCM sample (indexed by its unsigned 16-bit
// bit pattern) -> u-law byte. Magnitudes are clipped to 32635, biased by
// 132, and encoded as sign | 3-bit exponent | 4-bit mantissa, then inverted.
var PCM16_TO_MULAW_TABLE = (() => {
  const BIAS = 132;
  const CLIP = 32635;
  const encode = (word) => {
    // Re-interpret the table index as a signed 16-bit sample.
    const signed = word >= 0x8000 ? word - 0x10000 : word;
    const signBit = signed < 0 ? 0x80 : 0;
    const biased = Math.min(Math.abs(signed), CLIP) + BIAS;
    // `biased` lies in [132, 32767], so its top set bit is bit 7..14 and
    // the exponent (top-bit index minus 7) lands in 0..7.
    const exponent = 31 - Math.clz32(biased) - 7;
    const mantissa = (biased >> (exponent + 3)) & 0x0f;
    return ~(signBit | (exponent << 4) | mantissa) & 0xff;
  };
  return Uint8Array.from({ length: 65536 }, (_, word) => encode(word));
})();
618
/**
 * Decode u-law bytes to 16-bit little-endian PCM via `MULAW_TO_PCM16_TABLE`.
 *
 * @param mulawData Indexable byte sequence (one u-law code per element).
 * @returns Buffer holding two bytes per input byte.
 */
function mulawToPcm16(mulawData) {
  const byteCount = mulawData.length;
  const pcm = Buffer.alloc(byteCount * 2);
  let idx = 0;
  while (idx < byteCount) {
    const sample = MULAW_TO_PCM16_TABLE[mulawData[idx]];
    pcm.writeInt16LE(sample, idx * 2);
    idx += 1;
  }
  return pcm;
}
625
/**
 * Encode 16-bit little-endian PCM to u-law via `PCM16_TO_MULAW_TABLE`.
 * A trailing odd byte, if any, is ignored.
 *
 * @param pcmData Buffer of PCM16-LE samples.
 * @returns Buffer holding one u-law byte per complete input sample.
 */
function pcm16ToMulaw(pcmData) {
  const total = Math.floor(pcmData.length / 2);
  const encoded = Buffer.alloc(total);
  for (let n = 0, offset = 0; n < total; n += 1, offset += 2) {
    // The table is indexed by the sample's unsigned 16-bit bit pattern,
    // which is exactly what an unsigned read returns.
    encoded[n] = PCM16_TO_MULAW_TABLE[pcmData.readUInt16LE(offset)];
  }
  return encoded;
}
634
/**
 * Keeps a PCM16 byte stream sample-aligned across chunk boundaries by
 * carrying a trailing odd byte from one `push` to the next.
 */
var PcmCarry = class {
  // The carried odd byte (1-byte Buffer) or null when the stream is aligned.
  pending = null;
  /**
   * Prepend any carried odd byte, return the even-length prefix, and stash
   * any new trailing odd byte for the next call.
   *
   * The stashed byte is copied rather than kept as a view, so the carry
   * neither changes if the caller later reuses `chunk`'s memory nor keeps
   * the whole backing buffer alive.
   *
   * @param chunk Next bytes of the stream.
   * @returns Even-length buffer (a view when no byte was carried in);
   *          zero-length when no complete sample is available yet.
   */
  push(chunk) {
    const combined = this.pending !== null ? Buffer.concat([this.pending, chunk]) : chunk;
    this.pending = null;
    const alignedLen = combined.length & ~1;
    if (alignedLen < combined.length) {
      this.pending = Buffer.from(combined.subarray(alignedLen));
    }
    return combined.subarray(0, alignedLen);
  }
  /**
   * Return the pending byte as a 1-byte buffer (only present when the
   * stream so far had an odd byte count) and clear it; returns an empty
   * buffer when nothing is pending.
   */
  flush() {
    if (this.pending === null) return Buffer.alloc(0);
    const out = this.pending;
    this.pending = null;
    return out;
  }
  /** Drop any carried byte without returning it. */
  reset() {
    this.pending = null;
  }
};
666
- var StatefulResampler = class {
667
- srcRate;
668
- dstRate;
669
- // 16k→8k: 5-tap FIR state.
670
- // Extended sample buffer carries the 2 history samples that precede the
671
- // current chunk AND any "pending" input sample that did not yet generate
672
- // output (i.e. the odd sample when the chunk had an odd sample count).
673
- // `firPhase` = 0 means the next output is at input position 0 of the
674
- // current chunk; 1 means it starts at input position 1 (because the
675
- // previous chunk ended on an even-output boundary).
676
- firHistory = new Int16Array(2);
677
- // [s_{-2}, s_{-1}]
678
- firHistoryValid = false;
679
- // Pending sample carried from odd-count chunks (not the byte carry —
680
- // this is a complete Int16 sample that becomes the first input for the
681
- // next call).
682
- firPendingSample = null;
683
- // 8k→16k: last input sample deferred across chunk boundaries.
684
- upsampleLast = 0;
685
- upsampleHasHistory = false;
686
- // 24k→16k: fractional phase and last input sample across chunks.
687
- resample24Last = 0;
688
- resample24Phase = 0;
689
- resample24HasHistory = false;
690
- // Odd-byte alignment carry.
691
- carry = new PcmCarry();
692
- constructor(opts) {
693
- this.srcRate = opts.srcRate;
694
- this.dstRate = opts.dstRate;
695
- if (opts.channels !== void 0 && opts.channels !== 1) {
696
- throw new Error("StatefulResampler: only mono (channels=1) is supported");
697
- }
698
- const key = `${this.srcRate}->${this.dstRate}`;
699
- if (key !== "16000->8000" && key !== "8000->16000" && key !== "24000->16000" && key !== "24000->8000") {
700
- throw new Error(
701
- `StatefulResampler: unsupported conversion ${key}. Supported: 16000->8000, 8000->16000, 24000->16000, 24000->8000`
702
- );
703
- }
704
- }
705
- /**
706
- * Process a chunk of PCM16-LE samples.
707
- *
708
- * Handles odd-byte inputs via an internal carry buffer. Returns an even-byte-
709
- * aligned output buffer; may return a zero-length buffer if not enough
710
- * aligned input is available yet.
711
- */
712
- process(pcm) {
713
- const aligned = this.carry.push(pcm);
714
- if (aligned.length === 0) return Buffer.alloc(0);
715
- if (this.srcRate === 16e3 && this.dstRate === 8e3) {
716
- return this._downsample16kTo8k(aligned);
717
- }
718
- if (this.srcRate === 8e3 && this.dstRate === 16e3) {
719
- return this._upsample8kTo16k(aligned);
720
- }
721
- if (this.srcRate === 24e3 && this.dstRate === 8e3) {
722
- return this._resample24kTo8k(aligned);
723
- }
724
- return this._resample24kTo16k(aligned);
725
- }
726
- /**
727
- * Flush internal state and return any remaining output samples.
728
- *
729
- * For 8k→16k: the deferred last sample is emitted duplicated (matching
730
- * the stateless helper's end-of-stream behaviour).
731
- * For 16k→8k: any pending odd sample is processed with edge-replication.
732
- * Resets all state after flushing.
733
- */
734
- flush() {
735
- this.carry.flush();
736
- if (this.srcRate === 16e3 && this.dstRate === 8e3 && this.firPendingSample !== null) {
737
- const s = this.firPendingSample;
738
- const tmp = Buffer.alloc(4);
739
- tmp.writeInt16LE(s, 0);
740
- tmp.writeInt16LE(s, 2);
741
- const out = this._downsample16kTo8k(tmp);
742
- this.firPendingSample = null;
743
- return out;
744
- }
745
- if (this.srcRate === 8e3 && this.dstRate === 16e3 && this.upsampleHasHistory) {
746
- const out = Buffer.alloc(4);
747
- out.writeInt16LE(this.upsampleLast, 0);
748
- out.writeInt16LE(this.upsampleLast, 2);
749
- this.upsampleHasHistory = false;
750
- this.upsampleLast = 0;
751
- return out;
752
- }
753
- return Buffer.alloc(0);
754
- }
755
- /** Reset all carried state (e.g. at call boundaries). */
756
- reset() {
757
- this.firHistory = new Int16Array(2);
758
- this.firHistoryValid = false;
759
- this.firPendingSample = null;
760
- this.upsampleLast = 0;
761
- this.upsampleHasHistory = false;
762
- this.resample24Last = 0;
763
- this.resample24Phase = 0;
764
- this.resample24HasHistory = false;
765
- this.carry.reset();
766
- }
767
- // ---------------------------------------------------------------------------
768
- // Private: 16 kHz → 8 kHz
769
- // ---------------------------------------------------------------------------
770
- /**
771
- * 2:1 decimation with a 5-tap binomial FIR anti-alias filter.
772
- *
773
- * FIR coefficients: [1, 4, 6, 4, 1] / 16 (cutoff ~Fs/4 = 4 kHz).
774
- *
775
- * Cross-chunk state:
776
- * - `firHistory[0]` = s_{-2}, `firHistory[1]` = s_{-1} relative to the
777
- * virtual stream (seeded to first-sample on the very first call).
778
- * - `firPendingSample` = a lone input sample carried from a chunk whose
779
- * sample count was odd; it will become the first input of the next chunk.
780
- *
781
- * Decimation: outputs are at even positions (0, 2, 4 …) in the virtual
782
- * extended stream, so every 2 input samples yield 1 output. An odd-sample-
783
- * count chunk leaves 1 sample in `firPendingSample`; the next chunk
784
- * prepends it so the output cadence is unbroken.
785
- */
786
- _downsample16kTo8k(buf) {
787
- const newSampleCount = buf.length >> 1;
788
- const hasPending = this.firPendingSample !== null;
789
- const totalInput = newSampleCount + (hasPending ? 1 : 0);
790
- const input = new Int16Array(totalInput);
791
- if (hasPending) {
792
- input[0] = this.firPendingSample;
793
- for (let j = 0; j < newSampleCount; j++) input[j + 1] = buf.readInt16LE(j * 2);
794
- } else {
795
- for (let j = 0; j < newSampleCount; j++) input[j] = buf.readInt16LE(j * 2);
796
- }
797
- this.firPendingSample = null;
798
- if (totalInput === 0) return Buffer.alloc(0);
799
- if (!this.firHistoryValid) {
800
- this.firHistory[0] = 0;
801
- this.firHistory[1] = 0;
802
- this.firHistoryValid = true;
803
- }
804
- const extended = new Int16Array(totalInput + 2);
805
- extended[0] = this.firHistory[0];
806
- extended[1] = this.firHistory[1];
807
- for (let j = 0; j < totalInput; j++) extended[j + 2] = input[j];
808
- const outSamples = totalInput >> 1;
809
- const out = Buffer.alloc(outSamples * 2);
810
- for (let i = 0; i < outSamples; i++) {
811
- const c = 2 + i * 2;
812
- const sM2 = extended[c - 2];
813
- const sM1 = extended[c - 1];
814
- const s0 = extended[c];
815
- const sP1 = c + 1 < extended.length ? extended[c + 1] : extended[extended.length - 1];
816
- const sP2 = c + 2 < extended.length ? extended[c + 2] : extended[extended.length - 1];
817
- const filtered = sM2 + 4 * sM1 + 6 * s0 + 4 * sP1 + sP2 + 8 >> 4;
818
- out.writeInt16LE(Math.max(-32768, Math.min(32767, filtered)), i * 2);
819
- }
820
- if (totalInput % 2 === 1) {
821
- this.firPendingSample = input[totalInput - 1];
822
- }
823
- if (totalInput >= 2) {
824
- this.firHistory[0] = input[totalInput - 2];
825
- this.firHistory[1] = input[totalInput - 1];
826
- } else {
827
- this.firHistory[0] = this.firHistory[1];
828
- this.firHistory[1] = input[0];
829
- }
830
- return out;
831
- }
832
- // ---------------------------------------------------------------------------
833
- // Private: 8 kHz → 16 kHz
834
- // ---------------------------------------------------------------------------
835
- /**
836
- * 1:2 linear-interpolation upsampler.
837
- *
838
- * For the first chunk (no history): emits 2*(N-1) samples and defers the
839
- * last sample. For subsequent chunks (with history): emits the deferred
840
- * sample + its interpolated midpoint THEN 2*(N-1) samples from the new
841
- * chunk, deferring the new last sample. Total across K chunks + flush =
842
- * 2*total_input_samples (correct output length).
843
- *
844
- * Call flush() after the final chunk to emit the last deferred sample
845
- * pair (self-duplicate at end of stream).
846
- */
847
- _upsample8kTo16k(buf) {
848
- const sampleCount = buf.length >> 1;
849
- if (sampleCount === 0) return Buffer.alloc(0);
850
- const outArr = [];
851
- if (this.upsampleHasHistory) {
852
- const next = buf.readInt16LE(0);
853
- outArr.push(this.upsampleLast);
854
- outArr.push(Math.round((this.upsampleLast + next) / 2));
855
- }
856
- for (let i = 0; i < sampleCount - 1; i++) {
857
- const s0 = buf.readInt16LE(i * 2);
858
- const s1 = buf.readInt16LE((i + 1) * 2);
859
- outArr.push(s0);
860
- outArr.push(Math.round((s0 + s1) / 2));
861
- }
862
- this.upsampleLast = buf.readInt16LE((sampleCount - 1) * 2);
863
- this.upsampleHasHistory = true;
864
- const outBuf = Buffer.alloc(outArr.length * 2);
865
- for (let j = 0; j < outArr.length; j++) outBuf.writeInt16LE(outArr[j], j * 2);
866
- return outBuf;
867
- }
868
- // ---------------------------------------------------------------------------
869
- // Private: 24 kHz → 16 kHz / 8 kHz
870
- // ---------------------------------------------------------------------------
871
- /**
872
- * 3:2 linear-interpolation decimator (ratio srcRate/dstRate = 1.5).
873
- *
874
- * `resample24Phase` tracks the fractional input position of the next output
875
- * sample relative to the START of the next chunk. Negative phase means the
876
- * next output straddles the previous/current chunk boundary; those are
877
- * handled using `resample24Last`.
878
- */
879
- _resample24kTo16k(buf) {
880
- return this._resample24kStep(buf, 24e3 / 16e3);
881
- }
882
- /** 3:1 decimation — collapses the 24k→16k→8k chain into a single step. */
883
- _resample24kTo8k(buf) {
884
- return this._resample24kStep(buf, 24e3 / 8e3);
885
- }
886
- /** Shared phase-stepping resampler used by 24→16 (step 1.5) and 24→8 (step 3). */
887
- _resample24kStep(buf, step) {
888
- const sampleCount = buf.length >> 1;
889
- if (sampleCount === 0) return Buffer.alloc(0);
890
- const outArr = [];
891
- let phase = this.resample24Phase;
892
- while (true) {
893
- const idx = Math.floor(phase);
894
- if (idx >= sampleCount) break;
895
- const frac = phase - idx;
896
- let s0;
897
- let s1;
898
- if (idx < 0) {
899
- s0 = this.resample24HasHistory ? this.resample24Last : 0;
900
- s1 = buf.readInt16LE(0);
901
- } else {
902
- s0 = buf.readInt16LE(idx * 2);
903
- s1 = idx + 1 < sampleCount ? buf.readInt16LE((idx + 1) * 2) : s0;
904
- }
905
- const interp = Math.round(s0 + (s1 - s0) * frac);
906
- outArr.push(Math.max(-32768, Math.min(32767, interp)));
907
- phase += step;
908
- }
909
- this.resample24Last = buf.readInt16LE((sampleCount - 1) * 2);
910
- this.resample24HasHistory = true;
911
- this.resample24Phase = phase - sampleCount;
912
- const outBuf = Buffer.alloc(outArr.length * 2);
913
- for (let j = 0; j < outArr.length; j++) outBuf.writeInt16LE(outArr[j], j * 2);
914
- return outBuf;
915
- }
916
- };
917
- function createResampler16kTo8k() {
918
- return new StatefulResampler({ srcRate: 16e3, dstRate: 8e3 });
919
- }
920
- function createResampler8kTo16k() {
921
- return new StatefulResampler({ srcRate: 8e3, dstRate: 16e3 });
922
- }
923
- function createResampler24kTo16k() {
924
- return new StatefulResampler({ srcRate: 24e3, dstRate: 16e3 });
925
- }
926
- function createResampler24kTo8k() {
927
- return new StatefulResampler({ srcRate: 24e3, dstRate: 8e3 });
928
- }
929
- var _warnedResample8kTo16k = false;
930
- var _warnedResample16kTo8k = false;
931
- var _warnedResample24kTo16k = false;
932
- function resample8kTo16k(pcm8k) {
933
- if (!_warnedResample8kTo16k) {
934
- _warnedResample8kTo16k = true;
935
- getLogger().warn(
936
- "[patter] resample8kTo16k() is deprecated. Use createResampler8kTo16k() (StatefulResampler) to eliminate chunk-boundary discontinuities."
937
- );
938
- }
939
- if (pcm8k.length === 0) return Buffer.alloc(0);
940
- const r = createResampler8kTo16k();
941
- const main = r.process(pcm8k);
942
- const tail = r.flush();
943
- return tail.length > 0 ? Buffer.concat([main, tail]) : main;
944
- }
945
- function resample16kTo8k(pcm16k) {
946
- if (!_warnedResample16kTo8k) {
947
- _warnedResample16kTo8k = true;
948
- getLogger().warn(
949
- "[patter] resample16kTo8k() is deprecated. Use createResampler16kTo8k() (StatefulResampler) to eliminate chunk-boundary discontinuities."
950
- );
951
- }
952
- if (pcm16k.length === 0) return Buffer.alloc(0);
953
- const r = createResampler16kTo8k();
954
- const out = r.process(pcm16k);
955
- const tail = r.flush();
956
- return tail.length > 0 ? Buffer.concat([out, tail]) : out;
957
- }
958
- function resample24kTo16k(pcm24k) {
959
- if (!_warnedResample24kTo16k) {
960
- _warnedResample24kTo16k = true;
961
- getLogger().warn(
962
- "[patter] resample24kTo16k() is deprecated. Use createResampler24kTo16k() (StatefulResampler) or OpenAITTS.resampleStreaming for anti-aliased resampling."
963
- );
964
- }
965
- if (pcm24k.length === 0) return Buffer.alloc(0);
966
- const sampleCount = Math.floor(pcm24k.length / 2);
967
- const outSamples = Math.floor(sampleCount * 2 / 3);
968
- const out = Buffer.alloc(outSamples * 2);
969
- for (let i = 0; i < outSamples; i++) {
970
- const pos = i * 1.5;
971
- const idx = Math.floor(pos);
972
- const frac = pos - idx;
973
- const s0 = pcm24k.readInt16LE(idx * 2);
974
- const s1 = idx + 1 < sampleCount ? pcm24k.readInt16LE((idx + 1) * 2) : s0;
975
- const interp = Math.round(s0 + (s1 - s0) * frac);
976
- out.writeInt16LE(Math.max(-32768, Math.min(32767, interp)), i * 2);
977
- }
978
- return out;
979
- }
980
-
981
- // src/providers/openai-realtime-2.ts
982
- var GA_TO_V1_EVENT_NAMES = {
983
- "response.output_audio.delta": "response.audio.delta",
984
- "response.output_audio.done": "response.audio.done",
985
- "response.output_audio_transcript.delta": "response.audio_transcript.delta",
986
- "response.output_audio_transcript.done": "response.audio_transcript.done"
987
- };
988
- var OpenAIRealtime2Adapter = class extends OpenAIRealtimeAdapter {
989
- /** Two-stage outbound resampler for 24 kHz → 8 kHz. Created lazily on
990
- * the first audio frame so each Realtime session has its own state.
991
- *
992
- * We chain `24k → 16k → 8k` instead of using the direct `24k → 8k`
993
- * variant of {@link StatefulResampler}: the direct path is a 3:1
994
- * decimation with linear interpolation only — no anti-alias filter
995
- * — so any energy above 4 kHz in the source aliases down into the
996
- * audible band and is heard as raspy/scratchy artefacts on speech.
997
- * `gpt-realtime-2` outputs voice with significant content above
998
- * 4 kHz. The second stage (16k → 8k) uses a 5-tap FIR anti-alias
999
- * filter which removes the offending band before decimation, and
1000
- * empirically (see commit message) the chain produces audibly
1001
- * cleaner output. The 24k → 16k step is still pure linear-interp
1002
- * but the inputs to it stay below the Nyquist of the 16 kHz stage,
1003
- * so it doesn't introduce new artefacts.
1004
- */
1005
- outboundResampler24To16 = null;
1006
- outboundResampler16To8 = null;
1007
- /** Last 8 kHz input sample carried across chunk boundaries for the
1008
- * direct 3× linear upsample (see `transcodeInboundMulaw8ToPcm24`).
1009
- * The carry guarantees the very first output of each chunk
1010
- * interpolates from the *real* preceding sample, not from the chunk's
1011
- * own first sample replicated — without it every 20 ms Twilio frame
1012
- * boundary becomes a small DC step that the GA server VAD interprets
1013
- * as constant low-energy noise, which never crosses the speech
1014
- * threshold. */
1015
- inbound8kCarry = null;
1016
- /** GA-shape `session.update` payload. See module-level docstring. */
1017
- buildGASessionConfig() {
1018
- const opts = this.options;
1019
- const fmt = { type: "audio/pcm", rate: 24e3 };
1020
- const config = {
1021
- type: "realtime",
1022
- output_modalities: opts.modalities ?? ["audio"],
1023
- audio: {
1024
- input: {
1025
- format: fmt,
1026
- transcription: {
1027
- model: opts.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
1028
- },
1029
- // Lower threshold (0.3 vs the 0.5 default) because the inbound
1030
- // audio is telephony-band (8 kHz) linearly upsampled to 24 kHz —
1031
- // the upper 4-12 kHz band is interpolation, not real harmonics,
1032
- // and the GA server VAD's default tuning was calibrated against
1033
- // studio-quality 24 kHz audio. A more permissive threshold
1034
- // recovers reliable speech detection on phone-band input.
1035
- turn_detection: {
1036
- type: opts.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
1037
- threshold: 0.1,
1038
- prefix_padding_ms: 300,
1039
- silence_duration_ms: opts.silenceDurationMs ?? 500
1040
- }
1041
- },
1042
- output: {
1043
- format: fmt,
1044
- voice: this.voice
1045
- }
1046
- },
1047
- instructions: this.instructions || "You are a helpful voice assistant. Be concise."
1048
- };
1049
- if (opts.temperature !== void 0) config.temperature = opts.temperature;
1050
- if (opts.maxResponseOutputTokens !== void 0) {
1051
- config.max_output_tokens = opts.maxResponseOutputTokens;
1052
- }
1053
- if (opts.toolChoice !== void 0) config.tool_choice = opts.toolChoice;
1054
- if (opts.reasoningEffort !== void 0) {
1055
- config.reasoning = { effort: opts.reasoningEffort };
1056
- }
1057
- if (this.tools?.length) {
1058
- config.tools = this.tools.map((t) => {
1059
- const def = {
1060
- type: "function",
1061
- name: t.name,
1062
- description: t.description,
1063
- parameters: t.parameters
1064
- };
1065
- if (t.strict === true) def.strict = true;
1066
- return def;
1067
- });
1068
- }
1069
- return config;
1070
- }
1071
- /**
1072
- * Open the Realtime WebSocket against the GA endpoint and apply the GA
1073
- * session configuration. Header `OpenAI-Beta: realtime=v1` is OMITTED
1074
- * (the GA endpoint rejects it). Wire shape uses nested `audio.{input,
1075
- * output}` + `output_modalities` + `session.type === "realtime"`.
1076
- */
1077
- async connect() {
1078
- const url = `wss://api.openai.com/v1/realtime?model=${encodeURIComponent(this.model)}`;
1079
- this.ws = new WebSocket2(url, {
1080
- headers: { Authorization: `Bearer ${this.apiKey}` }
1081
- });
1082
- const wsRef = this.ws;
1083
- const originalOn = wsRef.on.bind(this.ws);
1084
- wsRef.on = (event, handler) => {
1085
- if (event !== "message") return originalOn(event, handler);
1086
- const wrapped = (raw, ...rest) => {
1087
- try {
1088
- const text = typeof raw === "string" ? raw : raw.toString();
1089
- const parsed = JSON.parse(text);
1090
- const t = parsed.type;
1091
- if (t && t in GA_TO_V1_EVENT_NAMES) {
1092
- const newType = GA_TO_V1_EVENT_NAMES[t];
1093
- if (t === "response.output_audio.delta" && typeof parsed.delta === "string") {
1094
- const mulaw = this.transcodeOutboundPcm24ToMulaw8Buffer(parsed.delta);
1095
- const FRAME_BYTES = 160;
1096
- if (mulaw.length === 0) return;
1097
- for (let off = 0; off < mulaw.length; off += FRAME_BYTES) {
1098
- const slice = mulaw.subarray(off, Math.min(off + FRAME_BYTES, mulaw.length));
1099
- const frame = { ...parsed, type: newType, delta: slice.toString("base64") };
1100
- handler(Buffer.from(JSON.stringify(frame)), ...rest);
1101
- }
1102
- return;
1103
- }
1104
- parsed.type = newType;
1105
- handler(Buffer.from(JSON.stringify(parsed)), ...rest);
1106
- return;
1107
- }
1108
- } catch {
1109
- }
1110
- handler(raw, ...rest);
1111
- };
1112
- return originalOn(event, wrapped);
1113
- };
1114
- await new Promise((resolve, reject) => {
1115
- let sessionCreated = false;
1116
- let settled = false;
1117
- const ws = this.ws;
1118
- const onSetupMessage = (raw) => {
1119
- let msg;
1120
- try {
1121
- msg = JSON.parse(raw.toString());
1122
- } catch (e) {
1123
- getLogger().warn(`OpenAI Realtime 2: failed to parse message: ${String(e)}`);
1124
- return;
1125
- }
1126
- if (msg.type === "session.created" && !sessionCreated) {
1127
- sessionCreated = true;
1128
- ws.send(JSON.stringify({ type: "session.update", session: this.buildGASessionConfig() }));
1129
- } else if (msg.type === "session.updated") {
1130
- cleanup();
1131
- resolve();
1132
- } else if (msg.type === "error") {
1133
- cleanup();
1134
- try {
1135
- ws.close();
1136
- } catch {
1137
- }
1138
- reject(new Error(`OpenAI Realtime 2 setup error: ${msg.error?.message ?? JSON.stringify(msg)}`));
1139
- }
1140
- };
1141
- const onSetupError = (err) => {
1142
- cleanup();
1143
- try {
1144
- ws.close();
1145
- } catch {
1146
- }
1147
- reject(err);
1148
- };
1149
- const cleanup = () => {
1150
- if (settled) return;
1151
- settled = true;
1152
- clearTimeout(timer);
1153
- ws.off("message", onSetupMessage);
1154
- ws.off("error", onSetupError);
1155
- };
1156
- const timer = setTimeout(() => {
1157
- cleanup();
1158
- try {
1159
- ws.close();
1160
- } catch {
1161
- }
1162
- reject(new Error("OpenAI Realtime 2 connect timeout"));
1163
- }, 15e3);
1164
- ws.on("message", onSetupMessage);
1165
- ws.on("error", onSetupError);
1166
- });
1167
- this.armHeartbeatAndListener();
1168
- }
1169
- /**
1170
- * GA-API variant of {@link OpenAIRealtimeAdapter.sendFirstMessage}. Two
1171
- * differences from the v1 path:
1172
- *
1173
- * 1. The v1 implementation sends `response.modalities` which the GA
1174
- * endpoint rejects with `Unknown parameter: 'response.modalities'`.
1175
- * Use `output_modalities` to match the GA `session.update` shape.
1176
- *
1177
- * 2. The GA `response.create` does NOT inherit `audio.output.voice`
1178
- * from the session — it falls back to the server-side default
1179
- * (`marin`, female) when the field is omitted on the response
1180
- * itself. Session-level `voice: "alloy"` only affects subsequent
1181
- * server-VAD-triggered responses, NOT this explicit
1182
- * `response.create`. We re-inject the configured voice here so the
1183
- * first-message voice matches the rest of the call.
1184
- */
1185
- /**
1186
- * Override the parent `sendAudio` to transcode inbound carrier audio
1187
- * (mulaw 8 kHz from Twilio/Telnyx) into PCM-16 24 kHz before sending
1188
- * `input_audio_buffer.append`. The GA server's audio engine ignores
1189
- * mulaw frames (commit returns "buffer only has 0.00ms of audio") even
1190
- * though it accepts `audio/pcmu` at the protocol level.
1191
- */
1192
- sendAudio(mulawAudio) {
1193
- if (!this.ws || this.ws.readyState !== this.ws.OPEN) return;
1194
- const pcm24k = this.transcodeInboundMulaw8ToPcm24(mulawAudio);
1195
- this.ws.send(JSON.stringify({
1196
- type: "input_audio_buffer.append",
1197
- audio: pcm24k.toString("base64")
1198
- }));
1199
- }
1200
- /**
1201
- * mulaw 8 kHz Buffer → PCM-16-LE 24 kHz Buffer.
1202
- *
1203
- * Direct 3× linear-interpolation upsample with a one-sample carry
1204
- * across chunk boundaries. For every consecutive pair of 8 kHz
1205
- * samples `(s_a, s_b)` we emit three 24 kHz samples:
1206
- *
1207
- * out_0 = s_a
1208
- * out_1 = 2/3·s_a + 1/3·s_b
1209
- * out_2 = 1/3·s_a + 2/3·s_b
1210
- *
1211
- * The carry stores the last 8 kHz sample of the chunk so the next
1212
- * chunk can start by pairing `(carry, firstNewSample)` — that's what
1213
- * keeps the output rate exact (each input sample → 3 output samples)
1214
- * and eliminates the chunk-boundary DC step that confused the GA
1215
- * server VAD. The first chunk has no carry and loses 3 samples at
1216
- * the leading edge (375 µs of audio); that's well below any audible
1217
- * artefact and well below the GA VAD's 300 ms prefix-padding window.
1218
- */
1219
- transcodeInboundMulaw8ToPcm24(mulaw) {
1220
- const pcm8 = mulawToPcm16(mulaw);
1221
- const samples8 = pcm8.length / 2;
1222
- if (samples8 === 0) return Buffer.alloc(0);
1223
- const GAIN = 2;
1224
- const inputs = [];
1225
- if (this.inbound8kCarry !== null) inputs.push(this.inbound8kCarry);
1226
- for (let i = 0; i < samples8; i++) {
1227
- const raw = pcm8.readInt16LE(i * 2) * GAIN;
1228
- inputs.push(Math.max(-32768, Math.min(32767, raw)));
1229
- }
1230
- this.inbound8kCarry = inputs[inputs.length - 1];
1231
- const numPairs = inputs.length - 1;
1232
- if (numPairs <= 0) return Buffer.alloc(0);
1233
- const out = Buffer.allocUnsafe(numPairs * 3 * 2);
1234
- for (let i = 0; i < numPairs; i++) {
1235
- const s0 = inputs[i];
1236
- const s1 = inputs[i + 1];
1237
- out.writeInt16LE(s0, i * 6);
1238
- out.writeInt16LE(Math.round((s0 * 2 + s1) / 3), i * 6 + 2);
1239
- out.writeInt16LE(Math.round((s0 + s1 * 2) / 3), i * 6 + 4);
1240
- }
1241
- return out;
1242
- }
1243
- /**
1244
- * Base64 PCM-16-LE 24 kHz → Base64 mulaw 8 kHz. Used by the WS
1245
- * translation shim on each `response.output_audio.delta`. The stateful
1246
- * resampler is created lazily and reused across all deltas in this
1247
- * session so the 3:1 decimator's phase carries across chunk
1248
- * boundaries — without that, every chunk boundary produces a click.
1249
- */
1250
- transcodeOutboundPcm24ToMulaw8Buffer(deltaB64) {
1251
- if (!this.outboundResampler24To16) {
1252
- this.outboundResampler24To16 = new StatefulResampler({ srcRate: 24e3, dstRate: 16e3 });
1253
- this.outboundResampler16To8 = new StatefulResampler({ srcRate: 16e3, dstRate: 8e3 });
1254
- }
1255
- const pcm24 = Buffer.from(deltaB64, "base64");
1256
- const pcm16 = this.outboundResampler24To16.process(pcm24);
1257
- const pcm8 = this.outboundResampler16To8.process(pcm16);
1258
- if (pcm8.length === 0) return Buffer.alloc(0);
1259
- return pcm16ToMulaw(pcm8);
1260
- }
1261
- async sendFirstMessage(text) {
1262
- this.ws?.send(JSON.stringify({
1263
- type: "response.create",
1264
- response: {
1265
- output_modalities: ["audio"],
1266
- audio: { output: { voice: this.voice } },
1267
- reasoning: { effort: "minimal" },
1268
- instructions: `Say exactly the following sentence as your first turn and nothing else: "${text}"`
1269
- }
1270
- }));
1271
- }
1272
- };
1273
-
1274
32
  // src/providers/elevenlabs-convai.ts
1275
33
  init_esm_shims();
1276
- import WebSocket3 from "ws";
34
+ import WebSocket from "ws";
1277
35
  var ELEVENLABS_CONVAI_URL = "wss://api.elevenlabs.io/v1/convai/conversation";
1278
36
  var ELEVENLABS_SIGNED_URL = "https://api.elevenlabs.io/v1/convai/conversation/get-signed-url";
1279
37
  var AGENT_SILENCE_MS = 500;
@@ -1395,8 +153,8 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
1395
153
  wsUrl = this.agentId ? `${ELEVENLABS_CONVAI_URL}?agent_id=${encodeURIComponent(this.agentId)}` : ELEVENLABS_CONVAI_URL;
1396
154
  wsOptions = { headers: { "xi-api-key": this.apiKey } };
1397
155
  }
1398
- this.ws = new WebSocket3(wsUrl, wsOptions);
1399
- await new Promise((resolve, reject) => {
156
+ this.ws = new WebSocket(wsUrl, wsOptions);
157
+ await new Promise((resolve2, reject) => {
1400
158
  const timeout = setTimeout(
1401
159
  () => reject(new Error("ElevenLabs ConvAI connect timeout")),
1402
160
  15e3
@@ -1420,7 +178,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
1420
178
  conversation_config_override: override
1421
179
  };
1422
180
  this.ws.send(JSON.stringify(config));
1423
- resolve();
181
+ resolve2();
1424
182
  });
1425
183
  this.ws.once("error", (err) => {
1426
184
  clearTimeout(timeout);
@@ -1457,7 +215,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
1457
215
  }
1458
216
  respondToPing(eventId, delayMs) {
1459
217
  const send = () => {
1460
- if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
218
+ if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
1461
219
  try {
1462
220
  this.ws.send(JSON.stringify({ type: "pong", event_id: eventId }));
1463
221
  } catch (err) {
@@ -1554,7 +312,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
1554
312
  }
1555
313
  /** Send a caller-side audio chunk to ConvAI as a base64 `user_audio_chunk`. */
1556
314
  sendAudio(audioBytes) {
1557
- if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
315
+ if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
1558
316
  this.ws.send(
1559
317
  JSON.stringify({
1560
318
  user_audio_chunk: audioBytes.toString("base64")
@@ -1577,20 +335,20 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
1577
335
  return;
1578
336
  }
1579
337
  const ws = this.ws;
1580
- this.closePromise = new Promise((resolve) => {
1581
- if (ws.readyState === WebSocket3.CLOSED || ws.readyState === WebSocket3.CLOSING) {
1582
- resolve();
338
+ this.closePromise = new Promise((resolve2) => {
339
+ if (ws.readyState === WebSocket.CLOSED || ws.readyState === WebSocket.CLOSING) {
340
+ resolve2();
1583
341
  return;
1584
342
  }
1585
343
  const done = () => {
1586
- resolve();
344
+ resolve2();
1587
345
  };
1588
346
  ws.once("close", done);
1589
347
  ws.once("error", done);
1590
348
  try {
1591
349
  ws.close();
1592
350
  } catch {
1593
- resolve();
351
+ resolve2();
1594
352
  }
1595
353
  });
1596
354
  try {
@@ -1603,6 +361,157 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
1603
361
  }
1604
362
  };
1605
363
 
364
+ // src/providers/plivo-adapter.ts
365
+ init_esm_shims();
366
+ var PLIVO_API_BASE = "https://api.plivo.com/v1";
367
+ async function dropPlivoVoicemail(callUuid, voicemailMessage, authId, authToken) {
368
+ if (!callUuid || !voicemailMessage || !authId || !authToken) return;
369
+ const auth = `Basic ${Buffer.from(`${authId}:${authToken}`).toString("base64")}`;
370
+ const base = `${PLIVO_API_BASE}/Account/${encodeURIComponent(authId)}/Call/${encodeURIComponent(callUuid)}`;
371
+ try {
372
+ const speak = await fetch(`${base}/Speak/`, {
373
+ method: "POST",
374
+ headers: { "Content-Type": "application/x-www-form-urlencoded", Authorization: auth },
375
+ body: new URLSearchParams({ text: voicemailMessage }).toString(),
376
+ signal: AbortSignal.timeout(1e4)
377
+ });
378
+ if (!speak.ok) {
379
+ getLogger().warn(
380
+ `Plivo voicemail Speak failed (${speak.status}): ${(await speak.text()).slice(0, 200)}`
381
+ );
382
+ return;
383
+ }
384
+ await new Promise(
385
+ (r) => setTimeout(r, Math.min(3e4, voicemailMessage.length * 60))
386
+ );
387
+ await fetch(`${base}/`, { method: "DELETE", headers: { Authorization: auth } });
388
+ getLogger().info(`Voicemail dropped for ${callUuid}`);
389
+ } catch (e) {
390
+ getLogger().warn(`Could not drop voicemail: ${String(e)}`);
391
+ }
392
+ }
393
+ function xmlEscapePlivo(s) {
394
+ return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&apos;");
395
+ }
396
+ var PlivoAdapter = class {
397
+ authId;
398
+ baseUrl;
399
+ authHeader;
400
+ constructor(authId, authToken) {
401
+ if (!authId) throw new Error("PlivoAdapter: authId is required");
402
+ if (!authToken) throw new Error("PlivoAdapter: authToken is required");
403
+ this.authId = authId;
404
+ this.baseUrl = `${PLIVO_API_BASE}/Account/${encodeURIComponent(authId)}`;
405
+ this.authHeader = `Basic ${Buffer.from(`${authId}:${authToken}`).toString("base64")}`;
406
+ }
407
+ async request(method, path4, jsonBody) {
408
+ const headers = { Authorization: this.authHeader };
409
+ if (jsonBody !== void 0) headers["Content-Type"] = "application/json";
410
+ const response = await fetch(`${this.baseUrl}${path4}`, {
411
+ method,
412
+ headers,
413
+ body: jsonBody !== void 0 ? JSON.stringify(jsonBody) : void 0,
414
+ signal: AbortSignal.timeout(3e4)
415
+ });
416
+ const text = await response.text();
417
+ if (!response.ok && response.status !== 404) {
418
+ throw new Error(`Plivo ${method} ${path4} failed: ${response.status} ${text}`);
419
+ }
420
+ let data = {};
421
+ if (text) {
422
+ try {
423
+ data = JSON.parse(text);
424
+ } catch {
425
+ }
426
+ }
427
+ return { status: response.status, data };
428
+ }
429
+ /** Search and rent an available Plivo number for the given ISO country. */
430
+ async provisionNumber(countryIso) {
431
+ const { data } = await this.request(
432
+ "GET",
433
+ `/PhoneNumber/?country_iso=${encodeURIComponent(countryIso)}&limit=1`
434
+ );
435
+ const number = data.objects?.[0]?.number;
436
+ if (!number) throw new Error(`PlivoAdapter: no numbers available for ${countryIso}`);
437
+ await this.request("POST", `/PhoneNumber/${encodeURIComponent(number)}/`);
438
+ return number;
439
+ }
440
+ /**
441
+ * Point the inbound answer flow for ``number`` at ``answerUrl`` by creating
442
+ * (or reusing) a Plivo Application and linking the number to it. Most
443
+ * production deployments pre-configure this in the Plivo console; this
444
+ * mirrors Twilio's ``configureNumber`` auto-setup convenience.
445
+ */
446
+ async configureNumber(number, answerUrl) {
447
+ const { data } = await this.request("POST", "/Application/", {
448
+ app_name: "patter-inbound",
449
+ answer_url: answerUrl,
450
+ answer_method: "POST"
451
+ });
452
+ if (!data.app_id) {
453
+ getLogger().warn("Plivo Application create returned no app_id");
454
+ return;
455
+ }
456
+ await this.request("POST", `/Number/${encodeURIComponent(number)}/`, { app_id: data.app_id });
457
+ }
458
+ /**
459
+ * Place an outbound Plivo call routed through ``answerUrl``. Returns Plivo's
460
+ * ``request_uuid``. The WSS URL travels inside the answer XML, not as a dial
461
+ * parameter — mirroring the Python adapter.
462
+ */
463
+ async initiateCall(opts) {
464
+ const payload = {
465
+ from: opts.from,
466
+ to: opts.to,
467
+ answer_url: opts.answerUrl,
468
+ answer_method: "POST"
469
+ };
470
+ if (opts.ringTimeout != null) payload.ring_timeout = Math.max(1, Math.floor(opts.ringTimeout));
471
+ if (opts.machineDetection) {
472
+ payload.machine_detection = "true";
473
+ payload.machine_detection_time = 5e3;
474
+ if (opts.machineDetectionUrl) {
475
+ payload.machine_detection_url = opts.machineDetectionUrl;
476
+ payload.machine_detection_method = "POST";
477
+ }
478
+ }
479
+ const { data } = await this.request("POST", "/Call/", payload);
480
+ return { requestUuid: data.request_uuid ?? "" };
481
+ }
482
+ /** Hang up an active Plivo call by CallUUID. 204 and 404 are both success. */
483
+ async endCall(callUuid) {
484
+ if (!callUuid) throw new Error("PlivoAdapter: callUuid is required");
485
+ try {
486
+ await this.request("DELETE", `/Call/${encodeURIComponent(callUuid)}/`);
487
+ } catch (err) {
488
+ getLogger().warn(`[PlivoAdapter] endCall failed for ${callUuid}: ${String(err)}`);
489
+ throw err;
490
+ }
491
+ }
492
+ /**
493
+ * Build the Plivo answer XML. Unlike Twilio (``url=`` attribute), Plivo's
494
+ * ``<Stream>`` takes the WSS URL as its **text content**. ``bidirectional``
495
+ * enables two-way audio; ``keepCallAlive`` keeps the leg up for the lifetime
496
+ * of the WebSocket. ``extraHeaders`` (comma-separated ``key=value``) is
497
+ * delivered back on the WS ``start`` frame as a caller/callee fallback.
498
+ *
499
+ * Mirrors the Python adapter's ``generate_stream_xml``.
500
+ */
501
+ static generateStreamXml(streamUrl, contentType = "audio/x-mulaw;rate=8000", extraHeaders) {
502
+ let attrs = `bidirectional="true" keepCallAlive="true" contentType="${xmlEscapePlivo(contentType)}"`;
503
+ if (extraHeaders) {
504
+ const joined = Object.entries(extraHeaders).map(([k, v]) => `${k}=${v}`).join(",");
505
+ attrs += ` extraHeaders="${xmlEscapePlivo(joined)}"`;
506
+ }
507
+ return `<Response><Stream ${attrs}>${xmlEscapePlivo(streamUrl)}</Stream></Response>`;
508
+ }
509
+ };
510
+
511
+ // src/telephony/plivo.ts
512
+ init_esm_shims();
513
+ import crypto from "crypto";
514
+
1606
515
  // src/provider-factory.ts
1607
516
  init_esm_shims();
1608
517
  async function createSTT(agent) {
@@ -1612,8 +521,172 @@ async function createTTS(agent) {
1612
521
  return agent.tts ?? null;
1613
522
  }
1614
523
 
524
+ // src/telephony/plivo.ts
525
+ var Carrier = class {
526
+ kind = "plivo";
527
+ authId;
528
+ authToken;
529
+ constructor(opts = {}) {
530
+ const authId = opts.authId ?? process.env.PLIVO_AUTH_ID;
531
+ const authToken = opts.authToken ?? process.env.PLIVO_AUTH_TOKEN;
532
+ if (!authId) {
533
+ throw new Error(
534
+ "Plivo carrier requires authId. Pass { authId: 'MA...' } or set PLIVO_AUTH_ID in the environment."
535
+ );
536
+ }
537
+ if (!authToken) {
538
+ throw new Error(
539
+ "Plivo carrier requires authToken. Pass { authToken: '...' } or set PLIVO_AUTH_TOKEN in the environment."
540
+ );
541
+ }
542
+ this.authId = authId;
543
+ this.authToken = authToken;
544
+ }
545
+ };
546
+ function classifyPlivoAmd(result) {
547
+ const r = (result || "").trim().toLowerCase();
548
+ if (r === "human" || r === "person") return "human";
549
+ if (r.startsWith("machine") || r === "answering_machine" || r === "amd" || r === "true") {
550
+ return "machine";
551
+ }
552
+ if (r === "fax") return "fax";
553
+ return "unknown";
554
+ }
555
+ function validatePlivoSignature(url, nonce, signature, authToken, params, method = "POST") {
556
+ if (!signature || !nonce || !authToken) return false;
557
+ let base = url;
558
+ if (method === "POST" && params && Object.keys(params).length > 0) {
559
+ const keys = Object.keys(params).sort();
560
+ base += keys.map((k) => `${k}${params[k]}`).join("");
561
+ }
562
+ const signed = `${base}.${nonce}`;
563
+ const expected = crypto.createHmac("sha256", authToken).update(signed).digest("base64");
564
+ const expBuf = Buffer.from(expected);
565
+ for (const rawSig of signature.split(",")) {
566
+ const trimmed = rawSig.trim();
567
+ if (!trimmed) continue;
568
+ try {
569
+ const sigBuf = Buffer.from(trimmed);
570
+ if (sigBuf.length === expBuf.length && crypto.timingSafeEqual(sigBuf, expBuf)) {
571
+ return true;
572
+ }
573
+ } catch {
574
+ continue;
575
+ }
576
+ }
577
+ return false;
578
+ }
579
+ var PLIVO_DTMF_ALLOWED = new Set("0123456789*#ABCDabcdwW");
580
+ var PlivoBridge = class {
581
+ constructor(config) {
582
+ this.config = config;
583
+ const authId = config.plivoAuthId ?? "";
584
+ const authToken = config.plivoAuthToken ?? "";
585
+ this.authHeader = `Basic ${Buffer.from(`${authId}:${authToken}`).toString("base64")}`;
586
+ this.apiBase = `https://api.plivo.com/v1/Account/${encodeURIComponent(authId)}`;
587
+ }
588
+ config;
589
+ label = "Plivo";
590
+ telephonyProvider = "plivo";
591
+ inputWireFormat = "ulaw_8000";
592
+ authHeader;
593
+ apiBase;
594
+ sendAudio(ws, audioBase64, _streamSid) {
595
+ ws.send(
596
+ JSON.stringify({
597
+ event: "playAudio",
598
+ media: { contentType: "audio/x-mulaw", sampleRate: 8e3, payload: audioBase64 }
599
+ })
600
+ );
601
+ }
602
+ sendMark(ws, markName, streamSid) {
603
+ ws.send(JSON.stringify({ event: "checkpoint", streamId: streamSid, name: markName }));
604
+ }
605
+ sendClear(ws, streamSid) {
606
+ ws.send(JSON.stringify({ event: "clearAudio", streamId: streamSid }));
607
+ }
608
+ async transferCall(callId, toNumber) {
609
+ if (!/^\+[1-9]\d{6,14}$/.test(toNumber)) {
610
+ getLogger().warn(`PlivoBridge.transferCall rejected: invalid target ${JSON.stringify(toNumber)}`);
611
+ return;
612
+ }
613
+ if (!this.config.plivoAuthId || !this.config.plivoAuthToken || !callId) return;
614
+ if (!this.config.webhookUrl) {
615
+ getLogger().warn("PlivoBridge.transferCall skipped: no webhookUrl for aleg_url");
616
+ return;
617
+ }
618
+ const alegUrl = `https://${this.config.webhookUrl}/webhooks/plivo/transfer?to=${encodeURIComponent(toNumber)}`;
619
+ await fetch(`${this.apiBase}/Call/${encodeURIComponent(callId)}/`, {
620
+ method: "POST",
621
+ headers: { "Content-Type": "application/json", Authorization: this.authHeader },
622
+ body: JSON.stringify({ legs: "aleg", aleg_url: alegUrl, aleg_method: "GET" })
623
+ });
624
+ getLogger().info(`Call transferred to ${toNumber}`);
625
+ }
626
+ async sendDtmf(ws, _callId, digits, _delayMs) {
627
+ const filtered = Array.from(digits ?? "").filter((d) => PLIVO_DTMF_ALLOWED.has(d)).join("");
628
+ if (!filtered) {
629
+ getLogger().warn(`PlivoBridge.sendDtmf: no valid digits in ${JSON.stringify(digits)}`);
630
+ return;
631
+ }
632
+ ws.send(JSON.stringify({ event: "sendDTMF", dtmf: filtered }));
633
+ }
634
+ async startRecording(callId) {
635
+ if (!this.config.plivoAuthId || !this.config.plivoAuthToken || !callId) return;
636
+ try {
637
+ const resp = await fetch(`${this.apiBase}/Call/${encodeURIComponent(callId)}/Record/`, {
638
+ method: "POST",
639
+ headers: { Authorization: this.authHeader }
640
+ });
641
+ if (!resp.ok) {
642
+ getLogger().warn(`Plivo record start failed (${resp.status}): ${(await resp.text()).slice(0, 200)}`);
643
+ } else {
644
+ getLogger().info("Plivo recording started");
645
+ }
646
+ } catch (e) {
647
+ getLogger().warn(`Plivo record start error: ${String(e)}`);
648
+ }
649
+ }
650
+ async endCall(callId, _ws) {
651
+ if (!this.config.plivoAuthId || !this.config.plivoAuthToken || !callId) return;
652
+ try {
653
+ const resp = await fetch(`${this.apiBase}/Call/${encodeURIComponent(callId)}/`, {
654
+ method: "DELETE",
655
+ headers: { Authorization: this.authHeader }
656
+ });
657
+ if (!resp.ok && resp.status !== 404) {
658
+ getLogger().warn(`Plivo hangup returned ${resp.status}`);
659
+ }
660
+ } catch {
661
+ }
662
+ }
663
+ createStt(agent) {
664
+ return createSTT(agent);
665
+ }
666
+ async queryTelephonyCost(metricsAcc, callId) {
667
+ if (!this.config.plivoAuthId || !this.config.plivoAuthToken || !callId) return;
668
+ try {
669
+ const resp = await fetch(`${this.apiBase}/Call/${encodeURIComponent(callId)}/`, {
670
+ headers: { Authorization: this.authHeader },
671
+ signal: AbortSignal.timeout(5e3)
672
+ });
673
+ if (resp.ok) {
674
+ const data = await resp.json();
675
+ if (data.total_amount != null) {
676
+ metricsAcc.setActualTelephonyCost(Math.abs(parseFloat(data.total_amount)));
677
+ getLogger().info(`Plivo actual cost: $${data.total_amount}`);
678
+ }
679
+ }
680
+ } catch (err) {
681
+ getLogger().debug(`queryTelephonyCost(plivo) failed: ${err?.message ?? err}`);
682
+ }
683
+ }
684
+ };
685
+
1615
686
  // src/pricing.ts
1616
687
  init_esm_shims();
688
+ var PRICING_VERSION = "2026.3";
689
+ var PRICING_LAST_UPDATED = "2026-05-08";
1617
690
  var PricingUnit = {
1618
691
  MINUTE: "minute",
1619
692
  THOUSAND_CHARS: "1k_chars",
@@ -1852,7 +925,7 @@ var DEFAULT_PRICING = {
1852
925
  // twilio default = US inbound local (the 99% case for voice agents receiving
1853
926
  // calls on a local number). For US toll-free inbound ($0.022/min) or US
1854
927
  // outbound local ($0.0140/min), override via Patter({ pricing: { twilio: {...} } }).
1855
- twilio: { unit: PricingUnit.MINUTE, price: 85e-4 },
928
+ twilio: { unit: PricingUnit.MINUTE, price: 85e-4, roundUp: true },
1856
929
  // Telnyx — direction-aware rates as of 2026-05-11.
1857
930
  // Sources:
1858
931
  // https://telnyx.com/pricing/elastic-sip
@@ -1870,7 +943,17 @@ var DEFAULT_PRICING = {
1870
943
  // price: 0.0035 } } })`` to bill all inbound at the lower rate.
1871
944
  telnyx: { unit: PricingUnit.MINUTE, price: 7e-3 },
1872
945
  telnyx_inbound: { unit: PricingUnit.MINUTE, price: 35e-4 },
1873
- telnyx_outbound: { unit: PricingUnit.MINUTE, price: 7e-3 }
946
+ telnyx_outbound: { unit: PricingUnit.MINUTE, price: 7e-3 },
947
+ // Plivo — official US pay-as-you-go voice rates (per minute; Plivo rounds
948
+ // partial minutes up like Twilio). Source: https://www.plivo.com/voice/pricing/
949
+ // US local inbound: $0.0055/min
950
+ // US local outbound: $0.0115/min
951
+ // US toll-free inbound: $0.0180/min (override via new Patter({ pricing }))
952
+ // The flat ``plivo`` key defaults to inbound local; the billed amount is
953
+ // also reconciled post-call from the Plivo CDR (``total_amount``).
954
+ plivo: { unit: PricingUnit.MINUTE, price: 55e-4, roundUp: true },
955
+ plivo_inbound: { unit: PricingUnit.MINUTE, price: 55e-4, roundUp: true },
956
+ plivo_outbound: { unit: PricingUnit.MINUTE, price: 0.0115, roundUp: true }
1874
957
  };
1875
958
  function cloneProviderEntry(entry) {
1876
959
  const out = { ...entry };
@@ -2056,15 +1139,35 @@ function calculateLlmCost(provider2, model, inputTokens, outputTokens, cacheRead
2056
1139
  function calculateTelephonyCost(provider2, durationSeconds, pricing) {
2057
1140
  const config = pricing[provider2];
2058
1141
  if (!config || config.unit !== "minute") return 0;
2059
- const minutes = provider2 === "twilio" ? Math.ceil(durationSeconds / 60) : durationSeconds / 60;
1142
+ const minutes = config.roundUp ? Math.ceil(durationSeconds / 60) : durationSeconds / 60;
2060
1143
  return minutes * (config.price ?? 0);
2061
1144
  }
2062
1145
 
2063
1146
  // src/dashboard/store.ts
2064
1147
  init_esm_shims();
2065
1148
  import { EventEmitter } from "events";
1149
+ import * as fs2 from "fs";
1150
+ import * as path2 from "path";
1151
+
1152
+ // src/version.ts
1153
+ init_esm_shims();
2066
1154
  import * as fs from "fs";
2067
1155
  import * as path from "path";
1156
+ function readVersion() {
1157
+ try {
1158
+ const pkgPath = path.resolve(__dirname, "..", "package.json");
1159
+ const pkg = JSON.parse(fs.readFileSync(pkgPath, "utf8"));
1160
+ return typeof pkg.version === "string" && pkg.version.length > 0 ? pkg.version : "";
1161
+ } catch {
1162
+ return "";
1163
+ }
1164
+ }
1165
+ var VERSION = readVersion();
1166
+
1167
+ // src/dashboard/store.ts
1168
+ function sdkVersion() {
1169
+ return VERSION;
1170
+ }
2068
1171
  var MetricsStore = class extends EventEmitter {
2069
1172
  maxCalls;
2070
1173
  calls = [];
@@ -2347,15 +1450,15 @@ var MetricsStore = class extends EventEmitter {
2347
1450
  persistDeletedIds() {
2348
1451
  if (this.deletedIdsPath === null) return;
2349
1452
  try {
2350
- const dir = path.dirname(this.deletedIdsPath);
2351
- fs.mkdirSync(dir, { recursive: true });
1453
+ const dir = path2.dirname(this.deletedIdsPath);
1454
+ fs2.mkdirSync(dir, { recursive: true });
2352
1455
  const tmp = this.deletedIdsPath + ".tmp";
2353
1456
  const payload = {
2354
1457
  version: 1,
2355
1458
  deleted_call_ids: Array.from(this.deletedCallIds).sort()
2356
1459
  };
2357
- fs.writeFileSync(tmp, JSON.stringify(payload, null, 2), "utf8");
2358
- fs.renameSync(tmp, this.deletedIdsPath);
1460
+ fs2.writeFileSync(tmp, JSON.stringify(payload, null, 2), "utf8");
1461
+ fs2.renameSync(tmp, this.deletedIdsPath);
2359
1462
  } catch (err) {
2360
1463
  getLogger().debug(
2361
1464
  `MetricsStore.persistDeletedIds: ${String(err)}`
@@ -2388,7 +1491,8 @@ var MetricsStore = class extends EventEmitter {
2388
1491
  avg_duration: 0,
2389
1492
  avg_latency_ms: 0,
2390
1493
  cost_breakdown: { stt: 0, tts: 0, llm: 0, telephony: 0 },
2391
- active_calls: this.activeCalls.size
1494
+ active_calls: this.activeCalls.size,
1495
+ sdk_version: sdkVersion()
2392
1496
  };
2393
1497
  }
2394
1498
  let totalCost = 0;
@@ -2427,7 +1531,8 @@ var MetricsStore = class extends EventEmitter {
2427
1531
  llm: Math.round(costLlm * 1e6) / 1e6,
2428
1532
  telephony: Math.round(costTel * 1e6) / 1e6
2429
1533
  },
2430
- active_calls: this.activeCalls.size
1534
+ active_calls: this.activeCalls.size,
1535
+ sdk_version: sdkVersion()
2431
1536
  };
2432
1537
  }
2433
1538
  /**
@@ -2463,11 +1568,11 @@ var MetricsStore = class extends EventEmitter {
2463
1568
  */
2464
1569
  hydrate(logRoot) {
2465
1570
  if (!logRoot) return 0;
2466
- const deletedIdsPath = path.join(logRoot, ".deleted_call_ids.json");
1571
+ const deletedIdsPath = path2.join(logRoot, ".deleted_call_ids.json");
2467
1572
  this.deletedIdsPath = deletedIdsPath;
2468
- if (fs.existsSync(deletedIdsPath)) {
1573
+ if (fs2.existsSync(deletedIdsPath)) {
2469
1574
  try {
2470
- const raw = fs.readFileSync(deletedIdsPath, "utf8");
1575
+ const raw = fs2.readFileSync(deletedIdsPath, "utf8");
2471
1576
  const payload = JSON.parse(raw);
2472
1577
  const arr = Array.isArray(payload.deleted_call_ids) ? payload.deleted_call_ids : [];
2473
1578
  for (const cid of arr) {
@@ -2481,19 +1586,19 @@ var MetricsStore = class extends EventEmitter {
2481
1586
  );
2482
1587
  }
2483
1588
  }
2484
- const callsRoot = path.join(logRoot, "calls");
2485
- if (!fs.existsSync(callsRoot)) return 0;
1589
+ const callsRoot = path2.join(logRoot, "calls");
1590
+ if (!fs2.existsSync(callsRoot)) return 0;
2486
1591
  const collected = [];
2487
1592
  const seen = new Set(this.calls.map((c) => c.call_id));
2488
1593
  const walk = (dir, depth) => {
2489
1594
  let entries;
2490
1595
  try {
2491
- entries = fs.readdirSync(dir, { withFileTypes: true });
1596
+ entries = fs2.readdirSync(dir, { withFileTypes: true });
2492
1597
  } catch {
2493
1598
  return;
2494
1599
  }
2495
1600
  for (const entry of entries) {
2496
- const childPath = path.join(dir, entry.name);
1601
+ const childPath = path2.join(dir, entry.name);
2497
1602
  if (depth < 3) {
2498
1603
  if (entry.isDirectory() && /^\d+$/.test(entry.name)) {
2499
1604
  walk(childPath, depth + 1);
@@ -2501,10 +1606,10 @@ var MetricsStore = class extends EventEmitter {
2501
1606
  continue;
2502
1607
  }
2503
1608
  if (!entry.isDirectory()) continue;
2504
- const metadataPath = path.join(childPath, "metadata.json");
2505
- if (!fs.existsSync(metadataPath)) continue;
1609
+ const metadataPath = path2.join(childPath, "metadata.json");
1610
+ if (!fs2.existsSync(metadataPath)) continue;
2506
1611
  try {
2507
- const raw = fs.readFileSync(metadataPath, "utf8");
1612
+ const raw = fs2.readFileSync(metadataPath, "utf8");
2508
1613
  const meta = JSON.parse(raw);
2509
1614
  const callId = meta.call_id || entry.name;
2510
1615
  if (!callId || seen.has(callId)) continue;
@@ -2517,7 +1622,7 @@ var MetricsStore = class extends EventEmitter {
2517
1622
  }
2518
1623
  if (!record.transcript || record.transcript.length === 0) {
2519
1624
  const fromJsonl = loadTranscriptJsonl(
2520
- path.join(childPath, "transcript.jsonl")
1625
+ path2.join(childPath, "transcript.jsonl")
2521
1626
  );
2522
1627
  if (fromJsonl.length > 0) record.transcript = fromJsonl;
2523
1628
  }
@@ -2596,8 +1701,8 @@ function metadataToCallRecord(callId, meta) {
2596
1701
  }
2597
1702
  function loadTranscriptJsonl(filePath) {
2598
1703
  try {
2599
- if (!fs.existsSync(filePath)) return [];
2600
- const raw = fs.readFileSync(filePath, "utf8");
1704
+ if (!fs2.existsSync(filePath)) return [];
1705
+ const raw = fs2.readFileSync(filePath, "utf8");
2601
1706
  const lines = raw.split("\n").filter((l) => l.trim().length > 0);
2602
1707
  const out = [];
2603
1708
  for (const line of lines) {
@@ -2640,15 +1745,15 @@ init_esm_shims();
2640
1745
 
2641
1746
  // src/dashboard/auth.ts
2642
1747
  init_esm_shims();
2643
- import crypto from "crypto";
1748
+ import crypto2 from "crypto";
2644
1749
  function timingSafeCompare(a, b) {
2645
1750
  const aBuf = Buffer.from(a);
2646
1751
  const bBuf = Buffer.from(b);
2647
1752
  if (aBuf.length !== bBuf.length) {
2648
- crypto.timingSafeEqual(aBuf, aBuf);
1753
+ crypto2.timingSafeEqual(aBuf, aBuf);
2649
1754
  return false;
2650
1755
  }
2651
- return crypto.timingSafeEqual(aBuf, bBuf);
1756
+ return crypto2.timingSafeEqual(aBuf, bBuf);
2652
1757
  }
2653
1758
  function makeAuthMiddleware(token = "") {
2654
1759
  return (req, res, next) => {
@@ -2731,7 +1836,7 @@ function csvEscape(value) {
2731
1836
 
2732
1837
  // src/dashboard/ui.ts
2733
1838
  init_esm_shims();
2734
- import { readFileSync as readFileSync2 } from "fs";
1839
+ import { readFileSync as readFileSync3 } from "fs";
2735
1840
  import { join as join2, dirname as dirname2 } from "path";
2736
1841
  var FALLBACK_HTML = `<!doctype html>
2737
1842
  <html><head><meta charset="utf-8"><title>Patter dashboard</title></head>
@@ -2748,9 +1853,9 @@ function loadDashboardHtml() {
2748
1853
  join2(here, "dashboard", "ui.html"),
2749
1854
  join2(here, "..", "dashboard", "ui.html")
2750
1855
  ];
2751
- for (const path3 of candidates) {
1856
+ for (const path4 of candidates) {
2752
1857
  try {
2753
- return readFileSync2(path3, "utf8");
1858
+ return readFileSync3(path4, "utf8");
2754
1859
  } catch {
2755
1860
  }
2756
1861
  }
@@ -2937,7 +2042,7 @@ function mountApi(app, store, token = "") {
2937
2042
 
2938
2043
  // src/remote-message.ts
2939
2044
  init_esm_shims();
2940
- import crypto2 from "crypto";
2045
+ import crypto3 from "crypto";
2941
2046
  var MAX_RESPONSE_BYTES = 64 * 1024;
2942
2047
  function validateWebSocketUrl(url) {
2943
2048
  let translated = url;
@@ -2965,7 +2070,7 @@ var RemoteMessageHandler = class {
2965
2070
  if (!this.webhookSecret) {
2966
2071
  throw new Error("Cannot sign without a webhookSecret");
2967
2072
  }
2968
- return crypto2.createHmac("sha256", this.webhookSecret).update(body).digest("hex");
2073
+ return crypto3.createHmac("sha256", this.webhookSecret).update(body).digest("hex");
2969
2074
  }
2970
2075
  /**
2971
2076
  * Release resources held by this handler.
@@ -3047,8 +2152,8 @@ var RemoteMessageHandler = class {
3047
2152
  "WebSocket URL uses unencrypted ws:// \u2014 call transcripts and phone numbers will be sent in plaintext. Use wss:// in production."
3048
2153
  );
3049
2154
  }
3050
- const { WebSocket: WebSocket5 } = await import("ws");
3051
- const ws = new WebSocket5(url);
2155
+ const { WebSocket: WebSocket3 } = await import("ws");
2156
+ const ws = new WebSocket3(url);
3052
2157
  const chunks = [];
3053
2158
  let done = false;
3054
2159
  let error = null;
@@ -3102,10 +2207,10 @@ var RemoteMessageHandler = class {
3102
2207
  }
3103
2208
  });
3104
2209
  try {
3105
- await new Promise((resolve, reject) => {
2210
+ await new Promise((resolve2, reject) => {
3106
2211
  ws.on("open", () => {
3107
2212
  ws.send(JSON.stringify(data));
3108
- resolve();
2213
+ resolve2();
3109
2214
  });
3110
2215
  ws.on("error", (err) => {
3111
2216
  reject(err);
@@ -3115,11 +2220,11 @@ var RemoteMessageHandler = class {
3115
2220
  yield chunks.shift();
3116
2221
  }
3117
2222
  while (!done && !error) {
3118
- const text = await new Promise((resolve) => {
2223
+ const text = await new Promise((resolve2) => {
3119
2224
  if (chunks.length > 0) {
3120
- resolve(chunks.shift());
2225
+ resolve2(chunks.shift());
3121
2226
  } else {
3122
- resolveNext = resolve;
2227
+ resolveNext = resolve2;
3123
2228
  }
3124
2229
  });
3125
2230
  if (text === null) break;
@@ -3146,7 +2251,7 @@ init_esm_shims();
3146
2251
 
3147
2252
  // src/providers/deepgram-stt.ts
3148
2253
  init_esm_shims();
3149
- import WebSocket4 from "ws";
2254
+ import WebSocket2 from "ws";
3150
2255
 
3151
2256
  // src/errors.ts
3152
2257
  init_esm_shims();
@@ -3327,8 +2432,8 @@ var DeepgramSTT = class _DeepgramSTT {
3327
2432
  const url = `${DEEPGRAM_WS_URL}?${params.toString()}`;
3328
2433
  let ws = null;
3329
2434
  try {
3330
- ws = await new Promise((resolve, reject) => {
3331
- const sock = new WebSocket4(url, {
2435
+ ws = await new Promise((resolve2, reject) => {
2436
+ const sock = new WebSocket2(url, {
3332
2437
  headers: { Authorization: `Token ${this.apiKey}` }
3333
2438
  });
3334
2439
  const timer = setTimeout(() => {
@@ -3340,7 +2445,7 @@ var DeepgramSTT = class _DeepgramSTT {
3340
2445
  }, 5e3);
3341
2446
  sock.once("open", () => {
3342
2447
  clearTimeout(timer);
3343
- resolve(sock);
2448
+ resolve2(sock);
3344
2449
  });
3345
2450
  sock.once("error", (err) => {
3346
2451
  clearTimeout(timer);
@@ -3367,11 +2472,11 @@ var DeepgramSTT = class _DeepgramSTT {
3367
2472
  }
3368
2473
  async openSocket() {
3369
2474
  const url = this.buildUrl();
3370
- const ws = new WebSocket4(url, {
2475
+ const ws = new WebSocket2(url, {
3371
2476
  headers: { Authorization: `Token ${this.apiKey}` }
3372
2477
  });
3373
2478
  this.ws = ws;
3374
- await new Promise((resolve, reject) => {
2479
+ await new Promise((resolve2, reject) => {
3375
2480
  let settled = false;
3376
2481
  const settle = (fn) => {
3377
2482
  if (settled) return;
@@ -3383,7 +2488,7 @@ var DeepgramSTT = class _DeepgramSTT {
3383
2488
  () => settle(() => reject(new PatterConnectionError("Deepgram connect timeout"))),
3384
2489
  1e4
3385
2490
  );
3386
- ws.once("open", () => settle(resolve));
2491
+ ws.once("open", () => settle(resolve2));
3387
2492
  ws.once("error", (err) => settle(() => reject(err)));
3388
2493
  ws.once("unexpected-response", (_req, res) => {
3389
2494
  const status = res?.statusCode ?? 0;
@@ -3404,7 +2509,7 @@ var DeepgramSTT = class _DeepgramSTT {
3404
2509
  ws.on("close", (code, reason) => this.handleClose(code, reason.toString()));
3405
2510
  ws.on("error", (err) => this.handleError(err));
3406
2511
  this.keepaliveTimer = setInterval(() => {
3407
- if (this.ws && this.ws.readyState === WebSocket4.OPEN) {
2512
+ if (this.ws && this.ws.readyState === WebSocket2.OPEN) {
3408
2513
  try {
3409
2514
  this.ws.send(JSON.stringify({ type: "KeepAlive" }));
3410
2515
  } catch {
@@ -3523,7 +2628,7 @@ var DeepgramSTT = class _DeepgramSTT {
3523
2628
  }
3524
2629
  /** Send a binary audio chunk to Deepgram for transcription. */
3525
2630
  sendAudio(audio) {
3526
- if (!this.ws || this.ws.readyState !== WebSocket4.OPEN) {
2631
+ if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) {
3527
2632
  this.audioDroppedCount++;
3528
2633
  if (this.audioDroppedCount === 1 || this.audioDroppedCount % 50 === 0) {
3529
2634
  getLogger().info(
@@ -3572,7 +2677,7 @@ var DeepgramSTT = class _DeepgramSTT {
3572
2677
  */
3573
2678
  finalize() {
3574
2679
  const ws = this.ws;
3575
- if (!ws || ws.readyState !== WebSocket4.OPEN) {
2680
+ if (!ws || ws.readyState !== WebSocket2.OPEN) {
3576
2681
  getLogger().info(
3577
2682
  `[DIAG] DeepgramSTT.finalize SKIPPED (ws state=${ws?.readyState ?? "null"})`
3578
2683
  );
@@ -3593,7 +2698,7 @@ var DeepgramSTT = class _DeepgramSTT {
3593
2698
  if (!ws) return;
3594
2699
  this.ws = null;
3595
2700
  const sendSafe = (payload) => {
3596
- if (ws.readyState === WebSocket4.OPEN) {
2701
+ if (ws.readyState === WebSocket2.OPEN) {
3597
2702
  try {
3598
2703
  ws.send(payload);
3599
2704
  } catch {
@@ -3607,7 +2712,7 @@ var DeepgramSTT = class _DeepgramSTT {
3607
2712
  } catch {
3608
2713
  }
3609
2714
  };
3610
- if (ws.readyState !== WebSocket4.OPEN) {
2715
+ if (ws.readyState !== WebSocket2.OPEN) {
3611
2716
  finishClose();
3612
2717
  return;
3613
2718
  }
@@ -3676,6 +2781,21 @@ var CallMetricsAccumulator = class {
3676
2781
  _bargeinStoppedAt = null;
3677
2782
  _turnUserText = "";
3678
2783
  _turnSttAudioSeconds = 0;
2784
+ /**
2785
+ * Guard against the recordTurnInterrupted / recordTurnComplete race.
2786
+ *
2787
+ * A VAD-path barge-in fires ``recordTurnInterrupted`` synchronously
2788
+ * inside ``handleAudioAsync`` while the in-flight pipeline LLM stream
2789
+ * keeps unwinding on its own task. When the LLM stream eventually
2790
+ * exits, the existing pipeline path falls through to
2791
+ * ``recordTurnComplete``, which would push a second turn for the same
2792
+ * logical exchange (this time carrying ``user_text=''`` because the
2793
+ * field was already reset). ``_turnAlreadyClosed`` is flipped by
2794
+ * ``recordTurnInterrupted`` and read by ``recordTurnComplete`` so the
2795
+ * late ``recordTurnComplete`` becomes a no-op until the next
2796
+ * ``startTurn`` re-arms the accumulator.
2797
+ */
2798
+ _turnAlreadyClosed = false;
3679
2799
  // Cumulative usage counters
3680
2800
  _totalSttAudioSeconds = 0;
3681
2801
  _totalTtsCharacters = 0;
@@ -3773,6 +2893,7 @@ var CallMetricsAccumulator = class {
3773
2893
  this._bargeinStoppedAt = null;
3774
2894
  this._turnUserText = "";
3775
2895
  this._turnSttAudioSeconds = 0;
2896
+ this._turnAlreadyClosed = false;
3776
2897
  this._vadStoppedAt = null;
3777
2898
  this._sttFinalAt = null;
3778
2899
  this._turnCommittedAt = null;
@@ -3929,8 +3050,18 @@ var CallMetricsAccumulator = class {
3929
3050
  recordTtsStopped(ts) {
3930
3051
  this._bargeinStoppedAt = ts ?? hrTimeMs();
3931
3052
  }
3932
- /** Close the current turn cleanly and append a `TurnMetrics` record. */
3053
+ /**
3054
+ * Close the current turn cleanly and append a `TurnMetrics` record.
3055
+ *
3056
+ * Returns ``null`` when ``recordTurnInterrupted`` has already closed
3057
+ * the current turn — this protects against the VAD-barge-in /
3058
+ * pipeline-LLM race where both paths try to finalise the same logical
3059
+ * turn and the second would otherwise push a phantom entry with
3060
+ * ``user_text=''``. The caller treats ``null`` as "nothing to emit";
3061
+ * ``emitTurnMetrics`` is already null-safe.
3062
+ */
3933
3063
  recordTurnComplete(agentText) {
3064
+ if (this._turnAlreadyClosed) return null;
3934
3065
  const latency = this._computeTurnLatency();
3935
3066
  const turn = {
3936
3067
  turn_index: this._turns.length,
@@ -3943,13 +3074,23 @@ var CallMetricsAccumulator = class {
3943
3074
  };
3944
3075
  this._turns.push(turn);
3945
3076
  this._resetTurnState();
3077
+ this._turnAlreadyClosed = true;
3946
3078
  this._eventBus?.emit("turn_ended", { callId: this.callId, turn });
3947
3079
  this._eventBus?.emit("metrics_collected", { callId: this.callId, turn });
3948
3080
  return turn;
3949
3081
  }
3950
- /** Close the current turn as interrupted (barge-in) and return the recorded metrics. */
3082
+ /**
3083
+ * Close the current turn as interrupted (barge-in) and return the
3084
+ * recorded metrics. Returns ``null`` when no turn is open, OR when
3085
+ * ``recordTurnComplete`` has already finalised the current turn —
3086
+ * bidirectional parity with the guard at the top of
3087
+ * ``recordTurnComplete``. Prevents an out-of-order interruption (e.g.
3088
+ * a future refactor that reorders the bargein + LLM-unwind paths)
3089
+ * from overwriting a turn that the complete path already emitted.
3090
+ */
3951
3091
  recordTurnInterrupted() {
3952
3092
  if (this._turnStart === null) return null;
3093
+ if (this._turnAlreadyClosed) return null;
3953
3094
  const latency = this._computeTurnLatency();
3954
3095
  const turn = {
3955
3096
  turn_index: this._turns.length,
@@ -3964,6 +3105,7 @@ var CallMetricsAccumulator = class {
3964
3105
  this._eventBus?.emit("turn_ended", { callId: this.callId, turn });
3965
3106
  this._eventBus?.emit("metrics_collected", { callId: this.callId, turn });
3966
3107
  this._resetTurnState();
3108
+ this._turnAlreadyClosed = true;
3967
3109
  this._turnCommittedMono = null;
3968
3110
  this._endpointSignalAt = null;
3969
3111
  return turn;
@@ -5236,6 +4378,35 @@ function maskPhoneNumber(number) {
5236
4378
  function isValidE164(number) {
5237
4379
  return /^\+[1-9]\d{6,14}$/.test(number);
5238
4380
  }
4381
+ function augmentWithBuiltinHandoffTools(userTools, callbacks) {
4382
+ const out = [...userTools ?? []];
4383
+ if (callbacks.transferCall) {
4384
+ const transferCall = callbacks.transferCall;
4385
+ out.push({
4386
+ ...TRANSFER_CALL_TOOL,
4387
+ handler: async (args) => {
4388
+ const number = typeof args.number === "string" ? args.number : "";
4389
+ if (!isValidE164(number)) {
4390
+ return JSON.stringify({ error: "Invalid phone number format", status: "rejected" });
4391
+ }
4392
+ await transferCall(number);
4393
+ return JSON.stringify({ status: "transferring", to: number });
4394
+ }
4395
+ });
4396
+ }
4397
+ if (callbacks.endCall) {
4398
+ const endCall = callbacks.endCall;
4399
+ out.push({
4400
+ ...END_CALL_TOOL,
4401
+ handler: async (args) => {
4402
+ const reason = typeof args.reason === "string" ? args.reason : "conversation_complete";
4403
+ await endCall(reason);
4404
+ return JSON.stringify({ status: "ending", reason });
4405
+ }
4406
+ });
4407
+ }
4408
+ return out;
4409
+ }
5239
4410
  var HALLUCINATIONS = /* @__PURE__ */ new Set([
5240
4411
  "you",
5241
4412
  "thank you",
@@ -5252,7 +4423,23 @@ var HALLUCINATIONS = /* @__PURE__ */ new Set([
5252
4423
  ".",
5253
4424
  "bye",
5254
4425
  "right",
5255
- "cool"
4426
+ "cool",
4427
+ // Whisper YouTube-caption hallucinations
4428
+ "thank you for watching",
4429
+ "thanks for watching",
4430
+ "thank you for watching!",
4431
+ "thanks for watching!",
4432
+ "thank you so much for watching",
4433
+ "thanks for listening",
4434
+ "please subscribe",
4435
+ "subscribe",
4436
+ "music",
4437
+ "[music]",
4438
+ "\u266A",
4439
+ "[no audio]",
4440
+ "[silence]",
4441
+ "[blank_audio]",
4442
+ "(silence)"
5256
4443
  ]);
5257
4444
  var StreamHandler = class _StreamHandler {
5258
4445
  deps;
@@ -5378,13 +4565,17 @@ var StreamHandler = class _StreamHandler {
5378
4565
  * Same as the AEC variant but for deployments where AEC is OFF
5379
4566
  * (default on PSTN — Twilio/Telnyx). Without an adaptive filter to
5380
4567
  * converge, the only justification for a gate is anti-flicker on
5381
- * micro-events (cough, click). 100 ms covers the first PSTN echo
5382
- * round-trip (~40-100 ms) while allowing barge-in from 100 ms into
5383
- * the agent's turn — covering nearly all of any response.
5384
- * Previously 250 ms, which blocked barge-in entirely on short (<500 ms)
5385
- * agent responses.
5386
- */
5387
- static MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_NO_AEC = 100;
4568
+ * micro-events (cough, click). Raised 100 → 500 ms on 2026-05-19
4569
+ * after the 0.6.2 acceptance run showed a phantom VAD speech_start
4570
+ * firing on the very first inbound frame (~500 ms into the call,
4571
+ * which is past a 100 ms gate). The phantom barge-in cancelled the
4572
+ * prewarmed firstMessage, the user heard a clipped (graffiante)
4573
+ * audio fragment, and the SDK left ``_turnAlreadyClosed=true`` so
4574
+ * subsequent ``recordTurnComplete`` calls were no-ops. 500 ms
4575
+ * filters those phantoms while still letting a real interruption
4576
+ * land within half a second of agent onset.
4577
+ */
4578
+ static MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_NO_AEC = 500;
5388
4579
  /** Handle for the pending grace-period timer, so it can be cleared on cleanup. */
5389
4580
  graceTimer = null;
5390
4581
  /**
@@ -5424,30 +4615,12 @@ var StreamHandler = class _StreamHandler {
5424
4615
  * coexist without name collisions even when firstMessage finishes while
5425
4616
  * a Realtime turn is still streaming.
5426
4617
  */
5427
- firstMessageMarkCounter = 0;
5428
- /**
5429
- * Maximum unconfirmed Twilio marks while streaming firstMessage. Each
5430
- * chunk is 40 ms of audio at 16 kHz PCM16, so a window of 3 caps
5431
- * the in-flight queue at ~120 ms. This means a barge-in's
5432
- * ``sendClear`` has at most 120 ms of already-buffered audio to flush
5433
- * — vs. ~2-5 s with the previous burst-send code, which was the
5434
- * root cause of "firstMessage non interrompibile". Higher values
5435
- * smooth playback under jittery RTT (each mark echo adds ~150-250 ms
5436
- * RTT on PSTN) at the cost of longer barge-in latency; lower values
5437
- * risk under-buffering. 3 hit the smallest barge-in cap without
5438
- * audible gaps in 2026-05 acceptance.
5439
- */
5440
- static FIRST_MESSAGE_MARK_WINDOW = 3;
5441
- /**
5442
- * Per-chunk soft timeout (ms) while awaiting a mark echo. Twilio's
5443
- * mark echoes typically arrive within 100-250 ms of audio playback.
5444
- * Capping at 500 ms guards against carriers (or test doubles) that
5445
- * never echo — without it a stalled echo would deadlock the loop and
5446
- * the agent would freeze mid-utterance. On timeout we drop the
5447
- * waiter from the queue and continue: playout may glitch by one
5448
- * chunk but the call stays alive.
5449
- */
5450
- static MARK_AWAIT_TIMEOUT_MS = 500;
4618
+ // firstMessageMarkCounter / FIRST_MESSAGE_MARK_WINDOW /
4619
+ // MARK_AWAIT_TIMEOUT_MS were retired with the move to the Twilio-FIFO-
4620
+ // trusts model (sendPacedFirstMessageBytes no longer emits marks).
4621
+ // Marks are still consumed via ``onMark`` for any adapter that wants
4622
+ // to round-trip one, but the firstMessage path no longer back-pressures
4623
+ // on them.
5451
4624
  /**
5452
4625
  * Minimum drain window (ms) between a ``cancelSpeaking`` and the next
5453
4626
  * ``beginSpeaking``. 150 ms covers a typical PSTN jitter buffer drain
@@ -5512,6 +4685,14 @@ var StreamHandler = class _StreamHandler {
5512
4685
  } catch {
5513
4686
  }
5514
4687
  }
4688
+ const ttsCancelable = this.tts;
4689
+ if (typeof ttsCancelable?.cancelActiveStream === "function") {
4690
+ try {
4691
+ ttsCancelable.cancelActiveStream();
4692
+ } catch (err) {
4693
+ getLogger().debug(`TTS cancelActiveStream raised: ${String(err)}`);
4694
+ }
4695
+ }
5515
4696
  }
5516
4697
  /**
5517
4698
  * Resolve every entry in ``pendingMarks`` and empty the queue. Idempotent
@@ -5528,56 +4709,19 @@ var StreamHandler = class _StreamHandler {
5528
4709
  }
5529
4710
  this.pendingMarks.length = 0;
5530
4711
  }
4712
+ // Mark-based back-pressure (sendMarkAwaitable / waitForMarkWindow)
4713
+ // was removed when sendPacedFirstMessageBytes switched to the
4714
+ // Twilio-FIFO-trusts model — see that method's doc comment for
4715
+ // rationale. ``pendingMarks`` and ``onMark`` are still kept so an
4716
+ // adapter that wants to round-trip a mark for some other purpose can
4717
+ // still do so without breaking the firstMessage path.
5531
4718
  /**
5532
- * Push a Twilio ``mark`` event AFTER the corresponding audio chunk and
5533
- * return a promise that resolves when the mark is echoed back via
5534
- * ``onMark`` (or when ``cancelSpeaking`` drains the queue, or after
5535
- * ``MARK_AWAIT_TIMEOUT_MS``). Returns null on non-Twilio carriers — the
5536
- * caller is expected to fall back to time-based pacing in that case.
5537
- */
5538
- sendMarkAwaitable() {
5539
- if (this.deps.bridge.telephonyProvider !== "twilio") return null;
5540
- this.firstMessageMarkCounter += 1;
5541
- const markName = `fm_${this.firstMessageMarkCounter}`;
5542
- let resolve;
5543
- const promise = new Promise((r) => {
5544
- resolve = r;
5545
- });
5546
- this.pendingMarks.push({ name: markName, resolve, promise });
5547
- try {
5548
- this.deps.bridge.sendMark(this.ws, markName, this.streamSid);
5549
- } catch (err) {
5550
- getLogger().debug(`sendMark failed (${markName}): ${String(err)}`);
5551
- const idx = this.pendingMarks.findIndex((m) => m.name === markName);
5552
- if (idx >= 0) this.pendingMarks.splice(idx, 1);
5553
- return Promise.resolve();
5554
- }
5555
- return promise;
5556
- }
5557
- /**
5558
- * If the in-flight mark queue is at or above ``FIRST_MESSAGE_MARK_WINDOW``
5559
- * entries, wait for the oldest entry to clear (mark echoed, agent
5560
- * cancelled, or per-mark timeout). Repeats until the queue depth is
5561
- * within the window — under high RTT the carrier may have several
5562
- * marks queued and we want every loop iteration to be naturally back-
5563
- * pressured by playback.
5564
- */
5565
- async waitForMarkWindow() {
5566
- while (this.isSpeaking && this.pendingMarks.length >= _StreamHandler.FIRST_MESSAGE_MARK_WINDOW) {
5567
- const oldest = this.pendingMarks[0];
5568
- const timeout = new Promise(
5569
- (resolve) => setTimeout(resolve, _StreamHandler.MARK_AWAIT_TIMEOUT_MS)
5570
- );
5571
- await Promise.race([oldest.promise, timeout]);
5572
- if (this.pendingMarks[0] === oldest) {
5573
- this.pendingMarks.shift();
5574
- }
5575
- }
5576
- }
5577
- /**
5578
- * Bytes-per-millisecond for a 16 kHz PCM16 mono stream. Used by the
5579
- * non-Twilio firstMessage pacing path to translate chunk size into a
5580
- * playout-duration sleep. 16000 samples/sec × 2 bytes = 32 bytes/ms.
4719
+ * Bytes-per-millisecond for a 16 kHz PCM16 mono stream. Used by
4720
+ * ``sendPacedFirstMessageBytes`` to translate chunk size into a
4721
+ * playout-duration sleep so we never deliver faster than the carrier
4722
+ * can decode + play out (which manifested as severe crackling on the
4723
+ * HTTP-TTS path with client-side resampling). 16000 samples/sec × 2
4724
+ * bytes/sample = 32 bytes/ms.
5581
4725
  */
5582
4726
  static PCM16_16K_BYTES_PER_MS = 32;
5583
4727
  /** Cancel and clear the pending grace timer, if any. */
@@ -5854,8 +4998,8 @@ var StreamHandler = class _StreamHandler {
5854
4998
  this.ttsByteCarry = null;
5855
4999
  }
5856
5000
  /**
5857
- * Start call recording when configured. Currently Twilio-only — bridges may
5858
- * expose ``startRecording`` for parity when we add other carriers.
5001
+ * Start call recording when configured. Bridges expose
5002
+ * ``startRecording`` for carrier parity (Twilio and Telnyx supported).
5859
5003
  */
5860
5004
  async startRecordingIfRequested(callId) {
5861
5005
  const { recording, config } = this.deps;
@@ -6015,7 +5159,7 @@ var StreamHandler = class _StreamHandler {
6015
5159
  if (activeVad && !this.vadDisabled) {
6016
5160
  try {
6017
5161
  const vadPromise = activeVad.processFrame(pcm16k, 16e3);
6018
- const timeoutPromise = new Promise((resolve) => setTimeout(() => resolve(null), 25));
5162
+ const timeoutPromise = new Promise((resolve2) => setTimeout(() => resolve2(null), 25));
6019
5163
  const evt = await Promise.race([vadPromise, timeoutPromise]);
6020
5164
  if (evt) {
6021
5165
  getLogger().info(
@@ -6101,7 +5245,7 @@ var StreamHandler = class _StreamHandler {
6101
5245
  this.metricsAcc.addSttAudioBytes(pcm16k.length);
6102
5246
  }
6103
5247
  } else if (this.adapter) {
6104
- if (this.adapter instanceof ElevenLabsConvAIAdapter && this.deps.bridge.telephonyProvider === "twilio" && this.adapter.inputAudioFormat !== "ulaw_8000") {
5248
+ if (this.adapter instanceof ElevenLabsConvAIAdapter && this.deps.bridge.inputWireFormat === "ulaw_8000" && this.adapter.inputAudioFormat !== "ulaw_8000") {
6105
5249
  const pcm8k = mulawToPcm16(audioBuffer);
6106
5250
  const pcm16k = this.inboundResampler.process(pcm8k);
6107
5251
  this.adapter.sendAudio(pcm16k);
@@ -6151,9 +5295,21 @@ var StreamHandler = class _StreamHandler {
6151
5295
  /** Handle call stop / stream end. */
6152
5296
  /** Handle a carrier-emitted `stop` event signalling the call has ended. */
6153
5297
  async handleStop() {
5298
+ if (this.llmAbort !== null) {
5299
+ try {
5300
+ this.llmAbort.abort();
5301
+ } catch {
5302
+ }
5303
+ }
5304
+ const ttsCancelable = this.tts;
5305
+ if (typeof ttsCancelable?.cancelActiveStream === "function") {
5306
+ try {
5307
+ ttsCancelable.cancelActiveStream();
5308
+ } catch {
5309
+ }
5310
+ }
6154
5311
  this.clearPendingBargeIn();
6155
5312
  this.drainPendingMarks();
6156
- this.firstMessageMarkCounter = 0;
6157
5313
  this.clearGraceTimer();
6158
5314
  this.flushResamplers();
6159
5315
  await this.closeSttOnce();
@@ -6166,9 +5322,21 @@ var StreamHandler = class _StreamHandler {
6166
5322
  /** Handle WebSocket close event. */
6167
5323
  /** Tear down adapter, STT/TTS, and per-call state when the carrier WebSocket closes. */
6168
5324
  async handleWsClose() {
5325
+ if (this.llmAbort !== null) {
5326
+ try {
5327
+ this.llmAbort.abort();
5328
+ } catch {
5329
+ }
5330
+ }
5331
+ const ttsCancelable = this.tts;
5332
+ if (typeof ttsCancelable?.cancelActiveStream === "function") {
5333
+ try {
5334
+ ttsCancelable.cancelActiveStream();
5335
+ } catch {
5336
+ }
5337
+ }
6169
5338
  this.clearPendingBargeIn();
6170
5339
  this.drainPendingMarks();
6171
- this.firstMessageMarkCounter = 0;
6172
5340
  this.clearGraceTimer();
6173
5341
  this.flushResamplers();
6174
5342
  await this.closeSttOnce();
@@ -6207,13 +5375,40 @@ var StreamHandler = class _StreamHandler {
6207
5375
  * Maintains a 1-byte carry across calls so unaligned HTTP chunks from
6208
5376
  * streaming TTS providers never byte-swap the PCM16 samples downstream.
6209
5377
  */
6210
- encodePipelineAudio(pcm16k) {
6211
- const aligned = this.alignPcm16(pcm16k);
5378
+ encodePipelineAudio(audioChunk) {
5379
+ if (this.ttsOutputFormatNativeForCarrier === true) {
5380
+ return audioChunk.toString("base64");
5381
+ }
5382
+ const aligned = this.alignPcm16(audioChunk);
6212
5383
  if (aligned.length === 0) return "";
6213
5384
  const pcm8k = this.outboundResampler.process(aligned);
6214
5385
  const mulaw = pcm16ToMulaw(pcm8k);
6215
5386
  return mulaw.toString("base64");
6216
5387
  }
5388
+ /**
5389
+ * Cached result of ``isTtsOutputFormatNativeForCarrier()`` — settled
5390
+ * once at ``initPipeline`` time after ``setTelephonyCarrier`` has run
5391
+ * on the TTS adapter. Stable for the call lifetime: changes to the
5392
+ * adapter's output format mid-call would NOT flip this. ``true`` means
5393
+ * ``encodePipelineAudio`` can take the bypass path.
5394
+ */
5395
+ ttsOutputFormatNativeForCarrier = false;
5396
+ /**
5397
+ * Probe whether the TTS adapter is configured to emit bytes already in
5398
+ * the carrier's wire codec. Currently: Twilio expects ``ulaw_8000``,
5399
+ * Telnyx expects ``pcm_16000`` (no client transcode in either case if
5400
+ * matched). Anything else takes the resample-and-encode path.
5401
+ */
5402
+ isTtsOutputFormatNativeForCarrier() {
5403
+ if (!this.tts) return false;
5404
+ const fmt = this.tts.outputFormat;
5405
+ if (typeof fmt !== "string") return false;
5406
+ const carrier = this.deps.bridge.telephonyProvider;
5407
+ if (carrier === "twilio") return fmt === "ulaw_8000";
5408
+ if (carrier === "telnyx") return fmt === "pcm_16000";
5409
+ if (carrier === "plivo") return fmt === "ulaw_8000";
5410
+ return false;
5411
+ }
6217
5412
  /**
6218
5413
  * Prepend any carry byte from the previous chunk, return the even-length
6219
5414
  * portion, and stash the final odd byte (if any) for the next call.
@@ -6224,17 +5419,11 @@ var StreamHandler = class _StreamHandler {
6224
5419
  this.ttsByteCarry = alignedLen < combined.length ? combined.subarray(alignedLen) : null;
6225
5420
  return combined.subarray(0, alignedLen);
6226
5421
  }
6227
- /**
6228
- * 40 ms @ 16 kHz mono PCM16 = 1280 bytes. Sized to mirror the smallest
6229
- * live-TTS chunk boundary so cancel granularity (mark/clear bookkeeping)
6230
- * is identical regardless of whether the firstMessage came from the
6231
- * prewarm cache or a live ``tts.synthesizeStream`` stream.
6232
- */
6233
- static PREWARM_CHUNK_BYTES = 1280;
6234
5422
  /**
6235
5423
  * Stream a cached firstMessage buffer in pacing-friendly chunks.
6236
5424
  *
6237
- * Splits ``prewarmBytes`` into ``PREWARM_CHUNK_BYTES`` slices and
5425
+ * Splits ``prewarmBytes`` into 20 ms slices (matching Twilio's PSTN
5426
+ * frame quantum) and
6238
5427
  * forwards each through ``deps.bridge.sendAudio`` exactly like the
6239
5428
  * live TTS path does — preserving Twilio mark/clear granularity. A
6240
5429
  * single multi-second sendAudio call would push the whole intro into
@@ -6250,7 +5439,7 @@ var StreamHandler = class _StreamHandler {
6250
5439
  return this.sendPacedFirstMessageBytes(prewarmBytes);
6251
5440
  }
6252
5441
  /**
6253
- * Iterate ``bytes`` as ``PREWARM_CHUNK_BYTES``-sized PCM16 slices and
5442
+ * Iterate ``bytes`` in 20 ms slices (Twilio PSTN frame quantum) and
6254
5443
  * forward each via ``deps.bridge.sendAudio`` with mark-gated pacing
6255
5444
  * (Twilio) or playout-time-based pacing (Telnyx). Caps the carrier-
6256
5445
  * side buffer at ``FIRST_MESSAGE_MARK_WINDOW`` chunks so a barge-in's
@@ -6267,30 +5456,20 @@ var StreamHandler = class _StreamHandler {
6267
5456
  */
6268
5457
  async sendPacedFirstMessageBytes(bytes) {
6269
5458
  if (this.pendingMarks.length > 0) this.drainPendingMarks();
6270
- this.firstMessageMarkCounter = 0;
6271
5459
  let firstChunkSent = false;
6272
- let initialFillComplete = false;
6273
- for (let i = 0; i < bytes.length; i += _StreamHandler.PREWARM_CHUNK_BYTES) {
6274
- if (!this.isSpeaking) break;
6275
- await this.waitForMarkWindow();
5460
+ const PSTN_FRAME_MS = 20;
5461
+ const bytesPerMs = this.ttsOutputFormatNativeForCarrier ? 8 : _StreamHandler.PCM16_16K_BYTES_PER_MS;
5462
+ const sliceBytes = bytesPerMs * PSTN_FRAME_MS;
5463
+ for (let i = 0; i < bytes.length; i += sliceBytes) {
6276
5464
  if (!this.isSpeaking) break;
6277
- const chunk = bytes.subarray(i, i + _StreamHandler.PREWARM_CHUNK_BYTES);
5465
+ const chunk = bytes.subarray(i, i + sliceBytes);
6278
5466
  if (!firstChunkSent) firstChunkSent = true;
6279
- if (this.aec) this.aec.pushFarEnd(chunk);
5467
+ if (this.aec && !this.ttsOutputFormatNativeForCarrier) {
5468
+ this.aec.pushFarEnd(chunk);
5469
+ }
6280
5470
  const encoded = this.encodePipelineAudio(chunk);
6281
5471
  this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
6282
5472
  this.markFirstAudioSent();
6283
- const markPromise = this.sendMarkAwaitable();
6284
- if (!initialFillComplete && this.pendingMarks.length >= _StreamHandler.FIRST_MESSAGE_MARK_WINDOW) {
6285
- initialFillComplete = true;
6286
- }
6287
- if (markPromise === null || initialFillComplete) {
6288
- const playoutMs = Math.max(
6289
- 1,
6290
- Math.floor(chunk.length / _StreamHandler.PCM16_16K_BYTES_PER_MS)
6291
- );
6292
- await new Promise((resolve) => setTimeout(resolve, playoutMs));
6293
- }
6294
5473
  }
6295
5474
  return firstChunkSent;
6296
5475
  }
@@ -6310,6 +5489,12 @@ var StreamHandler = class _StreamHandler {
6310
5489
  getLogger().debug(`TTS setTelephonyCarrier failed (${label}): ${String(e)}`);
6311
5490
  }
6312
5491
  }
5492
+ this.ttsOutputFormatNativeForCarrier = this.isTtsOutputFormatNativeForCarrier();
5493
+ if (this.ttsOutputFormatNativeForCarrier) {
5494
+ getLogger().debug(
5495
+ `TTS outputFormat matches ${this.deps.bridge.telephonyProvider} wire codec \u2014 bypassing client-side transcode`
5496
+ );
5497
+ }
6313
5498
  }
6314
5499
  if (!this.stt) {
6315
5500
  getLogger().debug(`Pipeline mode (${label}): no STT configured`);
@@ -6319,7 +5504,7 @@ var StreamHandler = class _StreamHandler {
6319
5504
  }
6320
5505
  if (!this.deps.agent.vad) {
6321
5506
  try {
6322
- const { SileroVAD } = await import("./silero-vad-NSEXI4XS.mjs");
5507
+ const { SileroVAD } = await import("./silero-vad-LNDFGIY7.mjs");
6323
5508
  this.autoVad = await SileroVAD.forPhoneCall();
6324
5509
  getLogger().info(
6325
5510
  `auto-VAD enabled (SileroVAD, phone preset). Pass agent.vad=\u2026 to override.`
@@ -6338,12 +5523,9 @@ var StreamHandler = class _StreamHandler {
6338
5523
  }
6339
5524
  }
6340
5525
  if (this.deps.agent.echoCancellation) {
6341
- const carrier = this.deps.bridge.telephonyProvider;
6342
- if (carrier === "twilio" || carrier === "telnyx") {
6343
- getLogger().warn(
6344
- `echoCancellation: true on ${carrier} (PSTN). Server-side NLMS cannot model PSTN's ~250\u20131500 ms round-trip echo with a 32 ms filter window \u2014 it will silently no-op. Best practice: keep echoCancellation: false; rely on the carrier + caller device's built-in echo suppression and Patter's self-hearing guard. Enable AEC only for browser/native deployments where the SDK owns the audio path end-to-end.`
6345
- );
6346
- }
5526
+ getLogger().warn(
5527
+ `echoCancellation: true on ${this.deps.bridge.telephonyProvider} (PSTN). Server-side NLMS cannot model PSTN's ~250\u20131500 ms round-trip echo with a 32 ms filter window \u2014 it will silently no-op. Best practice: keep echoCancellation: false; rely on the carrier + caller device's built-in echo suppression and Patter's self-hearing guard. Enable AEC only for browser/native deployments where the SDK owns the audio path end-to-end.`
5528
+ );
6347
5529
  try {
6348
5530
  const { NlmsEchoCanceller } = await import("./aec-PJJMUM5E.mjs");
6349
5531
  this.aec = new NlmsEchoCanceller({ sampleRate: 16e3 });
@@ -6476,13 +5658,20 @@ var StreamHandler = class _StreamHandler {
6476
5658
  );
6477
5659
  }
6478
5660
  const providerModel = this.deps.agent.llm?.model ?? "";
5661
+ const augmentedTools = augmentWithBuiltinHandoffTools(
5662
+ this.deps.agent.tools,
5663
+ {
5664
+ transferCall: (number) => this.deps.bridge.transferCall(this.callId, number),
5665
+ endCall: () => this.deps.bridge.endCall(this.callId, this.ws)
5666
+ }
5667
+ );
6479
5668
  this.llmLoop = new LLMLoop(
6480
5669
  "",
6481
5670
  // apiKey unused when llmProvider is supplied
6482
5671
  providerModel,
6483
5672
  // propagate so calculateLlmCost can match the price row
6484
5673
  resolvedPrompt,
6485
- this.deps.agent.tools,
5674
+ augmentedTools,
6486
5675
  this.deps.agent.llm,
6487
5676
  this.deps.agent.disablePhonePreamble ?? false
6488
5677
  );
@@ -6493,11 +5682,18 @@ var StreamHandler = class _StreamHandler {
6493
5682
  } else if (!this.deps.onMessage && this.deps.config.openaiKey) {
6494
5683
  let llmModel = this.deps.agent.model || "gpt-4o-mini";
6495
5684
  if (llmModel.includes("realtime")) llmModel = "gpt-4o-mini";
5685
+ const augmentedTools = augmentWithBuiltinHandoffTools(
5686
+ this.deps.agent.tools,
5687
+ {
5688
+ transferCall: (number) => this.deps.bridge.transferCall(this.callId, number),
5689
+ endCall: () => this.deps.bridge.endCall(this.callId, this.ws)
5690
+ }
5691
+ );
6496
5692
  this.llmLoop = new LLMLoop(
6497
5693
  this.deps.config.openaiKey,
6498
5694
  llmModel,
6499
5695
  resolvedPrompt,
6500
- this.deps.agent.tools,
5696
+ augmentedTools,
6501
5697
  void 0,
6502
5698
  this.deps.agent.disablePhonePreamble ?? false
6503
5699
  );
@@ -7012,16 +6208,49 @@ var StreamHandler = class _StreamHandler {
7012
6208
  async initRealtimeAdapter(resolvedPrompt) {
7013
6209
  const label = this.deps.bridge.label;
7014
6210
  this.adapter = this.deps.buildAIAdapter(resolvedPrompt);
7015
- try {
7016
- await this.adapter.connect();
7017
- getLogger().debug(`AI adapter connected (${label})`);
7018
- } catch (e) {
7019
- getLogger().error(`AI adapter connect FAILED (${label}):`, e);
6211
+ let parked;
6212
+ if (typeof this.deps.popPrewarmedConnections === "function") {
7020
6213
  try {
7021
- await this.deps.bridge.endCall(this.callId, this.ws);
7022
- } catch {
6214
+ parked = this.deps.popPrewarmedConnections(this.callId);
6215
+ } catch (err) {
6216
+ getLogger().debug(`popPrewarmedConnections raised: ${String(err)}`);
6217
+ }
6218
+ }
6219
+ const parkedRealtimeWs = parked?.openaiRealtime;
6220
+ let adoptOk = false;
6221
+ if (parkedRealtimeWs !== void 0) {
6222
+ const adapterAny = this.adapter;
6223
+ const wsAlive = parkedRealtimeWs.readyState === 1;
6224
+ if (typeof adapterAny?.adoptWebSocket === "function" && wsAlive) {
6225
+ try {
6226
+ adapterAny.adoptWebSocket(parkedRealtimeWs);
6227
+ getLogger().info(
6228
+ `[CONNECT] callId=${this.callId} provider=openai_realtime source=adopted ms=0`
6229
+ );
6230
+ adoptOk = true;
6231
+ } catch (err) {
6232
+ getLogger().debug(`Realtime adoptWebSocket failed: ${String(err)}; falling back`);
6233
+ }
6234
+ }
6235
+ if (!adoptOk) {
6236
+ try {
6237
+ parkedRealtimeWs.close();
6238
+ } catch {
6239
+ }
6240
+ }
6241
+ }
6242
+ if (!adoptOk) {
6243
+ try {
6244
+ await this.adapter.connect();
6245
+ getLogger().debug(`AI adapter connected (${label})`);
6246
+ } catch (e) {
6247
+ getLogger().error(`AI adapter connect FAILED (${label}):`, e);
6248
+ try {
6249
+ await this.deps.bridge.endCall(this.callId, this.ws);
6250
+ } catch {
6251
+ }
6252
+ return;
7023
6253
  }
7024
- return;
7025
6254
  }
7026
6255
  if (this.deps.agent.firstMessage) {
7027
6256
  this.metricsAcc.startTurn();
@@ -7141,8 +6370,21 @@ var StreamHandler = class _StreamHandler {
7141
6370
  await this.emitUserSpeechEnded();
7142
6371
  }
7143
6372
  async onAdapterTranscriptInput(inputText) {
6373
+ const stripped = inputText.trim().toLowerCase();
6374
+ if (HALLUCINATIONS.has(stripped) || stripped === "") {
6375
+ getLogger().debug(
6376
+ `Realtime transcript_input dropped (likely Whisper hallucination on silence/echo): ${sanitizeLogValue(inputText.slice(0, 60))}`
6377
+ );
6378
+ this.userTranscriptPending = false;
6379
+ return;
6380
+ }
7144
6381
  getLogger().debug(`User (${this.deps.bridge.label}): ${sanitizeLogValue(inputText)}`);
7145
6382
  this.history.push({ role: "user", text: inputText, timestamp: Date.now() });
6383
+ if (this.adapter instanceof OpenAIRealtimeAdapter) {
6384
+ void this.adapter.requestResponse().catch(
6385
+ (err) => getLogger().debug(`Realtime requestResponse failed: ${String(err)}`)
6386
+ );
6387
+ }
7146
6388
  if (!this.metricsAcc.turnActive) {
7147
6389
  this.metricsAcc.startTurn();
7148
6390
  this.currentAgentText = "";
@@ -7294,6 +6536,18 @@ var StreamHandler = class _StreamHandler {
7294
6536
  await this.flushAssistantTurn(text);
7295
6537
  }
7296
6538
  async onAdapterSpeechInterrupt() {
6539
+ if (this.adapter instanceof OpenAIRealtimeAdapter) {
6540
+ const startedAt = this.adapter.currentResponseFirstAudioAt;
6541
+ if (startedAt !== null) {
6542
+ const elapsedMs = Date.now() - startedAt;
6543
+ if (elapsedMs < _StreamHandler.MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_NO_AEC) {
6544
+ getLogger().info(
6545
+ `Realtime barge-in suppressed (response < gate, ${elapsedMs}ms)`
6546
+ );
6547
+ return;
6548
+ }
6549
+ }
6550
+ }
7297
6551
  this.deps.bridge.sendClear(this.ws, this.streamSid);
7298
6552
  if (this.adapter instanceof OpenAIRealtimeAdapter) this.adapter.cancelResponse();
7299
6553
  this.metricsAcc.recordTurnInterrupted();
@@ -7528,32 +6782,32 @@ async function queryDeepgramCost(metricsAcc, deepgramKey, deepgramRequestId) {
7528
6782
 
7529
6783
  // src/services/call-log.ts
7530
6784
  init_esm_shims();
7531
- import * as crypto3 from "crypto";
7532
- import * as fs2 from "fs";
6785
+ import * as crypto4 from "crypto";
6786
+ import * as fs3 from "fs";
7533
6787
  import { promises as fsp } from "fs";
7534
6788
  import * as os from "os";
7535
- import * as path2 from "path";
6789
+ import * as path3 from "path";
7536
6790
  var SCHEMA_VERSION = "1.0";
7537
6791
  var DEFAULT_RETENTION_DAYS = 30;
7538
6792
  function xdgDataHome() {
7539
- return process.env.XDG_DATA_HOME || path2.join(os.homedir(), ".local", "share");
6793
+ return process.env.XDG_DATA_HOME || path3.join(os.homedir(), ".local", "share");
7540
6794
  }
7541
6795
  function platformDefaultRoot() {
7542
6796
  if (process.platform === "darwin") {
7543
- return path2.join(os.homedir(), "Library", "Application Support", "patter");
6797
+ return path3.join(os.homedir(), "Library", "Application Support", "patter");
7544
6798
  }
7545
6799
  if (process.platform === "win32") {
7546
6800
  const localAppData = process.env.LOCALAPPDATA;
7547
- if (localAppData) return path2.join(localAppData, "patter");
7548
- return path2.join(os.homedir(), "AppData", "Local", "patter");
6801
+ if (localAppData) return path3.join(localAppData, "patter");
6802
+ return path3.join(os.homedir(), "AppData", "Local", "patter");
7549
6803
  }
7550
- return path2.join(xdgDataHome(), "patter");
6804
+ return path3.join(xdgDataHome(), "patter");
7551
6805
  }
7552
6806
  function resolveLogRoot(explicit) {
7553
6807
  const value = explicit ?? process.env.PATTER_LOG_DIR;
7554
6808
  if (!value) return null;
7555
6809
  if (value.trim().toLowerCase() === "auto") return platformDefaultRoot();
7556
- if (value.startsWith("~")) return path2.join(os.homedir(), value.slice(1));
6810
+ if (value.startsWith("~")) return path3.join(os.homedir(), value.slice(1));
7557
6811
  return value;
7558
6812
  }
7559
6813
  function retentionDays() {
@@ -7564,16 +6818,16 @@ function retentionDays() {
7564
6818
  return Math.max(0, parsed);
7565
6819
  }
7566
6820
  function redactMode() {
7567
- const raw = (process.env.PATTER_LOG_REDACT_PHONE || "mask").trim().toLowerCase();
6821
+ const raw = (process.env.PATTER_LOG_REDACT_PHONE || "full").trim().toLowerCase();
7568
6822
  if (raw === "full" || raw === "mask" || raw === "hash_only") return raw;
7569
- return "mask";
6823
+ return "full";
7570
6824
  }
7571
6825
  function redactPhone(raw) {
7572
6826
  if (!raw) return "";
7573
6827
  const mode = redactMode();
7574
6828
  if (mode === "full") return raw;
7575
6829
  if (mode === "hash_only") {
7576
- return "sha256:" + crypto3.createHash("sha256").update(raw, "utf8").digest("hex").slice(0, 16);
6830
+ return "sha256:" + crypto4.createHash("sha256").update(raw, "utf8").digest("hex").slice(0, 16);
7577
6831
  }
7578
6832
  return maskPhoneNumber(raw);
7579
6833
  }
@@ -7582,9 +6836,9 @@ function utcIso(tsSeconds) {
7582
6836
  return new Date(ms).toISOString();
7583
6837
  }
7584
6838
  async function atomicWriteJson(filePath, payload) {
7585
- const dir = path2.dirname(filePath);
6839
+ const dir = path3.dirname(filePath);
7586
6840
  await fsp.mkdir(dir, { recursive: true });
7587
- const tmp = path2.join(dir, `.tmp.${process.pid}.${crypto3.randomBytes(4).toString("hex")}.json`);
6841
+ const tmp = path3.join(dir, `.tmp.${process.pid}.${crypto4.randomBytes(4).toString("hex")}.json`);
7588
6842
  try {
7589
6843
  const handle = await fsp.open(tmp, "w");
7590
6844
  try {
@@ -7603,7 +6857,7 @@ async function atomicWriteJson(filePath, payload) {
7603
6857
  }
7604
6858
  }
7605
6859
  async function appendJsonl(filePath, record) {
7606
- await fsp.mkdir(path2.dirname(filePath), { recursive: true });
6860
+ await fsp.mkdir(path3.dirname(filePath), { recursive: true });
7607
6861
  await fsp.appendFile(filePath, JSON.stringify(record) + "\n", { encoding: "utf8" });
7608
6862
  }
7609
6863
  var CallLogger = class {
@@ -7613,9 +6867,9 @@ var CallLogger = class {
7613
6867
  this.root = null;
7614
6868
  return;
7615
6869
  }
7616
- const resolved = root.startsWith("~") ? path2.join(os.homedir(), root.slice(1)) : root;
6870
+ const resolved = root.startsWith("~") ? path3.join(os.homedir(), root.slice(1)) : root;
7617
6871
  try {
7618
- fs2.mkdirSync(resolved, { recursive: true });
6872
+ fs3.mkdirSync(resolved, { recursive: true });
7619
6873
  this.root = resolved;
7620
6874
  getLogger().info(`Call logs: ${resolved}`);
7621
6875
  } catch (err) {
@@ -7637,7 +6891,7 @@ var CallLogger = class {
7637
6891
  const month = String(dt.getUTCMonth() + 1).padStart(2, "0");
7638
6892
  const day = String(dt.getUTCDate()).padStart(2, "0");
7639
6893
  const safeId = sanitizeLogValue(callId, 64).replace(/\//g, "_") || "unknown";
7640
- return path2.join(this.root, "calls", year, month, day, safeId);
6894
+ return path3.join(this.root, "calls", year, month, day, safeId);
7641
6895
  }
7642
6896
  /** Write the initial `metadata.json` for a new call. */
7643
6897
  async logCallStart(callId, input = {}) {
@@ -7655,6 +6909,7 @@ var CallLogger = class {
7655
6909
  status: "in_progress",
7656
6910
  caller: redactPhone(input.caller ?? ""),
7657
6911
  callee: redactPhone(input.callee ?? ""),
6912
+ direction: input.direction || "inbound",
7658
6913
  telephony_provider: input.telephonyProvider ?? "",
7659
6914
  provider_mode: input.providerMode ?? "",
7660
6915
  agent: input.agent ?? {},
@@ -7664,11 +6919,11 @@ var CallLogger = class {
7664
6919
  error: null
7665
6920
  };
7666
6921
  try {
7667
- await atomicWriteJson(path2.join(dir, "metadata.json"), metadata);
6922
+ await atomicWriteJson(path3.join(dir, "metadata.json"), metadata);
7668
6923
  } catch (err) {
7669
6924
  getLogger().warn(`call_log write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`);
7670
6925
  }
7671
- if (crypto3.randomBytes(1)[0] < 5) {
6926
+ if (crypto4.randomBytes(1)[0] < 5) {
7672
6927
  this.sweepOldDays();
7673
6928
  }
7674
6929
  }
@@ -7683,7 +6938,7 @@ var CallLogger = class {
7683
6938
  ...turn
7684
6939
  };
7685
6940
  try {
7686
- await appendJsonl(path2.join(dir, "transcript.jsonl"), record);
6941
+ await appendJsonl(path3.join(dir, "transcript.jsonl"), record);
7687
6942
  } catch (err) {
7688
6943
  getLogger().warn(
7689
6944
  `call_log turn write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`
@@ -7702,7 +6957,7 @@ var CallLogger = class {
7702
6957
  data: payload
7703
6958
  };
7704
6959
  try {
7705
- await appendJsonl(path2.join(dir, "events.jsonl"), record);
6960
+ await appendJsonl(path3.join(dir, "events.jsonl"), record);
7706
6961
  } catch (err) {
7707
6962
  getLogger().warn(
7708
6963
  `call_log event write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`
@@ -7714,7 +6969,7 @@ var CallLogger = class {
7714
6969
  if (!this.enabled) return;
7715
6970
  const dir = this.callDir(callId);
7716
6971
  if (dir === null) return;
7717
- const metadataPath = path2.join(dir, "metadata.json");
6972
+ const metadataPath = path3.join(dir, "metadata.json");
7718
6973
  let existing = {};
7719
6974
  try {
7720
6975
  existing = JSON.parse(await fsp.readFile(metadataPath, "utf8"));
@@ -7749,20 +7004,20 @@ var CallLogger = class {
7749
7004
  const days = retentionDays();
7750
7005
  if (days === 0) return;
7751
7006
  const cutoff = Date.now() / 1e3 - days * 86400;
7752
- const callsRoot = path2.join(this.root, "calls");
7753
- if (!fs2.existsSync(callsRoot)) return;
7007
+ const callsRoot = path3.join(this.root, "calls");
7008
+ if (!fs3.existsSync(callsRoot)) return;
7754
7009
  try {
7755
- for (const yearName of fs2.readdirSync(callsRoot)) {
7010
+ for (const yearName of fs3.readdirSync(callsRoot)) {
7756
7011
  if (!/^\d+$/.test(yearName)) continue;
7757
- const yearDir = path2.join(callsRoot, yearName);
7758
- if (!fs2.statSync(yearDir).isDirectory()) continue;
7759
- for (const monthName of fs2.readdirSync(yearDir)) {
7012
+ const yearDir = path3.join(callsRoot, yearName);
7013
+ if (!fs3.statSync(yearDir).isDirectory()) continue;
7014
+ for (const monthName of fs3.readdirSync(yearDir)) {
7760
7015
  if (!/^\d+$/.test(monthName)) continue;
7761
- const monthDir = path2.join(yearDir, monthName);
7762
- if (!fs2.statSync(monthDir).isDirectory()) continue;
7763
- for (const dayName of fs2.readdirSync(monthDir)) {
7016
+ const monthDir = path3.join(yearDir, monthName);
7017
+ if (!fs3.statSync(monthDir).isDirectory()) continue;
7018
+ for (const dayName of fs3.readdirSync(monthDir)) {
7764
7019
  if (!/^\d+$/.test(dayName)) continue;
7765
- const dayDir = path2.join(monthDir, dayName);
7020
+ const dayDir = path3.join(monthDir, dayName);
7766
7021
  const y = Number.parseInt(yearName, 10);
7767
7022
  const m = Number.parseInt(monthName, 10);
7768
7023
  const d = Number.parseInt(dayName, 10);
@@ -7772,12 +7027,12 @@ var CallLogger = class {
7772
7027
  }
7773
7028
  }
7774
7029
  try {
7775
- if (fs2.readdirSync(monthDir).length === 0) fs2.rmdirSync(monthDir);
7030
+ if (fs3.readdirSync(monthDir).length === 0) fs3.rmdirSync(monthDir);
7776
7031
  } catch {
7777
7032
  }
7778
7033
  }
7779
7034
  try {
7780
- if (fs2.readdirSync(yearDir).length === 0) fs2.rmdirSync(yearDir);
7035
+ if (fs3.readdirSync(yearDir).length === 0) fs3.rmdirSync(yearDir);
7781
7036
  } catch {
7782
7037
  }
7783
7038
  }
@@ -7788,19 +7043,19 @@ var CallLogger = class {
7788
7043
  };
7789
7044
  function rmTree(target) {
7790
7045
  try {
7791
- for (const child of fs2.readdirSync(target)) {
7792
- const childPath = path2.join(target, child);
7793
- const stat = fs2.lstatSync(childPath);
7046
+ for (const child of fs3.readdirSync(target)) {
7047
+ const childPath = path3.join(target, child);
7048
+ const stat = fs3.lstatSync(childPath);
7794
7049
  if (stat.isDirectory()) {
7795
7050
  rmTree(childPath);
7796
7051
  } else {
7797
7052
  try {
7798
- fs2.unlinkSync(childPath);
7053
+ fs3.unlinkSync(childPath);
7799
7054
  } catch {
7800
7055
  }
7801
7056
  }
7802
7057
  }
7803
- fs2.rmdirSync(target);
7058
+ fs3.rmdirSync(target);
7804
7059
  } catch {
7805
7060
  }
7806
7061
  }
@@ -7848,6 +7103,19 @@ function classifyTelnyxAmd(result) {
7848
7103
  if (result === "fax") return "fax";
7849
7104
  return "unknown";
7850
7105
  }
7106
+ function twilioStatusToOutcome(callStatus) {
7107
+ const s = (callStatus || "").toLowerCase();
7108
+ if (s === "no-answer") return "no_answer";
7109
+ if (s === "busy") return "busy";
7110
+ return "failed";
7111
+ }
7112
+ function telnyxHangupOutcome(cause) {
7113
+ const c = (cause || "").toLowerCase();
7114
+ if (c === "no_answer" || c === "timeout" || c === "no_user_response") return "no_answer";
7115
+ if (c === "user_busy" || c === "busy") return "busy";
7116
+ if (c === "call_rejected" || c === "rejected" || c === "destination_out_of_order") return "failed";
7117
+ return null;
7118
+ }
7851
7119
  function validateWebhookUrl(url) {
7852
7120
  const parsed = new URL(url);
7853
7121
  if (!["http:", "https:"].includes(parsed.protocol)) {
@@ -7905,7 +7173,7 @@ function validateTelnyxSignature(rawBody, signature, timestamp, publicKey, toler
7905
7173
  if (ageMs < 0 || ageMs > toleranceSec * 1e3) return false;
7906
7174
  const payload = `${timestamp}|${rawBody}`;
7907
7175
  const keyBuffer = Buffer.from(publicKey, "base64");
7908
- const keyObject = crypto4.createPublicKey({
7176
+ const keyObject = crypto5.createPublicKey({
7909
7177
  key: keyBuffer,
7910
7178
  format: "der",
7911
7179
  type: "spki"
@@ -7915,7 +7183,7 @@ function validateTelnyxSignature(rawBody, signature, timestamp, publicKey, toler
7915
7183
  if (!trimmed) continue;
7916
7184
  try {
7917
7185
  const sigBuffer = Buffer.from(trimmed, "base64");
7918
- if (crypto4.verify(null, Buffer.from(payload), keyObject, sigBuffer)) {
7186
+ if (crypto5.verify(null, Buffer.from(payload), keyObject, sigBuffer)) {
7919
7187
  return true;
7920
7188
  }
7921
7189
  } catch {
@@ -7932,12 +7200,12 @@ function validateTwilioSid(sid, prefix = "CA") {
7932
7200
  }
7933
7201
  function validateTwilioSignature(url, params, signature, authToken) {
7934
7202
  const data = url + Object.keys(params).sort().reduce((acc, key) => acc + key + (params[key] ?? ""), "");
7935
- const expected = crypto4.createHmac("sha1", authToken).update(data).digest("base64");
7203
+ const expected = crypto5.createHmac("sha1", authToken).update(data).digest("base64");
7936
7204
  try {
7937
7205
  const sigBuf = Buffer.from(signature);
7938
7206
  const expBuf = Buffer.from(expected);
7939
7207
  if (sigBuf.length !== expBuf.length) return false;
7940
- return crypto4.timingSafeEqual(sigBuf, expBuf);
7208
+ return crypto5.timingSafeEqual(sigBuf, expBuf);
7941
7209
  } catch {
7942
7210
  return false;
7943
7211
  }
@@ -8010,6 +7278,7 @@ var TwilioBridge = class {
8010
7278
  config;
8011
7279
  label = "Twilio";
8012
7280
  telephonyProvider = "twilio";
7281
+ inputWireFormat = "ulaw_8000";
8013
7282
  sendAudio(ws, audioBase64, streamSid) {
8014
7283
  ws.send(JSON.stringify({ event: "media", streamSid, media: { payload: audioBase64 } }));
8015
7284
  }
@@ -8097,7 +7366,7 @@ var TELNYX_DTMF_ALLOWED = new Set("0123456789*#ABCDabcdwW");
8097
7366
  var TELNYX_DTMF_DURATION_MS = 250;
8098
7367
  async function sleep(ms) {
8099
7368
  if (ms <= 0) return;
8100
- await new Promise((resolve) => setTimeout(resolve, ms));
7369
+ await new Promise((resolve2) => setTimeout(resolve2, ms));
8101
7370
  }
8102
7371
  var TelnyxBridge = class {
8103
7372
  constructor(config) {
@@ -8106,6 +7375,11 @@ var TelnyxBridge = class {
8106
7375
  config;
8107
7376
  label = "Telnyx";
8108
7377
  telephonyProvider = "telnyx";
7378
+ // ``streaming_start`` negotiates PCMU bidirectional by default — keeping
7379
+ // ``ulaw_8000`` here matches what TwilioBridge does and keeps the stream
7380
+ // handler's input-transcode branch in the right shape. If a deployment
7381
+ // overrides the negotiation to L16, this should flip to ``pcm_16000``.
7382
+ inputWireFormat = "ulaw_8000";
8109
7383
  sendAudio(ws, audioBase64, _streamSid) {
8110
7384
  ws.send(JSON.stringify({ event: "media", media: { payload: audioBase64 } }));
8111
7385
  }
@@ -8127,7 +7401,7 @@ var TelnyxBridge = class {
8127
7401
  });
8128
7402
  getLogger().info(`Telnyx call transferred to ${toNumber}`);
8129
7403
  }
8130
- async sendDtmf(callId, digits, delayMs) {
7404
+ async sendDtmf(_ws, callId, digits, delayMs) {
8131
7405
  if (!digits) {
8132
7406
  getLogger().warn("TelnyxBridge.sendDtmf called with empty digits");
8133
7407
  return;
@@ -8325,6 +7599,99 @@ var EmbeddedServer = class {
8325
7599
  * (tests) work without further setup. See FIX #91.
8326
7600
  */
8327
7601
  recordPrewarmWaste = () => void 0;
7602
+ /**
7603
+ * Per-callId completion deferreds for ``Patter.call({ wait: true })``.
7604
+ * Resolved by the FIRST terminal signal: the Twilio/Telnyx status callback
7605
+ * for no-media outcomes (no-answer / busy / failed), or ``onCallEnd`` for a
7606
+ * connected call (answered / voicemail). The AMD classification is recorded
7607
+ * per callId so the connected-call path can distinguish ``answered`` from
7608
+ * ``voicemail``. This is what lets ``call({ wait: true })`` resolve to a
7609
+ * structured {@link CallResult} without the caller hand-wiring ``onCallEnd``
7610
+ * to a promise. Public so ``client.ts`` can register/await + fail in-flight
7611
+ * waiters on ``disconnect()``. Mirrors Python's ``EmbeddedServer._completions``.
7612
+ */
7613
+ completions = /* @__PURE__ */ new Map();
7614
+ /** AMD classification recorded per callId, used by the connected-call path. */
7615
+ amdClass = /* @__PURE__ */ new Map();
7616
+ // === Outbound completion registry (call({ wait: true })) ===
7617
+ /**
7618
+ * Register (or return) a completion promise for an outbound call.
7619
+ *
7620
+ * Called by ``Patter.call({ wait: true })`` immediately after the carrier
7621
+ * accepts the dial — the promise resolves to a {@link CallResult} once a
7622
+ * terminal signal arrives. Idempotent: returns the existing pending promise
7623
+ * if one is already registered for ``callId``. Mirrors Python's
7624
+ * ``register_completion``.
7625
+ */
7626
+ registerCompletion(callId) {
7627
+ const existing = this.completions.get(callId);
7628
+ if (existing && !existing.done) {
7629
+ return existing.promise;
7630
+ }
7631
+ let resolve2;
7632
+ let reject;
7633
+ const promise = new Promise((res, rej) => {
7634
+ resolve2 = res;
7635
+ reject = rej;
7636
+ });
7637
+ this.completions.set(callId, { promise, resolve: resolve2, reject, done: false });
7638
+ return promise;
7639
+ }
7640
+ /** Drop a registered completion (e.g. on a backstop timeout) without resolving it. */
7641
+ deleteCompletion(callId) {
7642
+ this.completions.delete(callId);
7643
+ this.amdClass.delete(callId);
7644
+ }
7645
+ /**
7646
+ * Resolve a pending completion with a {@link CallResult}.
7647
+ *
7648
+ * No-op when no completion is registered for ``callId`` (the common case —
7649
+ * most calls are placed without ``wait: true``) or it is already done.
7650
+ * Builds the result from the ``onCallEnd`` payload when ``data`` is provided
7651
+ * (connected calls carry transcript + {@link CallMetrics}); no-media
7652
+ * outcomes pass ``data`` undefined and yield an empty transcript / no cost.
7653
+ * Mirrors Python's ``_resolve_completion``.
7654
+ */
7655
+ resolveCompletion(callId, args) {
7656
+ const entry = this.completions.get(callId);
7657
+ if (!entry || entry.done) return;
7658
+ const data = args.data;
7659
+ const metrics = data?.metrics ?? null;
7660
+ const cost = metrics?.cost ?? null;
7661
+ const durationRaw = metrics?.duration_seconds;
7662
+ const duration = typeof durationRaw === "number" ? durationRaw : 0;
7663
+ const transcriptRaw = data?.transcript;
7664
+ const transcript = Array.isArray(transcriptRaw) ? transcriptRaw : [];
7665
+ const result = {
7666
+ callId,
7667
+ outcome: args.outcome,
7668
+ status: args.status,
7669
+ durationSeconds: duration,
7670
+ transcript,
7671
+ cost,
7672
+ metrics
7673
+ };
7674
+ entry.done = true;
7675
+ entry.resolve(result);
7676
+ this.completions.delete(callId);
7677
+ this.amdClass.delete(callId);
7678
+ }
7679
+ /**
7680
+ * Fail every in-flight completion with ``error``. Called by
7681
+ * ``Patter.disconnect()`` so a ``call({ wait: true })`` awaiter does not
7682
+ * hang until its backstop timeout once the server is gone. Mirrors the
7683
+ * Python ``disconnect()`` change that fails in-flight ``wait=True`` awaiters.
7684
+ */
7685
+ failPendingCompletions(error) {
7686
+ for (const entry of this.completions.values()) {
7687
+ if (!entry.done) {
7688
+ entry.done = true;
7689
+ entry.reject(error);
7690
+ }
7691
+ }
7692
+ this.completions.clear();
7693
+ this.amdClass.clear();
7694
+ }
8328
7695
  /** Bind HTTP + WebSocket listeners on `port`, mount carrier webhooks and dashboard routes. */
8329
7696
  async start(port = 8e3) {
8330
7697
  const webhookUrlPattern = /^[a-zA-Z0-9][a-zA-Z0-9.\-]+[a-zA-Z0-9]$/;
@@ -8388,8 +7755,10 @@ var EmbeddedServer = class {
8388
7755
  return;
8389
7756
  }
8390
7757
  const body = req.body;
8391
- const callSid = sanitizeLogValue(body["CallSid"] ?? "");
8392
- const callStatus = sanitizeLogValue(body["CallStatus"] ?? "");
7758
+ const rawCallSid = body["CallSid"] ?? "";
7759
+ const rawCallStatus = body["CallStatus"] ?? "";
7760
+ const callSid = sanitizeLogValue(rawCallSid);
7761
+ const callStatus = sanitizeLogValue(rawCallStatus);
8393
7762
  const duration = body["CallDuration"] ?? body["Duration"] ?? "";
8394
7763
  getLogger().info(
8395
7764
  `Twilio status ${callStatus} for call ${callSid} (duration=${duration})`
@@ -8406,6 +7775,10 @@ var EmbeddedServer = class {
8406
7775
  } catch (err) {
8407
7776
  getLogger().debug(`recordPrewarmWaste threw: ${String(err)}`);
8408
7777
  }
7778
+ this.resolveCompletion(rawCallSid, {
7779
+ outcome: twilioStatusToOutcome(rawCallStatus),
7780
+ status: rawCallStatus
7781
+ });
8409
7782
  }
8410
7783
  res.status(204).send();
8411
7784
  });
@@ -8448,6 +7821,9 @@ var EmbeddedServer = class {
8448
7821
  const answeredBy = body["AnsweredBy"] ?? "";
8449
7822
  const callSid = body["CallSid"] ?? "";
8450
7823
  getLogger().info(`AMD result for ${sanitizeLogValue(callSid)}: ${sanitizeLogValue(answeredBy)}`);
7824
+ if (callSid) {
7825
+ this.amdClass.set(callSid, classifyTwilioAmd(answeredBy));
7826
+ }
8451
7827
  const cb = this.onMachineDetection;
8452
7828
  if (cb && callSid) {
8453
7829
  try {
@@ -8573,6 +7949,9 @@ var EmbeddedServer = class {
8573
7949
  getLogger().info(
8574
7950
  `Telnyx AMD result for ${sanitizeLogValue(amdCallId)}: ${sanitizeLogValue(amdResult)}`
8575
7951
  );
7952
+ if (amdCallId) {
7953
+ this.amdClass.set(amdCallId, classifyTelnyxAmd(amdResult));
7954
+ }
8576
7955
  const cbTx = this.onMachineDetection;
8577
7956
  if (cbTx && amdCallId) {
8578
7957
  try {
@@ -8609,6 +7988,13 @@ var EmbeddedServer = class {
8609
7988
  } catch (err) {
8610
7989
  getLogger().debug(`recordPrewarmWaste threw: ${String(err)}`);
8611
7990
  }
7991
+ const noMediaOutcome = telnyxHangupOutcome(hangupCause);
7992
+ if (noMediaOutcome !== null) {
7993
+ this.resolveCompletion(hangupCallId, {
7994
+ outcome: noMediaOutcome,
7995
+ status: hangupCause
7996
+ });
7997
+ }
8612
7998
  }
8613
7999
  return res.status(200).send();
8614
8000
  }
@@ -8661,6 +8047,121 @@ var EmbeddedServer = class {
8661
8047
  }
8662
8048
  return res.status(200).send();
8663
8049
  });
8050
// Gatekeeper shared by the Plivo webhook routes. Returns true when the
// request may proceed; otherwise it has already written the HTTP error
// response (503 or 403) and returns false.
const validatePlivoRequest = (req, res) => {
  const authToken = this.config.plivoAuthToken;

  if (!authToken) {
    // Without a token nothing can be verified. Only let the request through
    // when signature checking was explicitly switched off.
    if (this.config.requireSignature === false) {
      return true;
    }
    getLogger().error(
      "Plivo webhook rejected: plivoAuthToken not configured and requireSignature is not false"
    );
    res.status(503).send("Webhook signature required");
    return false;
  }

  const method = req.method.toUpperCase();

  // Only POST requests with an object body contribute parameters; every
  // value is stringified before being handed to the signature check.
  let params = {};
  if (method === "POST" && req.body && typeof req.body === "object") {
    params = Object.fromEntries(
      Object.entries(req.body).map(([key, value]) => [key, String(value)])
    );
  }

  const signature = req.headers["x-plivo-signature-v3"] || "";
  const nonce = req.headers["x-plivo-signature-v3-nonce"] || "";
  const url = `https://${this.config.webhookUrl}${req.originalUrl}`;

  const signatureOk = validatePlivoSignature(url, nonce, signature, authToken, params, method);
  if (signatureOk) {
    return true;
  }

  getLogger().warn("Plivo webhook rejected: invalid or missing V3 signature");
  res.status(403).send("Invalid signature");
  return false;
};
8076
// Plivo voice webhook. After the signature gate passes, responds with the
// XML produced by PlivoAdapter.generateStreamXml for a stream URL pointing at
// this server's /ws/plivo/stream/<call id> endpoint. Caller and callee are
// forwarded both as query parameters and in the third argument of
// generateStreamXml (presumably extra stream headers — confirm against
// PlivoAdapter).
app.post("/webhooks/plivo/voice", (req, res) => {
  if (!validatePlivoRequest(req, res)) return;
  const body = req.body ?? {};
  const callUuid = body["CallUUID"] ?? "";
  const caller = body["From"] ?? "";
  const callee = body["To"] ?? "";
  const qs = `?caller=${encodeURIComponent(caller)}&callee=${encodeURIComponent(callee)}`;
  // CallUUID comes from the request body, so encode it as a path segment just
  // like caller/callee are encoded for the query string. This is a no-op for
  // a well-formed UUID, but keeps characters such as "/", "?" or "#" from
  // changing the shape of the stream URL. Falls back to "outbound" when the
  // request carries no CallUUID.
  const streamSegment = encodeURIComponent(callUuid || "outbound");
  const streamUrl = `wss://${this.config.webhookUrl}/ws/plivo/stream/${streamSegment}${qs}`;
  const xml = PlivoAdapter.generateStreamXml(streamUrl, "audio/x-mulaw;rate=8000", {
    "X-PH-caller": caller,
    "X-PH-callee": callee
  });
  res.type("text/xml").send(xml);
});
8090
// Plivo call-status webhook. Logs the status, mirrors it into the metrics
// store, and for the statuses listed below (calls that ended without being
// connected) records prewarm waste and settles any pending completion for
// the call. Always answers 200 once the signature gate has passed.
app.post("/webhooks/plivo/status", (req, res) => {
  if (!validatePlivoRequest(req, res)) return;
  const body = req.body ?? {};
  const callUuid = body["CallUUID"] ?? "";
  const callStatus = body["CallStatus"] ?? body["Status"] ?? "";
  const duration = body["Duration"] ?? body["BillDuration"] ?? "";
  // Every request-supplied value is sanitized before logging, including the
  // duration, so a crafted field cannot inject extra log lines.
  getLogger().info(
    `Plivo status ${sanitizeLogValue(callStatus)} for call ${sanitizeLogValue(callUuid)} (duration=${sanitizeLogValue(duration)})`
  );
  if (callUuid && callStatus) {
    const extra = {};
    // Only forward a duration when the field parses as a number.
    const parsed = parseFloat(duration);
    if (!Number.isNaN(parsed)) extra.duration_seconds = parsed;
    this.metricsStore.updateCallStatus(callUuid, callStatus, extra);
  }
  if (callUuid && ["no-answer", "busy", "failed", "timeout", "cancel"].includes(callStatus)) {
    try {
      this.recordPrewarmWaste(callUuid);
    } catch (err) {
      // Best-effort accounting: a failure here must not block the webhook.
      getLogger().debug(`recordPrewarmWaste threw: ${String(err)}`);
    }
    // no-answer/timeout -> "no_answer", busy -> "busy", anything else
    // (failed, cancel) -> "failed".
    const outcome = callStatus === "no-answer" || callStatus === "timeout" ? "no_answer" : callStatus === "busy" ? "busy" : "failed";
    this.resolveCompletion(callUuid, { outcome, status: callStatus });
  }
  res.status(200).send();
});
8116
// Plivo answering-machine-detection webhook. Classifies the raw AMD value,
// remembers the classification per call, notifies the optional
// onMachineDetection callback, and for a "machine" result records prewarm
// waste and drops the configured voicemail message. Always answers 200 once
// the signature gate has passed.
app.post("/webhooks/plivo/amd", async (req, res) => {
  if (!validatePlivoRequest(req, res)) return;
  const body = req.body ?? {};
  const callUuid = body["CallUUID"] ?? "";
  // First non-empty field wins; the field names are tried in this order.
  const amdRaw = body["Machine"] || body["MachineDetection"] || body["AnsweredBy"] || body["CallStatus"] || "";
  getLogger().info(`AMD result for ${sanitizeLogValue(callUuid)}: ${sanitizeLogValue(amdRaw)}`);
  const classification = classifyPlivoAmd(amdRaw);
  if (callUuid) this.amdClass.set(callUuid, classification);
  const cb = this.onMachineDetection;
  if (cb && callUuid) {
    try {
      await cb({
        call_id: callUuid,
        carrier: "plivo",
        classification,
        raw: amdRaw,
        detected_at: Date.now() / 1e3
      });
    } catch (err) {
      // A failing user callback must not break webhook processing.
      getLogger().warn(`onMachineDetection callback threw: ${sanitizeLogValue(String(err))}`);
    }
  }
  if (classification === "machine" && callUuid) {
    try {
      this.recordPrewarmWaste(callUuid);
    } catch (err) {
      getLogger().debug(`recordPrewarmWaste threw: ${String(err)}`);
    }
    if (this.voicemailMessage && this.config.plivoAuthId && this.config.plivoAuthToken) {
      // Guard the voicemail drop: a rejection here would otherwise skip the
      // 200 response below and surface as an unhandled rejection from this
      // async handler.
      try {
        await dropPlivoVoicemail(
          callUuid,
          this.voicemailMessage,
          this.config.plivoAuthId,
          this.config.plivoAuthToken
        );
      } catch (err) {
        getLogger().warn(`Plivo voicemail drop failed: ${sanitizeLogValue(String(err))}`);
      }
    }
  }
  res.status(200).send();
});
8155
// Plivo transfer XML endpoint (any HTTP method). Reads the dial target from
// the `to` query parameter and answers with a <Dial> document for it, or a
// <Hangup/> document when the target is missing or not a "+"-prefixed number
// of 7 to 15 digits with a non-zero first digit.
app.all("/webhooks/plivo/transfer", (req, res) => {
  if (!validatePlivoRequest(req, res)) return;
  const target = String(req.query.to ?? "");
  const targetIsValid = target !== "" && /^\+[1-9]\d{6,14}$/.test(target);
  if (targetIsValid) {
    res.type("text/xml").send(`<Response><Dial><Number>${xmlEscape(target)}</Number></Dial></Response>`);
    return;
  }
  getLogger().warn(`Plivo transfer XML: invalid target ${JSON.stringify(target)}`);
  res.type("text/xml").send("<Response><Hangup/></Response>");
});
8664
8165
  this.server = createServer(app);
8665
8166
  this.wss = new WebSocketServer({ noServer: true });
8666
8167
  const MAX_WS_PER_IP = 10;
@@ -8693,14 +8194,16 @@ var EmbeddedServer = class {
8693
8194
  ws.once("close", () => {
8694
8195
  this.activeConnections.delete(ws);
8695
8196
  });
8696
- const isTelnyx = this.config.telephonyProvider === "telnyx";
8697
- if (isTelnyx) {
8197
+ const provider2 = this.config.telephonyProvider;
8198
+ if (provider2 === "telnyx") {
8698
8199
  this.handleTelnyxStream(ws, url);
8200
+ } else if (provider2 === "plivo") {
8201
+ this.handlePlivoStream(ws, url);
8699
8202
  } else {
8700
8203
  this.handleTwilioStream(ws, url);
8701
8204
  }
8702
8205
  });
8703
- await new Promise((resolve) => {
8206
+ await new Promise((resolve2) => {
8704
8207
  const bindHost = process.env.PATTER_BIND_HOST ?? "127.0.0.1";
8705
8208
  this.server.listen(port, bindHost, () => {
8706
8209
  getLogger().info(`Server on port ${port}`);
@@ -8722,7 +8225,7 @@ var EmbeddedServer = class {
8722
8225
  }
8723
8226
  console.log("\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n");
8724
8227
  }
8725
- resolve();
8228
+ resolve2();
8726
8229
  });
8727
8230
  });
8728
8231
  }
@@ -8765,7 +8268,7 @@ var EmbeddedServer = class {
8765
8268
  `Telnyx voicemail speak failed: ${speakResp.status} ${(await speakResp.text()).slice(0, 200)}`
8766
8269
  );
8767
8270
  }
8768
- await new Promise((resolve) => setTimeout(resolve, estimatedMs));
8271
+ await new Promise((resolve2) => setTimeout(resolve2, estimatedMs));
8769
8272
  await fetch(`https://api.telnyx.com/v2/calls/${encoded}/actions/hangup`, {
8770
8273
  method: "POST",
8771
8274
  headers,
@@ -8836,9 +8339,11 @@ var EmbeddedServer = class {
8836
8339
  const active = callId ? store.getActive(callId) : void 0;
8837
8340
  const resolvedCaller = dataCaller || active?.caller || "";
8838
8341
  const resolvedCallee = dataCallee || active?.callee || "";
8342
+ const resolvedDirection = (typeof data.direction === "string" ? data.direction : "") || active?.direction || "inbound";
8839
8343
  void logger.logCallStart(callId, {
8840
8344
  caller: resolvedCaller,
8841
8345
  callee: resolvedCallee,
8346
+ direction: resolvedDirection,
8842
8347
  telephonyProvider: bridge.telephonyProvider,
8843
8348
  providerMode: agent.provider ?? "",
8844
8349
  agent: agentSnapshot()
@@ -8877,6 +8382,12 @@ var EmbeddedServer = class {
8877
8382
  }).catch((err) => getLogger().error(`call_log end error: ${String(err)}`));
8878
8383
  }
8879
8384
  if (userEnd) await userEnd(data);
8385
+ const cid = typeof data.call_id === "string" ? data.call_id : "";
8386
+ if (cid) {
8387
+ const cls = this.amdClass.get(cid);
8388
+ const outcome = cls === "machine" ? "voicemail" : "answered";
8389
+ this.resolveCompletion(cid, { outcome, status: "completed", data });
8390
+ }
8880
8391
  };
8881
8392
  return [wrappedStart, wrappedMetrics, wrappedEnd];
8882
8393
  }
@@ -8983,6 +8494,52 @@ var EmbeddedServer = class {
8983
8494
  });
8984
8495
  }
8985
8496
  // ---------------------------------------------------------------------------
8497
+ // Plivo WebSocket message parser (thin layer)
8498
+ // ---------------------------------------------------------------------------
8499
+ handlePlivoStream(ws, url) {
8500
+ const caller = url.searchParams.get("caller") ?? "";
8501
+ const callee = url.searchParams.get("callee") ?? "";
8502
+ const bridge = new PlivoBridge(this.config);
8503
+ const handler = new StreamHandler(this.buildStreamHandlerDeps(bridge), ws, caller, callee);
8504
+ ws.on("message", async (raw) => {
8505
+ try {
8506
+ let data;
8507
+ try {
8508
+ data = JSON.parse(raw.toString());
8509
+ } catch (e) {
8510
+ getLogger().error("Failed to parse Plivo WS message:", e);
8511
+ return;
8512
+ }
8513
+ const event = data.event ?? "";
8514
+ if (event === "start") {
8515
+ handler.setStreamSid(data.start?.streamId ?? "");
8516
+ const callId = data.start?.callId ?? "";
8517
+ if (callId) this.activeCallIds.set(ws, callId);
8518
+ await handler.handleCallStart(callId);
8519
+ } else if (event === "media") {
8520
+ const payload = data.media?.payload ?? "";
8521
+ if (payload) handler.handleAudio(Buffer.from(payload, "base64"));
8522
+ } else if (event === "playedStream") {
8523
+ const markName = String(data.name ?? "");
8524
+ if (markName) await handler.onMark(markName);
8525
+ } else if (event === "dtmf") {
8526
+ const digit = String(data.dtmf?.digit ?? "").trim();
8527
+ if (digit) await handler.handleDtmf(digit);
8528
+ } else if (event === "playFailed" || event === "error") {
8529
+ getLogger().warn(`Plivo ${event}: ${data.reason ?? "unknown"}`);
8530
+ } else if (event === "stop") {
8531
+ await handler.handleStop();
8532
+ }
8533
+ } catch (err) {
8534
+ getLogger().error("Stream handler error (Plivo):", err);
8535
+ }
8536
+ });
8537
+ ws.on("close", async () => {
8538
+ this.activeCallIds.delete(ws);
8539
+ await handler.handleWsClose();
8540
+ });
8541
+ }
8542
+ // ---------------------------------------------------------------------------
8986
8543
  // Graceful shutdown
8987
8544
  // ---------------------------------------------------------------------------
8988
8545
  /**
@@ -8996,13 +8553,13 @@ var EmbeddedServer = class {
8996
8553
  */
8997
8554
  async stop() {
8998
8555
  if (!this.server) return;
8999
- const httpClosePromise = new Promise((resolve) => {
9000
- this.server.close(() => resolve());
8556
+ const httpClosePromise = new Promise((resolve2) => {
8557
+ this.server.close(() => resolve2());
9001
8558
  });
9002
- const isTelnyx = this.config.telephonyProvider === "telnyx";
8559
+ const provider2 = this.config.telephonyProvider;
9003
8560
  for (const [ws, callId] of this.activeCallIds) {
9004
8561
  try {
9005
- const bridge = isTelnyx ? new TelnyxBridge(this.config) : new TwilioBridge(this.config);
8562
+ const bridge = provider2 === "telnyx" ? new TelnyxBridge(this.config) : provider2 === "plivo" ? new PlivoBridge(this.config) : new TwilioBridge(this.config);
9006
8563
  await bridge.endCall(callId, ws);
9007
8564
  } catch {
9008
8565
  }
@@ -9017,15 +8574,15 @@ var EmbeddedServer = class {
9017
8574
  if (this.activeConnections.size > 0) {
9018
8575
  getLogger().info(`Waiting for ${this.activeConnections.size} active connection(s) to close...`);
9019
8576
  await Promise.race([
9020
- new Promise((resolve) => {
8577
+ new Promise((resolve2) => {
9021
8578
  const checkInterval = setInterval(() => {
9022
8579
  if (this.activeConnections.size === 0) {
9023
8580
  clearInterval(checkInterval);
9024
- resolve();
8581
+ resolve2();
9025
8582
  }
9026
8583
  }, 100);
9027
8584
  }),
9028
- new Promise((resolve) => setTimeout(resolve, GRACEFUL_SHUTDOWN_TIMEOUT_MS))
8585
+ new Promise((resolve2) => setTimeout(resolve2, GRACEFUL_SHUTDOWN_TIMEOUT_MS))
9029
8586
  ]);
9030
8587
  }
9031
8588
  if (this.activeConnections.size > 0) {
@@ -9782,7 +9339,7 @@ var TestSession = class {
9782
9339
  input: process.stdin,
9783
9340
  output: process.stdout
9784
9341
  });
9785
- const askQuestion = (prompt) => new Promise((resolve) => rl.question(prompt, resolve));
9342
+ const askQuestion = (prompt) => new Promise((resolve2) => rl.question(prompt, resolve2));
9786
9343
  try {
9787
9344
  while (!ended) {
9788
9345
  let userInput;
@@ -9881,26 +9438,19 @@ export {
9881
9438
  AuthenticationError,
9882
9439
  ProvisionError,
9883
9440
  RateLimitError,
9884
- OpenAIRealtimeAdapter,
9885
- mulawToPcm16,
9886
- pcm16ToMulaw,
9887
- PcmCarry,
9888
- StatefulResampler,
9889
- createResampler16kTo8k,
9890
- createResampler8kTo16k,
9891
- createResampler24kTo16k,
9892
- createResampler24kTo8k,
9893
- resample8kTo16k,
9894
- resample16kTo8k,
9895
- resample24kTo16k,
9896
- OpenAIRealtime2Adapter,
9897
9441
  ElevenLabsConvAIAdapter,
9442
+ PlivoAdapter,
9443
+ Carrier,
9444
+ PRICING_VERSION,
9445
+ PRICING_LAST_UPDATED,
9446
+ PricingUnit,
9898
9447
  DEFAULT_PRICING,
9899
9448
  mergePricing,
9900
9449
  calculateSttCost,
9901
9450
  calculateTtsCost,
9902
9451
  calculateRealtimeCost,
9903
9452
  calculateTelephonyCost,
9453
+ VERSION,
9904
9454
  MetricsStore,
9905
9455
  makeAuthMiddleware,
9906
9456
  callsToCsv,
@@ -9910,6 +9460,7 @@ export {
9910
9460
  RemoteMessageHandler,
9911
9461
  isRemoteUrl,
9912
9462
  isWebSocketUrl,
9463
+ DeepgramModel,
9913
9464
  DeepgramSTT,
9914
9465
  CallMetricsAccumulator,
9915
9466
  SPAN_CALL,