getpatter 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,11 @@
1
+ import {
2
+ OpenAIRealtime2Adapter,
3
+ OpenAIRealtimeAdapter,
4
+ createResampler16kTo8k,
5
+ createResampler8kTo16k,
6
+ mulawToPcm16,
7
+ pcm16ToMulaw
8
+ } from "./chunk-CL2U3YET.mjs";
1
9
  import {
2
10
  getLogger
3
11
  } from "./chunk-MVOQFAEO.mjs";
@@ -21,367 +29,9 @@ import express from "express";
21
29
  import { createServer } from "http";
22
30
  import { WebSocketServer } from "ws";
23
31
 
24
- // src/providers/openai-realtime.ts
25
- init_esm_shims();
26
- import WebSocket from "ws";
27
- var OpenAIRealtimeAudioFormat = {
28
- G711_ULAW: "g711_ulaw",
29
- G711_ALAW: "g711_alaw",
30
- PCM16: "pcm16"
31
- };
32
- var OpenAIRealtimeModel = {
33
- GPT_REALTIME: "gpt-realtime",
34
- GPT_REALTIME_2: "gpt-realtime-2",
35
- GPT_REALTIME_MINI: "gpt-realtime-mini",
36
- GPT_4O_REALTIME_PREVIEW: "gpt-4o-realtime-preview",
37
- GPT_4O_MINI_REALTIME_PREVIEW: "gpt-4o-mini-realtime-preview"
38
- };
39
- var OpenAIVoice = {
40
- ALLOY: "alloy",
41
- ASH: "ash",
42
- BALLAD: "ballad",
43
- CORAL: "coral",
44
- ECHO: "echo",
45
- FABLE: "fable",
46
- NOVA: "nova",
47
- ONYX: "onyx",
48
- SAGE: "sage",
49
- SHIMMER: "shimmer",
50
- VERSE: "verse"
51
- };
52
- var OpenAITranscriptionModel = {
53
- WHISPER_1: "whisper-1",
54
- GPT_4O_TRANSCRIBE: "gpt-4o-transcribe",
55
- GPT_4O_MINI_TRANSCRIBE: "gpt-4o-mini-transcribe",
56
- GPT_REALTIME_WHISPER: "gpt-realtime-whisper"
57
- };
58
- var OpenAIRealtimeVADType = {
59
- SERVER_VAD: "server_vad",
60
- SEMANTIC_VAD: "semantic_vad"
61
- };
62
- var OpenAIRealtimeAdapter = class {
63
- constructor(apiKey, model = OpenAIRealtimeModel.GPT_REALTIME_MINI, voice = OpenAIVoice.ALLOY, instructions = "", tools, audioFormat = OpenAIRealtimeAudioFormat.G711_ULAW, options = {}) {
64
- this.apiKey = apiKey;
65
- this.model = model;
66
- this.voice = voice;
67
- this.instructions = instructions;
68
- this.tools = tools;
69
- this.audioFormat = audioFormat;
70
- this.options = options;
71
- }
72
- apiKey;
73
- model;
74
- voice;
75
- instructions;
76
- tools;
77
- audioFormat;
78
- ws = null;
79
- eventCallbacks = /* @__PURE__ */ new Set();
80
- messageListenerAttached = false;
81
- heartbeat = null;
82
- // Track the in-flight assistant item id so we can truncate cleanly on
83
- // barge-in (see ``cancelResponse``) — matches the Python adapter.
84
- currentResponseItemId = null;
85
- currentResponseAudioMs = 0;
86
- // Wall-clock timestamp (Date.now()) of the first ``response.audio.delta``
87
- // received since the current response item started. ``cancelResponse``
88
- // uses this to bound ``audio_end_ms`` to what the caller could plausibly
89
- // have heard — generated audio frequently arrives 5-10x real-time, so
90
- // ``audio_end_ms`` driven purely by the per-chunk byte counter overshoots
91
- // reality and leaves phantom assistant text on the conversation. The
92
- // wall-clock cap corresponds to the maximum playback that real-time TTS
93
- // could have produced, which is what the user actually heard.
94
- currentResponseFirstAudioAt = null;
95
- options;
96
- /** Open the Realtime WebSocket and apply the session configuration. */
97
- async connect() {
98
- const url = `wss://api.openai.com/v1/realtime?model=${encodeURIComponent(this.model)}`;
99
- this.ws = new WebSocket(url, {
100
- headers: {
101
- Authorization: `Bearer ${this.apiKey}`,
102
- "OpenAI-Beta": "realtime=v1"
103
- }
104
- });
105
- await new Promise((resolve, reject) => {
106
- let sessionCreated = false;
107
- let settled = false;
108
- const ws = this.ws;
109
- const onSetupMessage = (raw) => {
110
- let msg;
111
- try {
112
- msg = JSON.parse(raw.toString());
113
- } catch (e) {
114
- getLogger().warn(`OpenAI Realtime: failed to parse message: ${String(e)}`);
115
- return;
116
- }
117
- if (msg.type === "session.created" && !sessionCreated) {
118
- sessionCreated = true;
119
- const config = {
120
- input_audio_format: this.audioFormat,
121
- output_audio_format: this.audioFormat,
122
- voice: this.voice,
123
- instructions: this.instructions || "You are a helpful voice assistant. Be concise.",
124
- turn_detection: {
125
- type: this.options.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
126
- threshold: 0.5,
127
- prefix_padding_ms: 300,
128
- silence_duration_ms: this.options.silenceDurationMs ?? 300
129
- },
130
- input_audio_transcription: {
131
- model: this.options.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
132
- }
133
- };
134
- if (this.options.temperature !== void 0) config.temperature = this.options.temperature;
135
- if (this.options.maxResponseOutputTokens !== void 0) {
136
- config.max_response_output_tokens = this.options.maxResponseOutputTokens;
137
- }
138
- if (this.options.modalities !== void 0) config.modalities = this.options.modalities;
139
- if (this.options.toolChoice !== void 0) config.tool_choice = this.options.toolChoice;
140
- if (this.options.reasoningEffort !== void 0) {
141
- config.reasoning = { effort: this.options.reasoningEffort };
142
- }
143
- if (this.tools?.length) {
144
- config.tools = this.tools.map((t) => {
145
- const def = {
146
- type: "function",
147
- name: t.name,
148
- description: t.description,
149
- parameters: t.parameters
150
- };
151
- if (t.strict === true) {
152
- def.strict = true;
153
- }
154
- return def;
155
- });
156
- }
157
- ws.send(JSON.stringify({ type: "session.update", session: config }));
158
- } else if (msg.type === "session.updated") {
159
- cleanup();
160
- resolve();
161
- }
162
- };
163
- const onSetupError = (err) => {
164
- cleanup();
165
- try {
166
- ws.close();
167
- } catch {
168
- }
169
- reject(err);
170
- };
171
- const cleanup = () => {
172
- if (settled) return;
173
- settled = true;
174
- clearTimeout(timer);
175
- ws.off("message", onSetupMessage);
176
- ws.off("error", onSetupError);
177
- };
178
- const timer = setTimeout(() => {
179
- cleanup();
180
- try {
181
- ws.close();
182
- } catch {
183
- }
184
- reject(new Error("OpenAI Realtime connect timeout"));
185
- }, 15e3);
186
- ws.on("message", onSetupMessage);
187
- ws.on("error", onSetupError);
188
- });
189
- this.heartbeat = setInterval(() => {
190
- try {
191
- this.ws?.ping();
192
- } catch {
193
- }
194
- }, 2e4);
195
- this.ensureMessageListener();
196
- }
197
- /** Append a base64-encoded audio chunk to the realtime input buffer. */
198
- sendAudio(mulawAudio) {
199
- if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
200
- this.ws.send(JSON.stringify({ type: "input_audio_buffer.append", audio: mulawAudio.toString("base64") }));
201
- }
202
- /**
203
- * Register a listener for parsed realtime events.
204
- *
205
- * Previously every call attached a new ``ws.on('message')`` handler,
206
- * which leaked listeners across retries and multi-consumer hooks. We now
207
- * route all traffic through a single persistent handler that fans out to
208
- * a Set of callbacks. Use {@link offEvent} to remove one.
209
- */
210
- onEvent(callback) {
211
- this.eventCallbacks.add(callback);
212
- this.ensureMessageListener();
213
- }
214
- /** Remove a previously registered {@link onEvent} callback. */
215
- offEvent(callback) {
216
- this.eventCallbacks.delete(callback);
217
- }
218
- ensureMessageListener() {
219
- if (this.messageListenerAttached || !this.ws) return;
220
- this.messageListenerAttached = true;
221
- const ws = this.ws;
222
- const dispatch = (type, payload) => {
223
- for (const cb of this.eventCallbacks) {
224
- void Promise.resolve(cb(type, payload)).catch(
225
- (err) => getLogger().error("onEvent callback error:", err)
226
- );
227
- }
228
- };
229
- ws.on("message", (raw) => {
230
- let data;
231
- try {
232
- data = JSON.parse(raw.toString());
233
- } catch (e) {
234
- getLogger().warn(`OpenAI Realtime: failed to parse event message: ${String(e)}`);
235
- return;
236
- }
237
- const t = data.type;
238
- if (t === "response.audio.delta") {
239
- const buf = Buffer.from(data.delta ?? "", "base64");
240
- this.currentResponseAudioMs += estimateAudioMs(buf, this.audioFormat);
241
- if (this.currentResponseFirstAudioAt === null) {
242
- this.currentResponseFirstAudioAt = Date.now();
243
- }
244
- dispatch("audio", buf);
245
- } else if (t === "response.audio_transcript.delta") {
246
- dispatch("transcript_output", data.delta);
247
- } else if (t === "response.content_part.added" || t === "response.output_item.added") {
248
- const itemId = data.item?.id ?? data.item_id ?? null;
249
- if (itemId) {
250
- this.currentResponseItemId = itemId;
251
- this.currentResponseAudioMs = 0;
252
- this.currentResponseFirstAudioAt = null;
253
- }
254
- } else if (t === "input_audio_buffer.speech_started") {
255
- dispatch("speech_started", null);
256
- } else if (t === "input_audio_buffer.speech_stopped") {
257
- dispatch("speech_stopped", null);
258
- } else if (t === "conversation.item.input_audio_transcription.completed") {
259
- dispatch("transcript_input", data.transcript);
260
- } else if (t === "response.function_call_arguments.done") {
261
- dispatch("function_call", { call_id: data.call_id, name: data.name, arguments: data.arguments });
262
- } else if (t === "response.done") {
263
- this.currentResponseItemId = null;
264
- this.currentResponseAudioMs = 0;
265
- this.currentResponseFirstAudioAt = null;
266
- dispatch("response_done", data.response ?? null);
267
- } else if (t === "error") {
268
- dispatch("error", data.error);
269
- }
270
- });
271
- ws.on("close", (code, reason) => {
272
- if (code !== 1e3) {
273
- dispatch("error", {
274
- type: "connection_closed",
275
- code,
276
- reason: reason?.toString() ?? ""
277
- });
278
- }
279
- });
280
- ws.on("error", (err) => {
281
- dispatch("error", { type: "socket_error", message: err?.message ?? String(err) });
282
- });
283
- }
284
- /** Truncate the in-flight assistant turn and cancel the active response.
285
- *
286
- * ``audio_end_ms`` MUST reflect what the caller actually heard, not what
287
- * the server generated. OpenAI streams audio at 5-10x real-time, so the
288
- * byte-derived counter overstates playback whenever the consumer cleared
289
- * its playout buffer (e.g. ``send_clear``) before the audio reached the
290
- * speaker. We bound the truncate point by wall-clock time since the first
291
- * chunk of this response — that's the physical maximum a 1x real-time
292
- * playback could have produced. Without this cap, OpenAI keeps the full
293
- * generated assistant text on the transcript, and the model replays /
294
- * resumes from it on the next turn — manifesting as re-greetings and
295
- * mid-sentence fragments after a barge-in storm.
296
- */
297
- cancelResponse() {
298
- if (!this.ws) return;
299
- if (this.currentResponseItemId) {
300
- let audioEndMs = this.currentResponseAudioMs;
301
- if (this.currentResponseFirstAudioAt !== null) {
302
- const elapsedMs = Date.now() - this.currentResponseFirstAudioAt;
303
- audioEndMs = Math.min(audioEndMs, Math.max(elapsedMs, 0));
304
- }
305
- try {
306
- this.ws.send(JSON.stringify({
307
- type: "conversation.item.truncate",
308
- item_id: this.currentResponseItemId,
309
- content_index: 0,
310
- audio_end_ms: audioEndMs
311
- }));
312
- } catch (err) {
313
- getLogger().debug?.(`conversation.item.truncate failed: ${String(err)}`);
314
- }
315
- }
316
- this.ws.send(JSON.stringify({ type: "response.cancel" }));
317
- this.currentResponseItemId = null;
318
- this.currentResponseAudioMs = 0;
319
- this.currentResponseFirstAudioAt = null;
320
- }
321
- /** Inject a user text turn and request a new response. */
322
- async sendText(text) {
323
- this.ws?.send(JSON.stringify({
324
- type: "conversation.item.create",
325
- item: { type: "message", role: "user", content: [{ type: "input_text", text }] }
326
- }));
327
- this.ws?.send(JSON.stringify({ type: "response.create" }));
328
- }
329
- /**
330
- * Make the AI speak ``text`` as its opening line.
331
- *
332
- * Triggers ``response.create`` with explicit ``instructions`` that force
333
- * the model to render ``text`` verbatim as its first audio utterance.
334
- * This is the correct semantics for ``Agent.firstMessage`` per its
335
- * docstring ("What the AI says when the callee answers").
336
- *
337
- * Without this, ``sendText(firstMessage)`` would inject ``text`` as
338
- * ``role: user`` and the AI would *reply* to its own greeting, producing
339
- * role-confused openings (e.g. a receptionist agent responding "I'd like
340
- * to schedule a haircut" because it took its own first_message as a
341
- * customer cue).
342
- */
343
- async sendFirstMessage(text) {
344
- this.ws?.send(JSON.stringify({
345
- type: "response.create",
346
- response: {
347
- modalities: ["audio", "text"],
348
- instructions: `Say exactly the following sentence as your first turn and nothing else: "${text}"`
349
- }
350
- }));
351
- }
352
- /** Submit a tool/function-call result and request the next response. */
353
- async sendFunctionResult(callId, result) {
354
- this.ws?.send(JSON.stringify({
355
- type: "conversation.item.create",
356
- item: { type: "function_call_output", call_id: callId, output: result }
357
- }));
358
- this.ws?.send(JSON.stringify({ type: "response.create" }));
359
- }
360
- /** Stop the heartbeat, drop listeners, and close the Realtime WebSocket. */
361
- close() {
362
- if (this.heartbeat) {
363
- clearInterval(this.heartbeat);
364
- this.heartbeat = null;
365
- }
366
- this.eventCallbacks.clear();
367
- this.messageListenerAttached = false;
368
- this.ws?.close();
369
- this.ws = null;
370
- }
371
- };
372
- function estimateAudioMs(chunk, format) {
373
- if (chunk.length === 0) return 0;
374
- if (format === OpenAIRealtimeAudioFormat.G711_ULAW || format === OpenAIRealtimeAudioFormat.G711_ALAW)
375
- return Math.floor(chunk.length / 8);
376
- if (format === OpenAIRealtimeAudioFormat.PCM16) {
377
- return Math.floor(chunk.length / 48);
378
- }
379
- return 0;
380
- }
381
-
382
32
  // src/providers/elevenlabs-convai.ts
383
33
  init_esm_shims();
384
- import WebSocket2 from "ws";
34
+ import WebSocket from "ws";
385
35
  var ELEVENLABS_CONVAI_URL = "wss://api.elevenlabs.io/v1/convai/conversation";
386
36
  var ELEVENLABS_SIGNED_URL = "https://api.elevenlabs.io/v1/convai/conversation/get-signed-url";
387
37
  var AGENT_SILENCE_MS = 500;
@@ -503,8 +153,8 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
503
153
  wsUrl = this.agentId ? `${ELEVENLABS_CONVAI_URL}?agent_id=${encodeURIComponent(this.agentId)}` : ELEVENLABS_CONVAI_URL;
504
154
  wsOptions = { headers: { "xi-api-key": this.apiKey } };
505
155
  }
506
- this.ws = new WebSocket2(wsUrl, wsOptions);
507
- await new Promise((resolve, reject) => {
156
+ this.ws = new WebSocket(wsUrl, wsOptions);
157
+ await new Promise((resolve2, reject) => {
508
158
  const timeout = setTimeout(
509
159
  () => reject(new Error("ElevenLabs ConvAI connect timeout")),
510
160
  15e3
@@ -528,7 +178,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
528
178
  conversation_config_override: override
529
179
  };
530
180
  this.ws.send(JSON.stringify(config));
531
- resolve();
181
+ resolve2();
532
182
  });
533
183
  this.ws.once("error", (err) => {
534
184
  clearTimeout(timeout);
@@ -565,7 +215,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
565
215
  }
566
216
  respondToPing(eventId, delayMs) {
567
217
  const send = () => {
568
- if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) return;
218
+ if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
569
219
  try {
570
220
  this.ws.send(JSON.stringify({ type: "pong", event_id: eventId }));
571
221
  } catch (err) {
@@ -662,7 +312,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
662
312
  }
663
313
  /** Send a caller-side audio chunk to ConvAI as a base64 `user_audio_chunk`. */
664
314
  sendAudio(audioBytes) {
665
- if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) return;
315
+ if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
666
316
  this.ws.send(
667
317
  JSON.stringify({
668
318
  user_audio_chunk: audioBytes.toString("base64")
@@ -685,20 +335,20 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
685
335
  return;
686
336
  }
687
337
  const ws = this.ws;
688
- this.closePromise = new Promise((resolve) => {
689
- if (ws.readyState === WebSocket2.CLOSED || ws.readyState === WebSocket2.CLOSING) {
690
- resolve();
338
+ this.closePromise = new Promise((resolve2) => {
339
+ if (ws.readyState === WebSocket.CLOSED || ws.readyState === WebSocket.CLOSING) {
340
+ resolve2();
691
341
  return;
692
342
  }
693
343
  const done = () => {
694
- resolve();
344
+ resolve2();
695
345
  };
696
346
  ws.once("close", done);
697
347
  ws.once("error", done);
698
348
  try {
699
349
  ws.close();
700
350
  } catch {
701
- resolve();
351
+ resolve2();
702
352
  }
703
353
  });
704
354
  try {
@@ -722,6 +372,8 @@ async function createTTS(agent) {
722
372
 
723
373
  // src/pricing.ts
724
374
  init_esm_shims();
375
+ var PRICING_VERSION = "2026.3";
376
+ var PRICING_LAST_UPDATED = "2026-05-08";
725
377
  var PricingUnit = {
726
378
  MINUTE: "minute",
727
379
  THOUSAND_CHARS: "1k_chars",
@@ -750,14 +402,26 @@ var DEFAULT_PRICING = {
750
402
  // STT — per minute of audio processed.
751
403
  deepgram: {
752
404
  unit: PricingUnit.MINUTE,
753
- // Default = Nova-3 streaming monolingual ($0.0077/min). Previous $0.0043
754
- // was the batch rate; streaming is ~80% more expensive.
755
- price: 77e-4,
405
+ // Default = Nova-3 streaming monolingual ($0.0048/min, current Pay-
406
+ // As-You-Go promotional rate). Source: https://deepgram.com/pricing
407
+ // (verified 2026-05-11). The promo replaces the standard $0.0077/min
408
+ // quoted at Nova-3 launch and is the rate customers actually pay
409
+ // today; revisit when Deepgram removes the "Limited-time promotional
410
+ // rates on streaming" banner.
411
+ price: 48e-4,
756
412
  models: {
757
- "nova-3": { price: 77e-4 },
758
- "nova-3-multilingual": { price: 92e-4 },
413
+ // Nova-3 family current flagship.
414
+ "nova-3": { price: 48e-4 },
415
+ "nova-3-multilingual": { price: 58e-4 },
416
+ // Flux family — new event-driven turn-taking STT (2026 launch).
417
+ flux: { price: 65e-4 },
418
+ "flux-english": { price: 65e-4 },
419
+ "flux-multilingual": { price: 78e-4 },
420
+ // Legacy Nova-2 / Nova-1 — still supported but no longer featured on
421
+ // the public pricing page; rates kept as last verified.
759
422
  "nova-2": { price: 58e-4 },
760
423
  nova: { price: 43e-4 },
424
+ // Whisper Cloud via Deepgram — separate tier.
761
425
  "whisper-large": { price: 48e-4 },
762
426
  "whisper-medium": { price: 48e-4 }
763
427
  }
@@ -796,27 +460,30 @@ var DEFAULT_PRICING = {
796
460
  // retired; users were being over-billed ~4.3x.
797
461
  speechmatics: { unit: PricingUnit.MINUTE, price: 4e-3 },
798
462
  // TTS — per 1,000 characters synthesized.
463
+ // Source: https://elevenlabs.io/pricing/api (verified 2026-05-11). The
464
+ // per-1K-character API/overage rate is flat across all plan tiers (Free
465
+ // through Business); only the included character bundle varies by plan.
799
466
  elevenlabs: {
800
467
  unit: PricingUnit.THOUSAND_CHARS,
801
- // Default = eleven_flash_v2_5 (Patter's default model) at $0.06/1k.
802
- price: 0.06,
468
+ // Default = eleven_flash_v2_5 (Patter's default model) at $0.05/1k.
469
+ price: 0.05,
803
470
  models: {
804
- eleven_flash_v2_5: { price: 0.06 },
471
+ eleven_flash_v2_5: { price: 0.05 },
805
472
  eleven_turbo_v2_5: { price: 0.05 },
806
- eleven_multilingual_v2: { price: 0.18 },
807
- eleven_monolingual_v1: { price: 0.18 },
808
- eleven_v3: { price: 0.3 }
473
+ eleven_multilingual_v2: { price: 0.1 },
474
+ eleven_monolingual_v1: { price: 0.1 },
475
+ eleven_v3: { price: 0.1 }
809
476
  }
810
477
  },
811
478
  // ElevenLabs WebSocket streaming TTS shares pricing with REST.
812
479
  elevenlabs_ws: {
813
480
  unit: PricingUnit.THOUSAND_CHARS,
814
- price: 0.06,
481
+ price: 0.05,
815
482
  models: {
816
- eleven_flash_v2_5: { price: 0.06 },
483
+ eleven_flash_v2_5: { price: 0.05 },
817
484
  eleven_turbo_v2_5: { price: 0.05 },
818
- eleven_multilingual_v2: { price: 0.18 },
819
- eleven_v3: { price: 0.3 }
485
+ eleven_multilingual_v2: { price: 0.1 },
486
+ eleven_v3: { price: 0.1 }
820
487
  }
821
488
  },
822
489
  openai_tts: {
@@ -946,7 +613,24 @@ var DEFAULT_PRICING = {
946
613
  // calls on a local number). For US toll-free inbound ($0.022/min) or US
947
614
  // outbound local ($0.0140/min), override via Patter({ pricing: { twilio: {...} } }).
948
615
  twilio: { unit: PricingUnit.MINUTE, price: 85e-4 },
949
- telnyx: { unit: PricingUnit.MINUTE, price: 7e-3 }
616
+ // Telnyx direction-aware rates as of 2026-05-11.
617
+ // Sources:
618
+ // https://telnyx.com/pricing/elastic-sip
619
+ // https://telnyx.com/pricing/voice-api
620
+ // US inbound (DID / local termination, Pay-As-You-Go): $0.0035/min
621
+ // US outbound (Pay-As-You-Go, mid-range of $0.005-$0.009): $0.007/min
622
+ // Billing granularity is per-MINUTE (Telnyx rounds partial minutes up
623
+ // on the invoice; prior internal docs incorrectly claimed per-second).
624
+ // The legacy ``telnyx`` key is preserved at the outbound rate as a
625
+ // safe fallback for users who override ``pricing: { telnyx: {...} }``
626
+ // without knowing the direction; the metrics layer currently uses
627
+ // this flat key (direction is not threaded through to
628
+ // ``calculateTelephonyCost``). Direction-aware billing can be enabled
629
+ // by override-only: ``new Patter({ pricing: { telnyx: { unit: 'minute',
630
+ // price: 0.0035 } } })`` to bill all inbound at the lower rate.
631
+ telnyx: { unit: PricingUnit.MINUTE, price: 7e-3 },
632
+ telnyx_inbound: { unit: PricingUnit.MINUTE, price: 35e-4 },
633
+ telnyx_outbound: { unit: PricingUnit.MINUTE, price: 7e-3 }
950
634
  };
951
635
  function cloneProviderEntry(entry) {
952
636
  const out = { ...entry };
@@ -1083,16 +767,18 @@ var llmPricing = {
1083
767
  "gemma2-9b-it": { input: 0.2, output: 0.2 }
1084
768
  },
1085
769
  cerebras: {
1086
- // Rates as of 2026-05-08; verify against cerebras.net/inference.
1087
- // ``gpt-oss-120b`` is the Patter default for Cerebras (set in 0.5.4).
1088
- // On WSE-3 hardware every model size saturates the downstream TTS
1089
- // consumption rate (~150-300 tok/sec), so the 120B price stays in line
1090
- // with the 70B tier rather than scaling with weight count.
1091
- "gpt-oss-120b": { input: 0.85, output: 1.2 },
1092
- "llama3.1-8b": { input: 0.1, output: 0.2 },
770
+ // Rates as of 2026-05-11 verified against the canonical per-model docs
771
+ // pages at ``https://inference-docs.cerebras.ai/models/<model>``. The
772
+ // previous 2026-05-08 update overcharged across the board (gpt-oss-120b
773
+ // 2.4x input, qwen-3-235b 1.67x input) because it conflated the launch
774
+ // blog quotes with the "Exploration pricing" banner now shown on each
775
+ // model page. Parity with libraries/python/getpatter/pricing.py.
776
+ "gpt-oss-120b": { input: 0.35, output: 0.75 },
777
+ "llama3.1-8b": { input: 0.1, output: 0.1 },
1093
778
  "llama-3.3-70b": { input: 0.85, output: 1.2 },
1094
779
  "qwen-3-32b": { input: 0.4, output: 0.8 },
1095
- "qwen-3-235b-a22b-instruct-2507": { input: 1, output: 1.5 },
780
+ "qwen-3-235b-a22b-instruct-2507": { input: 0.6, output: 1.2 },
781
+ "qwen-3-coder-480b": { input: 2, output: 2 },
1096
782
  "zai-glm-4.7": { input: 0.85, output: 1.2 }
1097
783
  },
1098
784
  // OpenAI Chat Completions (non-Realtime) — mirrors the Python SDK pricing table.
@@ -1137,12 +823,45 @@ function calculateTelephonyCost(provider2, durationSeconds, pricing) {
1137
823
  // src/dashboard/store.ts
1138
824
  init_esm_shims();
1139
825
  import { EventEmitter } from "events";
826
+ import * as fs2 from "fs";
827
+ import * as path2 from "path";
828
+
829
+ // src/version.ts
830
+ init_esm_shims();
1140
831
  import * as fs from "fs";
1141
832
  import * as path from "path";
833
+ function readVersion() {
834
+ try {
835
+ const pkgPath = path.resolve(__dirname, "..", "package.json");
836
+ const pkg = JSON.parse(fs.readFileSync(pkgPath, "utf8"));
837
+ return typeof pkg.version === "string" && pkg.version.length > 0 ? pkg.version : "";
838
+ } catch {
839
+ return "";
840
+ }
841
+ }
842
+ var VERSION = readVersion();
843
+
844
+ // src/dashboard/store.ts
845
+ function sdkVersion() {
846
+ return VERSION;
847
+ }
1142
848
  var MetricsStore = class extends EventEmitter {
1143
849
  maxCalls;
1144
850
  calls = [];
1145
851
  activeCalls = /* @__PURE__ */ new Map();
852
+ /**
853
+ * User-driven soft delete: call_ids the operator removed from the
854
+ * dashboard view. The on-disk artefacts written by ``CallLogger``
855
+ * (``metadata.json``, ``transcript.jsonl``) are intentionally NOT
856
+ * touched — they serve as the durable backup. All read paths
857
+ * (``getCalls`` / ``getCall`` / ``getAggregates`` / ``getCallsInRange``
858
+ * / ``hydrate``) filter against this set so the call is invisible
859
+ * to the UI and excluded from rolling metrics. Populated from
860
+ * ``<logRoot>/.deleted_call_ids.json`` on hydrate so deletions
861
+ * survive a process restart. Parity with Python.
862
+ */
863
+ deletedCallIds = /* @__PURE__ */ new Set();
864
+ deletedIdsPath = null;
1146
865
  /**
1147
866
  * Accepts either a numeric ``maxCalls`` (legacy positional — matches the
1148
867
  * original TS API) or an options object ``{ maxCalls }`` to align with the
@@ -1235,6 +954,8 @@ var MetricsStore = class extends EventEmitter {
1235
954
  ended_at: Date.now() / 1e3,
1236
955
  status,
1237
956
  metrics: null,
957
+ ...active.turns && active.turns.length > 0 ? { turns: active.turns } : {},
958
+ ...active.transcript && active.transcript.length > 0 ? { transcript: active.transcript } : {},
1238
959
  ...extra
1239
960
  };
1240
961
  this.activeCalls.delete(callId);
@@ -1263,6 +984,21 @@ var MetricsStore = class extends EventEmitter {
1263
984
  if (active) {
1264
985
  if (!active.turns) active.turns = [];
1265
986
  active.turns.push(turn);
987
+ if (!active.transcript) active.transcript = [];
988
+ const turnRecord = turn;
989
+ const userText = typeof turnRecord.user_text === "string" ? turnRecord.user_text : "";
990
+ const agentText = typeof turnRecord.agent_text === "string" ? turnRecord.agent_text : "";
991
+ const ts = typeof turnRecord.timestamp === "number" ? turnRecord.timestamp : Date.now() / 1e3;
992
+ if (userText.length > 0) {
993
+ active.transcript.push({ role: "user", text: userText, timestamp: ts });
994
+ }
995
+ if (agentText.length > 0 && agentText !== "[interrupted]") {
996
+ active.transcript.push({
997
+ role: "assistant",
998
+ text: agentText,
999
+ timestamp: ts
1000
+ });
1001
+ }
1266
1002
  }
1267
1003
  this.publish("turn_complete", { call_id: callId, turn });
1268
1004
  }
@@ -1272,40 +1008,140 @@ var MetricsStore = class extends EventEmitter {
1272
1008
  if (!callId) return;
1273
1009
  const active = this.activeCalls.get(callId);
1274
1010
  this.activeCalls.delete(callId);
1275
- const activeStatus = active?.status;
1276
- const resolvedStatus = activeStatus && activeStatus !== "in-progress" ? activeStatus : "completed";
1011
+ let existingIdx = -1;
1012
+ if (active === void 0) {
1013
+ for (let i = this.calls.length - 1; i >= 0; i--) {
1014
+ if (this.calls[i].call_id === callId) {
1015
+ existingIdx = i;
1016
+ break;
1017
+ }
1018
+ }
1019
+ }
1020
+ const existing = existingIdx >= 0 ? this.calls[existingIdx] : void 0;
1021
+ const priorStatus = active?.status ?? existing?.status;
1022
+ const resolvedStatus = priorStatus && priorStatus !== "in-progress" ? priorStatus : "completed";
1023
+ const dataTranscript = data.transcript;
1024
+ const resolvedTranscript = dataTranscript && dataTranscript.length > 0 ? dataTranscript : active?.transcript && active.transcript.length > 0 ? active.transcript : existing?.transcript && existing.transcript.length > 0 ? existing.transcript : [];
1025
+ const resolvedTurns = active?.turns && active.turns.length > 0 ? active.turns : existing?.turns && existing.turns.length > 0 ? existing.turns : void 0;
1277
1026
  const entry = {
1278
1027
  call_id: callId,
1279
- caller: data.caller || active?.caller || "",
1280
- callee: data.callee || active?.callee || "",
1281
- direction: active?.direction || data.direction || "inbound",
1282
- started_at: active?.started_at || 0,
1028
+ caller: data.caller || active?.caller || existing?.caller || "",
1029
+ callee: data.callee || active?.callee || existing?.callee || "",
1030
+ direction: active?.direction || existing?.direction || data.direction || "inbound",
1031
+ started_at: active?.started_at || existing?.started_at || 0,
1283
1032
  ended_at: Date.now() / 1e3,
1284
- transcript: data.transcript || [],
1033
+ transcript: resolvedTranscript,
1034
+ ...resolvedTurns ? { turns: resolvedTurns } : {},
1285
1035
  status: resolvedStatus,
1286
- metrics: metrics ?? null
1036
+ metrics: metrics ?? existing?.metrics ?? null
1287
1037
  };
1288
- this.calls.push(entry);
1289
- if (this.calls.length > this.maxCalls) {
1290
- this.calls = this.calls.slice(-this.maxCalls);
1038
+ if (existingIdx >= 0) {
1039
+ this.calls[existingIdx] = entry;
1040
+ } else {
1041
+ this.calls.push(entry);
1042
+ if (this.calls.length > this.maxCalls) {
1043
+ this.calls = this.calls.slice(-this.maxCalls);
1044
+ }
1291
1045
  }
1292
1046
  this.publish("call_end", {
1293
1047
  call_id: callId,
1294
1048
  metrics: entry.metrics ?? null
1295
1049
  });
1296
1050
  }
1297
- /** Return a window of completed calls in newest-first order. */
1051
+ /**
1052
+ * Return a window of completed calls in newest-first order.
1053
+ *
1054
+ * Soft-deleted call_ids (see ``deleteCalls``) are filtered out so the
1055
+ * dashboard never re-shows a row the user removed. The on-disk
1056
+ * artefacts are intentionally preserved as a backup.
1057
+ */
1298
1058
  getCalls(limit = 50, offset = 0) {
1299
- const ordered = [...this.calls].reverse();
1059
+ const visible = this.calls.filter((c) => !this.deletedCallIds.has(c.call_id));
1060
+ const ordered = visible.reverse();
1300
1061
  return ordered.slice(offset, offset + limit);
1301
1062
  }
1302
- /** Look up a completed call by id (newest match wins). */
1063
+ /**
1064
+ * Look up a completed call by id (newest match wins).
1065
+ *
1066
+ * Soft-deleted call_ids resolve to ``null`` so the SPA's detail pane
1067
+ * cannot render a row the user removed.
1068
+ */
1303
1069
  getCall(callId) {
1070
+ if (this.deletedCallIds.has(callId)) return null;
1304
1071
  for (let i = this.calls.length - 1; i >= 0; i--) {
1305
1072
  if (this.calls[i].call_id === callId) return this.calls[i];
1306
1073
  }
1307
1074
  return null;
1308
1075
  }
1076
+ /**
1077
+ * Soft-delete one or more calls from the dashboard view.
1078
+ *
1079
+ * Adds each ``call_id`` to an in-memory set. Subsequent reads via
1080
+ * ``getCalls`` / ``getCall`` / ``getAggregates`` / ``getCallsInRange``
1081
+ * exclude the deleted ids, so rolling metrics (avg latency, total
1082
+ * spend) are recomputed without them. The on-disk
1083
+ * ``metadata.json`` / ``transcript.jsonl`` files written by
1084
+ * ``CallLogger`` are NOT touched — they serve as a durable backup
1085
+ * the operator can audit outside the dashboard.
1086
+ *
1087
+ * Active calls are never deletable. A call_id that is currently
1088
+ * in ``activeCalls`` is silently skipped so a mid-call delete
1089
+ * from the UI cannot orphan the live transcript pane.
1090
+ *
1091
+ * Persisted to ``<logRoot>/.deleted_call_ids.json`` (best-effort)
1092
+ * when ``hydrate()`` has been called with a log root. Parity with
1093
+ * Python ``delete_calls``.
1094
+ *
1095
+ * @returns The list of call_ids actually accepted as deleted.
1096
+ */
1097
+ deleteCalls(callIds) {
1098
+ const ids = /* @__PURE__ */ new Set();
1099
+ for (const cid of callIds || []) {
1100
+ if (typeof cid === "string" && cid && !this.activeCalls.has(cid)) {
1101
+ ids.add(cid);
1102
+ }
1103
+ }
1104
+ if (ids.size === 0) return [];
1105
+ const accepted = [];
1106
+ for (const cid of ids) {
1107
+ if (!this.deletedCallIds.has(cid)) {
1108
+ this.deletedCallIds.add(cid);
1109
+ accepted.push(cid);
1110
+ }
1111
+ }
1112
+ if (accepted.length === 0) return [];
1113
+ accepted.sort();
1114
+ this.persistDeletedIds();
1115
+ this.publish("calls_deleted", { call_ids: accepted });
1116
+ return accepted;
1117
+ }
1118
+ /** Whether ``callId`` was soft-deleted from the dashboard. */
1119
+ isDeleted(callId) {
1120
+ return this.deletedCallIds.has(callId);
1121
+ }
1122
+ /** Snapshot of soft-deleted call_ids (sorted). */
1123
+ getDeletedCallIds() {
1124
+ return Array.from(this.deletedCallIds).sort();
1125
+ }
1126
+ /** Atomically persist the deleted-ids set to disk. Best-effort. */
1127
+ persistDeletedIds() {
1128
+ if (this.deletedIdsPath === null) return;
1129
+ try {
1130
+ const dir = path2.dirname(this.deletedIdsPath);
1131
+ fs2.mkdirSync(dir, { recursive: true });
1132
+ const tmp = this.deletedIdsPath + ".tmp";
1133
+ const payload = {
1134
+ version: 1,
1135
+ deleted_call_ids: Array.from(this.deletedCallIds).sort()
1136
+ };
1137
+ fs2.writeFileSync(tmp, JSON.stringify(payload, null, 2), "utf8");
1138
+ fs2.renameSync(tmp, this.deletedIdsPath);
1139
+ } catch (err) {
1140
+ getLogger().debug(
1141
+ `MetricsStore.persistDeletedIds: ${String(err)}`
1142
+ );
1143
+ }
1144
+ }
1309
1145
  /** Look up an active call by id (returns undefined if not active or unknown). */
1310
1146
  getActive(callId) {
1311
1147
  return this.activeCalls.get(callId);
@@ -1314,9 +1150,17 @@ var MetricsStore = class extends EventEmitter {
1314
1150
  getActiveCalls() {
1315
1151
  return Array.from(this.activeCalls.values());
1316
1152
  }
1317
- /** Compute summary statistics across the buffered call history. */
1153
+ /**
1154
+ * Compute summary statistics across the buffered call history.
1155
+ *
1156
+ * Soft-deleted calls are excluded so rolling metrics (avg latency,
1157
+ * total spend) match exactly what the operator sees in the call list.
1158
+ */
1318
1159
  getAggregates() {
1319
- const totalCalls = this.calls.length;
1160
+ const visible = this.calls.filter(
1161
+ (c) => !this.deletedCallIds.has(c.call_id)
1162
+ );
1163
+ const totalCalls = visible.length;
1320
1164
  if (totalCalls === 0) {
1321
1165
  return {
1322
1166
  total_calls: 0,
@@ -1324,7 +1168,8 @@ var MetricsStore = class extends EventEmitter {
1324
1168
  avg_duration: 0,
1325
1169
  avg_latency_ms: 0,
1326
1170
  cost_breakdown: { stt: 0, tts: 0, llm: 0, telephony: 0 },
1327
- active_calls: this.activeCalls.size
1171
+ active_calls: this.activeCalls.size,
1172
+ sdk_version: sdkVersion()
1328
1173
  };
1329
1174
  }
1330
1175
  let totalCost = 0;
@@ -1335,7 +1180,7 @@ var MetricsStore = class extends EventEmitter {
1335
1180
  let costTts = 0;
1336
1181
  let costLlm = 0;
1337
1182
  let costTel = 0;
1338
- for (const call of this.calls) {
1183
+ for (const call of visible) {
1339
1184
  const m = call.metrics;
1340
1185
  if (!m) continue;
1341
1186
  const cost = m.cost || {};
@@ -1346,7 +1191,7 @@ var MetricsStore = class extends EventEmitter {
1346
1191
  costTel += cost.telephony || 0;
1347
1192
  totalDuration += m.duration_seconds || 0;
1348
1193
  const avgLat = m.latency_avg || {};
1349
- const tMs = avgLat.total_ms || 0;
1194
+ const tMs = avgLat.agent_response_ms || avgLat.total_ms || 0;
1350
1195
  if (tMs > 0) {
1351
1196
  totalLatency += tMs;
1352
1197
  latencyCount++;
@@ -1363,21 +1208,30 @@ var MetricsStore = class extends EventEmitter {
1363
1208
  llm: Math.round(costLlm * 1e6) / 1e6,
1364
1209
  telephony: Math.round(costTel * 1e6) / 1e6
1365
1210
  },
1366
- active_calls: this.activeCalls.size
1211
+ active_calls: this.activeCalls.size,
1212
+ sdk_version: sdkVersion()
1367
1213
  };
1368
1214
  }
1369
- /** Return calls whose `started_at` falls within `[fromTs, toTs]` (Unix seconds). */
1215
+ /**
1216
+ * Return calls whose `started_at` falls within `[fromTs, toTs]` (Unix
1217
+ * seconds). Soft-deleted calls are filtered out.
1218
+ */
1370
1219
  getCallsInRange(fromTs = 0, toTs = 0) {
1371
1220
  return this.calls.filter((call) => {
1221
+ if (this.deletedCallIds.has(call.call_id)) return false;
1372
1222
  const started = call.started_at || 0;
1373
1223
  if (fromTs && started < fromTs) return false;
1374
1224
  if (toTs && started > toTs) return false;
1375
1225
  return true;
1376
1226
  });
1377
1227
  }
1378
- /** Number of completed calls currently in the ring buffer. */
1228
+ /** Number of completed (non-deleted) calls currently in the ring buffer. */
1379
1229
  get callCount() {
1380
- return this.calls.length;
1230
+ let n = 0;
1231
+ for (const c of this.calls) {
1232
+ if (!this.deletedCallIds.has(c.call_id)) n++;
1233
+ }
1234
+ return n;
1381
1235
  }
1382
1236
  /**
1383
1237
  * Rebuild the in-memory call list from `metadata.json` files written by
@@ -1391,19 +1245,37 @@ var MetricsStore = class extends EventEmitter {
1391
1245
  */
1392
1246
  hydrate(logRoot) {
1393
1247
  if (!logRoot) return 0;
1394
- const callsRoot = path.join(logRoot, "calls");
1395
- if (!fs.existsSync(callsRoot)) return 0;
1248
+ const deletedIdsPath = path2.join(logRoot, ".deleted_call_ids.json");
1249
+ this.deletedIdsPath = deletedIdsPath;
1250
+ if (fs2.existsSync(deletedIdsPath)) {
1251
+ try {
1252
+ const raw = fs2.readFileSync(deletedIdsPath, "utf8");
1253
+ const payload = JSON.parse(raw);
1254
+ const arr = Array.isArray(payload.deleted_call_ids) ? payload.deleted_call_ids : [];
1255
+ for (const cid of arr) {
1256
+ if (typeof cid === "string" && cid.length > 0) {
1257
+ this.deletedCallIds.add(cid);
1258
+ }
1259
+ }
1260
+ } catch (err) {
1261
+ getLogger().debug(
1262
+ `MetricsStore.hydrate: skipping ${deletedIdsPath}: ${String(err)}`
1263
+ );
1264
+ }
1265
+ }
1266
+ const callsRoot = path2.join(logRoot, "calls");
1267
+ if (!fs2.existsSync(callsRoot)) return 0;
1396
1268
  const collected = [];
1397
1269
  const seen = new Set(this.calls.map((c) => c.call_id));
1398
1270
  const walk = (dir, depth) => {
1399
1271
  let entries;
1400
1272
  try {
1401
- entries = fs.readdirSync(dir, { withFileTypes: true });
1273
+ entries = fs2.readdirSync(dir, { withFileTypes: true });
1402
1274
  } catch {
1403
1275
  return;
1404
1276
  }
1405
1277
  for (const entry of entries) {
1406
- const childPath = path.join(dir, entry.name);
1278
+ const childPath = path2.join(dir, entry.name);
1407
1279
  if (depth < 3) {
1408
1280
  if (entry.isDirectory() && /^\d+$/.test(entry.name)) {
1409
1281
  walk(childPath, depth + 1);
@@ -1411,10 +1283,10 @@ var MetricsStore = class extends EventEmitter {
1411
1283
  continue;
1412
1284
  }
1413
1285
  if (!entry.isDirectory()) continue;
1414
- const metadataPath = path.join(childPath, "metadata.json");
1415
- if (!fs.existsSync(metadataPath)) continue;
1286
+ const metadataPath = path2.join(childPath, "metadata.json");
1287
+ if (!fs2.existsSync(metadataPath)) continue;
1416
1288
  try {
1417
- const raw = fs.readFileSync(metadataPath, "utf8");
1289
+ const raw = fs2.readFileSync(metadataPath, "utf8");
1418
1290
  const meta = JSON.parse(raw);
1419
1291
  const callId = meta.call_id || entry.name;
1420
1292
  if (!callId || seen.has(callId)) continue;
@@ -1425,6 +1297,12 @@ var MetricsStore = class extends EventEmitter {
1425
1297
  );
1426
1298
  continue;
1427
1299
  }
1300
+ if (!record.transcript || record.transcript.length === 0) {
1301
+ const fromJsonl = loadTranscriptJsonl(
1302
+ path2.join(childPath, "transcript.jsonl")
1303
+ );
1304
+ if (fromJsonl.length > 0) record.transcript = fromJsonl;
1305
+ }
1428
1306
  collected.push(record);
1429
1307
  seen.add(callId);
1430
1308
  } catch (err) {
@@ -1446,12 +1324,45 @@ var MetricsStore = class extends EventEmitter {
1446
1324
  return collected.length;
1447
1325
  }
1448
1326
  };
1327
+ function metricsFromTopLevel(meta) {
1328
+ const cost = meta.cost && typeof meta.cost === "object" ? meta.cost : null;
1329
+ const latency = meta.latency && typeof meta.latency === "object" ? meta.latency : null;
1330
+ const durationMs = meta.duration_ms;
1331
+ const telephony = meta.telephony_provider;
1332
+ if (cost === null && latency === null && durationMs == null && !telephony) {
1333
+ return null;
1334
+ }
1335
+ const out = {};
1336
+ if (cost !== null) out.cost = cost;
1337
+ if (latency !== null) {
1338
+ const fullAvg = latency.avg && typeof latency.avg === "object" ? latency.avg : null;
1339
+ const fullP50 = latency.p50 && typeof latency.p50 === "object" ? latency.p50 : null;
1340
+ const fullP95 = latency.p95 && typeof latency.p95 === "object" ? latency.p95 : null;
1341
+ const fullP99 = latency.p99 && typeof latency.p99 === "object" ? latency.p99 : null;
1342
+ if (fullAvg) out.latency_avg = fullAvg;
1343
+ if (fullP50) out.latency_p50 = fullP50;
1344
+ if (fullP95) out.latency_p95 = fullP95;
1345
+ if (fullP99) out.latency_p99 = fullP99;
1346
+ if (!fullAvg && !fullP50 && !fullP95) {
1347
+ const totalMs = typeof latency.p95_ms === "number" && latency.p95_ms || typeof latency.p50_ms === "number" && latency.p50_ms || 0;
1348
+ out.latency_avg = { total_ms: totalMs };
1349
+ }
1350
+ out.latency = latency;
1351
+ }
1352
+ if (typeof durationMs === "number" && durationMs > 0) {
1353
+ out.duration_seconds = durationMs / 1e3;
1354
+ }
1355
+ if (typeof telephony === "string" && telephony) {
1356
+ out.telephony_provider = telephony;
1357
+ }
1358
+ return Object.keys(out).length > 0 ? out : null;
1359
+ }
1449
1360
  function metadataToCallRecord(callId, meta) {
1450
1361
  const startedAt = parseTimestamp(meta.started_at);
1451
1362
  if (startedAt === null) return null;
1452
1363
  const endedAt = parseTimestamp(meta.ended_at);
1453
1364
  const status = meta.status || "completed";
1454
- const metrics = meta.metrics && typeof meta.metrics === "object" ? meta.metrics : null;
1365
+ const metrics = meta.metrics && typeof meta.metrics === "object" ? meta.metrics : metricsFromTopLevel(meta);
1455
1366
  const transcript = Array.isArray(meta.transcript) ? meta.transcript : [];
1456
1367
  return {
1457
1368
  call_id: callId,
@@ -1465,6 +1376,36 @@ function metadataToCallRecord(callId, meta) {
1465
1376
  transcript
1466
1377
  };
1467
1378
  }
1379
/**
 * Read a JSON-lines transcript file and flatten it into transcript entries.
 *
 * Each non-blank line is parsed as JSON. A row contributes a
 * `{ role: "user" }` entry when it has a non-empty string `user_text`, and a
 * `{ role: "assistant" }` entry when it has a non-empty string `agent_text`
 * other than the literal `"[interrupted]"`. The entry timestamp is taken from
 * a parseable string `ts` (via `Date.parse`), else from a finite numeric
 * `timestamp` multiplied by 1000, else `0`.
 *
 * Lines that are not valid JSON, or that parse to something other than an
 * object (for example `null`), are skipped individually so one bad line
 * cannot discard the rest of the transcript.
 *
 * @param filePath Path of the `.jsonl` file to read.
 * @returns The flattened entries; `[]` when the file is missing or unreadable.
 */
function loadTranscriptJsonl(filePath) {
  try {
    if (!fs2.existsSync(filePath)) return [];
    const raw = fs2.readFileSync(filePath, "utf8");
    const lines = raw.split("\n").filter((l) => l.trim().length > 0);
    const out = [];
    for (const line of lines) {
      let row;
      try {
        row = JSON.parse(line);
      } catch {
        continue;
      }
      // `JSON.parse("null")` succeeds and returns null; reading `row.ts` on it
      // would throw into the outer catch and drop every row parsed so far.
      if (row === null || typeof row !== "object") continue;
      const tsIso = typeof row.ts === "string" ? Date.parse(row.ts) : NaN;
      const tsNumeric = typeof row.timestamp === "number" ? row.timestamp * 1e3 : NaN;
      const timestamp = Number.isFinite(tsIso) ? tsIso : Number.isFinite(tsNumeric) ? tsNumeric : 0;
      const userText = typeof row.user_text === "string" ? row.user_text : "";
      const agentText = typeof row.agent_text === "string" ? row.agent_text : "";
      if (userText.length > 0) {
        out.push({ role: "user", text: userText, timestamp });
      }
      if (agentText.length > 0 && agentText !== "[interrupted]") {
        out.push({ role: "assistant", text: agentText, timestamp });
      }
    }
    return out;
  } catch {
    // Best-effort: an unreadable file yields an empty transcript.
    return [];
  }
}
1468
1409
  function parseTimestamp(raw) {
1469
1410
  if (typeof raw === "number") {
1470
1411
  return Number.isFinite(raw) ? raw : null;
@@ -1572,8 +1513,8 @@ function csvEscape(value) {
1572
1513
 
1573
1514
  // src/dashboard/ui.ts
1574
1515
  init_esm_shims();
1575
- import { readFileSync as readFileSync2 } from "fs";
1576
- import { join as join2, dirname } from "path";
1516
+ import { readFileSync as readFileSync3 } from "fs";
1517
+ import { join as join2, dirname as dirname2 } from "path";
1577
1518
  var FALLBACK_HTML = `<!doctype html>
1578
1519
  <html><head><meta charset="utf-8"><title>Patter dashboard</title></head>
1579
1520
  <body style="font-family:ui-sans-serif,system-ui;padding:2rem;color:#1a1a1a">
@@ -1583,15 +1524,15 @@ Run <code>cd dashboard-app &amp;&amp; npm run build &amp;&amp; npm run sync</cod
1583
1524
  from the repo root to regenerate it.</p>
1584
1525
  </body></html>`;
1585
1526
  function loadDashboardHtml() {
1586
- const here = typeof __dirname !== "undefined" ? __dirname : dirname(".");
1527
+ const here = typeof __dirname !== "undefined" ? __dirname : dirname2(".");
1587
1528
  const candidates = [
1588
1529
  join2(here, "ui.html"),
1589
1530
  join2(here, "dashboard", "ui.html"),
1590
1531
  join2(here, "..", "dashboard", "ui.html")
1591
1532
  ];
1592
- for (const path3 of candidates) {
1533
+ for (const path4 of candidates) {
1593
1534
  try {
1594
- return readFileSync2(path3, "utf8");
1535
+ return readFileSync3(path4, "utf8");
1595
1536
  } catch {
1596
1537
  }
1597
1538
  }
@@ -1611,7 +1552,8 @@ function mountDashboard(app, store, token = "") {
1611
1552
  res.json(store.getCalls(limit, offset));
1612
1553
  });
1613
1554
  app.get("/api/dashboard/calls/:callId", auth, (req, res) => {
1614
- const call = store.getCall(String(req.params.callId));
1555
+ const callId = String(req.params.callId);
1556
+ const call = store.getCall(callId) ?? store.getActive(callId);
1615
1557
  if (!call) {
1616
1558
  res.status(404).json({ error: "Not found" });
1617
1559
  return;
@@ -1624,6 +1566,24 @@ function mountDashboard(app, store, token = "") {
1624
1566
  app.get("/api/dashboard/aggregates", auth, (_req, res) => {
1625
1567
  res.json(store.getAggregates());
1626
1568
  });
1569
+ app.delete("/api/dashboard/calls/:callId", auth, (req, res) => {
1570
+ const callId = String(req.params.callId);
1571
+ const accepted = store.deleteCalls([callId]);
1572
+ res.json({ deleted: accepted, count: accepted.length });
1573
+ });
1574
+ app.post("/api/dashboard/calls/delete", auth, (req, res) => {
1575
+ const body = req.body ?? {};
1576
+ const raw = body.call_ids;
1577
+ if (!Array.isArray(raw)) {
1578
+ res.status(400).json({ error: "Expected JSON body { 'call_ids': [...] }" });
1579
+ return;
1580
+ }
1581
+ const ids = raw.filter(
1582
+ (cid) => typeof cid === "string" && cid.length > 0
1583
+ );
1584
+ const accepted = store.deleteCalls(ids);
1585
+ res.json({ deleted: accepted, count: accepted.length });
1586
+ });
1627
1587
  app.get("/api/dashboard/events", auth, (req, res) => {
1628
1588
  res.writeHead(200, {
1629
1589
  "Content-Type": "text/event-stream",
@@ -1696,7 +1656,8 @@ function mountApi(app, store, token = "") {
1696
1656
  res.json({ data: active, count: active.length });
1697
1657
  });
1698
1658
  app.get("/api/v1/calls/:callId", auth, (req, res) => {
1699
- const call = store.getCall(String(req.params.callId));
1659
+ const callId = String(req.params.callId);
1660
+ const call = store.getCall(callId) ?? store.getActive(callId);
1700
1661
  if (!call) {
1701
1662
  res.status(404).json({ error: "Call not found" });
1702
1663
  return;
@@ -1868,8 +1829,8 @@ var RemoteMessageHandler = class {
1868
1829
  "WebSocket URL uses unencrypted ws:// \u2014 call transcripts and phone numbers will be sent in plaintext. Use wss:// in production."
1869
1830
  );
1870
1831
  }
1871
- const { WebSocket: WebSocket4 } = await import("ws");
1872
- const ws = new WebSocket4(url);
1832
+ const { WebSocket: WebSocket3 } = await import("ws");
1833
+ const ws = new WebSocket3(url);
1873
1834
  const chunks = [];
1874
1835
  let done = false;
1875
1836
  let error = null;
@@ -1923,10 +1884,10 @@ var RemoteMessageHandler = class {
1923
1884
  }
1924
1885
  });
1925
1886
  try {
1926
- await new Promise((resolve, reject) => {
1887
+ await new Promise((resolve2, reject) => {
1927
1888
  ws.on("open", () => {
1928
1889
  ws.send(JSON.stringify(data));
1929
- resolve();
1890
+ resolve2();
1930
1891
  });
1931
1892
  ws.on("error", (err) => {
1932
1893
  reject(err);
@@ -1936,11 +1897,11 @@ var RemoteMessageHandler = class {
1936
1897
  yield chunks.shift();
1937
1898
  }
1938
1899
  while (!done && !error) {
1939
- const text = await new Promise((resolve) => {
1900
+ const text = await new Promise((resolve2) => {
1940
1901
  if (chunks.length > 0) {
1941
- resolve(chunks.shift());
1902
+ resolve2(chunks.shift());
1942
1903
  } else {
1943
- resolveNext = resolve;
1904
+ resolveNext = resolve2;
1944
1905
  }
1945
1906
  });
1946
1907
  if (text === null) break;
@@ -1967,7 +1928,7 @@ init_esm_shims();
1967
1928
 
1968
1929
  // src/providers/deepgram-stt.ts
1969
1930
  init_esm_shims();
1970
- import WebSocket3 from "ws";
1931
+ import WebSocket2 from "ws";
1971
1932
 
1972
1933
  // src/errors.ts
1973
1934
  init_esm_shims();
@@ -2060,6 +2021,8 @@ var FINALIZE_DRAIN_MS = 100;
2060
2021
  var CLOSE_LATENCY_BUDGET_MS = 500;
2061
2022
  var RECONNECT_CLOSE_CODES = /* @__PURE__ */ new Set([1006, 1011]);
2062
2023
  var DeepgramSTT = class _DeepgramSTT {
2024
+ /** Stable pricing/dashboard key — read by stream-handler/metrics. */
2025
+ static providerKey = "deepgram";
2063
2026
  ws = null;
2064
2027
  transcriptCallbacks = /* @__PURE__ */ new Set();
2065
2028
  errorCallbacks = /* @__PURE__ */ new Set();
@@ -2120,6 +2083,64 @@ var DeepgramSTT = class _DeepgramSTT {
2120
2083
  }
2121
2084
  return `${DEEPGRAM_WS_URL}?${params.toString()}`;
2122
2085
  }
2086
+ /**
2087
+ * Pre-call WebSocket warmup for the Deepgram `/v1/listen` endpoint.
2088
+ *
2089
+ * Opens the WS (full DNS + TLS + auth handshake), idles ~250 ms so the
2090
+ * provider edge keeps the session warm in its routing table, then
2091
+ * closes cleanly. By the time `connect()` is invoked at call-pickup
2092
+ * the DNS resolver is hot, the TCP+TLS session is in the connection
2093
+ * pool, and recent WS auth is still warm at Deepgram's edge — net
2094
+ * wire time saving of 200-500 ms vs a cold WS open.
2095
+ *
2096
+ * Billing safety: Deepgram bills on streamed audio seconds (per
2097
+ * https://deepgram.com/pricing). Opening + closing the WebSocket
2098
+ * without sending any audio frames does not consume billable seconds.
2099
+ * Best-effort: any failure is logged at debug level and never raised.
2100
+ */
2101
+ async warmup() {
2102
+ const params = new URLSearchParams({
2103
+ model: this.model,
2104
+ language: this.language,
2105
+ encoding: this.encoding,
2106
+ sample_rate: String(this.sampleRate),
2107
+ channels: "1"
2108
+ });
2109
+ const url = `${DEEPGRAM_WS_URL}?${params.toString()}`;
2110
+ let ws = null;
2111
+ try {
2112
+ ws = await new Promise((resolve2, reject) => {
2113
+ const sock = new WebSocket2(url, {
2114
+ headers: { Authorization: `Token ${this.apiKey}` }
2115
+ });
2116
+ const timer = setTimeout(() => {
2117
+ try {
2118
+ sock.close();
2119
+ } catch {
2120
+ }
2121
+ reject(new Error("Deepgram STT warmup connect timeout"));
2122
+ }, 5e3);
2123
+ sock.once("open", () => {
2124
+ clearTimeout(timer);
2125
+ resolve2(sock);
2126
+ });
2127
+ sock.once("error", (err) => {
2128
+ clearTimeout(timer);
2129
+ reject(err);
2130
+ });
2131
+ });
2132
+ await new Promise((r) => setTimeout(r, 250));
2133
+ } catch (err) {
2134
+ getLogger().debug(`Deepgram STT warmup failed (best-effort): ${String(err)}`);
2135
+ } finally {
2136
+ if (ws) {
2137
+ try {
2138
+ ws.close();
2139
+ } catch {
2140
+ }
2141
+ }
2142
+ }
2143
+ }
2123
2144
  /** Open the streaming WebSocket and arm message + keepalive handlers. */
2124
2145
  async connect() {
2125
2146
  await this.openSocket();
@@ -2128,11 +2149,11 @@ var DeepgramSTT = class _DeepgramSTT {
2128
2149
  }
2129
2150
  async openSocket() {
2130
2151
  const url = this.buildUrl();
2131
- const ws = new WebSocket3(url, {
2152
+ const ws = new WebSocket2(url, {
2132
2153
  headers: { Authorization: `Token ${this.apiKey}` }
2133
2154
  });
2134
2155
  this.ws = ws;
2135
- await new Promise((resolve, reject) => {
2156
+ await new Promise((resolve2, reject) => {
2136
2157
  let settled = false;
2137
2158
  const settle = (fn) => {
2138
2159
  if (settled) return;
@@ -2144,7 +2165,7 @@ var DeepgramSTT = class _DeepgramSTT {
2144
2165
  () => settle(() => reject(new PatterConnectionError("Deepgram connect timeout"))),
2145
2166
  1e4
2146
2167
  );
2147
- ws.once("open", () => settle(resolve));
2168
+ ws.once("open", () => settle(resolve2));
2148
2169
  ws.once("error", (err) => settle(() => reject(err)));
2149
2170
  ws.once("unexpected-response", (_req, res) => {
2150
2171
  const status = res?.statusCode ?? 0;
@@ -2165,7 +2186,7 @@ var DeepgramSTT = class _DeepgramSTT {
2165
2186
  ws.on("close", (code, reason) => this.handleClose(code, reason.toString()));
2166
2187
  ws.on("error", (err) => this.handleError(err));
2167
2188
  this.keepaliveTimer = setInterval(() => {
2168
- if (this.ws && this.ws.readyState === WebSocket3.OPEN) {
2189
+ if (this.ws && this.ws.readyState === WebSocket2.OPEN) {
2169
2190
  try {
2170
2191
  this.ws.send(JSON.stringify({ type: "KeepAlive" }));
2171
2192
  } catch {
@@ -2284,7 +2305,7 @@ var DeepgramSTT = class _DeepgramSTT {
2284
2305
  }
2285
2306
  /** Send a binary audio chunk to Deepgram for transcription. */
2286
2307
  sendAudio(audio) {
2287
- if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) {
2308
+ if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) {
2288
2309
  this.audioDroppedCount++;
2289
2310
  if (this.audioDroppedCount === 1 || this.audioDroppedCount % 50 === 0) {
2290
2311
  getLogger().info(
@@ -2333,7 +2354,7 @@ var DeepgramSTT = class _DeepgramSTT {
2333
2354
  */
2334
2355
  finalize() {
2335
2356
  const ws = this.ws;
2336
- if (!ws || ws.readyState !== WebSocket3.OPEN) {
2357
+ if (!ws || ws.readyState !== WebSocket2.OPEN) {
2337
2358
  getLogger().info(
2338
2359
  `[DIAG] DeepgramSTT.finalize SKIPPED (ws state=${ws?.readyState ?? "null"})`
2339
2360
  );
@@ -2354,7 +2375,7 @@ var DeepgramSTT = class _DeepgramSTT {
2354
2375
  if (!ws) return;
2355
2376
  this.ws = null;
2356
2377
  const sendSafe = (payload) => {
2357
- if (ws.readyState === WebSocket3.OPEN) {
2378
+ if (ws.readyState === WebSocket2.OPEN) {
2358
2379
  try {
2359
2380
  ws.send(payload);
2360
2381
  } catch {
@@ -2368,7 +2389,7 @@ var DeepgramSTT = class _DeepgramSTT {
2368
2389
  } catch {
2369
2390
  }
2370
2391
  };
2371
- if (ws.readyState !== WebSocket3.OPEN) {
2392
+ if (ws.readyState !== WebSocket2.OPEN) {
2372
2393
  finishClose();
2373
2394
  return;
2374
2395
  }
@@ -2437,6 +2458,21 @@ var CallMetricsAccumulator = class {
2437
2458
  _bargeinStoppedAt = null;
2438
2459
  _turnUserText = "";
2439
2460
  _turnSttAudioSeconds = 0;
2461
+ /**
2462
+ * Guard against the recordTurnInterrupted / recordTurnComplete race.
2463
+ *
2464
+ * A VAD-path barge-in fires ``recordTurnInterrupted`` synchronously
2465
+ * inside ``handleAudioAsync`` while the in-flight pipeline LLM stream
2466
+ * keeps unwinding on its own task. When the LLM stream eventually
2467
+ * exits, the existing pipeline path falls through to
2468
+ * ``recordTurnComplete``, which would push a second turn for the same
2469
+ * logical exchange (this time carrying ``user_text=''`` because the
2470
+ * field was already reset). ``_turnAlreadyClosed`` is flipped by
2471
+ * ``recordTurnInterrupted`` and read by ``recordTurnComplete`` so the
2472
+ * late ``recordTurnComplete`` becomes a no-op until the next
2473
+ * ``startTurn`` re-arms the accumulator.
2474
+ */
2475
+ _turnAlreadyClosed = false;
2440
2476
  // Cumulative usage counters
2441
2477
  _totalSttAudioSeconds = 0;
2442
2478
  _totalTtsCharacters = 0;
@@ -2449,6 +2485,10 @@ var CallMetricsAccumulator = class {
2449
2485
  _actualSttCost = null;
2450
2486
  // Fix 10: accumulated LLM token cost for non-Realtime pipeline mode.
2451
2487
  _totalLlmCost = 0;
2488
+ // Last LLM model identifier from a recordLlmUsage call — emitted on
2489
+ // CallMetrics.llm_model so the dashboard cost panel can display
2490
+ // "Cerebras gpt-oss-120b" instead of just "Cerebras".
2491
+ _llmModel = "";
2452
2492
  // ---- EventBus integration (item 3) ----
2453
2493
  _eventBus;
2454
2494
  // ---- EOUMetrics — 4 timestamps (item 4) ----
@@ -2467,6 +2507,22 @@ var CallMetricsAccumulator = class {
2467
2507
  // ---- report_only_initial_ttfb (item 6) ----
2468
2508
  _reportOnlyInitialTtfb;
2469
2509
  _initialTtfbEmitted = false;
2510
+ // ---- Barge-in anchor hygiene ----
2511
+ /**
2512
+ * Last barge-in detection timestamp (hrTimeMs). Used by
2513
+ * ``_computeTurnLatency`` to gate endpoint_ms / stt_ms emission on turns
2514
+ * that started immediately after a barge-in — those turns have unreliable
2515
+ * VAD/STT anchors and would otherwise pollute the p95 distribution with
2516
+ * synthetic 6+ second spikes.
2517
+ */
2518
+ _lastBargeinAt = null;
2519
+ /**
2520
+ * Count of turns where ``recordSttComplete`` fired but no legitimate VAD
2521
+ * ``speech_end`` had stamped ``_endpointSignalAt``. Exposed via metrics so
2522
+ * we can spot environments where PSTN packet loss is dropping VAD stops
2523
+ * (the common cause of missing endpoint signals).
2524
+ */
2525
+ _endpointSignalMissingCount = 0;
2470
2526
  constructor(opts) {
2471
2527
  this.callId = opts.callId;
2472
2528
  this.providerMode = opts.providerMode;
@@ -2514,6 +2570,7 @@ var CallMetricsAccumulator = class {
2514
2570
  this._bargeinStoppedAt = null;
2515
2571
  this._turnUserText = "";
2516
2572
  this._turnSttAudioSeconds = 0;
2573
+ this._turnAlreadyClosed = false;
2517
2574
  this._vadStoppedAt = null;
2518
2575
  this._sttFinalAt = null;
2519
2576
  this._turnCommittedAt = null;
@@ -2530,12 +2587,46 @@ var CallMetricsAccumulator = class {
2530
2587
  this.startTurn();
2531
2588
  }
2532
2589
  }
2590
+ /**
2591
+ * Anchor the current turn at a legitimate VAD ``speech_start`` event.
2592
+ *
2593
+ * Industry-standard pattern: every VAD ``speech_start`` that fires while the agent
2594
+ * is NOT in the suppressed warmup window re-anchors the turn timer to
2595
+ * the wall-clock moment the user actually started speaking. Re-anchors:
2596
+ *
2597
+ * * ``_turnStart`` — fixes the case where a phantom ``speech_start``
2598
+ * during agent TTS or a partial transcript from the previous user
2599
+ * attempt already stamped the field. Without this, the legitimate
2600
+ * user-speech ``speech_start`` no-op'd and ``user_speech_duration_ms``
2601
+ * inflated from ~1 s to 5-7 s (the original "I waited 7 seconds"
2602
+ * dashboard symptom).
2603
+ * * ``_endpointSignalAt``, ``_vadStoppedAt``, ``_sttFinalAt`` — any
2604
+ * stale anchor from a rejected barge-in / dropped final transcript
2605
+ * on the same uncommitted turn is cleared, so the next
2606
+ * ``recordVadStop`` / ``recordSttFinalTimestamp`` stamps fresh.
2607
+ * * ``_sttComplete``, ``_llmFirstToken``, ``_initialTtfbEmitted`` — same
2608
+ * rationale for the downstream pipeline timestamps.
2609
+ *
2610
+ * No-op once the turn is committed (``_turnCommittedMono`` set): a
2611
+ * VAD ``speech_start`` after commit belongs to the NEXT turn's
2612
+ * barge-in path, handled by ``recordTurnInterrupted`` instead.
2613
+ */
2614
+ anchorUserSpeechStart() {
2615
+ if (this._turnCommittedMono !== null) return;
2616
+ this._turnStart = hrTimeMs();
2617
+ this._endpointSignalAt = null;
2618
+ this._vadStoppedAt = null;
2619
+ this._sttFinalAt = null;
2620
+ this._sttComplete = null;
2621
+ this._llmFirstToken = null;
2622
+ this._initialTtfbEmitted = false;
2623
+ }
2533
2624
  /** Stamp end-of-STT, capture the user's transcript, and accrue billed STT seconds. */
2534
2625
  recordSttComplete(text, audioSeconds = 0) {
2535
2626
  this._sttComplete = hrTimeMs();
2536
2627
  this._sttFinalAt = this._sttComplete;
2537
2628
  if (this._endpointSignalAt === null) {
2538
- this._endpointSignalAt = this._sttComplete;
2629
+ this._endpointSignalMissingCount++;
2539
2630
  }
2540
2631
  this._turnUserText = text;
2541
2632
  this._turnSttAudioSeconds = audioSeconds;
@@ -2624,7 +2715,9 @@ var CallMetricsAccumulator = class {
2624
2715
  * ``recordTtsStopped`` to compute ``bargein_ms``.
2625
2716
  */
2626
2717
  recordBargeinDetected(ts) {
2627
- this._bargeinDetectedAt = ts ?? hrTimeMs();
2718
+ const t = ts ?? hrTimeMs();
2719
+ this._bargeinDetectedAt = t;
2720
+ this._lastBargeinAt = t;
2628
2721
  }
2629
2722
  /**
2630
2723
  * Mark the moment TTS playback was actually halted after a barge-in. Call
@@ -2634,8 +2727,18 @@ var CallMetricsAccumulator = class {
2634
2727
  recordTtsStopped(ts) {
2635
2728
  this._bargeinStoppedAt = ts ?? hrTimeMs();
2636
2729
  }
2637
- /** Close the current turn cleanly and append a `TurnMetrics` record. */
2730
+ /**
2731
+ * Close the current turn cleanly and append a `TurnMetrics` record.
2732
+ *
2733
+ * Returns ``null`` when ``recordTurnInterrupted`` has already closed
2734
+ * the current turn — this protects against the VAD-barge-in /
2735
+ * pipeline-LLM race where both paths try to finalise the same logical
2736
+ * turn and the second would otherwise push a phantom entry with
2737
+ * ``user_text=''``. The caller treats ``null`` as "nothing to emit";
2738
+ * ``emitTurnMetrics`` is already null-safe.
2739
+ */
2638
2740
  recordTurnComplete(agentText) {
2741
+ if (this._turnAlreadyClosed) return null;
2639
2742
  const latency = this._computeTurnLatency();
2640
2743
  const turn = {
2641
2744
  turn_index: this._turns.length,
@@ -2648,13 +2751,23 @@ var CallMetricsAccumulator = class {
2648
2751
  };
2649
2752
  this._turns.push(turn);
2650
2753
  this._resetTurnState();
2754
+ this._turnAlreadyClosed = true;
2651
2755
  this._eventBus?.emit("turn_ended", { callId: this.callId, turn });
2652
2756
  this._eventBus?.emit("metrics_collected", { callId: this.callId, turn });
2653
2757
  return turn;
2654
2758
  }
2655
- /** Close the current turn as interrupted (barge-in) and return the recorded metrics. */
2759
+ /**
2760
+ * Close the current turn as interrupted (barge-in) and return the
2761
+ * recorded metrics. Returns ``null`` when no turn is open, OR when
2762
+ * ``recordTurnComplete`` has already finalised the current turn —
2763
+ * bidirectional parity with the guard at the top of
2764
+ * ``recordTurnComplete``. Prevents an out-of-order interruption (e.g.
2765
+ * a future refactor that reorders the bargein + LLM-unwind paths)
2766
+ * from overwriting a turn that the complete path already emitted.
2767
+ */
2656
2768
  recordTurnInterrupted() {
2657
2769
  if (this._turnStart === null) return null;
2770
+ if (this._turnAlreadyClosed) return null;
2658
2771
  const latency = this._computeTurnLatency();
2659
2772
  const turn = {
2660
2773
  turn_index: this._turns.length,
@@ -2666,7 +2779,12 @@ var CallMetricsAccumulator = class {
2666
2779
  timestamp: Date.now() / 1e3
2667
2780
  };
2668
2781
  this._turns.push(turn);
2782
+ this._eventBus?.emit("turn_ended", { callId: this.callId, turn });
2783
+ this._eventBus?.emit("metrics_collected", { callId: this.callId, turn });
2669
2784
  this._resetTurnState();
2785
+ this._turnAlreadyClosed = true;
2786
+ this._turnCommittedMono = null;
2787
+ this._endpointSignalAt = null;
2670
2788
  return turn;
2671
2789
  }
2672
2790
  // ---- EOU metrics (item 4) ----
@@ -2812,6 +2930,7 @@ var CallMetricsAccumulator = class {
2812
2930
  * @param cacheWriteTokens Cache write tokens (billed at cache_write rate if present)
2813
2931
  */
2814
2932
  recordLlmUsage(provider2, model, inputTokens, outputTokens, cacheReadTokens = 0, cacheWriteTokens = 0) {
2933
+ this._llmModel = model;
2815
2934
  this._totalLlmCost += calculateLlmCost(
2816
2935
  provider2,
2817
2936
  model,
@@ -2851,7 +2970,10 @@ var CallMetricsAccumulator = class {
2851
2970
  stt_provider: this.sttProvider,
2852
2971
  tts_provider: this.ttsProvider,
2853
2972
  llm_provider: this.llmProvider,
2854
- telephony_provider: this.telephonyProvider
2973
+ telephony_provider: this.telephonyProvider,
2974
+ stt_model: this.sttModel,
2975
+ tts_model: this.ttsModel,
2976
+ llm_model: this._llmModel
2855
2977
  };
2856
2978
  this._eventBus?.emit("call_ended", { callId: this.callId, metrics });
2857
2979
  return metrics;
@@ -2861,6 +2983,15 @@ var CallMetricsAccumulator = class {
2861
2983
  const duration = (hrTimeMs() - this._callStart) / 1e3;
2862
2984
  return this._computeCost(duration);
2863
2985
  }
2986
+ /**
2987
+ * Number of turns where recordSttComplete fired without a prior legitimate
2988
+ * VAD speech_end. Surfaced for diagnostics — a non-zero value points at
2989
+ * dropped VAD stops (commonly PSTN packet loss), which is why we stopped
2990
+ * faking _endpointSignalAt from _sttComplete in 0.6.x.
2991
+ */
2992
+ get endpointSignalMissingCount() {
2993
+ return this._endpointSignalMissingCount;
2994
+ }
2864
2995
  // ---- Internal ----
2865
2996
  _resetTurnState() {
2866
2997
  this._turnStart = null;
@@ -2876,6 +3007,7 @@ var CallMetricsAccumulator = class {
2876
3007
  this._bargeinStoppedAt = null;
2877
3008
  this._turnUserText = "";
2878
3009
  this._turnSttAudioSeconds = 0;
3010
+ this._initialTtfbEmitted = false;
2879
3011
  }
2880
3012
  _computeTurnLatency() {
2881
3013
  let stt_ms = 0;
@@ -2887,8 +3019,19 @@ var CallMetricsAccumulator = class {
2887
3019
  let endpoint_ms;
2888
3020
  let bargein_ms;
2889
3021
  let tts_total_ms;
2890
- if (this._turnStart !== null && this._sttComplete !== null) {
2891
- stt_ms = this._sttComplete - this._turnStart;
3022
+ let user_speech_duration_ms;
3023
+ const postBargein = this._lastBargeinAt !== null && this._turnStart !== null && Math.abs(this._turnStart - this._lastBargeinAt) <= 100;
3024
+ if (this._sttComplete !== null) {
3025
+ const anchor = this._endpointSignalAt ?? this._turnStart;
3026
+ if (anchor !== null) {
3027
+ stt_ms = Math.max(0, this._sttComplete - anchor);
3028
+ }
3029
+ }
3030
+ if (this._turnStart !== null && this._endpointSignalAt !== null) {
3031
+ user_speech_duration_ms = Math.max(
3032
+ 0,
3033
+ this._endpointSignalAt - this._turnStart
3034
+ );
2892
3035
  }
2893
3036
  if (this._sttComplete !== null && this._llmFirstToken !== null) {
2894
3037
  llm_ttft_ms = Math.max(0, this._llmFirstToken - this._sttComplete);
@@ -2921,9 +3064,14 @@ var CallMetricsAccumulator = class {
2921
3064
  if (endpoint_ms !== void 0 && llm_ttft_ms !== void 0 && tts_ms > 0) {
2922
3065
  agent_response_ms = round(endpoint_ms + llm_ttft_ms + tts_ms, 1);
2923
3066
  }
3067
+ if (postBargein) {
3068
+ stt_ms = 0;
3069
+ endpoint_ms = void 0;
3070
+ }
2924
3071
  return {
2925
3072
  stt_ms: round(stt_ms, 1),
2926
3073
  llm_ms: round(llm_ms, 1),
3074
+ ...user_speech_duration_ms !== void 0 ? { user_speech_duration_ms: round(user_speech_duration_ms, 1) } : {},
2927
3075
  ...llm_ttft_ms !== void 0 ? { llm_ttft_ms: round(llm_ttft_ms, 1) } : {},
2928
3076
  ...llm_total_ms !== void 0 ? { llm_total_ms: round(llm_total_ms, 1) } : {},
2929
3077
  tts_ms: round(tts_ms, 1),
@@ -3002,6 +3150,8 @@ var CallMetricsAccumulator = class {
3002
3150
  const endpointAvg = optAvg("endpoint_ms");
3003
3151
  const bargeinAvg = optAvg("bargein_ms");
3004
3152
  const ttsTotalAvg = optAvg("tts_total_ms");
3153
+ const userSpeechAvg = optAvg("user_speech_duration_ms");
3154
+ const agentResponseAvg = optAvg("agent_response_ms");
3005
3155
  return {
3006
3156
  stt_ms: round(turns.reduce((s, t) => s + t.latency.stt_ms, 0) / n, 1),
3007
3157
  llm_ms: round(turns.reduce((s, t) => s + t.latency.llm_ms, 0) / n, 1),
@@ -3011,7 +3161,9 @@ var CallMetricsAccumulator = class {
3011
3161
  total_ms: round(turns.reduce((s, t) => s + t.latency.total_ms, 0) / n, 1),
3012
3162
  ...endpointAvg !== void 0 ? { endpoint_ms: endpointAvg } : {},
3013
3163
  ...bargeinAvg !== void 0 ? { bargein_ms: bargeinAvg } : {},
3014
- ...ttsTotalAvg !== void 0 ? { tts_total_ms: ttsTotalAvg } : {}
3164
+ ...ttsTotalAvg !== void 0 ? { tts_total_ms: ttsTotalAvg } : {},
3165
+ ...userSpeechAvg !== void 0 ? { user_speech_duration_ms: userSpeechAvg } : {},
3166
+ ...agentResponseAvg !== void 0 ? { agent_response_ms: agentResponseAvg } : {}
3015
3167
  };
3016
3168
  }
3017
3169
  _computePercentileLatency(p) {
@@ -3030,6 +3182,8 @@ var CallMetricsAccumulator = class {
3030
3182
  const endpointP = optPct("endpoint_ms");
3031
3183
  const bargeinP = optPct("bargein_ms");
3032
3184
  const ttsTotalP = optPct("tts_total_ms");
3185
+ const userSpeechP = optPct("user_speech_duration_ms");
3186
+ const agentResponseP = optPct("agent_response_ms");
3033
3187
  return {
3034
3188
  stt_ms: round(percentile(nonZero(turns.map((t) => t.latency.stt_ms)), p), 1),
3035
3189
  llm_ms: round(percentile(nonZero(turns.map((t) => t.latency.llm_ms)), p), 1),
@@ -3039,409 +3193,13 @@ var CallMetricsAccumulator = class {
3039
3193
  total_ms: round(percentile(nonZero(turns.map((t) => t.latency.total_ms)), p), 1),
3040
3194
  ...endpointP !== void 0 ? { endpoint_ms: endpointP } : {},
3041
3195
  ...bargeinP !== void 0 ? { bargein_ms: bargeinP } : {},
3042
- ...ttsTotalP !== void 0 ? { tts_total_ms: ttsTotalP } : {}
3196
+ ...ttsTotalP !== void 0 ? { tts_total_ms: ttsTotalP } : {},
3197
+ ...userSpeechP !== void 0 ? { user_speech_duration_ms: userSpeechP } : {},
3198
+ ...agentResponseP !== void 0 ? { agent_response_ms: agentResponseP } : {}
3043
3199
  };
3044
3200
  }
3045
3201
  };
3046
3202
 
3047
- // src/audio/transcoding.ts
3048
- init_esm_shims();
3049
- var MULAW_TO_PCM16_TABLE = (() => {
3050
- const table = new Int16Array(256);
3051
- for (let i = 0; i < 256; i++) {
3052
- const mu = ~i & 255;
3053
- const sign = mu & 128 ? -1 : 1;
3054
- const exponent = mu >> 4 & 7;
3055
- const mantissa = mu & 15;
3056
- const magnitude = (mantissa << 1 | 33) << exponent + 2;
3057
- table[i] = sign * (magnitude - 132);
3058
- }
3059
- return table;
3060
- })();
3061
- var PCM16_TO_MULAW_TABLE = (() => {
3062
- const BIAS = 132;
3063
- const CLIP = 32635;
3064
- const table = new Uint8Array(65536);
3065
- for (let i = 0; i < 65536; i++) {
3066
- let sample = i >= 32768 ? i - 65536 : i;
3067
- const sign = sample < 0 ? 128 : 0;
3068
- if (sample < 0) sample = -sample;
3069
- if (sample > CLIP) sample = CLIP;
3070
- sample += BIAS;
3071
- let exponent = 7;
3072
- const exponentMask = 16384;
3073
- for (let shift = exponentMask; shift > 0 && (sample & shift) === 0; shift >>= 1) {
3074
- exponent--;
3075
- }
3076
- const mantissa = sample >> exponent + 3 & 15;
3077
- const mulaw = ~(sign | exponent << 4 | mantissa) & 255;
3078
- table[i] = mulaw;
3079
- }
3080
- return table;
3081
- })();
3082
- function mulawToPcm16(mulawData) {
3083
- const out = Buffer.alloc(mulawData.length * 2);
3084
- for (let i = 0; i < mulawData.length; i++) {
3085
- out.writeInt16LE(MULAW_TO_PCM16_TABLE[mulawData[i]], i * 2);
3086
- }
3087
- return out;
3088
- }
3089
- function pcm16ToMulaw(pcmData) {
3090
- const sampleCount = Math.floor(pcmData.length / 2);
3091
- const out = Buffer.alloc(sampleCount);
3092
- for (let i = 0; i < sampleCount; i++) {
3093
- const sample = pcmData.readInt16LE(i * 2);
3094
- out[i] = PCM16_TO_MULAW_TABLE[sample + 65536 & 65535];
3095
- }
3096
- return out;
3097
- }
3098
- var PcmCarry = class {
3099
- pending = null;
3100
- /**
3101
- * Prepend any carried odd byte, return the even-length prefix, and stash
3102
- * any new trailing odd byte for the next call.
3103
- *
3104
- * Returns a zero-length buffer when no complete sample is yet available.
3105
- */
3106
- push(chunk) {
3107
- const combined = this.pending !== null ? Buffer.concat([this.pending, chunk]) : chunk;
3108
- this.pending = null;
3109
- const alignedLen = combined.length & ~1;
3110
- if (alignedLen < combined.length) {
3111
- this.pending = combined.subarray(alignedLen);
3112
- }
3113
- return combined.subarray(0, alignedLen);
3114
- }
3115
- /**
3116
- * Return any pending byte as a 1-byte buffer (rare in practice — only if
3117
- * the entire stream had an odd byte count), then reset internal state.
3118
- */
3119
- flush() {
3120
- if (this.pending === null) return Buffer.alloc(0);
3121
- const out = this.pending;
3122
- this.pending = null;
3123
- return out;
3124
- }
3125
- /** Reset carry state without flushing. */
3126
- reset() {
3127
- this.pending = null;
3128
- }
3129
- };
3130
- var StatefulResampler = class {
3131
- srcRate;
3132
- dstRate;
3133
- // 16k→8k: 5-tap FIR state.
3134
- // Extended sample buffer carries the 2 history samples that precede the
3135
- // current chunk AND any "pending" input sample that did not yet generate
3136
- // output (i.e. the odd sample when the chunk had an odd sample count).
3137
- // `firPhase` = 0 means the next output is at input position 0 of the
3138
- // current chunk; 1 means it starts at input position 1 (because the
3139
- // previous chunk ended on an even-output boundary).
3140
- firHistory = new Int16Array(2);
3141
- // [s_{-2}, s_{-1}]
3142
- firHistoryValid = false;
3143
- // Pending sample carried from odd-count chunks (not the byte carry —
3144
- // this is a complete Int16 sample that becomes the first input for the
3145
- // next call).
3146
- firPendingSample = null;
3147
- // 8k→16k: last input sample deferred across chunk boundaries.
3148
- upsampleLast = 0;
3149
- upsampleHasHistory = false;
3150
- // 24k→16k: fractional phase and last input sample across chunks.
3151
- resample24Last = 0;
3152
- resample24Phase = 0;
3153
- resample24HasHistory = false;
3154
- // Odd-byte alignment carry.
3155
- carry = new PcmCarry();
3156
- constructor(opts) {
3157
- this.srcRate = opts.srcRate;
3158
- this.dstRate = opts.dstRate;
3159
- if (opts.channels !== void 0 && opts.channels !== 1) {
3160
- throw new Error("StatefulResampler: only mono (channels=1) is supported");
3161
- }
3162
- const key = `${this.srcRate}->${this.dstRate}`;
3163
- if (key !== "16000->8000" && key !== "8000->16000" && key !== "24000->16000" && key !== "24000->8000") {
3164
- throw new Error(
3165
- `StatefulResampler: unsupported conversion ${key}. Supported: 16000->8000, 8000->16000, 24000->16000, 24000->8000`
3166
- );
3167
- }
3168
- }
3169
- /**
3170
- * Process a chunk of PCM16-LE samples.
3171
- *
3172
- * Handles odd-byte inputs via an internal carry buffer. Returns an even-byte-
3173
- * aligned output buffer; may return a zero-length buffer if not enough
3174
- * aligned input is available yet.
3175
- */
3176
- process(pcm) {
3177
- const aligned = this.carry.push(pcm);
3178
- if (aligned.length === 0) return Buffer.alloc(0);
3179
- if (this.srcRate === 16e3 && this.dstRate === 8e3) {
3180
- return this._downsample16kTo8k(aligned);
3181
- }
3182
- if (this.srcRate === 8e3 && this.dstRate === 16e3) {
3183
- return this._upsample8kTo16k(aligned);
3184
- }
3185
- if (this.srcRate === 24e3 && this.dstRate === 8e3) {
3186
- return this._resample24kTo8k(aligned);
3187
- }
3188
- return this._resample24kTo16k(aligned);
3189
- }
3190
- /**
3191
- * Flush internal state and return any remaining output samples.
3192
- *
3193
- * For 8k→16k: the deferred last sample is emitted duplicated (matching
3194
- * the stateless helper's end-of-stream behaviour).
3195
- * For 16k→8k: any pending odd sample is processed with edge-replication.
3196
- * Resets all state after flushing.
3197
- */
3198
- flush() {
3199
- this.carry.flush();
3200
- if (this.srcRate === 16e3 && this.dstRate === 8e3 && this.firPendingSample !== null) {
3201
- const s = this.firPendingSample;
3202
- const tmp = Buffer.alloc(4);
3203
- tmp.writeInt16LE(s, 0);
3204
- tmp.writeInt16LE(s, 2);
3205
- const out = this._downsample16kTo8k(tmp);
3206
- this.firPendingSample = null;
3207
- return out;
3208
- }
3209
- if (this.srcRate === 8e3 && this.dstRate === 16e3 && this.upsampleHasHistory) {
3210
- const out = Buffer.alloc(4);
3211
- out.writeInt16LE(this.upsampleLast, 0);
3212
- out.writeInt16LE(this.upsampleLast, 2);
3213
- this.upsampleHasHistory = false;
3214
- this.upsampleLast = 0;
3215
- return out;
3216
- }
3217
- return Buffer.alloc(0);
3218
- }
3219
- /** Reset all carried state (e.g. at call boundaries). */
3220
- reset() {
3221
- this.firHistory = new Int16Array(2);
3222
- this.firHistoryValid = false;
3223
- this.firPendingSample = null;
3224
- this.upsampleLast = 0;
3225
- this.upsampleHasHistory = false;
3226
- this.resample24Last = 0;
3227
- this.resample24Phase = 0;
3228
- this.resample24HasHistory = false;
3229
- this.carry.reset();
3230
- }
3231
- // ---------------------------------------------------------------------------
3232
- // Private: 16 kHz → 8 kHz
3233
- // ---------------------------------------------------------------------------
3234
- /**
3235
- * 2:1 decimation with a 5-tap binomial FIR anti-alias filter.
3236
- *
3237
- * FIR coefficients: [1, 4, 6, 4, 1] / 16 (cutoff ~Fs/4 = 4 kHz).
3238
- *
3239
- * Cross-chunk state:
3240
- * - `firHistory[0]` = s_{-2}, `firHistory[1]` = s_{-1} relative to the
3241
- * virtual stream (seeded to first-sample on the very first call).
3242
- * - `firPendingSample` = a lone input sample carried from a chunk whose
3243
- * sample count was odd; it will become the first input of the next chunk.
3244
- *
3245
- * Decimation: outputs are at even positions (0, 2, 4 …) in the virtual
3246
- * extended stream, so every 2 input samples yield 1 output. An odd-sample-
3247
- * count chunk leaves 1 sample in `firPendingSample`; the next chunk
3248
- * prepends it so the output cadence is unbroken.
3249
- */
3250
- _downsample16kTo8k(buf) {
3251
- const newSampleCount = buf.length >> 1;
3252
- const hasPending = this.firPendingSample !== null;
3253
- const totalInput = newSampleCount + (hasPending ? 1 : 0);
3254
- const input = new Int16Array(totalInput);
3255
- if (hasPending) {
3256
- input[0] = this.firPendingSample;
3257
- for (let j = 0; j < newSampleCount; j++) input[j + 1] = buf.readInt16LE(j * 2);
3258
- } else {
3259
- for (let j = 0; j < newSampleCount; j++) input[j] = buf.readInt16LE(j * 2);
3260
- }
3261
- this.firPendingSample = null;
3262
- if (totalInput === 0) return Buffer.alloc(0);
3263
- if (!this.firHistoryValid) {
3264
- this.firHistory[0] = input[0];
3265
- this.firHistory[1] = input[0];
3266
- this.firHistoryValid = true;
3267
- }
3268
- const extended = new Int16Array(totalInput + 2);
3269
- extended[0] = this.firHistory[0];
3270
- extended[1] = this.firHistory[1];
3271
- for (let j = 0; j < totalInput; j++) extended[j + 2] = input[j];
3272
- const outSamples = totalInput >> 1;
3273
- const out = Buffer.alloc(outSamples * 2);
3274
- for (let i = 0; i < outSamples; i++) {
3275
- const c = 2 + i * 2;
3276
- const sM2 = extended[c - 2];
3277
- const sM1 = extended[c - 1];
3278
- const s0 = extended[c];
3279
- const sP1 = c + 1 < extended.length ? extended[c + 1] : extended[extended.length - 1];
3280
- const sP2 = c + 2 < extended.length ? extended[c + 2] : extended[extended.length - 1];
3281
- const filtered = sM2 + 4 * sM1 + 6 * s0 + 4 * sP1 + sP2 + 8 >> 4;
3282
- out.writeInt16LE(Math.max(-32768, Math.min(32767, filtered)), i * 2);
3283
- }
3284
- if (totalInput % 2 === 1) {
3285
- this.firPendingSample = input[totalInput - 1];
3286
- }
3287
- if (totalInput >= 2) {
3288
- this.firHistory[0] = input[totalInput - 2];
3289
- this.firHistory[1] = input[totalInput - 1];
3290
- } else {
3291
- this.firHistory[0] = this.firHistory[1];
3292
- this.firHistory[1] = input[0];
3293
- }
3294
- return out;
3295
- }
3296
- // ---------------------------------------------------------------------------
3297
- // Private: 8 kHz → 16 kHz
3298
- // ---------------------------------------------------------------------------
3299
- /**
3300
- * 1:2 linear-interpolation upsampler.
3301
- *
3302
- * For the first chunk (no history): emits 2*(N-1) samples and defers the
3303
- * last sample. For subsequent chunks (with history): emits the deferred
3304
- * sample + its interpolated midpoint THEN 2*(N-1) samples from the new
3305
- * chunk, deferring the new last sample. Total across K chunks + flush =
3306
- * 2*total_input_samples (correct output length).
3307
- *
3308
- * Call flush() after the final chunk to emit the last deferred sample
3309
- * pair (self-duplicate at end of stream).
3310
- */
3311
- _upsample8kTo16k(buf) {
3312
- const sampleCount = buf.length >> 1;
3313
- if (sampleCount === 0) return Buffer.alloc(0);
3314
- const outArr = [];
3315
- if (this.upsampleHasHistory) {
3316
- const next = buf.readInt16LE(0);
3317
- outArr.push(this.upsampleLast);
3318
- outArr.push(Math.round((this.upsampleLast + next) / 2));
3319
- }
3320
- for (let i = 0; i < sampleCount - 1; i++) {
3321
- const s0 = buf.readInt16LE(i * 2);
3322
- const s1 = buf.readInt16LE((i + 1) * 2);
3323
- outArr.push(s0);
3324
- outArr.push(Math.round((s0 + s1) / 2));
3325
- }
3326
- this.upsampleLast = buf.readInt16LE((sampleCount - 1) * 2);
3327
- this.upsampleHasHistory = true;
3328
- const outBuf = Buffer.alloc(outArr.length * 2);
3329
- for (let j = 0; j < outArr.length; j++) outBuf.writeInt16LE(outArr[j], j * 2);
3330
- return outBuf;
3331
- }
3332
- // ---------------------------------------------------------------------------
3333
- // Private: 24 kHz → 16 kHz / 8 kHz
3334
- // ---------------------------------------------------------------------------
3335
- /**
3336
- * 3:2 linear-interpolation decimator (ratio srcRate/dstRate = 1.5).
3337
- *
3338
- * `resample24Phase` tracks the fractional input position of the next output
3339
- * sample relative to the START of the next chunk. Negative phase means the
3340
- * next output straddles the previous/current chunk boundary; those are
3341
- * handled using `resample24Last`.
3342
- */
3343
- _resample24kTo16k(buf) {
3344
- return this._resample24kStep(buf, 24e3 / 16e3);
3345
- }
3346
- /** 3:1 decimation — collapses the 24k→16k→8k chain into a single step. */
3347
- _resample24kTo8k(buf) {
3348
- return this._resample24kStep(buf, 24e3 / 8e3);
3349
- }
3350
- /** Shared phase-stepping resampler used by 24→16 (step 1.5) and 24→8 (step 3). */
3351
- _resample24kStep(buf, step) {
3352
- const sampleCount = buf.length >> 1;
3353
- if (sampleCount === 0) return Buffer.alloc(0);
3354
- const outArr = [];
3355
- let phase = this.resample24Phase;
3356
- while (true) {
3357
- const idx = Math.floor(phase);
3358
- if (idx >= sampleCount) break;
3359
- const frac = phase - idx;
3360
- let s0;
3361
- let s1;
3362
- if (idx < 0) {
3363
- s0 = this.resample24HasHistory ? this.resample24Last : 0;
3364
- s1 = buf.readInt16LE(0);
3365
- } else {
3366
- s0 = buf.readInt16LE(idx * 2);
3367
- s1 = idx + 1 < sampleCount ? buf.readInt16LE((idx + 1) * 2) : s0;
3368
- }
3369
- const interp = Math.round(s0 + (s1 - s0) * frac);
3370
- outArr.push(Math.max(-32768, Math.min(32767, interp)));
3371
- phase += step;
3372
- }
3373
- this.resample24Last = buf.readInt16LE((sampleCount - 1) * 2);
3374
- this.resample24HasHistory = true;
3375
- this.resample24Phase = phase - sampleCount;
3376
- const outBuf = Buffer.alloc(outArr.length * 2);
3377
- for (let j = 0; j < outArr.length; j++) outBuf.writeInt16LE(outArr[j], j * 2);
3378
- return outBuf;
3379
- }
3380
- };
3381
- function createResampler16kTo8k() {
3382
- return new StatefulResampler({ srcRate: 16e3, dstRate: 8e3 });
3383
- }
3384
- function createResampler8kTo16k() {
3385
- return new StatefulResampler({ srcRate: 8e3, dstRate: 16e3 });
3386
- }
3387
- function createResampler24kTo16k() {
3388
- return new StatefulResampler({ srcRate: 24e3, dstRate: 16e3 });
3389
- }
3390
- function createResampler24kTo8k() {
3391
- return new StatefulResampler({ srcRate: 24e3, dstRate: 8e3 });
3392
- }
3393
- var _warnedResample8kTo16k = false;
3394
- var _warnedResample16kTo8k = false;
3395
- var _warnedResample24kTo16k = false;
3396
- function resample8kTo16k(pcm8k) {
3397
- if (!_warnedResample8kTo16k) {
3398
- _warnedResample8kTo16k = true;
3399
- getLogger().warn(
3400
- "[patter] resample8kTo16k() is deprecated. Use createResampler8kTo16k() (StatefulResampler) to eliminate chunk-boundary discontinuities."
3401
- );
3402
- }
3403
- if (pcm8k.length === 0) return Buffer.alloc(0);
3404
- const r = createResampler8kTo16k();
3405
- const main = r.process(pcm8k);
3406
- const tail = r.flush();
3407
- return tail.length > 0 ? Buffer.concat([main, tail]) : main;
3408
- }
3409
- function resample16kTo8k(pcm16k) {
3410
- if (!_warnedResample16kTo8k) {
3411
- _warnedResample16kTo8k = true;
3412
- getLogger().warn(
3413
- "[patter] resample16kTo8k() is deprecated. Use createResampler16kTo8k() (StatefulResampler) to eliminate chunk-boundary discontinuities."
3414
- );
3415
- }
3416
- if (pcm16k.length === 0) return Buffer.alloc(0);
3417
- const r = createResampler16kTo8k();
3418
- const out = r.process(pcm16k);
3419
- const tail = r.flush();
3420
- return tail.length > 0 ? Buffer.concat([out, tail]) : out;
3421
- }
3422
- function resample24kTo16k(pcm24k) {
3423
- if (!_warnedResample24kTo16k) {
3424
- _warnedResample24kTo16k = true;
3425
- getLogger().warn(
3426
- "[patter] resample24kTo16k() is deprecated. Use createResampler24kTo16k() (StatefulResampler) or OpenAITTS.resampleStreaming for anti-aliased resampling."
3427
- );
3428
- }
3429
- if (pcm24k.length === 0) return Buffer.alloc(0);
3430
- const sampleCount = Math.floor(pcm24k.length / 2);
3431
- const outSamples = Math.floor(sampleCount * 2 / 3);
3432
- const out = Buffer.alloc(outSamples * 2);
3433
- for (let i = 0; i < outSamples; i++) {
3434
- const pos = i * 1.5;
3435
- const idx = Math.floor(pos);
3436
- const frac = pos - idx;
3437
- const s0 = pcm24k.readInt16LE(idx * 2);
3438
- const s1 = idx + 1 < sampleCount ? pcm24k.readInt16LE((idx + 1) * 2) : s0;
3439
- const interp = Math.round(s0 + (s1 - s0) * frac);
3440
- out.writeInt16LE(Math.max(-32768, Math.min(32767, interp)), i * 2);
3441
- }
3442
- return out;
3443
- }
3444
-
3445
3203
  // src/handler-utils.ts
3446
3204
  init_esm_shims();
3447
3205
  function createHistoryManager(maxSize) {
@@ -4313,7 +4071,23 @@ var HALLUCINATIONS = /* @__PURE__ */ new Set([
4313
4071
  ".",
4314
4072
  "bye",
4315
4073
  "right",
4316
- "cool"
4074
+ "cool",
4075
+ // Whisper YouTube-caption hallucinations
4076
+ "thank you for watching",
4077
+ "thanks for watching",
4078
+ "thank you for watching!",
4079
+ "thanks for watching!",
4080
+ "thank you so much for watching",
4081
+ "thanks for listening",
4082
+ "please subscribe",
4083
+ "subscribe",
4084
+ "music",
4085
+ "[music]",
4086
+ "\u266A",
4087
+ "[no audio]",
4088
+ "[silence]",
4089
+ "[blank_audio]",
4090
+ "(silence)"
4317
4091
  ]);
4318
4092
  var StreamHandler = class _StreamHandler {
4319
4093
  deps;
@@ -4391,6 +4165,43 @@ var StreamHandler = class _StreamHandler {
4391
4165
  * sentence.
4392
4166
  */
4393
4167
  speakingStartedAt = null;
4168
+ /**
4169
+ * Wall-clock (ms) when the FIRST TTS audio chunk actually reached the
4170
+ * carrier wire — set in ``markFirstAudioSent`` after ``bridge.sendAudio``
4171
+ * succeeds, cleared by ``beginSpeaking`` / ``cancelSpeaking``. The barge-in
4172
+ * gate measures elapsed from this instant, NOT from ``speakingStartedAt``,
4173
+ * because ElevenLabs (and other cloud TTS) take 200-700 ms to emit the
4174
+ * first byte. A gate anchored to ``beginSpeaking`` would expire on
4175
+ * background noise before any audio went out, exit the TTS loop on
4176
+ * ``isSpeaking=false``, and silently cut the agent's first turn.
4177
+ */
4178
+ firstAudioSentAt = null;
4179
+ /**
4180
+ * Optional barge-in confirmation strategies. With an empty array the
4181
+ * SDK falls back to the legacy "cancel on first VAD speech_start"
4182
+ * behaviour. With one or more strategies, a VAD speech_start during
4183
+ * TTS marks the barge-in as *pending* — TTS keeps streaming naturally
4184
+ * — and the strategies are consulted on every STT transcript via
4185
+ * ``handleBargeIn``. The first strategy that returns ``true`` cancels
4186
+ * the agent; if none confirm within ``bargeInConfirmMs`` the pending
4187
+ * state is dropped and the agent finishes its sentence.
4188
+ */
4189
+ bargeInStrategies;
4190
+ /** Pending-barge-in confirmation timeout in milliseconds. */
4191
+ bargeInConfirmMs;
4192
+ /** Wall-clock (ms) when the current pending barge-in started, or
4193
+ * ``null`` if no barge-in is pending. */
4194
+ bargeInPendingSince = null;
4195
+ /** Timer that fires the pending-barge-in timeout. */
4196
+ bargeInPendingTimer = null;
4197
+ /**
4198
+ * Set to true when a VAD ``speech_start`` was suppressed by the
4199
+ * anti-echo gate during the current agent turn. Cleared on
4200
+ * ``beginSpeaking`` and ``cancelSpeaking``. When the turn ends
4201
+ * naturally (grace timer), the inbound audio ring is flushed to STT
4202
+ * so the user's speech is not silently discarded.
4203
+ */
4204
+ suppressedSpeechPending = false;
4394
4205
  /**
4395
4206
  * Minimum wall-clock duration (ms) the agent must have been speaking
4396
4207
  * before barge-in is allowed to fire when AEC is active. Covers the
@@ -4402,10 +4213,17 @@ var StreamHandler = class _StreamHandler {
4402
4213
  * Same as the AEC variant but for deployments where AEC is OFF
4403
4214
  * (default on PSTN — Twilio/Telnyx). Without an adaptive filter to
4404
4215
  * converge, the only justification for a gate is anti-flicker on
4405
- * micro-events (cough, click). A short 250 ms window keeps real-user
4406
- * barge-in responsive while still filtering tiny noise spikes.
4216
+ * micro-events (cough, click). Raised 100 → 500 ms on 2026-05-19
4217
+ * after the 0.6.2 acceptance run showed a phantom VAD speech_start
4218
+ * firing on the very first inbound frame (~500 ms into the call,
4219
+ * which is past a 100 ms gate). The phantom barge-in cancelled the
4220
+ * prewarmed firstMessage, the user heard a clipped (scratchy)
4221
+ * audio fragment, and the SDK left ``_turnAlreadyClosed=true`` so
4222
+ * subsequent ``recordTurnComplete`` calls were no-ops. 500 ms
4223
+ * filters those phantoms while still letting a real interruption
4224
+ * land within half a second of agent onset.
4407
4225
  */
4408
- static MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_NO_AEC = 250;
4226
+ static MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_NO_AEC = 500;
4409
4227
  /** Handle for the pending grace-period timer, so it can be cleared on cleanup. */
4410
4228
  graceTimer = null;
4411
4229
  /**
@@ -4425,6 +4243,32 @@ var StreamHandler = class _StreamHandler {
4425
4243
  * the tail of the cancelled turn (~50-200 ms of doubled audio).
4426
4244
  */
4427
4245
  lastCancelAt = null;
4246
+ /**
4247
+ * Promise queue tracking outstanding Twilio marks the SDK has sent but
4248
+ * not yet seen echoed back. Used by the firstMessage send loop to bound
4249
+ * the depth of audio queued at the carrier — without this the loop
4250
+ * pushes the entire TTS stream into Twilio's WebSocket in one burst,
4251
+ * and a sendClear issued mid-buffer races against several seconds of
4252
+ * already-queued media frames (BUG #128). The window depth is
4253
+ * ``FIRST_MESSAGE_MARK_WINDOW``; ``onMark`` drains entries as Twilio
4254
+ * confirms playback, ``cancelSpeaking`` resolves every pending entry so
4255
+ * any awaiter exits immediately. Telnyx never populates this queue
4256
+ * (Telnyx's media-stream protocol has no mark concept — the loop
4257
+ * falls back to time-based pacing on that carrier).
4258
+ */
4259
+ pendingMarks = [];
4260
+ /**
4261
+ * Monotonic counter for first-message mark names. Distinct from
4262
+ * ``chunkCount`` (which the Realtime path uses) so the two paths can
4263
+ * coexist without name collisions even when firstMessage finishes while
4264
+ * a Realtime turn is still streaming.
4265
+ */
4266
+ // firstMessageMarkCounter / FIRST_MESSAGE_MARK_WINDOW /
4267
+ // MARK_AWAIT_TIMEOUT_MS were retired with the move to the Twilio-FIFO-
4268
+ // trusts model (sendPacedFirstMessageBytes no longer emits marks).
4269
+ // Marks are still consumed via ``onMark`` for any adapter that wants
4270
+ // to round-trip one, but the firstMessage path no longer back-pressures
4271
+ // on them.
4428
4272
  /**
4429
4273
  * Minimum drain window (ms) between a ``cancelSpeaking`` and the next
4430
4274
  * ``beginSpeaking``. 150 ms covers a typical PSTN jitter buffer drain
@@ -4439,7 +4283,7 @@ var StreamHandler = class _StreamHandler {
4439
4283
  * directly. Awaits the post-cancel drain window before flipping state
4440
4284
  * so the remote player has time to flush the cancelled turn's tail.
4441
4285
  */
4442
- async beginSpeaking() {
4286
+ async beginSpeaking(isFirstMessage = false) {
4443
4287
  if (this.lastCancelAt !== null) {
4444
4288
  const elapsed = Date.now() - this.lastCancelAt;
4445
4289
  const remaining = _StreamHandler.POST_CANCEL_DRAIN_MS - elapsed;
@@ -4450,7 +4294,23 @@ var StreamHandler = class _StreamHandler {
4450
4294
  this.speakingGeneration++;
4451
4295
  this.isSpeaking = true;
4452
4296
  this.speakingStartedAt = Date.now();
4297
+ this.suppressedSpeechPending = false;
4298
+ void isFirstMessage;
4299
+ this.firstAudioSentAt = Date.now();
4453
4300
  this.inboundAudioRing = [];
4301
+ this.resetVad();
4302
+ }
4303
+ /**
4304
+ * Record that the first TTS audio chunk of the current turn has hit the
4305
+ * carrier wire. Idempotent within a turn — only the first call sets the
4306
+ * timestamp; later chunks are no-ops. Must be invoked AFTER the underlying
4307
+ * ``bridge.sendAudio`` resolves so the gate is anchored to "audio actually
4308
+ * went out", not "we asked the carrier to send it".
4309
+ */
4310
+ markFirstAudioSent() {
4311
+ if (this.firstAudioSentAt === null) {
4312
+ this.firstAudioSentAt = Date.now();
4313
+ }
4454
4314
  }
4455
4315
  /**
4456
4316
  * Atomically end speaking AND invalidate any pending grace timer.
@@ -4463,14 +4323,55 @@ var StreamHandler = class _StreamHandler {
4463
4323
  this.speakingGeneration++;
4464
4324
  this.isSpeaking = false;
4465
4325
  this.speakingStartedAt = null;
4326
+ this.firstAudioSentAt = null;
4466
4327
  this.lastCancelAt = Date.now();
4328
+ this.suppressedSpeechPending = false;
4329
+ this.drainPendingMarks();
4467
4330
  if (this.llmAbort !== null) {
4468
4331
  try {
4469
4332
  this.llmAbort.abort();
4470
4333
  } catch {
4471
4334
  }
4472
4335
  }
4336
+ const ttsCancelable = this.tts;
4337
+ if (typeof ttsCancelable?.cancelActiveStream === "function") {
4338
+ try {
4339
+ ttsCancelable.cancelActiveStream();
4340
+ } catch (err) {
4341
+ getLogger().debug(`TTS cancelActiveStream raised: ${String(err)}`);
4342
+ }
4343
+ }
4344
+ }
4345
+ /**
4346
+ * Resolve every entry in ``pendingMarks`` and empty the queue. Idempotent
4347
+ * — safe to call from ``cancelSpeaking`` and again from the grace path
4348
+ * without leaking pending promises.
4349
+ */
4350
+ drainPendingMarks() {
4351
+ if (this.pendingMarks.length === 0) return;
4352
+ for (const entry of this.pendingMarks) {
4353
+ try {
4354
+ entry.resolve();
4355
+ } catch {
4356
+ }
4357
+ }
4358
+ this.pendingMarks.length = 0;
4473
4359
  }
4360
+ // Mark-based back-pressure (sendMarkAwaitable / waitForMarkWindow)
4361
+ // was removed when sendPacedFirstMessageBytes switched to the
4362
+ // Twilio-FIFO-trusts model — see that method's doc comment for
4363
+ // rationale. ``pendingMarks`` and ``onMark`` are still kept so an
4364
+ // adapter that wants to round-trip a mark for some other purpose can
4365
+ // still do so without breaking the firstMessage path.
4366
+ /**
4367
+ * Bytes-per-millisecond for a 16 kHz PCM16 mono stream. Used by
4368
+ * ``sendPacedFirstMessageBytes`` to translate chunk size into a
4369
+ * playout-duration sleep so we never deliver faster than the carrier
4370
+ * can decode + play out (which manifested as severe crackling on the
4371
+ * HTTP-TTS path with client-side resampling). 16000 samples/sec × 2
4372
+ * bytes/sample = 32 bytes/ms.
4373
+ */
4374
+ static PCM16_16K_BYTES_PER_MS = 32;
4474
4375
  /** Cancel and clear the pending grace timer, if any. */
4475
4376
  clearGraceTimer() {
4476
4377
  if (this.graceTimer !== null) {
@@ -4495,11 +4396,53 @@ var StreamHandler = class _StreamHandler {
4495
4396
  if (this.speakingGeneration === gen) {
4496
4397
  this.isSpeaking = false;
4497
4398
  this.speakingStartedAt = null;
4399
+ this.firstAudioSentAt = null;
4400
+ this.clearPendingBargeIn();
4401
+ void this.resetBargeInStrategies();
4402
+ if (this.suppressedSpeechPending) {
4403
+ this.suppressedSpeechPending = false;
4404
+ this.flushInboundAudioRing();
4405
+ }
4406
+ this.resetVad();
4498
4407
  }
4499
4408
  }, grace);
4500
4409
  } else {
4501
4410
  this.isSpeaking = false;
4502
4411
  this.speakingStartedAt = null;
4412
+ this.firstAudioSentAt = null;
4413
+ this.clearPendingBargeIn();
4414
+ void this.resetBargeInStrategies();
4415
+ if (this.suppressedSpeechPending) {
4416
+ this.suppressedSpeechPending = false;
4417
+ this.flushInboundAudioRing();
4418
+ }
4419
+ this.resetVad();
4420
+ }
4421
+ }
4422
+ async resetBargeInStrategies() {
4423
+ if (this.bargeInStrategies.length === 0) return;
4424
+ const { resetStrategies } = await import("./barge-in-strategies-X6ARMGIQ.mjs");
4425
+ await resetStrategies(this.bargeInStrategies);
4426
+ }
4427
+ /**
4428
+ * Reset the active VAD provider's per-utterance state. No-op when the
4429
+ * provider does not implement the optional ``reset()`` hook. Safe to call
4430
+ * from any context — failures are swallowed and the VAD is disabled for
4431
+ * the rest of the call so a flaky reset can never silently kill barge-in
4432
+ * for every subsequent turn.
4433
+ */
4434
+ resetVad() {
4435
+ const activeVad = this.deps.agent.vad ?? this.autoVad;
4436
+ if (!activeVad || this.vadDisabled) return;
4437
+ try {
4438
+ const ret = activeVad.reset?.();
4439
+ if (ret instanceof Promise) {
4440
+ ret.catch((err) => {
4441
+ getLogger().debug(`VAD reset threw: ${String(err)}`);
4442
+ });
4443
+ }
4444
+ } catch (err) {
4445
+ getLogger().debug(`VAD reset threw: ${String(err)}`);
4503
4446
  }
4504
4447
  }
4505
4448
  /**
@@ -4509,7 +4452,8 @@ var StreamHandler = class _StreamHandler {
4509
4452
  */
4510
4453
  canBargeIn() {
4511
4454
  if (this.speakingStartedAt === null) return true; // no speaking turn in progress: nothing to gate
4512
- const elapsed = Date.now() - this.speakingStartedAt;
4455
+ if (this.firstAudioSentAt === null) return false; // turn started but first-audio timestamp not set yet (presumably set by markFirstAudioSent — confirm): never allow barge-in
4456
+ const elapsed = Date.now() - this.firstAudioSentAt; // gate is measured from first audio sent, not from when speaking began
4513
4457
  const gate = this.aec ? _StreamHandler.MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_AEC : _StreamHandler.MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_NO_AEC;
4514
4458
  return elapsed >= gate;
4515
4459
  }
@@ -4613,6 +4557,9 @@ var StreamHandler = class _StreamHandler {
4613
4557
  this.ws = ws;
4614
4558
  this.caller = caller;
4615
4559
  this.callee = callee;
4560
+ this.bargeInStrategies = (deps.agent.bargeInStrategies ?? []).slice();
4561
+ const confirmMs = deps.agent.bargeInConfirmMs;
4562
+ this.bargeInConfirmMs = typeof confirmMs === "number" && Number.isFinite(confirmMs) && confirmMs > 0 ? confirmMs : 1500;
4616
4563
  this.history = createHistoryManager(200);
4617
4564
  const sttKey = deps.agent.stt?.constructor?.providerKey;
4618
4565
  const sttProviderName = deps.agent.stt ? sttKey ?? deps.agent.stt.constructor?.name ?? "custom" : void 0;
@@ -4860,7 +4807,7 @@ var StreamHandler = class _StreamHandler {
4860
4807
  if (activeVad && !this.vadDisabled) {
4861
4808
  try {
4862
4809
  const vadPromise = activeVad.processFrame(pcm16k, 16e3);
4863
- const timeoutPromise = new Promise((resolve) => setTimeout(() => resolve(null), 25));
4810
+ const timeoutPromise = new Promise((resolve2) => setTimeout(() => resolve2(null), 25));
4864
4811
  const evt = await Promise.race([vadPromise, timeoutPromise]);
4865
4812
  if (evt) {
4866
4813
  getLogger().info(
@@ -4868,11 +4815,18 @@ var StreamHandler = class _StreamHandler {
4868
4815
  );
4869
4816
  }
4870
4817
  if (evt?.type === "speech_start") {
4871
- if (this.isSpeaking && !this.canBargeIn()) {
4818
+ const phantomSuppressed = this.isSpeaking && !this.canBargeIn();
4819
+ if (phantomSuppressed) {
4872
4820
  getLogger().info(
4873
4821
  `[VAD] speech_start suppressed (agent speaking < gate, aec=${this.aec ? "on" : "off"})`
4874
4822
  );
4823
+ this.suppressedSpeechPending = true;
4875
4824
  } else if (this.isSpeaking) {
4825
+ if (this.bargeInStrategies.length > 0) {
4826
+ this.startPendingBargeIn();
4827
+ this.metricsAcc.anchorUserSpeechStart();
4828
+ return;
4829
+ }
4876
4830
  getLogger().info("[VAD] speech_start during TTS \u2192 BARGE-IN");
4877
4831
  this.metricsAcc.recordOverlapStart();
4878
4832
  this.metricsAcc.recordBargeinDetected();
@@ -4895,7 +4849,9 @@ var StreamHandler = class _StreamHandler {
4895
4849
  }
4896
4850
  }
4897
4851
  }
4898
- this.metricsAcc.startTurnIfIdle();
4852
+ if (!phantomSuppressed) {
4853
+ this.metricsAcc.anchorUserSpeechStart();
4854
+ }
4899
4855
  } else if (evt?.type === "speech_end") {
4900
4856
  this.metricsAcc.recordVadStop();
4901
4857
  try {
@@ -4972,13 +4928,36 @@ var StreamHandler = class _StreamHandler {
4972
4928
  */
4973
4929
  /** Handle a Twilio Media Streams `mark` event acknowledging audio playback boundaries. */
4974
4930
  async onMark(markName) {
4975
- if (markName) {
4976
- this.lastConfirmedMark = markName;
4931
+ if (!markName) return; // empty / missing mark name: ignore
4932
+ const idx = this.pendingMarks.findIndex((m) => m.name === markName);
4933
+ if (idx < 0) return; // mark is not pending: ignored, lastConfirmedMark stays unchanged. NOTE(review): confirm every sender of marks registers them in pendingMarks
4934
+ this.lastConfirmedMark = markName;
4935
+ const resolved = this.pendingMarks.splice(0, idx + 1); // removes this mark and every earlier pending one
4936
+ for (const entry of resolved) {
4937
+ try {
4938
+ entry.resolve();
4939
+ } catch { // a throwing resolver must not stop the remaining ones
4940
+ }
4977
4941
  }
4978
4942
  }
4979
4943
  /** Handle call stop / stream end. */
4980
4944
  /** Handle a carrier-emitted `stop` event signalling the call has ended. */
4981
4945
  async handleStop() {
4946
+ if (this.llmAbort !== null) {
4947
+ try {
4948
+ this.llmAbort.abort();
4949
+ } catch {
4950
+ }
4951
+ }
4952
+ const ttsCancelable = this.tts;
4953
+ if (typeof ttsCancelable?.cancelActiveStream === "function") {
4954
+ try {
4955
+ ttsCancelable.cancelActiveStream();
4956
+ } catch {
4957
+ }
4958
+ }
4959
+ this.clearPendingBargeIn();
4960
+ this.drainPendingMarks();
4982
4961
  this.clearGraceTimer();
4983
4962
  this.flushResamplers();
4984
4963
  await this.closeSttOnce();
@@ -4991,6 +4970,21 @@ var StreamHandler = class _StreamHandler {
4991
4970
  /** Handle WebSocket close event. */
4992
4971
  /** Tear down adapter, STT/TTS, and per-call state when the carrier WebSocket closes. */
4993
4972
  async handleWsClose() {
4973
+ if (this.llmAbort !== null) {
4974
+ try {
4975
+ this.llmAbort.abort();
4976
+ } catch {
4977
+ }
4978
+ }
4979
+ const ttsCancelable = this.tts;
4980
+ if (typeof ttsCancelable?.cancelActiveStream === "function") {
4981
+ try {
4982
+ ttsCancelable.cancelActiveStream();
4983
+ } catch {
4984
+ }
4985
+ }
4986
+ this.clearPendingBargeIn();
4987
+ this.drainPendingMarks();
4994
4988
  this.clearGraceTimer();
4995
4989
  this.flushResamplers();
4996
4990
  await this.closeSttOnce();
@@ -5029,13 +5023,39 @@ var StreamHandler = class _StreamHandler {
5029
5023
  * Maintains a 1-byte carry across calls so unaligned HTTP chunks from
5030
5024
  * streaming TTS providers never byte-swap the PCM16 samples downstream.
5031
5025
  */
5032
- encodePipelineAudio(pcm16k) {
5033
- const aligned = this.alignPcm16(pcm16k);
5026
+ encodePipelineAudio(audioChunk) { // returns the base64 payload handed to bridge.sendAudio; "" when no whole sample is available yet
5027
+ if (this.ttsOutputFormatNativeForCarrier === true) { // bypass: chunk is already in the carrier's wire format
5028
+ return audioChunk.toString("base64"); // no alignment, resampling or mu-law encoding on this path
5029
+ }
5030
+ const aligned = this.alignPcm16(audioChunk);
5034
5031
  if (aligned.length === 0) return "";
5035
5032
  const pcm8k = this.outboundResampler.process(aligned);
5036
5033
  const mulaw = pcm16ToMulaw(pcm8k);
5037
5034
  return mulaw.toString("base64");
5038
5035
  }
5036
+ /**
5037
+ * Cached result of ``isTtsOutputFormatNativeForCarrier()`` — settled
5038
+ * once at ``initPipeline`` time after ``setTelephonyCarrier`` has run
5039
+ * on the TTS adapter. Stable for the call lifetime: changes to the
5040
+ * adapter's output format mid-call would NOT flip this. ``true`` means
5041
+ * ``encodePipelineAudio`` can take the bypass path.
5042
+ */
5043
+ ttsOutputFormatNativeForCarrier = false;
5044
+ /**
5045
+ * Probe whether the TTS adapter is configured to emit bytes already in
5046
+ * the carrier's wire codec. Currently: Twilio expects ``ulaw_8000``,
5047
+ * Telnyx expects ``pcm_16000`` (no client transcode in either case if
5048
+ * matched). Anything else takes the resample-and-encode path.
5049
+ */
5050
+ isTtsOutputFormatNativeForCarrier() {
5051
+ if (!this.tts) return false; // no TTS adapter configured
5052
+ const fmt = this.tts.outputFormat;
5053
+ if (typeof fmt !== "string") return false; // adapter does not expose a string outputFormat
5054
+ const carrier = this.deps.bridge.telephonyProvider;
5055
+ if (carrier === "twilio") return fmt === "ulaw_8000";
5056
+ if (carrier === "telnyx") return fmt === "pcm_16000";
5057
+ return false; // any other carrier value: never bypass the transcode
5058
+ }
5039
5059
  /**
5040
5060
  * Prepend any carry byte from the previous chunk, return the even-length
5041
5061
  * portion, and stash the final odd byte (if any) for the next call.
@@ -5046,6 +5066,60 @@ var StreamHandler = class _StreamHandler {
5046
5066
  this.ttsByteCarry = alignedLen < combined.length ? combined.subarray(alignedLen) : null;
5047
5067
  return combined.subarray(0, alignedLen);
5048
5068
  }
5069
+ /**
5070
+ * Stream a cached firstMessage buffer in pacing-friendly chunks.
5071
+ *
5072
+ * Splits ``prewarmBytes`` into 20 ms slices (matching Twilio's PSTN
5073
+ * frame quantum) by delegating to ``sendPacedFirstMessageBytes``, which
5074
+ * forwards each through ``deps.bridge.sendAudio`` exactly like the
5075
+ * live TTS path does — preserving Twilio mark/clear granularity. A
5076
+ * single multi-second sendAudio call would push the whole intro into
5077
+ * the carrier in one go and a ``sendClear`` issued mid-buffer would
5078
+ * have nothing to clear ("agent keeps talking after barge-in" UX bug
5079
+ * on the very first turn).
5080
+ *
5081
+ * Returns ``true`` when at least one chunk hit the wire — the caller
5082
+ * uses that to decide whether to record TTS-first-byte / turn-complete
5083
+ * metrics.
5084
+ */
5085
+ async streamPrewarmBytes(prewarmBytes) {
5086
+ return this.sendPacedFirstMessageBytes(prewarmBytes); // thin wrapper: all slicing and sending happens in the delegate
5087
+ }
5088
+ /**
5089
+ * Iterate ``bytes`` in 20 ms slices (Twilio PSTN frame quantum) and
5090
+ * forward each via ``deps.bridge.sendAudio``, calling
5091
+ * ``markFirstAudioSent`` after every send. Any marks still pending from
5092
+ * an earlier send are drained up front.
5093
+ * NOTE(review): mark-gated / playout-time pacing is NOT visible in this
5094
+ * body — the loop has no ``await``, ``waitForMarkWindow`` or sleep, so
5095
+ * slices are written back-to-back. Confirm pacing happens downstream.
5096
+ * NOTE(review): the native-format slice size assumes 8 bytes/ms
5097
+ * (``ulaw_8000``); 16-bit PCM at 16 kHz would be 32 bytes/ms, so a native
5098
+ * ``pcm_16000`` stream would be cut into 5 ms slices — confirm intended.
5099
+ *
5100
+ * Stops as soon as ``isSpeaking`` is false at the top of an iteration. Returns
5101
+ * ``true`` when at least one chunk hit the wire — the caller uses that to decide
5102
+ * whether to record TTS-first-byte / turn-complete metrics. See BUG #128.
5103
+ */
5104
+ async sendPacedFirstMessageBytes(bytes) {
5105
+ if (this.pendingMarks.length > 0) this.drainPendingMarks(); // start from a clean mark queue
5106
+ let firstChunkSent = false;
5107
+ const PSTN_FRAME_MS = 20;
5108
+ const bytesPerMs = this.ttsOutputFormatNativeForCarrier ? 8 : _StreamHandler.PCM16_16K_BYTES_PER_MS;
5109
+ const sliceBytes = bytesPerMs * PSTN_FRAME_MS;
5110
+ for (let i = 0; i < bytes.length; i += sliceBytes) {
5111
+ if (!this.isSpeaking) break; // speaking was cancelled (e.g. barge-in): stop sending
5112
+ const chunk = bytes.subarray(i, i + sliceBytes); // final slice may be shorter than sliceBytes
5113
+ if (!firstChunkSent) firstChunkSent = true;
5114
+ if (this.aec && !this.ttsOutputFormatNativeForCarrier) { // far-end reference is only fed on the non-native path
5115
+ this.aec.pushFarEnd(chunk);
5116
+ }
5117
+ const encoded = this.encodePipelineAudio(chunk);
5118
+ this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
5119
+ this.markFirstAudioSent();
5120
+ }
5121
+ return firstChunkSent;
5122
+ }
5049
5123
  // ---------------------------------------------------------------------------
5050
5124
  // Private: Pipeline mode
5051
5125
  // ---------------------------------------------------------------------------
@@ -5062,6 +5136,12 @@ var StreamHandler = class _StreamHandler {
5062
5136
  getLogger().debug(`TTS setTelephonyCarrier failed (${label}): ${String(e)}`);
5063
5137
  }
5064
5138
  }
5139
+ this.ttsOutputFormatNativeForCarrier = this.isTtsOutputFormatNativeForCarrier();
5140
+ if (this.ttsOutputFormatNativeForCarrier) {
5141
+ getLogger().debug(
5142
+ `TTS outputFormat matches ${this.deps.bridge.telephonyProvider} wire codec \u2014 bypassing client-side transcode`
5143
+ );
5144
+ }
5065
5145
  }
5066
5146
  if (!this.stt) {
5067
5147
  getLogger().debug(`Pipeline mode (${label}): no STT configured`);
@@ -5071,7 +5151,7 @@ var StreamHandler = class _StreamHandler {
5071
5151
  }
5072
5152
  if (!this.deps.agent.vad) {
5073
5153
  try {
5074
- const { SileroVAD } = await import("./silero-vad-YLCXT5GQ.mjs");
5154
+ const { SileroVAD } = await import("./silero-vad-LNDFGIY7.mjs");
5075
5155
  this.autoVad = await SileroVAD.forPhoneCall();
5076
5156
  getLogger().info(
5077
5157
  `auto-VAD enabled (SileroVAD, phone preset). Pass agent.vad=\u2026 to override.`
@@ -5108,35 +5188,106 @@ var StreamHandler = class _StreamHandler {
5108
5188
  );
5109
5189
  }
5110
5190
  }
5111
- try {
5112
- if (this.stt) await this.stt.connect();
5113
- getLogger().debug(`Pipeline mode (${label}): STT + TTS connected`);
5114
- } catch (e) {
5115
- getLogger().error(`Pipeline connect FAILED (${label}):`, e);
5191
+ let parked;
5192
+ if (this.deps.popPrewarmedConnections) {
5116
5193
  try {
5117
- await this.deps.bridge.endCall(this.callId, this.ws);
5118
- } catch {
5194
+ parked = this.deps.popPrewarmedConnections(this.callId);
5195
+ } catch (err) {
5196
+ getLogger().debug(`popPrewarmedConnections raised: ${String(err)}`);
5197
+ }
5198
+ }
5199
+ const parkedTts = parked?.tts;
5200
+ if (parkedTts && this.tts) {
5201
+ const ttsAny = this.tts;
5202
+ if (typeof ttsAny.adoptWebSocket === "function" && parkedTts.ws.readyState === 1) {
5203
+ try {
5204
+ ttsAny.adoptWebSocket(parkedTts);
5205
+ getLogger().info(`[CONNECT] callId=${this.callId} provider=tts source=adopted ms=0`);
5206
+ } catch (err) {
5207
+ getLogger().debug(`TTS adoptWebSocket failed: ${String(err)}; falling back`);
5208
+ try {
5209
+ parkedTts.ws.close();
5210
+ } catch {
5211
+ }
5212
+ }
5213
+ } else {
5214
+ try {
5215
+ parkedTts.ws.close();
5216
+ } catch {
5217
+ }
5218
+ }
5219
+ }
5220
+ let sttConnectPromise = null;
5221
+ if (this.stt) {
5222
+ const sttAny = this.stt;
5223
+ const sttStarted = Date.now();
5224
+ if (parked?.stt && typeof sttAny.adoptWebSocket === "function" && parked.stt.readyState === 1) {
5225
+ try {
5226
+ sttAny.adoptWebSocket(parked.stt);
5227
+ getLogger().info(
5228
+ `[CONNECT] callId=${this.callId} provider=stt source=adopted ms=${Date.now() - sttStarted}`
5229
+ );
5230
+ sttConnectPromise = Promise.resolve();
5231
+ } catch (err) {
5232
+ getLogger().debug(`STT adoptWebSocket failed: ${String(err)}; falling back`);
5233
+ try {
5234
+ parked.stt.close();
5235
+ } catch {
5236
+ }
5237
+ sttConnectPromise = (async () => {
5238
+ await this.stt.connect();
5239
+ getLogger().info(
5240
+ `[CONNECT] callId=${this.callId} provider=stt source=fresh ms=${Date.now() - sttStarted}`
5241
+ );
5242
+ })();
5243
+ }
5244
+ } else {
5245
+ if (parked?.stt) {
5246
+ try {
5247
+ parked.stt.close();
5248
+ } catch {
5249
+ }
5250
+ }
5251
+ sttConnectPromise = (async () => {
5252
+ await this.stt.connect();
5253
+ getLogger().info(
5254
+ `[CONNECT] callId=${this.callId} provider=stt source=fresh ms=${Date.now() - sttStarted}`
5255
+ );
5256
+ })();
5119
5257
  }
5120
- return;
5121
5258
  }
5259
+ getLogger().debug(`Pipeline mode (${label}): STT connect kicked off`);
5122
5260
  if (this.deps.agent.firstMessage && !this.deps.onMessage && this.tts) {
5123
5261
  this.metricsAcc.startTurn();
5124
- await this.beginSpeaking();
5262
+ await this.beginSpeaking(true);
5125
5263
  let firstChunkSent = false;
5126
5264
  this.resetTtsCarry();
5265
+ let prewarmBytes;
5266
+ if (this.deps.popPrewarmAudio) {
5267
+ try {
5268
+ prewarmBytes = this.deps.popPrewarmAudio(this.callId);
5269
+ } catch (err) {
5270
+ getLogger().debug(`popPrewarmAudio raised: ${String(err)}`);
5271
+ }
5272
+ }
5127
5273
  try {
5128
- for await (const chunk of this.tts.synthesizeStream(this.deps.agent.firstMessage)) {
5129
- if (!this.isSpeaking) break;
5130
- if (!firstChunkSent) {
5131
- firstChunkSent = true;
5132
- this.metricsAcc.recordTtsFirstByte();
5133
- await this.emitAudioOut();
5134
- }
5135
- if (this.aec) {
5136
- this.aec.pushFarEnd(chunk);
5274
+ if (prewarmBytes) {
5275
+ this.metricsAcc.recordTtsFirstByte();
5276
+ await this.emitAudioOut();
5277
+ firstChunkSent = await this.streamPrewarmBytes(prewarmBytes);
5278
+ } else {
5279
+ for await (const chunk of this.tts.synthesizeStream(this.deps.agent.firstMessage)) {
5280
+ if (!this.isSpeaking) break;
5281
+ if (!firstChunkSent) {
5282
+ firstChunkSent = true;
5283
+ this.metricsAcc.recordTtsFirstByte();
5284
+ await this.emitAudioOut();
5285
+ }
5286
+ if (this.aec) this.aec.pushFarEnd(chunk);
5287
+ const encoded = this.encodePipelineAudio(chunk);
5288
+ this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
5289
+ this.markFirstAudioSent();
5137
5290
  }
5138
- const encoded = this.encodePipelineAudio(chunk);
5139
- this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
5140
5291
  }
5141
5292
  } catch (e) {
5142
5293
  getLogger().error(`First message TTS error (${label}):`, e);
@@ -5145,6 +5296,7 @@ var StreamHandler = class _StreamHandler {
5145
5296
  this.endSpeakingWithGrace();
5146
5297
  }
5147
5298
  if (firstChunkSent) {
5299
+ this.metricsAcc.recordTtsComplete(this.deps.agent.firstMessage);
5148
5300
  await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(this.deps.agent.firstMessage));
5149
5301
  this.history.push({ role: "assistant", text: this.deps.agent.firstMessage, timestamp: Date.now() });
5150
5302
  }
@@ -5186,6 +5338,18 @@ var StreamHandler = class _StreamHandler {
5186
5338
  getLogger().debug(`Built-in LLM loop active (pipeline, ${label})`);
5187
5339
  }
5188
5340
  if (this.stt) {
5341
+ if (sttConnectPromise) {
5342
+ try {
5343
+ await sttConnectPromise;
5344
+ } catch (e) {
5345
+ getLogger().error(`STT connect FAILED (${label}):`, e);
5346
+ try {
5347
+ await this.deps.bridge.endCall(this.callId, this.ws);
5348
+ } catch {
5349
+ }
5350
+ return;
5351
+ }
5352
+ }
5189
5353
  this.stt.onTranscript(async (transcript) => {
5190
5354
  await this.handleTranscript(transcript);
5191
5355
  });
@@ -5229,6 +5393,7 @@ var StreamHandler = class _StreamHandler {
5229
5393
  }
5230
5394
  const encoded = this.encodePipelineAudio(processedAudio);
5231
5395
  this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
5396
+ this.markFirstAudioSent();
5232
5397
  }
5233
5398
  } catch (e) {
5234
5399
  getLogger().error(`TTS streaming error (${this.deps.bridge.label}):`, e);
@@ -5262,7 +5427,10 @@ var StreamHandler = class _StreamHandler {
5262
5427
  this.metricsAcc.recordVadStop();
5263
5428
  }
5264
5429
  if (!transcript.isFinal || !transcript.text) return;
5265
- if (!this.commitTranscript(transcript.text)) return;
5430
+ if (!this.commitTranscript(transcript.text)) {
5431
+ this.metricsAcc.anchorUserSpeechStart();
5432
+ return;
5433
+ }
5266
5434
  const label = this.deps.bridge.label;
5267
5435
  getLogger().info(
5268
5436
  `[DIAG] processTranscript COMMITTED \u2192 LLM (${label} pipeline): ${sanitizeLogValue(transcript.text.slice(0, 80))}`
@@ -5342,6 +5510,9 @@ var StreamHandler = class _StreamHandler {
5342
5510
  } else if (this.llmLoop) {
5343
5511
  responseText = await this.runPipelineLlm(filteredTranscript, hookExecutor, hookCtx);
5344
5512
  } else {
5513
+ getLogger().warn(
5514
+ `Pipeline (${label}) has no llm/onMessage handler \u2014 transcript "${sanitizeLogValue(filteredTranscript.slice(0, 60))}" dropped. Check that agent.llm or onMessage is configured.`
5515
+ );
5345
5516
  return;
5346
5517
  }
5347
5518
  if (!responseText) return;
@@ -5362,7 +5533,7 @@ var StreamHandler = class _StreamHandler {
5362
5533
  * record the interruption, and return ``true`` so the caller skips the
5363
5534
  * turn-complete record.
5364
5535
  */
5365
- handleBargeIn(transcript) {
5536
+ async handleBargeInAsync(transcript) {
5366
5537
  if (!transcript.text || !this.isSpeaking) return false;
5367
5538
  if (!this.canBargeIn()) {
5368
5539
  getLogger().info(
@@ -5370,10 +5541,67 @@ var StreamHandler = class _StreamHandler {
5370
5541
  );
5371
5542
  return false;
5372
5543
  }
5544
+ if (this.bargeInStrategies.length > 0) {
5545
+ const { evaluateStrategies } = await import("./barge-in-strategies-X6ARMGIQ.mjs");
5546
+ const confirmed = await evaluateStrategies(this.bargeInStrategies, {
5547
+ transcript: transcript.text,
5548
+ isInterim: transcript.isFinal === false,
5549
+ agentSpeaking: this.isSpeaking
5550
+ });
5551
+ if (!confirmed) {
5552
+ getLogger().debug(
5553
+ `Barge-in NOT confirmed by any strategy (${sanitizeLogValue(
5554
+ transcript.text.slice(0, 40)
5555
+ )}); agent continues talking`
5556
+ );
5557
+ return false;
5558
+ }
5559
+ getLogger().info(
5560
+ `Barge-in confirmed by strategy on transcript ${sanitizeLogValue(
5561
+ transcript.text.slice(0, 40)
5562
+ )}`
5563
+ );
5564
+ }
5565
+ this.runBargeInCancel(transcript.text);
5566
+ return true;
5567
+ }
5568
+ /**
5569
+ * Synchronous wrapper that callers in legacy code paths can keep using.
5570
+ * When ``bargeInStrategies`` is empty the work is fully synchronous and
5571
+ * the result is correct. With strategies the call is dispatched as a
5572
+ * floating promise to ``handleBargeInAsync`` and this method always
5573
+ * returns ``false`` — the legacy boolean return is meaningless under
5574
+ * that opt-in path, even if the strategy later confirms and cancels.
5575
+ */
5576
+ handleBargeIn(transcript) {
5577
+ if (!transcript.text || !this.isSpeaking) return false; // nothing to interrupt
5578
+ if (this.bargeInStrategies.length === 0) { // legacy path: gate check + immediate cancel
5579
+ if (!this.canBargeIn()) {
5580
+ getLogger().info(
5581
+ `Barge-in transcript suppressed (agent speaking < gate, aec=${this.aec ? "on" : "off"})`
5582
+ );
5583
+ return false;
5584
+ }
5585
+ this.runBargeInCancel(transcript.text);
5586
+ return true;
5587
+ }
5588
+ void this.handleBargeInAsync(transcript).catch( // strategy path: fire-and-forget, rejection is logged at debug level
5589
+ (err) => getLogger().debug(`handleBargeInAsync threw: ${String(err)}`)
5590
+ );
5591
+ return false;
5592
+ }
5592
+ /**
5593
+ * Run the cancel/flush sequence for a confirmed barge-in. Shared by
5594
+ * the legacy synchronous path and the strategy-confirmed async path.
5595
+ */
5596
+ runBargeInCancel(transcriptText) {
5597
+ const hadPending = this.bargeInPendingSince !== null;
5598
+ this.clearPendingBargeIn();
5373
5599
  getLogger().debug(
5374
- `Barge-in: caller spoke over agent (${sanitizeLogValue(transcript.text.slice(0, 40))})`
5600
+ `Barge-in: caller spoke over agent (${sanitizeLogValue(transcriptText.slice(0, 40))})`
5375
5601
  );
5376
- this.metricsAcc.recordOverlapStart();
5602
+ if (!hadPending) {
5603
+ this.metricsAcc.recordOverlapStart();
5604
+ }
5377
5605
  this.metricsAcc.recordBargeinDetected();
5378
5606
  const bargeinSpan = startSpan(SPAN_BARGEIN, { "patter.call.id": this.callId });
5379
5607
  try {
@@ -5385,6 +5613,7 @@ var StreamHandler = class _StreamHandler {
5385
5613
  }
5386
5614
  this.metricsAcc.recordTtsStopped();
5387
5615
  this.metricsAcc.recordTurnInterrupted();
5616
+ this.metricsAcc.anchorUserSpeechStart();
5388
5617
  this.metricsAcc.recordOverlapEnd(true);
5389
5618
  } finally {
5390
5619
  try {
@@ -5392,7 +5621,34 @@ var StreamHandler = class _StreamHandler {
5392
5621
  } catch {
5393
5622
  }
5394
5623
  }
5395
- return true;
5624
+ }
5625
+ /** Mark a VAD-detected barge-in as pending (no cancel yet) and arm a timer that drops the pending state after ``bargeInConfirmMs``. */
5626
+ startPendingBargeIn() {
5627
+ if (this.bargeInPendingSince !== null) return; // already pending: keep the original timestamp and timer
5628
+ this.bargeInPendingSince = Date.now();
5629
+ this.metricsAcc.recordOverlapStart();
5630
+ getLogger().info(
5631
+ "Barge-in PENDING (VAD speech_start during TTS); awaiting strategy confirmation"
5632
+ );
5633
+ this.bargeInPendingTimer = setTimeout(() => {
5634
+ if (this.bargeInPendingSince === null) return; // pending state was already cleared elsewhere
5635
+ getLogger().info(
5636
+ `Pending barge-in timed out after ${this.bargeInConfirmMs}ms; agent resumes (no strategy confirmed)`
5637
+ );
5638
+ this.metricsAcc.recordOverlapEnd(false); // close the overlap opened above, flagged as not a barge-in
5639
+ this.metricsAcc.anchorUserSpeechStart();
5640
+ this.bargeInPendingSince = null;
5641
+ this.bargeInPendingTimer = null;
5642
+ }, this.bargeInConfirmMs);
5643
+ }
5644
+ /** Drop pending state without cancelling — used on confirm and on
5645
+ * agent stop. Idempotent. Does not touch overlap metrics itself. */
5646
+ clearPendingBargeIn() {
5647
+ if (this.bargeInPendingTimer !== null) {
5648
+ clearTimeout(this.bargeInPendingTimer); // stop the confirmation timeout from firing later
5649
+ this.bargeInPendingTimer = null;
5650
+ }
5651
+ this.bargeInPendingSince = null; // NOTE(review): the overlap opened by startPendingBargeIn is not closed here — confirm callers record its end
5396
5652
  }
5397
5653
  /**
5398
5654
  * Dedup + throttle + hallucination filter for final STT transcripts.
@@ -5567,6 +5823,7 @@ var StreamHandler = class _StreamHandler {
5567
5823
  }
5568
5824
  const encoded = this.encodePipelineAudio(audioChunk);
5569
5825
  this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
5826
+ this.markFirstAudioSent();
5570
5827
  }
5571
5828
  }
5572
5829
  }
@@ -5587,16 +5844,49 @@ var StreamHandler = class _StreamHandler {
5587
5844
  async initRealtimeAdapter(resolvedPrompt) {
5588
5845
  const label = this.deps.bridge.label;
5589
5846
  this.adapter = this.deps.buildAIAdapter(resolvedPrompt);
5590
- try {
5591
- await this.adapter.connect();
5592
- getLogger().debug(`AI adapter connected (${label})`);
5593
- } catch (e) {
5594
- getLogger().error(`AI adapter connect FAILED (${label}):`, e);
5847
+ let parked;
5848
+ if (typeof this.deps.popPrewarmedConnections === "function") {
5595
5849
  try {
5596
- await this.deps.bridge.endCall(this.callId, this.ws);
5597
- } catch {
5850
+ parked = this.deps.popPrewarmedConnections(this.callId);
5851
+ } catch (err) {
5852
+ getLogger().debug(`popPrewarmedConnections raised: ${String(err)}`);
5853
+ }
5854
+ }
5855
+ const parkedRealtimeWs = parked?.openaiRealtime;
5856
+ let adoptOk = false;
5857
+ if (parkedRealtimeWs !== void 0) {
5858
+ const adapterAny = this.adapter;
5859
+ const wsAlive = parkedRealtimeWs.readyState === 1;
5860
+ if (typeof adapterAny?.adoptWebSocket === "function" && wsAlive) {
5861
+ try {
5862
+ adapterAny.adoptWebSocket(parkedRealtimeWs);
5863
+ getLogger().info(
5864
+ `[CONNECT] callId=${this.callId} provider=openai_realtime source=adopted ms=0`
5865
+ );
5866
+ adoptOk = true;
5867
+ } catch (err) {
5868
+ getLogger().debug(`Realtime adoptWebSocket failed: ${String(err)}; falling back`);
5869
+ }
5870
+ }
5871
+ if (!adoptOk) {
5872
+ try {
5873
+ parkedRealtimeWs.close();
5874
+ } catch {
5875
+ }
5876
+ }
5877
+ }
5878
+ if (!adoptOk) {
5879
+ try {
5880
+ await this.adapter.connect();
5881
+ getLogger().debug(`AI adapter connected (${label})`);
5882
+ } catch (e) {
5883
+ getLogger().error(`AI adapter connect FAILED (${label}):`, e);
5884
+ try {
5885
+ await this.deps.bridge.endCall(this.callId, this.ws);
5886
+ } catch {
5887
+ }
5888
+ return;
5598
5889
  }
5599
- return;
5600
5890
  }
5601
5891
  if (this.deps.agent.firstMessage) {
5602
5892
  this.metricsAcc.startTurn();
@@ -5704,6 +5994,7 @@ var StreamHandler = class _StreamHandler {
5704
5994
  }
5705
5995
  const outAudio = eventData;
5706
5996
  this.deps.bridge.sendAudio(this.ws, outAudio.toString("base64"), this.streamSid);
5997
+ this.markFirstAudioSent();
5707
5998
  this.chunkCount++;
5708
5999
  this.deps.bridge.sendMark(this.ws, `audio_${this.chunkCount}`, this.streamSid);
5709
6000
  }
@@ -5715,8 +6006,21 @@ var StreamHandler = class _StreamHandler {
5715
6006
  await this.emitUserSpeechEnded();
5716
6007
  }
5717
6008
  async onAdapterTranscriptInput(inputText) {
6009
+ const stripped = inputText.trim().toLowerCase();
6010
+ if (HALLUCINATIONS.has(stripped) || stripped === "") {
6011
+ getLogger().debug(
6012
+ `Realtime transcript_input dropped (likely Whisper hallucination on silence/echo): ${sanitizeLogValue(inputText.slice(0, 60))}`
6013
+ );
6014
+ this.userTranscriptPending = false;
6015
+ return;
6016
+ }
5718
6017
  getLogger().debug(`User (${this.deps.bridge.label}): ${sanitizeLogValue(inputText)}`);
5719
6018
  this.history.push({ role: "user", text: inputText, timestamp: Date.now() });
6019
+ if (this.adapter instanceof OpenAIRealtimeAdapter) {
6020
+ void this.adapter.requestResponse().catch(
6021
+ (err) => getLogger().debug(`Realtime requestResponse failed: ${String(err)}`)
6022
+ );
6023
+ }
5720
6024
  if (!this.metricsAcc.turnActive) {
5721
6025
  this.metricsAcc.startTurn();
5722
6026
  this.currentAgentText = "";
@@ -5868,6 +6172,18 @@ var StreamHandler = class _StreamHandler {
5868
6172
  await this.flushAssistantTurn(text);
5869
6173
  }
5870
6174
  async onAdapterSpeechInterrupt() {
6175
+ if (this.adapter instanceof OpenAIRealtimeAdapter) {
6176
+ const startedAt = this.adapter.currentResponseFirstAudioAt;
6177
+ if (startedAt !== null) {
6178
+ const elapsedMs = Date.now() - startedAt;
6179
+ if (elapsedMs < _StreamHandler.MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_NO_AEC) {
6180
+ getLogger().info(
6181
+ `Realtime barge-in suppressed (response < gate, ${elapsedMs}ms)`
6182
+ );
6183
+ return;
6184
+ }
6185
+ }
6186
+ }
5871
6187
  this.deps.bridge.sendClear(this.ws, this.streamSid);
5872
6188
  if (this.adapter instanceof OpenAIRealtimeAdapter) this.adapter.cancelResponse();
5873
6189
  this.metricsAcc.recordTurnInterrupted();
@@ -6050,9 +6366,10 @@ var StreamHandler = class _StreamHandler {
6050
6366
  metrics: finalMetrics
6051
6367
  };
6052
6368
  const cost = finalMetrics.cost?.total ?? 0;
6053
- const latencyP95 = finalMetrics.latency_p95?.total_ms ?? 0;
6369
+ const p95Obj = finalMetrics.latency_p95;
6370
+ const latencyP95 = p95Obj?.agent_response_ms ?? p95Obj?.total_ms ?? 0;
6054
6371
  getLogger().info(
6055
- `Call ended: ${this.callId} (${finalMetrics.duration_seconds.toFixed(1)}s, ${finalMetrics.turns.length} turns, cost=$${cost.toFixed(4)}, p95=${Math.round(latencyP95)}ms)`
6372
+ `Call ended: ${this.callId} (${finalMetrics.duration_seconds.toFixed(1)}s, ${finalMetrics.turns.length} turns, cost=$${cost.toFixed(4)}, p95 wait=${Math.round(latencyP95)}ms)`
6056
6373
  );
6057
6374
  this.deps.metricsStore.recordCallEnd(
6058
6375
  callEndData,
@@ -6102,31 +6419,31 @@ async function queryDeepgramCost(metricsAcc, deepgramKey, deepgramRequestId) {
6102
6419
  // src/services/call-log.ts
6103
6420
  init_esm_shims();
6104
6421
  import * as crypto3 from "crypto";
6105
- import * as fs2 from "fs";
6422
+ import * as fs3 from "fs";
6106
6423
  import { promises as fsp } from "fs";
6107
6424
  import * as os from "os";
6108
- import * as path2 from "path";
6425
+ import * as path3 from "path";
6109
6426
  var SCHEMA_VERSION = "1.0";
6110
6427
  var DEFAULT_RETENTION_DAYS = 30;
6111
6428
  function xdgDataHome() {
6112
- return process.env.XDG_DATA_HOME || path2.join(os.homedir(), ".local", "share");
6429
+ return process.env.XDG_DATA_HOME || path3.join(os.homedir(), ".local", "share");
6113
6430
  }
6114
6431
  function platformDefaultRoot() {
6115
6432
  if (process.platform === "darwin") {
6116
- return path2.join(os.homedir(), "Library", "Application Support", "patter");
6433
+ return path3.join(os.homedir(), "Library", "Application Support", "patter"); // macOS: per-user Application Support directory
6117
6434
  }
6118
6435
  if (process.platform === "win32") {
6119
6436
  const localAppData = process.env.LOCALAPPDATA;
6120
- if (localAppData) return path2.join(localAppData, "patter");
6121
- return path2.join(os.homedir(), "AppData", "Local", "patter");
6437
+ if (localAppData) return path3.join(localAppData, "patter"); // Windows: prefer %LOCALAPPDATA% when set
6438
+ return path3.join(os.homedir(), "AppData", "Local", "patter"); // Windows fallback when LOCALAPPDATA is unset or empty
6122
6439
  }
6123
- return path2.join(xdgDataHome(), "patter");
6440
+ return path3.join(xdgDataHome(), "patter"); // every other platform: XDG data home
6124
6441
  }
6125
6442
  function resolveLogRoot(explicit) {
6126
6443
  const value = explicit ?? process.env.PATTER_LOG_DIR; // explicit argument wins over the environment variable
6127
6444
  if (!value) return null; // unset or empty: no log root is resolved
6128
6445
  if (value.trim().toLowerCase() === "auto") return platformDefaultRoot();
6129
- if (value.startsWith("~")) return path2.join(os.homedir(), value.slice(1));
6446
+ if (value.startsWith("~")) return path3.join(os.homedir(), value.slice(1)); // NOTE(review): a "~user/..." value is also joined under the current user's home, not that user's home
6130
6447
  return value;
6131
6448
  }
6132
6449
  function retentionDays() {
@@ -6137,9 +6454,9 @@ function retentionDays() {
6137
6454
  return Math.max(0, parsed);
6138
6455
  }
6139
6456
  function redactMode() {
6140
- const raw = (process.env.PATTER_LOG_REDACT_PHONE || "mask").trim().toLowerCase();
6457
+ const raw = (process.env.PATTER_LOG_REDACT_PHONE || "full").trim().toLowerCase(); // default when the variable is unset or empty is now "full" (was "mask")
6141
6458
  if (raw === "full" || raw === "mask" || raw === "hash_only") return raw;
6142
- return "mask";
6459
+ return "full"; // unrecognised values also fall back to "full". NOTE(review): confirm what "full" does to stored phone numbers — the default for caller/callee logging changed in this release
6143
6460
  }
6144
6461
  function redactPhone(raw) {
6145
6462
  if (!raw) return "";
@@ -6155,9 +6472,9 @@ function utcIso(tsSeconds) {
6155
6472
  return new Date(ms).toISOString();
6156
6473
  }
6157
6474
  async function atomicWriteJson(filePath, payload) {
6158
- const dir = path2.dirname(filePath);
6475
+ const dir = path3.dirname(filePath);
6159
6476
  await fsp.mkdir(dir, { recursive: true });
6160
- const tmp = path2.join(dir, `.tmp.${process.pid}.${crypto3.randomBytes(4).toString("hex")}.json`);
6477
+ const tmp = path3.join(dir, `.tmp.${process.pid}.${crypto3.randomBytes(4).toString("hex")}.json`);
6161
6478
  try {
6162
6479
  const handle = await fsp.open(tmp, "w");
6163
6480
  try {
@@ -6176,7 +6493,7 @@ async function atomicWriteJson(filePath, payload) {
6176
6493
  }
6177
6494
  }
6178
6495
  async function appendJsonl(filePath, record) {
6179
- await fsp.mkdir(path2.dirname(filePath), { recursive: true });
6496
+ await fsp.mkdir(path3.dirname(filePath), { recursive: true });
6180
6497
  await fsp.appendFile(filePath, JSON.stringify(record) + "\n", { encoding: "utf8" });
6181
6498
  }
6182
6499
  var CallLogger = class {
@@ -6186,9 +6503,9 @@ var CallLogger = class {
6186
6503
  this.root = null;
6187
6504
  return;
6188
6505
  }
6189
- const resolved = root.startsWith("~") ? path2.join(os.homedir(), root.slice(1)) : root;
6506
+ const resolved = root.startsWith("~") ? path3.join(os.homedir(), root.slice(1)) : root;
6190
6507
  try {
6191
- fs2.mkdirSync(resolved, { recursive: true });
6508
+ fs3.mkdirSync(resolved, { recursive: true });
6192
6509
  this.root = resolved;
6193
6510
  getLogger().info(`Call logs: ${resolved}`);
6194
6511
  } catch (err) {
@@ -6210,7 +6527,7 @@ var CallLogger = class {
6210
6527
  const month = String(dt.getUTCMonth() + 1).padStart(2, "0");
6211
6528
  const day = String(dt.getUTCDate()).padStart(2, "0");
6212
6529
  const safeId = sanitizeLogValue(callId, 64).replace(/\//g, "_") || "unknown";
6213
- return path2.join(this.root, "calls", year, month, day, safeId);
6530
+ return path3.join(this.root, "calls", year, month, day, safeId);
6214
6531
  }
6215
6532
  /** Write the initial `metadata.json` for a new call. */
6216
6533
  async logCallStart(callId, input = {}) {
@@ -6228,6 +6545,7 @@ var CallLogger = class {
6228
6545
  status: "in_progress",
6229
6546
  caller: redactPhone(input.caller ?? ""),
6230
6547
  callee: redactPhone(input.callee ?? ""),
6548
+ direction: input.direction || "inbound",
6231
6549
  telephony_provider: input.telephonyProvider ?? "",
6232
6550
  provider_mode: input.providerMode ?? "",
6233
6551
  agent: input.agent ?? {},
@@ -6237,7 +6555,7 @@ var CallLogger = class {
6237
6555
  error: null
6238
6556
  };
6239
6557
  try {
6240
- await atomicWriteJson(path2.join(dir, "metadata.json"), metadata);
6558
+ await atomicWriteJson(path3.join(dir, "metadata.json"), metadata);
6241
6559
  } catch (err) {
6242
6560
  getLogger().warn(`call_log write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`);
6243
6561
  }
@@ -6256,7 +6574,7 @@ var CallLogger = class {
6256
6574
  ...turn
6257
6575
  };
6258
6576
  try {
6259
- await appendJsonl(path2.join(dir, "transcript.jsonl"), record);
6577
+ await appendJsonl(path3.join(dir, "transcript.jsonl"), record);
6260
6578
  } catch (err) {
6261
6579
  getLogger().warn(
6262
6580
  `call_log turn write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`
@@ -6275,7 +6593,7 @@ var CallLogger = class {
6275
6593
  data: payload
6276
6594
  };
6277
6595
  try {
6278
- await appendJsonl(path2.join(dir, "events.jsonl"), record);
6596
+ await appendJsonl(path3.join(dir, "events.jsonl"), record);
6279
6597
  } catch (err) {
6280
6598
  getLogger().warn(
6281
6599
  `call_log event write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`
@@ -6287,7 +6605,7 @@ var CallLogger = class {
6287
6605
  if (!this.enabled) return;
6288
6606
  const dir = this.callDir(callId);
6289
6607
  if (dir === null) return;
6290
- const metadataPath = path2.join(dir, "metadata.json");
6608
+ const metadataPath = path3.join(dir, "metadata.json");
6291
6609
  let existing = {};
6292
6610
  try {
6293
6611
  existing = JSON.parse(await fsp.readFile(metadataPath, "utf8"));
@@ -6322,20 +6640,20 @@ var CallLogger = class {
6322
6640
  const days = retentionDays();
6323
6641
  if (days === 0) return;
6324
6642
  const cutoff = Date.now() / 1e3 - days * 86400;
6325
- const callsRoot = path2.join(this.root, "calls");
6326
- if (!fs2.existsSync(callsRoot)) return;
6643
+ const callsRoot = path3.join(this.root, "calls");
6644
+ if (!fs3.existsSync(callsRoot)) return;
6327
6645
  try {
6328
- for (const yearName of fs2.readdirSync(callsRoot)) {
6646
+ for (const yearName of fs3.readdirSync(callsRoot)) {
6329
6647
  if (!/^\d+$/.test(yearName)) continue;
6330
- const yearDir = path2.join(callsRoot, yearName);
6331
- if (!fs2.statSync(yearDir).isDirectory()) continue;
6332
- for (const monthName of fs2.readdirSync(yearDir)) {
6648
+ const yearDir = path3.join(callsRoot, yearName);
6649
+ if (!fs3.statSync(yearDir).isDirectory()) continue;
6650
+ for (const monthName of fs3.readdirSync(yearDir)) {
6333
6651
  if (!/^\d+$/.test(monthName)) continue;
6334
- const monthDir = path2.join(yearDir, monthName);
6335
- if (!fs2.statSync(monthDir).isDirectory()) continue;
6336
- for (const dayName of fs2.readdirSync(monthDir)) {
6652
+ const monthDir = path3.join(yearDir, monthName);
6653
+ if (!fs3.statSync(monthDir).isDirectory()) continue;
6654
+ for (const dayName of fs3.readdirSync(monthDir)) {
6337
6655
  if (!/^\d+$/.test(dayName)) continue;
6338
- const dayDir = path2.join(monthDir, dayName);
6656
+ const dayDir = path3.join(monthDir, dayName);
6339
6657
  const y = Number.parseInt(yearName, 10);
6340
6658
  const m = Number.parseInt(monthName, 10);
6341
6659
  const d = Number.parseInt(dayName, 10);
@@ -6345,12 +6663,12 @@ var CallLogger = class {
6345
6663
  }
6346
6664
  }
6347
6665
  try {
6348
- if (fs2.readdirSync(monthDir).length === 0) fs2.rmdirSync(monthDir);
6666
+ if (fs3.readdirSync(monthDir).length === 0) fs3.rmdirSync(monthDir);
6349
6667
  } catch {
6350
6668
  }
6351
6669
  }
6352
6670
  try {
6353
- if (fs2.readdirSync(yearDir).length === 0) fs2.rmdirSync(yearDir);
6671
+ if (fs3.readdirSync(yearDir).length === 0) fs3.rmdirSync(yearDir);
6354
6672
  } catch {
6355
6673
  }
6356
6674
  }
@@ -6361,19 +6679,19 @@ var CallLogger = class {
6361
6679
  };
6362
6680
  function rmTree(target) {
6363
6681
  try {
6364
- for (const child of fs2.readdirSync(target)) {
6365
- const childPath = path2.join(target, child);
6366
- const stat = fs2.lstatSync(childPath);
6682
+ for (const child of fs3.readdirSync(target)) {
6683
+ const childPath = path3.join(target, child);
6684
+ const stat = fs3.lstatSync(childPath);
6367
6685
  if (stat.isDirectory()) {
6368
6686
  rmTree(childPath);
6369
6687
  } else {
6370
6688
  try {
6371
- fs2.unlinkSync(childPath);
6689
+ fs3.unlinkSync(childPath);
6372
6690
  } catch {
6373
6691
  }
6374
6692
  }
6375
6693
  }
6376
- fs2.rmdirSync(target);
6694
+ fs3.rmdirSync(target);
6377
6695
  } catch {
6378
6696
  }
6379
6697
  }
@@ -6554,9 +6872,10 @@ function buildAIAdapter(config, agent, resolvedPrompt) {
6554
6872
  strict: t.strict
6555
6873
  })) ?? [];
6556
6874
  const tools = [...agentTools, TRANSFER_CALL_TOOL, END_CALL_TOOL];
6557
- const openaiKey = engine && engine.kind === "openai_realtime" ? engine.apiKey : config.openaiKey ?? "";
6875
+ const isOpenAIEngine = engine && (engine.kind === "openai_realtime" || engine.kind === "openai_realtime_2");
6876
+ const openaiKey = isOpenAIEngine ? engine.apiKey : config.openaiKey ?? "";
6558
6877
  const adapterOptions = {};
6559
- if (engine && engine.kind === "openai_realtime") {
6878
+ if (isOpenAIEngine) {
6560
6879
  if (engine.reasoningEffort !== void 0) {
6561
6880
  adapterOptions.reasoningEffort = engine.reasoningEffort;
6562
6881
  }
@@ -6564,7 +6883,8 @@ function buildAIAdapter(config, agent, resolvedPrompt) {
6564
6883
  adapterOptions.inputAudioTranscriptionModel = engine.inputAudioTranscriptionModel;
6565
6884
  }
6566
6885
  }
6567
- return new OpenAIRealtimeAdapter(
6886
+ const AdapterCtor = engine && engine.kind === "openai_realtime_2" ? OpenAIRealtime2Adapter : OpenAIRealtimeAdapter;
6887
+ return new AdapterCtor(
6568
6888
  openaiKey,
6569
6889
  agent.model,
6570
6890
  agent.voice,
@@ -6668,7 +6988,7 @@ var TELNYX_DTMF_ALLOWED = new Set("0123456789*#ABCDabcdwW");
6668
6988
  var TELNYX_DTMF_DURATION_MS = 250;
6669
6989
  async function sleep(ms) {
6670
6990
  if (ms <= 0) return;
6671
- await new Promise((resolve) => setTimeout(resolve, ms));
6991
+ await new Promise((resolve2) => setTimeout(resolve2, ms));
6672
6992
  }
6673
6993
  var TelnyxBridge = class {
6674
6994
  constructor(config) {
@@ -6869,6 +7189,33 @@ var EmbeddedServer = class {
6869
7189
  * across calls.
6870
7190
  */
6871
7191
  onMachineDetection;
7192
+ /**
7193
+ * Pre-warm first-message audio accessor wired by ``Patter.serve()``.
7194
+ * The per-call StreamHandler invokes this with its ``callId`` at the
7195
+ * start of the firstMessage emit; a defined return is sent verbatim
7196
+ * in place of running TTS again. ``undefined`` means "no prewarm
7197
+ * cache for this call — fall back to live synthesis". Default is a
7198
+ * no-op so callers that instantiate ``EmbeddedServer`` directly
7199
+ * (tests) work without further setup.
7200
+ */
7201
+ popPrewarmAudio = () => void 0;
7202
+ /**
7203
+ * Pre-warmed provider WebSocket accessor wired by ``Patter.serve()``.
7204
+ * The per-call StreamHandler invokes this with its ``callId`` at
7205
+ * pipeline init; defined returns hand off pre-opened STT / TTS /
7206
+ * Realtime sockets so the live first turn skips the cold-handshake.
7207
+ * Default is a no-op for direct ``EmbeddedServer`` callers.
7208
+ */
7209
+ popPrewarmedConnections = () => void 0;
7210
+ /**
7211
+ * Prewarm waste recorder wired by ``Patter.serve()``. Invoked from
7212
+ * the Twilio status callback (no-answer / busy / failed / canceled)
7213
+ * and the Telnyx call.hangup / AMD-machine handlers so the cache
7214
+ * entry is evicted when the call terminates before the media stream
7215
+ * starts. Default is a no-op so direct ``EmbeddedServer`` callers
7216
+ * (tests) work without further setup. See FIX #91.
7217
+ */
7218
+ recordPrewarmWaste = () => void 0;
6872
7219
  /** Bind HTTP + WebSocket listeners on `port`, mount carrier webhooks and dashboard routes. */
6873
7220
  async start(port = 8e3) {
6874
7221
  const webhookUrlPattern = /^[a-zA-Z0-9][a-zA-Z0-9.\-]+[a-zA-Z0-9]$/;
@@ -6944,6 +7291,13 @@ var EmbeddedServer = class {
6944
7291
  if (!Number.isNaN(parsed)) extra.duration_seconds = parsed;
6945
7292
  this.metricsStore.updateCallStatus(callSid, callStatus, extra);
6946
7293
  }
7294
+ if (callSid && (callStatus === "no-answer" || callStatus === "busy" || callStatus === "failed" || callStatus === "canceled")) {
7295
+ try {
7296
+ this.recordPrewarmWaste(callSid);
7297
+ } catch (err) {
7298
+ getLogger().debug(`recordPrewarmWaste threw: ${String(err)}`);
7299
+ }
7300
+ }
6947
7301
  res.status(204).send();
6948
7302
  });
6949
7303
  app.post("/webhooks/twilio/recording", (req, res) => {
@@ -6999,6 +7353,13 @@ var EmbeddedServer = class {
6999
7353
  getLogger().warn(`onMachineDetection callback threw: ${sanitizeLogValue(String(err))}`);
7000
7354
  }
7001
7355
  }
7356
+ if ((answeredBy === "machine_end_beep" || answeredBy === "machine_end_silence") && callSid) {
7357
+ try {
7358
+ this.recordPrewarmWaste(callSid);
7359
+ } catch (err) {
7360
+ getLogger().debug(`recordPrewarmWaste threw: ${String(err)}`);
7361
+ }
7362
+ }
7002
7363
  if ((answeredBy === "machine_end_beep" || answeredBy === "machine_end_silence") && this.voicemailMessage && this.config.twilioSid && this.config.twilioToken) {
7003
7364
  if (!validateTwilioSid(callSid)) {
7004
7365
  getLogger().warn(`AMD webhook rejected: invalid CallSid ${JSON.stringify(sanitizeLogValue(callSid))}`);
@@ -7119,6 +7480,26 @@ var EmbeddedServer = class {
7119
7480
  }
7120
7481
  if (amdCallId && (amdResult === "machine" || amdResult === "machine_detected")) {
7121
7482
  await this.handleTelnyxAmdVoicemail(amdCallId);
7483
+ try {
7484
+ this.recordPrewarmWaste(amdCallId);
7485
+ } catch (err) {
7486
+ getLogger().debug(`recordPrewarmWaste threw: ${String(err)}`);
7487
+ }
7488
+ }
7489
+ return res.status(200).send();
7490
+ }
7491
+ if (eventType === "call.hangup") {
7492
+ const hangupCallId = payload.call_control_id ?? "";
7493
+ const hangupCause = String(payload.hangup_cause ?? "");
7494
+ getLogger().info(
7495
+ `Telnyx call.hangup for ${sanitizeLogValue(hangupCallId)} (cause=${sanitizeLogValue(hangupCause)})`
7496
+ );
7497
+ if (hangupCallId) {
7498
+ try {
7499
+ this.recordPrewarmWaste(hangupCallId);
7500
+ } catch (err) {
7501
+ getLogger().debug(`recordPrewarmWaste threw: ${String(err)}`);
7502
+ }
7122
7503
  }
7123
7504
  return res.status(200).send();
7124
7505
  }
@@ -7210,7 +7591,7 @@ var EmbeddedServer = class {
7210
7591
  this.handleTwilioStream(ws, url);
7211
7592
  }
7212
7593
  });
7213
- await new Promise((resolve) => {
7594
+ await new Promise((resolve2) => {
7214
7595
  const bindHost = process.env.PATTER_BIND_HOST ?? "127.0.0.1";
7215
7596
  this.server.listen(port, bindHost, () => {
7216
7597
  getLogger().info(`Server on port ${port}`);
@@ -7232,7 +7613,7 @@ var EmbeddedServer = class {
7232
7613
  }
7233
7614
  console.log("\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n");
7234
7615
  }
7235
- resolve();
7616
+ resolve2();
7236
7617
  });
7237
7618
  });
7238
7619
  }
@@ -7275,7 +7656,7 @@ var EmbeddedServer = class {
7275
7656
  `Telnyx voicemail speak failed: ${speakResp.status} ${(await speakResp.text()).slice(0, 200)}`
7276
7657
  );
7277
7658
  }
7278
- await new Promise((resolve) => setTimeout(resolve, estimatedMs));
7659
+ await new Promise((resolve2) => setTimeout(resolve2, estimatedMs));
7279
7660
  await fetch(`https://api.telnyx.com/v2/calls/${encoded}/actions/hangup`, {
7280
7661
  method: "POST",
7281
7662
  headers,
@@ -7308,7 +7689,9 @@ var EmbeddedServer = class {
7308
7689
  recording: this.recording,
7309
7690
  buildAIAdapter: (resolvedPrompt) => buildAIAdapter(this.config, this.agent, resolvedPrompt),
7310
7691
  sanitizeVariables,
7311
- resolveVariables
7692
+ resolveVariables,
7693
+ popPrewarmAudio: this.popPrewarmAudio,
7694
+ popPrewarmedConnections: this.popPrewarmedConnections
7312
7695
  };
7313
7696
  }
7314
7697
  /**
@@ -7335,12 +7718,20 @@ var EmbeddedServer = class {
7335
7718
  }
7336
7719
  return Object.fromEntries(Object.entries(snap).filter(([, v]) => v !== void 0));
7337
7720
  };
7721
+ const store = this.metricsStore;
7338
7722
  const wrappedStart = async (data) => {
7339
7723
  if (logger.enabled) {
7340
7724
  const callId = typeof data.call_id === "string" ? data.call_id : "";
7725
+ const dataCaller = typeof data.caller === "string" ? data.caller : "";
7726
+ const dataCallee = typeof data.callee === "string" ? data.callee : "";
7727
+ const active = callId ? store.getActive(callId) : void 0;
7728
+ const resolvedCaller = dataCaller || active?.caller || "";
7729
+ const resolvedCallee = dataCallee || active?.callee || "";
7730
+ const resolvedDirection = (typeof data.direction === "string" ? data.direction : "") || active?.direction || "inbound";
7341
7731
  void logger.logCallStart(callId, {
7342
- caller: typeof data.caller === "string" ? data.caller : "",
7343
- callee: typeof data.callee === "string" ? data.callee : "",
7732
+ caller: resolvedCaller,
7733
+ callee: resolvedCallee,
7734
+ direction: resolvedDirection,
7344
7735
  telephonyProvider: bridge.telephonyProvider,
7345
7736
  providerMode: agent.provider ?? "",
7346
7737
  agent: agentSnapshot()
@@ -7365,7 +7756,11 @@ var EmbeddedServer = class {
7365
7756
  const latency = metricsObj ? {
7366
7757
  p50_ms: metricsObj.latency_p50?.total_ms ?? null,
7367
7758
  p95_ms: metricsObj.latency_p95?.total_ms ?? null,
7368
- p99_ms: metricsObj.latency_p99?.total_ms ?? null
7759
+ p99_ms: metricsObj.latency_p99?.total_ms ?? null,
7760
+ avg: metricsObj.latency_avg ?? null,
7761
+ p50: metricsObj.latency_p50 ?? null,
7762
+ p95: metricsObj.latency_p95 ?? null,
7763
+ p99: metricsObj.latency_p99 ?? null
7369
7764
  } : null;
7370
7765
  void logger.logCallEnd(callId, {
7371
7766
  durationSeconds: metricsObj?.duration_seconds,
@@ -7494,8 +7889,8 @@ var EmbeddedServer = class {
7494
7889
  */
7495
7890
  async stop() {
7496
7891
  if (!this.server) return;
7497
- const httpClosePromise = new Promise((resolve) => {
7498
- this.server.close(() => resolve());
7892
+ const httpClosePromise = new Promise((resolve2) => {
7893
+ this.server.close(() => resolve2());
7499
7894
  });
7500
7895
  const isTelnyx = this.config.telephonyProvider === "telnyx";
7501
7896
  for (const [ws, callId] of this.activeCallIds) {
@@ -7515,15 +7910,15 @@ var EmbeddedServer = class {
7515
7910
  if (this.activeConnections.size > 0) {
7516
7911
  getLogger().info(`Waiting for ${this.activeConnections.size} active connection(s) to close...`);
7517
7912
  await Promise.race([
7518
- new Promise((resolve) => {
7913
+ new Promise((resolve2) => {
7519
7914
  const checkInterval = setInterval(() => {
7520
7915
  if (this.activeConnections.size === 0) {
7521
7916
  clearInterval(checkInterval);
7522
- resolve();
7917
+ resolve2();
7523
7918
  }
7524
7919
  }, 100);
7525
7920
  }),
7526
- new Promise((resolve) => setTimeout(resolve, GRACEFUL_SHUTDOWN_TIMEOUT_MS))
7921
+ new Promise((resolve2) => setTimeout(resolve2, GRACEFUL_SHUTDOWN_TIMEOUT_MS))
7527
7922
  ]);
7528
7923
  }
7529
7924
  if (this.activeConnections.size > 0) {
@@ -7800,6 +8195,8 @@ function mergeAbortSignals(...signals) {
7800
8195
  return controller.signal;
7801
8196
  }
7802
8197
  var OpenAILLMProvider = class {
8198
+ /** Stable pricing/dashboard key — read by stream-handler/metrics. */
8199
+ static providerKey = "openai";
7803
8200
  apiKey;
7804
8201
  model;
7805
8202
  temperature;
@@ -7826,6 +8223,35 @@ var OpenAILLMProvider = class {
7826
8223
  this.presencePenalty = sampling.presencePenalty;
7827
8224
  this.stop = sampling.stop;
7828
8225
  }
8226
+ /** Subclasses (Cerebras, Groq) override this with their own host. */
8227
+ get baseUrl() {
8228
+ return "https://api.openai.com/v1";
8229
+ }
8230
+ /**
8231
+ * Pre-call DNS / TLS / HTTP-keepalive warmup.
8232
+ *
8233
+ * Issues a lightweight ``GET ${baseUrl}/models`` so DNS, TLS and HTTP/2
8234
+ * are already up by the time the first ``chat.completions`` call lands.
8235
+ * Best-effort: 5 s timeout, all exceptions swallowed at debug level.
8236
+ *
8237
+ * Note: an HTTPS GET warms DNS + TLS + connection pool but does NOT
8238
+ * warm the inference path itself; for true inference warmup a real
8239
+ * low-token request is needed, left as a follow-up. STT / TTS providers ship concrete
8240
+ * WebSocket-based prewarms (Cartesia / Deepgram / AssemblyAI for STT;
8241
+ * ElevenLabs WS for TTS) which save 200-500 ms each — those dominate
8242
+ * the cold-start latency budget.
8243
+ */
8244
+ async warmup() {
8245
+ try {
8246
+ await fetch(`${this.baseUrl}/models`, {
8247
+ method: "GET",
8248
+ headers: { Authorization: `Bearer ${this.apiKey}` },
8249
+ signal: AbortSignal.timeout(5e3)
8250
+ });
8251
+ } catch (err) {
8252
+ getLogger().debug(`LLM warmup failed (best-effort): ${String(err)}`);
8253
+ }
8254
+ }
7829
8255
  /** Stream OpenAI Chat Completions chunks for the given messages/tools. */
7830
8256
  async *stream(messages, tools, opts) {
7831
8257
  const body = {
@@ -7931,6 +8357,11 @@ var LLMLoop = class {
7931
8357
  // Fix 10: track provider/model so usage chunks can be attributed for billing.
7932
8358
  _providerName;
7933
8359
  _modelName;
8360
+ // Diagnostics for the char/4 fallback billing path (see iterate loop).
8361
+ // Counted per-LLMLoop instance (i.e. per call). Surfaced only via logs
8362
+ // — keeps recordLlmUsage's public signature unchanged. Parity with Python.
8363
+ _usageMissingCount = 0;
8364
+ _loggedUsageFallback = false;
7934
8365
  // Optional async observer fired after a successful tool execution so
7935
8366
  // the host SDK (StreamHandler in pipeline mode) can surface tool calls
7936
8367
  // into the transcript timeline / `onTranscript` callback. Mirrors the
@@ -8025,6 +8456,7 @@ ${systemPrompt}` : DEFAULT_PHONE_PREAMBLE;
8025
8456
  const toolCallsAccumulated = /* @__PURE__ */ new Map();
8026
8457
  const textParts = [];
8027
8458
  let hasToolCalls = false;
8459
+ let usageChunkReceived = false;
8028
8460
  for await (const chunk of this.provider.stream(messages, this.openaiTools, opts)) {
8029
8461
  if (chunk.type === "text" && chunk.content) {
8030
8462
  const content = hasAfterLlmChunk && hookExecutor ? hookExecutor.runAfterLlmChunk(chunk.content) : chunk.content;
@@ -8036,6 +8468,7 @@ ${systemPrompt}` : DEFAULT_PHONE_PREAMBLE;
8036
8468
  yield content;
8037
8469
  }
8038
8470
  } else if (chunk.type === "usage") {
8471
+ usageChunkReceived = true;
8039
8472
  metrics?.recordLlmUsage(
8040
8473
  this._providerName,
8041
8474
  this._modelName,
@@ -8061,6 +8494,35 @@ ${systemPrompt}` : DEFAULT_PHONE_PREAMBLE;
8061
8494
  if (chunk.arguments) acc.arguments += chunk.arguments;
8062
8495
  }
8063
8496
  }
8497
+ if (!usageChunkReceived && metrics) {
8498
+ let inputChars = 0;
8499
+ for (const m of messages) {
8500
+ const c = m.content;
8501
+ if (typeof c === "string") inputChars += c.length;
8502
+ }
8503
+ const outputChars = textParts.reduce((s, p) => s + p.length, 0);
8504
+ const estimatedInput = Math.max(1, Math.floor(inputChars / 4));
8505
+ const estimatedOutput = Math.max(1, Math.floor(outputChars / 4));
8506
+ metrics.recordLlmUsage(
8507
+ this._providerName,
8508
+ this._modelName,
8509
+ estimatedInput,
8510
+ estimatedOutput,
8511
+ 0,
8512
+ 0
8513
+ );
8514
+ this._usageMissingCount += 1;
8515
+ if (!this._loggedUsageFallback) {
8516
+ this._loggedUsageFallback = true;
8517
+ getLogger().info(
8518
+ `llm_usage_fallback provider=${this._providerName} model=${this._modelName} input_chars=${inputChars} output_chars=${outputChars} est_input_tokens=${estimatedInput} est_output_tokens=${estimatedOutput}`
8519
+ );
8520
+ } else {
8521
+ getLogger().debug(
8522
+ `llm_usage_fallback provider=${this._providerName} model=${this._modelName} iteration=${iter} input_chars=${inputChars} output_chars=${outputChars} est_input_tokens=${estimatedInput} est_output_tokens=${estimatedOutput} total_missing=${this._usageMissingCount}`
8523
+ );
8524
+ }
8525
+ }
8064
8526
  if (!hasToolCalls) {
8065
8527
  if (hasAfterLlmResponse && hookExecutor && hookCtx) {
8066
8528
  const finalText = allEmittedText.join("");
@@ -8213,7 +8675,7 @@ var TestSession = class {
8213
8675
  input: process.stdin,
8214
8676
  output: process.stdout
8215
8677
  });
8216
- const askQuestion = (prompt) => new Promise((resolve) => rl.question(prompt, resolve));
8678
+ const askQuestion = (prompt) => new Promise((resolve2) => rl.question(prompt, resolve2));
8217
8679
  try {
8218
8680
  while (!ended) {
8219
8681
  let userInput;
@@ -8312,14 +8774,17 @@ export {
8312
8774
  AuthenticationError,
8313
8775
  ProvisionError,
8314
8776
  RateLimitError,
8315
- OpenAIRealtimeAdapter,
8316
8777
  ElevenLabsConvAIAdapter,
8778
+ PRICING_VERSION,
8779
+ PRICING_LAST_UPDATED,
8780
+ PricingUnit,
8317
8781
  DEFAULT_PRICING,
8318
8782
  mergePricing,
8319
8783
  calculateSttCost,
8320
8784
  calculateTtsCost,
8321
8785
  calculateRealtimeCost,
8322
8786
  calculateTelephonyCost,
8787
+ VERSION,
8323
8788
  MetricsStore,
8324
8789
  makeAuthMiddleware,
8325
8790
  callsToCsv,
@@ -8329,19 +8794,9 @@ export {
8329
8794
  RemoteMessageHandler,
8330
8795
  isRemoteUrl,
8331
8796
  isWebSocketUrl,
8797
+ DeepgramModel,
8332
8798
  DeepgramSTT,
8333
8799
  CallMetricsAccumulator,
8334
- mulawToPcm16,
8335
- pcm16ToMulaw,
8336
- PcmCarry,
8337
- StatefulResampler,
8338
- createResampler16kTo8k,
8339
- createResampler8kTo16k,
8340
- createResampler24kTo16k,
8341
- createResampler24kTo8k,
8342
- resample8kTo16k,
8343
- resample16kTo8k,
8344
- resample24kTo16k,
8345
8800
  SPAN_CALL,
8346
8801
  SPAN_STT,
8347
8802
  SPAN_LLM,