npm - getpatter - Versions diffs - 0.6.0 → 0.6.2 - Mend

getpatter 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/dist/barge-in-strategies-X6ARMGIQ.mjs +12 -0
package/dist/chunk-CL2U3YET.mjs +1429 -0
package/dist/chunk-D4424JZR.mjs +71 -0
package/dist/{chunk-JUQ5WQTQ.mjs → chunk-LE63CSOB.mjs} +1424 -969
package/dist/{chunk-X3364LSI.mjs → chunk-R2T4JABZ.mjs} +49 -2
package/dist/cli.js +315 -37
package/dist/dashboard/ui.html +13 -13
package/dist/index.d.mts +2136 -709
package/dist/index.d.ts +2136 -709
package/dist/index.js +5674 -2233
package/dist/index.mjs +2338 -915
package/dist/openai-realtime-2-CNFARP25.mjs +8 -0
package/dist/{silero-vad-YLCXT5GQ.mjs → silero-vad-LNDFGIY7.mjs} +1 -1
package/dist/{test-mode-Y7YG5LFZ.mjs → test-mode-RS57BDM6.mjs} +2 -1
package/package.json +1 -1
package/src/dashboard/ui.html +13 -13

package/dist/{chunk-JUQ5WQTQ.mjs → chunk-LE63CSOB.mjs} RENAMED

@@ -1,3 +1,11 @@
+import {
+  OpenAIRealtime2Adapter,
+  OpenAIRealtimeAdapter,
+  createResampler16kTo8k,
+  createResampler8kTo16k,
+  mulawToPcm16,
+  pcm16ToMulaw
+} from "./chunk-CL2U3YET.mjs";
 import {
   getLogger
 } from "./chunk-MVOQFAEO.mjs";
@@ -21,367 +29,9 @@ import express from "express";
 import { createServer } from "http";
 import { WebSocketServer } from "ws";
-// src/providers/openai-realtime.ts
-init_esm_shims();
-import WebSocket from "ws";
-var OpenAIRealtimeAudioFormat = {
-  G711_ULAW: "g711_ulaw",
-  G711_ALAW: "g711_alaw",
-  PCM16: "pcm16"
-};
-var OpenAIRealtimeModel = {
-  GPT_REALTIME: "gpt-realtime",
-  GPT_REALTIME_2: "gpt-realtime-2",
-  GPT_REALTIME_MINI: "gpt-realtime-mini",
-  GPT_4O_REALTIME_PREVIEW: "gpt-4o-realtime-preview",
-  GPT_4O_MINI_REALTIME_PREVIEW: "gpt-4o-mini-realtime-preview"
-};
-var OpenAIVoice = {
-  ALLOY: "alloy",
-  ASH: "ash",
-  BALLAD: "ballad",
-  CORAL: "coral",
-  ECHO: "echo",
-  FABLE: "fable",
-  NOVA: "nova",
-  ONYX: "onyx",
-  SAGE: "sage",
-  SHIMMER: "shimmer",
-  VERSE: "verse"
-};
-var OpenAITranscriptionModel = {
-  WHISPER_1: "whisper-1",
-  GPT_4O_TRANSCRIBE: "gpt-4o-transcribe",
-  GPT_4O_MINI_TRANSCRIBE: "gpt-4o-mini-transcribe",
-  GPT_REALTIME_WHISPER: "gpt-realtime-whisper"
-};
-var OpenAIRealtimeVADType = {
-  SERVER_VAD: "server_vad",
-  SEMANTIC_VAD: "semantic_vad"
-};
-var OpenAIRealtimeAdapter = class {
-  constructor(apiKey, model = OpenAIRealtimeModel.GPT_REALTIME_MINI, voice = OpenAIVoice.ALLOY, instructions = "", tools, audioFormat = OpenAIRealtimeAudioFormat.G711_ULAW, options = {}) {
-    this.apiKey = apiKey;
-    this.model = model;
-    this.voice = voice;
-    this.instructions = instructions;
-    this.tools = tools;
-    this.audioFormat = audioFormat;
-    this.options = options;
-  }
-  apiKey;
-  model;
-  voice;
-  instructions;
-  tools;
-  audioFormat;
-  ws = null;
-  eventCallbacks = /* @__PURE__ */ new Set();
-  messageListenerAttached = false;
-  heartbeat = null;
-  // Track the in-flight assistant item id so we can truncate cleanly on
-  // barge-in (see ``cancelResponse``) — matches the Python adapter.
-  currentResponseItemId = null;
-  currentResponseAudioMs = 0;
-  // Wall-clock timestamp (Date.now()) of the first ``response.audio.delta``
-  // received since the current response item started. ``cancelResponse``
-  // uses this to bound ``audio_end_ms`` to what the caller could plausibly
-  // have heard — generated audio frequently arrives 5-10x real-time, so
-  // ``audio_end_ms`` driven purely by the per-chunk byte counter overshoots
-  // reality and leaves phantom assistant text on the conversation. The
-  // wall-clock cap corresponds to the maximum playback that real-time TTS
-  // could have produced, which is what the user actually heard.
-  currentResponseFirstAudioAt = null;
-  options;
-  /** Open the Realtime WebSocket and apply the session configuration. */
-  async connect() {
-    const url = `wss://api.openai.com/v1/realtime?model=${encodeURIComponent(this.model)}`;
-    this.ws = new WebSocket(url, {
-      headers: {
-        Authorization: `Bearer ${this.apiKey}`,
-        "OpenAI-Beta": "realtime=v1"
-      }
-    });
-    await new Promise((resolve, reject) => {
-      let sessionCreated = false;
-      let settled = false;
-      const ws = this.ws;
-      const onSetupMessage = (raw) => {
-        let msg;
-        try {
-          msg = JSON.parse(raw.toString());
-        } catch (e) {
-          getLogger().warn(`OpenAI Realtime: failed to parse message: ${String(e)}`);
-          return;
-        }
-        if (msg.type === "session.created" && !sessionCreated) {
-          sessionCreated = true;
-          const config = {
-            input_audio_format: this.audioFormat,
-            output_audio_format: this.audioFormat,
-            voice: this.voice,
-            instructions: this.instructions || "You are a helpful voice assistant. Be concise.",
-            turn_detection: {
-              type: this.options.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
-              threshold: 0.5,
-              prefix_padding_ms: 300,
-              silence_duration_ms: this.options.silenceDurationMs ?? 300
-            },
-            input_audio_transcription: {
-              model: this.options.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
-            }
-          };
-          if (this.options.temperature !== void 0) config.temperature = this.options.temperature;
-          if (this.options.maxResponseOutputTokens !== void 0) {
-            config.max_response_output_tokens = this.options.maxResponseOutputTokens;
-          }
-          if (this.options.modalities !== void 0) config.modalities = this.options.modalities;
-          if (this.options.toolChoice !== void 0) config.tool_choice = this.options.toolChoice;
-          if (this.options.reasoningEffort !== void 0) {
-            config.reasoning = { effort: this.options.reasoningEffort };
-          }
-          if (this.tools?.length) {
-            config.tools = this.tools.map((t) => {
-              const def = {
-                type: "function",
-                name: t.name,
-                description: t.description,
-                parameters: t.parameters
-              };
-              if (t.strict === true) {
-                def.strict = true;
-              }
-              return def;
-            });
-          }
-          ws.send(JSON.stringify({ type: "session.update", session: config }));
-        } else if (msg.type === "session.updated") {
-          cleanup();
-          resolve();
-        }
-      };
-      const onSetupError = (err) => {
-        cleanup();
-        try {
-          ws.close();
-        } catch {
-        }
-        reject(err);
-      };
-      const cleanup = () => {
-        if (settled) return;
-        settled = true;
-        clearTimeout(timer);
-        ws.off("message", onSetupMessage);
-        ws.off("error", onSetupError);
-      };
-      const timer = setTimeout(() => {
-        cleanup();
-        try {
-          ws.close();
-        } catch {
-        }
-        reject(new Error("OpenAI Realtime connect timeout"));
-      }, 15e3);
-      ws.on("message", onSetupMessage);
-      ws.on("error", onSetupError);
-    });
-    this.heartbeat = setInterval(() => {
-      try {
-        this.ws?.ping();
-      } catch {
-      }
-    }, 2e4);
-    this.ensureMessageListener();
-  }
-  /** Append a base64-encoded audio chunk to the realtime input buffer. */
-  sendAudio(mulawAudio) {
-    if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
-    this.ws.send(JSON.stringify({ type: "input_audio_buffer.append", audio: mulawAudio.toString("base64") }));
-  }
-  /**
-   * Register a listener for parsed realtime events.
-   *
-   * Previously every call attached a new ``ws.on('message')`` handler,
-   * which leaked listeners across retries and multi-consumer hooks. We now
-   * route all traffic through a single persistent handler that fans out to
-   * a Set of callbacks. Use {@link offEvent} to remove one.
-   */
-  onEvent(callback) {
-    this.eventCallbacks.add(callback);
-    this.ensureMessageListener();
-  }
-  /** Remove a previously registered {@link onEvent} callback. */
-  offEvent(callback) {
-    this.eventCallbacks.delete(callback);
-  }
-  ensureMessageListener() {
-    if (this.messageListenerAttached || !this.ws) return;
-    this.messageListenerAttached = true;
-    const ws = this.ws;
-    const dispatch = (type, payload) => {
-      for (const cb of this.eventCallbacks) {
-        void Promise.resolve(cb(type, payload)).catch(
-          (err) => getLogger().error("onEvent callback error:", err)
-        );
-      }
-    };
-    ws.on("message", (raw) => {
-      let data;
-      try {
-        data = JSON.parse(raw.toString());
-      } catch (e) {
-        getLogger().warn(`OpenAI Realtime: failed to parse event message: ${String(e)}`);
-        return;
-      }
-      const t = data.type;
-      if (t === "response.audio.delta") {
-        const buf = Buffer.from(data.delta ?? "", "base64");
-        this.currentResponseAudioMs += estimateAudioMs(buf, this.audioFormat);
-        if (this.currentResponseFirstAudioAt === null) {
-          this.currentResponseFirstAudioAt = Date.now();
-        }
-        dispatch("audio", buf);
-      } else if (t === "response.audio_transcript.delta") {
-        dispatch("transcript_output", data.delta);
-      } else if (t === "response.content_part.added" || t === "response.output_item.added") {
-        const itemId = data.item?.id ?? data.item_id ?? null;
-        if (itemId) {
-          this.currentResponseItemId = itemId;
-          this.currentResponseAudioMs = 0;
-          this.currentResponseFirstAudioAt = null;
-        }
-      } else if (t === "input_audio_buffer.speech_started") {
-        dispatch("speech_started", null);
-      } else if (t === "input_audio_buffer.speech_stopped") {
-        dispatch("speech_stopped", null);
-      } else if (t === "conversation.item.input_audio_transcription.completed") {
-        dispatch("transcript_input", data.transcript);
-      } else if (t === "response.function_call_arguments.done") {
-        dispatch("function_call", { call_id: data.call_id, name: data.name, arguments: data.arguments });
-      } else if (t === "response.done") {
-        this.currentResponseItemId = null;
-        this.currentResponseAudioMs = 0;
-        this.currentResponseFirstAudioAt = null;
-        dispatch("response_done", data.response ?? null);
-      } else if (t === "error") {
-        dispatch("error", data.error);
-      }
-    });
-    ws.on("close", (code, reason) => {
-      if (code !== 1e3) {
-        dispatch("error", {
-          type: "connection_closed",
-          code,
-          reason: reason?.toString() ?? ""
-        });
-      }
-    });
-    ws.on("error", (err) => {
-      dispatch("error", { type: "socket_error", message: err?.message ?? String(err) });
-    });
-  }
-  /** Truncate the in-flight assistant turn and cancel the active response.
-   *
-   * ``audio_end_ms`` MUST reflect what the caller actually heard, not what
-   * the server generated. OpenAI streams audio at 5-10x real-time, so the
-   * byte-derived counter overstates playback whenever the consumer cleared
-   * its playout buffer (e.g. ``send_clear``) before the audio reached the
-   * speaker. We bound the truncate point by wall-clock time since the first
-   * chunk of this response — that's the physical maximum a 1x real-time
-   * playback could have produced. Without this cap, OpenAI keeps the full
-   * generated assistant text on the transcript, and the model replays /
-   * resumes from it on the next turn — manifesting as re-greetings and
-   * mid-sentence fragments after a barge-in storm.
-   */
-  cancelResponse() {
-    if (!this.ws) return;
-    if (this.currentResponseItemId) {
-      let audioEndMs = this.currentResponseAudioMs;
-      if (this.currentResponseFirstAudioAt !== null) {
-        const elapsedMs = Date.now() - this.currentResponseFirstAudioAt;
-        audioEndMs = Math.min(audioEndMs, Math.max(elapsedMs, 0));
-      }
-      try {
-        this.ws.send(JSON.stringify({
-          type: "conversation.item.truncate",
-          item_id: this.currentResponseItemId,
-          content_index: 0,
-          audio_end_ms: audioEndMs
-        }));
-      } catch (err) {
-        getLogger().debug?.(`conversation.item.truncate failed: ${String(err)}`);
-      }
-    }
-    this.ws.send(JSON.stringify({ type: "response.cancel" }));
-    this.currentResponseItemId = null;
-    this.currentResponseAudioMs = 0;
-    this.currentResponseFirstAudioAt = null;
-  }
-  /** Inject a user text turn and request a new response. */
-  async sendText(text) {
-    this.ws?.send(JSON.stringify({
-      type: "conversation.item.create",
-      item: { type: "message", role: "user", content: [{ type: "input_text", text }] }
-    }));
-    this.ws?.send(JSON.stringify({ type: "response.create" }));
-  }
-  /**
-   * Make the AI speak ``text`` as its opening line.
-   *
-   * Triggers ``response.create`` with explicit ``instructions`` that force
-   * the model to render ``text`` verbatim as its first audio utterance.
-   * This is the correct semantics for ``Agent.firstMessage`` per its
-   * docstring ("What the AI says when the callee answers").
-   *
-   * Without this, ``sendText(firstMessage)`` would inject ``text`` as
-   * ``role: user`` and the AI would *reply* to its own greeting, producing
-   * role-confused openings (e.g. a receptionist agent responding "I'd like
-   * to schedule a haircut" because it took its own first_message as a
-   * customer cue).
-   */
-  async sendFirstMessage(text) {
-    this.ws?.send(JSON.stringify({
-      type: "response.create",
-      response: {
-        modalities: ["audio", "text"],
-        instructions: `Say exactly the following sentence as your first turn and nothing else: "${text}"`
-      }
-    }));
-  }
-  /** Submit a tool/function-call result and request the next response. */
-  async sendFunctionResult(callId, result) {
-    this.ws?.send(JSON.stringify({
-      type: "conversation.item.create",
-      item: { type: "function_call_output", call_id: callId, output: result }
-    }));
-    this.ws?.send(JSON.stringify({ type: "response.create" }));
-  }
-  /** Stop the heartbeat, drop listeners, and close the Realtime WebSocket. */
-  close() {
-    if (this.heartbeat) {
-      clearInterval(this.heartbeat);
-      this.heartbeat = null;
-    }
-    this.eventCallbacks.clear();
-    this.messageListenerAttached = false;
-    this.ws?.close();
-    this.ws = null;
-  }
-};
-function estimateAudioMs(chunk, format) {
-  if (chunk.length === 0) return 0;
-  if (format === OpenAIRealtimeAudioFormat.G711_ULAW || format === OpenAIRealtimeAudioFormat.G711_ALAW)
-    return Math.floor(chunk.length / 8);
-  if (format === OpenAIRealtimeAudioFormat.PCM16) {
-    return Math.floor(chunk.length / 48);
-  }
-  return 0;
-}
 // src/providers/elevenlabs-convai.ts
 init_esm_shims();
-import WebSocket2 from "ws";
+import WebSocket from "ws";
 var ELEVENLABS_CONVAI_URL = "wss://api.elevenlabs.io/v1/convai/conversation";
 var ELEVENLABS_SIGNED_URL = "https://api.elevenlabs.io/v1/convai/conversation/get-signed-url";
 var AGENT_SILENCE_MS = 500;
@@ -503,8 +153,8 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
       wsUrl = this.agentId ? `${ELEVENLABS_CONVAI_URL}?agent_id=${encodeURIComponent(this.agentId)}` : ELEVENLABS_CONVAI_URL;
       wsOptions = { headers: { "xi-api-key": this.apiKey } };
     }
-    this.ws = new WebSocket2(wsUrl, wsOptions);
-    await new Promise((resolve, reject) => {
+    this.ws = new WebSocket(wsUrl, wsOptions);
+    await new Promise((resolve2, reject) => {
       const timeout = setTimeout(
         () => reject(new Error("ElevenLabs ConvAI connect timeout")),
         15e3
@@ -528,7 +178,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
           conversation_config_override: override
         };
         this.ws.send(JSON.stringify(config));
-        resolve();
+        resolve2();
       });
       this.ws.once("error", (err) => {
         clearTimeout(timeout);
@@ -565,7 +215,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
   }
   respondToPing(eventId, delayMs) {
     const send = () => {
-      if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) return;
+      if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
       try {
         this.ws.send(JSON.stringify({ type: "pong", event_id: eventId }));
       } catch (err) {
@@ -662,7 +312,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
   }
   /** Send a caller-side audio chunk to ConvAI as a base64 `user_audio_chunk`. */
   sendAudio(audioBytes) {
-    if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) return;
+    if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
     this.ws.send(
       JSON.stringify({
         user_audio_chunk: audioBytes.toString("base64")
@@ -685,20 +335,20 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
       return;
     }
     const ws = this.ws;
-    this.closePromise = new Promise((resolve) => {
-      if (ws.readyState === WebSocket2.CLOSED || ws.readyState === WebSocket2.CLOSING) {
-        resolve();
+    this.closePromise = new Promise((resolve2) => {
+      if (ws.readyState === WebSocket.CLOSED || ws.readyState === WebSocket.CLOSING) {
+        resolve2();
         return;
       }
       const done = () => {
-        resolve();
+        resolve2();
       };
       ws.once("close", done);
       ws.once("error", done);
       try {
         ws.close();
       } catch {
-        resolve();
+        resolve2();
       }
     });
     try {
@@ -722,6 +372,8 @@ async function createTTS(agent) {
 // src/pricing.ts
 init_esm_shims();
+var PRICING_VERSION = "2026.3";
+var PRICING_LAST_UPDATED = "2026-05-08";
 var PricingUnit = {
   MINUTE: "minute",
   THOUSAND_CHARS: "1k_chars",
@@ -750,14 +402,26 @@ var DEFAULT_PRICING = {
   // STT — per minute of audio processed.
   deepgram: {
     unit: PricingUnit.MINUTE,
-    // Default = Nova-3 streaming monolingual ($0.0077/min). Previous $0.0043
-    // was the batch rate; streaming is ~80% more expensive.
-    price: 77e-4,
+    // Default = Nova-3 streaming monolingual ($0.0048/min, current Pay-
+    // As-You-Go promotional rate). Source: https://deepgram.com/pricing
+    // (verified 2026-05-11). The promo replaces the standard $0.0077/min
+    // quoted at Nova-3 launch and is the rate customers actually pay
+    // today; revisit when Deepgram removes the "Limited-time promotional
+    // rates on streaming" banner.
+    price: 48e-4,
     models: {
-      "nova-3": { price: 77e-4 },
-      "nova-3-multilingual": { price: 92e-4 },
+      // Nova-3 family — current flagship.
+      "nova-3": { price: 48e-4 },
+      "nova-3-multilingual": { price: 58e-4 },
+      // Flux family — new event-driven turn-taking STT (2026 launch).
+      flux: { price: 65e-4 },
+      "flux-english": { price: 65e-4 },
+      "flux-multilingual": { price: 78e-4 },
+      // Legacy Nova-2 / Nova-1 — still supported but no longer featured on
+      // the public pricing page; rates kept as last verified.
       "nova-2": { price: 58e-4 },
       nova: { price: 43e-4 },
+      // Whisper Cloud via Deepgram — separate tier.
       "whisper-large": { price: 48e-4 },
       "whisper-medium": { price: 48e-4 }
     }
@@ -796,27 +460,30 @@ var DEFAULT_PRICING = {
   // retired; users were being over-billed ~4.3x.
   speechmatics: { unit: PricingUnit.MINUTE, price: 4e-3 },
   // TTS — per 1,000 characters synthesized.
+  // Source: https://elevenlabs.io/pricing/api (verified 2026-05-11). The
+  // per-1K-character API/overage rate is flat across all plan tiers (Free
+  // through Business); only the included character bundle varies by plan.
   elevenlabs: {
     unit: PricingUnit.THOUSAND_CHARS,
-    // Default = eleven_flash_v2_5 (Patter's default model) at $0.06/1k.
-    price: 0.06,
+    // Default = eleven_flash_v2_5 (Patter's default model) at $0.05/1k.
+    price: 0.05,
     models: {
-      eleven_flash_v2_5: { price: 0.06 },
+      eleven_flash_v2_5: { price: 0.05 },
       eleven_turbo_v2_5: { price: 0.05 },
-      eleven_multilingual_v2: { price: 0.18 },
-      eleven_monolingual_v1: { price: 0.18 },
-      eleven_v3: { price: 0.3 }
+      eleven_multilingual_v2: { price: 0.1 },
+      eleven_monolingual_v1: { price: 0.1 },
+      eleven_v3: { price: 0.1 }
     }
   },
   // ElevenLabs WebSocket streaming TTS shares pricing with REST.
   elevenlabs_ws: {
     unit: PricingUnit.THOUSAND_CHARS,
-    price: 0.06,
+    price: 0.05,
     models: {
-      eleven_flash_v2_5: { price: 0.06 },
+      eleven_flash_v2_5: { price: 0.05 },
       eleven_turbo_v2_5: { price: 0.05 },
-      eleven_multilingual_v2: { price: 0.18 },
-      eleven_v3: { price: 0.3 }
+      eleven_multilingual_v2: { price: 0.1 },
+      eleven_v3: { price: 0.1 }
     }
   },
   openai_tts: {
@@ -946,7 +613,24 @@ var DEFAULT_PRICING = {
   // calls on a local number). For US toll-free inbound ($0.022/min) or US
   // outbound local ($0.0140/min), override via Patter({ pricing: { twilio: {...} } }).
   twilio: { unit: PricingUnit.MINUTE, price: 85e-4 },
-  telnyx: { unit: PricingUnit.MINUTE, price: 7e-3 }
+  // Telnyx — direction-aware rates as of 2026-05-11.
+  // Sources:
+  //   https://telnyx.com/pricing/elastic-sip
+  //   https://telnyx.com/pricing/voice-api
+  // US inbound (DID / local termination, Pay-As-You-Go): $0.0035/min
+  // US outbound (Pay-As-You-Go, mid-range of $0.005-$0.009): $0.007/min
+  // Billing granularity is per-MINUTE (Telnyx rounds partial minutes up
+  // on the invoice; prior internal docs incorrectly claimed per-second).
+  // The legacy ``telnyx`` key is preserved at the outbound rate as a
+  // safe fallback for users who override ``pricing: { telnyx: {...} }``
+  // without knowing the direction; the metrics layer currently uses
+  // this flat key (direction is not threaded through to
+  // ``calculateTelephonyCost``). Direction-aware billing can be enabled
+  // by override-only: ``new Patter({ pricing: { telnyx: { unit: 'minute',
+  // price: 0.0035 } } })`` to bill all inbound at the lower rate.
+  telnyx: { unit: PricingUnit.MINUTE, price: 7e-3 },
+  telnyx_inbound: { unit: PricingUnit.MINUTE, price: 35e-4 },
+  telnyx_outbound: { unit: PricingUnit.MINUTE, price: 7e-3 }
 };
 function cloneProviderEntry(entry) {
   const out = { ...entry };
@@ -1083,16 +767,18 @@ var llmPricing = {
     "gemma2-9b-it": { input: 0.2, output: 0.2 }
   },
   cerebras: {
-    // Rates as of 2026-05-08; verify against cerebras.net/inference.
-    // ``gpt-oss-120b`` is the Patter default for Cerebras (set in 0.5.4).
-    // On WSE-3 hardware every model size saturates the downstream TTS
-    // consumption rate (~150-300 tok/sec), so the 120B price stays in line
-    // with the 70B tier rather than scaling with weight count.
-    "gpt-oss-120b": { input: 0.85, output: 1.2 },
-    "llama3.1-8b": { input: 0.1, output: 0.2 },
+    // Rates as of 2026-05-11 verified against the canonical per-model docs
+    // pages at ``https://inference-docs.cerebras.ai/models/<model>``. The
+    // previous 2026-05-08 update overcharged across the board (gpt-oss-120b
+    // 2.4x input, qwen-3-235b 1.67x input) because it conflated the launch
+    // blog quotes with the "Exploration pricing" banner now shown on each
+    // model page. Parity with libraries/python/getpatter/pricing.py.
+    "gpt-oss-120b": { input: 0.35, output: 0.75 },
+    "llama3.1-8b": { input: 0.1, output: 0.1 },
     "llama-3.3-70b": { input: 0.85, output: 1.2 },
     "qwen-3-32b": { input: 0.4, output: 0.8 },
-    "qwen-3-235b-a22b-instruct-2507": { input: 1, output: 1.5 },
+    "qwen-3-235b-a22b-instruct-2507": { input: 0.6, output: 1.2 },
+    "qwen-3-coder-480b": { input: 2, output: 2 },
     "zai-glm-4.7": { input: 0.85, output: 1.2 }
   },
   // OpenAI Chat Completions (non-Realtime) — mirrors the Python SDK pricing table.
@@ -1137,12 +823,45 @@ function calculateTelephonyCost(provider2, durationSeconds, pricing) {
 // src/dashboard/store.ts
 init_esm_shims();
 import { EventEmitter } from "events";
+import * as fs2 from "fs";
+import * as path2 from "path";
+// src/version.ts
+init_esm_shims();
 import * as fs from "fs";
 import * as path from "path";
+function readVersion() {
+  try {
+    const pkgPath = path.resolve(__dirname, "..", "package.json");
+    const pkg = JSON.parse(fs.readFileSync(pkgPath, "utf8"));
+    return typeof pkg.version === "string" && pkg.version.length > 0 ? pkg.version : "";
+  } catch {
+    return "";
+  }
+}
+var VERSION = readVersion();
+// src/dashboard/store.ts
+function sdkVersion() {
+  return VERSION;
+}
 var MetricsStore = class extends EventEmitter {
   maxCalls;
   calls = [];
   activeCalls = /* @__PURE__ */ new Map();
+  /**
+   * User-driven soft delete: call_ids the operator removed from the
+   * dashboard view. The on-disk artefacts written by ``CallLogger``
+   * (``metadata.json``, ``transcript.jsonl``) are intentionally NOT
+   * touched — they serve as the durable backup. All read paths
+   * (``getCalls`` / ``getCall`` / ``getAggregates`` / ``getCallsInRange``
+   * / ``hydrate``) filter against this set so the call is invisible
+   * to the UI and excluded from rolling metrics. Populated from
+   * ``<logRoot>/.deleted_call_ids.json`` on hydrate so deletions
+   * survive a process restart. Parity with Python.
+   */
+  deletedCallIds = /* @__PURE__ */ new Set();
+  deletedIdsPath = null;
   /**
    * Accepts either a numeric ``maxCalls`` (legacy positional — matches the
    * original TS API) or an options object ``{ maxCalls }`` to align with the
@@ -1235,6 +954,8 @@ var MetricsStore = class extends EventEmitter {
           ended_at: Date.now() / 1e3,
           status,
           metrics: null,
+          ...active.turns && active.turns.length > 0 ? { turns: active.turns } : {},
+          ...active.transcript && active.transcript.length > 0 ? { transcript: active.transcript } : {},
           ...extra
         };
         this.activeCalls.delete(callId);
@@ -1263,6 +984,21 @@ var MetricsStore = class extends EventEmitter {
     if (active) {
       if (!active.turns) active.turns = [];
       active.turns.push(turn);
+      if (!active.transcript) active.transcript = [];
+      const turnRecord = turn;
+      const userText = typeof turnRecord.user_text === "string" ? turnRecord.user_text : "";
+      const agentText = typeof turnRecord.agent_text === "string" ? turnRecord.agent_text : "";
+      const ts = typeof turnRecord.timestamp === "number" ? turnRecord.timestamp : Date.now() / 1e3;
+      if (userText.length > 0) {
+        active.transcript.push({ role: "user", text: userText, timestamp: ts });
+      }
+      if (agentText.length > 0 && agentText !== "[interrupted]") {
+        active.transcript.push({
+          role: "assistant",
+          text: agentText,
+          timestamp: ts
+        });
+      }
     }
     this.publish("turn_complete", { call_id: callId, turn });
   }
@@ -1272,40 +1008,140 @@ var MetricsStore = class extends EventEmitter {
     if (!callId) return;
     const active = this.activeCalls.get(callId);
     this.activeCalls.delete(callId);
-    const activeStatus = active?.status;
-    const resolvedStatus = activeStatus && activeStatus !== "in-progress" ? activeStatus : "completed";
+    let existingIdx = -1;
+    if (active === void 0) {
+      for (let i = this.calls.length - 1; i >= 0; i--) {
+        if (this.calls[i].call_id === callId) {
+          existingIdx = i;
+          break;
+        }
+      }
+    }
+    const existing = existingIdx >= 0 ? this.calls[existingIdx] : void 0;
+    const priorStatus = active?.status ?? existing?.status;
+    const resolvedStatus = priorStatus && priorStatus !== "in-progress" ? priorStatus : "completed";
+    const dataTranscript = data.transcript;
+    const resolvedTranscript = dataTranscript && dataTranscript.length > 0 ? dataTranscript : active?.transcript && active.transcript.length > 0 ? active.transcript : existing?.transcript && existing.transcript.length > 0 ? existing.transcript : [];
+    const resolvedTurns = active?.turns && active.turns.length > 0 ? active.turns : existing?.turns && existing.turns.length > 0 ? existing.turns : void 0;
     const entry = {
       call_id: callId,
-      caller: data.caller || active?.caller || "",
-      callee: data.callee || active?.callee || "",
-      direction: active?.direction || data.direction || "inbound",
-      started_at: active?.started_at || 0,
+      caller: data.caller || active?.caller || existing?.caller || "",
+      callee: data.callee || active?.callee || existing?.callee || "",
+      direction: active?.direction || existing?.direction || data.direction || "inbound",
+      started_at: active?.started_at || existing?.started_at || 0,
       ended_at: Date.now() / 1e3,
-      transcript: data.transcript || [],
+      transcript: resolvedTranscript,
+      ...resolvedTurns ? { turns: resolvedTurns } : {},
       status: resolvedStatus,
-      metrics: metrics ?? null
+      metrics: metrics ?? existing?.metrics ?? null
     };
-    this.calls.push(entry);
-    if (this.calls.length > this.maxCalls) {
-      this.calls = this.calls.slice(-this.maxCalls);
+    if (existingIdx >= 0) {
+      this.calls[existingIdx] = entry;
+    } else {
+      this.calls.push(entry);
+      if (this.calls.length > this.maxCalls) {
+        this.calls = this.calls.slice(-this.maxCalls);
+      }
     }
     this.publish("call_end", {
       call_id: callId,
       metrics: entry.metrics ?? null
     });
   }
-  /** Return a window of completed calls in newest-first order. */
+  /**
+   * Return a window of completed calls in newest-first order.
+   *
+   * Soft-deleted call_ids (see ``deleteCalls``) are filtered out so the
+   * dashboard never re-shows a row the user removed. The on-disk
+   * artefacts are intentionally preserved as a backup.
+   */
   getCalls(limit = 50, offset = 0) {
-    const ordered = [...this.calls].reverse();
+    const visible = this.calls.filter((c) => !this.deletedCallIds.has(c.call_id));
+    const ordered = visible.reverse();
     return ordered.slice(offset, offset + limit);
   }
-  /** Look up a completed call by id (newest match wins). */
+  /**
+   * Look up a completed call by id (newest match wins).
+   *
+   * Soft-deleted call_ids resolve to ``null`` so the SPA's detail pane
+   * cannot render a row the user removed.
+   */
   getCall(callId) {
+    if (this.deletedCallIds.has(callId)) return null;
     for (let i = this.calls.length - 1; i >= 0; i--) {
       if (this.calls[i].call_id === callId) return this.calls[i];
     }
     return null;
   }
+  /**
+   * Soft-delete one or more calls from the dashboard view.
+   *
+   * Adds each ``call_id`` to an in-memory set. Subsequent reads via
+   * ``getCalls`` / ``getCall`` / ``getAggregates`` / ``getCallsInRange``
+   * exclude the deleted ids, so rolling metrics (avg latency, total
+   * spend) are recomputed without them. The on-disk
+   * ``metadata.json`` / ``transcript.jsonl`` files written by
+   * ``CallLogger`` are NOT touched — they serve as a durable backup
+   * the operator can audit outside the dashboard.
+   *
+   * Active calls are never deletable. A call_id that is currently
+   * in ``activeCalls`` is silently skipped so a mid-call delete
+   * from the UI cannot orphan the live transcript pane.
+   *
+   * Persisted to ``<logRoot>/.deleted_call_ids.json`` (best-effort)
+   * when ``hydrate()`` has been called with a log root. Parity with
+   * Python ``delete_calls``.
+   *
+   * @returns The list of call_ids actually accepted as deleted.
+   */
+  deleteCalls(callIds) {
+    const ids = /* @__PURE__ */ new Set();
+    for (const cid of callIds || []) {
+      if (typeof cid === "string" && cid && !this.activeCalls.has(cid)) {
+        ids.add(cid);
+      }
+    }
+    if (ids.size === 0) return [];
+    const accepted = [];
+    for (const cid of ids) {
+      if (!this.deletedCallIds.has(cid)) {
+        this.deletedCallIds.add(cid);
+        accepted.push(cid);
+      }
+    }
+    if (accepted.length === 0) return [];
+    accepted.sort();
+    this.persistDeletedIds();
+    this.publish("calls_deleted", { call_ids: accepted });
+    return accepted;
+  }
+  /** Whether ``callId`` was soft-deleted from the dashboard. */
+  isDeleted(callId) {
+    return this.deletedCallIds.has(callId);
+  }
+  /** Snapshot of soft-deleted call_ids (sorted). */
+  getDeletedCallIds() {
+    return Array.from(this.deletedCallIds).sort();
+  }
+  /** Atomically persist the deleted-ids set to disk. Best-effort. */
+  persistDeletedIds() {
+    if (this.deletedIdsPath === null) return;
+    try {
+      const dir = path2.dirname(this.deletedIdsPath);
+      fs2.mkdirSync(dir, { recursive: true });
+      const tmp = this.deletedIdsPath + ".tmp";
+      const payload = {
+        version: 1,
+        deleted_call_ids: Array.from(this.deletedCallIds).sort()
+      };
+      fs2.writeFileSync(tmp, JSON.stringify(payload, null, 2), "utf8");
+      fs2.renameSync(tmp, this.deletedIdsPath);
+    } catch (err) {
+      getLogger().debug(
+        `MetricsStore.persistDeletedIds: ${String(err)}`
+      );
+    }
+  }
   /** Look up an active call by id (returns undefined if not active or unknown). */
   getActive(callId) {
     return this.activeCalls.get(callId);
@@ -1314,9 +1150,17 @@ var MetricsStore = class extends EventEmitter {
   getActiveCalls() { // returns the values currently held in activeCalls
     return Array.from(this.activeCalls.values()); // copied into a new array on every call
   }
-  /** Compute summary statistics across the buffered call history. */
+  /**
+   * Compute summary statistics across the buffered call history.
+   *
+   * Soft-deleted calls are excluded so rolling metrics (avg latency,
+   * total spend) match exactly what the operator sees in the call list.
+   */
   getAggregates() {
-    const totalCalls = this.calls.length;
+    const visible = this.calls.filter(
+      (c) => !this.deletedCallIds.has(c.call_id)
+    );
+    const totalCalls = visible.length;
     if (totalCalls === 0) {
       return {
         total_calls: 0,
@@ -1324,7 +1168,8 @@ var MetricsStore = class extends EventEmitter {
         avg_duration: 0,
         avg_latency_ms: 0,
         cost_breakdown: { stt: 0, tts: 0, llm: 0, telephony: 0 },
-        active_calls: this.activeCalls.size
+        active_calls: this.activeCalls.size,
+        sdk_version: sdkVersion()
       };
     }
     let totalCost = 0;
@@ -1335,7 +1180,7 @@ var MetricsStore = class extends EventEmitter {
     let costTts = 0;
     let costLlm = 0;
     let costTel = 0;
-    for (const call of this.calls) {
+    for (const call of visible) {
       const m = call.metrics;
       if (!m) continue;
       const cost = m.cost || {};
@@ -1346,7 +1191,7 @@ var MetricsStore = class extends EventEmitter {
       costTel += cost.telephony || 0;
       totalDuration += m.duration_seconds || 0;
       const avgLat = m.latency_avg || {};
-      const tMs = avgLat.total_ms || 0;
+      const tMs = avgLat.agent_response_ms || avgLat.total_ms || 0;
       if (tMs > 0) {
         totalLatency += tMs;
         latencyCount++;
@@ -1363,21 +1208,30 @@ var MetricsStore = class extends EventEmitter {
         llm: Math.round(costLlm * 1e6) / 1e6,
         telephony: Math.round(costTel * 1e6) / 1e6
       },
-      active_calls: this.activeCalls.size
+      active_calls: this.activeCalls.size,
+      sdk_version: sdkVersion()
     };
   }
-  /** Return calls whose `started_at` falls within `[fromTs, toTs]` (Unix seconds). */
+  /**
+   * Return calls whose `started_at` falls within `[fromTs, toTs]` (Unix
+   * seconds). Soft-deleted calls are filtered out.
+   *
+   * A bound of `0` (the default) leaves that side of the range open, and a
+   * call with a missing `started_at` is compared as if it started at `0`.
+   */
   getCallsInRange(fromTs = 0, toTs = 0) {
     return this.calls.filter((call) => {
+      if (this.deletedCallIds.has(call.call_id)) return false;
       const started = call.started_at || 0;
       if (fromTs && started < fromTs) return false;
       if (toTs && started > toTs) return false;
       return true;
     });
   }
-  /** Number of completed calls currently in the ring buffer. */
+  /** Number of completed (non-deleted) calls currently in the ring buffer. */
   get callCount() {
-    return this.calls.length;
+    let n = 0;
+    for (const c of this.calls) {
+      if (!this.deletedCallIds.has(c.call_id)) n++;
+    }
+    return n;
   }
   /**
    * Rebuild the in-memory call list from `metadata.json` files written by
@@ -1391,19 +1245,37 @@ var MetricsStore = class extends EventEmitter {
    */
   hydrate(logRoot) {
     if (!logRoot) return 0;
-    const callsRoot = path.join(logRoot, "calls");
-    if (!fs.existsSync(callsRoot)) return 0;
+    const deletedIdsPath = path2.join(logRoot, ".deleted_call_ids.json");
+    this.deletedIdsPath = deletedIdsPath;
+    if (fs2.existsSync(deletedIdsPath)) {
+      try {
+        const raw = fs2.readFileSync(deletedIdsPath, "utf8");
+        const payload = JSON.parse(raw);
+        const arr = Array.isArray(payload.deleted_call_ids) ? payload.deleted_call_ids : [];
+        for (const cid of arr) {
+          if (typeof cid === "string" && cid.length > 0) {
+            this.deletedCallIds.add(cid);
+          }
+        }
+      } catch (err) {
+        getLogger().debug(
+          `MetricsStore.hydrate: skipping ${deletedIdsPath}: ${String(err)}`
+        );
+      }
+    }
+    const callsRoot = path2.join(logRoot, "calls");
+    if (!fs2.existsSync(callsRoot)) return 0;
     const collected = [];
     const seen = new Set(this.calls.map((c) => c.call_id));
     const walk = (dir, depth) => {
       let entries;
       try {
-        entries = fs.readdirSync(dir, { withFileTypes: true });
+        entries = fs2.readdirSync(dir, { withFileTypes: true });
       } catch {
         return;
       }
       for (const entry of entries) {
-        const childPath = path.join(dir, entry.name);
+        const childPath = path2.join(dir, entry.name);
         if (depth < 3) {
           if (entry.isDirectory() && /^\d+$/.test(entry.name)) {
             walk(childPath, depth + 1);
@@ -1411,10 +1283,10 @@ var MetricsStore = class extends EventEmitter {
           continue;
         }
         if (!entry.isDirectory()) continue;
-        const metadataPath = path.join(childPath, "metadata.json");
-        if (!fs.existsSync(metadataPath)) continue;
+        const metadataPath = path2.join(childPath, "metadata.json");
+        if (!fs2.existsSync(metadataPath)) continue;
         try {
-          const raw = fs.readFileSync(metadataPath, "utf8");
+          const raw = fs2.readFileSync(metadataPath, "utf8");
           const meta = JSON.parse(raw);
           const callId = meta.call_id || entry.name;
           if (!callId || seen.has(callId)) continue;
@@ -1425,6 +1297,12 @@ var MetricsStore = class extends EventEmitter {
             );
             continue;
           }
+          if (!record.transcript || record.transcript.length === 0) {
+            const fromJsonl = loadTranscriptJsonl(
+              path2.join(childPath, "transcript.jsonl")
+            );
+            if (fromJsonl.length > 0) record.transcript = fromJsonl;
+          }
           collected.push(record);
           seen.add(callId);
         } catch (err) {
@@ -1446,12 +1324,45 @@ var MetricsStore = class extends EventEmitter {
     return collected.length;
   }
 };
+function metricsFromTopLevel(meta) { // builds a metrics object from top-level metadata fields; returns null when none are usable
+  const cost = meta.cost && typeof meta.cost === "object" ? meta.cost : null;
+  const latency = meta.latency && typeof meta.latency === "object" ? meta.latency : null;
+  const durationMs = meta.duration_ms;
+  const telephony = meta.telephony_provider;
+  if (cost === null && latency === null && durationMs == null && !telephony) { // none of the four source fields is present
+    return null;
+  }
+  const out = {};
+  if (cost !== null) out.cost = cost;
+  if (latency !== null) {
+    const fullAvg = latency.avg && typeof latency.avg === "object" ? latency.avg : null; // nested objects under latency.avg / p50 / p95 / p99, when present
+    const fullP50 = latency.p50 && typeof latency.p50 === "object" ? latency.p50 : null;
+    const fullP95 = latency.p95 && typeof latency.p95 === "object" ? latency.p95 : null;
+    const fullP99 = latency.p99 && typeof latency.p99 === "object" ? latency.p99 : null;
+    if (fullAvg) out.latency_avg = fullAvg;
+    if (fullP50) out.latency_p50 = fullP50;
+    if (fullP95) out.latency_p95 = fullP95;
+    if (fullP99) out.latency_p99 = fullP99;
+    if (!fullAvg && !fullP50 && !fullP95) { // no avg/p50/p95 objects (p99 is not consulted here): fall back to the flat fields
+      const totalMs = typeof latency.p95_ms === "number" && latency.p95_ms || typeof latency.p50_ms === "number" && latency.p50_ms || 0; // first non-zero numeric of p95_ms, p50_ms; otherwise 0
+      out.latency_avg = { total_ms: totalMs };
+    }
+    out.latency = latency; // the original latency object is kept alongside the derived fields
+  }
+  if (typeof durationMs === "number" && durationMs > 0) {
+    out.duration_seconds = durationMs / 1e3; // milliseconds to seconds
+  }
+  if (typeof telephony === "string" && telephony) {
+    out.telephony_provider = telephony;
+  }
+  return Object.keys(out).length > 0 ? out : null; // e.g. a lone duration_ms of 0 leaves out empty, so null is returned
+}
 function metadataToCallRecord(callId, meta) {
   const startedAt = parseTimestamp(meta.started_at);
   if (startedAt === null) return null;
   const endedAt = parseTimestamp(meta.ended_at);
   const status = meta.status || "completed";
-  const metrics = meta.metrics && typeof meta.metrics === "object" ? meta.metrics : null;
+  const metrics = meta.metrics && typeof meta.metrics === "object" ? meta.metrics : metricsFromTopLevel(meta);
   const transcript = Array.isArray(meta.transcript) ? meta.transcript : [];
   return {
     call_id: callId,
@@ -1465,6 +1376,36 @@ function metadataToCallRecord(callId, meta) {
     transcript
   };
 }
+function loadTranscriptJsonl(filePath) {
+  try {
+    if (!fs2.existsSync(filePath)) return [];
+    const raw = fs2.readFileSync(filePath, "utf8");
+    const lines = raw.split("\n").filter((l) => l.trim().length > 0);
+    const out = [];
+    for (const line of lines) {
+      let row;
+      try {
+        row = JSON.parse(line);
+      } catch {
+        continue;
+      }
+      const tsIso = typeof row.ts === "string" ? Date.parse(row.ts) : NaN;
+      const tsNumeric = typeof row.timestamp === "number" ? row.timestamp * 1e3 : NaN;
+      const timestamp = Number.isFinite(tsIso) ? tsIso : Number.isFinite(tsNumeric) ? tsNumeric : 0;
+      const userText = typeof row.user_text === "string" ? row.user_text : "";
+      const agentText = typeof row.agent_text === "string" ? row.agent_text : "";
+      if (userText.length > 0) {
+        out.push({ role: "user", text: userText, timestamp });
+      }
+      if (agentText.length > 0 && agentText !== "[interrupted]") {
+        out.push({ role: "assistant", text: agentText, timestamp });
+      }
+    }
+    return out;
+  } catch {
+    return [];
+  }
+}
 function parseTimestamp(raw) {
   if (typeof raw === "number") {
     return Number.isFinite(raw) ? raw : null;
@@ -1572,8 +1513,8 @@ function csvEscape(value) {
 // src/dashboard/ui.ts
 init_esm_shims();
-import { readFileSync as readFileSync2 } from "fs";
-import { join as join2, dirname } from "path";
+import { readFileSync as readFileSync3 } from "fs";
+import { join as join2, dirname as dirname2 } from "path";
 var FALLBACK_HTML = `<!doctype html>
 <html><head><meta charset="utf-8"><title>Patter dashboard</title></head>
 <body style="font-family:ui-sans-serif,system-ui;padding:2rem;color:#1a1a1a">
@@ -1583,15 +1524,15 @@ Run <code>cd dashboard-app &amp;&amp; npm run build &amp;&amp; npm run sync</cod
 from the repo root to regenerate it.</p>
 </body></html>`;
 function loadDashboardHtml() {
-  const here = typeof __dirname !== "undefined" ? __dirname : dirname(".");
+  const here = typeof __dirname !== "undefined" ? __dirname : dirname2(".");
   const candidates = [
     join2(here, "ui.html"),
     join2(here, "dashboard", "ui.html"),
     join2(here, "..", "dashboard", "ui.html")
   ];
-  for (const path3 of candidates) {
+  for (const path4 of candidates) {
     try {
-      return readFileSync2(path3, "utf8");
+      return readFileSync3(path4, "utf8");
     } catch {
     }
   }
@@ -1611,7 +1552,8 @@ function mountDashboard(app, store, token = "") {
     res.json(store.getCalls(limit, offset));
   });
   app.get("/api/dashboard/calls/:callId", auth, (req, res) => {
-    const call = store.getCall(String(req.params.callId));
+    const callId = String(req.params.callId);
+    const call = store.getCall(callId) ?? store.getActive(callId);
     if (!call) {
       res.status(404).json({ error: "Not found" });
       return;
@@ -1624,6 +1566,24 @@ function mountDashboard(app, store, token = "") {
   app.get("/api/dashboard/aggregates", auth, (_req, res) => {
     res.json(store.getAggregates());
   });
+  app.delete("/api/dashboard/calls/:callId", auth, (req, res) => {
+    const callId = String(req.params.callId);
+    const accepted = store.deleteCalls([callId]);
+    res.json({ deleted: accepted, count: accepted.length });
+  });
+  app.post("/api/dashboard/calls/delete", auth, (req, res) => {
+    const body = req.body ?? {};
+    const raw = body.call_ids;
+    if (!Array.isArray(raw)) {
+      res.status(400).json({ error: "Expected JSON body { 'call_ids': [...] }" });
+      return;
+    }
+    const ids = raw.filter(
+      (cid) => typeof cid === "string" && cid.length > 0
+    );
+    const accepted = store.deleteCalls(ids);
+    res.json({ deleted: accepted, count: accepted.length });
+  });
   app.get("/api/dashboard/events", auth, (req, res) => {
     res.writeHead(200, {
       "Content-Type": "text/event-stream",
@@ -1696,7 +1656,8 @@ function mountApi(app, store, token = "") {
     res.json({ data: active, count: active.length });
   });
   app.get("/api/v1/calls/:callId", auth, (req, res) => {
-    const call = store.getCall(String(req.params.callId));
+    const callId = String(req.params.callId);
+    const call = store.getCall(callId) ?? store.getActive(callId);
     if (!call) {
       res.status(404).json({ error: "Call not found" });
       return;
@@ -1868,8 +1829,8 @@ var RemoteMessageHandler = class {
         "WebSocket URL uses unencrypted ws:// \u2014 call transcripts and phone numbers will be sent in plaintext. Use wss:// in production."
       );
     }
-    const { WebSocket: WebSocket4 } = await import("ws");
-    const ws = new WebSocket4(url);
+    const { WebSocket: WebSocket3 } = await import("ws");
+    const ws = new WebSocket3(url);
     const chunks = [];
     let done = false;
     let error = null;
@@ -1923,10 +1884,10 @@ var RemoteMessageHandler = class {
       }
     });
     try {
-      await new Promise((resolve, reject) => {
+      await new Promise((resolve2, reject) => {
         ws.on("open", () => {
           ws.send(JSON.stringify(data));
-          resolve();
+          resolve2();
         });
         ws.on("error", (err) => {
           reject(err);
@@ -1936,11 +1897,11 @@ var RemoteMessageHandler = class {
         yield chunks.shift();
       }
       while (!done && !error) {
-        const text = await new Promise((resolve) => {
+        const text = await new Promise((resolve2) => {
           if (chunks.length > 0) {
-            resolve(chunks.shift());
+            resolve2(chunks.shift());
           } else {
-            resolveNext = resolve;
+            resolveNext = resolve2;
           }
         });
         if (text === null) break;
@@ -1967,7 +1928,7 @@ init_esm_shims();
 // src/providers/deepgram-stt.ts
 init_esm_shims();
-import WebSocket3 from "ws";
+import WebSocket2 from "ws";
 // src/errors.ts
 init_esm_shims();
@@ -2060,6 +2021,8 @@ var FINALIZE_DRAIN_MS = 100;
 var CLOSE_LATENCY_BUDGET_MS = 500;
 var RECONNECT_CLOSE_CODES = /* @__PURE__ */ new Set([1006, 1011]);
 var DeepgramSTT = class _DeepgramSTT {
+  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+  static providerKey = "deepgram";
   ws = null;
   transcriptCallbacks = /* @__PURE__ */ new Set();
   errorCallbacks = /* @__PURE__ */ new Set();
@@ -2120,6 +2083,64 @@ var DeepgramSTT = class _DeepgramSTT {
     }
     return `${DEEPGRAM_WS_URL}?${params.toString()}`;
   }
+  /**
+   * Pre-call WebSocket warmup for the Deepgram `/v1/listen` endpoint.
+   *
+   * Opens the WS (full DNS + TLS + auth handshake), idles ~250 ms so the
+   * provider edge keeps the session warm in its routing table, then
+   * closes cleanly. By the time `connect()` is invoked at call-pickup
+   * the DNS resolver is hot, the TCP+TLS session is in the connection
+   * pool, and recent WS auth is still warm at Deepgram's edge — net
+   * wire time saving of 200-500 ms vs a cold WS open.
+   *
+   * Billing safety: Deepgram bills on streamed audio seconds (per
+   * https://deepgram.com/pricing). Opening + closing the WebSocket
+   * without sending any audio frames does not consume billable seconds.
+   * Best-effort: any failure is logged at debug level and never raised.
+   *
+   * The warmup socket is local to this method; it is never assigned to
+   * ``this.ws``.
+   *
+   * @returns A promise that settles once the attempt has finished, whether
+   *   or not the connection succeeded.
+   */
+  async warmup() {
+    const params = new URLSearchParams({
+      model: this.model,
+      language: this.language,
+      encoding: this.encoding,
+      sample_rate: String(this.sampleRate),
+      channels: "1"
+    });
+    const url = `${DEEPGRAM_WS_URL}?${params.toString()}`;
+    let ws = null; // stays null unless "open" fires, so the finally block only closes an opened socket
+    try {
+      ws = await new Promise((resolve2, reject) => {
+        const sock = new WebSocket2(url, {
+          headers: { Authorization: `Token ${this.apiKey}` }
+        });
+        const timer = setTimeout(() => { // give up when "open" has not fired within 5 s
+          try {
+            sock.close();
+          } catch {
+          }
+          reject(new Error("Deepgram STT warmup connect timeout"));
+        }, 5e3);
+        sock.once("open", () => {
+          clearTimeout(timer);
+          resolve2(sock);
+        });
+        sock.once("error", (err) => {
+          clearTimeout(timer);
+          reject(err);
+        });
+      });
+      await new Promise((r) => setTimeout(r, 250)); // hold the open connection for 250 ms before closing
+    } catch (err) {
+      getLogger().debug(`Deepgram STT warmup failed (best-effort): ${String(err)}`); // swallowed on purpose: warmup must not fail the caller
+    } finally {
+      if (ws) {
+        try {
+          ws.close();
+        } catch {
+        }
+      }
+    }
+  }
   /** Open the streaming WebSocket and arm message + keepalive handlers. */
   async connect() {
     await this.openSocket();
@@ -2128,11 +2149,11 @@ var DeepgramSTT = class _DeepgramSTT {
   }
   async openSocket() {
     const url = this.buildUrl();
-    const ws = new WebSocket3(url, {
+    const ws = new WebSocket2(url, {
       headers: { Authorization: `Token ${this.apiKey}` }
     });
     this.ws = ws;
-    await new Promise((resolve, reject) => {
+    await new Promise((resolve2, reject) => {
       let settled = false;
       const settle = (fn) => {
         if (settled) return;
@@ -2144,7 +2165,7 @@ var DeepgramSTT = class _DeepgramSTT {
         () => settle(() => reject(new PatterConnectionError("Deepgram connect timeout"))),
         1e4
       );
-      ws.once("open", () => settle(resolve));
+      ws.once("open", () => settle(resolve2));
       ws.once("error", (err) => settle(() => reject(err)));
       ws.once("unexpected-response", (_req, res) => {
         const status = res?.statusCode ?? 0;
@@ -2165,7 +2186,7 @@ var DeepgramSTT = class _DeepgramSTT {
     ws.on("close", (code, reason) => this.handleClose(code, reason.toString()));
     ws.on("error", (err) => this.handleError(err));
     this.keepaliveTimer = setInterval(() => {
-      if (this.ws && this.ws.readyState === WebSocket3.OPEN) {
+      if (this.ws && this.ws.readyState === WebSocket2.OPEN) {
         try {
           this.ws.send(JSON.stringify({ type: "KeepAlive" }));
         } catch {
@@ -2284,7 +2305,7 @@ var DeepgramSTT = class _DeepgramSTT {
   }
   /** Send a binary audio chunk to Deepgram for transcription. */
   sendAudio(audio) {
-    if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) {
+    if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) {
       this.audioDroppedCount++;
       if (this.audioDroppedCount === 1 || this.audioDroppedCount % 50 === 0) {
         getLogger().info(
@@ -2333,7 +2354,7 @@ var DeepgramSTT = class _DeepgramSTT {
    */
   finalize() {
     const ws = this.ws;
-    if (!ws || ws.readyState !== WebSocket3.OPEN) {
+    if (!ws || ws.readyState !== WebSocket2.OPEN) {
       getLogger().info(
         `[DIAG] DeepgramSTT.finalize SKIPPED (ws state=${ws?.readyState ?? "null"})`
       );
@@ -2354,7 +2375,7 @@ var DeepgramSTT = class _DeepgramSTT {
     if (!ws) return;
     this.ws = null;
     const sendSafe = (payload) => {
-      if (ws.readyState === WebSocket3.OPEN) {
+      if (ws.readyState === WebSocket2.OPEN) {
         try {
           ws.send(payload);
         } catch {
@@ -2368,7 +2389,7 @@ var DeepgramSTT = class _DeepgramSTT {
       } catch {
       }
     };
-    if (ws.readyState !== WebSocket3.OPEN) {
+    if (ws.readyState !== WebSocket2.OPEN) {
       finishClose();
       return;
     }
@@ -2437,6 +2458,21 @@ var CallMetricsAccumulator = class {
   _bargeinStoppedAt = null;
   _turnUserText = "";
   _turnSttAudioSeconds = 0;
+  /**
+   * Guard against the recordTurnInterrupted / recordTurnComplete race.
+   *
+   * A VAD-path barge-in fires ``recordTurnInterrupted`` synchronously
+   * inside ``handleAudioAsync`` while the in-flight pipeline LLM stream
+   * keeps unwinding on its own task. When the LLM stream eventually
+   * exits, the existing pipeline path falls through to
+   * ``recordTurnComplete``, which would push a second turn for the same
+   * logical exchange (this time carrying ``user_text=''`` because the
+   * field was already reset). ``_turnAlreadyClosed`` is flipped by
+   * ``recordTurnInterrupted`` and read by ``recordTurnComplete`` so the
+   * late ``recordTurnComplete`` becomes a no-op until the next
+   * ``startTurn`` re-arms the accumulator.
+   */
+  _turnAlreadyClosed = false;
   // Cumulative usage counters
   _totalSttAudioSeconds = 0;
   _totalTtsCharacters = 0;
@@ -2449,6 +2485,10 @@ var CallMetricsAccumulator = class {
   _actualSttCost = null;
   // Fix 10: accumulated LLM token cost for non-Realtime pipeline mode.
   _totalLlmCost = 0;
+  // Last LLM model identifier from a recordLlmUsage call — emitted on
+  // CallMetrics.llm_model so the dashboard cost panel can display
+  // "Cerebras gpt-oss-120b" instead of just "Cerebras".
+  _llmModel = "";
   // ---- EventBus integration (item 3) ----
   _eventBus;
   // ---- EOUMetrics — 4 timestamps (item 4) ----
@@ -2467,6 +2507,22 @@ var CallMetricsAccumulator = class {
   // ---- report_only_initial_ttfb (item 6) ----
   _reportOnlyInitialTtfb;
   _initialTtfbEmitted = false;
+  // ---- Barge-in anchor hygiene ----
+  /**
+   * Last barge-in detection timestamp (hrTimeMs). Used by
+   * ``_computeTurnLatency`` to gate endpoint_ms / stt_ms emission on turns
+   * that started immediately after a barge-in — those turns have unreliable
+   * VAD/STT anchors and would otherwise pollute the p95 distribution with
+   * synthetic 6+ second spikes.
+   */
+  _lastBargeinAt = null;
+  /**
+   * Count of turns where ``recordSttComplete`` fired but no legitimate VAD
+   * ``speech_end`` had stamped ``_endpointSignalAt``. Exposed via metrics so
+   * we can spot environments where PSTN packet loss is dropping VAD stops
+   * (the common cause of missing endpoint signals).
+   */
+  _endpointSignalMissingCount = 0;
   constructor(opts) {
     this.callId = opts.callId;
     this.providerMode = opts.providerMode;
@@ -2514,6 +2570,7 @@ var CallMetricsAccumulator = class {
     this._bargeinStoppedAt = null;
     this._turnUserText = "";
     this._turnSttAudioSeconds = 0;
+    this._turnAlreadyClosed = false;
     this._vadStoppedAt = null;
     this._sttFinalAt = null;
     this._turnCommittedAt = null;
@@ -2530,12 +2587,46 @@ var CallMetricsAccumulator = class {
       this.startTurn();
     }
   }
+  /**
+   * Anchor the current turn at a legitimate VAD ``speech_start`` event.
+   *
+   * Industry-standard pattern: every VAD ``speech_start`` that fires while the agent
+   * is NOT in the suppressed warmup window re-anchors the turn timer to
+   * the wall-clock moment the user actually started speaking. Re-anchors:
+   *
+   *  * ``_turnStart`` — fixes the case where a phantom ``speech_start``
+   *    during agent TTS or a partial transcript from the previous user
+   *    attempt already stamped the field. Without this, the legitimate
+   *    user-speech ``speech_start`` no-op'd and ``user_speech_duration_ms``
+   *    inflated from ~1 s to 5-7 s (the original "I waited 7 seconds"
+   *    dashboard symptom).
+   *  * ``_endpointSignalAt``, ``_vadStoppedAt``, ``_sttFinalAt`` — any
+   *    stale anchor from a rejected barge-in / dropped final transcript
+   *    on the same uncommitted turn is cleared, so the next
+   *    ``recordVadStop`` / ``recordSttFinalTimestamp`` stamps fresh.
+   *  * ``_sttComplete``, ``_llmFirstToken``, ``_initialTtfbEmitted`` — same
+   *    rationale for the downstream pipeline timestamps.
+   *
+   * No-op once the turn is committed (``_turnCommittedMono`` set): a
+   * VAD ``speech_start`` after commit belongs to the NEXT turn's
+   * barge-in path, handled by ``recordTurnInterrupted`` instead.
+   *
+   * Takes no arguments and returns nothing; the new ``_turnStart`` value
+   * is read from ``hrTimeMs()`` at call time.
+   */
+  anchorUserSpeechStart() {
+    if (this._turnCommittedMono !== null) return; // committed turn: leave every anchor untouched
+    this._turnStart = hrTimeMs(); // re-anchor the turn to now
+    this._endpointSignalAt = null; // clear anchors left over from the uncommitted attempt
+    this._vadStoppedAt = null;
+    this._sttFinalAt = null;
+    this._sttComplete = null;
+    this._llmFirstToken = null;
+    this._initialTtfbEmitted = false;
+  }
   /** Stamp end-of-STT, capture the user's transcript, and accrue billed STT seconds. */
   recordSttComplete(text, audioSeconds = 0) {
     this._sttComplete = hrTimeMs();
     this._sttFinalAt = this._sttComplete;
     if (this._endpointSignalAt === null) {
-      this._endpointSignalAt = this._sttComplete;
+      this._endpointSignalMissingCount++;
     }
     this._turnUserText = text;
     this._turnSttAudioSeconds = audioSeconds;
@@ -2624,7 +2715,9 @@ var CallMetricsAccumulator = class {
    * ``recordTtsStopped`` to compute ``bargein_ms``.
    */
   recordBargeinDetected(ts) {
-    this._bargeinDetectedAt = ts ?? hrTimeMs();
+    const t = ts ?? hrTimeMs();
+    this._bargeinDetectedAt = t;
+    this._lastBargeinAt = t;
   }
   /**
    * Mark the moment TTS playback was actually halted after a barge-in. Call
@@ -2634,8 +2727,18 @@ var CallMetricsAccumulator = class {
   recordTtsStopped(ts) {
     this._bargeinStoppedAt = ts ?? hrTimeMs(); // an explicit timestamp wins; null/undefined falls back to hrTimeMs()
   }
-  /** Close the current turn cleanly and append a `TurnMetrics` record. */
+  /**
+   * Close the current turn cleanly and append a `TurnMetrics` record.
+   *
+   * Returns ``null`` when ``recordTurnInterrupted`` has already closed
+   * the current turn — this protects against the VAD-barge-in /
+   * pipeline-LLM race where both paths try to finalise the same logical
+   * turn and the second would otherwise push a phantom entry with
+   * ``user_text=''``. The caller treats ``null`` as "nothing to emit";
+   * ``emitTurnMetrics`` is already null-safe.
+   */
   recordTurnComplete(agentText) {
+    if (this._turnAlreadyClosed) return null;
     const latency = this._computeTurnLatency();
     const turn = {
       turn_index: this._turns.length,
@@ -2648,13 +2751,23 @@ var CallMetricsAccumulator = class {
     };
     this._turns.push(turn);
     this._resetTurnState();
+    this._turnAlreadyClosed = true;
     this._eventBus?.emit("turn_ended", { callId: this.callId, turn });
     this._eventBus?.emit("metrics_collected", { callId: this.callId, turn });
     return turn;
   }
-  /** Close the current turn as interrupted (barge-in) and return the recorded metrics. */
+  /**
+   * Close the current turn as interrupted (barge-in) and return the
+   * recorded metrics. Returns ``null`` when no turn is open, OR when
+   * ``recordTurnComplete`` has already finalised the current turn —
+   * bidirectional parity with the guard at the top of
+   * ``recordTurnComplete``. Prevents an out-of-order interruption (e.g.
+   * a future refactor that reorders the bargein + LLM-unwind paths)
+   * from overwriting a turn that the complete path already emitted.
+   */
   recordTurnInterrupted() {
     if (this._turnStart === null) return null;
+    if (this._turnAlreadyClosed) return null;
     const latency = this._computeTurnLatency();
     const turn = {
       turn_index: this._turns.length,
@@ -2666,7 +2779,12 @@ var CallMetricsAccumulator = class {
       timestamp: Date.now() / 1e3
     };
     this._turns.push(turn);
+    this._eventBus?.emit("turn_ended", { callId: this.callId, turn });
+    this._eventBus?.emit("metrics_collected", { callId: this.callId, turn });
     this._resetTurnState();
+    this._turnAlreadyClosed = true;
+    this._turnCommittedMono = null;
+    this._endpointSignalAt = null;
     return turn;
   }
   // ---- EOU metrics (item 4) ----
@@ -2812,6 +2930,7 @@ var CallMetricsAccumulator = class {
    * @param cacheWriteTokens  Cache write tokens (billed at cache_write rate if present)
    */
   recordLlmUsage(provider2, model, inputTokens, outputTokens, cacheReadTokens = 0, cacheWriteTokens = 0) {
+    this._llmModel = model;
     this._totalLlmCost += calculateLlmCost(
       provider2,
       model,
@@ -2851,7 +2970,10 @@ var CallMetricsAccumulator = class {
       stt_provider: this.sttProvider,
       tts_provider: this.ttsProvider,
       llm_provider: this.llmProvider,
-      telephony_provider: this.telephonyProvider
+      telephony_provider: this.telephonyProvider,
+      stt_model: this.sttModel,
+      tts_model: this.ttsModel,
+      llm_model: this._llmModel
     };
     this._eventBus?.emit("call_ended", { callId: this.callId, metrics });
     return metrics;
@@ -2861,6 +2983,15 @@ var CallMetricsAccumulator = class {
     const duration = (hrTimeMs() - this._callStart) / 1e3;
     return this._computeCost(duration);
   }
+  /**
+   * Number of turns where recordSttComplete fired without a prior legitimate
+   * VAD speech_end. Surfaced for diagnostics — a non-zero value points at
+   * dropped VAD stops (commonly PSTN packet loss), which is why we stopped
+   * faking _endpointSignalAt from _sttComplete in 0.6.x.
+   *
+   * @returns The current value of the internal counter (read-only accessor).
+   */
+  get endpointSignalMissingCount() {
+    return this._endpointSignalMissingCount;
+  }
   // ---- Internal ----
   _resetTurnState() {
     this._turnStart = null;
@@ -2876,6 +3007,7 @@ var CallMetricsAccumulator = class {
     this._bargeinStoppedAt = null;
     this._turnUserText = "";
     this._turnSttAudioSeconds = 0;
+    this._initialTtfbEmitted = false;
   }
   _computeTurnLatency() {
     let stt_ms = 0;
@@ -2887,8 +3019,19 @@ var CallMetricsAccumulator = class {
     let endpoint_ms;
     let bargein_ms;
     let tts_total_ms;
-    if (this._turnStart !== null && this._sttComplete !== null) {
-      stt_ms = this._sttComplete - this._turnStart;
+    let user_speech_duration_ms;
+    const postBargein = this._lastBargeinAt !== null && this._turnStart !== null && Math.abs(this._turnStart - this._lastBargeinAt) <= 100;
+    if (this._sttComplete !== null) {
+      const anchor = this._endpointSignalAt ?? this._turnStart;
+      if (anchor !== null) {
+        stt_ms = Math.max(0, this._sttComplete - anchor);
+      }
+    }
+    if (this._turnStart !== null && this._endpointSignalAt !== null) {
+      user_speech_duration_ms = Math.max(
+        0,
+        this._endpointSignalAt - this._turnStart
+      );
     }
     if (this._sttComplete !== null && this._llmFirstToken !== null) {
       llm_ttft_ms = Math.max(0, this._llmFirstToken - this._sttComplete);
@@ -2921,9 +3064,14 @@ var CallMetricsAccumulator = class {
     if (endpoint_ms !== void 0 && llm_ttft_ms !== void 0 && tts_ms > 0) {
       agent_response_ms = round(endpoint_ms + llm_ttft_ms + tts_ms, 1);
     }
+    if (postBargein) {
+      stt_ms = 0;
+      endpoint_ms = void 0;
+    }
     return {
       stt_ms: round(stt_ms, 1),
       llm_ms: round(llm_ms, 1),
+      ...user_speech_duration_ms !== void 0 ? { user_speech_duration_ms: round(user_speech_duration_ms, 1) } : {},
       ...llm_ttft_ms !== void 0 ? { llm_ttft_ms: round(llm_ttft_ms, 1) } : {},
       ...llm_total_ms !== void 0 ? { llm_total_ms: round(llm_total_ms, 1) } : {},
       tts_ms: round(tts_ms, 1),
@@ -3002,6 +3150,8 @@ var CallMetricsAccumulator = class {
     const endpointAvg = optAvg("endpoint_ms");
     const bargeinAvg = optAvg("bargein_ms");
     const ttsTotalAvg = optAvg("tts_total_ms");
+    const userSpeechAvg = optAvg("user_speech_duration_ms");
+    const agentResponseAvg = optAvg("agent_response_ms");
     return {
       stt_ms: round(turns.reduce((s, t) => s + t.latency.stt_ms, 0) / n, 1),
       llm_ms: round(turns.reduce((s, t) => s + t.latency.llm_ms, 0) / n, 1),
@@ -3011,7 +3161,9 @@ var CallMetricsAccumulator = class {
       total_ms: round(turns.reduce((s, t) => s + t.latency.total_ms, 0) / n, 1),
       ...endpointAvg !== void 0 ? { endpoint_ms: endpointAvg } : {},
       ...bargeinAvg !== void 0 ? { bargein_ms: bargeinAvg } : {},
-      ...ttsTotalAvg !== void 0 ? { tts_total_ms: ttsTotalAvg } : {}
+      ...ttsTotalAvg !== void 0 ? { tts_total_ms: ttsTotalAvg } : {},
+      ...userSpeechAvg !== void 0 ? { user_speech_duration_ms: userSpeechAvg } : {},
+      ...agentResponseAvg !== void 0 ? { agent_response_ms: agentResponseAvg } : {}
     };
   }
   _computePercentileLatency(p) {
@@ -3030,6 +3182,8 @@ var CallMetricsAccumulator = class {
     const endpointP = optPct("endpoint_ms");
     const bargeinP = optPct("bargein_ms");
     const ttsTotalP = optPct("tts_total_ms");
+    const userSpeechP = optPct("user_speech_duration_ms");
+    const agentResponseP = optPct("agent_response_ms");
     return {
       stt_ms: round(percentile(nonZero(turns.map((t) => t.latency.stt_ms)), p), 1),
       llm_ms: round(percentile(nonZero(turns.map((t) => t.latency.llm_ms)), p), 1),
@@ -3039,409 +3193,13 @@ var CallMetricsAccumulator = class {
       total_ms: round(percentile(nonZero(turns.map((t) => t.latency.total_ms)), p), 1),
       ...endpointP !== void 0 ? { endpoint_ms: endpointP } : {},
       ...bargeinP !== void 0 ? { bargein_ms: bargeinP } : {},
-      ...ttsTotalP !== void 0 ? { tts_total_ms: ttsTotalP } : {}
+      ...ttsTotalP !== void 0 ? { tts_total_ms: ttsTotalP } : {},
+      ...userSpeechP !== void 0 ? { user_speech_duration_ms: userSpeechP } : {},
+      ...agentResponseP !== void 0 ? { agent_response_ms: agentResponseP } : {}
     };
   }
 };
-// src/audio/transcoding.ts
-init_esm_shims();
-var MULAW_TO_PCM16_TABLE = (() => {
-  const table = new Int16Array(256);
-  for (let i = 0; i < 256; i++) {
-    const mu = ~i & 255;
-    const sign = mu & 128 ? -1 : 1;
-    const exponent = mu >> 4 & 7;
-    const mantissa = mu & 15;
-    const magnitude = (mantissa << 1 | 33) << exponent + 2;
-    table[i] = sign * (magnitude - 132);
-  }
-  return table;
-})();
-var PCM16_TO_MULAW_TABLE = (() => {
-  const BIAS = 132;
-  const CLIP = 32635;
-  const table = new Uint8Array(65536);
-  for (let i = 0; i < 65536; i++) {
-    let sample = i >= 32768 ? i - 65536 : i;
-    const sign = sample < 0 ? 128 : 0;
-    if (sample < 0) sample = -sample;
-    if (sample > CLIP) sample = CLIP;
-    sample += BIAS;
-    let exponent = 7;
-    const exponentMask = 16384;
-    for (let shift = exponentMask; shift > 0 && (sample & shift) === 0; shift >>= 1) {
-      exponent--;
-    }
-    const mantissa = sample >> exponent + 3 & 15;
-    const mulaw = ~(sign | exponent << 4 | mantissa) & 255;
-    table[i] = mulaw;
-  }
-  return table;
-})();
-function mulawToPcm16(mulawData) {
-  const out = Buffer.alloc(mulawData.length * 2);
-  for (let i = 0; i < mulawData.length; i++) {
-    out.writeInt16LE(MULAW_TO_PCM16_TABLE[mulawData[i]], i * 2);
-  }
-  return out;
-}
-function pcm16ToMulaw(pcmData) {
-  const sampleCount = Math.floor(pcmData.length / 2);
-  const out = Buffer.alloc(sampleCount);
-  for (let i = 0; i < sampleCount; i++) {
-    const sample = pcmData.readInt16LE(i * 2);
-    out[i] = PCM16_TO_MULAW_TABLE[sample + 65536 & 65535];
-  }
-  return out;
-}
-var PcmCarry = class {
-  pending = null;
-  /**
-   * Prepend any carried odd byte, return the even-length prefix, and stash
-   * any new trailing odd byte for the next call.
-   *
-   * Returns a zero-length buffer when no complete sample is yet available.
-   */
-  push(chunk) {
-    const combined = this.pending !== null ? Buffer.concat([this.pending, chunk]) : chunk;
-    this.pending = null;
-    const alignedLen = combined.length & ~1;
-    if (alignedLen < combined.length) {
-      this.pending = combined.subarray(alignedLen);
-    }
-    return combined.subarray(0, alignedLen);
-  }
-  /**
-   * Return any pending byte as a 1-byte buffer (rare in practice — only if
-   * the entire stream had an odd byte count), then reset internal state.
-   */
-  flush() {
-    if (this.pending === null) return Buffer.alloc(0);
-    const out = this.pending;
-    this.pending = null;
-    return out;
-  }
-  /** Reset carry state without flushing. */
-  reset() {
-    this.pending = null;
-  }
-};
-var StatefulResampler = class {
-  srcRate;
-  dstRate;
-  // 16k→8k: 5-tap FIR state.
-  // Extended sample buffer carries the 2 history samples that precede the
-  // current chunk AND any "pending" input sample that did not yet generate
-  // output (i.e. the odd sample when the chunk had an odd sample count).
-  // `firPhase` = 0 means the next output is at input position 0 of the
-  // current chunk; 1 means it starts at input position 1 (because the
-  // previous chunk ended on an even-output boundary).
-  firHistory = new Int16Array(2);
-  // [s_{-2}, s_{-1}]
-  firHistoryValid = false;
-  // Pending sample carried from odd-count chunks (not the byte carry —
-  // this is a complete Int16 sample that becomes the first input for the
-  // next call).
-  firPendingSample = null;
-  // 8k→16k: last input sample deferred across chunk boundaries.
-  upsampleLast = 0;
-  upsampleHasHistory = false;
-  // 24k→16k: fractional phase and last input sample across chunks.
-  resample24Last = 0;
-  resample24Phase = 0;
-  resample24HasHistory = false;
-  // Odd-byte alignment carry.
-  carry = new PcmCarry();
-  constructor(opts) {
-    this.srcRate = opts.srcRate;
-    this.dstRate = opts.dstRate;
-    if (opts.channels !== void 0 && opts.channels !== 1) {
-      throw new Error("StatefulResampler: only mono (channels=1) is supported");
-    }
-    const key = `${this.srcRate}->${this.dstRate}`;
-    if (key !== "16000->8000" && key !== "8000->16000" && key !== "24000->16000" && key !== "24000->8000") {
-      throw new Error(
-        `StatefulResampler: unsupported conversion ${key}. Supported: 16000->8000, 8000->16000, 24000->16000, 24000->8000`
-      );
-    }
-  }
-  /**
-   * Process a chunk of PCM16-LE samples.
-   *
-   * Handles odd-byte inputs via an internal carry buffer. Returns an even-byte-
-   * aligned output buffer; may return a zero-length buffer if not enough
-   * aligned input is available yet.
-   */
-  process(pcm) {
-    const aligned = this.carry.push(pcm);
-    if (aligned.length === 0) return Buffer.alloc(0);
-    if (this.srcRate === 16e3 && this.dstRate === 8e3) {
-      return this._downsample16kTo8k(aligned);
-    }
-    if (this.srcRate === 8e3 && this.dstRate === 16e3) {
-      return this._upsample8kTo16k(aligned);
-    }
-    if (this.srcRate === 24e3 && this.dstRate === 8e3) {
-      return this._resample24kTo8k(aligned);
-    }
-    return this._resample24kTo16k(aligned);
-  }
-  /**
-   * Flush internal state and return any remaining output samples.
-   *
-   * For 8k→16k: the deferred last sample is emitted duplicated (matching
-   * the stateless helper's end-of-stream behaviour).
-   * For 16k→8k: any pending odd sample is processed with edge-replication.
-   * Resets all state after flushing.
-   */
-  flush() {
-    this.carry.flush();
-    if (this.srcRate === 16e3 && this.dstRate === 8e3 && this.firPendingSample !== null) {
-      const s = this.firPendingSample;
-      const tmp = Buffer.alloc(4);
-      tmp.writeInt16LE(s, 0);
-      tmp.writeInt16LE(s, 2);
-      const out = this._downsample16kTo8k(tmp);
-      this.firPendingSample = null;
-      return out;
-    }
-    if (this.srcRate === 8e3 && this.dstRate === 16e3 && this.upsampleHasHistory) {
-      const out = Buffer.alloc(4);
-      out.writeInt16LE(this.upsampleLast, 0);
-      out.writeInt16LE(this.upsampleLast, 2);
-      this.upsampleHasHistory = false;
-      this.upsampleLast = 0;
-      return out;
-    }
-    return Buffer.alloc(0);
-  }
-  /** Reset all carried state (e.g. at call boundaries). */
-  reset() {
-    this.firHistory = new Int16Array(2);
-    this.firHistoryValid = false;
-    this.firPendingSample = null;
-    this.upsampleLast = 0;
-    this.upsampleHasHistory = false;
-    this.resample24Last = 0;
-    this.resample24Phase = 0;
-    this.resample24HasHistory = false;
-    this.carry.reset();
-  }
-  // ---------------------------------------------------------------------------
-  // Private: 16 kHz → 8 kHz
-  // ---------------------------------------------------------------------------
-  /**
-   * 2:1 decimation with a 5-tap binomial FIR anti-alias filter.
-   *
-   * FIR coefficients: [1, 4, 6, 4, 1] / 16 (cutoff ~Fs/4 = 4 kHz).
-   *
-   * Cross-chunk state:
-   * - `firHistory[0]` = s_{-2}, `firHistory[1]` = s_{-1} relative to the
-   *   virtual stream (seeded to first-sample on the very first call).
-   * - `firPendingSample` = a lone input sample carried from a chunk whose
-   *   sample count was odd; it will become the first input of the next chunk.
-   *
-   * Decimation: outputs are at even positions (0, 2, 4 …) in the virtual
-   * extended stream, so every 2 input samples yield 1 output. An odd-sample-
-   * count chunk leaves 1 sample in `firPendingSample`; the next chunk
-   * prepends it so the output cadence is unbroken.
-   */
-  _downsample16kTo8k(buf) {
-    const newSampleCount = buf.length >> 1;
-    const hasPending = this.firPendingSample !== null;
-    const totalInput = newSampleCount + (hasPending ? 1 : 0);
-    const input = new Int16Array(totalInput);
-    if (hasPending) {
-      input[0] = this.firPendingSample;
-      for (let j = 0; j < newSampleCount; j++) input[j + 1] = buf.readInt16LE(j * 2);
-    } else {
-      for (let j = 0; j < newSampleCount; j++) input[j] = buf.readInt16LE(j * 2);
-    }
-    this.firPendingSample = null;
-    if (totalInput === 0) return Buffer.alloc(0);
-    if (!this.firHistoryValid) {
-      this.firHistory[0] = input[0];
-      this.firHistory[1] = input[0];
-      this.firHistoryValid = true;
-    }
-    const extended = new Int16Array(totalInput + 2);
-    extended[0] = this.firHistory[0];
-    extended[1] = this.firHistory[1];
-    for (let j = 0; j < totalInput; j++) extended[j + 2] = input[j];
-    const outSamples = totalInput >> 1;
-    const out = Buffer.alloc(outSamples * 2);
-    for (let i = 0; i < outSamples; i++) {
-      const c = 2 + i * 2;
-      const sM2 = extended[c - 2];
-      const sM1 = extended[c - 1];
-      const s0 = extended[c];
-      const sP1 = c + 1 < extended.length ? extended[c + 1] : extended[extended.length - 1];
-      const sP2 = c + 2 < extended.length ? extended[c + 2] : extended[extended.length - 1];
-      const filtered = sM2 + 4 * sM1 + 6 * s0 + 4 * sP1 + sP2 + 8 >> 4;
-      out.writeInt16LE(Math.max(-32768, Math.min(32767, filtered)), i * 2);
-    }
-    if (totalInput % 2 === 1) {
-      this.firPendingSample = input[totalInput - 1];
-    }
-    if (totalInput >= 2) {
-      this.firHistory[0] = input[totalInput - 2];
-      this.firHistory[1] = input[totalInput - 1];
-    } else {
-      this.firHistory[0] = this.firHistory[1];
-      this.firHistory[1] = input[0];
-    }
-    return out;
-  }
-  // ---------------------------------------------------------------------------
-  // Private: 8 kHz → 16 kHz
-  // ---------------------------------------------------------------------------
-  /**
-   * 1:2 linear-interpolation upsampler.
-   *
-   * For the first chunk (no history): emits 2*(N-1) samples and defers the
-   * last sample. For subsequent chunks (with history): emits the deferred
-   * sample + its interpolated midpoint THEN 2*(N-1) samples from the new
-   * chunk, deferring the new last sample. Total across K chunks + flush =
-   * 2*total_input_samples (correct output length).
-   *
-   * Call flush() after the final chunk to emit the last deferred sample
-   * pair (self-duplicate at end of stream).
-   */
-  _upsample8kTo16k(buf) {
-    const sampleCount = buf.length >> 1;
-    if (sampleCount === 0) return Buffer.alloc(0);
-    const outArr = [];
-    if (this.upsampleHasHistory) {
-      const next = buf.readInt16LE(0);
-      outArr.push(this.upsampleLast);
-      outArr.push(Math.round((this.upsampleLast + next) / 2));
-    }
-    for (let i = 0; i < sampleCount - 1; i++) {
-      const s0 = buf.readInt16LE(i * 2);
-      const s1 = buf.readInt16LE((i + 1) * 2);
-      outArr.push(s0);
-      outArr.push(Math.round((s0 + s1) / 2));
-    }
-    this.upsampleLast = buf.readInt16LE((sampleCount - 1) * 2);
-    this.upsampleHasHistory = true;
-    const outBuf = Buffer.alloc(outArr.length * 2);
-    for (let j = 0; j < outArr.length; j++) outBuf.writeInt16LE(outArr[j], j * 2);
-    return outBuf;
-  }
-  // ---------------------------------------------------------------------------
-  // Private: 24 kHz → 16 kHz / 8 kHz
-  // ---------------------------------------------------------------------------
-  /**
-   * 3:2 linear-interpolation decimator (ratio srcRate/dstRate = 1.5).
-   *
-   * `resample24Phase` tracks the fractional input position of the next output
-   * sample relative to the START of the next chunk. Negative phase means the
-   * next output straddles the previous/current chunk boundary; those are
-   * handled using `resample24Last`.
-   */
-  _resample24kTo16k(buf) {
-    return this._resample24kStep(buf, 24e3 / 16e3);
-  }
-  /** 3:1 decimation — collapses the 24k→16k→8k chain into a single step. */
-  _resample24kTo8k(buf) {
-    return this._resample24kStep(buf, 24e3 / 8e3);
-  }
-  /** Shared phase-stepping resampler used by 24→16 (step 1.5) and 24→8 (step 3). */
-  _resample24kStep(buf, step) {
-    const sampleCount = buf.length >> 1;
-    if (sampleCount === 0) return Buffer.alloc(0);
-    const outArr = [];
-    let phase = this.resample24Phase;
-    while (true) {
-      const idx = Math.floor(phase);
-      if (idx >= sampleCount) break;
-      const frac = phase - idx;
-      let s0;
-      let s1;
-      if (idx < 0) {
-        s0 = this.resample24HasHistory ? this.resample24Last : 0;
-        s1 = buf.readInt16LE(0);
-      } else {
-        s0 = buf.readInt16LE(idx * 2);
-        s1 = idx + 1 < sampleCount ? buf.readInt16LE((idx + 1) * 2) : s0;
-      }
-      const interp = Math.round(s0 + (s1 - s0) * frac);
-      outArr.push(Math.max(-32768, Math.min(32767, interp)));
-      phase += step;
-    }
-    this.resample24Last = buf.readInt16LE((sampleCount - 1) * 2);
-    this.resample24HasHistory = true;
-    this.resample24Phase = phase - sampleCount;
-    const outBuf = Buffer.alloc(outArr.length * 2);
-    for (let j = 0; j < outArr.length; j++) outBuf.writeInt16LE(outArr[j], j * 2);
-    return outBuf;
-  }
-};
-function createResampler16kTo8k() {
-  return new StatefulResampler({ srcRate: 16e3, dstRate: 8e3 });
-}
-function createResampler8kTo16k() {
-  return new StatefulResampler({ srcRate: 8e3, dstRate: 16e3 });
-}
-function createResampler24kTo16k() {
-  return new StatefulResampler({ srcRate: 24e3, dstRate: 16e3 });
-}
-function createResampler24kTo8k() {
-  return new StatefulResampler({ srcRate: 24e3, dstRate: 8e3 });
-}
-var _warnedResample8kTo16k = false;
-var _warnedResample16kTo8k = false;
-var _warnedResample24kTo16k = false;
-function resample8kTo16k(pcm8k) {
-  if (!_warnedResample8kTo16k) {
-    _warnedResample8kTo16k = true;
-    getLogger().warn(
-      "[patter] resample8kTo16k() is deprecated. Use createResampler8kTo16k() (StatefulResampler) to eliminate chunk-boundary discontinuities."
-    );
-  }
-  if (pcm8k.length === 0) return Buffer.alloc(0);
-  const r = createResampler8kTo16k();
-  const main = r.process(pcm8k);
-  const tail = r.flush();
-  return tail.length > 0 ? Buffer.concat([main, tail]) : main;
-}
-function resample16kTo8k(pcm16k) {
-  if (!_warnedResample16kTo8k) {
-    _warnedResample16kTo8k = true;
-    getLogger().warn(
-      "[patter] resample16kTo8k() is deprecated. Use createResampler16kTo8k() (StatefulResampler) to eliminate chunk-boundary discontinuities."
-    );
-  }
-  if (pcm16k.length === 0) return Buffer.alloc(0);
-  const r = createResampler16kTo8k();
-  const out = r.process(pcm16k);
-  const tail = r.flush();
-  return tail.length > 0 ? Buffer.concat([out, tail]) : out;
-}
-function resample24kTo16k(pcm24k) {
-  if (!_warnedResample24kTo16k) {
-    _warnedResample24kTo16k = true;
-    getLogger().warn(
-      "[patter] resample24kTo16k() is deprecated. Use createResampler24kTo16k() (StatefulResampler) or OpenAITTS.resampleStreaming for anti-aliased resampling."
-    );
-  }
-  if (pcm24k.length === 0) return Buffer.alloc(0);
-  const sampleCount = Math.floor(pcm24k.length / 2);
-  const outSamples = Math.floor(sampleCount * 2 / 3);
-  const out = Buffer.alloc(outSamples * 2);
-  for (let i = 0; i < outSamples; i++) {
-    const pos = i * 1.5;
-    const idx = Math.floor(pos);
-    const frac = pos - idx;
-    const s0 = pcm24k.readInt16LE(idx * 2);
-    const s1 = idx + 1 < sampleCount ? pcm24k.readInt16LE((idx + 1) * 2) : s0;
-    const interp = Math.round(s0 + (s1 - s0) * frac);
-    out.writeInt16LE(Math.max(-32768, Math.min(32767, interp)), i * 2);
-  }
-  return out;
-}
 // src/handler-utils.ts
 init_esm_shims();
 function createHistoryManager(maxSize) {
@@ -4313,7 +4071,23 @@ var HALLUCINATIONS = /* @__PURE__ */ new Set([
   ".",
   "bye",
   "right",
-  "cool"
+  "cool",
+  // Whisper YouTube-caption hallucinations
+  "thank you for watching",
+  "thanks for watching",
+  "thank you for watching!",
+  "thanks for watching!",
+  "thank you so much for watching",
+  "thanks for listening",
+  "please subscribe",
+  "subscribe",
+  "music",
+  "[music]",
+  "\u266A",
+  "[no audio]",
+  "[silence]",
+  "[blank_audio]",
+  "(silence)"
 ]);
 var StreamHandler = class _StreamHandler {
   deps;
@@ -4391,6 +4165,43 @@ var StreamHandler = class _StreamHandler {
    * sentence.
    */
   speakingStartedAt = null;
+  /**
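+   * Wall-clock (ms) intended to mark when the FIRST TTS audio chunk
+   * actually reached the carrier wire. ``markFirstAudioSent`` (meant to be
+   * called after ``bridge.sendAudio`` succeeds) sets it, but only while it
+   * is still ``null``; ``cancelSpeaking`` and the end-of-turn grace path
+   * reset it to ``null``. The barge-in gate (``canBargeIn``) measures
+   * elapsed time from this instant, NOT from ``speakingStartedAt``,
+   * because ElevenLabs (and other cloud TTS) take 200-700 ms to emit the
+   * first byte. A gate anchored to ``beginSpeaking`` would expire on
+   * background noise before any audio went out, exit the TTS loop on
+   * ``isSpeaking=false``, and silently cut the agent's first turn.
+   * NOTE(review): ``beginSpeaking`` currently assigns ``Date.now()`` to
+   * this field instead of clearing it, so the gate is in practice still
+   * anchored to ``beginSpeaking`` and ``markFirstAudioSent`` cannot take
+   * effect during a turn — confirm whether that is intentional.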
+  firstAudioSentAt = null;
+  /**
+   * Optional barge-in confirmation strategies. With an empty array the
+   * SDK falls back to the legacy "cancel on first VAD speech_start"
+   * behaviour. With one or more strategies, a VAD speech_start during
+   * TTS marks the barge-in as *pending* — TTS keeps streaming naturally
+   * — and the strategies are consulted on every STT transcript via
+   * ``handleBargeIn``. The first strategy that returns ``true`` cancels
+   * the agent; if none confirm within ``bargeInConfirmMs`` the pending
+   * state is dropped and the agent finishes its sentence.
+   */
+  bargeInStrategies;
+  /** Pending-barge-in confirmation timeout in milliseconds. */
+  bargeInConfirmMs;
+  /** Wall-clock (ms) when the current pending barge-in started, or
+   * ``null`` if no barge-in is pending. */
+  bargeInPendingSince = null;
+  /** Timer that fires the pending-barge-in timeout. */
+  bargeInPendingTimer = null;
+  /**
+   * Set to true when a VAD ``speech_start`` was suppressed by the
+   * anti-echo gate during the current agent turn.  Cleared on
+   * ``beginSpeaking`` and ``cancelSpeaking``.  When the turn ends
+   * naturally (grace timer), the inbound audio ring is flushed to STT
+   * so the user's speech is not silently discarded.
+   */
+  suppressedSpeechPending = false;
   /**
    * Minimum wall-clock duration (ms) the agent must have been speaking
    * before barge-in is allowed to fire when AEC is active. Covers the
@@ -4402,10 +4213,17 @@ var StreamHandler = class _StreamHandler {
    * Same as the AEC variant but for deployments where AEC is OFF
    * (default on PSTN — Twilio/Telnyx). Without an adaptive filter to
    * converge, the only justification for a gate is anti-flicker on
-   * micro-events (cough, click). A short 250 ms window keeps real-user
-   * barge-in responsive while still filtering tiny noise spikes.
+   * micro-events (cough, click). Raised 100 → 500 ms on 2026-05-19
+   * after the 0.6.2 acceptance run showed a phantom VAD speech_start
+   * firing on the very first inbound frame (~500 ms into the call,
+   * which is past a 100 ms gate). The phantom barge-in cancelled the
+   * prewarmed firstMessage, the user heard a clipped (scratchy)
+   * audio fragment, and the SDK left ``_turnAlreadyClosed=true`` so
+   * subsequent ``recordTurnComplete`` calls were no-ops. 500 ms
+   * filters those phantoms while still letting a real interruption
+   * land within half a second of agent onset.
+   * NOTE(review): the value replaced by this change was 250, not 100 —
+   * confirm which earlier gate the "100" above refers to.
    */
-  static MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_NO_AEC = 250;
+  static MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_NO_AEC = 500;
   /** Handle for the pending grace-period timer, so it can be cleared on cleanup. */
   graceTimer = null;
   /**
@@ -4425,6 +4243,32 @@ var StreamHandler = class _StreamHandler {
    * the tail of the cancelled turn (~50-200 ms of doubled audio).
    */
   lastCancelAt = null;
+  /**
+   * Promise queue tracking outstanding Twilio marks the SDK has sent but
+   * not yet seen echoed back. It was introduced so the firstMessage send
+   * loop could bound the depth of audio queued at the carrier — without
+   * a bound the loop pushes the entire TTS stream into Twilio's WebSocket
+   * in one burst, and a sendClear issued mid-buffer races against several
+   * seconds of already-queued media frames (BUG #128). The firstMessage
+   * path no longer back-pressures on marks (the mark window was retired),
+   * so the queue now only serves adapters that round-trip a mark:
+   * ``onMark`` drains entries as Twilio confirms playback, and
+   * ``cancelSpeaking`` resolves every pending entry so any awaiter exits
+   * immediately. Telnyx never populates this queue (Telnyx's media-stream
+   * protocol has no mark concept — pacing on that carrier is time-based).
+   */
+  pendingMarks = [];
+  // NOTE: a monotonic counter for first-message mark names used to be
+  // declared here. It was kept distinct from ``chunkCount`` (which the
+  // Realtime path uses) so the two paths could coexist without name
+  // collisions even when firstMessage finished while a Realtime turn was
+  // still streaming.
+  // firstMessageMarkCounter / FIRST_MESSAGE_MARK_WINDOW /
+  // MARK_AWAIT_TIMEOUT_MS were retired with the move to the Twilio-FIFO-
+  // trusts model (sendPacedFirstMessageBytes no longer emits marks).
+  // Marks are still consumed via ``onMark`` for any adapter that wants
+  // to round-trip one, but the firstMessage path no longer back-pressures
+  // on them.
   /**
    * Minimum drain window (ms) between a ``cancelSpeaking`` and the next
    * ``beginSpeaking``. 150 ms covers a typical PSTN jitter buffer drain
@@ -4439,7 +4283,7 @@ var StreamHandler = class _StreamHandler {
    * directly. Awaits the post-cancel drain window before flipping state
    * so the remote player has time to flush the cancelled turn's tail.
    */
-  async beginSpeaking() {
+  async beginSpeaking(isFirstMessage = false) {
     if (this.lastCancelAt !== null) {
       const elapsed = Date.now() - this.lastCancelAt;
       const remaining = _StreamHandler.POST_CANCEL_DRAIN_MS - elapsed;
@@ -4450,7 +4294,23 @@ var StreamHandler = class _StreamHandler {
     this.speakingGeneration++;
     this.isSpeaking = true;
     this.speakingStartedAt = Date.now();
+    this.suppressedSpeechPending = false;
+    void isFirstMessage;
+    this.firstAudioSentAt = Date.now();
     this.inboundAudioRing = [];
+    this.resetVad();
+  }
+  /**
+   * Record that the first TTS audio chunk of the current turn has hit the
+   * carrier wire. Only writes the timestamp while ``firstAudioSentAt`` is
+   * still ``null``, so repeated calls within a turn leave the first value
+   * untouched. Intended to be invoked AFTER the underlying
+   * ``bridge.sendAudio`` resolves so the gate is anchored to "audio actually
+   * went out", not "we asked the carrier to send it".
+   * NOTE(review): ``beginSpeaking`` assigns ``Date.now()`` to
+   * ``firstAudioSentAt`` at the start of every turn, so the ``null`` check
+   * below cannot pass while a turn is active — confirm whether this method
+   * is still meant to have an effect.
+   */
+  markFirstAudioSent() {
+    if (this.firstAudioSentAt === null) {
+      this.firstAudioSentAt = Date.now();
+    }
   }
   /**
    * Atomically end speaking AND invalidate any pending grace timer.
@@ -4463,14 +4323,55 @@ var StreamHandler = class _StreamHandler {
     this.speakingGeneration++;
     this.isSpeaking = false;
     this.speakingStartedAt = null;
+    this.firstAudioSentAt = null;
     this.lastCancelAt = Date.now();
+    this.suppressedSpeechPending = false;
+    this.drainPendingMarks();
     if (this.llmAbort !== null) {
       try {
         this.llmAbort.abort();
       } catch {
       }
     }
+    const ttsCancelable = this.tts;
+    if (typeof ttsCancelable?.cancelActiveStream === "function") {
+      try {
+        ttsCancelable.cancelActiveStream();
+      } catch (err) {
+        getLogger().debug(`TTS cancelActiveStream raised: ${String(err)}`);
+      }
+    }
+  }
+  /**
+   * Resolve every entry in ``pendingMarks`` and empty the queue so nothing
+   * stays blocked waiting for a mark echo. Returns immediately when the
+   * queue is already empty, which makes repeated calls harmless. Callers
+   * visible in this part of the file are ``cancelSpeaking``,
+   * ``handleStop`` and ``handleWsClose``. An exception thrown by one
+   * entry's ``resolve()`` is ignored so the remaining entries are still
+   * released.
+   */
+  drainPendingMarks() {
+    if (this.pendingMarks.length === 0) return;
+    for (const entry of this.pendingMarks) {
+      try {
+        entry.resolve();
+      } catch {
+      }
+    }
+    this.pendingMarks.length = 0;
   }
+  // Mark-based back-pressure (sendMarkAwaitable / waitForMarkWindow)
+  // was removed when sendPacedFirstMessageBytes switched to the
+  // Twilio-FIFO-trusts model — see that method's doc comment for
+  // rationale. ``pendingMarks`` and ``onMark`` are still kept so an
+  // adapter that wants to round-trip a mark for some other purpose can
+  // still do so without breaking the firstMessage path.
+  /**
+   * Bytes-per-millisecond for a 16 kHz PCM16 mono stream. Used by
+   * ``sendPacedFirstMessageBytes`` to translate chunk size into a
+   * playout-duration sleep so we never deliver faster than the carrier
+   * can decode + play out (which manifested as severe crackling on the
+   * HTTP-TTS path with client-side resampling). 16000 samples/sec × 2
+   * bytes/sample = 32 bytes/ms.
+   */
+  static PCM16_16K_BYTES_PER_MS = 32;
   /** Cancel and clear the pending grace timer, if any. */
   clearGraceTimer() {
     if (this.graceTimer !== null) {
@@ -4495,11 +4396,53 @@ var StreamHandler = class _StreamHandler {
         if (this.speakingGeneration === gen) {
           this.isSpeaking = false;
           this.speakingStartedAt = null;
+          this.firstAudioSentAt = null;
+          this.clearPendingBargeIn();
+          void this.resetBargeInStrategies();
+          if (this.suppressedSpeechPending) {
+            this.suppressedSpeechPending = false;
+            this.flushInboundAudioRing();
+          }
+          this.resetVad();
         }
       }, grace);
     } else {
       this.isSpeaking = false;
       this.speakingStartedAt = null;
+      this.firstAudioSentAt = null;
+      this.clearPendingBargeIn();
+      void this.resetBargeInStrategies();
+      if (this.suppressedSpeechPending) {
+        this.suppressedSpeechPending = false;
+        this.flushInboundAudioRing();
+      }
+      this.resetVad();
+    }
+  }
+  async resetBargeInStrategies() { // Hand the configured barge-in strategies to resetStrategies().
+    if (this.bargeInStrategies.length === 0) return; // nothing configured: skip loading the strategies module
+    const { resetStrategies } = await import("./barge-in-strategies-X6ARMGIQ.mjs"); // loaded on demand via dynamic import
+    await resetStrategies(this.bargeInStrategies); // NOTE(review): visible callers use `void`, so a rejection here goes unhandled — consider a .catch
+  }
+  /**
+   * Invoke the optional ``reset()`` hook of the active VAD provider, which
+   * is intended to clear its per-utterance state. The active provider is
+   * ``deps.agent.vad``, falling back to ``autoVad``. The call is a no-op
+   * when there is no provider, when ``vadDisabled`` is already set, or when
+   * the provider does not implement ``reset()``.
+   *
+   * Safe to call from any context: a synchronous throw and a rejected
+   * promise returned by ``reset()`` are both logged at debug level and
+   * otherwise ignored. This method does not itself disable the VAD after
+   * a failed reset.
+   */
+  resetVad() {
+    const activeVad = this.deps.agent.vad ?? this.autoVad;
+    if (!activeVad || this.vadDisabled) return;
+    try {
+      const ret = activeVad.reset?.();
+      if (ret instanceof Promise) {
+        ret.catch((err) => {
+          getLogger().debug(`VAD reset threw: ${String(err)}`);
+        });
+      }
+    } catch (err) {
+      getLogger().debug(`VAD reset threw: ${String(err)}`);
     }
   }
   /**
@@ -4509,7 +4452,8 @@ var StreamHandler = class _StreamHandler {
    */
   canBargeIn() {
     if (this.speakingStartedAt === null) return true; // no speaking-start timestamp recorded: nothing to gate
-    const elapsed = Date.now() - this.speakingStartedAt;
+    if (this.firstAudioSentAt === null) return false; // turn started but no first-audio timestamp: keep barge-in blocked
+    const elapsed = Date.now() - this.firstAudioSentAt; // gate is measured from the first-audio timestamp
     const gate = this.aec ? _StreamHandler.MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_AEC : _StreamHandler.MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_NO_AEC; // minimum differs with AEC on vs off
     return elapsed >= gate;
   }
@@ -4613,6 +4557,9 @@ var StreamHandler = class _StreamHandler {
     this.ws = ws;
     this.caller = caller;
     this.callee = callee;
+    this.bargeInStrategies = (deps.agent.bargeInStrategies ?? []).slice();
+    const confirmMs = deps.agent.bargeInConfirmMs;
+    this.bargeInConfirmMs = typeof confirmMs === "number" && Number.isFinite(confirmMs) && confirmMs > 0 ? confirmMs : 1500;
     this.history = createHistoryManager(200);
     const sttKey = deps.agent.stt?.constructor?.providerKey;
     const sttProviderName = deps.agent.stt ? sttKey ?? deps.agent.stt.constructor?.name ?? "custom" : void 0;
@@ -4860,7 +4807,7 @@ var StreamHandler = class _StreamHandler {
       if (activeVad && !this.vadDisabled) {
         try {
           const vadPromise = activeVad.processFrame(pcm16k, 16e3);
-          const timeoutPromise = new Promise((resolve) => setTimeout(() => resolve(null), 25));
+          const timeoutPromise = new Promise((resolve2) => setTimeout(() => resolve2(null), 25));
           const evt = await Promise.race([vadPromise, timeoutPromise]);
           if (evt) {
             getLogger().info(
@@ -4868,11 +4815,18 @@ var StreamHandler = class _StreamHandler {
             );
           }
           if (evt?.type === "speech_start") {
-            if (this.isSpeaking && !this.canBargeIn()) {
+            const phantomSuppressed = this.isSpeaking && !this.canBargeIn();
+            if (phantomSuppressed) {
               getLogger().info(
                 `[VAD] speech_start suppressed (agent speaking < gate, aec=${this.aec ? "on" : "off"})`
               );
+              this.suppressedSpeechPending = true;
             } else if (this.isSpeaking) {
+              if (this.bargeInStrategies.length > 0) {
+                this.startPendingBargeIn();
+                this.metricsAcc.anchorUserSpeechStart();
+                return;
+              }
               getLogger().info("[VAD] speech_start during TTS \u2192 BARGE-IN");
               this.metricsAcc.recordOverlapStart();
               this.metricsAcc.recordBargeinDetected();
@@ -4895,7 +4849,9 @@ var StreamHandler = class _StreamHandler {
                 }
               }
             }
-            this.metricsAcc.startTurnIfIdle();
+            if (!phantomSuppressed) {
+              this.metricsAcc.anchorUserSpeechStart();
+            }
           } else if (evt?.type === "speech_end") {
             this.metricsAcc.recordVadStop();
             try {
@@ -4972,13 +4928,36 @@ var StreamHandler = class _StreamHandler {
    */
   /**
    * Handle a Twilio Media Streams `mark` event acknowledging audio playback boundaries.
    *
    * Ignores an empty mark name and any name that has no entry in
    * ``pendingMarks``. For a known name it records the name as
    * ``lastConfirmedMark``, removes that entry together with every entry
    * queued before it, and resolves each removed entry (an exception from
    * ``resolve()`` is ignored).
    */
   async onMark(markName) {
-    if (markName) {
-      this.lastConfirmedMark = markName;
+    if (!markName) return;
+    const idx = this.pendingMarks.findIndex((m) => m.name === markName);
+    if (idx < 0) return;
+    this.lastConfirmedMark = markName;
+    const resolved = this.pendingMarks.splice(0, idx + 1);
+    for (const entry of resolved) {
+      try {
+        entry.resolve();
+      } catch {
+      }
     }
   }
   /** Handle call stop / stream end. */
   /** Handle a carrier-emitted `stop` event signalling the call has ended. */
   async handleStop() {
+    if (this.llmAbort !== null) {
+      try {
+        this.llmAbort.abort();
+      } catch {
+      }
+    }
+    const ttsCancelable = this.tts;
+    if (typeof ttsCancelable?.cancelActiveStream === "function") {
+      try {
+        ttsCancelable.cancelActiveStream();
+      } catch {
+      }
+    }
+    this.clearPendingBargeIn();
+    this.drainPendingMarks();
     this.clearGraceTimer();
     this.flushResamplers();
     await this.closeSttOnce();
@@ -4991,6 +4970,21 @@ var StreamHandler = class _StreamHandler {
   /** Handle WebSocket close event. */
   /** Tear down adapter, STT/TTS, and per-call state when the carrier WebSocket closes. */
   async handleWsClose() {
+    if (this.llmAbort !== null) {
+      try {
+        this.llmAbort.abort();
+      } catch {
+      }
+    }
+    const ttsCancelable = this.tts;
+    if (typeof ttsCancelable?.cancelActiveStream === "function") {
+      try {
+        ttsCancelable.cancelActiveStream();
+      } catch {
+      }
+    }
+    this.clearPendingBargeIn();
+    this.drainPendingMarks();
     this.clearGraceTimer();
     this.flushResamplers();
     await this.closeSttOnce();
@@ -5029,13 +5023,39 @@ var StreamHandler = class _StreamHandler {
    * Maintains a 1-byte carry across calls so unaligned HTTP chunks from
    * streaming TTS providers never byte-swap the PCM16 samples downstream.
    */
-  encodePipelineAudio(pcm16k) {
-    const aligned = this.alignPcm16(pcm16k);
+  encodePipelineAudio(audioChunk) {
+    if (this.ttsOutputFormatNativeForCarrier === true) { // bypass: forward the chunk without alignment, resampling or mu-law encoding
+      return audioChunk.toString("base64");
+    }
+    const aligned = this.alignPcm16(audioChunk);
     if (aligned.length === 0) return ""; // no even-length PCM16 portion available for this chunk
     const pcm8k = this.outboundResampler.process(aligned);
     const mulaw = pcm16ToMulaw(pcm8k);
     return mulaw.toString("base64");
   }
+  /**
+   * Cached result of ``isTtsOutputFormatNativeForCarrier()`` — settled
+   * once during pipeline initialisation, after the ``setTelephonyCarrier``
+   * attempt on the TTS adapter. Stable for the call lifetime: changes to
+   * the adapter's output format mid-call would NOT flip this. ``true``
+   * means ``encodePipelineAudio`` returns the chunk base64-encoded as-is
+   * (the bypass path); the initial ``false`` selects the resample and
+   * mu-law encode path.
+   */
+  ttsOutputFormatNativeForCarrier = false;
+  /**
+   * Probe whether the TTS adapter is configured to emit bytes already in
+   * the carrier's wire codec. Currently: Twilio expects ``ulaw_8000``,
+   * Telnyx expects ``pcm_16000`` (no client transcode in either case if
+   * matched). Anything else takes the resample-and-encode path.
+   */
+  isTtsOutputFormatNativeForCarrier() {
+    if (!this.tts) return false;
+    const fmt = this.tts.outputFormat;
+    if (typeof fmt !== "string") return false;
+    const carrier = this.deps.bridge.telephonyProvider;
+    if (carrier === "twilio") return fmt === "ulaw_8000";
+    if (carrier === "telnyx") return fmt === "pcm_16000";
+    return false;
+  }
   /**
    * Prepend any carry byte from the previous chunk, return the even-length
    * portion, and stash the final odd byte (if any) for the next call.
@@ -5046,6 +5066,60 @@ var StreamHandler = class _StreamHandler {
     this.ttsByteCarry = alignedLen < combined.length ? combined.subarray(alignedLen) : null;
     return combined.subarray(0, alignedLen);
   }
+  /**
+   * Stream a cached firstMessage buffer to the carrier.
+   *
+   * Delegates unchanged to ``sendPacedFirstMessageBytes``, which slices
+   * ``prewarmBytes`` and forwards each slice through
+   * ``deps.bridge.sendAudio``. Slicing (rather than one multi-second
+   * ``sendAudio`` call) is intended to preserve mark/clear granularity so
+   * a ``sendClear`` issued mid-buffer still has something to clear (the
+   * "agent keeps talking after barge-in" UX bug on the very first turn).
+   *
+   * @param prewarmBytes Cached firstMessage audio. NOTE(review): presumably
+   *   a Buffer already in the TTS adapter's output format; confirm at the
+   *   caller that populates the prewarm cache.
+   * @returns Promise resolving to the delegate's result: ``true`` when at
+   *   least one chunk was sent. The caller records TTS-first-byte before
+   *   calling this and uses the result to gate turn-complete metrics.
+   */
+  async streamPrewarmBytes(prewarmBytes) {
+    return this.sendPacedFirstMessageBytes(prewarmBytes);
+  }
+  /**
+   * Iterate ``bytes`` in 20 ms slices (Twilio PSTN frame quantum) and
+   * forward each via ``deps.bridge.sendAudio`` with mark-gated pacing
+   * (Twilio) or playout-time-based pacing (Telnyx). Caps the carrier-
+   * side buffer at ``FIRST_MESSAGE_MARK_WINDOW`` chunks so a barge-in's
+   * ``sendClear`` has ~120 ms (Twilio) or zero (Telnyx, immediately
+   * after the latest sleep) of audio to flush.
+   *
+   * Bails immediately when ``isSpeaking`` flips to false — both via the
+   * loop's pre-iter check and via ``drainPendingMarks`` (called from
+   * ``cancelSpeaking``) which unblocks any in-flight ``waitForMarkWindow``.
+   *
+   * Returns ``true`` when at least one chunk hit the wire — the caller
+   * uses that to decide whether to record TTS-first-byte / turn-complete
+   * metrics. See BUG #128 for the regression this fix targets.
+   */
+  async sendPacedFirstMessageBytes(bytes) {
+    if (this.pendingMarks.length > 0) this.drainPendingMarks();
+    let firstChunkSent = false;
+    const PSTN_FRAME_MS = 20;
+    const bytesPerMs = this.ttsOutputFormatNativeForCarrier ? 8 : _StreamHandler.PCM16_16K_BYTES_PER_MS;
+    const sliceBytes = bytesPerMs * PSTN_FRAME_MS;
+    for (let i = 0; i < bytes.length; i += sliceBytes) {
+      if (!this.isSpeaking) break;
+      const chunk = bytes.subarray(i, i + sliceBytes);
+      if (!firstChunkSent) firstChunkSent = true;
+      if (this.aec && !this.ttsOutputFormatNativeForCarrier) {
+        this.aec.pushFarEnd(chunk);
+      }
+      const encoded = this.encodePipelineAudio(chunk);
+      this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
+      this.markFirstAudioSent();
+    }
+    return firstChunkSent;
+  }
   // ---------------------------------------------------------------------------
   // Private: Pipeline mode
   // ---------------------------------------------------------------------------
@@ -5062,6 +5136,12 @@ var StreamHandler = class _StreamHandler {
           getLogger().debug(`TTS setTelephonyCarrier failed (${label}): ${String(e)}`);
         }
       }
+      this.ttsOutputFormatNativeForCarrier = this.isTtsOutputFormatNativeForCarrier();
+      if (this.ttsOutputFormatNativeForCarrier) {
+        getLogger().debug(
+          `TTS outputFormat matches ${this.deps.bridge.telephonyProvider} wire codec \u2014 bypassing client-side transcode`
+        );
+      }
     }
     if (!this.stt) {
       getLogger().debug(`Pipeline mode (${label}): no STT configured`);
@@ -5071,7 +5151,7 @@ var StreamHandler = class _StreamHandler {
     }
     if (!this.deps.agent.vad) {
       try {
-        const { SileroVAD } = await import("./silero-vad-YLCXT5GQ.mjs");
+        const { SileroVAD } = await import("./silero-vad-LNDFGIY7.mjs");
         this.autoVad = await SileroVAD.forPhoneCall();
         getLogger().info(
           `auto-VAD enabled (SileroVAD, phone preset). Pass agent.vad=\u2026 to override.`
@@ -5108,35 +5188,106 @@ var StreamHandler = class _StreamHandler {
         );
       }
     }
-    try {
-      if (this.stt) await this.stt.connect();
-      getLogger().debug(`Pipeline mode (${label}): STT + TTS connected`);
-    } catch (e) {
-      getLogger().error(`Pipeline connect FAILED (${label}):`, e);
+    let parked;
+    if (this.deps.popPrewarmedConnections) {
       try {
-        await this.deps.bridge.endCall(this.callId, this.ws);
-      } catch {
+        parked = this.deps.popPrewarmedConnections(this.callId);
+      } catch (err) {
+        getLogger().debug(`popPrewarmedConnections raised: ${String(err)}`);
+      }
+    }
+    const parkedTts = parked?.tts;
+    if (parkedTts && this.tts) {
+      const ttsAny = this.tts;
+      if (typeof ttsAny.adoptWebSocket === "function" && parkedTts.ws.readyState === 1) {
+        try {
+          ttsAny.adoptWebSocket(parkedTts);
+          getLogger().info(`[CONNECT] callId=${this.callId} provider=tts source=adopted ms=0`);
+        } catch (err) {
+          getLogger().debug(`TTS adoptWebSocket failed: ${String(err)}; falling back`);
+          try {
+            parkedTts.ws.close();
+          } catch {
+          }
+        }
+      } else {
+        try {
+          parkedTts.ws.close();
+        } catch {
+        }
+      }
+    }
+    let sttConnectPromise = null;
+    if (this.stt) {
+      const sttAny = this.stt;
+      const sttStarted = Date.now();
+      if (parked?.stt && typeof sttAny.adoptWebSocket === "function" && parked.stt.readyState === 1) {
+        try {
+          sttAny.adoptWebSocket(parked.stt);
+          getLogger().info(
+            `[CONNECT] callId=${this.callId} provider=stt source=adopted ms=${Date.now() - sttStarted}`
+          );
+          sttConnectPromise = Promise.resolve();
+        } catch (err) {
+          getLogger().debug(`STT adoptWebSocket failed: ${String(err)}; falling back`);
+          try {
+            parked.stt.close();
+          } catch {
+          }
+          sttConnectPromise = (async () => {
+            await this.stt.connect();
+            getLogger().info(
+              `[CONNECT] callId=${this.callId} provider=stt source=fresh ms=${Date.now() - sttStarted}`
+            );
+          })();
+        }
+      } else {
+        if (parked?.stt) {
+          try {
+            parked.stt.close();
+          } catch {
+          }
+        }
+        sttConnectPromise = (async () => {
+          await this.stt.connect();
+          getLogger().info(
+            `[CONNECT] callId=${this.callId} provider=stt source=fresh ms=${Date.now() - sttStarted}`
+          );
+        })();
       }
-      return;
     }
+    getLogger().debug(`Pipeline mode (${label}): STT connect kicked off`);
     if (this.deps.agent.firstMessage && !this.deps.onMessage && this.tts) {
       this.metricsAcc.startTurn();
-      await this.beginSpeaking();
+      await this.beginSpeaking(true);
       let firstChunkSent = false;
       this.resetTtsCarry();
+      let prewarmBytes;
+      if (this.deps.popPrewarmAudio) {
+        try {
+          prewarmBytes = this.deps.popPrewarmAudio(this.callId);
+        } catch (err) {
+          getLogger().debug(`popPrewarmAudio raised: ${String(err)}`);
+        }
+      }
       try {
-        for await (const chunk of this.tts.synthesizeStream(this.deps.agent.firstMessage)) {
-          if (!this.isSpeaking) break;
-          if (!firstChunkSent) {
-            firstChunkSent = true;
-            this.metricsAcc.recordTtsFirstByte();
-            await this.emitAudioOut();
-          }
-          if (this.aec) {
-            this.aec.pushFarEnd(chunk);
+        if (prewarmBytes) {
+          this.metricsAcc.recordTtsFirstByte();
+          await this.emitAudioOut();
+          firstChunkSent = await this.streamPrewarmBytes(prewarmBytes);
+        } else {
+          for await (const chunk of this.tts.synthesizeStream(this.deps.agent.firstMessage)) {
+            if (!this.isSpeaking) break;
+            if (!firstChunkSent) {
+              firstChunkSent = true;
+              this.metricsAcc.recordTtsFirstByte();
+              await this.emitAudioOut();
+            }
+            if (this.aec) this.aec.pushFarEnd(chunk);
+            const encoded = this.encodePipelineAudio(chunk);
+            this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
+            this.markFirstAudioSent();
           }
-          const encoded = this.encodePipelineAudio(chunk);
-          this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
         }
       } catch (e) {
         getLogger().error(`First message TTS error (${label}):`, e);
@@ -5145,6 +5296,7 @@ var StreamHandler = class _StreamHandler {
         this.endSpeakingWithGrace();
       }
       if (firstChunkSent) {
+        this.metricsAcc.recordTtsComplete(this.deps.agent.firstMessage);
         await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(this.deps.agent.firstMessage));
         this.history.push({ role: "assistant", text: this.deps.agent.firstMessage, timestamp: Date.now() });
       }
@@ -5186,6 +5338,18 @@ var StreamHandler = class _StreamHandler {
       getLogger().debug(`Built-in LLM loop active (pipeline, ${label})`);
     }
     if (this.stt) {
+      if (sttConnectPromise) {
+        try {
+          await sttConnectPromise;
+        } catch (e) {
+          getLogger().error(`STT connect FAILED (${label}):`, e);
+          try {
+            await this.deps.bridge.endCall(this.callId, this.ws);
+          } catch {
+          }
+          return;
+        }
+      }
       this.stt.onTranscript(async (transcript) => {
         await this.handleTranscript(transcript);
       });
@@ -5229,6 +5393,7 @@ var StreamHandler = class _StreamHandler {
         }
         const encoded = this.encodePipelineAudio(processedAudio);
         this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
+        this.markFirstAudioSent();
       }
     } catch (e) {
       getLogger().error(`TTS streaming error (${this.deps.bridge.label}):`, e);
@@ -5262,7 +5427,10 @@ var StreamHandler = class _StreamHandler {
       this.metricsAcc.recordVadStop();
     }
     if (!transcript.isFinal || !transcript.text) return;
-    if (!this.commitTranscript(transcript.text)) return;
+    if (!this.commitTranscript(transcript.text)) {
+      this.metricsAcc.anchorUserSpeechStart();
+      return;
+    }
     const label = this.deps.bridge.label;
     getLogger().info(
       `[DIAG] processTranscript COMMITTED \u2192 LLM (${label} pipeline): ${sanitizeLogValue(transcript.text.slice(0, 80))}`
@@ -5342,6 +5510,9 @@ var StreamHandler = class _StreamHandler {
     } else if (this.llmLoop) {
       responseText = await this.runPipelineLlm(filteredTranscript, hookExecutor, hookCtx);
     } else {
+      getLogger().warn(
+        `Pipeline (${label}) has no llm/onMessage handler \u2014 transcript "${sanitizeLogValue(filteredTranscript.slice(0, 60))}" dropped. Check that agent.llm or onMessage is configured.`
+      );
       return;
     }
     if (!responseText) return;
@@ -5362,7 +5533,7 @@ var StreamHandler = class _StreamHandler {
    * record the interruption, and return ``true`` so the caller skips the
    * turn-complete record.
    */
-  handleBargeIn(transcript) {
+  async handleBargeInAsync(transcript) {
     if (!transcript.text || !this.isSpeaking) return false;
     if (!this.canBargeIn()) {
       getLogger().info(
@@ -5370,10 +5541,67 @@ var StreamHandler = class _StreamHandler {
       );
       return false;
     }
+    if (this.bargeInStrategies.length > 0) {
+      const { evaluateStrategies } = await import("./barge-in-strategies-X6ARMGIQ.mjs");
+      const confirmed = await evaluateStrategies(this.bargeInStrategies, {
+        transcript: transcript.text,
+        isInterim: transcript.isFinal === false,
+        agentSpeaking: this.isSpeaking
+      });
+      if (!confirmed) {
+        getLogger().debug(
+          `Barge-in NOT confirmed by any strategy (${sanitizeLogValue(
+            transcript.text.slice(0, 40)
+          )}); agent continues talking`
+        );
+        return false;
+      }
+      getLogger().info(
+        `Barge-in confirmed by strategy on transcript ${sanitizeLogValue(
+          transcript.text.slice(0, 40)
+        )}`
+      );
+    }
+    this.runBargeInCancel(transcript.text);
+    return true;
+  }
+  /**
+   * Synchronous wrapper that callers in legacy code paths can keep using.
+   * When ``bargeInStrategies`` is empty the work is fully synchronous and
+   * the result is correct. With strategies the call is dispatched as a
+   * floating promise — non-confirmed transcripts simply skip the cancel
+   * and the legacy boolean return is meaningless under that opt-in path.
+   */
+  handleBargeIn(transcript) {
+    if (!transcript.text || !this.isSpeaking) return false;
+    if (this.bargeInStrategies.length === 0) {
+      if (!this.canBargeIn()) {
+        getLogger().info(
+          `Barge-in transcript suppressed (agent speaking < gate, aec=${this.aec ? "on" : "off"})`
+        );
+        return false;
+      }
+      this.runBargeInCancel(transcript.text);
+      return true;
+    }
+    void this.handleBargeInAsync(transcript).catch(
+      (err) => getLogger().debug(`handleBargeInAsync threw: ${String(err)}`)
+    );
+    return false;
+  }
+  /**
+   * Run the cancel/flush sequence for a confirmed barge-in. Shared by
+   * the legacy synchronous path and the strategy-confirmed async path.
+   */
+  runBargeInCancel(transcriptText) {
+    const hadPending = this.bargeInPendingSince !== null;
+    this.clearPendingBargeIn();
     getLogger().debug(
-      `Barge-in: caller spoke over agent (${sanitizeLogValue(transcript.text.slice(0, 40))})`
+      `Barge-in: caller spoke over agent (${sanitizeLogValue(transcriptText.slice(0, 40))})`
     );
-    this.metricsAcc.recordOverlapStart();
+    if (!hadPending) {
+      this.metricsAcc.recordOverlapStart();
+    }
     this.metricsAcc.recordBargeinDetected();
     const bargeinSpan = startSpan(SPAN_BARGEIN, { "patter.call.id": this.callId });
     try {
@@ -5385,6 +5613,7 @@ var StreamHandler = class _StreamHandler {
       }
       this.metricsAcc.recordTtsStopped();
       this.metricsAcc.recordTurnInterrupted();
+      this.metricsAcc.anchorUserSpeechStart();
       this.metricsAcc.recordOverlapEnd(true);
     } finally {
       try {
@@ -5392,7 +5621,34 @@ var StreamHandler = class _StreamHandler {
       } catch {
       }
     }
-    return true;
+  }
+  /** Mark a VAD-detected barge-in as pending (no cancel yet). */
+  startPendingBargeIn() {
+    if (this.bargeInPendingSince !== null) return;
+    this.bargeInPendingSince = Date.now();
+    this.metricsAcc.recordOverlapStart();
+    getLogger().info(
+      "Barge-in PENDING (VAD speech_start during TTS); awaiting strategy confirmation"
+    );
+    this.bargeInPendingTimer = setTimeout(() => {
+      if (this.bargeInPendingSince === null) return;
+      getLogger().info(
+        `Pending barge-in timed out after ${this.bargeInConfirmMs}ms; agent resumes (no strategy confirmed)`
+      );
+      this.metricsAcc.recordOverlapEnd(false);
+      this.metricsAcc.anchorUserSpeechStart();
+      this.bargeInPendingSince = null;
+      this.bargeInPendingTimer = null;
+    }, this.bargeInConfirmMs);
+  }
+  /** Drop pending state without cancelling — used on confirm and on
+   * agent stop. Idempotent. */
+  clearPendingBargeIn() {
+    if (this.bargeInPendingTimer !== null) {
+      clearTimeout(this.bargeInPendingTimer);
+      this.bargeInPendingTimer = null;
+    }
+    this.bargeInPendingSince = null;
   }
   /**
    * Dedup + throttle + hallucination filter for final STT transcripts.
@@ -5567,6 +5823,7 @@ var StreamHandler = class _StreamHandler {
             }
             const encoded = this.encodePipelineAudio(audioChunk);
             this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
+            this.markFirstAudioSent();
           }
         }
       }
@@ -5587,16 +5844,49 @@ var StreamHandler = class _StreamHandler {
   async initRealtimeAdapter(resolvedPrompt) {
     const label = this.deps.bridge.label;
     this.adapter = this.deps.buildAIAdapter(resolvedPrompt);
-    try {
-      await this.adapter.connect();
-      getLogger().debug(`AI adapter connected (${label})`);
-    } catch (e) {
-      getLogger().error(`AI adapter connect FAILED (${label}):`, e);
+    let parked;
+    if (typeof this.deps.popPrewarmedConnections === "function") {
       try {
-        await this.deps.bridge.endCall(this.callId, this.ws);
-      } catch {
+        parked = this.deps.popPrewarmedConnections(this.callId);
+      } catch (err) {
+        getLogger().debug(`popPrewarmedConnections raised: ${String(err)}`);
+      }
+    }
+    const parkedRealtimeWs = parked?.openaiRealtime;
+    let adoptOk = false;
+    if (parkedRealtimeWs !== void 0) {
+      const adapterAny = this.adapter;
+      const wsAlive = parkedRealtimeWs.readyState === 1;
+      if (typeof adapterAny?.adoptWebSocket === "function" && wsAlive) {
+        try {
+          adapterAny.adoptWebSocket(parkedRealtimeWs);
+          getLogger().info(
+            `[CONNECT] callId=${this.callId} provider=openai_realtime source=adopted ms=0`
+          );
+          adoptOk = true;
+        } catch (err) {
+          getLogger().debug(`Realtime adoptWebSocket failed: ${String(err)}; falling back`);
+        }
+      }
+      if (!adoptOk) {
+        try {
+          parkedRealtimeWs.close();
+        } catch {
+        }
+      }
+    }
+    if (!adoptOk) {
+      try {
+        await this.adapter.connect();
+        getLogger().debug(`AI adapter connected (${label})`);
+      } catch (e) {
+        getLogger().error(`AI adapter connect FAILED (${label}):`, e);
+        try {
+          await this.deps.bridge.endCall(this.callId, this.ws);
+        } catch {
+        }
+        return;
       }
-      return;
     }
     if (this.deps.agent.firstMessage) {
       this.metricsAcc.startTurn();
@@ -5704,6 +5994,7 @@ var StreamHandler = class _StreamHandler {
     }
     const outAudio = eventData;
     this.deps.bridge.sendAudio(this.ws, outAudio.toString("base64"), this.streamSid);
+    this.markFirstAudioSent();
     this.chunkCount++;
     this.deps.bridge.sendMark(this.ws, `audio_${this.chunkCount}`, this.streamSid);
   }
@@ -5715,8 +6006,21 @@ var StreamHandler = class _StreamHandler {
     await this.emitUserSpeechEnded();
   }
   async onAdapterTranscriptInput(inputText) {
+    const stripped = inputText.trim().toLowerCase();
+    if (HALLUCINATIONS.has(stripped) || stripped === "") {
+      getLogger().debug(
+        `Realtime transcript_input dropped (likely Whisper hallucination on silence/echo): ${sanitizeLogValue(inputText.slice(0, 60))}`
+      );
+      this.userTranscriptPending = false;
+      return;
+    }
     getLogger().debug(`User (${this.deps.bridge.label}): ${sanitizeLogValue(inputText)}`);
     this.history.push({ role: "user", text: inputText, timestamp: Date.now() });
+    if (this.adapter instanceof OpenAIRealtimeAdapter) {
+      void this.adapter.requestResponse().catch(
+        (err) => getLogger().debug(`Realtime requestResponse failed: ${String(err)}`)
+      );
+    }
     if (!this.metricsAcc.turnActive) {
       this.metricsAcc.startTurn();
       this.currentAgentText = "";
@@ -5868,6 +6172,18 @@ var StreamHandler = class _StreamHandler {
     await this.flushAssistantTurn(text);
   }
   async onAdapterSpeechInterrupt() {
+    if (this.adapter instanceof OpenAIRealtimeAdapter) {
+      const startedAt = this.adapter.currentResponseFirstAudioAt;
+      if (startedAt !== null) {
+        const elapsedMs = Date.now() - startedAt;
+        if (elapsedMs < _StreamHandler.MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_NO_AEC) {
+          getLogger().info(
+            `Realtime barge-in suppressed (response < gate, ${elapsedMs}ms)`
+          );
+          return;
+        }
+      }
+    }
     this.deps.bridge.sendClear(this.ws, this.streamSid);
     if (this.adapter instanceof OpenAIRealtimeAdapter) this.adapter.cancelResponse();
     this.metricsAcc.recordTurnInterrupted();
@@ -6050,9 +6366,10 @@ var StreamHandler = class _StreamHandler {
       metrics: finalMetrics
     };
     const cost = finalMetrics.cost?.total ?? 0;
-    const latencyP95 = finalMetrics.latency_p95?.total_ms ?? 0;
+    const p95Obj = finalMetrics.latency_p95;
+    const latencyP95 = p95Obj?.agent_response_ms ?? p95Obj?.total_ms ?? 0;
     getLogger().info(
-      `Call ended: ${this.callId} (${finalMetrics.duration_seconds.toFixed(1)}s, ${finalMetrics.turns.length} turns, cost=$${cost.toFixed(4)}, p95=${Math.round(latencyP95)}ms)`
+      `Call ended: ${this.callId} (${finalMetrics.duration_seconds.toFixed(1)}s, ${finalMetrics.turns.length} turns, cost=$${cost.toFixed(4)}, p95 wait=${Math.round(latencyP95)}ms)`
     );
     this.deps.metricsStore.recordCallEnd(
       callEndData,
@@ -6102,31 +6419,31 @@ async function queryDeepgramCost(metricsAcc, deepgramKey, deepgramRequestId) {
 // src/services/call-log.ts
 init_esm_shims();
 import * as crypto3 from "crypto";
-import * as fs2 from "fs";
+import * as fs3 from "fs";
 import { promises as fsp } from "fs";
 import * as os from "os";
-import * as path2 from "path";
+import * as path3 from "path";
 var SCHEMA_VERSION = "1.0";
 var DEFAULT_RETENTION_DAYS = 30;
 /** Return `XDG_DATA_HOME` from the environment when set and non-empty, else `<home>/.local/share`. */
 function xdgDataHome() {
-  return process.env.XDG_DATA_HOME || path2.join(os.homedir(), ".local", "share");
+  return process.env.XDG_DATA_HOME || path3.join(os.homedir(), ".local", "share");
 }
 /**
  * Pick the per-platform default root directory:
  * - darwin: `<home>/Library/Application Support/patter`
  * - win32: `<LOCALAPPDATA>/patter`, or `<home>/AppData/Local/patter` when
  *   the `LOCALAPPDATA` variable is unset or empty
  * - anything else: `<xdgDataHome()>/patter`
  */
 function platformDefaultRoot() {
   if (process.platform === "darwin") {
-    return path2.join(os.homedir(), "Library", "Application Support", "patter");
+    return path3.join(os.homedir(), "Library", "Application Support", "patter");
   }
   if (process.platform === "win32") {
     const localAppData = process.env.LOCALAPPDATA;
-    if (localAppData) return path2.join(localAppData, "patter");
-    return path2.join(os.homedir(), "AppData", "Local", "patter");
+    if (localAppData) return path3.join(localAppData, "patter");
+    return path3.join(os.homedir(), "AppData", "Local", "patter");
   }
-  return path2.join(xdgDataHome(), "patter");
+  return path3.join(xdgDataHome(), "patter");
 }
 /**
  * Resolve the call-log root directory.
  *
  * @param explicit Optional path; when null/undefined the `PATTER_LOG_DIR`
  *   environment variable is used instead.
  * @returns `null` when no value is configured (unset or empty), the
  *   platform default for the value "auto" (trimmed, case-insensitive),
  *   a path joined under the home directory for values starting with "~",
  *   otherwise the value unchanged.
  */
 function resolveLogRoot(explicit) {
   const value = explicit ?? process.env.PATTER_LOG_DIR;
   if (!value) return null;
   if (value.trim().toLowerCase() === "auto") return platformDefaultRoot();
-  if (value.startsWith("~")) return path2.join(os.homedir(), value.slice(1));
+  if (value.startsWith("~")) return path3.join(os.homedir(), value.slice(1)); // NOTE(review): a "~user/..." value is also joined under the current home; confirm that is intended
   return value;
 }
 function retentionDays() {
@@ -6137,9 +6454,9 @@ function retentionDays() {
   return Math.max(0, parsed);
 }
 /**
  * Read the phone-redaction mode from `PATTER_LOG_REDACT_PHONE`.
  * Accepts "full", "mask" or "hash_only" (trimmed, case-insensitive). An
  * unset, empty or unrecognized value yields the default, which this diff
  * changes from "mask" to "full".
  */
 function redactMode() {
-  const raw = (process.env.PATTER_LOG_REDACT_PHONE || "mask").trim().toLowerCase();
+  const raw = (process.env.PATTER_LOG_REDACT_PHONE || "full").trim().toLowerCase();
   if (raw === "full" || raw === "mask" || raw === "hash_only") return raw;
-  return "mask";
+  return "full";
 }
 function redactPhone(raw) {
   if (!raw) return "";
@@ -6155,9 +6472,9 @@ function utcIso(tsSeconds) {
   return new Date(ms).toISOString();
 }
 async function atomicWriteJson(filePath, payload) {
-  const dir = path2.dirname(filePath);
+  const dir = path3.dirname(filePath);
   await fsp.mkdir(dir, { recursive: true });
-  const tmp = path2.join(dir, `.tmp.${process.pid}.${crypto3.randomBytes(4).toString("hex")}.json`);
+  const tmp = path3.join(dir, `.tmp.${process.pid}.${crypto3.randomBytes(4).toString("hex")}.json`);
   try {
     const handle = await fsp.open(tmp, "w");
     try {
@@ -6176,7 +6493,7 @@ async function atomicWriteJson(filePath, payload) {
   }
 }
 async function appendJsonl(filePath, record) {
-  await fsp.mkdir(path2.dirname(filePath), { recursive: true });
+  await fsp.mkdir(path3.dirname(filePath), { recursive: true });
   await fsp.appendFile(filePath, JSON.stringify(record) + "\n", { encoding: "utf8" });
 }
 var CallLogger = class {
@@ -6186,9 +6503,9 @@ var CallLogger = class {
       this.root = null;
       return;
     }
-    const resolved = root.startsWith("~") ? path2.join(os.homedir(), root.slice(1)) : root;
+    const resolved = root.startsWith("~") ? path3.join(os.homedir(), root.slice(1)) : root;
     try {
-      fs2.mkdirSync(resolved, { recursive: true });
+      fs3.mkdirSync(resolved, { recursive: true });
       this.root = resolved;
       getLogger().info(`Call logs: ${resolved}`);
     } catch (err) {
@@ -6210,7 +6527,7 @@ var CallLogger = class {
     const month = String(dt.getUTCMonth() + 1).padStart(2, "0");
     const day = String(dt.getUTCDate()).padStart(2, "0");
     const safeId = sanitizeLogValue(callId, 64).replace(/\//g, "_") || "unknown";
-    return path2.join(this.root, "calls", year, month, day, safeId);
+    return path3.join(this.root, "calls", year, month, day, safeId);
   }
   /** Write the initial `metadata.json` for a new call. */
   async logCallStart(callId, input = {}) {
@@ -6228,6 +6545,7 @@ var CallLogger = class {
       status: "in_progress",
       caller: redactPhone(input.caller ?? ""),
       callee: redactPhone(input.callee ?? ""),
+      direction: input.direction || "inbound",
       telephony_provider: input.telephonyProvider ?? "",
       provider_mode: input.providerMode ?? "",
       agent: input.agent ?? {},
@@ -6237,7 +6555,7 @@ var CallLogger = class {
       error: null
     };
     try {
-      await atomicWriteJson(path2.join(dir, "metadata.json"), metadata);
+      await atomicWriteJson(path3.join(dir, "metadata.json"), metadata);
     } catch (err) {
       getLogger().warn(`call_log write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`);
     }
@@ -6256,7 +6574,7 @@ var CallLogger = class {
       ...turn
     };
     try {
-      await appendJsonl(path2.join(dir, "transcript.jsonl"), record);
+      await appendJsonl(path3.join(dir, "transcript.jsonl"), record);
     } catch (err) {
       getLogger().warn(
         `call_log turn write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`
@@ -6275,7 +6593,7 @@ var CallLogger = class {
       data: payload
     };
     try {
-      await appendJsonl(path2.join(dir, "events.jsonl"), record);
+      await appendJsonl(path3.join(dir, "events.jsonl"), record);
     } catch (err) {
       getLogger().warn(
         `call_log event write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`
@@ -6287,7 +6605,7 @@ var CallLogger = class {
     if (!this.enabled) return;
     const dir = this.callDir(callId);
     if (dir === null) return;
-    const metadataPath = path2.join(dir, "metadata.json");
+    const metadataPath = path3.join(dir, "metadata.json");
     let existing = {};
     try {
       existing = JSON.parse(await fsp.readFile(metadataPath, "utf8"));
@@ -6322,20 +6640,20 @@ var CallLogger = class {
     const days = retentionDays();
     if (days === 0) return;
     const cutoff = Date.now() / 1e3 - days * 86400;
-    const callsRoot = path2.join(this.root, "calls");
-    if (!fs2.existsSync(callsRoot)) return;
+    const callsRoot = path3.join(this.root, "calls");
+    if (!fs3.existsSync(callsRoot)) return;
     try {
-      for (const yearName of fs2.readdirSync(callsRoot)) {
+      for (const yearName of fs3.readdirSync(callsRoot)) {
         if (!/^\d+$/.test(yearName)) continue;
-        const yearDir = path2.join(callsRoot, yearName);
-        if (!fs2.statSync(yearDir).isDirectory()) continue;
-        for (const monthName of fs2.readdirSync(yearDir)) {
+        const yearDir = path3.join(callsRoot, yearName);
+        if (!fs3.statSync(yearDir).isDirectory()) continue;
+        for (const monthName of fs3.readdirSync(yearDir)) {
           if (!/^\d+$/.test(monthName)) continue;
-          const monthDir = path2.join(yearDir, monthName);
-          if (!fs2.statSync(monthDir).isDirectory()) continue;
-          for (const dayName of fs2.readdirSync(monthDir)) {
+          const monthDir = path3.join(yearDir, monthName);
+          if (!fs3.statSync(monthDir).isDirectory()) continue;
+          for (const dayName of fs3.readdirSync(monthDir)) {
             if (!/^\d+$/.test(dayName)) continue;
-            const dayDir = path2.join(monthDir, dayName);
+            const dayDir = path3.join(monthDir, dayName);
             const y = Number.parseInt(yearName, 10);
             const m = Number.parseInt(monthName, 10);
             const d = Number.parseInt(dayName, 10);
@@ -6345,12 +6663,12 @@ var CallLogger = class {
             }
           }
           try {
-            if (fs2.readdirSync(monthDir).length === 0) fs2.rmdirSync(monthDir);
+            if (fs3.readdirSync(monthDir).length === 0) fs3.rmdirSync(monthDir);
           } catch {
           }
         }
         try {
-          if (fs2.readdirSync(yearDir).length === 0) fs2.rmdirSync(yearDir);
+          if (fs3.readdirSync(yearDir).length === 0) fs3.rmdirSync(yearDir);
         } catch {
         }
       }
@@ -6361,19 +6679,19 @@ var CallLogger = class {
 };
 function rmTree(target) {
   try {
-    for (const child of fs2.readdirSync(target)) {
-      const childPath = path2.join(target, child);
-      const stat = fs2.lstatSync(childPath);
+    for (const child of fs3.readdirSync(target)) {
+      const childPath = path3.join(target, child);
+      const stat = fs3.lstatSync(childPath);
       if (stat.isDirectory()) {
         rmTree(childPath);
       } else {
         try {
-          fs2.unlinkSync(childPath);
+          fs3.unlinkSync(childPath);
         } catch {
         }
       }
     }
-    fs2.rmdirSync(target);
+    fs3.rmdirSync(target);
   } catch {
   }
 }
@@ -6554,9 +6872,10 @@ function buildAIAdapter(config, agent, resolvedPrompt) {
     strict: t.strict
   })) ?? [];
   const tools = [...agentTools, TRANSFER_CALL_TOOL, END_CALL_TOOL];
-  const openaiKey = engine && engine.kind === "openai_realtime" ? engine.apiKey : config.openaiKey ?? "";
+  const isOpenAIEngine = engine && (engine.kind === "openai_realtime" || engine.kind === "openai_realtime_2");
+  const openaiKey = isOpenAIEngine ? engine.apiKey : config.openaiKey ?? "";
   const adapterOptions = {};
-  if (engine && engine.kind === "openai_realtime") {
+  if (isOpenAIEngine) {
     if (engine.reasoningEffort !== void 0) {
       adapterOptions.reasoningEffort = engine.reasoningEffort;
     }
@@ -6564,7 +6883,8 @@ function buildAIAdapter(config, agent, resolvedPrompt) {
       adapterOptions.inputAudioTranscriptionModel = engine.inputAudioTranscriptionModel;
     }
   }
-  return new OpenAIRealtimeAdapter(
+  const AdapterCtor = engine && engine.kind === "openai_realtime_2" ? OpenAIRealtime2Adapter : OpenAIRealtimeAdapter;
+  return new AdapterCtor(
     openaiKey,
     agent.model,
     agent.voice,
@@ -6668,7 +6988,7 @@ var TELNYX_DTMF_ALLOWED = new Set("0123456789*#ABCDabcdwW");
 var TELNYX_DTMF_DURATION_MS = 250;
 async function sleep(ms) {
   if (ms <= 0) return;
-  await new Promise((resolve) => setTimeout(resolve, ms));
+  await new Promise((resolve2) => setTimeout(resolve2, ms));
 }
 var TelnyxBridge = class {
   constructor(config) {
@@ -6869,6 +7189,33 @@ var EmbeddedServer = class {
    * across calls.
    */
   onMachineDetection;
+  /**
+   * Pre-warm first-message audio accessor wired by ``Patter.serve()``.
+   * The per-call StreamHandler invokes this with its ``callId`` at the
+   * start of the firstMessage emit; a defined return is sent verbatim
+   * in place of running TTS again. ``undefined`` means "no prewarm
+   * cache for this call — fall back to live synthesis". Default is a
+   * no-op so callers that instantiate ``EmbeddedServer`` directly
+   * (tests) work without further setup.
+   */
+  popPrewarmAudio = () => void 0;
+  /**
+   * Pre-warmed provider WebSocket accessor wired by ``Patter.serve()``.
+   * The per-call StreamHandler invokes this with its ``callId`` at
+   * pipeline init; a defined return hands off pre-opened STT / TTS /
+   * Realtime sockets so the live first turn skips the cold-handshake.
+   * Default is a no-op for direct ``EmbeddedServer`` callers.
+   */
+  popPrewarmedConnections = () => void 0;
+  /**
+   * Prewarm waste recorder wired by ``Patter.serve()``. Invoked from
+   * the Twilio status callback (no-answer / busy / failed / canceled),
+   * the Twilio AMD webhook (machine_end_beep / machine_end_silence) and
+   * the Telnyx AMD-machine / call.hangup handlers so the cache entry is
+   * evicted when the call ends before the media stream starts. Callers
+   * wrap it in try/catch; default is a no-op (tests). See FIX #91.
+   */
+  recordPrewarmWaste = () => void 0;
   /** Bind HTTP + WebSocket listeners on `port`, mount carrier webhooks and dashboard routes. */
   async start(port = 8e3) {
     const webhookUrlPattern = /^[a-zA-Z0-9][a-zA-Z0-9.\-]+[a-zA-Z0-9]$/;
@@ -6944,6 +7291,13 @@ var EmbeddedServer = class {
         if (!Number.isNaN(parsed)) extra.duration_seconds = parsed;
         this.metricsStore.updateCallStatus(callSid, callStatus, extra);
       }
+      if (callSid && (callStatus === "no-answer" || callStatus === "busy" || callStatus === "failed" || callStatus === "canceled")) {
+        try {
+          this.recordPrewarmWaste(callSid);
+        } catch (err) {
+          getLogger().debug(`recordPrewarmWaste threw: ${String(err)}`);
+        }
+      }
       res.status(204).send();
     });
     app.post("/webhooks/twilio/recording", (req, res) => {
@@ -6999,6 +7353,13 @@ var EmbeddedServer = class {
           getLogger().warn(`onMachineDetection callback threw: ${sanitizeLogValue(String(err))}`);
         }
       }
+      if ((answeredBy === "machine_end_beep" || answeredBy === "machine_end_silence") && callSid) {
+        try {
+          this.recordPrewarmWaste(callSid);
+        } catch (err) {
+          getLogger().debug(`recordPrewarmWaste threw: ${String(err)}`);
+        }
+      }
       if ((answeredBy === "machine_end_beep" || answeredBy === "machine_end_silence") && this.voicemailMessage && this.config.twilioSid && this.config.twilioToken) {
         if (!validateTwilioSid(callSid)) {
           getLogger().warn(`AMD webhook rejected: invalid CallSid ${JSON.stringify(sanitizeLogValue(callSid))}`);
@@ -7119,6 +7480,26 @@ var EmbeddedServer = class {
         }
         if (amdCallId && (amdResult === "machine" || amdResult === "machine_detected")) {
           await this.handleTelnyxAmdVoicemail(amdCallId);
+          try {
+            this.recordPrewarmWaste(amdCallId);
+          } catch (err) {
+            getLogger().debug(`recordPrewarmWaste threw: ${String(err)}`);
+          }
+        }
+        return res.status(200).send();
+      }
+      if (eventType === "call.hangup") {
+        const hangupCallId = payload.call_control_id ?? "";
+        const hangupCause = String(payload.hangup_cause ?? "");
+        getLogger().info(
+          `Telnyx call.hangup for ${sanitizeLogValue(hangupCallId)} (cause=${sanitizeLogValue(hangupCause)})`
+        );
+        if (hangupCallId) {
+          try {
+            this.recordPrewarmWaste(hangupCallId);
+          } catch (err) {
+            getLogger().debug(`recordPrewarmWaste threw: ${String(err)}`);
+          }
         }
         return res.status(200).send();
       }
@@ -7210,7 +7591,7 @@ var EmbeddedServer = class {
         this.handleTwilioStream(ws, url);
       }
     });
-    await new Promise((resolve) => {
+    await new Promise((resolve2) => {
       const bindHost = process.env.PATTER_BIND_HOST ?? "127.0.0.1";
       this.server.listen(port, bindHost, () => {
         getLogger().info(`Server on port ${port}`);
@@ -7232,7 +7613,7 @@ var EmbeddedServer = class {
           }
           console.log("\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n");
         }
-        resolve();
+        resolve2();
       });
     });
   }
@@ -7275,7 +7656,7 @@ var EmbeddedServer = class {
           `Telnyx voicemail speak failed: ${speakResp.status} ${(await speakResp.text()).slice(0, 200)}`
         );
       }
-      await new Promise((resolve) => setTimeout(resolve, estimatedMs));
+      await new Promise((resolve2) => setTimeout(resolve2, estimatedMs));
       await fetch(`https://api.telnyx.com/v2/calls/${encoded}/actions/hangup`, {
         method: "POST",
         headers,
@@ -7308,7 +7689,9 @@ var EmbeddedServer = class {
       recording: this.recording,
       buildAIAdapter: (resolvedPrompt) => buildAIAdapter(this.config, this.agent, resolvedPrompt),
       sanitizeVariables,
-      resolveVariables
+      resolveVariables,
+      popPrewarmAudio: this.popPrewarmAudio,
+      popPrewarmedConnections: this.popPrewarmedConnections
     };
   }
   /**
@@ -7335,12 +7718,20 @@ var EmbeddedServer = class {
       }
       return Object.fromEntries(Object.entries(snap).filter(([, v]) => v !== void 0));
     };
+    const store = this.metricsStore;
     const wrappedStart = async (data) => {
       if (logger.enabled) {
         const callId = typeof data.call_id === "string" ? data.call_id : "";
+        const dataCaller = typeof data.caller === "string" ? data.caller : "";
+        const dataCallee = typeof data.callee === "string" ? data.callee : "";
+        const active = callId ? store.getActive(callId) : void 0;
+        const resolvedCaller = dataCaller || active?.caller || "";
+        const resolvedCallee = dataCallee || active?.callee || "";
+        const resolvedDirection = (typeof data.direction === "string" ? data.direction : "") || active?.direction || "inbound";
         void logger.logCallStart(callId, {
-          caller: typeof data.caller === "string" ? data.caller : "",
-          callee: typeof data.callee === "string" ? data.callee : "",
+          caller: resolvedCaller,
+          callee: resolvedCallee,
+          direction: resolvedDirection,
           telephonyProvider: bridge.telephonyProvider,
           providerMode: agent.provider ?? "",
           agent: agentSnapshot()
@@ -7365,7 +7756,11 @@ var EmbeddedServer = class {
         const latency = metricsObj ? {
           p50_ms: metricsObj.latency_p50?.total_ms ?? null,
           p95_ms: metricsObj.latency_p95?.total_ms ?? null,
-          p99_ms: metricsObj.latency_p99?.total_ms ?? null
+          p99_ms: metricsObj.latency_p99?.total_ms ?? null,
+          avg: metricsObj.latency_avg ?? null,
+          p50: metricsObj.latency_p50 ?? null,
+          p95: metricsObj.latency_p95 ?? null,
+          p99: metricsObj.latency_p99 ?? null
         } : null;
         void logger.logCallEnd(callId, {
           durationSeconds: metricsObj?.duration_seconds,
@@ -7494,8 +7889,8 @@ var EmbeddedServer = class {
    */
   async stop() {
     if (!this.server) return;
-    const httpClosePromise = new Promise((resolve) => {
-      this.server.close(() => resolve());
+    const httpClosePromise = new Promise((resolve2) => {
+      this.server.close(() => resolve2());
     });
     const isTelnyx = this.config.telephonyProvider === "telnyx";
     for (const [ws, callId] of this.activeCallIds) {
@@ -7515,15 +7910,15 @@ var EmbeddedServer = class {
     if (this.activeConnections.size > 0) {
       getLogger().info(`Waiting for ${this.activeConnections.size} active connection(s) to close...`);
       await Promise.race([
-        new Promise((resolve) => {
+        new Promise((resolve2) => {
           const checkInterval = setInterval(() => {
             if (this.activeConnections.size === 0) {
               clearInterval(checkInterval);
-              resolve();
+              resolve2();
             }
           }, 100);
         }),
-        new Promise((resolve) => setTimeout(resolve, GRACEFUL_SHUTDOWN_TIMEOUT_MS))
+        new Promise((resolve2) => setTimeout(resolve2, GRACEFUL_SHUTDOWN_TIMEOUT_MS))
       ]);
     }
     if (this.activeConnections.size > 0) {
@@ -7800,6 +8195,8 @@ function mergeAbortSignals(...signals) {
   return controller.signal;
 }
 var OpenAILLMProvider = class {
+  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+  static providerKey = "openai";
   apiKey;
   model;
   temperature;
@@ -7826,6 +8223,35 @@ var OpenAILLMProvider = class {
     this.presencePenalty = sampling.presencePenalty;
     this.stop = sampling.stop;
   }
+  /** API root for requests such as warmup(); subclasses (Cerebras, Groq) override this with their own host. */
+  get baseUrl() {
+    return "https://api.openai.com/v1";
+  }
+  /**
+   * Pre-call DNS / TLS / HTTP-keepalive warmup.
+   *
+   * Issues a lightweight ``GET ${baseUrl}/models`` and drains the body so
+   * the socket can go back to the pool before ``chat.completions`` lands.
+   * Best-effort: 5 s timeout, all exceptions swallowed at debug level.
+   *
+   * Note: this warms DNS + TLS + connection pool, NOT the inference path
+   * (a real low-token request is needed; left as a follow-up). STT / TTS
+   * providers ship concrete WebSocket-based prewarms (Cartesia / Deepgram
+   * / AssemblyAI for STT; ElevenLabs WS for TTS) which save 200-500 ms
+   * each — those dominate the cold-start latency budget.
+   */
+  async warmup() {
+    try {
+      const res = await fetch(`${this.baseUrl}/models`, {
+        method: "GET",
+        headers: { Authorization: `Bearer ${this.apiKey}` },
+        signal: AbortSignal.timeout(5e3)
+      });
+      await res.arrayBuffer();
+    } catch (err) {
+      getLogger().debug(`LLM warmup failed (best-effort): ${String(err)}`);
+    }
+  }
   /** Stream OpenAI Chat Completions chunks for the given messages/tools. */
   async *stream(messages, tools, opts) {
     const body = {
@@ -7931,6 +8357,11 @@ var LLMLoop = class {
   // Fix 10: track provider/model so usage chunks can be attributed for billing.
   _providerName;
   _modelName;
+  // Diagnostics for the char/4 fallback billing path (see iterate loop).
+  // Counted per-LLMLoop instance (i.e. per call). Surfaced only via logs
+  // — keeps recordLlmUsage's public signature unchanged. Parity with Python.
+  _usageMissingCount = 0;
+  _loggedUsageFallback = false;
   // Optional async observer fired after a successful tool execution so
   // the host SDK (StreamHandler in pipeline mode) can surface tool calls
   // into the transcript timeline / `onTranscript` callback. Mirrors the
@@ -8025,6 +8456,7 @@ ${systemPrompt}` : DEFAULT_PHONE_PREAMBLE;
       const toolCallsAccumulated = /* @__PURE__ */ new Map();
       const textParts = [];
       let hasToolCalls = false;
+      let usageChunkReceived = false;
       for await (const chunk of this.provider.stream(messages, this.openaiTools, opts)) {
         if (chunk.type === "text" && chunk.content) {
           const content = hasAfterLlmChunk && hookExecutor ? hookExecutor.runAfterLlmChunk(chunk.content) : chunk.content;
@@ -8036,6 +8468,7 @@ ${systemPrompt}` : DEFAULT_PHONE_PREAMBLE;
             yield content;
           }
         } else if (chunk.type === "usage") {
+          usageChunkReceived = true;
           metrics?.recordLlmUsage(
             this._providerName,
             this._modelName,
@@ -8061,6 +8494,35 @@ ${systemPrompt}` : DEFAULT_PHONE_PREAMBLE;
           if (chunk.arguments) acc.arguments += chunk.arguments;
         }
       }
+      if (!usageChunkReceived && metrics) {
+        let inputChars = 0;
+        for (const m of messages) {
+          const c = m.content;
+          if (typeof c === "string") inputChars += c.length;
+        }
+        const outputChars = textParts.reduce((s, p) => s + p.length, 0);
+        const estimatedInput = Math.max(1, Math.floor(inputChars / 4));
+        const estimatedOutput = Math.max(1, Math.floor(outputChars / 4));
+        metrics.recordLlmUsage(
+          this._providerName,
+          this._modelName,
+          estimatedInput,
+          estimatedOutput,
+          0,
+          0
+        );
+        this._usageMissingCount += 1;
+        if (!this._loggedUsageFallback) {
+          this._loggedUsageFallback = true;
+          getLogger().info(
+            `llm_usage_fallback provider=${this._providerName} model=${this._modelName} input_chars=${inputChars} output_chars=${outputChars} est_input_tokens=${estimatedInput} est_output_tokens=${estimatedOutput}`
+          );
+        } else {
+          getLogger().debug(
+            `llm_usage_fallback provider=${this._providerName} model=${this._modelName} iteration=${iter} input_chars=${inputChars} output_chars=${outputChars} est_input_tokens=${estimatedInput} est_output_tokens=${estimatedOutput} total_missing=${this._usageMissingCount}`
+          );
+        }
+      }
       if (!hasToolCalls) {
         if (hasAfterLlmResponse && hookExecutor && hookCtx) {
           const finalText = allEmittedText.join("");
@@ -8213,7 +8675,7 @@ var TestSession = class {
       input: process.stdin,
       output: process.stdout
     });
-    const askQuestion = (prompt) => new Promise((resolve) => rl.question(prompt, resolve));
+    const askQuestion = (prompt) => new Promise((resolve2) => rl.question(prompt, resolve2));
     try {
       while (!ended) {
         let userInput;
@@ -8312,14 +8774,17 @@ export {
   AuthenticationError,
   ProvisionError,
   RateLimitError,
-  OpenAIRealtimeAdapter,
   ElevenLabsConvAIAdapter,
+  PRICING_VERSION,
+  PRICING_LAST_UPDATED,
+  PricingUnit,
   DEFAULT_PRICING,
   mergePricing,
   calculateSttCost,
   calculateTtsCost,
   calculateRealtimeCost,
   calculateTelephonyCost,
+  VERSION,
   MetricsStore,
   makeAuthMiddleware,
   callsToCsv,
@@ -8329,19 +8794,9 @@ export {
   RemoteMessageHandler,
   isRemoteUrl,
   isWebSocketUrl,
+  DeepgramModel,
   DeepgramSTT,
   CallMetricsAccumulator,
-  mulawToPcm16,
-  pcm16ToMulaw,
-  PcmCarry,
-  StatefulResampler,
-  createResampler16kTo8k,
-  createResampler8kTo16k,
-  createResampler24kTo16k,
-  createResampler24kTo8k,
-  resample8kTo16k,
-  resample16kTo8k,
-  resample24kTo16k,
   SPAN_CALL,
   SPAN_STT,
   SPAN_LLM,