npm - getpatter - Versions diffs - 0.5.2 → 0.5.3 - Mend

getpatter 0.5.2 → 0.5.3

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (28)

package/README.md +5 -5
package/dist/{banner-FLR2HE5Z.mjs → banner-3GNZ6VQK.mjs} +1 -1
package/dist/{carrier-config-CPG5CROM.mjs → carrier-config-33HQ2W4V.mjs} +2 -2
package/dist/{chunk-7SDDK2AO.mjs → chunk-FIFIWBL7.mjs} +3255 -588
package/dist/chunk-QHHBUCMT.mjs +25 -0
package/dist/{chunk-AKQFOFLG.mjs → chunk-SEMKNPCD.mjs} +7 -2
package/dist/{chunk-FMNRCP5X.mjs → chunk-VJVDG4V5.mjs} +1 -1
package/dist/cli.js +126 -13
package/dist/dist-YRCCJQ26.mjs +1631 -0
package/dist/index.d.mts +2000 -289
package/dist/index.d.ts +2000 -289
package/dist/index.js +7944 -1927
package/dist/index.mjs +1881 -617
package/dist/node-cron-6PRPSBG5.mjs +1348 -0
package/dist/onnxruntime_binding-4Q2WV26X.node +0 -0
package/dist/onnxruntime_binding-5PVQ7RFC.node +0 -0
package/dist/onnxruntime_binding-FNOPH2XG.node +0 -0
package/dist/onnxruntime_binding-HSGOY4IT.node +0 -0
package/dist/onnxruntime_binding-OY2N3XIT.node +0 -0
package/dist/onnxruntime_binding-ZPEJPBCV.node +0 -0
package/dist/{persistence-CYIGNHSU.mjs → persistence-LQBYQPQQ.mjs} +1 -1
package/dist/test-mode-MVJ3SKG4.mjs +8 -0
package/dist/tunnel-UVR3PPAU.mjs +8 -0
package/package.json +10 -3
package/dist/chunk-OOIUSZB4.mjs +0 -37
package/dist/node-cron-373UVDIO.mjs +0 -935
package/dist/test-mode-K2TTPRGE.mjs +0 -8
package/dist/tunnel-O7ICMSTP.mjs +0 -8

package/dist/{chunk-7SDDK2AO.mjs → chunk-FIFIWBL7.mjs} RENAMED

@@ -1,12 +1,15 @@
 import {
   getLogger
-} from "./chunk-FMNRCP5X.mjs";
+} from "./chunk-VJVDG4V5.mjs";
+import {
+  __require
+} from "./chunk-QHHBUCMT.mjs";
 // src/test-mode.ts
 import { createInterface } from "readline";
 // src/server.ts
-import crypto3 from "crypto";
+import crypto4 from "crypto";
 import express from "express";
 import { createServer } from "http";
 import { WebSocketServer } from "ws";
@@ -14,14 +17,24 @@ import { WebSocketServer } from "ws";
 // src/providers/openai-realtime.ts
 import WebSocket from "ws";
 var OpenAIRealtimeAdapter = class {
-  constructor(apiKey, model = "gpt-4o-mini-realtime-preview", voice = "alloy", instructions = "", tools) {
+  constructor(apiKey, model = "gpt-realtime-mini", voice = "alloy", instructions = "", tools, audioFormat = "g711_ulaw", options = {}) {
     this.apiKey = apiKey;
     this.model = model;
     this.voice = voice;
     this.instructions = instructions;
     this.tools = tools;
+    this.audioFormat = audioFormat;
+    this.options = options;
   }
   ws = null;
+  eventCallbacks = /* @__PURE__ */ new Set();
+  messageListenerAttached = false;
+  heartbeat = null;
+  // Track the in-flight assistant item id so we can truncate cleanly on
+  // barge-in (see ``cancelResponse``) — matches the Python adapter.
+  currentResponseItemId = null;
+  currentResponseAudioMs = 0;
+  options;
   async connect() {
     const url = `wss://api.openai.com/v1/realtime?model=${encodeURIComponent(this.model)}`;
     this.ws = new WebSocket(url, {
@@ -45,13 +58,24 @@ var OpenAIRealtimeAdapter = class {
         if (msg.type === "session.created" && !sessionCreated) {
           sessionCreated = true;
           const config = {
-            input_audio_format: "g711_ulaw",
-            output_audio_format: "g711_ulaw",
+            input_audio_format: this.audioFormat,
+            output_audio_format: this.audioFormat,
             voice: this.voice,
             instructions: this.instructions || "You are a helpful voice assistant. Be concise.",
-            turn_detection: { type: "server_vad", threshold: 0.5, prefix_padding_ms: 300, silence_duration_ms: 500 },
-            input_audio_transcription: { model: "whisper-1" }
+            turn_detection: {
+              type: this.options.vadType ?? "server_vad",
+              threshold: 0.5,
+              prefix_padding_ms: 300,
+              silence_duration_ms: this.options.silenceDurationMs ?? 300
+            },
+            input_audio_transcription: { model: this.options.inputAudioTranscriptionModel ?? "whisper-1" }
           };
+          if (this.options.temperature !== void 0) config.temperature = this.options.temperature;
+          if (this.options.maxResponseOutputTokens !== void 0) {
+            config.max_response_output_tokens = this.options.maxResponseOutputTokens;
+          }
+          if (this.options.modalities !== void 0) config.modalities = this.options.modalities;
+          if (this.options.toolChoice !== void 0) config.tool_choice = this.options.toolChoice;
           if (this.tools?.length) {
             config.tools = this.tools.map((t) => ({
               type: "function",
@@ -92,19 +116,45 @@ var OpenAIRealtimeAdapter = class {
       ws.on("message", onSetupMessage);
       ws.on("error", onSetupError);
     });
+    this.heartbeat = setInterval(() => {
+      try {
+        this.ws?.ping();
+      } catch {
+      }
+    }, 2e4);
+    this.ensureMessageListener();
   }
   sendAudio(mulawAudio) {
     if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
     this.ws.send(JSON.stringify({ type: "input_audio_buffer.append", audio: mulawAudio.toString("base64") }));
   }
+  /**
+   * Register a listener for parsed realtime events.
+   *
+   * Previously every call attached a new ``ws.on('message')`` handler,
+   * which leaked listeners across retries and multi-consumer hooks. We now
+   * route all traffic through a single persistent handler that fans out to
+   * a Set of callbacks. Use {@link offEvent} to remove one.
+   */
   onEvent(callback) {
-    if (!this.ws) return;
-    const safeInvoke = (type, data) => {
-      void Promise.resolve(callback(type, data)).catch(
-        (err) => getLogger().error("onEvent callback error:", err)
-      );
+    this.eventCallbacks.add(callback);
+    this.ensureMessageListener();
+  }
+  offEvent(callback) {
+    this.eventCallbacks.delete(callback);
+  }
+  ensureMessageListener() {
+    if (this.messageListenerAttached || !this.ws) return;
+    this.messageListenerAttached = true;
+    const ws = this.ws;
+    const dispatch = (type, payload) => {
+      for (const cb of this.eventCallbacks) {
+        void Promise.resolve(cb(type, payload)).catch(
+          (err) => getLogger().error("onEvent callback error:", err)
+        );
+      }
     };
-    this.ws.on("message", (raw) => {
+    ws.on("message", (raw) => {
       let data;
       try {
         data = JSON.parse(raw.toString());
@@ -114,24 +164,61 @@ var OpenAIRealtimeAdapter = class {
       }
       const t = data.type;
       if (t === "response.audio.delta") {
-        safeInvoke("audio", Buffer.from(data.delta ?? "", "base64"));
+        const buf = Buffer.from(data.delta ?? "", "base64");
+        this.currentResponseAudioMs += estimateAudioMs(buf, this.audioFormat);
+        dispatch("audio", buf);
       } else if (t === "response.audio_transcript.delta") {
-        safeInvoke("transcript_output", data.delta);
+        dispatch("transcript_output", data.delta);
+      } else if (t === "response.content_part.added" || t === "response.output_item.added") {
+        const itemId = data.item?.id ?? data.item_id ?? null;
+        if (itemId) {
+          this.currentResponseItemId = itemId;
+          this.currentResponseAudioMs = 0;
+        }
       } else if (t === "input_audio_buffer.speech_started") {
-        safeInvoke("speech_started", null);
+        dispatch("speech_started", null);
+      } else if (t === "input_audio_buffer.speech_stopped") {
+        dispatch("speech_stopped", null);
       } else if (t === "conversation.item.input_audio_transcription.completed") {
-        safeInvoke("transcript_input", data.transcript);
+        dispatch("transcript_input", data.transcript);
       } else if (t === "response.function_call_arguments.done") {
-        safeInvoke("function_call", { call_id: data.call_id, name: data.name, arguments: data.arguments });
+        dispatch("function_call", { call_id: data.call_id, name: data.name, arguments: data.arguments });
       } else if (t === "response.done") {
-        safeInvoke("response_done", data.response ?? null);
+        this.currentResponseItemId = null;
+        this.currentResponseAudioMs = 0;
+        dispatch("response_done", data.response ?? null);
       } else if (t === "error") {
-        safeInvoke("error", data.error);
+        dispatch("error", data.error);
+      }
+    });
+    ws.on("close", (code, reason) => {
+      if (code !== 1e3) {
+        dispatch("error", {
+          type: "connection_closed",
+          code,
+          reason: reason?.toString() ?? ""
+        });
       }
     });
+    ws.on("error", (err) => {
+      dispatch("error", { type: "socket_error", message: err?.message ?? String(err) });
+    });
   }
   cancelResponse() {
-    this.ws?.send(JSON.stringify({ type: "response.cancel" }));
+    if (!this.ws) return;
+    if (this.currentResponseItemId) {
+      try {
+        this.ws.send(JSON.stringify({
+          type: "conversation.item.truncate",
+          item_id: this.currentResponseItemId,
+          content_index: 0,
+          audio_end_ms: this.currentResponseAudioMs
+        }));
+      } catch (err) {
+        getLogger().debug?.(`conversation.item.truncate failed: ${String(err)}`);
+      }
+    }
+    this.ws.send(JSON.stringify({ type: "response.cancel" }));
   }
   async sendText(text) {
     this.ws?.send(JSON.stringify({
@@ -148,28 +235,148 @@ var OpenAIRealtimeAdapter = class {
     this.ws?.send(JSON.stringify({ type: "response.create" }));
   }
   close() {
+    if (this.heartbeat) {
+      clearInterval(this.heartbeat);
+      this.heartbeat = null;
+    }
+    this.eventCallbacks.clear();
+    this.messageListenerAttached = false;
     this.ws?.close();
     this.ws = null;
   }
 };
+function estimateAudioMs(chunk, format) {
+  if (chunk.length === 0) return 0;
+  if (format === "g711_ulaw" || format === "g711_alaw") return Math.floor(chunk.length / 8);
+  if (format === "pcm16") {
+    return Math.floor(chunk.length / 48);
+  }
+  return 0;
+}
 // src/providers/elevenlabs-convai.ts
 import WebSocket2 from "ws";
 var ELEVENLABS_CONVAI_URL = "wss://api.elevenlabs.io/v1/convai/conversation";
-var ElevenLabsConvAIAdapter = class {
-  constructor(apiKey, agentId = "", voiceId = "EXAVITQu4vr4xnSDxMaL", _modelId = "eleven_turbo_v2_5", _language = "en", firstMessage = "") {
-    this.apiKey = apiKey;
-    this.agentId = agentId;
-    this.voiceId = voiceId;
-    this.firstMessage = firstMessage;
-  }
+var ELEVENLABS_SIGNED_URL = "https://api.elevenlabs.io/v1/convai/conversation/get-signed-url";
+var AGENT_SILENCE_MS = 500;
+var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
   ws = null;
   eventCallback = null;
-  async connect() {
-    const url = this.agentId ? `${ELEVENLABS_CONVAI_URL}?agent_id=${encodeURIComponent(this.agentId)}` : ELEVENLABS_CONVAI_URL;
-    this.ws = new WebSocket2(url, {
-      headers: { "xi-api-key": this.apiKey }
+  apiKey;
+  agentId;
+  voiceId;
+  // Exposed for parity with Python SDK (`self.model_id`). ConvAI does not
+  // accept a client-side model override today, but we preserve the value so
+  // callers can introspect it and we can ship the override the day the
+  // server exposes it.
+  modelId;
+  language;
+  firstMessage;
+  // Exposed publicly so the stream handler can detect μ-law negotiation
+  // (``"ulaw_8000"``) and skip resampling / transcoding on the audio path.
+  outputAudioFormat;
+  inputAudioFormat;
+  useSignedUrl;
+  // Populated from `conversation_initiation_metadata`.
+  conversationId = null;
+  agentOutputAudioFormat = null;
+  userInputAudioFormat = null;
+  agentSpeaking = false;
+  silenceTimer = null;
+  closePromise = null;
+  constructor(apiKeyOrOptions, agentId = "", voiceId = "EXAVITQu4vr4xnSDxMaL", firstMessage = "") {
+    if (typeof apiKeyOrOptions === "object") {
+      const o = apiKeyOrOptions;
+      this.apiKey = o.apiKey;
+      this.agentId = o.agentId ?? "";
+      this.voiceId = o.voiceId ?? "EXAVITQu4vr4xnSDxMaL";
+      this.modelId = o.modelId ?? "eleven_flash_v2_5";
+      this.language = o.language ?? "it";
+      this.firstMessage = o.firstMessage ?? "";
+      this.outputAudioFormat = o.outputAudioFormat;
+      this.inputAudioFormat = o.inputAudioFormat;
+      this.useSignedUrl = o.useSignedUrl ?? false;
+    } else {
+      this.apiKey = apiKeyOrOptions;
+      this.agentId = agentId;
+      this.voiceId = voiceId;
+      this.modelId = "eleven_flash_v2_5";
+      this.language = "it";
+      this.firstMessage = firstMessage;
+      this.outputAudioFormat = void 0;
+      this.inputAudioFormat = void 0;
+      this.useSignedUrl = false;
+    }
+  }
+  // ------------------------------------------------------------------
+  // Telephony factories
+  // ------------------------------------------------------------------
+  /**
+   * Build an adapter pre-configured for Twilio Media Streams.
+   *
+   * Negotiates `ulaw_8000` for both `outputAudioFormat` and
+   * `inputAudioFormat`, matching Twilio's μ-law @ 8 kHz wire format. The
+   * SDK's stream handler detects this and skips the 8 kHz → 16 kHz inbound
+   * resample and the 16 kHz → 8 kHz / PCM → μ-law outbound transcode.
+   * Saves ~30–80 ms first-byte plus per-frame CPU on every turn.
+   */
+  static forTwilio(apiKey, agentId, options = {}) {
+    return new _ElevenLabsConvAIAdapter({
+      ...options,
+      apiKey,
+      agentId,
+      outputAudioFormat: "ulaw_8000",
+      inputAudioFormat: "ulaw_8000"
+    });
+  }
+  /**
+   * Build an adapter pre-configured for Telnyx bidirectional media.
+   *
+   * Telnyx negotiates PCMU @ 8 kHz when `streaming_start` sets
+   * `stream_bidirectional_codec=PCMU` (the SDK default). Picking
+   * `ulaw_8000` on both ConvAI directions removes every transcode on the
+   * audio path — same optimization as `forTwilio`.
+   */
+  static forTelnyx(apiKey, agentId, options = {}) {
+    return new _ElevenLabsConvAIAdapter({
+      ...options,
+      apiKey,
+      agentId,
+      outputAudioFormat: "ulaw_8000",
+      inputAudioFormat: "ulaw_8000"
     });
+  }
+  async fetchSignedUrl() {
+    if (!this.agentId) {
+      throw new Error("useSignedUrl=true requires agentId");
+    }
+    const url = `${ELEVENLABS_SIGNED_URL}?agent_id=${encodeURIComponent(this.agentId)}`;
+    const resp = await fetch(url, {
+      method: "GET",
+      headers: { "xi-api-key": this.apiKey },
+      signal: AbortSignal.timeout(15e3)
+    });
+    if (!resp.ok) {
+      const body = await resp.text();
+      throw new Error(`ElevenLabs signed-url error ${resp.status}: ${body}`);
+    }
+    const data = await resp.json();
+    if (!data.signed_url) {
+      throw new Error("ElevenLabs signed-url response missing 'signed_url'");
+    }
+    return data.signed_url;
+  }
+  async connect() {
+    let wsUrl;
+    let wsOptions;
+    if (this.useSignedUrl) {
+      wsUrl = await this.fetchSignedUrl();
+      wsOptions = void 0;
+    } else {
+      wsUrl = this.agentId ? `${ELEVENLABS_CONVAI_URL}?agent_id=${encodeURIComponent(this.agentId)}` : ELEVENLABS_CONVAI_URL;
+      wsOptions = { headers: { "xi-api-key": this.apiKey } };
+    }
+    this.ws = new WebSocket2(wsUrl, wsOptions);
     await new Promise((resolve, reject) => {
       const timeout = setTimeout(
         () => reject(new Error("ElevenLabs ConvAI connect timeout")),
@@ -177,17 +384,22 @@ var ElevenLabsConvAIAdapter = class {
       );
       this.ws.once("open", () => {
         clearTimeout(timeout);
+        const agentCfg = {};
+        if (this.firstMessage) agentCfg["first_message"] = this.firstMessage;
+        if (this.language) agentCfg["language"] = this.language;
+        const override = {
+          tts: this.outputAudioFormat ? { voice_id: this.voiceId, output_format: this.outputAudioFormat } : { voice_id: this.voiceId }
+        };
+        if (this.inputAudioFormat) {
+          override["asr"] = { input_format: this.inputAudioFormat };
+        }
+        if (Object.keys(agentCfg).length > 0) {
+          override["agent"] = agentCfg;
+        }
         const config = {
           type: "conversation_initiation_client_data",
-          conversation_config_override: {
-            tts: { voice_id: this.voiceId }
-          }
+          conversation_config_override: override
         };
-        if (this.firstMessage) {
-          config["conversation_config_override"]["agent"] = {
-            first_message: this.firstMessage
-          };
-        }
         this.ws.send(JSON.stringify(config));
         resolve();
       });
@@ -196,54 +408,176 @@ var ElevenLabsConvAIAdapter = class {
         reject(err);
       });
     });
+    this.ws.on("error", (err) => {
+      getLogger().error("ElevenLabs ConvAI WS error:", err);
+      this.safeInvoke("error", err instanceof Error ? err.message : String(err));
+    });
+    this.ws.on("close", (code, reason) => {
+      this.clearSilenceTimer();
+      this.safeInvoke("close", {
+        code,
+        reason: reason?.toString() ?? ""
+      });
+    });
     this.ws.on("message", (raw) => {
-      const cb = this.eventCallback;
-      if (!cb) return;
-      const safeInvoke = (type, data) => {
-        void Promise.resolve(cb(type, data)).catch(
-          (err) => getLogger().error("onEvent callback error:", err)
-        );
-      };
       let parsed;
       try {
         parsed = JSON.parse(raw.toString());
       } catch {
         return;
       }
-      const msgType = parsed["type"];
-      if (msgType === "audio") {
-        const audioB64 = parsed["audio"];
-        if (audioB64) {
-          safeInvoke("audio", Buffer.from(audioB64, "base64"));
-        }
-      } else if (msgType === "user_transcript") {
-        safeInvoke("transcript_input", parsed["text"] ?? "");
-      } else if (msgType === "agent_response") {
-        safeInvoke("transcript_output", parsed["text"] ?? "");
-        safeInvoke("response_done", null);
-      } else if (msgType === "interruption") {
-        safeInvoke("interruption", null);
-      } else if (msgType === "error") {
-        safeInvoke("error", parsed);
-      }
+      this.handleMessage(parsed);
     });
   }
+  safeInvoke(type, data) {
+    const cb = this.eventCallback;
+    if (!cb) return;
+    void Promise.resolve(cb(type, data)).catch(
+      (err) => getLogger().error("onEvent callback error:", err)
+    );
+  }
+  respondToPing(eventId, delayMs) {
+    const send = () => {
+      if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) return;
+      try {
+        this.ws.send(JSON.stringify({ type: "pong", event_id: eventId }));
+      } catch (err) {
+        getLogger().warn("ElevenLabs ConvAI pong send failed:", err);
+      }
+    };
+    if (delayMs && delayMs > 0) {
+      setTimeout(send, delayMs);
+    } else {
+      send();
+    }
+  }
+  clearSilenceTimer() {
+    if (this.silenceTimer) {
+      clearTimeout(this.silenceTimer);
+      this.silenceTimer = null;
+    }
+  }
+  finalizeAgentTurn() {
+    this.clearSilenceTimer();
+    if (this.agentSpeaking) {
+      this.agentSpeaking = false;
+      this.safeInvoke("response_done", null);
+    }
+  }
+  scheduleSilenceDone() {
+    this.clearSilenceTimer();
+    this.silenceTimer = setTimeout(() => {
+      if (this.agentSpeaking) {
+        this.agentSpeaking = false;
+        this.safeInvoke("response_done", null);
+      }
+    }, AGENT_SILENCE_MS);
+  }
+  handleMessage(parsed) {
+    const msgType = parsed["type"];
+    if (msgType === "ping") {
+      const pingPayload = parsed["ping_event"] ?? parsed["ping"] ?? {};
+      const eventId = pingPayload["event_id"] ?? parsed["event_id"];
+      const pingMs = pingPayload["ping_ms"] ?? 0;
+      this.respondToPing(eventId, pingMs);
+      return;
+    }
+    if (msgType === "conversation_initiation_metadata") {
+      const meta = parsed["conversation_initiation_metadata_event"] ?? parsed;
+      this.conversationId = meta["conversation_id"] ?? this.conversationId;
+      this.agentOutputAudioFormat = meta["agent_output_audio_format"] ?? this.agentOutputAudioFormat;
+      this.userInputAudioFormat = meta["user_input_audio_format"] ?? this.userInputAudioFormat;
+      this.finalizeAgentTurn();
+      return;
+    }
+    if (msgType === "audio") {
+      const audioEvt = parsed["audio_event"];
+      let audioB64;
+      if (audioEvt) {
+        audioB64 = audioEvt["audio_base_64"] ?? audioEvt["audio"];
+      }
+      if (!audioB64) {
+        audioB64 = parsed["audio"];
+      }
+      if (audioB64) {
+        this.agentSpeaking = true;
+        this.safeInvoke("audio", Buffer.from(audioB64, "base64"));
+        this.scheduleSilenceDone();
+      }
+      return;
+    }
+    if (msgType === "user_transcript") {
+      const evt = parsed["user_transcription_event"] ?? parsed;
+      const text = evt["user_transcript"] ?? evt["text"] ?? "";
+      this.finalizeAgentTurn();
+      this.safeInvoke("transcript_input", text);
+      return;
+    }
+    if (msgType === "agent_response") {
+      const evt = parsed["agent_response_event"] ?? parsed;
+      const text = evt["agent_response"] ?? evt["text"] ?? "";
+      this.safeInvoke("transcript_output", text);
+      this.agentSpeaking = true;
+      this.safeInvoke("response_start", { text });
+      return;
+    }
+    if (msgType === "interruption") {
+      this.finalizeAgentTurn();
+      this.safeInvoke("interruption", null);
+      return;
+    }
+    if (msgType === "error") {
+      const errText = parsed["message"] ?? parsed["error"] ?? JSON.stringify(parsed);
+      getLogger().error("ElevenLabs ConvAI error:", errText);
+      this.safeInvoke("error", errText);
+      return;
+    }
+  }
   sendAudio(audioBytes) {
     if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) return;
     this.ws.send(
       JSON.stringify({
-        type: "audio",
-        audio: audioBytes.toString("base64")
+        user_audio_chunk: audioBytes.toString("base64")
       })
     );
   }
   onEvent(callback) {
     this.eventCallback = callback;
   }
-  close() {
-    this.ws?.close();
-    this.ws = null;
-    this.eventCallback = null;
+  async close() {
+    this.clearSilenceTimer();
+    if (!this.ws) {
+      this.eventCallback = null;
+      return;
+    }
+    if (this.closePromise) {
+      await this.closePromise;
+      return;
+    }
+    const ws = this.ws;
+    this.closePromise = new Promise((resolve) => {
+      if (ws.readyState === WebSocket2.CLOSED || ws.readyState === WebSocket2.CLOSING) {
+        resolve();
+        return;
+      }
+      const done = () => {
+        resolve();
+      };
+      ws.once("close", done);
+      ws.once("error", done);
+      try {
+        ws.close();
+      } catch {
+        resolve();
+      }
+    });
+    try {
+      await this.closePromise;
+    } finally {
+      this.ws = null;
+      this.eventCallback = null;
+      this.closePromise = null;
+    }
   }
 };
@@ -258,21 +592,57 @@ async function createTTS(agent) {
 // src/pricing.ts
 var DEFAULT_PRICING = {
   // STT — per minute of audio processed
-  deepgram: { unit: "minute", price: 43e-4 },
+  // Deepgram Nova-3 streaming (monolingual) — the default model Patter ships.
+  // The previous $0.0043/min was the batch rate; streaming is $0.0077/min per
+  // deepgram.com/pricing. For multilingual Nova-3 ($0.0092/min) override.
+  deepgram: { unit: "minute", price: 77e-4 },
   whisper: { unit: "minute", price: 6e-3 },
-  // TTS — per 1,000 characters synthesized
-  elevenlabs: { unit: "1k_chars", price: 0.18 },
+  // AssemblyAI Universal-Streaming — $0.15/hr = $0.0025/min
+  assemblyai: { unit: "minute", price: 25e-4 },
+  // Cartesia ink-whisper streaming STT — ~$0.15/hr on usage plans
+  cartesia_stt: { unit: "minute", price: 25e-4 },
+  // Soniox real-time STT — $0.12/hr = $0.002/min
+  soniox: { unit: "minute", price: 2e-3 },
+  // Speechmatics Pro tier — $0.24/hr = $0.0040/min (new users land here).
+  // Previous $0.0173 default reflected a legacy Standard tier that was
+  // retired; users were being over-billed ~4.3x.
+  speechmatics: { unit: "minute", price: 4e-3 },
+  // TTS — per 1,000 characters synthesized.
+  // ElevenLabs default model is eleven_flash_v2_5 billed at $0.06/1k via the
+  // direct API. The previous $0.18 matched only the Creator plan overage.
+  elevenlabs: { unit: "1k_chars", price: 0.06 },
   openai_tts: { unit: "1k_chars", price: 0.015 },
-  // OpenAI Realtime — per token
+  openai_tts_hd: { unit: "1k_chars", price: 0.03 },
+  // Cartesia Sonic TTS — ~1 credit/char, effective $0.030/1k chars on usage plans
+  cartesia_tts: { unit: "1k_chars", price: 0.03 },
+  // Rime mist v2 — $0.030/1k chars pay-as-you-go
+  rime: { unit: "1k_chars", price: 0.03 },
+  // LMNT aurora/blizzard — $0.050/1k chars Indie overage
+  lmnt: { unit: "1k_chars", price: 0.05 },
+  // OpenAI Realtime — per token.
+  // Calibrated for gpt-4o-mini-realtime-preview (the Patter default):
+  //   audio  input  $10 / M  ->  0.00001 per token
+  //   audio  output $20 / M  ->  0.00002 per token
+  //   text   input  $0.60/ M ->  0.0000006 per token
+  //   text   output $2.40/ M ->  0.0000024 per token
+  // For gpt-4o-realtime-preview multiply by ~10, for gpt-realtime by ~3.
   openai_realtime: {
     unit: "token",
-    audio_input_per_token: 1e-4,
-    audio_output_per_token: 4e-4,
-    text_input_per_token: 5e-6,
-    text_output_per_token: 2e-5
+    audio_input_per_token: 1e-5,
+    audio_output_per_token: 2e-5,
+    text_input_per_token: 6e-7,
+    text_output_per_token: 24e-7,
+    // Prompt caching rates (official): audio cached $0.30/M ~= 3% of full,
+    // text cached $0.06/M = 10% of full. OpenAI bills the cached portion of
+    // input_token_details.audio_tokens / text_tokens at these reduced rates.
+    cached_audio_input_per_token: 3e-7,
+    cached_text_input_per_token: 6e-8
   },
-  // Telephony — per minute of call duration
-  twilio: { unit: "minute", price: 0.013 },
+  // Telephony — per minute of call duration.
+  // twilio default = US inbound local (the 99% case for voice agents receiving
+  // calls on a local number). For US toll-free inbound ($0.022/min) or US
+  // outbound local ($0.0140/min), override via Patter({ pricing: { twilio: {...} } }).
+  twilio: { unit: "minute", price: 85e-4 },
   telnyx: { unit: "minute", price: 7e-3 }
 };
 function mergePricing(overrides) {
@@ -281,22 +651,22 @@ function mergePricing(overrides) {
     merged[k] = { ...v };
   }
   if (!overrides) return merged;
-  for (const [provider, values] of Object.entries(overrides)) {
-    if (merged[provider]) {
-      merged[provider] = { ...merged[provider], ...values };
+  for (const [provider2, values] of Object.entries(overrides)) {
+    if (merged[provider2]) {
+      merged[provider2] = { ...merged[provider2], ...values };
     } else {
-      merged[provider] = { unit: "minute", ...values };
+      merged[provider2] = { ...values };
     }
   }
   return merged;
 }
-function calculateSttCost(provider, audioSeconds, pricing) {
-  const config = pricing[provider];
+function calculateSttCost(provider2, audioSeconds, pricing) {
+  const config = pricing[provider2];
   if (!config || config.unit !== "minute") return 0;
   return audioSeconds / 60 * (config.price ?? 0);
 }
-function calculateTtsCost(provider, characterCount, pricing) {
-  const config = pricing[provider];
+function calculateTtsCost(provider2, characterCount, pricing) {
+  const config = pricing[provider2];
   if (!config || config.unit !== "1k_chars") return 0;
   return characterCount / 1e3 * (config.price ?? 0);
 }
@@ -305,21 +675,126 @@ function calculateRealtimeCost(usage, pricing) {
   if (!config || config.unit !== "token") return 0;
   const input = usage.input_token_details ?? {};
   const output = usage.output_token_details ?? {};
+  const cachedAudioRate = config.cached_audio_input_per_token ?? config.audio_input_per_token ?? 0;
+  const cachedTextRate = config.cached_text_input_per_token ?? config.text_input_per_token ?? 0;
+  const totalAudioIn = input.audio_tokens ?? 0;
+  const totalTextIn = input.text_tokens ?? 0;
+  let cachedAudioIn;
+  let cachedTextIn;
+  const details = input.cached_tokens_details;
+  if (details && (details.audio_tokens !== void 0 || details.text_tokens !== void 0)) {
+    cachedAudioIn = Math.min(details.audio_tokens ?? 0, totalAudioIn);
+    cachedTextIn = Math.min(details.text_tokens ?? 0, totalTextIn);
+  } else if (input.cached_tokens && input.cached_tokens > 0) {
+    const totalIn = totalAudioIn + totalTextIn;
+    const ratio = totalIn > 0 ? input.cached_tokens / totalIn : 0;
+    cachedAudioIn = Math.min(Math.round(totalAudioIn * ratio), totalAudioIn);
+    cachedTextIn = Math.min(Math.round(totalTextIn * ratio), totalTextIn);
+  } else {
+    cachedAudioIn = 0;
+    cachedTextIn = 0;
+  }
   let cost = 0;
-  cost += (input.audio_tokens ?? 0) * (config.audio_input_per_token ?? 0);
-  cost += (input.text_tokens ?? 0) * (config.text_input_per_token ?? 0);
+  cost += (totalAudioIn - cachedAudioIn) * (config.audio_input_per_token ?? 0);
+  cost += cachedAudioIn * cachedAudioRate;
+  cost += (totalTextIn - cachedTextIn) * (config.text_input_per_token ?? 0);
+  cost += cachedTextIn * cachedTextRate;
   cost += (output.audio_tokens ?? 0) * (config.audio_output_per_token ?? 0);
   cost += (output.text_tokens ?? 0) * (config.text_output_per_token ?? 0);
-  return cost;
+  return Math.max(0, cost);
+}
+function calculateRealtimeCachedSavings(usage, pricing) {
+  const config = pricing.openai_realtime;
+  if (!config || config.unit !== "token") return 0;
+  const input = usage.input_token_details ?? {};
+  const cached = input.cached_tokens_details ?? {};
+  const cachedAudioRate = config.cached_audio_input_per_token ?? config.audio_input_per_token ?? 0;
+  const cachedTextRate = config.cached_text_input_per_token ?? config.text_input_per_token ?? 0;
+  const cachedAudio = Math.min(cached.audio_tokens ?? 0, input.audio_tokens ?? 0);
+  const cachedText = Math.min(cached.text_tokens ?? 0, input.text_tokens ?? 0);
+  const fullAudio = cachedAudio * (config.audio_input_per_token ?? 0);
+  const fullText = cachedText * (config.text_input_per_token ?? 0);
+  const discountedAudio = cachedAudio * cachedAudioRate;
+  const discountedText = cachedText * cachedTextRate;
+  return Math.max(0, fullAudio + fullText - (discountedAudio + discountedText));
+}
+var llmPricing = {
+  anthropic: {
+    "claude-opus-4-7": {
+      input: 15,
+      output: 75,
+      cache_read: 1.5,
+      cache_write: 18.75
+    },
+    "claude-sonnet-4-6": {
+      input: 3,
+      output: 15,
+      cache_read: 0.3,
+      cache_write: 3.75
+    },
+    "claude-haiku-4-5": {
+      input: 1,
+      output: 5,
+      cache_read: 0.1,
+      cache_write: 1.25
+    }
+  },
+  google: {
+    "gemini-2.5-pro": { input: 1.25, output: 10 },
+    "gemini-2.5-flash": { input: 0.3, output: 2.5 },
+    "gemini-live-2.5-flash-native-audio": { input: 0.3, output: 2.5 }
+  },
+  groq: {
+    "llama-3.3-70b-versatile": { input: 0.59, output: 0.79 },
+    "llama-3.1-8b-instant": { input: 0.05, output: 0.08 }
+  },
+  cerebras: {
+    "llama-3.3-70b": { input: 0.85, output: 1.2 },
+    "qwen-3-32b": { input: 0.4, output: 0.8 }
+  },
+  // OpenAI Chat Completions (non-Realtime) — mirrors sdk-py pricing table.
+  // Rates are per 1M tokens (USD), cache_read = cached input rate.
+  openai: {
+    "gpt-4o": { input: 2.5, output: 10, cache_read: 1.25 },
+    "gpt-4o-mini": { input: 0.15, output: 0.6, cache_read: 0.075 },
+    "gpt-4.1": { input: 3, output: 12, cache_read: 0.75 },
+    "gpt-4.1-mini": { input: 0.8, output: 3.2, cache_read: 0.2 },
+    "o3": { input: 2, output: 8, cache_read: 0.5 },
+    "o4-mini": { input: 1.1, output: 4.4, cache_read: 0.275 }
+  }
+};
+function calculateLlmCost(provider2, model, inputTokens, outputTokens, cacheReadTokens = 0, cacheWriteTokens = 0) {
+  const providerTable = llmPricing[provider2];
+  if (!providerTable) return 0;
+  let rates = providerTable[model];
+  if (!rates) {
+    let bestKey = "";
+    for (const key of Object.keys(providerTable)) {
+      if (model.startsWith(key) && key.length > bestKey.length) {
+        bestKey = key;
+      }
+    }
+    if (bestKey) rates = providerTable[bestKey];
+  }
+  if (!rates) return 0;
+  let cost = 0;
+  cost += inputTokens / 1e6 * (rates.input ?? 0);
+  cost += outputTokens / 1e6 * (rates.output ?? 0);
+  cost += cacheReadTokens / 1e6 * (rates.cache_read ?? 0);
+  cost += cacheWriteTokens / 1e6 * (rates.cache_write ?? 0);
+  return Math.max(0, cost);
 }
-function calculateTelephonyCost(provider, durationSeconds, pricing) {
-  const config = pricing[provider];
+function calculateTelephonyCost(provider2, durationSeconds, pricing) {
+  const config = pricing[provider2];
   if (!config || config.unit !== "minute") return 0;
-  return durationSeconds / 60 * (config.price ?? 0);
+  const minutes = provider2 === "twilio" ? Math.ceil(durationSeconds / 60) : durationSeconds / 60;
+  return minutes * (config.price ?? 0);
 }
 // src/dashboard/store.ts
 import { EventEmitter } from "events";
+import * as fs from "fs";
+import * as path from "path";
 var MetricsStore = class extends EventEmitter {
   maxCalls;
   calls = [];
@@ -482,6 +957,10 @@ var MetricsStore = class extends EventEmitter {
     }
     return null;
   }
+  /** Look up an active call by id (returns undefined if not active or unknown). */
+  getActive(callId) { // single-entry lookup in the activeCalls collection
+    return this.activeCalls.get(callId);
+  }
   getActiveCalls() { // returns a fresh array holding every value currently in activeCalls
     return Array.from(this.activeCalls.values());
   }
@@ -547,7 +1026,102 @@ var MetricsStore = class extends EventEmitter {
   get callCount() { // number of call records currently held in this.calls
     return this.calls.length;
   }
+  /**
+   * Rebuild the in-memory call list from `metadata.json` files written by
+   * `CallLogger` under `<logRoot>/calls/YYYY/MM/DD/<call_id>/`. Idempotent:
+   * call_ids already in the store are skipped. Errors per file are logged
+   * and swallowed so a single corrupt entry doesn't block hydration.
+   *
+   * Returns the number of calls newly added to the store.
+   *
+   * Safe to call before any traffic; intended to run once at server startup.
+   */
+  hydrate(logRoot) { // Loads past calls from metadata.json files under logRoot/calls and appends them to this.calls.
+    if (!logRoot) return 0; // no log root configured: nothing to load
+    const callsRoot = path.join(logRoot, "calls");
+    if (!fs.existsSync(callsRoot)) return 0;
+    const collected = [];
+    const seen = new Set(this.calls.map((c) => c.call_id)); // call_ids already held in memory, extended as records are collected
+    const walk = (dir, depth) => { // depths 0 to 2 descend through all-digit directory names; depth 3 entries are treated as call directories
+      let entries;
+      try {
+        entries = fs.readdirSync(dir, { withFileTypes: true });
+      } catch {
+        return; // unreadable directory: skip it silently
+      }
+      for (const entry of entries) {
+        const childPath = path.join(dir, entry.name);
+        if (depth < 3) { // still above the call-directory level
+          if (entry.isDirectory() && /^\d+$/.test(entry.name)) {
+            walk(childPath, depth + 1);
+          }
+          continue;
+        }
+        if (!entry.isDirectory()) continue;
+        const metadataPath = path.join(childPath, "metadata.json");
+        if (!fs.existsSync(metadataPath)) continue;
+        try {
+          const raw = fs.readFileSync(metadataPath, "utf8");
+          const meta = JSON.parse(raw);
+          const callId = meta.call_id || entry.name; // fall back to the directory name when metadata has no call_id
+          if (!callId || seen.has(callId)) continue; // skip entries that are already known
+          const record = metadataToCallRecord(callId, meta);
+          if (record === null) {
+            getLogger().debug(
+              `MetricsStore.hydrate: skipping ${metadataPath}: unparseable started_at`
+            );
+            continue;
+          }
+          collected.push(record);
+          seen.add(callId);
+        } catch (err) { // read or parse failures are logged at debug level and the entry is skipped
+          getLogger().debug(
+            `MetricsStore.hydrate: skipping ${metadataPath}: ${String(err)}`
+          );
+        }
+      }
+    };
+    walk(callsRoot, 0);
+    collected.sort((a, b) => (a.started_at || 0) - (b.started_at || 0)); // ascending by started_at
+    for (const rec of collected) {
+      if (this.calls.some((c) => c.call_id === rec.call_id)) continue; // second duplicate check, this time against the live list
+      this.calls.push(rec);
+      if (this.calls.length > this.maxCalls) { // keep only the last maxCalls entries
+        this.calls = this.calls.slice(-this.maxCalls);
+      }
+    }
+    return collected.length; // number of records collected by this pass
+  }
 };
+function metadataToCallRecord(callId, meta) { // Builds a call record from a parsed metadata object; returns null when started_at cannot be parsed.
+  const startedAt = parseTimestamp(meta.started_at);
+  if (startedAt === null) return null;
+  const endedAt = parseTimestamp(meta.ended_at); // null when ended_at is missing or unparseable
+  const status = meta.status || "completed"; // falls back to completed when no status is stored
+  const metrics = meta.metrics && typeof meta.metrics === "object" ? meta.metrics : null; // kept only when it is an object
+  const transcript = Array.isArray(meta.transcript) ? meta.transcript : []; // anything that is not an array becomes an empty list
+  return {
+    call_id: callId,
+    caller: meta.caller || "",
+    callee: meta.callee || "",
+    direction: meta.direction || "inbound",
+    started_at: startedAt,
+    ended_at: endedAt ?? void 0, // a null ended_at is stored as undefined
+    status,
+    metrics,
+    transcript
+  };
+}
+function parseTimestamp(raw) { // Converts a raw timestamp to a number; returns null for non-finite, unparseable, or non-number and non-string input.
+  if (typeof raw === "number") {
+    return Number.isFinite(raw) ? raw : null; // finite numbers pass through unchanged (presumably already epoch seconds, TODO confirm)
+  }
+  if (typeof raw === "string") {
+    const ms = Date.parse(raw); // epoch milliseconds, or NaN when the string cannot be parsed
+    return Number.isFinite(ms) ? ms / 1e3 : null; // milliseconds are converted to seconds
+  }
+  return null;
+}
 // src/dashboard/auth.ts
 import crypto from "crypto";
@@ -1628,10 +2202,52 @@ function isWebSocketUrl(url) {
 // src/providers/deepgram-stt.ts
 import WebSocket3 from "ws";
+// src/errors.ts
+var PatterError = class extends Error { // Base error class that the other error classes in this module extend.
+  constructor(message) {
+    super(message);
+    this.name = "PatterError"; // replaces the default Error name
+  }
+};
+var PatterConnectionError = class extends PatterError { // Subclass of PatterError; DeepgramSTT uses it for connect timeouts, failed upgrades and unexpected closes.
+  constructor(message) {
+    super(message);
+    this.name = "PatterConnectionError"; // replaces the name set by the parent constructor
+  }
+};
+var AuthenticationError = class extends PatterError { // Subclass of PatterError; DeepgramSTT.openSocket rejects with it on HTTP 401 or 403.
+  constructor(message) {
+    super(message);
+    this.name = "AuthenticationError"; // replaces the name set by the parent constructor
+  }
+};
+var ProvisionError = class extends PatterError { // Subclass of PatterError; presumably for provisioning failures (no usage is visible in this part of the file).
+  constructor(message) {
+    super(message);
+    this.name = "ProvisionError"; // replaces the name set by the parent constructor
+  }
+};
+var RateLimitError = class extends PatterConnectionError { // Subclass of PatterConnectionError; DeepgramSTT.openSocket rejects with it on HTTP 429.
+  constructor(message) {
+    super(message);
+    this.name = "RateLimitError"; // replaces the name set by the parent constructor
+  }
+};
+// src/providers/deepgram-stt.ts
 var DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/listen"; // base URL; buildUrl appends the query string
+var KEEPALIVE_INTERVAL_MS = 4e3; // period of the KeepAlive timer started in openSocket
+var FINALIZE_DRAIN_MS = 100; // delay between Finalize and CloseStream in close()
+var CLOSE_LATENCY_BUDGET_MS = 500; // upper bound applied to the drain delay in close()
+var RECONNECT_CLOSE_CODES = /* @__PURE__ */ new Set([1006, 1011]); // close codes that trigger the single reconnect attempt in handleClose
 var DeepgramSTT = class _DeepgramSTT {
   ws = null;
-  callbacks = [];
+  transcriptCallbacks = /* @__PURE__ */ new Set();
+  errorCallbacks = /* @__PURE__ */ new Set();
+  keepaliveTimer = null;
+  running = false;
+  reconnectAttempted = false;
   /** Request ID from Deepgram — used to query actual cost post-call. */
   requestId = "";
   apiKey;
@@ -1653,7 +2269,7 @@ var DeepgramSTT = class _DeepgramSTT {
     this.sampleRate = sampleRate ?? opts.sampleRate ?? 16e3;
     this.endpointingMs = opts.endpointingMs ?? 150;
     this.utteranceEndMs = opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3;
-    this.smartFormat = opts.smartFormat ?? true;
+    this.smartFormat = opts.smartFormat ?? false;
     this.interimResults = opts.interimResults ?? true;
     this.vadEvents = opts.vadEvents ?? true;
   }
@@ -1661,7 +2277,7 @@ var DeepgramSTT = class _DeepgramSTT {
   static forTwilio(apiKey, language = "en", model = "nova-3", options = {}) { // factory preset that passes mulaw and 8e3 as the 4th and 5th constructor arguments (presumably encoding and sample rate, TODO confirm)
     return new _DeepgramSTT(apiKey, language, model, "mulaw", 8e3, options);
   }
-  async connect() {
+  buildUrl() {
     const params = new URLSearchParams({
       model: this.model,
       language: this.language,
@@ -1677,72 +2293,204 @@ var DeepgramSTT = class _DeepgramSTT {
     if (this.utteranceEndMs !== null) {
       params.set("utterance_end_ms", String(Math.max(this.utteranceEndMs, 1e3)));
     }
-    const url = `${DEEPGRAM_WS_URL}?${params.toString()}`;
-    this.ws = new WebSocket3(url, {
+    return `${DEEPGRAM_WS_URL}?${params.toString()}`;
+  }
+  async connect() { // Opens the socket, then marks the instance as running and re-arms the one-shot reconnect.
+    await this.openSocket();
+    this.running = true; // only reached when openSocket resolved
+    this.reconnectAttempted = false;
+  }
+  async openSocket() { // Creates the WebSocket, waits for it to open or fail, then wires the runtime handlers and the keepalive timer.
+    const url = this.buildUrl();
+    const ws = new WebSocket3(url, {
       headers: { Authorization: `Token ${this.apiKey}` }
     });
+    this.ws = ws; // stored before the handshake completes
     await new Promise((resolve, reject) => {
-      const timer = setTimeout(() => reject(new Error("Deepgram connect timeout")), 1e4);
-      this.ws.once("open", () => {
-        clearTimeout(timer);
-        resolve();
-      });
-      this.ws.once("error", (err) => {
+      let settled = false; // guards against resolving or rejecting more than once
+      const settle = (fn) => {
+        if (settled) return;
+        settled = true;
         clearTimeout(timer);
-        reject(err);
+        fn();
+      };
+      const timer = setTimeout(
+        () => settle(() => reject(new PatterConnectionError("Deepgram connect timeout"))),
+        1e4 // connect timeout in milliseconds
+      );
+      ws.once("open", () => settle(resolve));
+      ws.once("error", (err) => settle(() => reject(err)));
+      ws.once("unexpected-response", (_req, res) => { // the server answered the upgrade request with a plain HTTP response
+        const status = res?.statusCode ?? 0; // 0 when no status code is available
+        settle(() => {
+          if (status === 401 || status === 403) { // credentials rejected
+            reject(new AuthenticationError(`Deepgram rejected the API key (HTTP ${status}).`));
+            return;
+          }
+          if (status === 429) { // rate limited
+            reject(new RateLimitError("Deepgram rate limit exceeded (HTTP 429)."));
+            return;
+          }
+          reject(new PatterConnectionError(`Deepgram WebSocket upgrade failed (HTTP ${status}).`)); // any other status is a generic connection error
+        });
       });
     });
-    this.ws.on("message", (raw) => {
-      let data;
-      try {
-        data = JSON.parse(raw.toString());
-      } catch {
-        return;
-      }
-      if (data.type === "Metadata" && data.request_id) {
-        this.requestId = data.request_id;
-        return;
+    ws.on("message", (raw) => this.handleMessage(raw.toString())); // runtime handlers are attached only after the socket opened
+    ws.on("close", (code, reason) => this.handleClose(code, reason.toString()));
+    ws.on("error", (err) => this.handleError(err));
+    this.keepaliveTimer = setInterval(() => { // periodically send a KeepAlive message while the current socket is open
+      if (this.ws && this.ws.readyState === WebSocket3.OPEN) {
+        try {
+          this.ws.send(JSON.stringify({ type: "KeepAlive" }));
+        } catch { // send failures are deliberately ignored
+        }
       }
-      if (data.type !== "Results") return;
-      const alternatives = data.channel?.alternatives ?? [];
-      if (!alternatives.length) return;
-      const best = alternatives[0];
-      const text = (best.transcript ?? "").trim();
-      if (!text) return;
-      const transcript = {
-        text,
-        isFinal: Boolean(data.is_final) || Boolean(data.speech_final),
-        confidence: best.confidence ?? 0
-      };
-      for (const cb of this.callbacks) {
+    }, KEEPALIVE_INTERVAL_MS);
+  }
+  clearKeepalive() { // stops the KeepAlive interval if one is running; safe to call repeatedly
+    if (this.keepaliveTimer) {
+      clearInterval(this.keepaliveTimer);
+      this.keepaliveTimer = null;
+    }
+  }
+  handleMessage(raw) { // Parses one raw message string and turns Metadata, SpeechStarted, UtteranceEnd and Results payloads into state updates or transcript events.
+    let data;
+    try {
+      data = JSON.parse(raw);
+    } catch {
+      return; // not valid JSON: ignore the message
+    }
+    if (data.type === "Metadata" && data.request_id) { // remember the request id; nothing is emitted for Metadata
+      this.requestId = data.request_id;
+      return;
+    }
+    if (data.type === "SpeechStarted") { // surfaced as an empty, non-final transcript event
+      this.emitTranscript({
+        text: "",
+        isFinal: false,
+        confidence: 0,
+        eventType: "SpeechStarted",
+        requestId: this.requestId || void 0
+      });
+      return;
+    }
+    if (data.type === "UtteranceEnd") { // surfaced as an empty transcript event flagged final
+      this.emitTranscript({
+        text: "",
+        isFinal: true,
+        confidence: 0,
+        eventType: "UtteranceEnd",
+        requestId: this.requestId || void 0
+      });
+      return;
+    }
+    if (data.type !== "Results") return; // every other message type is ignored
+    const alternatives = data.channel?.alternatives ?? [];
+    if (!alternatives.length) return;
+    const best = alternatives[0]; // only the first alternative is used
+    const text = (best.transcript ?? "").trim();
+    if (!text) return; // empty transcripts are dropped
+    const speechFinal = Boolean(data.speech_final);
+    const transcript = {
+      text,
+      isFinal: Boolean(data.is_final) || speechFinal, // final when either is_final or speech_final is set
+      confidence: best.confidence ?? 0,
+      speechFinal,
+      fromFinalize: Boolean(data.from_finalize),
+      requestId: this.requestId || void 0, // an empty request id is reported as undefined
+      words: best.words,
+      eventType: "Results"
+    };
+    this.emitTranscript(transcript);
+  }
+  emitTranscript(transcript) { // delivers the event to every registered transcript callback
+    for (const cb of this.transcriptCallbacks) {
+      try {
         cb(transcript);
+      } catch (err) { // a throwing listener is logged and does not stop delivery to the remaining listeners
+        getLogger().error(`DeepgramSTT transcript callback threw: ${String(err)}`);
       }
-    });
+    }
+  }
+  emitError(err) { // delivers the error to every registered error callback
+    for (const cb of this.errorCallbacks) {
+      try {
+        cb(err);
+      } catch (cbErr) { // a throwing listener is logged and does not stop delivery to the remaining listeners
+        getLogger().error(`DeepgramSTT error callback threw: ${String(cbErr)}`);
+      }
+    }
+  }
+  handleError(err) { // socket error handler: logs the message, then forwards the error to listeners
+    getLogger().error(`DeepgramSTT WebSocket error: ${err.message}`);
+    this.emitError(err);
+  }
+  handleClose(code, reason) { // Close handler: stops the keepalive, reports the close to error listeners, and retries once for the codes in RECONNECT_CLOSE_CODES.
+    this.clearKeepalive();
+    if (!this.running) { // not running (for example after close() was called): nothing to report
+      return;
+    }
+    const closeError = new PatterConnectionError(
+      `Deepgram WebSocket closed (code=${code}${reason ? `, reason=${reason}` : ""}).`
+    );
+    this.emitError(closeError);
+    if (RECONNECT_CLOSE_CODES.has(code) && !this.reconnectAttempted) { // the flag is only re-armed by connect(), so at most one retry per connect() call
+      this.reconnectAttempted = true;
+      this.openSocket().catch((err) => {
+        this.running = false; // reconnect failed: give up and surface the error
+        this.emitError(err instanceof Error ? err : new Error(String(err)));
+      });
+    } else {
+      this.running = false; // other close codes, or a second drop, end the session
+    }
   }
   sendAudio(audio) { // forwards one audio chunk to the socket
     if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return; // silently dropped while the socket is not open
+    if (audio.length === 0) return; // empty chunks are never sent
     this.ws.send(audio);
   }
   onTranscript(callback) {
-    if (this.callbacks.length >= 10) {
-      getLogger().warn("DeepgramSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
-      this.callbacks[this.callbacks.length - 1] = callback;
-      return;
-    }
-    this.callbacks.push(callback);
+    this.transcriptCallbacks.add(callback); // Set semantics: registering the same function twice keeps a single entry
   }
-  close() {
-    if (this.ws) {
-      try {
-        this.ws.send(JSON.stringify({ type: "CloseStream" }));
-      } catch {
-      }
-      this.ws.close();
-      this.ws = null;
-    }
+  offTranscript(callback) { // unregisters a transcript listener added with onTranscript
+    this.transcriptCallbacks.delete(callback);
   }
-};
+  onError(callback) { // registers a listener that receives errors passed to emitError
+    this.errorCallbacks.add(callback);
+  }
+  offError(callback) { // unregisters an error listener added with onError
+    this.errorCallbacks.delete(callback);
+  }
+  close() { // Shuts the stream down: sends Finalize, waits briefly, then sends CloseStream and closes the socket.
+    this.running = false; // set first so handleClose treats the coming close event as intentional
+    this.clearKeepalive();
+    const ws = this.ws;
+    if (!ws) return;
+    this.ws = null; // detach immediately; later sendAudio calls become no-ops
+    const sendSafe = (payload) => { // sends only while the socket is open and ignores send errors
+      if (ws.readyState === WebSocket3.OPEN) {
+        try {
+          ws.send(payload);
+        } catch {
+        }
+      }
+    };
+    const finishClose = () => {
+      sendSafe(JSON.stringify({ type: "CloseStream" }));
+      try {
+        ws.close();
+      } catch { // close failures are ignored
+      }
+    };
+    if (ws.readyState !== WebSocket3.OPEN) { // socket not open: skip Finalize and close right away
+      finishClose();
+      return;
+    }
+    sendSafe(JSON.stringify({ type: "Finalize" }));
+    setTimeout(finishClose, Math.min(FINALIZE_DRAIN_MS, CLOSE_LATENCY_BUDGET_MS)); // wait for the shorter of the two delays before finishing
+  }
+};
 // src/metrics.ts
 function round(value, decimals) {
   const factor = 10 ** decimals;
@@ -1752,11 +2500,16 @@ function hrTimeMs() {
   const [sec, ns] = process.hrtime();
   return sec * 1e3 + ns / 1e6;
 }
-function p95(values) {
+function percentile(values, p) { // Returns the p-quantile of values (p is a fraction such as 0.95), interpolating linearly between neighbouring sorted samples.
   if (values.length === 0) return 0; // empty input yields 0
   const sorted = [...values].sort((a, b) => a - b); // sort a copy so the input array is left untouched
-  const idx = Math.min(Math.floor(sorted.length * 0.95), sorted.length - 1);
-  return sorted[idx];
+  if (sorted.length === 1) return sorted[0];
+  const rank = p * (sorted.length - 1); // fractional index into the sorted array
+  const lo = Math.floor(rank);
+  const hi = Math.ceil(rank);
+  if (lo === hi) return sorted[lo]; // rank falls exactly on a sample
+  const frac = rank - lo;
+  return sorted[lo] + (sorted[hi] - sorted[lo]) * frac;
 }
 var CallMetricsAccumulator = class {
   callId;
@@ -1771,19 +2524,54 @@ var CallMetricsAccumulator = class {
   // Per-turn timing state
   _turnStart = null;
   _sttComplete = null;
+  _llmFirstToken = null;
+  _llmFirstSentenceComplete = null;
   _llmComplete = null;
   _ttsFirstByte = null;
+  /** Last TTS audio byte sent (hrTimeMs). Stamped by ``recordTtsComplete`` /
+   *  ``recordTtsCompleteTs``. Used to compute ``tts_total_ms``. */
+  _ttsLastByte = null;
+  /** Endpoint signal (hrTimeMs) — VAD stop or STT speech_final, whichever
+   *  fires first. Used to compute ``endpoint_ms``. */
+  _endpointSignalAt = null;
+  /** Monotonic stamp of LLM dispatch (paired with ``_endpointSignalAt``). */
+  _turnCommittedMono = null;
+  /** Barge-in detected timestamp (hrTimeMs). */
+  _bargeinDetectedAt = null;
+  /** TTS-stopped timestamp after barge-in (hrTimeMs). */
+  _bargeinStoppedAt = null;
   _turnUserText = "";
   _turnSttAudioSeconds = 0;
   // Cumulative usage counters
   _totalSttAudioSeconds = 0;
   _totalTtsCharacters = 0;
   _totalRealtimeCost = 0;
+  _totalRealtimeCachedSavings = 0;
   _sttByteCount = 0;
   _sttSampleRate = 16e3;
   _sttBytesPerSample = 2;
   _actualTelephonyCost = null;
   _actualSttCost = null;
+  // Fix 10: accumulated LLM token cost for non-Realtime pipeline mode.
+  _totalLlmCost = 0;
+  // ---- EventBus integration (item 3) ----
+  _eventBus;
+  // ---- EOUMetrics — 4 timestamps (item 4) ----
+  /** Timestamp (hrTimeMs) when VAD emitted speech_end. */
+  _vadStoppedAt = null;
+  /** Timestamp (hrTimeMs) when STT emitted its final transcript. */
+  _sttFinalAt = null;
+  /** Timestamp (hrTimeMs) when the transcript was committed to the LLM. */
+  _turnCommittedAt = null;
+  /** Delta (ms) from turn-committed to on_user_turn_completed hook done. */
+  _onUserTurnCompletedDelayMs = null;
+  // ---- InterruptionMetrics — simplified no-ML (item 5) ----
+  _numInterruptions = 0;
+  _numBackchannels = 0;
+  _overlapStartedAt = null;
+  // ---- report_only_initial_ttfb (item 6) ----
+  _reportOnlyInitialTtfb;
+  _initialTtfbEmitted = false;
   constructor(opts) {
     this.callId = opts.callId;
     this.providerMode = opts.providerMode;
@@ -1793,6 +2581,15 @@ var CallMetricsAccumulator = class {
     this.llmProvider = opts.llmProvider ?? "";
     this._pricing = mergePricing(opts.pricing);
     this._callStart = hrTimeMs();
+    this._eventBus = opts.eventBus;
+    this._reportOnlyInitialTtfb = opts.reportOnlyInitialTtfb ?? false;
+  }
+  /**
+   * Attach (or replace) an EventBus after construction.
+   * Useful when the bus is created after the accumulator (e.g. in tests).
+   */
+  attachEventBus(bus) { // later emits go to this bus; any previously attached bus is replaced
+    this._eventBus = bus;
   }
   /** Configure audio format for STT byte-to-seconds conversion. */
   configureSttFormat(sampleRate = 16e3, bytesPerSample = 2) {
@@ -1807,17 +2604,60 @@ var CallMetricsAccumulator = class {
   startTurn() { // Opens a new turn: stamps the turn start time and clears every per-turn timestamp.
     this._turnStart = hrTimeMs();
     this._sttComplete = null;
+    this._llmFirstToken = null;
+    this._llmFirstSentenceComplete = null;
     this._llmComplete = null;
     this._ttsFirstByte = null;
+    this._ttsLastByte = null;
+    this._endpointSignalAt = null;
+    this._turnCommittedMono = null;
+    this._bargeinDetectedAt = null;
+    this._bargeinStoppedAt = null;
     this._turnUserText = "";
     this._turnSttAudioSeconds = 0;
+    this._vadStoppedAt = null; // the four EOU fields below are cleared here; _resetTurnState leaves them untouched
+    this._sttFinalAt = null;
+    this._turnCommittedAt = null;
+    this._onUserTurnCompletedDelayMs = null;
+    this._eventBus?.emit("turn_started", { callId: this.callId }); // emitted only when an event bus is attached
+  }
+  /**
+   * Start a new turn only if no turn is currently open.
+   * Use this at inbound-audio ingestion points so the turn timer begins
+   * on the first audio byte rather than just before recordSttComplete().
+   */
+  startTurnIfIdle() {
+    if (this._turnStart === null) { // a null _turnStart means no turn is currently open
+      this.startTurn();
+    }
   }
   recordSttComplete(text, audioSeconds = 0) { // Stamps STT completion for the current turn and stores the user text and audio duration.
     this._sttComplete = hrTimeMs();
+    this._sttFinalAt = this._sttComplete; // the EOU STT-final stamp shares the same instant
+    if (this._endpointSignalAt === null) { // first endpoint signal wins; an earlier VAD stop is not overwritten
+      this._endpointSignalAt = this._sttComplete;
+    }
     this._turnUserText = text;
     this._turnSttAudioSeconds = audioSeconds;
     this._totalSttAudioSeconds += audioSeconds; // running total across the whole call
   }
+  /** Record the timestamp of the first LLM token (TTFT). No-op after first call. */
+  recordLlmFirstToken() {
+    if (this._llmFirstToken === null) { // only the first call in a turn stamps the time
+      this._llmFirstToken = hrTimeMs();
+    }
+  }
+  /**
+   * Record when the sentence chunker emits the first complete sentence.
+   * Used as the TTS span start so tts_ms reflects true TTS-provider latency
+   * rather than the gap from llm_complete (which fires after the full response).
+   * No-op after first call.
+   */
+  recordLlmFirstSentenceComplete() {
+    if (this._llmFirstSentenceComplete === null) { // only the first call in a turn stamps the time
+      this._llmFirstSentenceComplete = hrTimeMs();
+    }
+  }
   recordLlmComplete() { // stamps LLM completion; unlike the first-token recorder, each call overwrites the previous stamp
     this._llmComplete = hrTimeMs();
   }
@@ -1825,9 +2665,40 @@ var CallMetricsAccumulator = class {
     if (this._ttsFirstByte === null) {
       this._ttsFirstByte = hrTimeMs();
     }
+    if (this._reportOnlyInitialTtfb && this._initialTtfbEmitted) {
+      return;
+    }
+    this._initialTtfbEmitted = true;
   }
   recordTtsComplete(text) { // adds the synthesized text length to the TTS character total and stamps the last-byte time
     this._totalTtsCharacters += text.length;
+    if (this._ttsLastByte === null) { // keeps an existing stamp, for example one set through recordTtsCompleteTs
+      this._ttsLastByte = hrTimeMs();
+    }
+  }
+  /**
+   * Capture the timestamp when the last TTS audio byte was sent on the wire.
+   * Useful when the caller wants to record the timing without bumping the
+   * character counter (e.g. interrupted turns where audio actually went out
+   * but synthesis was truncated).
+   */
+  recordTtsCompleteTs(ts) {
+    this._ttsLastByte = ts ?? hrTimeMs(); // uses ts when given, otherwise the current clock; always overwrites
+  }
+  /**
+   * Mark the moment a user interrupt (barge-in) was detected. Pairs with
+   * ``recordTtsStopped`` to compute ``bargein_ms``.
+   */
+  recordBargeinDetected(ts) {
+    this._bargeinDetectedAt = ts ?? hrTimeMs(); // uses ts when given, otherwise the current clock
+  }
+  /**
+   * Mark the moment TTS playback was actually halted after a barge-in. Call
+   * this *after* ``sendClear`` returns. Pairs with ``recordBargeinDetected``
+   * to compute ``bargein_ms``.
+   */
+  recordTtsStopped(ts) {
+    this._bargeinStoppedAt = ts ?? hrTimeMs(); // uses ts when given, otherwise the current clock
   }
   recordTurnComplete(agentText) {
     const latency = this._computeTurnLatency();
@@ -1842,6 +2713,8 @@ var CallMetricsAccumulator = class {
     };
     this._turns.push(turn);
     this._resetTurnState();
+    this._eventBus?.emit("turn_ended", { callId: this.callId, turn });
+    this._eventBus?.emit("metrics_collected", { callId: this.callId, turn });
     return turn;
   }
   recordTurnInterrupted() {
@@ -1860,12 +2733,111 @@ var CallMetricsAccumulator = class {
     this._resetTurnState();
     return turn;
   }
+  // ---- EOU metrics (item 4) ----
+  /**
+   * Record the moment VAD emitted speech_end for the current utterance.
+   * @param ts Optional override timestamp in hrTimeMs units (defaults to now).
+   */
+  recordVadStop(ts) {
+    this._vadStoppedAt = ts ?? hrTimeMs();
+    if (this._endpointSignalAt === null) { // first endpoint signal wins; a later STT final does not overwrite it
+      this._endpointSignalAt = this._vadStoppedAt;
+    }
+  }
+  /**
+   * Record the moment the STT provider delivered its final transcript.
+   * Aliased to the same instant as recordSttComplete() when called from
+   * the standard pipeline; can be called independently for custom pipelines.
+   * @param ts Optional override timestamp in hrTimeMs units.
+   */
+  recordSttFinalTimestamp(ts) {
+    this._sttFinalAt = ts ?? hrTimeMs();
+    if (this._endpointSignalAt === null) { // first endpoint signal wins; an earlier VAD stop is not overwritten
+      this._endpointSignalAt = this._sttFinalAt;
+    }
+  }
+  /**
+   * Record the moment the transcript was committed to the LLM (turn start).
+   * After this call, ``emitEouMetrics()`` can produce a complete EOUMetrics payload.
+   * @param ts Optional override timestamp in hrTimeMs units.
+   */
+  recordTurnCommitted(ts) {
+    this._turnCommittedAt = ts ?? hrTimeMs(); // honours the ts override
+    this._turnCommittedMono = hrTimeMs(); // always the current clock, even when ts is supplied
+    this.emitEouMetrics(); // emits only if the VAD stop and STT final stamps are also present
+  }
+  /**
+   * Record the delta (ms) between turn-committed and when on_user_turn_completed
+   * pipeline hook finished.  Stored for inclusion in the next ``emitEouMetrics``
+   * call (or an explicit re-emit if desired).
+   */
+  recordOnUserTurnCompletedDelay(delayMs) {
+    this._onUserTurnCompletedDelayMs = delayMs; // stored only; nothing is emitted from here
+  }
+  /**
+   * Compute and emit EOUMetrics when all three prerequisite timestamps are
+   * available (VAD stop, STT final, turn committed).
+   *
+   * ``endOfUtteranceDelay``     = sttFinal − vadStopped  (ms)
+   * ``transcriptionDelay``       = turnCommitted − vadStopped  (ms)
+   * ``onUserTurnCompletedDelay`` = caller-supplied delta (ms) or 0
+   */
+  emitEouMetrics() {
+    if (this._vadStoppedAt === null || this._sttFinalAt === null || this._turnCommittedAt === null) { // any missing timestamp: do nothing
+      return;
+    }
+    const payload = {
+      timestamp: Date.now() / 1e3, // wall-clock time in seconds
+      endOfUtteranceDelay: Math.max(0, this._sttFinalAt - this._vadStoppedAt), // negative differences are clamped to 0
+      transcriptionDelay: Math.max(0, this._turnCommittedAt - this._vadStoppedAt),
+      onUserTurnCompletedDelay: this._onUserTurnCompletedDelayMs ?? 0
+    };
+    this._eventBus?.emit("eou_metrics", payload); // no-op when no event bus is attached
+  }
+  // ---- InterruptionMetrics (item 5) ----
+  /**
+   * Record that a caller utterance started overlapping with agent speech.
+   * Call this when VAD detects speech_start during TTS playback.
+   * @param ts Optional override timestamp in hrTimeMs units.
+   */
+  recordOverlapStart(ts) {
+    this._overlapStartedAt = ts ?? hrTimeMs(); // uses ts when given, otherwise the current clock
+  }
+  /**
+   * Record that the overlap ended.  Emits ``InterruptionMetrics`` via the
+   * event bus.
+   *
+   * @param wasInterruption  true → barge-in (increments ``numInterruptions``),
+   *                         false → backchannel (increments ``numBackchannels``).
+   * @param ts Optional override timestamp in hrTimeMs units.
+   */
+  recordOverlapEnd(wasInterruption, ts) {
+    const now = ts ?? hrTimeMs();
+    const detectionDelay = this._overlapStartedAt !== null ? Math.max(0, now - this._overlapStartedAt) : 0; // 0 when no overlap start was recorded
+    this._overlapStartedAt = null; // the overlap is consumed
+    if (wasInterruption) {
+      this._numInterruptions++;
+    } else {
+      this._numBackchannels++;
+    }
+    const payload = {
+      timestamp: Date.now() / 1e3, // wall-clock time in seconds
+      // Simplified: totalDuration == detectionDelay (no ML prediction window)
+      totalDuration: detectionDelay,
+      predictionDuration: 0,
+      detectionDelay,
+      numInterruptions: this._numInterruptions, // counters are cumulative for this accumulator instance
+      numBackchannels: this._numBackchannels
+    };
+    this._eventBus?.emit("interruption", payload); // no-op when no event bus is attached
+  }
   // ---- Usage tracking ----
   addSttAudioBytes(byteCount) { // running byte total; endCall converts it to seconds when no per-turn audio seconds were recorded
     this._sttByteCount += byteCount;
   }
   recordRealtimeUsage(usage) { // accumulates the cost and the cached-token savings computed for one usage report
     this._totalRealtimeCost += calculateRealtimeCost(usage, this._pricing);
+    this._totalRealtimeCachedSavings += calculateRealtimeCachedSavings(usage, this._pricing);
   }
   setActualTelephonyCost(cost) {
     this._actualTelephonyCost = cost;
@@ -1873,28 +2845,62 @@ var CallMetricsAccumulator = class {
   setActualSttCost(cost) { // when set (non-null), _computeCost uses this value instead of the estimated STT cost
     this._actualSttCost = cost;
   }
+  /**
+   * Accumulate LLM token cost for pipeline mode (non-Realtime).
+   *
+   * Called by LLMLoop.run() when a usage chunk arrives from the provider.
+   * Mirrors Python's CallMetricsAccumulator.record_llm_usage().
+   *
+   * @param provider   LLM provider key (e.g. 'openai', 'anthropic')
+   * @param model      Model name (e.g. 'gpt-4o-mini')
+   * @param inputTokens       Total input tokens (includes cached)
+   * @param outputTokens      Total output tokens
+   * @param cacheReadTokens   Cached input tokens (subtracted from input before billing full rate)
+   * @param cacheWriteTokens  Cache write tokens (billed at cache_write rate if present)
+   */
+  recordLlmUsage(provider2, model, inputTokens, outputTokens, cacheReadTokens = 0, cacheWriteTokens = 0) { // adds the priced cost of this usage report to the running LLM total
+    this._totalLlmCost += calculateLlmCost( // calculateLlmCost returns 0 for unknown providers or models, so those add nothing
+      provider2,
+      model,
+      inputTokens,
+      outputTokens,
+      cacheReadTokens,
+      cacheWriteTokens
+    );
+  }
   // ---- Finalize ----
   endCall() { // Finalizes the call: closes any open turn, computes cost and latency statistics, emits call_ended and returns the metrics object.
     const duration = (hrTimeMs() - this._callStart) / 1e3; // hrTimeMs is in milliseconds, so this is seconds
+    if (this.turnActive) { // a turn still open at this point is recorded as interrupted
+      this.recordTurnInterrupted();
+    }
     if (this._totalSttAudioSeconds === 0 && this._sttByteCount > 0) { // no audio seconds were reported: derive them from the raw byte count
       this._totalSttAudioSeconds = this._sttByteCount / (this._sttSampleRate * this._sttBytesPerSample);
     }
     const cost = this._computeCost(duration);
     const latencyAvg = this._computeAverageLatency();
-    const latencyP95 = this._computeP95Latency();
-    return {
+    const latencyP50 = this._computePercentileLatency(0.5);
+    const latencyP90 = this._computePercentileLatency(0.9);
+    const latencyP95 = this._computePercentileLatency(0.95);
+    const latencyP99 = this._computePercentileLatency(0.99);
+    const metrics = {
       call_id: this.callId,
       duration_seconds: round(duration, 2),
       turns: [...this._turns], // shallow copy of the turn list
       cost,
       latency_avg: latencyAvg,
+      latency_p50: latencyP50,
+      latency_p90: latencyP90,
       latency_p95: latencyP95,
+      latency_p99: latencyP99,
       provider_mode: this.providerMode,
       stt_provider: this.sttProvider,
       tts_provider: this.ttsProvider,
       llm_provider: this.llmProvider,
       telephony_provider: this.telephonyProvider
     };
+    this._eventBus?.emit("call_ended", { callId: this.callId, metrics }); // no-op when no event bus is attached
+    return metrics;
   }
   getCostSoFar() {
     const duration = (hrTimeMs() - this._callStart) / 1e3;
@@ -1904,36 +2910,68 @@ var CallMetricsAccumulator = class {
   _resetTurnState() { // Clears the per-turn timing state; called after a turn has been recorded.
     this._turnStart = null; // a null _turnStart is what startTurnIfIdle checks for
     this._sttComplete = null;
+    this._llmFirstToken = null;
+    this._llmFirstSentenceComplete = null;
     this._llmComplete = null;
     this._ttsFirstByte = null;
+    this._ttsLastByte = null;
+    this._endpointSignalAt = null;
+    this._turnCommittedMono = null;
+    this._bargeinDetectedAt = null;
+    this._bargeinStoppedAt = null;
     this._turnUserText = "";
     this._turnSttAudioSeconds = 0; // _vadStoppedAt, _sttFinalAt, _turnCommittedAt and _onUserTurnCompletedDelayMs are not reset here; startTurn clears them
   }
   _computeTurnLatency() { // Derives the per-turn latency breakdown (values in ms, rounded to 1 decimal) from the timestamps recorded during the turn.
     let stt_ms = 0;
     let llm_ms = 0;
+    let llm_ttft_ms;
+    let llm_total_ms;
     let tts_ms = 0;
     let total_ms = 0;
+    let endpoint_ms;
+    let bargein_ms;
+    let tts_total_ms;
     if (this._turnStart !== null && this._sttComplete !== null) {
       stt_ms = this._sttComplete - this._turnStart;
     }
-    if (this._sttComplete !== null && this._llmComplete !== null) {
+    if (this._sttComplete !== null && this._llmFirstToken !== null) { // when a first-token stamp exists, llm_ms reports time to first token
+      llm_ttft_ms = Math.max(0, this._llmFirstToken - this._sttComplete);
+      llm_ms = llm_ttft_ms;
+    } else if (this._sttComplete !== null && this._llmComplete !== null) { // otherwise fall back to the full LLM duration
       llm_ms = this._llmComplete - this._sttComplete;
     }
-    if (this._llmComplete !== null && this._ttsFirstByte !== null) {
-      tts_ms = this._ttsFirstByte - this._llmComplete;
+    if (this._sttComplete !== null && this._llmComplete !== null) {
+      llm_total_ms = Math.max(0, this._llmComplete - this._sttComplete);
+    }
+    const ttsSpanStart = this._llmFirstSentenceComplete ?? this._llmComplete; // TTS span starts at the first complete sentence when known, else at LLM completion
+    if (ttsSpanStart !== null && this._ttsFirstByte !== null) {
+      tts_ms = this._ttsFirstByte - ttsSpanStart;
+      if (tts_ms < 0) tts_ms = 0; // clamp negative spans to 0
     }
     if (this._turnStart !== null && this._ttsFirstByte !== null) { // total is measured from turn start to the first TTS byte
       total_ms = this._ttsFirstByte - this._turnStart;
     }
-    if (total_ms > 0 && stt_ms === 0 && llm_ms === 0 && tts_ms === 0) {
-      llm_ms = total_ms;
+    if (this._endpointSignalAt !== null && this._turnCommittedMono !== null) {
+      endpoint_ms = Math.max(0, this._turnCommittedMono - this._endpointSignalAt);
+    }
+    if (this._bargeinDetectedAt !== null && this._bargeinStoppedAt !== null) {
+      bargein_ms = Math.max(0, this._bargeinStoppedAt - this._bargeinDetectedAt);
+    }
+    const ttsTotalRef = this._llmFirstToken ?? this._llmFirstSentenceComplete ?? this._llmComplete; // earliest available LLM stamp, in this preference order
+    if (ttsTotalRef !== null && this._ttsLastByte !== null) {
+      tts_total_ms = Math.max(0, this._ttsLastByte - ttsTotalRef);
     }
     return { // optional fields are included only when they could be computed
       stt_ms: round(stt_ms, 1),
       llm_ms: round(llm_ms, 1),
+      ...llm_ttft_ms !== void 0 ? { llm_ttft_ms: round(llm_ttft_ms, 1) } : {},
+      ...llm_total_ms !== void 0 ? { llm_total_ms: round(llm_total_ms, 1) } : {},
       tts_ms: round(tts_ms, 1),
-      total_ms: round(total_ms, 1)
+      total_ms: round(total_ms, 1),
+      ...endpoint_ms !== void 0 ? { endpoint_ms: round(endpoint_ms, 1) } : {},
+      ...bargein_ms !== void 0 ? { bargein_ms: round(bargein_ms, 1) } : {},
+      ...tts_total_ms !== void 0 ? { tts_total_ms: round(tts_total_ms, 1) } : {}
     };
   }
   _computeCost(durationSeconds) {
@@ -1951,7 +2989,7 @@ var CallMetricsAccumulator = class {
     } else {
       stt = this._actualSttCost !== null ? this._actualSttCost : calculateSttCost(this.sttProvider, this._totalSttAudioSeconds, this._pricing);
       tts = calculateTtsCost(this.ttsProvider, this._totalTtsCharacters, this._pricing);
-      llm = 0;
+      llm = this._totalLlmCost;
     }
     const telephony = this._actualTelephonyCost !== null ? this._actualTelephonyCost : calculateTelephonyCost(this.telephonyProvider, durationSeconds, this._pricing);
     const total = stt + tts + llm + telephony;
@@ -1960,30 +2998,78 @@ var CallMetricsAccumulator = class {
       tts: round(tts, 6),
       llm: round(llm, 6),
       telephony: round(telephony, 6),
-      total: round(total, 6)
+      total: round(total, 6),
+      // Always emit (default 0) for parity with Python dataclass where
+      // llm_cached_savings is a required field with default 0.0.
+      llm_cached_savings: round(Math.max(0, this._totalRealtimeCachedSavings), 6)
     };
   }
+  /**
+   * Select the turns that may contribute to latency statistics.
+   *
+   * A turn is kept only when its agent text is not the ``[interrupted]``
+   * marker (barge-in, cancelled replacements) and its recorded total latency
+   * is strictly positive. Interrupted turns carry partial or zero latency,
+   * which would skew every average and percentile bucket.
+   *
+   * @returns {Array} The eligible subset of ``this._turns``, in order.
+   */
+  _completedTurns() {
+    const eligible = [];
+    for (const turn of this._turns) {
+      if (turn.agent_text === "[interrupted]") continue;
+      if (turn.latency.total_ms > 0) eligible.push(turn);
+    }
+    return eligible;
+  }
   _computeAverageLatency() {
-    if (this._turns.length === 0) {
+    const turns = this._completedTurns();
+    if (turns.length === 0) {
       return { stt_ms: 0, llm_ms: 0, tts_ms: 0, total_ms: 0 };
     }
-    const n = this._turns.length;
+    const n = turns.length;
+    const ttftValues = turns.map((t) => t.latency.llm_ttft_ms ?? 0).filter((v) => v > 0);
+    const ttftAvg = ttftValues.length > 0 ? round(ttftValues.reduce((s, v) => s + v, 0) / ttftValues.length, 1) : void 0;
+    const optAvg = (key) => {
+      const vals = turns.map((t) => t.latency[key]).filter((v) => typeof v === "number" && v > 0);
+      return vals.length > 0 ? round(vals.reduce((s, v) => s + v, 0) / vals.length, 1) : void 0;
+    };
+    const llmTotalAvg = optAvg("llm_total_ms");
+    const endpointAvg = optAvg("endpoint_ms");
+    const bargeinAvg = optAvg("bargein_ms");
+    const ttsTotalAvg = optAvg("tts_total_ms");
     return {
-      stt_ms: round(this._turns.reduce((s, t) => s + t.latency.stt_ms, 0) / n, 1),
-      llm_ms: round(this._turns.reduce((s, t) => s + t.latency.llm_ms, 0) / n, 1),
-      tts_ms: round(this._turns.reduce((s, t) => s + t.latency.tts_ms, 0) / n, 1),
-      total_ms: round(this._turns.reduce((s, t) => s + t.latency.total_ms, 0) / n, 1)
+      stt_ms: round(turns.reduce((s, t) => s + t.latency.stt_ms, 0) / n, 1),
+      llm_ms: round(turns.reduce((s, t) => s + t.latency.llm_ms, 0) / n, 1),
+      ...ttftAvg !== void 0 ? { llm_ttft_ms: ttftAvg } : {},
+      ...llmTotalAvg !== void 0 ? { llm_total_ms: llmTotalAvg } : {},
+      tts_ms: round(turns.reduce((s, t) => s + t.latency.tts_ms, 0) / n, 1),
+      total_ms: round(turns.reduce((s, t) => s + t.latency.total_ms, 0) / n, 1),
+      ...endpointAvg !== void 0 ? { endpoint_ms: endpointAvg } : {},
+      ...bargeinAvg !== void 0 ? { bargein_ms: bargeinAvg } : {},
+      ...ttsTotalAvg !== void 0 ? { tts_total_ms: ttsTotalAvg } : {}
     };
   }
-  _computeP95Latency() {
-    if (this._turns.length === 0) {
+  _computePercentileLatency(p) {
+    const turns = this._completedTurns();
+    if (turns.length === 0) {
       return { stt_ms: 0, llm_ms: 0, tts_ms: 0, total_ms: 0 };
     }
+    const nonZero = (vals) => vals.filter((v) => v > 0);
+    const ttftSamples = nonZero(turns.map((t) => t.latency.llm_ttft_ms ?? 0));
+    const ttftP = ttftSamples.length > 0 ? round(percentile(ttftSamples, p), 1) : void 0;
+    const optPct = (key) => {
+      const vals = turns.map((t) => t.latency[key]).filter((v) => typeof v === "number" && v > 0);
+      return vals.length > 0 ? round(percentile(vals, p), 1) : void 0;
+    };
+    const llmTotalP = optPct("llm_total_ms");
+    const endpointP = optPct("endpoint_ms");
+    const bargeinP = optPct("bargein_ms");
+    const ttsTotalP = optPct("tts_total_ms");
     return {
-      stt_ms: round(p95(this._turns.map((t) => t.latency.stt_ms)), 1),
-      llm_ms: round(p95(this._turns.map((t) => t.latency.llm_ms)), 1),
-      tts_ms: round(p95(this._turns.map((t) => t.latency.tts_ms)), 1),
-      total_ms: round(p95(this._turns.map((t) => t.latency.total_ms)), 1)
+      stt_ms: round(percentile(nonZero(turns.map((t) => t.latency.stt_ms)), p), 1),
+      llm_ms: round(percentile(nonZero(turns.map((t) => t.latency.llm_ms)), p), 1),
+      ...ttftP !== void 0 ? { llm_ttft_ms: ttftP } : {},
+      ...llmTotalP !== void 0 ? { llm_total_ms: llmTotalP } : {},
+      tts_ms: round(percentile(nonZero(turns.map((t) => t.latency.tts_ms)), p), 1),
+      total_ms: round(percentile(nonZero(turns.map((t) => t.latency.total_ms)), p), 1),
+      ...endpointP !== void 0 ? { endpoint_ms: endpointP } : {},
+      ...bargeinP !== void 0 ? { bargein_ms: bargeinP } : {},
+      ...ttsTotalP !== void 0 ? { tts_total_ms: ttsTotalP } : {}
     };
   }
 };
@@ -2038,40 +3124,335 @@ function pcm16ToMulaw(pcmData) {
   }
   return out;
 }
+/**
+ * Byte-alignment carry for a PCM16 byte stream.
+ *
+ * 16-bit samples occupy two bytes, but transport chunks may end in the
+ * middle of a sample. `push()` returns the even-length prefix of the bytes
+ * seen so far and holds back a trailing odd byte until the next call.
+ */
+var PcmCarry = class {
+  // Odd byte held back from the previous push(), or null when aligned.
+  pending = null;
+  /**
+   * Prepend any carried odd byte, return the even-length prefix, and stash
+   * any new trailing odd byte for the next call.
+   *
+   * The carried byte is copied instead of being kept as a view into `chunk`,
+   * so the caller may reuse or overwrite `chunk` after this call returns and
+   * the carry does not keep the chunk's whole backing store alive. The
+   * returned buffer may still alias `chunk` and should be consumed before
+   * `chunk` is modified.
+   *
+   * @param {Buffer} chunk Raw PCM bytes of any length, including zero.
+   * @returns {Buffer} Even-length buffer; zero-length when no complete
+   *   sample is available yet.
+   */
+  push(chunk) {
+    const combined = this.pending === null ? chunk : Buffer.concat([this.pending, chunk]);
+    // Clearing bit 0 rounds the length down to a whole number of samples.
+    const alignedLen = combined.length & ~1;
+    this.pending = alignedLen < combined.length ? Buffer.from(combined.subarray(alignedLen)) : null;
+    return combined.subarray(0, alignedLen);
+  }
+  /**
+   * Return any pending byte as a 1-byte buffer (rare in practice — only if
+   * the entire stream had an odd byte count), then reset internal state.
+   *
+   * @returns {Buffer} The carried byte, or a zero-length buffer.
+   */
+  flush() {
+    const out = this.pending ?? Buffer.alloc(0);
+    this.pending = null;
+    return out;
+  }
+  /** Reset carry state without flushing. */
+  reset() {
+    this.pending = null;
+  }
+};
+/**
+ * Streaming PCM16-LE mono sample-rate converter that carries filter and
+ * interpolation state across chunk boundaries.
+ *
+ * Supported conversions:
+ * - 16000 -> 8000: 2:1 decimation behind a 5-tap binomial FIR.
+ * - 8000 -> 16000: 1:2 linear interpolation.
+ * - 24000 -> 16000: linear interpolation at a 1.5-input-sample step.
+ *
+ * Feed chunks through `process()`, call `flush()` once at end of stream to
+ * collect deferred output, and `reset()` to discard state.
+ */
+var StatefulResampler = class {
+  srcRate;
+  dstRate;
+  // 16k→8k: the two input samples that precede the next filter centre,
+  // i.e. [s_{-2}, s_{-1}] relative to the first unconsumed sample.
+  firHistory = new Int16Array(2);
+  firHistoryValid = false;
+  // 16k→8k: a complete Int16 sample left over from a chunk with an odd
+  // sample count (not the byte carry). It becomes the first input of the
+  // next call and is NOT part of `firHistory`.
+  firPendingSample = null;
+  // 8k→16k: last input sample, deferred until its successor is known.
+  upsampleLast = 0;
+  upsampleHasHistory = false;
+  // 24k→16k: last input sample of the previous chunk and the position of
+  // the next output relative to the start of the next chunk. A negative
+  // phase means that output lies between `resample24Last` and the first
+  // sample of the next chunk.
+  resample24Last = 0;
+  resample24Phase = 0;
+  resample24HasHistory = false;
+  // Odd-byte alignment carry.
+  carry = new PcmCarry();
+  /**
+   * @param {{srcRate: number, dstRate: number, channels?: number}} opts
+   * @throws {Error} When `channels` is given and is not 1, or when the
+   *   srcRate/dstRate pair is not one of the supported conversions.
+   */
+  constructor(opts) {
+    this.srcRate = opts.srcRate;
+    this.dstRate = opts.dstRate;
+    if (opts.channels !== void 0 && opts.channels !== 1) {
+      throw new Error("StatefulResampler: only mono (channels=1) is supported");
+    }
+    const key = `${this.srcRate}->${this.dstRate}`;
+    if (key !== "16000->8000" && key !== "8000->16000" && key !== "24000->16000") {
+      throw new Error(
+        `StatefulResampler: unsupported conversion ${key}. Supported: 16000->8000, 8000->16000, 24000->16000`
+      );
+    }
+  }
+  /**
+   * Process a chunk of PCM16-LE samples.
+   *
+   * Handles odd-byte inputs via an internal carry buffer. Returns an even-byte-
+   * aligned output buffer; may return a zero-length buffer if not enough
+   * aligned input is available yet.
+   *
+   * @param {Buffer} pcm Input bytes at `srcRate`.
+   * @returns {Buffer} Output bytes at `dstRate`.
+   */
+  process(pcm) {
+    const aligned = this.carry.push(pcm);
+    if (aligned.length === 0) return Buffer.alloc(0);
+    if (this.srcRate === 16e3 && this.dstRate === 8e3) {
+      return this._downsample16kTo8k(aligned);
+    }
+    if (this.srcRate === 8e3 && this.dstRate === 16e3) {
+      return this._upsample8kTo16k(aligned);
+    }
+    return this._resample24kTo16k(aligned);
+  }
+  /**
+   * Emit any output still deferred at end of stream, then reset all state so
+   * the instance can be reused for a new stream.
+   *
+   * - 16k→8k: a pending odd sample is filtered with edge replication.
+   * - 8k→16k: the deferred last sample is emitted duplicated (matching the
+   *   stateless helper's end-of-stream behaviour).
+   * - 24k→16k: an output deferred across the chunk boundary is emitted as
+   *   the last input sample (edge replication).
+   *
+   * A dangling odd byte in the alignment carry is discarded.
+   *
+   * @returns {Buffer} Remaining output samples; possibly zero-length.
+   */
+  flush() {
+    this.carry.flush();
+    let out = Buffer.alloc(0);
+    if (this.srcRate === 16e3 && this.dstRate === 8e3) {
+      if (this.firPendingSample !== null) {
+        // One copy of the pending sample completes the input pair; the
+        // filter's own edge replication supplies the remaining lookahead.
+        const tmp = Buffer.alloc(2);
+        tmp.writeInt16LE(this.firPendingSample, 0);
+        out = this._downsample16kTo8k(tmp);
+      }
+    } else if (this.srcRate === 8e3 && this.dstRate === 16e3) {
+      if (this.upsampleHasHistory) {
+        out = Buffer.alloc(4);
+        out.writeInt16LE(this.upsampleLast, 0);
+        out.writeInt16LE(this.upsampleLast, 2);
+      }
+    } else if (this.resample24HasHistory && this.resample24Phase < 0) {
+      out = Buffer.alloc(2);
+      out.writeInt16LE(this.resample24Last, 0);
+    }
+    this.reset();
+    return out;
+  }
+  /** Reset all carried state (e.g. at call boundaries). */
+  reset() {
+    this.firHistory = new Int16Array(2);
+    this.firHistoryValid = false;
+    this.firPendingSample = null;
+    this.upsampleLast = 0;
+    this.upsampleHasHistory = false;
+    this.resample24Last = 0;
+    this.resample24Phase = 0;
+    this.resample24HasHistory = false;
+    this.carry.reset();
+  }
+  // ---------------------------------------------------------------------------
+  // Private: 16 kHz → 8 kHz
+  // ---------------------------------------------------------------------------
+  /**
+   * 2:1 decimation with a 5-tap binomial FIR anti-alias filter.
+   *
+   * FIR coefficients: [1, 4, 6, 4, 1] / 16 (cutoff ~Fs/4 = 4 kHz).
+   *
+   * Cross-chunk state:
+   * - `firHistory` holds the two samples immediately before the first
+   *   unconsumed sample (seeded to the first sample on the very first call).
+   * - `firPendingSample` holds a lone input sample from a chunk whose sample
+   *   count was odd; the next call prepends it so the output cadence is
+   *   unbroken. It is deliberately excluded from `firHistory`, otherwise it
+   *   would appear twice in the next filter window.
+   *
+   * Outputs are centred on even positions of the virtual stream, so every
+   * two input samples yield one output. Lookahead past the end of the
+   * current chunk is edge-replicated rather than delayed until the next
+   * chunk arrives.
+   *
+   * @param {Buffer} buf Even-length PCM16-LE input.
+   * @returns {Buffer} Decimated PCM16-LE output.
+   */
+  _downsample16kTo8k(buf) {
+    const newSampleCount = buf.length >> 1;
+    const hasPending = this.firPendingSample !== null;
+    const totalInput = newSampleCount + (hasPending ? 1 : 0);
+    if (totalInput === 0) return Buffer.alloc(0);
+    // extended = [s_{-2}, s_{-1}, pending?, chunk samples...]
+    const extended = new Int16Array(totalInput + 2);
+    let write = 2;
+    if (hasPending) extended[write++] = this.firPendingSample;
+    for (let j = 0; j < newSampleCount; j++) extended[write++] = buf.readInt16LE(j * 2);
+    this.firPendingSample = null;
+    if (!this.firHistoryValid) {
+      this.firHistory[0] = extended[2];
+      this.firHistory[1] = extended[2];
+      this.firHistoryValid = true;
+    }
+    extended[0] = this.firHistory[0];
+    extended[1] = this.firHistory[1];
+    const lastIdx = extended.length - 1;
+    const outSamples = totalInput >> 1;
+    const out = Buffer.alloc(outSamples * 2);
+    for (let i = 0; i < outSamples; i++) {
+      const c = 2 + i * 2;
+      const sP1 = extended[Math.min(c + 1, lastIdx)];
+      const sP2 = extended[Math.min(c + 2, lastIdx)];
+      // "+ 8" rounds to nearest before the divide-by-16 shift.
+      const filtered = extended[c - 2] + 4 * extended[c - 1] + 6 * extended[c] + 4 * sP1 + sP2 + 8 >> 4;
+      out.writeInt16LE(Math.max(-32768, Math.min(32767, filtered)), i * 2);
+    }
+    // Samples that produced output this call; always even.
+    const consumed = outSamples * 2;
+    if (consumed < totalInput) {
+      this.firPendingSample = extended[lastIdx];
+    }
+    // Consumed sample k lives at extended[k + 2], so these are the last two
+    // consumed samples (or the unchanged history when nothing was consumed).
+    this.firHistory[0] = extended[consumed];
+    this.firHistory[1] = extended[consumed + 1];
+    return out;
+  }
+  // ---------------------------------------------------------------------------
+  // Private: 8 kHz → 16 kHz
+  // ---------------------------------------------------------------------------
+  /**
+   * 1:2 linear-interpolation upsampler.
+   *
+   * Each input sample is emitted followed by the midpoint to its successor,
+   * so a sample can only be emitted once the next one is known. The last
+   * sample of every chunk is therefore deferred to the next call. Total
+   * across K chunks + flush = 2 * total_input_samples.
+   *
+   * Call flush() after the final chunk to emit the last deferred sample
+   * pair (self-duplicate at end of stream).
+   *
+   * @param {Buffer} buf Even-length PCM16-LE input.
+   * @returns {Buffer} Upsampled PCM16-LE output.
+   */
+  _upsample8kTo16k(buf) {
+    const sampleCount = buf.length >> 1;
+    if (sampleCount === 0) return Buffer.alloc(0);
+    const pairs = sampleCount - 1 + (this.upsampleHasHistory ? 1 : 0);
+    const out = Buffer.alloc(pairs * 4);
+    let offset = 0;
+    let prev = this.upsampleLast;
+    let havePrev = this.upsampleHasHistory;
+    for (let i = 0; i < sampleCount; i++) {
+      const cur = buf.readInt16LE(i * 2);
+      if (havePrev) {
+        offset = out.writeInt16LE(prev, offset);
+        offset = out.writeInt16LE(Math.round((prev + cur) / 2), offset);
+      }
+      prev = cur;
+      havePrev = true;
+    }
+    this.upsampleLast = prev;
+    this.upsampleHasHistory = true;
+    return out;
+  }
+  // ---------------------------------------------------------------------------
+  // Private: 24 kHz → 16 kHz
+  // ---------------------------------------------------------------------------
+  /**
+   * 3:2 linear-interpolation decimator (ratio srcRate/dstRate = 1.5).
+   *
+   * `resample24Phase` tracks the fractional input position of the next output
+   * sample relative to the START of the next chunk. An output that needs the
+   * sample after the end of the current chunk is not emitted yet: the phase
+   * goes negative and the next call interpolates it between
+   * `resample24Last` and that chunk's first sample. `flush()` emits such a
+   * deferred output at end of stream.
+   *
+   * @param {Buffer} buf Even-length PCM16-LE input.
+   * @returns {Buffer} Resampled PCM16-LE output.
+   */
+  _resample24kTo16k(buf) {
+    const sampleCount = buf.length >> 1;
+    if (sampleCount === 0) return Buffer.alloc(0);
+    const outArr = [];
+    let phase = this.resample24Phase;
+    while (true) {
+      const idx = Math.floor(phase);
+      if (idx >= sampleCount) break;
+      const frac = phase - idx;
+      // The right-hand neighbour belongs to the next chunk: defer.
+      if (frac > 0 && idx + 1 >= sampleCount) break;
+      let s0;
+      let s1;
+      if (idx < 0) {
+        s1 = buf.readInt16LE(0);
+        s0 = this.resample24HasHistory ? this.resample24Last : s1;
+      } else {
+        s0 = buf.readInt16LE(idx * 2);
+        s1 = frac > 0 ? buf.readInt16LE((idx + 1) * 2) : s0;
+      }
+      const interp = Math.round(s0 + (s1 - s0) * frac);
+      outArr.push(Math.max(-32768, Math.min(32767, interp)));
+      phase += 24e3 / 16e3;
+    }
+    this.resample24Last = buf.readInt16LE((sampleCount - 1) * 2);
+    this.resample24HasHistory = true;
+    this.resample24Phase = phase - sampleCount;
+    const outBuf = Buffer.alloc(outArr.length * 2);
+    for (let j = 0; j < outArr.length; j++) outBuf.writeInt16LE(outArr[j], j * 2);
+    return outBuf;
+  }
+};
+/** Build a streaming 16 kHz → 8 kHz mono PCM16 resampler. */
+function createResampler16kTo8k() {
+  const rates = { srcRate: 16e3, dstRate: 8e3 };
+  return new StatefulResampler(rates);
+}
+/** Build a streaming 8 kHz → 16 kHz mono PCM16 resampler. */
+function createResampler8kTo16k() {
+  const rates = { srcRate: 8e3, dstRate: 16e3 };
+  return new StatefulResampler(rates);
+}
+/** Build a streaming 24 kHz → 16 kHz mono PCM16 resampler. */
+function createResampler24kTo16k() {
+  const rates = { srcRate: 24e3, dstRate: 16e3 };
+  return new StatefulResampler(rates);
+}
+// One-shot latches: each stateless resample helper logs its deprecation
+// warning on first use and then sets its flag so the warning is not repeated.
+var _warnedResample8kTo16k = false;
+var _warnedResample16kTo8k = false;
+var _warnedResample24kTo16k = false;
 function resample8kTo16k(pcm8k) {
-  if (pcm8k.length === 0) return Buffer.alloc(0);
-  const sampleCount = Math.floor(pcm8k.length / 2);
-  const out = Buffer.alloc(sampleCount * 2 * 2);
-  for (let i = 0; i < sampleCount; i++) {
-    const current = pcm8k.readInt16LE(i * 2);
-    const next = i + 1 < sampleCount ? pcm8k.readInt16LE((i + 1) * 2) : current;
-    const interpolated = Math.round((current + next) / 2);
-    out.writeInt16LE(current, i * 4);
-    out.writeInt16LE(interpolated, i * 4 + 2);
+  if (!_warnedResample8kTo16k) {
+    _warnedResample8kTo16k = true;
+    getLogger().warn(
+      "[patter] resample8kTo16k() is deprecated. Use createResampler8kTo16k() (StatefulResampler) to eliminate chunk-boundary discontinuities."
+    );
   }
-  return out;
+  if (pcm8k.length === 0) return Buffer.alloc(0);
+  const r = createResampler8kTo16k();
+  const main = r.process(pcm8k);
+  const tail = r.flush();
+  return tail.length > 0 ? Buffer.concat([main, tail]) : main;
 }
 function resample16kTo8k(pcm16k) {
-  if (pcm16k.length === 0) return Buffer.alloc(0);
-  const sampleCount = Math.floor(pcm16k.length / 2);
-  const outSamples = Math.floor(sampleCount / 2);
-  const out = Buffer.alloc(outSamples * 2);
-  for (let i = 0; i < outSamples; i++) {
-    const sample = pcm16k.readInt16LE(i * 2 * 2);
-    out.writeInt16LE(sample, i * 2);
+  if (!_warnedResample16kTo8k) {
+    _warnedResample16kTo8k = true;
+    getLogger().warn(
+      "[patter] resample16kTo8k() is deprecated. Use createResampler16kTo8k() (StatefulResampler) to eliminate chunk-boundary discontinuities."
+    );
   }
-  return out;
+  if (pcm16k.length === 0) return Buffer.alloc(0);
+  const r = createResampler16kTo8k();
+  const out = r.process(pcm16k);
+  const tail = r.flush();
+  return tail.length > 0 ? Buffer.concat([out, tail]) : out;
 }
 function resample24kTo16k(pcm24k) {
+  if (!_warnedResample24kTo16k) {
+    _warnedResample24kTo16k = true;
+    getLogger().warn(
+      "[patter] resample24kTo16k() is deprecated. Use createResampler24kTo16k() (StatefulResampler) or OpenAITTS.resampleStreaming for anti-aliased resampling."
+    );
+  }
   if (pcm24k.length === 0) return Buffer.alloc(0);
   const sampleCount = Math.floor(pcm24k.length / 2);
   const outSamples = Math.floor(sampleCount * 2 / 3);
   const out = Buffer.alloc(outSamples * 2);
-  let outIdx = 0;
-  for (let i = 0; i < sampleCount && outIdx < outSamples; i++) {
-    if (i % 3 === 2) continue;
-    out.writeInt16LE(pcm24k.readInt16LE(i * 2), outIdx * 2);
-    outIdx++;
+  for (let i = 0; i < outSamples; i++) {
+    const pos = i * 1.5;
+    const idx = Math.floor(pos);
+    const frac = pos - idx;
+    const s0 = pcm24k.readInt16LE(idx * 2);
+    const s1 = idx + 1 < sampleCount ? pcm24k.readInt16LE((idx + 1) * 2) : s0;
+    const interp = Math.round(s0 + (s1 - s0) * frac);
+    out.writeInt16LE(Math.max(-32768, Math.min(32767, interp)), i * 2);
   }
   return out;
 }
@@ -2133,6 +3514,8 @@ async function executeToolWebhook(webhookUrl, toolName, parsedArgs, context, lab
 // src/sentence-chunker.ts
 var DEFAULT_MIN_SENTENCE_LEN = 20;
+var DEFAULT_MIN_WORDS_FOR_SHORT_FLUSH = 2;
+var SENTENCE_TERMINATORS = ".!?\u3002\uFF01\uFF1F";
 function splitSentences(text, minSentenceLen = DEFAULT_MIN_SENTENCE_LEN) {
   const alphabets = "([A-Za-z])";
   const prefixes = "(Mr|St|Mrs|Ms|Dr)[.]";
@@ -2191,14 +3574,29 @@ function splitSentences(text, minSentenceLen = DEFAULT_MIN_SENTENCE_LEN) {
 var SentenceChunker = class {
   buffer = "";
   minSentenceLen;
+  minWordsForShortFlush;
   constructor(options) {
     this.minSentenceLen = options?.minSentenceLen ?? DEFAULT_MIN_SENTENCE_LEN;
+    this.minWordsForShortFlush = options?.minWordsForShortFlush ?? DEFAULT_MIN_WORDS_FOR_SHORT_FLUSH;
   }
-  /** Feed a token. Returns zero or more complete sentences. */
+  /**
+   * Feed a token. Returns zero or more complete sentences.
+   *
+   * Two emission paths:
+   * - **Standard path** — when the buffer is at least `minSentenceLen`
+   *   characters long and the regex tokenizer reports more than one
+   *   sentence, all but the last (potentially incomplete) are emitted.
+   * - **Short-flush path** — when the buffer is shorter than `minSentenceLen`
+   *   but ends with a sentence terminator AND has at least
+   *   `minWordsForShortFlush` whitespace-separated words, emit it
+   *   immediately. This drops TTS TTFB on short greetings like `"Hi there!"`
+   *   while keeping single-word utterances (`"Sì."`) buffered until
+   *   `flush()`.
+   */
   push(token) {
     this.buffer += token;
     if (this.buffer.length < this.minSentenceLen) {
-      return [];
+      return this.maybeShortFlush();
     }
     const sentences = splitSentences(this.buffer, this.minSentenceLen);
     if (sentences.length <= 1) {
@@ -2212,6 +3610,41 @@ var SentenceChunker = class {
     this.buffer = sentences[sentences.length - 1]?.[0] ?? "";
     return result;
   }
+  /**
+   * Short-flush path: emit the whole buffer when it already reads as one
+   * short, complete sentence.
+   *
+   * Every condition below must hold, otherwise nothing is emitted and the
+   * buffer is left untouched:
+   * 1. After dropping trailing whitespace the buffer is non-empty and its
+   *    last char is a sentence terminator.
+   * 2. That trailing terminator is the only terminator in the buffer.
+   *    Several terminators suggest a longer merged utterance such as
+   *    `"Hey! Hi! Hello! This is a sentence."`, which the standard path
+   *    should keep merging.
+   * 3. The buffer has at least `minWordsForShortFlush` whitespace-separated
+   *    words (default 2 — single-word "Sì." / "Yes." stays buffered until
+   *    `flush()`).
+   * 4. The char right before the terminator is neither a digit (a decimal
+   *    may still be arriving, e.g. `"x * 2."` followed by `54`) nor an
+   *    uppercase ASCII letter (acronyms such as `"U.S."` / `"U."`).
+   *
+   * @returns {string[]} A one-element array holding the buffer without its
+   *   trailing whitespace, or an empty array.
+   */
+  maybeShortFlush() {
+    const candidate = this.buffer.trimEnd();
+    if (candidate.length === 0) return [];
+    const finalChar = candidate.charAt(candidate.length - 1);
+    if (!SENTENCE_TERMINATORS.includes(finalChar)) return [];
+    const terminators = Array.from(candidate).filter((ch) => SENTENCE_TERMINATORS.includes(ch));
+    if (terminators.length !== 1) return [];
+    const words = candidate.split(/\s+/).filter(Boolean);
+    if (words.length < this.minWordsForShortFlush) return [];
+    if (candidate.length >= 2 && /[\dA-Z]/.test(candidate.charAt(candidate.length - 2))) {
+      return [];
+    }
+    this.buffer = "";
+    return [candidate];
+  }
   /** Flush remaining buffer as final sentence(s). Call at end of stream. */
   flush() {
     const remaining = this.buffer.trim();
@@ -2258,6 +3691,44 @@ var PipelineHookExecutor = class {
       return transcript;
     }
   }
+  /**
+   * Apply the optional ``beforeLlm`` hook to the outgoing message list.
+   *
+   * The hook may return a replacement list. A ``null``/``undefined`` result
+   * means "keep the original" — the LLM call is too important to be
+   * silently vetoed. Fail-open: if the hook throws, the error is logged and
+   * the original messages are used.
+   *
+   * @param messages Messages about to be sent to the LLM.
+   * @param ctx Context object forwarded to the hook unchanged.
+   * @returns The hook's replacement, or ``messages`` itself.
+   */
+  async runBeforeLlm(messages, ctx) {
+    if (this.hooks?.beforeLlm) {
+      try {
+        const replaced = await this.hooks.beforeLlm(messages, ctx);
+        if (replaced !== null && replaced !== void 0) return replaced;
+      } catch (e) {
+        getLogger().error("Pipeline hook beforeLlm threw:", e);
+      }
+    }
+    return messages;
+  }
+  /**
+   * Apply the optional ``afterLlm`` hook to the assistant's text.
+   *
+   * The hook may return replacement text. A ``null``/``undefined`` result
+   * means "keep the original". Fail-open: if the hook throws, the error is
+   * logged and the original text is used.
+   *
+   * @param text Assistant text produced by the LLM.
+   * @param ctx Context object forwarded to the hook unchanged.
+   * @returns The hook's replacement, or ``text`` itself.
+   */
+  async runAfterLlm(text, ctx) {
+    if (this.hooks?.afterLlm) {
+      try {
+        const replaced = await this.hooks.afterLlm(text, ctx);
+        if (replaced !== null && replaced !== void 0) return replaced;
+      } catch (e) {
+        getLogger().error("Pipeline hook afterLlm threw:", e);
+      }
+    }
+    return text;
+  }
+  /**
+   * Report whether an ``afterLlm`` hook is configured. The LLM loop uses
+   * this to decide whether streaming tokens must be buffered before they
+   * are yielded.
+   *
+   * @returns {boolean} True when ``hooks.afterLlm`` is set to a truthy value.
+   */
+  hasAfterLlm() {
+    return this.hooks?.afterLlm ? true : false;
+  }
   /**
    * Run beforeSynthesize hook. Returns null if hook vetoes TTS for this sentence.
    * If no hook is defined, returns the text unchanged.
@@ -2286,73 +3757,453 @@ var PipelineHookExecutor = class {
   }
 };
-// src/stream-handler.ts
-function checkGuardrails(text, guardrails) {
-  if (!guardrails) return null;
-  for (const guard of guardrails) {
-    let blocked = false;
-    if (guard.blockedTerms) {
-      blocked = guard.blockedTerms.some((term) => text.toLowerCase().includes(term.toLowerCase()));
+// src/observability/event-bus.ts
+var EventBus = class {
+  listeners = /* @__PURE__ */ new Map();
+  /**
+   * Subscribe to an event type.  Returns an unsubscribe function.
+   */
+  on(event, cb) {
+    let set = this.listeners.get(event);
+    if (!set) {
+      set = /* @__PURE__ */ new Set();
+      this.listeners.set(event, set);
     }
-    if (!blocked && guard.check) {
-      blocked = guard.check(text);
+    set.add(cb);
+    return () => set.delete(cb);
+  }
+  /**
+   * Emit an event synchronously.  Async listeners are fire-and-forget with
+   * rejection logging so a badly-behaved observer never stalls the call path.
+   */
+  emit(event, payload) {
+    const set = this.listeners.get(event);
+    if (!set) return;
+    for (const cb of [...set]) {
+      try {
+        const res = cb(payload);
+        if (res && typeof res.catch === "function") {
+          res.catch(
+            (e) => getLogger().warn(`[EventBus] listener for "${event}" rejected:`, e)
+          );
+        }
+      } catch (e) {
+        getLogger().warn(`[EventBus] listener for "${event}" threw:`, e);
+      }
     }
-    if (blocked) return guard;
   }
-  return null;
+};
+// src/observability/tracing.ts
+// Environment variable that switches tracing on; read by envFlagEnabled().
+var ENV_FLAG = "PATTER_OTEL_ENABLED";
+// Default service name and the tracer name used by startSpan().
+var SERVICE_NAME = "patter";
+// Span name constants (presumably one per pipeline stage — their call sites
+// are outside this section).
+var SPAN_CALL = "getpatter.call";
+var SPAN_STT = "getpatter.stt";
+var SPAN_LLM = "getpatter.llm";
+var SPAN_TTS = "getpatter.tts";
+var SPAN_TOOL = "getpatter.tool";
+var SPAN_ENDPOINT = "getpatter.endpoint";
+var SPAN_BARGEIN = "getpatter.bargein";
+// Cached @opentelemetry/api module; filled in by tryLoadOtel().
+var otel = null;
+// Module state owned by initTracing(): whether it has run, whether the API
+// package was usable, and the SDK provider (null when SDK wiring failed).
+var initialized = false;
+var tracerAvailable = false;
+var provider = null;
+/**
+ * Load ``@opentelemetry/api`` on demand and cache it in ``otel``.
+ *
+ * @returns The API module, or ``null`` when it cannot be required. A failed
+ *   load is not cached, so the next call tries again.
+ */
+function tryLoadOtel() {
+  if (otel === null) {
+    try {
+      otel = __require("@opentelemetry/api");
+    } catch {
+      // Optional dependency is absent — callers treat null as "no tracing".
+      return null;
+    }
+  }
+  return otel;
 }
-function sanitizeLogValue(v, maxLen = 200) {
-  const cleaned = v.replace(/[\x00-\x1f\x7f]/g, "");
-  return cleaned.length > maxLen ? cleaned.slice(0, maxLen) + "..." : cleaned;
+/**
+ * Best-effort wiring of the OpenTelemetry Node SDK with an OTLP/HTTP trace
+ * exporter behind a batch span processor.
+ *
+ * @param options Reads ``serviceName``, ``resourceAttributes`` and
+ *   ``otlpEndpoint``.
+ * @param api The loaded ``@opentelemetry/api`` module.
+ * @returns The tracer provider instance, or ``null`` when any SDK package
+ *   cannot be required or setup throws (logged at debug level).
+ */
+function trySetupSdk(options, api) {
+  try {
+    const sdkTraceNode = __require("@opentelemetry/sdk-trace-node");
+    const sdkTraceBase = __require("@opentelemetry/sdk-trace-base");
+    const otlpHttp = __require("@opentelemetry/exporter-trace-otlp-http");
+    const serviceName = options.serviceName ?? SERVICE_NAME;
+    const providerInstance = new sdkTraceNode.NodeTracerProvider({
+      resource: {
+        attributes: {
+          "service.name": serviceName,
+          ...options.resourceAttributes ?? {}
+        }
+      }
+    });
+    // Explicit option wins over the OTEL_EXPORTER_OTLP_ENDPOINT env var.
+    const endpoint = options.otlpEndpoint ?? process.env.OTEL_EXPORTER_OTLP_ENDPOINT ?? void 0;
+    // One trailing slash is dropped before "/v1/traces" is appended; with no
+    // endpoint the exporter is constructed without arguments.
+    const exporter = new otlpHttp.OTLPTraceExporter(
+      endpoint ? { url: `${endpoint.replace(/\/$/, "")}/v1/traces` } : void 0
+    );
+    const processor = new sdkTraceBase.BatchSpanProcessor(exporter);
+    // Optional calls: skipped silently when the provider lacks the method.
+    providerInstance.addSpanProcessor?.(processor);
+    providerInstance.register?.();
+    try {
+      api.trace.setGlobalTracerProvider?.(providerInstance);
+    } catch {
+      // Failure to set the global provider is ignored on purpose.
+    }
+    return providerInstance;
+  } catch (e) {
+    getLogger().debug(
+      `[observability] OTel SDK wire-up skipped: ${String(e?.message ?? e)}`
+    );
+    return null;
+  }
 }
-function isValidE164(number) {
-  return /^\+[1-9]\d{6,14}$/.test(number);
+/**
+ * Whether the tracing env flag is currently set to an enabling value
+ * ("1", "true" or "yes", case-insensitive).
+ */
+function envFlagEnabled() {
+  const value = (process.env[ENV_FLAG] ?? "").toLowerCase();
+  return ["1", "true", "yes"].includes(value);
 }
-var StreamHandler = class {
-  deps;
-  ws;
-  caller;
-  callee;
-  // Mutable call state
-  streamSid = "";
-  callId = "";
-  adapter = null;
-  stt = null;
-  tts = null;
-  isSpeaking = false;
-  llmLoop = null;
-  chunkCount = 0;
-  callEndFired = false;
+/**
+ * Initialise tracing once per process.
+ *
+ * Only the first call does any work; later calls return the availability
+ * decided then. Tracing is available when the env flag is enabled and
+ * ``@opentelemetry/api`` can be loaded; SDK/exporter wiring is attempted
+ * but is not required.
+ *
+ * @param options Forwarded to the SDK setup; ``serviceName`` is also logged.
+ * @returns {boolean} Whether tracing is available.
+ */
+function initTracing(options = {}) {
+  if (initialized) return tracerAvailable;
+  initialized = true;
+  const flagOn = envFlagEnabled();
+  const api = flagOn ? tryLoadOtel() : null;
+  if (!flagOn || !api) {
+    if (flagOn) {
+      getLogger().warn(
+        `${ENV_FLAG}=1 but @opentelemetry/api is not installed. Install with: npm install @opentelemetry/api @opentelemetry/sdk-trace-node @opentelemetry/sdk-trace-base @opentelemetry/exporter-trace-otlp-http`
+      );
+    }
+    tracerAvailable = false;
+    return false;
+  }
+  provider = trySetupSdk(options, api);
+  tracerAvailable = true;
+  const exporterLabel = provider ? ", exporter=otlp-http" : ", exporter=noop";
+  const serviceName = options.serviceName ?? SERVICE_NAME;
+  getLogger().info(
+    `[observability] Patter OTel tracing enabled (service=${serviceName}${exporterLabel})`
+  );
+  return true;
+}
+/**
+ * Whether spans should be recorded right now: tracing was initialised
+ * successfully and the env flag is still enabled.
+ */
+function isTracingEnabled() {
+  if (!tracerAvailable) return tracerAvailable;
+  return envFlagEnabled();
+}
+/**
+ * Span stand-in used when tracing is off: same methods as RealSpan, all of
+ * which do nothing.
+ */
+var NoopSpan = class {
+  /** Ignored. */
+  setAttribute(_key, _value) {
+    return void 0;
+  }
+  /** Ignored. */
+  recordException(_exception) {
+    return void 0;
+  }
+  /** Ignored. */
+  end() {
+    return void 0;
+  }
+};
+// Shared instance returned by startSpan() whenever no real span is created.
+var NOOP_SPAN = new NoopSpan();
+/**
+ * Thin wrapper around a tracer-created span. Every call is best-effort:
+ * errors raised by the underlying span are swallowed so that tracing can
+ * never break the code being traced.
+ */
+var RealSpan = class {
+  span;
+  /** @param span The underlying span object to delegate to. */
+  constructor(span) {
+    this.span = span;
+  }
+  /** Set one attribute on the underlying span; failures are ignored. */
+  setAttribute(key, value) {
+    try {
+      this.span.setAttribute(key, value);
+    } catch {
+      return;
+    }
+  }
+  /** Record an exception on the underlying span; failures are ignored. */
+  recordException(exception) {
+    try {
+      this.span.recordException(exception);
+    } catch {
+      return;
+    }
+  }
+  /** End the underlying span; failures are ignored. */
+  end() {
+    try {
+      this.span.end();
+    } catch {
+      return;
+    }
+  }
+};
+/**
+ * Start a span with the given name and optional attributes.
+ *
+ * @param name Span name.
+ * @param attrs Optional attribute map passed to the tracer.
+ * @returns A RealSpan, or the shared no-op span when tracing is disabled,
+ *   the API module is not loaded, or span creation throws.
+ */
+function startSpan(name, attrs) {
+  const active = isTracingEnabled() && otel;
+  if (!active) return NOOP_SPAN;
+  try {
+    const spanOptions = attrs ? { attributes: attrs } : void 0;
+    return new RealSpan(otel.trace.getTracer(SERVICE_NAME).startSpan(name, spanOptions));
+  } catch {
+    return NOOP_SPAN;
+  }
+}
+/**
+ * Run ``fn`` inside a span that is always ended, whether ``fn`` resolves
+ * or throws.
+ *
+ * @param name Span name.
+ * @param attrs Optional span attributes.
+ * @param fn Callback receiving the span; may be sync or async.
+ * @returns Whatever ``fn`` resolves to.
+ * @throws Rethrows any error from ``fn`` after recording it on the span.
+ */
+async function withSpan(name, attrs, fn) {
+  const activeSpan = startSpan(name, attrs);
+  try {
+    const outcome = await fn(activeSpan);
+    return outcome;
+  } catch (failure) {
+    activeSpan.recordException(failure);
+    throw failure;
+  } finally {
+    activeSpan.end();
+  }
+}
+// src/stream-handler.ts
+/**
+ * Find the first guardrail that blocks ``text``.
+ *
+ * A guard blocks when ``text`` contains any of its ``blockedTerms``
+ * (case-insensitive substring match) or, failing that, when its ``check``
+ * callback returns a truthy value.
+ *
+ * @param {string} text Text to screen.
+ * @param {Array|null|undefined} guardrails Guards, evaluated in order.
+ * @returns The first blocking guard, or ``null`` when nothing blocks or no
+ *   guardrails are configured.
+ */
+function checkGuardrails(text, guardrails) {
+  if (!guardrails) return null;
+  // Lower-case the text at most once, and only if some term needs it,
+  // instead of once per term per guard.
+  let lowered = null;
+  const loweredText = () => {
+    if (lowered === null) lowered = text.toLowerCase();
+    return lowered;
+  };
+  for (const guard of guardrails) {
+    let blocked = false;
+    if (guard.blockedTerms) {
+      blocked = guard.blockedTerms.some((term) => loweredText().includes(term.toLowerCase()));
+    }
+    if (!blocked && guard.check) {
+      blocked = guard.check(text);
+    }
+    if (blocked) return guard;
+  }
+  return null;
+}
+/**
+ * Make a string safe for a single log line: strip ASCII control characters
+ * (0x00-0x1F and 0x7F, which includes CR/LF) and cap the length.
+ *
+ * @param {string} v Value to sanitise.
+ * @param {number} [maxLen=200] Maximum kept length in UTF-16 code units.
+ * @returns {string} The cleaned value, with "..." appended if truncated.
+ */
+function sanitizeLogValue(v, maxLen = 200) {
+  const cleaned = v.replace(/[\x00-\x1f\x7f]/g, "");
+  if (cleaned.length <= maxLen) return cleaned;
+  let cut = maxLen;
+  // Never cut between the halves of a surrogate pair: a lone high
+  // surrogate is not valid text and renders as garbage in log sinks.
+  const lastKept = cleaned.charCodeAt(cut - 1);
+  if (lastKept >= 0xd800 && lastKept <= 0xdbff) cut -= 1;
+  return cleaned.slice(0, cut) + "...";
+}
+/**
+ * Mask a phone number for logging, keeping only its last four characters.
+ *
+ * @param number Phone number; non-strings are converted with String().
+ * @returns {string} ``"***"`` plus the last four characters, or just
+ *   ``"***"`` when the value is falsy or at most four characters long.
+ */
+function maskPhoneNumber(number) {
+  const MASK = "***";
+  const digits = number ? String(number) : "";
+  return digits.length > 4 ? `${MASK}${digits.slice(-4)}` : MASK;
+}
+/**
+ * Check that a value looks like an E.164 number: a leading "+", a first
+ * digit of 1-9, and 7 to 15 digits in total.
+ *
+ * @param number Candidate phone number.
+ * @returns {boolean} True when the pattern matches.
+ */
+function isValidE164(number) {
+  const e164Pattern = /^\+[1-9]\d{6,14}$/;
+  return e164Pattern.test(number);
+}
+// Lower-case short phrases grouped under the name HALLUCINATIONS.
+// NOTE(review): presumably filler transcripts that speech-to-text tends to
+// emit for noise or silence, to be filtered before a turn is committed —
+// the call site is outside this section; confirm there.
+var HALLUCINATIONS = /* @__PURE__ */ new Set([
+  "you",
+  "thank you",
+  "thanks",
+  "yeah",
+  "yes",
+  "no",
+  "okay",
+  "ok",
+  "uh",
+  "um",
+  "mmm",
+  "hmm",
+  ".",
+  "bye",
+  "right",
+  "cool"
+]);
+var StreamHandler = class {
+  deps;
+  ws;
+  caller;
+  callee;
+  // Mutable call state
+  streamSid = "";
+  callId = "";
+  adapter = null;
+  stt = null;
+  tts = null;
+  isSpeaking = false;
+  /** Set to true after a VAD error to suppress log spam for the rest of the call. */
+  vadDisabled = false;
+  /**
+   * Monotonic counter incremented on every TTS-start. The grace timer
+   * scheduled by ``endSpeakingWithGrace`` only flips ``isSpeaking=false``
+   * if the counter still matches its capture — a new turn that started in
+   * the meantime invalidates the obsolete timer instead of clobbering its
+   * own ``isSpeaking=true``.
+   */
+  speakingGeneration = 0;
+  /** Handle for the pending grace-period timer, so it can be cleared on cleanup. */
+  graceTimer = null;
+  /** Mark the start of a TTS span. Use instead of setting isSpeaking directly. */
+  beginSpeaking() {
+    this.speakingGeneration++;
+    this.isSpeaking = true;
+  }
+  /**
+   * Atomically end speaking AND invalidate any pending grace timer.
+   * Use instead of ``this.isSpeaking = false`` at barge-in sites.
+   */
+  cancelSpeaking() {
+    this.speakingGeneration++;
+    this.isSpeaking = false;
+  }
+  /** Cancel and clear the pending grace timer, if any. */
+  clearGraceTimer() {
+    if (this.graceTimer !== null) {
+      clearTimeout(this.graceTimer);
+      this.graceTimer = null;
+    }
+  }
+  /**
+   * Mark the agent as no longer producing TTS, honoring a grace period that
+   * approximates the carrier's playback buffer. The user may still hear the
+   * agent for ~1 s after we finish pushing audio (Twilio buffers ~1500 ms);
+   * keeping isSpeaking=true through that window keeps the VAD-driven
+   * barge-in armed during the audible tail. Tunable via env.
+   */
+  endSpeakingWithGrace() {
+    const grace = Number(process.env.PATTER_TTS_TAIL_GRACE_MS ?? 1500);
+    if (grace > 0) {
+      const gen = this.speakingGeneration;
+      this.clearGraceTimer();
+      this.graceTimer = setTimeout(() => {
+        this.graceTimer = null;
+        if (this.speakingGeneration === gen) this.isSpeaking = false;
+      }, grace);
+    } else {
+      this.isSpeaking = false;
+    }
+  }
+  llmLoop = null;
+  chunkCount = 0;
+  callEndFired = false;
   sttClosed = false;
   currentAgentText = "";
   responseAudioStarted = false;
   maxDurationTimer = null;
   transcriptProcessing = false;
   transcriptQueue = [];
-  // BUG #22 throttle state — mirror Python impl.
+  // Throttle state for back-to-back STT finals — see ``commitTranscript``.
   lastCommitText = "";
   lastCommitAt = 0;
+  // PCM16 byte-alignment carry for TTS streaming (pipeline mode).
+  // HTTP streams from ElevenLabs / OpenAI / Cartesia can yield chunks of any
+  // size, including odd byte counts. Silently dropping the trailing odd byte
+  // misaligns every subsequent int16 sample in the stream (hi/lo bytes get
+  // swapped), producing a voice drowned in loud hiss. We buffer the odd byte
+  // across chunks so resample/mulaw encoding always sees aligned int16 frames.
+  ttsByteCarry = null;
+  // Per-session stateful resamplers eliminate chunk-boundary discontinuities.
+  // Created lazily on first use; reset() on call end.
+  inboundResampler = createResampler8kTo16k();
+  outboundResampler = createResampler16kTo8k();
   history;
   metricsAcc;
+  _eventBus;
   constructor(deps, ws, caller, callee) {
     this.deps = deps;
     this.ws = ws;
     this.caller = caller;
     this.callee = callee;
     this.history = createHistoryManager(200);
-    const sttProviderName = deps.agent.stt ? deps.agent.stt.constructor?.name ?? "custom" : void 0;
-    const ttsProviderName = deps.agent.tts ? deps.agent.tts.constructor?.name ?? "custom" : void 0;
+    const sttKey = deps.agent.stt?.constructor?.providerKey;
+    const sttProviderName = deps.agent.stt ? sttKey ?? deps.agent.stt.constructor?.name ?? "custom" : void 0;
+    const ttsKey = deps.agent.tts?.constructor?.providerKey;
+    const ttsProviderName = deps.agent.tts ? ttsKey ?? deps.agent.tts.constructor?.name ?? "custom" : void 0;
     const providerMode = deps.agent.provider ?? "openai_realtime";
+    const llmKey = deps.agent.llm?.constructor?.providerKey;
+    let llmProviderName;
+    if (deps.agent.llm) {
+      if (llmKey) {
+        llmProviderName = llmKey;
+      } else {
+        const stripped = (deps.agent.llm.constructor?.name ?? "custom").replace(/LLMProvider$/i, "").replace(/LLM$/i, "").replace(/Provider$/i, "").toLowerCase();
+        llmProviderName = stripped || "custom";
+      }
+    } else {
+      llmProviderName = providerMode === "openai_realtime" ? "openai_realtime" : "openai";
+    }
+    this._eventBus = new EventBus();
     this.metricsAcc = new CallMetricsAccumulator({
       callId: "",
       providerMode,
       telephonyProvider: deps.bridge.telephonyProvider,
       sttProvider: sttProviderName,
       ttsProvider: ttsProviderName,
-      pricing: deps.pricing
+      llmProvider: llmProviderName,
+      pricing: deps.pricing,
+      eventBus: this._eventBus,
+      reportOnlyInitialTtfb: deps.reportOnlyInitialTtfb ?? false
     });
     getLogger().debug(`WebSocket connection opened (${deps.bridge.label})`);
   }
+  /**
+   * Record a completed turn in the dashboard store and fire the user-supplied
+   * ``onMetrics`` callback. Centralises the 4 emit sites (firstMessage, pipeline
+   * streaming/regular LLM, WebSocket remote, Realtime response_done) so the
+   * payload shape lives in one place.
+   */
+  async emitTurnMetrics(turn) {
+    if (turn == null) return;
+    this.deps.metricsStore.recordTurn({ call_id: this.callId, turn });
+    if (!this.deps.onMetrics) return;
+    const turnMetrics = turn;
+    const llm_ttft_ms = turnMetrics?.latency?.llm_ttft_ms;
+    await this.deps.onMetrics({
+      call_id: this.callId,
+      turn,
+      ...llm_ttft_ms !== void 0 ? { llm_ttft_ms } : {},
+      cost_so_far: this.metricsAcc.getCostSoFar()
+    });
+  }
+  /**
+   * Reset the TTS odd-byte carry — call at every TTS stream entry/exit.
+   * Clears ``ttsByteCarry`` so a byte left over from one TTS stream is never
+   * prepended to the first chunk of the next one by ``alignPcm16``.
+   */
+  resetTtsCarry() {
+    this.ttsByteCarry = null;
+  }
+  /**
+   * Flush both stateful resamplers and any TTS byte carry on call close.
+   * Emits tail bytes through the telephony bridge so the last ~20 ms of audio
+   * is not silently clipped on hangup. No-op if the WebSocket is already gone.
+   */
+  flushResamplers() {
+    try {
+      const inTail = this.inboundResampler.flush();
+      if (inTail.length > 0 && this.stt) {
+        this.stt.sendAudio(inTail);
+      }
+    } catch {
+    }
+    try {
+      const outTail = this.outboundResampler.flush();
+      if (outTail.length > 0 && this.ws.readyState === this.ws.OPEN) {
+        const mulaw = pcm16ToMulaw(outTail);
+        this.deps.bridge.sendAudio(this.ws, mulaw.toString("base64"), this.streamSid);
+      }
+    } catch {
+    }
+    this.ttsByteCarry = null;
+  }
+  /**
+   * Start call recording when configured. Currently Twilio-only — bridges may
+   * expose ``startRecording`` for parity when we add other carriers.
+   */
+  async startRecordingIfRequested(callId) {
+    const { recording, config } = this.deps;
+    if (!recording || !config.twilioSid || !config.twilioToken || !callId) return;
+    if (!validateTwilioSid(callId)) {
+      getLogger().warn(`Recording skipped: invalid Twilio CallSid format ${JSON.stringify(callId)}`);
+      return;
+    }
+    try {
+      const recUrl = `https://api.twilio.com/2010-04-01/Accounts/${config.twilioSid}/Calls/${callId}/Recordings.json`;
+      const recResp = await fetch(recUrl, {
+        method: "POST",
+        headers: {
+          "Authorization": `Basic ${Buffer.from(`${config.twilioSid}:${config.twilioToken}`).toString("base64")}`
+        }
+      });
+      if (recResp.ok) {
+        getLogger().debug(`Recording started for ${callId}`);
+      } else {
+        getLogger().warn(`could not start recording: ${await recResp.text()}`);
+      }
+    } catch (e) {
+      getLogger().warn(`could not start recording: ${String(e)}`);
+    }
+  }
+  // ---------------------------------------------------------------------------
+  // Public: observer API
+  // ---------------------------------------------------------------------------
+  /**
+   * Subscribe to a Patter event on the per-call EventBus.
+   *
+   * The most common use-case is 'metrics_collected' — fired after every
+   * completed turn with the TurnMetrics payload.
+   *
+   * Returns an unsubscribe function; call it to stop receiving events.
+   *
+   * @param cb    Listener registered on the per-call EventBus.
+   * @param event Event name to listen for; defaults to 'metrics_collected'.
+   * @returns The value returned by ``_eventBus.on(event, cb)``.
+   *
+   * @example
+   * const off = handler.addObserver((payload) => {
+   *   console.log('turn metrics:', payload);
+   * });
+   * // later:
+   * off();
+   */
+  addObserver(cb, event = "metrics_collected") {
+    return this._eventBus.on(event, cb);
+  }
   // ---------------------------------------------------------------------------
   // Public: called by the provider-specific parsers in server.ts
   // ---------------------------------------------------------------------------
@@ -2377,8 +4228,7 @@ var StreamHandler = class {
     this.deps.metricsStore.recordCallStart({
       call_id: callId,
       caller: this.caller,
-      callee: this.callee,
-      direction: "inbound"
+      callee: this.callee
     });
     const MAX_CALL_DURATION_MS = 60 * 60 * 1e3;
     this.maxDurationTimer = setTimeout(async () => {
@@ -2389,52 +4239,32 @@ var StreamHandler = class {
       }
     }, MAX_CALL_DURATION_MS);
     try {
-      const { notifyDashboard } = await import("./persistence-CYIGNHSU.mjs");
+      const { notifyDashboard } = await import("./persistence-LQBYQPQQ.mjs");
       notifyDashboard({
         call_id: callId,
         caller: this.caller,
-        callee: this.callee,
-        direction: "inbound"
+        callee: this.callee
       });
     } catch {
     }
     if (this.deps.onCallStart) {
+      const direction = this.deps.metricsStore.getActive(callId)?.direction ?? "inbound";
       await this.deps.onCallStart({
         call_id: callId,
         caller: this.caller,
         callee: this.callee,
-        direction: "inbound",
+        direction,
+        telephony_provider: this.deps.bridge.telephonyProvider,
         ...Object.keys(customParams).length > 0 ? { custom_params: customParams } : {}
       });
     }
-    if (this.deps.recording && this.deps.config.twilioSid && this.deps.config.twilioToken && callId) {
-      if (!validateTwilioSid(callId)) {
-        getLogger().warn(`Recording skipped: invalid Twilio CallSid format ${JSON.stringify(callId)}`);
-      } else {
-        try {
-          const recUrl = `https://api.twilio.com/2010-04-01/Accounts/${this.deps.config.twilioSid}/Calls/${callId}/Recordings.json`;
-          const recResp = await fetch(recUrl, {
-            method: "POST",
-            headers: {
-              "Authorization": `Basic ${Buffer.from(`${this.deps.config.twilioSid}:${this.deps.config.twilioToken}`).toString("base64")}`
-            }
-          });
-          if (recResp.ok) {
-            getLogger().debug(`Recording started for ${callId}`);
-          } else {
-            getLogger().warn(`could not start recording: ${await recResp.text()}`);
-          }
-        } catch (e) {
-          getLogger().warn(`could not start recording: ${String(e)}`);
-        }
-      }
-    }
+    await this.startRecordingIfRequested(callId);
     const agentVars = this.deps.sanitizeVariables(this.deps.agent.variables ?? {});
     const safeCustomParams = this.deps.sanitizeVariables(customParams);
     const allVars = { ...agentVars, ...safeCustomParams };
     const resolvedPrompt = Object.keys(allVars).length > 0 ? this.deps.resolveVariables(this.deps.agent.systemPrompt, allVars) : this.deps.agent.systemPrompt;
-    const provider = this.deps.agent.provider ?? "openai_realtime";
-    if (provider === "pipeline") {
+    const provider2 = this.deps.agent.provider ?? "openai_realtime";
+    if (provider2 === "pipeline") {
       await this.initPipeline(resolvedPrompt);
     } else {
       await this.initRealtimeAdapter(resolvedPrompt);
@@ -2446,13 +4276,56 @@ var StreamHandler = class {
   }
   /** Handle an incoming audio chunk (already decoded from base64). */
   async handleAudio(audioBuffer) {
-    const provider = this.deps.agent.provider ?? "openai_realtime";
-    if (provider === "pipeline" && this.stt) {
-      if (this.isSpeaking && (this.deps.agent.bargeInThresholdMs ?? 300) === 0) {
-        return;
-      }
+    const provider2 = this.deps.agent.provider ?? "openai_realtime";
+    if (provider2 === "pipeline" && this.stt) {
       const pcm8k = mulawToPcm16(audioBuffer);
-      const pcm16k = resample8kTo16k(pcm8k);
+      const pcm16k = this.inboundResampler.process(pcm8k);
+      if (this.deps.agent.vad && !this.vadDisabled) {
+        try {
+          const vadPromise = this.deps.agent.vad.processFrame(pcm16k, 16e3);
+          const timeoutPromise = new Promise((resolve) => setTimeout(() => resolve(null), 25));
+          const evt = await Promise.race([vadPromise, timeoutPromise]);
+          if (evt) {
+            getLogger().info(
+              `[VAD] ${evt.type}  agentSpeaking=${this.isSpeaking}`
+            );
+          }
+          if (evt?.type === "speech_start") {
+            if (this.isSpeaking) {
+              getLogger().info("[VAD] speech_start during TTS \u2192 BARGE-IN");
+              this.metricsAcc.recordOverlapStart();
+              this.metricsAcc.recordBargeinDetected();
+              const bargeinSpan = startSpan(SPAN_BARGEIN, { "patter.call.id": this.callId });
+              try {
+                this.cancelSpeaking();
+                try {
+                  this.deps.bridge.sendClear(this.ws, this.streamSid);
+                } catch (err) {
+                  getLogger().debug(`sendClear during VAD barge-in failed: ${String(err)}`);
+                }
+                this.metricsAcc.recordTtsStopped();
+                this.metricsAcc.recordTurnInterrupted();
+                this.metricsAcc.recordOverlapEnd(true);
+              } finally {
+                try {
+                  bargeinSpan.end();
+                } catch {
+                }
+              }
+            }
+            this.metricsAcc.startTurnIfIdle();
+          } else if (evt?.type === "speech_end") {
+            this.metricsAcc.recordVadStop();
+          }
+        } catch (err) {
+          this.vadDisabled = true;
+          getLogger().warn(`VAD processFrame failed \u2014 disabling VAD for this call: ${String(err)}`);
+        }
+      }
+      if (this.isSpeaking) {
+        if (this.deps.agent.vad) return;
+        if ((this.deps.agent.bargeInThresholdMs ?? 300) === 0) return;
+      }
       const hooks = this.deps.agent.hooks;
       if (hooks) {
         const hookExecutor = new PipelineHookExecutor(hooks);
@@ -2460,13 +4333,15 @@ var StreamHandler = class {
         const processed = await hookExecutor.runBeforeSendToStt(pcm16k, hookCtx);
         if (processed === null) return;
         this.stt.sendAudio(processed);
+        this.metricsAcc.addSttAudioBytes(processed.length);
       } else {
         this.stt.sendAudio(pcm16k);
+        this.metricsAcc.addSttAudioBytes(pcm16k.length);
       }
     } else if (this.adapter) {
-      if (this.adapter instanceof ElevenLabsConvAIAdapter && this.deps.bridge.telephonyProvider === "twilio") {
+      if (this.adapter instanceof ElevenLabsConvAIAdapter && this.deps.bridge.telephonyProvider === "twilio" && this.adapter.inputAudioFormat !== "ulaw_8000") {
         const pcm8k = mulawToPcm16(audioBuffer);
-        const pcm16k = resample8kTo16k(pcm8k);
+        const pcm16k = this.inboundResampler.process(pcm8k);
         this.adapter.sendAudio(pcm16k);
       } else {
         this.adapter.sendAudio(audioBuffer);
@@ -2483,8 +4358,28 @@ var StreamHandler = class {
       await this.deps.onTranscript({ role: "user", text: `[DTMF: ${digit}]`, call_id: this.callId });
     }
   }
+  /**
+   * Last mark name Twilio has confirmed playback of. Mirrors the Python
+   * ``TwilioAudioSender.last_confirmed_mark`` field — barge-in heuristics
+   * compare this against the latest sent mark to decide whether the agent's
+   * audio has actually reached the caller yet.
+   */
+  lastConfirmedMark = "";
+  /**
+   * Handle a Twilio ``mark`` event acknowledging that a previously sent
+   * audio chunk has been played out. Mirrors Python's
+   * ``twilio_handler.py``: ``audio_sender.on_mark_confirmed(mark_name)`` +
+   * ``handler.on_mark(mark_name)``.
+   */
+  async onMark(markName) {
+    if (markName) {
+      this.lastConfirmedMark = markName;
+    }
+  }
   /** Handle call stop / stream end. */
   async handleStop() {
+    this.clearGraceTimer();
+    this.flushResamplers();
     await this.closeSttOnce();
     try {
       this.adapter?.close();
@@ -2494,6 +4389,8 @@ var StreamHandler = class {
   }
   /** Handle WebSocket close event. */
   async handleWsClose() {
+    this.clearGraceTimer();
+    this.flushResamplers();
     await this.closeSttOnce();
     try {
       this.adapter?.close();
@@ -2519,15 +4416,33 @@ var StreamHandler = class {
   // ---------------------------------------------------------------------------
   /**
    * Encode a PCM 16kHz audio chunk for the telephony provider.
-   * Twilio requires mulaw 8kHz; Telnyx accepts PCM 16kHz natively.
+   *
+   * Both Twilio and Telnyx negotiate PCMU (mulaw) 8 kHz on the bidirectional
+   * media stream — Twilio always, and Telnyx because ``streaming_start``
+   * (server.ts) requests ``stream_bidirectional_codec=PCMU`` at 8 kHz. So
+   * the wire format for both providers is mulaw 8 kHz; we resample 16 kHz
+   * PCM16 → 8 kHz then encode to mulaw. Mirrors the Python pipeline path
+   * (sdk-py/getpatter/handlers/telnyx_handler.py::TelnyxAudioSender).
+   *
+   * Maintains a 1-byte carry across calls so unaligned HTTP chunks from
+   * streaming TTS providers never byte-swap the PCM16 samples downstream.
    */
   encodePipelineAudio(pcm16k) {
-    if (this.deps.bridge.telephonyProvider === "twilio") {
-      const pcm8k = resample16kTo8k(pcm16k);
-      const mulaw = pcm16ToMulaw(pcm8k);
-      return mulaw.toString("base64");
-    }
-    return pcm16k.toString("base64");
+    const aligned = this.alignPcm16(pcm16k);
+    if (aligned.length === 0) return "";
+    const pcm8k = this.outboundResampler.process(aligned);
+    const mulaw = pcm16ToMulaw(pcm8k);
+    return mulaw.toString("base64");
+  }
+  /**
+   * Prepend any carry byte from the previous chunk, return the even-length
+   * portion, and stash the final odd byte (if any) for the next call.
+   */
+  alignPcm16(chunk) {
+    const combined = this.ttsByteCarry ? Buffer.concat([this.ttsByteCarry, chunk]) : chunk;
+    const alignedLen = combined.length & ~1;
+    this.ttsByteCarry = alignedLen < combined.length ? combined.subarray(alignedLen) : null;
+    return combined.subarray(0, alignedLen);
   }
   // ---------------------------------------------------------------------------
   // Private: Pipeline mode
@@ -2556,6 +4471,7 @@ var StreamHandler = class {
     if (this.deps.agent.firstMessage && !this.deps.onMessage && this.tts) {
       this.metricsAcc.startTurn();
       let firstChunkSent = false;
+      this.resetTtsCarry();
       try {
         for await (const chunk of this.tts.synthesizeStream(this.deps.agent.firstMessage)) {
           if (!firstChunkSent) {
@@ -2567,13 +4483,11 @@ var StreamHandler = class {
         }
       } catch (e) {
         getLogger().error(`First message TTS error (${label}):`, e);
+      } finally {
+        this.resetTtsCarry();
       }
       if (firstChunkSent) {
-        const turn = this.metricsAcc.recordTurnComplete(this.deps.agent.firstMessage);
-        if (turn) {
-          this.deps.metricsStore.recordTurn({ call_id: this.callId, turn });
-          if (this.deps.onMetrics) await this.deps.onMetrics({ call_id: this.callId, turn });
-        }
+        await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(this.deps.agent.firstMessage));
         this.history.push({ role: "assistant", text: this.deps.agent.firstMessage, timestamp: Date.now() });
       }
     }
@@ -2583,15 +4497,17 @@ var StreamHandler = class {
           "Cannot pass both agent({ llm }) and serve({ onMessage }). Pick one \u2014 `llm` for built-in LLMs, `onMessage` for custom logic."
         );
       }
+      const providerModel = this.deps.agent.llm?.model ?? "";
       this.llmLoop = new LLMLoop(
         "",
         // apiKey unused when llmProvider is supplied
-        "",
-        // model unused when llmProvider is supplied
+        providerModel,
+        // propagate so calculateLlmCost can match the price row
         resolvedPrompt,
         this.deps.agent.tools,
         this.deps.agent.llm
       );
+      this.llmLoop.setEventBus(this._eventBus);
       const llmLabel = this.deps.agent.llm.constructor?.name ?? "custom";
       getLogger().debug(`Built-in LLM loop active (pipeline, ${label}, llm=${llmLabel})`);
     } else if (!this.deps.onMessage && this.deps.config.openaiKey) {
@@ -2603,6 +4519,7 @@ var StreamHandler = class {
         resolvedPrompt,
         this.deps.agent.tools
       );
+      this.llmLoop.setEventBus(this._eventBus);
       getLogger().debug(`Built-in LLM loop active (pipeline, ${label})`);
     }
     if (this.stt) {
@@ -2632,6 +4549,7 @@ var StreamHandler = class {
     }
     const processedText = await hookExecutor.runBeforeSynthesize(transformed, hookCtx);
     if (processedText === null) return;
+    this.resetTtsCarry();
     try {
       for await (const chunk of this.tts.synthesizeStream(processedText)) {
         if (!this.isSpeaking) break;
@@ -2646,6 +4564,8 @@ var StreamHandler = class {
       }
     } catch (e) {
       getLogger().error(`TTS streaming error (${this.deps.bridge.label}):`, e);
+    } finally {
+      this.resetTtsCarry();
     }
   }
   /** Handle a final transcript from STT in pipeline mode. */
@@ -2663,63 +4583,30 @@ var StreamHandler = class {
     }
   }
   async processTranscript(transcript) {
-    if (transcript.text && this.isSpeaking) {
-      getLogger().debug(
-        `Barge-in: caller spoke over agent (${sanitizeLogValue(transcript.text.slice(0, 40))})`
-      );
-      this.isSpeaking = false;
-      try {
-        this.deps.bridge.sendClear(this.ws, this.streamSid);
-      } catch (err) {
-        getLogger().debug(`sendClear during barge-in failed: ${String(err)}`);
-      }
-      this.metricsAcc.recordTurnInterrupted();
+    let interrupted = this.handleBargeIn(transcript);
+    if (transcript.text) {
+      this.metricsAcc.startTurnIfIdle();
     }
-    if (!transcript.isFinal || !transcript.text) return;
-    const now = Date.now();
-    const normalised = transcript.text.trim().toLowerCase();
-    const stripped = normalised.replace(/[.,!?;: ]+$/, "").trim();
-    const sinceLastMs = now - this.lastCommitAt;
-    const HALLUCINATIONS = /* @__PURE__ */ new Set([
-      "you",
-      "thank you",
-      "thanks",
-      "yeah",
-      "yes",
-      "no",
-      "okay",
-      "ok",
-      "uh",
-      "um",
-      "mmm",
-      "hmm",
-      ".",
-      "bye",
-      "right",
-      "cool"
-    ]);
-    if (HALLUCINATIONS.has(stripped) || stripped === "") {
-      getLogger().debug(`Dropped likely STT hallucination: ${sanitizeLogValue(normalised.slice(0, 40))}`);
-      return;
-    }
-    if (sinceLastMs < 2e3 && normalised === this.lastCommitText) {
-      getLogger().debug(
-        `Dropped duplicate final transcript (${(sinceLastMs / 1e3).toFixed(1)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
-      );
-      return;
+    if (transcript.speechFinal) {
+      this.metricsAcc.recordVadStop();
     }
-    if (sinceLastMs < 500) {
-      getLogger().debug(
-        `Dropped back-to-back final transcript (${(sinceLastMs / 1e3).toFixed(2)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
-      );
-      return;
-    }
-    this.lastCommitText = normalised;
-    this.lastCommitAt = now;
+    if (!transcript.isFinal || !transcript.text) return;
+    if (!this.commitTranscript(transcript.text)) return;
     const label = this.deps.bridge.label;
     getLogger().debug(`User (${label} pipeline): ${sanitizeLogValue(transcript.text)}`);
-    this.metricsAcc.startTurn();
+    this.metricsAcc.startTurnIfIdle();
     this.metricsAcc.recordSttComplete(transcript.text);
+    this.metricsAcc.recordSttFinalTimestamp();
+    const endpointSpan = startSpan(SPAN_ENDPOINT, { "patter.call.id": this.callId });
+    let endpointSpanClosed = false;
+    const closeEndpointSpan = () => {
+      if (endpointSpanClosed) return;
+      endpointSpanClosed = true;
+      try {
+        endpointSpan.end();
+      } catch {
+      }
+    };
     if (this.deps.onTranscript) {
       await this.deps.onTranscript({
         role: "user",
@@ -2734,10 +4621,14 @@ var StreamHandler = class {
     if (filteredTranscript === null) {
       getLogger().debug(`afterTranscribe hook vetoed turn (${label})`);
       this.metricsAcc.recordTurnInterrupted();
+      closeEndpointSpan();
       return;
     }
     this.history.push({ role: "user", text: filteredTranscript, timestamp: Date.now() });
     let responseText = "";
+    this.metricsAcc.recordOnUserTurnCompletedDelay(0);
+    this.metricsAcc.recordTurnCommitted();
+    closeEndpointSpan();
     if (this.deps.onMessage && typeof this.deps.onMessage === "function") {
       try {
         responseText = await this.deps.onMessage({
@@ -2767,104 +4658,203 @@ var StreamHandler = class {
       if (isWebSocketUrl(this.deps.onMessage)) {
         await this.handleWebSocketResponse(msgData);
         return;
-      } else {
-        try {
-          responseText = await this.deps.remoteHandler.callWebhook(this.deps.onMessage, msgData);
-        } catch (e) {
-          getLogger().error(`Webhook remote error (${label}):`, e);
-          return;
-        }
       }
-    } else if (this.llmLoop) {
-      const callCtx = { call_id: this.callId, caller: this.caller, callee: this.callee };
-      const chunker = new SentenceChunker();
-      const allParts = [];
-      const ttsFirstByteSent = { value: false };
-      this.isSpeaking = true;
-      let llmError = false;
       try {
-        try {
-          for await (const token of this.llmLoop.run(filteredTranscript, this.history.entries, callCtx)) {
-            allParts.push(token);
-            const sentences = chunker.push(token);
-            for (const sentence of sentences) {
-              if (!this.isSpeaking) break;
-              const guard = checkGuardrails(sentence, this.deps.agent.guardrails);
-              const sentenceText = guard ? guard.replacement ?? "I'm sorry, I can't respond to that." : sentence;
-              await this.synthesizeSentence(sentenceText, hookExecutor, hookCtx, ttsFirstByteSent);
-            }
-            if (!this.isSpeaking) break;
-          }
-        } catch (e) {
-          llmError = true;
-          chunker.reset();
-          getLogger().error(`LLM loop error (${label}):`, e);
-        }
-        this.metricsAcc.recordLlmComplete();
-        if (!llmError && this.isSpeaking) {
-          for (const sentence of chunker.flush()) {
-            if (!this.isSpeaking) break;
-            const guard = checkGuardrails(sentence, this.deps.agent.guardrails);
-            const sentenceText = guard ? guard.replacement ?? "I'm sorry, I can't respond to that." : sentence;
-            await this.synthesizeSentence(sentenceText, hookExecutor, hookCtx, ttsFirstByteSent);
-          }
-        }
-      } finally {
-        this.isSpeaking = false;
+        responseText = await this.deps.remoteHandler.callWebhook(this.deps.onMessage, msgData);
+      } catch (e) {
+        getLogger().error(`Webhook remote error (${label}):`, e);
+        return;
       }
-      responseText = allParts.join("");
+    } else if (this.llmLoop) {
+      responseText = await this.runPipelineLlm(filteredTranscript, hookExecutor, hookCtx);
     } else {
       return;
     }
     if (!responseText) return;
-    if (!this.llmLoop) {
-      const guard = checkGuardrails(responseText, this.deps.agent.guardrails);
-      if (guard) {
-        getLogger().debug(`Guardrail '${guard.name}' triggered (pipeline)`);
-        responseText = guard.replacement ?? "I'm sorry, I can't respond to that.";
-      }
-      this.metricsAcc.recordLlmComplete();
+    if (this.llmLoop) {
       this.history.push({ role: "assistant", text: responseText, timestamp: Date.now() });
-      const chunker = new SentenceChunker();
-      const sentences = [...chunker.push(responseText), ...chunker.flush()];
-      const ttsFirstByteSent = { value: false };
-      let interrupted = false;
-      this.isSpeaking = true;
+      this.metricsAcc.recordTtsComplete(responseText);
+    } else {
+      interrupted = await this.runRegularLlm(responseText, hookExecutor, hookCtx) || interrupted;
+      responseText = this.history.entries[this.history.entries.length - 1]?.text ?? responseText;
+    }
+    if (!interrupted) {
+      await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(responseText));
+    }
+  }
+  /**
+   * Barge-in: caller spoke over in-flight TTS. Flip ``isSpeaking`` so the
+   * sentence loop exits on its next check, clear downstream audio buffers,
+   * record the interruption, and return ``true`` so the caller skips the
+   * turn-complete record.
+   */
+  handleBargeIn(transcript) {
+    if (!transcript.text || !this.isSpeaking) return false;
+    getLogger().debug(
+      `Barge-in: caller spoke over agent (${sanitizeLogValue(transcript.text.slice(0, 40))})`
+    );
+    this.metricsAcc.recordOverlapStart();
+    this.metricsAcc.recordBargeinDetected();
+    const bargeinSpan = startSpan(SPAN_BARGEIN, { "patter.call.id": this.callId });
+    try {
+      this.cancelSpeaking();
+      try {
+        this.deps.bridge.sendClear(this.ws, this.streamSid);
+      } catch (err) {
+        getLogger().debug(`sendClear during barge-in failed: ${String(err)}`);
+      }
+      this.metricsAcc.recordTtsStopped();
+      this.metricsAcc.recordTurnInterrupted();
+      this.metricsAcc.recordOverlapEnd(true);
+    } finally {
       try {
-        for (const sentence of sentences) {
-          if (!this.isSpeaking) {
-            interrupted = true;
-            break;
+        bargeinSpan.end();
+      } catch {
+      }
+    }
+    return true;
+  }
+  /**
+   * Dedup + throttle + hallucination filter for final STT transcripts.
+   * Mirrors ``PipelineStreamHandler._stt_loop`` on the Python side.
+   * Returns ``true`` when the transcript should be committed to a turn,
+   * ``false`` when it must be dropped. Drop reasons:
+   *   - text matches common short hallucinations ("you", "thanks", ...)
+   *   - duplicate final within 2 s of previous commit
+   *   - back-to-back finals under 500 ms (too tight to be real utterances)
+   */
+  commitTranscript(text) {
+    const now = Date.now();
+    const normalised = text.trim().toLowerCase();
+    const stripped = normalised.replace(/[.,!?;: ]+$/, "").trim();
+    const sinceLastMs = now - this.lastCommitAt;
+    if (HALLUCINATIONS.has(stripped) || stripped === "") {
+      getLogger().debug(`Dropped likely STT hallucination: ${sanitizeLogValue(normalised.slice(0, 40))}`);
+      return false;
+    }
+    if (sinceLastMs < 2e3 && normalised === this.lastCommitText) {
+      getLogger().debug(
+        `Dropped duplicate final transcript (${(sinceLastMs / 1e3).toFixed(1)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
+      );
+      return false;
+    }
+    if (sinceLastMs < 500) {
+      getLogger().debug(
+        `Dropped back-to-back final transcript (${(sinceLastMs / 1e3).toFixed(2)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
+      );
+      return false;
+    }
+    this.lastCommitText = normalised;
+    this.lastCommitAt = now;
+    return true;
+  }
+  /**
+   * Streaming built-in LLM path with sentence chunking and per-sentence
+   * guardrails/TTS. Returns the concatenated response text.
+   */
+  async runPipelineLlm(filteredTranscript, hookExecutor, hookCtx) {
+    const label = this.deps.bridge.label;
+    const callCtx = { call_id: this.callId, caller: this.caller, callee: this.callee };
+    const chunker = new SentenceChunker();
+    const allParts = [];
+    const ttsFirstByteSent = { value: false };
+    this.beginSpeaking();
+    let llmError = false;
+    const llmSpan = startSpan(SPAN_LLM, { "patter.call.id": this.callId });
+    const guardAndSpeak = async (sentence, isFirst) => {
+      if (isFirst) this.metricsAcc.recordLlmFirstSentenceComplete();
+      const guard = checkGuardrails(sentence, this.deps.agent.guardrails);
+      const sentenceText = guard ? guard.replacement ?? "I'm sorry, I can't respond to that." : sentence;
+      await this.synthesizeSentence(sentenceText, hookExecutor, hookCtx, ttsFirstByteSent);
+    };
+    let firstSentenceEmitted = false;
+    try {
+      try {
+        for await (const token of this.llmLoop.run(
+          filteredTranscript,
+          this.history.entries,
+          callCtx,
+          this.metricsAcc,
+          hookExecutor,
+          hookCtx
+        )) {
+          this.metricsAcc.recordLlmFirstToken();
+          allParts.push(token);
+          for (const sentence of chunker.push(token)) {
+            if (!this.isSpeaking) break;
+            await guardAndSpeak(sentence, !firstSentenceEmitted);
+            firstSentenceEmitted = true;
           }
-          await this.synthesizeSentence(sentence, hookExecutor, hookCtx, ttsFirstByteSent);
+          if (!this.isSpeaking) break;
         }
-      } finally {
-        this.isSpeaking = false;
+      } catch (e) {
+        llmError = true;
+        chunker.reset();
+        getLogger().error(`LLM loop error (${label}):`, e);
+        this.metricsAcc.recordTurnInterrupted();
       }
-      if (!interrupted) {
-        this.metricsAcc.recordTtsComplete(responseText);
+      this.metricsAcc.recordLlmComplete();
+      if (!llmError && this.isSpeaking) {
+        for (const sentence of chunker.flush()) {
+          if (!this.isSpeaking) break;
+          await guardAndSpeak(sentence, !firstSentenceEmitted);
+          firstSentenceEmitted = true;
+        }
       }
-    } else {
-      this.history.push({ role: "assistant", text: responseText, timestamp: Date.now() });
-      this.metricsAcc.recordTtsComplete(responseText);
+    } finally {
+      this.endSpeakingWithGrace();
+      try {
+        llmSpan.end();
+      } catch {
+      }
+    }
+    return allParts.join("");
+  }
+  /**
+   * Non-streaming path (onMessage function / webhook): apply output guardrails,
+   * push to history, sentence-chunk the text, synthesize. Returns ``true`` if
+   * TTS was interrupted mid-flight so the caller can skip turn-complete.
+   */
+  async runRegularLlm(responseText, hookExecutor, hookCtx) {
+    const guard = checkGuardrails(responseText, this.deps.agent.guardrails);
+    let text = responseText;
+    if (guard) {
+      getLogger().debug(`Guardrail '${guard.name}' triggered (pipeline)`);
+      text = guard.replacement ?? "I'm sorry, I can't respond to that.";
     }
-    const turn = this.metricsAcc.recordTurnComplete(responseText);
-    if (turn) {
-      this.deps.metricsStore.recordTurn({ call_id: this.callId, turn });
-      if (this.deps.onMetrics) await this.deps.onMetrics({ call_id: this.callId, turn });
+    this.metricsAcc.recordLlmComplete();
+    this.history.push({ role: "assistant", text, timestamp: Date.now() });
+    const chunker = new SentenceChunker();
+    const sentences = [...chunker.push(text), ...chunker.flush()];
+    const ttsFirstByteSent = { value: false };
+    this.beginSpeaking();
+    let interrupted = false;
+    try {
+      for (const sentence of sentences) {
+        if (!this.isSpeaking) {
+          interrupted = true;
+          break;
+        }
+        await this.synthesizeSentence(sentence, hookExecutor, hookCtx, ttsFirstByteSent);
+      }
+    } finally {
+      this.endSpeakingWithGrace();
     }
+    if (!interrupted) this.metricsAcc.recordTtsComplete(text);
+    return interrupted;
   }
   /** Handle streaming WebSocket remote response with TTS. */
   async handleWebSocketResponse(msgData) {
     const onMessage = this.deps.onMessage;
     const parts = [];
     this.metricsAcc.recordLlmComplete();
-    this.isSpeaking = true;
+    this.beginSpeaking();
     let wsTtsStarted = false;
     try {
       for await (const chunk of this.deps.remoteHandler.callWebSocket(onMessage, msgData)) {
         parts.push(chunk);
         if (this.tts) {
+          this.resetTtsCarry();
           for await (const audioChunk of this.tts.synthesizeStream(chunk)) {
             if (!this.isSpeaking) break;
             if (!wsTtsStarted) {
@@ -2879,15 +4869,12 @@ var StreamHandler = class {
     } catch (e) {
       getLogger().error(`WebSocket remote error (${this.deps.bridge.label}):`, e);
     } finally {
-      this.isSpeaking = false;
+      this.endSpeakingWithGrace();
+      this.resetTtsCarry();
     }
     const responseText = parts.join("");
     this.metricsAcc.recordTtsComplete(responseText);
-    const turn = this.metricsAcc.recordTurnComplete(responseText);
-    if (turn) {
-      this.deps.metricsStore.recordTurn({ call_id: this.callId, turn });
-      if (this.deps.onMetrics) await this.deps.onMetrics({ call_id: this.callId, turn });
-    }
+    await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(responseText));
     if (responseText) this.history.push({ role: "assistant", text: responseText, timestamp: Date.now() });
   }
   // ---------------------------------------------------------------------------
@@ -2917,91 +4904,97 @@ var StreamHandler = class {
       try {
         await this.handleAdapterEvent(type, eventData);
       } catch (err) {
-        getLogger().error(`Adapter event handler error (${label}):`, err);
-      }
-    });
-  }
-  async handleAdapterEvent(type, eventData) {
-    if (type === "audio") {
-      if (!this.responseAudioStarted) {
-        this.responseAudioStarted = true;
-        if (this.metricsAcc.turnActive === false) {
-          this.metricsAcc.startTurn();
-        }
-        this.metricsAcc.recordTtsFirstByte();
-      }
-      let outAudio = eventData;
-      if (this.deps.bridge.telephonyProvider === "telnyx") {
-        outAudio = resample8kTo16k(mulawToPcm16(outAudio));
-      }
-      const encoded = outAudio.toString("base64");
-      this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
-      this.chunkCount++;
-      this.deps.bridge.sendMark(this.ws, `audio_${this.chunkCount}`, this.streamSid);
-    } else if (type === "transcript_input") {
-      const inputText = eventData;
-      getLogger().debug(`User (${this.deps.bridge.label}): ${sanitizeLogValue(inputText)}`);
-      this.history.push({ role: "user", text: inputText, timestamp: Date.now() });
-      this.metricsAcc.startTurn();
-      this.currentAgentText = "";
-      this.responseAudioStarted = false;
-      if (this.deps.onTranscript) {
-        await this.deps.onTranscript({
-          role: "user",
-          text: inputText,
-          call_id: this.callId,
-          history: [...this.history.entries]
-        });
-      }
-    } else if (type === "transcript_output") {
-      const outputText = eventData;
-      if (outputText) {
-        const triggered = checkGuardrails(outputText, this.deps.agent.guardrails);
-        if (triggered) {
-          getLogger().debug(`Guardrail '${triggered.name}' triggered`);
-          if (this.adapter instanceof OpenAIRealtimeAdapter) {
-            this.adapter.cancelResponse();
-            await this.adapter.sendText(triggered.replacement ?? "I'm sorry, I can't respond to that.");
-          }
-        }
-        this.currentAgentText += outputText;
-      }
-    } else if (type === "response_done") {
-      const responseData = eventData;
-      if (responseData) {
-        const usage = responseData.usage;
-        if (usage) {
-          this.metricsAcc.recordRealtimeUsage(usage);
-        }
-      }
-      if (this.currentAgentText) {
-        this.history.push({ role: "assistant", text: this.currentAgentText, timestamp: Date.now() });
-        const turn = this.metricsAcc.recordTurnComplete(this.currentAgentText);
-        this.responseAudioStarted = false;
-        if (this.deps.onMetrics) {
-          await this.deps.onMetrics({
-            call_id: this.callId,
-            turn
-          });
-        }
-        this.deps.metricsStore.recordTurn({ call_id: this.callId, turn });
-        this.currentAgentText = "";
-      } else {
-        this.metricsAcc.recordTurnInterrupted();
-        this.responseAudioStarted = false;
+        getLogger().error(`Adapter event handler error (${label}):`, err);
+      }
+    });
+  }
+  async handleAdapterEvent(type, eventData) {
+    const handler = this.adapterEventHandlers[type];
+    if (handler) await handler(eventData);
+  }
+  /** Event-type → handler dispatch table for the Realtime adapter. */
+  adapterEventHandlers = {
+    audio: async (eventData) => this.onAdapterAudio(eventData),
+    speech_stopped: async () => this.onAdapterSpeechStopped(),
+    transcript_input: async (eventData) => this.onAdapterTranscriptInput(eventData),
+    transcript_output: async (eventData) => this.onAdapterTranscriptOutput(eventData),
+    response_done: async (eventData) => this.onAdapterResponseDone(eventData),
+    speech_started: async () => this.onAdapterSpeechInterrupt(),
+    interruption: async () => this.onAdapterSpeechInterrupt(),
+    function_call: async (eventData) => {
+      if (this.adapter instanceof OpenAIRealtimeAdapter) {
+        await this.handleFunctionCall(eventData);
       }
-    } else if (type === "speech_started" || type === "interruption") {
-      this.deps.bridge.sendClear(this.ws, this.streamSid);
+    }
+  };
+  async onAdapterAudio(eventData) {
+    if (!this.responseAudioStarted) {
+      this.responseAudioStarted = true;
+      if (this.metricsAcc.turnActive === false) this.metricsAcc.startTurn();
+      this.metricsAcc.recordTtsFirstByte();
+    }
+    const outAudio = eventData;
+    this.deps.bridge.sendAudio(this.ws, outAudio.toString("base64"), this.streamSid);
+    this.chunkCount++;
+    this.deps.bridge.sendMark(this.ws, `audio_${this.chunkCount}`, this.streamSid);
+  }
+  onAdapterSpeechStopped() {
+    if (!this.metricsAcc.turnActive) this.metricsAcc.startTurn();
+    this.currentAgentText = "";
+    this.responseAudioStarted = false;
+  }
+  async onAdapterTranscriptInput(inputText) {
+    getLogger().debug(`User (${this.deps.bridge.label}): ${sanitizeLogValue(inputText)}`);
+    this.history.push({ role: "user", text: inputText, timestamp: Date.now() });
+    if (!this.metricsAcc.turnActive) {
+      this.metricsAcc.startTurn();
+      this.currentAgentText = "";
+      this.responseAudioStarted = false;
+    }
+    this.metricsAcc.recordSttComplete(inputText);
+    if (this.deps.onTranscript) {
+      await this.deps.onTranscript({
+        role: "user",
+        text: inputText,
+        call_id: this.callId,
+        history: [...this.history.entries]
+      });
+    }
+  }
+  async onAdapterTranscriptOutput(outputText) {
+    if (!outputText) return;
+    const triggered = checkGuardrails(outputText, this.deps.agent.guardrails);
+    if (triggered) {
+      getLogger().debug(`Guardrail '${triggered.name}' triggered`);
       if (this.adapter instanceof OpenAIRealtimeAdapter) {
         this.adapter.cancelResponse();
+        await this.adapter.sendText(triggered.replacement ?? "I'm sorry, I can't respond to that.");
       }
-      this.metricsAcc.recordTurnInterrupted();
+    }
+    this.currentAgentText += outputText;
+  }
+  async onAdapterResponseDone(responseData) {
+    if (responseData) {
+      const usage = responseData.usage;
+      if (usage) this.metricsAcc.recordRealtimeUsage(usage);
+    }
+    if (this.currentAgentText) {
+      this.history.push({ role: "assistant", text: this.currentAgentText, timestamp: Date.now() });
+      this.responseAudioStarted = false;
+      await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(this.currentAgentText));
       this.currentAgentText = "";
+    } else {
+      this.metricsAcc.recordTurnInterrupted();
       this.responseAudioStarted = false;
-    } else if (type === "function_call" && this.adapter instanceof OpenAIRealtimeAdapter) {
-      await this.handleFunctionCall(eventData);
     }
   }
+  onAdapterSpeechInterrupt() {
+    this.deps.bridge.sendClear(this.ws, this.streamSid);
+    if (this.adapter instanceof OpenAIRealtimeAdapter) this.adapter.cancelResponse();
+    this.metricsAcc.recordTurnInterrupted();
+    this.currentAgentText = "";
+    this.responseAudioStarted = false;
+  }
   async handleFunctionCall(fc) {
     const adapter = this.adapter;
     if (fc.name === "transfer_call") {
@@ -3095,7 +5088,7 @@ var StreamHandler = class {
       finalMetrics
     );
     try {
-      const { notifyDashboard } = await import("./persistence-CYIGNHSU.mjs");
+      const { notifyDashboard } = await import("./persistence-LQBYQPQQ.mjs");
       notifyDashboard(callEndData);
     } catch {
     }
@@ -3135,6 +5128,279 @@ async function queryDeepgramCost(metricsAcc, deepgramKey, deepgramRequestId) {
   }
 }
+// src/services/call-log.ts
+import * as crypto3 from "crypto";
+import * as fs2 from "fs";
+import { promises as fsp } from "fs";
+import * as os from "os";
+import * as path2 from "path";
+var SCHEMA_VERSION = "1.0";
+var DEFAULT_RETENTION_DAYS = 30;
+function xdgDataHome() {
+  return process.env.XDG_DATA_HOME || path2.join(os.homedir(), ".local", "share");
+}
+function platformDefaultRoot() {
+  if (process.platform === "darwin") {
+    return path2.join(os.homedir(), "Library", "Application Support", "patter");
+  }
+  if (process.platform === "win32") {
+    const localAppData = process.env.LOCALAPPDATA;
+    if (localAppData) return path2.join(localAppData, "patter");
+    return path2.join(os.homedir(), "AppData", "Local", "patter");
+  }
+  return path2.join(xdgDataHome(), "patter");
+}
+function resolveLogRoot(explicit) {
+  const value = explicit ?? process.env.PATTER_LOG_DIR;
+  if (!value) return null;
+  if (value.trim().toLowerCase() === "auto") return platformDefaultRoot();
+  if (value.startsWith("~")) return path2.join(os.homedir(), value.slice(1));
+  return value;
+}
+function retentionDays() {
+  const raw = process.env.PATTER_LOG_RETENTION_DAYS;
+  if (raw === void 0) return DEFAULT_RETENTION_DAYS;
+  const parsed = Number.parseInt(raw, 10);
+  if (Number.isNaN(parsed)) return DEFAULT_RETENTION_DAYS;
+  return Math.max(0, parsed);
+}
+function redactMode() {
+  const raw = (process.env.PATTER_LOG_REDACT_PHONE || "mask").trim().toLowerCase();
+  if (raw === "full" || raw === "mask" || raw === "hash_only") return raw;
+  return "mask";
+}
+function redactPhone(raw) {
+  if (!raw) return "";
+  const mode = redactMode();
+  if (mode === "full") return raw;
+  if (mode === "hash_only") {
+    return "sha256:" + crypto3.createHash("sha256").update(raw, "utf8").digest("hex").slice(0, 16);
+  }
+  return maskPhoneNumber(raw);
+}
+function utcIso(tsSeconds) {
+  const ms = tsSeconds !== void 0 ? tsSeconds * 1e3 : Date.now();
+  return new Date(ms).toISOString();
+}
+async function atomicWriteJson(filePath, payload) {
+  const dir = path2.dirname(filePath);
+  await fsp.mkdir(dir, { recursive: true });
+  const tmp = path2.join(dir, `.tmp.${process.pid}.${crypto3.randomBytes(4).toString("hex")}.json`);
+  try {
+    const handle = await fsp.open(tmp, "w");
+    try {
+      await handle.writeFile(JSON.stringify(payload, null, 2) + "\n", { encoding: "utf8" });
+      await handle.sync();
+    } finally {
+      await handle.close();
+    }
+    await fsp.rename(tmp, filePath);
+  } catch (err) {
+    try {
+      await fsp.unlink(tmp);
+    } catch {
+    }
+    throw err;
+  }
+}
+async function appendJsonl(filePath, record) {
+  await fsp.mkdir(path2.dirname(filePath), { recursive: true });
+  await fsp.appendFile(filePath, JSON.stringify(record) + "\n", { encoding: "utf8" });
+}
+var CallLogger = class {
+  root;
+  constructor(root) {
+    if (!root) {
+      this.root = null;
+      return;
+    }
+    const resolved = root.startsWith("~") ? path2.join(os.homedir(), root.slice(1)) : root;
+    try {
+      fs2.mkdirSync(resolved, { recursive: true });
+      this.root = resolved;
+      getLogger().info(`Call logs: ${resolved}`);
+    } catch (err) {
+      getLogger().warn(
+        `Could not create call log root ${resolved}: ${sanitizeLogValue(String(err))}`
+      );
+      this.root = null;
+    }
+  }
+  get enabled() {
+    return this.root !== null;
+  }
+  callDir(callId, startedAtSeconds) {
+    if (this.root === null) return null;
+    const ms = startedAtSeconds !== void 0 ? startedAtSeconds * 1e3 : Date.now();
+    const dt = new Date(ms);
+    const year = String(dt.getUTCFullYear()).padStart(4, "0");
+    const month = String(dt.getUTCMonth() + 1).padStart(2, "0");
+    const day = String(dt.getUTCDate()).padStart(2, "0");
+    const safeId = sanitizeLogValue(callId, 64).replace(/\//g, "_") || "unknown";
+    return path2.join(this.root, "calls", year, month, day, safeId);
+  }
+  async logCallStart(callId, input = {}) {
+    if (!this.enabled) return;
+    const startedAt = Date.now() / 1e3;
+    const dir = this.callDir(callId, startedAt);
+    if (dir === null) return;
+    const metadata = {
+      schema_version: SCHEMA_VERSION,
+      call_id: callId,
+      trace_id: input.traceId ?? null,
+      started_at: utcIso(startedAt),
+      ended_at: null,
+      duration_ms: null,
+      status: "in_progress",
+      caller: redactPhone(input.caller ?? ""),
+      callee: redactPhone(input.callee ?? ""),
+      telephony_provider: input.telephonyProvider ?? "",
+      provider_mode: input.providerMode ?? "",
+      agent: input.agent ?? {},
+      turns: 0,
+      cost: null,
+      latency: null,
+      error: null
+    };
+    try {
+      await atomicWriteJson(path2.join(dir, "metadata.json"), metadata);
+    } catch (err) {
+      getLogger().warn(`call_log write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`);
+    }
+    if (crypto3.randomBytes(1)[0] < 5) {
+      this.sweepOldDays();
+    }
+  }
+  async logTurn(callId, turn) {
+    if (!this.enabled) return;
+    const dir = this.callDir(callId);
+    if (dir === null) return;
+    const record = {
+      schema_version: SCHEMA_VERSION,
+      ts: utcIso(typeof turn.timestamp === "number" ? turn.timestamp : void 0),
+      ...turn
+    };
+    try {
+      await appendJsonl(path2.join(dir, "transcript.jsonl"), record);
+    } catch (err) {
+      getLogger().warn(
+        `call_log turn write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`
+      );
+    }
+  }
+  async logEvent(callId, eventType, payload = {}) {
+    if (!this.enabled) return;
+    const dir = this.callDir(callId);
+    if (dir === null) return;
+    const record = {
+      schema_version: SCHEMA_VERSION,
+      ts: utcIso(),
+      type: eventType,
+      data: payload
+    };
+    try {
+      await appendJsonl(path2.join(dir, "events.jsonl"), record);
+    } catch (err) {
+      getLogger().warn(
+        `call_log event write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`
+      );
+    }
+  }
+  async logCallEnd(callId, input = {}) {
+    if (!this.enabled) return;
+    const dir = this.callDir(callId);
+    if (dir === null) return;
+    const metadataPath = path2.join(dir, "metadata.json");
+    let existing = {};
+    try {
+      existing = JSON.parse(await fsp.readFile(metadataPath, "utf8"));
+    } catch {
+      existing = {
+        schema_version: SCHEMA_VERSION,
+        call_id: callId,
+        started_at: null
+      };
+    }
+    const merged = {
+      ...existing,
+      ended_at: utcIso(),
+      duration_ms: input.durationSeconds !== void 0 ? Math.round(input.durationSeconds * 1e3 * 10) / 10 : null,
+      status: input.status ?? "completed",
+      turns: input.turns ?? null,
+      cost: input.cost ?? null,
+      latency: input.latency ?? null,
+      error: input.error ?? null
+    };
+    try {
+      await atomicWriteJson(metadataPath, merged);
+    } catch (err) {
+      getLogger().warn(
+        `call_log finalize failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`
+      );
+    }
+  }
+  // --- Retention ---------------------------------------------------------
+  sweepOldDays() {
+    if (this.root === null) return;
+    const days = retentionDays();
+    if (days === 0) return;
+    const cutoff = Date.now() / 1e3 - days * 86400;
+    const callsRoot = path2.join(this.root, "calls");
+    if (!fs2.existsSync(callsRoot)) return;
+    try {
+      for (const yearName of fs2.readdirSync(callsRoot)) {
+        if (!/^\d+$/.test(yearName)) continue;
+        const yearDir = path2.join(callsRoot, yearName);
+        if (!fs2.statSync(yearDir).isDirectory()) continue;
+        for (const monthName of fs2.readdirSync(yearDir)) {
+          if (!/^\d+$/.test(monthName)) continue;
+          const monthDir = path2.join(yearDir, monthName);
+          if (!fs2.statSync(monthDir).isDirectory()) continue;
+          for (const dayName of fs2.readdirSync(monthDir)) {
+            if (!/^\d+$/.test(dayName)) continue;
+            const dayDir = path2.join(monthDir, dayName);
+            const y = Number.parseInt(yearName, 10);
+            const m = Number.parseInt(monthName, 10);
+            const d = Number.parseInt(dayName, 10);
+            const ts = Date.UTC(y, m - 1, d) / 1e3;
+            if (ts < cutoff) {
+              rmTree(dayDir);
+            }
+          }
+          try {
+            if (fs2.readdirSync(monthDir).length === 0) fs2.rmdirSync(monthDir);
+          } catch {
+          }
+        }
+        try {
+          if (fs2.readdirSync(yearDir).length === 0) fs2.rmdirSync(yearDir);
+        } catch {
+        }
+      }
+    } catch (err) {
+      getLogger().debug(`call_log sweep failed: ${sanitizeLogValue(String(err))}`);
+    }
+  }
+};
+function rmTree(target) {
+  try {
+    for (const child of fs2.readdirSync(target)) {
+      const childPath = path2.join(target, child);
+      const stat = fs2.lstatSync(childPath);
+      if (stat.isDirectory()) {
+        rmTree(childPath);
+      } else {
+        try {
+          fs2.unlinkSync(childPath);
+        } catch {
+        }
+      }
+    }
+    fs2.rmdirSync(target);
+  } catch {
+  }
+}
 // src/server.ts
 var TRANSFER_CALL_TOOL = {
   name: "transfer_call",
@@ -3171,37 +5437,76 @@ function validateWebhookUrl(url) {
   if (!["http:", "https:"].includes(parsed.protocol)) {
     throw new Error(`Invalid webhook URL scheme: ${parsed.protocol}`);
   }
-  const hostname = parsed.hostname;
-  const blocked = [
-    /^127\./,
-    /^10\./,
-    /^172\.(1[6-9]|2\d|3[01])\./,
-    /^192\.168\./,
-    /^169\.254\./,
-    /^0\./,
-    /^::1$/,
-    /^localhost$/i,
-    /^metadata\.google\.internal$/i
-  ];
-  if (blocked.some((re) => re.test(hostname))) {
-    throw new Error(`Webhook URL blocked: ${hostname} is a private/internal address`);
+  const rawHost = parsed.hostname;
+  const host = rawHost.replace(/^\[/, "").replace(/\]$/, "").toLowerCase();
+  const BLOCKED_HOSTNAMES = /* @__PURE__ */ new Set([
+    "localhost",
+    "ip6-localhost",
+    "ip6-loopback",
+    "metadata",
+    "metadata.google.internal",
+    "metadata.azure.com"
+  ]);
+  if (BLOCKED_HOSTNAMES.has(host)) {
+    throw new Error(`Webhook URL blocked: ${rawHost} is a private/internal address`);
+  }
+  const IPV4_RE = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/;
+  const v4 = IPV4_RE.exec(host);
+  if (v4) {
+    const oct = v4.slice(1, 5).map((s) => parseInt(s, 10));
+    if (oct.some((n) => n < 0 || n > 255)) {
+      throw new Error(`Webhook URL blocked: ${rawHost} is not a valid IPv4 address`);
+    }
+    const [a, b] = oct;
+    if (a === 0 || // 0.0.0.0/8 (any 0.x)
+    a === 10 || // 10.0.0.0/8
+    a === 127 || // 127.0.0.0/8 loopback
+    a === 169 && b === 254 || // 169.254.0.0/16 link-local
+    a === 172 && b >= 16 && b <= 31 || // 172.16.0.0/12
+    a === 192 && b === 168) {
+      throw new Error(`Webhook URL blocked: ${rawHost} is a private/internal address`);
+    }
+    return;
+  }
+  if (host.includes(":")) {
+    if (host === "::1" || host === "::") {
+      throw new Error(`Webhook URL blocked: ${rawHost} is a private/internal address`);
+    }
+    if (/^fc[0-9a-f]{0,2}:/.test(host) || /^fd[0-9a-f]{0,2}:/.test(host)) {
+      throw new Error(`Webhook URL blocked: ${rawHost} is a private/internal address`);
+    }
+    if (/^fe[89ab][0-9a-f]?:/.test(host)) {
+      throw new Error(`Webhook URL blocked: ${rawHost} is a private/internal address`);
+    }
   }
 }
 function validateTelnyxSignature(rawBody, signature, timestamp, publicKey, toleranceSec = 300) {
   try {
     const ts = parseInt(timestamp, 10);
     if (!Number.isFinite(ts)) return false;
-    const ageMs = Date.now() - ts;
+    const tsMs = ts < 1e12 ? ts * 1e3 : ts;
+    const ageMs = Date.now() - tsMs;
     if (ageMs < 0 || ageMs > toleranceSec * 1e3) return false;
     const payload = `${timestamp}|${rawBody}`;
     const keyBuffer = Buffer.from(publicKey, "base64");
-    const sigBuffer = Buffer.from(signature, "base64");
-    const keyObject = crypto3.createPublicKey({
+    const keyObject = crypto4.createPublicKey({
       key: keyBuffer,
       format: "der",
       type: "spki"
     });
-    return crypto3.verify(null, Buffer.from(payload), keyObject, sigBuffer);
+    for (const rawSig of signature.split(",")) {
+      const trimmed = rawSig.trim();
+      if (!trimmed) continue;
+      try {
+        const sigBuffer = Buffer.from(trimmed, "base64");
+        if (crypto4.verify(null, Buffer.from(payload), keyObject, sigBuffer)) {
+          return true;
+        }
+      } catch {
+        continue;
+      }
+    }
+    return false;
   } catch {
     return false;
   }
@@ -3211,9 +5516,12 @@ function validateTwilioSid(sid, prefix = "CA") {
 }
 function validateTwilioSignature(url, params, signature, authToken) {
   const data = url + Object.keys(params).sort().reduce((acc, key) => acc + key + (params[key] ?? ""), "");
-  const expected = crypto3.createHmac("sha1", authToken).update(data).digest("base64");
+  const expected = crypto4.createHmac("sha1", authToken).update(data).digest("base64");
   try {
-    return crypto3.timingSafeEqual(Buffer.from(signature), Buffer.from(expected));
+    const sigBuf = Buffer.from(signature);
+    const expBuf = Buffer.from(expected);
+    if (sigBuf.length !== expBuf.length) return false;
+    return crypto4.timingSafeEqual(sigBuf, expBuf);
   } catch {
     return false;
   }
@@ -3247,8 +5555,6 @@ function buildAIAdapter(config, agent, resolvedPrompt) {
       engine.apiKey,
       engine.agentId,
       agent.voice ?? "EXAVITQu4vr4xnSDxMaL",
-      "eleven_turbo_v2_5",
-      agent.language ?? "en",
       agent.firstMessage ?? ""
     );
   }
@@ -3353,7 +5659,7 @@ function isValidTelnyxTransferTarget(target) {
   if (/^\+[1-9]\d{6,14}$/.test(target)) return true;
   return /^sips?:[^\s@]+(@[^\s]+)?$/i.test(target);
 }
-var TELNYX_DTMF_ALLOWED = new Set("0123456789*#ABCDabcd");
+var TELNYX_DTMF_ALLOWED = new Set("0123456789*#ABCDabcdwW");
 var TELNYX_DTMF_DURATION_MS = 250;
 async function sleep(ms) {
   if (ms <= 0) return;
@@ -3379,7 +5685,7 @@ var TelnyxBridge = class {
       return;
     }
     const telnyxKey = this.config.telnyxKey ?? "";
-    await fetch(`https://api.telnyx.com/v2/calls/${callId}/actions/transfer`, {
+    await fetch(`https://api.telnyx.com/v2/calls/${encodeURIComponent(callId)}/actions/transfer`, {
       method: "POST",
       headers: { "Content-Type": "application/json", "Authorization": `Bearer ${telnyxKey}` },
       body: JSON.stringify({ to: toNumber })
@@ -3403,7 +5709,7 @@ var TelnyxBridge = class {
     }
     const duration = Math.max(100, Math.min(500, TELNYX_DTMF_DURATION_MS));
     for (let i = 0; i < filtered.length; i += 1) {
-      await fetch(`https://api.telnyx.com/v2/calls/${callId}/actions/send_dtmf`, {
+      await fetch(`https://api.telnyx.com/v2/calls/${encodeURIComponent(callId)}/actions/send_dtmf`, {
         method: "POST",
         headers: { "Content-Type": "application/json", "Authorization": `Bearer ${telnyxKey}` },
         body: JSON.stringify({ digits: filtered[i], duration_millis: duration })
@@ -3418,7 +5724,7 @@ var TelnyxBridge = class {
     const telnyxKey = this.config.telnyxKey ?? "";
     if (!telnyxKey || !callId) return;
     try {
-      const resp = await fetch(`https://api.telnyx.com/v2/calls/${callId}/actions/record_start`, {
+      const resp = await fetch(`https://api.telnyx.com/v2/calls/${encodeURIComponent(callId)}/actions/record_start`, {
         method: "POST",
         headers: { "Content-Type": "application/json", "Authorization": `Bearer ${telnyxKey}` },
         body: JSON.stringify({ format: "mp3", channels: "single" })
@@ -3436,7 +5742,7 @@ var TelnyxBridge = class {
     const telnyxKey = this.config.telnyxKey ?? "";
     if (!telnyxKey || !callId) return;
     try {
-      const resp = await fetch(`https://api.telnyx.com/v2/calls/${callId}/actions/record_stop`, {
+      const resp = await fetch(`https://api.telnyx.com/v2/calls/${encodeURIComponent(callId)}/actions/record_stop`, {
         method: "POST",
         headers: { "Content-Type": "application/json", "Authorization": `Bearer ${telnyxKey}` },
         body: JSON.stringify({})
@@ -3450,11 +5756,11 @@ var TelnyxBridge = class {
       getLogger().warn(`Telnyx record_stop error: ${String(e)}`);
     }
   }
-  async endCall(callId, ws) {
+  async endCall(callId, _ws) {
     const telnyxKey = this.config.telnyxKey ?? "";
     if (callId && telnyxKey) {
       try {
-        await fetch(`https://api.telnyx.com/v2/calls/${callId}/actions/hangup`, {
+        await fetch(`https://api.telnyx.com/v2/calls/${encodeURIComponent(callId)}/actions/hangup`, {
           method: "POST",
           headers: { "Content-Type": "application/json", "Authorization": `Bearer ${telnyxKey}` },
           body: JSON.stringify({})
@@ -3462,7 +5768,6 @@ var TelnyxBridge = class {
       } catch {
       }
     }
-    ws.close();
   }
   createStt(agent) {
     return createSTT(agent);
@@ -3471,7 +5776,7 @@ var TelnyxBridge = class {
     if (this.config.telnyxKey && callId) {
       try {
         const resp = await fetch(
-          `https://api.telnyx.com/v2/calls/${callId}`,
+          `https://api.telnyx.com/v2/calls/${encodeURIComponent(callId)}`,
           {
             headers: { "Authorization": `Bearer ${this.config.telnyxKey}` },
             signal: AbortSignal.timeout(5e3)
@@ -3506,6 +5811,17 @@ var EmbeddedServer = class {
     this.dashboardToken = dashboardToken;
     this.metricsStore = new MetricsStore();
     this.pricing = mergePricing(pricingOverrides);
+    const logRoot = resolveLogRoot();
+    if (logRoot) {
+      try {
+        const restored = this.metricsStore.hydrate(logRoot);
+        if (restored > 0) {
+          getLogger().info(`Dashboard hydrated ${restored} call(s) from ${logRoot}`);
+        }
+      } catch (err) {
+        getLogger().warn(`Dashboard hydration failed: ${String(err)}`);
+      }
+    }
   }
   server = null;
   wss = null;
@@ -3514,6 +5830,8 @@ var EmbeddedServer = class {
   metricsStore;
   pricing;
   remoteHandler = new RemoteMessageHandler();
+  /** Opt-in per-call filesystem logger (set via PATTER_LOG_DIR). */
+  callLogger = new CallLogger(resolveLogRoot());
   /** Active WebSocket connections tracked for graceful shutdown. */
   activeConnections = /* @__PURE__ */ new Set();
   activeCallIds = /* @__PURE__ */ new Map();
@@ -3522,6 +5840,18 @@ var EmbeddedServer = class {
     if (!webhookUrlPattern.test(this.config.webhookUrl)) {
       throw new Error(`Invalid webhookUrl: must be a hostname with no protocol prefix or path (got: '${this.config.webhookUrl}')`);
     }
+    if (this.config.requireSignature !== false) {
+      if (this.config.telephonyProvider === "twilio" && !this.config.twilioToken) {
+        getLogger().warn(
+          "Twilio webhook enforcement ACTIVE but twilioToken is empty \u2014 webhooks will 503. Set requireSignature=false for local dev."
+        );
+      }
+      if (this.config.telephonyProvider === "telnyx" && !this.config.telnyxPublicKey) {
+        getLogger().warn(
+          "Telnyx webhook enforcement ACTIVE but telnyxPublicKey is empty \u2014 webhooks will 503. Set requireSignature=false for local dev."
+        );
+      }
+    }
     const app = express();
     app.use((req, _res, next) => {
       if (req.path === "/webhooks/telnyx/voice") {
@@ -3561,6 +5891,10 @@ var EmbeddedServer = class {
           res.status(403).send("Invalid signature");
           return;
         }
+      } else if (this.config.requireSignature !== false) {
+        getLogger().error("Twilio webhook rejected: twilioToken not configured and requireSignature is not false");
+        res.status(503).send("Webhook signature required");
+        return;
       }
       const body = req.body;
       const callSid = sanitizeLogValue(body["CallSid"] ?? "");
@@ -3586,6 +5920,10 @@ var EmbeddedServer = class {
           res.status(403).send("Invalid signature");
           return;
         }
+      } else if (this.config.requireSignature !== false) {
+        getLogger().error("Twilio webhook rejected: twilioToken not configured and requireSignature is not false");
+        res.status(503).send("Webhook signature required");
+        return;
       }
       const body = req.body;
       const recordingSid = sanitizeLogValue(body["RecordingSid"] ?? "");
@@ -3603,6 +5941,10 @@ var EmbeddedServer = class {
           res.status(403).send("Invalid signature");
           return;
         }
+      } else if (this.config.requireSignature !== false) {
+        getLogger().error("Twilio webhook rejected: twilioToken not configured and requireSignature is not false");
+        res.status(503).send("Webhook signature required");
+        return;
       }
       const body = req.body;
       const answeredBy = body["AnsweredBy"] ?? "";
@@ -3645,6 +5987,10 @@ var EmbeddedServer = class {
           res.status(403).send("Invalid signature");
           return;
         }
+      } else if (this.config.requireSignature !== false) {
+        getLogger().error("Twilio webhook rejected: twilioToken not configured and requireSignature is not false");
+        res.status(503).send("Webhook signature required");
+        return;
       } else if (!this.twilioTokenWarningLogged) {
         this.twilioTokenWarningLogged = true;
         getLogger().warn("Twilio webhook signature validation disabled \u2014 set twilioToken for production");
@@ -3671,6 +6017,9 @@ var EmbeddedServer = class {
           getLogger().warn("Telnyx webhook rejected: invalid or missing Ed25519 signature");
           return res.status(403).send("Invalid signature");
         }
+      } else if (this.config.requireSignature !== false) {
+        getLogger().error("Telnyx webhook rejected: telnyxPublicKey not configured and requireSignature is not false");
+        return res.status(503).send("Webhook signature required");
       } else if (!this.telnyxSigWarningLogged) {
         this.telnyxSigWarningLogged = true;
         getLogger().warn("Telnyx webhook signature verification is disabled. Set telnyxPublicKey in LocalOptions for production use.");
@@ -3698,6 +6047,17 @@ var EmbeddedServer = class {
         }
         return res.status(200).send();
       }
+      if (eventType === "call.machine.detection.ended") {
+        const amdCallId = payload.call_control_id ?? "";
+        const amdResult = String(payload.result ?? "");
+        getLogger().info(
+          `Telnyx AMD result for ${sanitizeLogValue(amdCallId)}: ${sanitizeLogValue(amdResult)}`
+        );
+        if (amdCallId && (amdResult === "machine" || amdResult === "machine_detected")) {
+          await this.handleTelnyxAmdVoicemail(amdCallId);
+        }
+        return res.status(200).send();
+      }
       const callControlId = payload.call_control_id ?? "";
       if (!callControlId) {
         getLogger().warn("Telnyx webhook rejected: missing call_control_id");
@@ -3715,27 +6075,18 @@ var EmbeddedServer = class {
       };
       try {
         if (eventType === "call.initiated") {
-          getLogger().info(`Telnyx call.initiated ${callControlId} \u2014 answering`);
-          const resp = await fetch(`${apiBase}/calls/${encodeURIComponent(callControlId)}/actions/answer`, {
-            method: "POST",
-            headers: authHeaders,
-            body: JSON.stringify({}),
-            signal: AbortSignal.timeout(1e4)
-          });
-          if (!resp.ok) {
-            getLogger().warn(`Telnyx answer failed: ${resp.status} ${(await resp.text()).slice(0, 200)}`);
-          }
-        } else if (eventType === "call.answered") {
           const caller = payload.from ?? "";
           const callee = payload.to ?? "";
           const streamUrl = `wss://${this.config.webhookUrl}/ws/stream/${encodeURIComponent(callControlId)}?caller=${encodeURIComponent(caller)}&callee=${encodeURIComponent(callee)}`;
-          getLogger().info(`Telnyx call.answered ${callControlId} \u2014 starting stream`);
-          const resp = await fetch(`${apiBase}/calls/${encodeURIComponent(callControlId)}/actions/streaming_start`, {
+          getLogger().info(`Telnyx call.initiated ${callControlId} \u2014 answering with inline stream`);
+          const resp = await fetch(`${apiBase}/calls/${encodeURIComponent(callControlId)}/actions/answer`, {
             method: "POST",
             headers: authHeaders,
             body: JSON.stringify({
               stream_url: streamUrl,
-              stream_track: "both_tracks",
+              // ``inbound_track`` halves WS upstream bandwidth — outbound
+              // echo was always filtered downstream anyway.
+              stream_track: "inbound_track",
               stream_bidirectional_mode: "rtp",
               stream_bidirectional_codec: "PCMU",
               stream_bidirectional_sampling_rate: 8e3,
@@ -3744,8 +6095,10 @@ var EmbeddedServer = class {
             signal: AbortSignal.timeout(1e4)
           });
           if (!resp.ok) {
-            getLogger().warn(`Telnyx streaming_start failed: ${resp.status} ${(await resp.text()).slice(0, 200)}`);
+            getLogger().warn(`Telnyx answer failed: ${resp.status} ${(await resp.text()).slice(0, 200)}`);
           }
+        } else if (eventType === "call.answered") {
+          getLogger().debug(`Telnyx call.answered ${callControlId} \u2014 stream already active (inline)`);
         } else {
           getLogger().debug(`Telnyx event ignored: ${eventType}`);
         }
@@ -3798,6 +6151,12 @@ var EmbeddedServer = class {
         getLogger().info(`Server on port ${port}`);
         getLogger().info(`Webhook: https://${this.config.webhookUrl}`);
         getLogger().info(`Phone:   ${this.config.phoneNumber}`);
+        const model = this.agent.model ?? "";
+        if (model && model !== "gpt-4o-mini-realtime-preview" && model.includes("realtime")) {
+          getLogger().warn(
+            `Agent uses "${sanitizeLogValue(model)}" but DEFAULT_PRICING.openai_realtime is calibrated for "gpt-4o-mini-realtime-preview". Pass Patter({ pricing: { openai_realtime: {...} } }) to set rates for this model, otherwise the dashboard cost display will under-report.`
+          );
+        }
         if (this.dashboard) {
           console.log("\n\u2500\u2500\u2500\u2500 Dashboard \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500");
           getLogger().info(`URL: http://127.0.0.1:${port}/`);
@@ -3812,11 +6171,63 @@ var EmbeddedServer = class {
       });
     });
   }
+  /**
+   * Drop the configured voicemail message on a Telnyx call that answering
+   * machine detection (``call.machine.detection.ended``) classified as a
+   * machine: speak the message via ``actions/speak``, wait for the estimated
+   * playback time, then hang up via ``actions/hangup``. Mirrors the Python
+   * ``handle_amd_result`` helper.
+   *
+   * Returns immediately when the call id, ``config.telnyxKey`` or
+   * ``voicemailMessage`` is missing. Request failures are caught and logged
+   * as warnings instead of being propagated.
+   *
+   * @param callControlId Telnyx call control id of the call to act on.
+   * @returns Promise that settles after the hangup request has been sent
+   *   (or after a failure has been logged).
+   */
+  async handleTelnyxAmdVoicemail(callControlId) {
+    const telnyxKey = this.config.telnyxKey ?? "";
+    if (!callControlId || !telnyxKey || !this.voicemailMessage) {
+      return;
+    }
+    const callUrl = `https://api.telnyx.com/v2/calls/${encodeURIComponent(callControlId)}`;
+    const headers = {
+      "Content-Type": "application/json",
+      Authorization: `Bearer ${telnyxKey}`
+    };
+    // Playback estimate: 14 characters per second plus 1.5 s of slack,
+    // capped at 30 s so a long message cannot hold the line indefinitely.
+    const estimatedMs = Math.min(
+      3e4,
+      Math.ceil(this.voicemailMessage.length / 14 * 1e3) + 1500
+    );
+    try {
+      const speakResp = await fetch(`${callUrl}/actions/speak`, {
+        method: "POST",
+        headers,
+        body: JSON.stringify({
+          payload: this.voicemailMessage,
+          voice: "female",
+          language: "en-US"
+        }),
+        signal: AbortSignal.timeout(1e4)
+      });
+      const spoke = speakResp.ok;
+      if (spoke) {
+        // Only wait when the speak command was accepted; otherwise there is
+        // no playback to wait for and the call should be released right away.
+        await new Promise((resolve) => setTimeout(resolve, estimatedMs));
+      } else {
+        getLogger().warn(
+          `Telnyx voicemail speak failed: ${speakResp.status} ${(await speakResp.text()).slice(0, 200)}`
+        );
+      }
+      // Hang up in both cases so a machine-answered call never stays open.
+      const hangupResp = await fetch(`${callUrl}/actions/hangup`, {
+        method: "POST",
+        headers,
+        body: JSON.stringify({}),
+        signal: AbortSignal.timeout(1e4)
+      });
+      if (!hangupResp.ok) {
+        getLogger().warn(
+          `Telnyx voicemail hangup failed: ${hangupResp.status} ${(await hangupResp.text()).slice(0, 200)}`
+        );
+      }
+      if (spoke) {
+        // Report success only when the message was actually handed to Telnyx.
+        getLogger().info(`Voicemail dropped for Telnyx call ${sanitizeLogValue(callControlId)}`);
+      }
+    } catch (e) {
+      getLogger().warn(`Could not drop voicemail (Telnyx): ${String(e)}`);
+    }
+  }
   // ---------------------------------------------------------------------------
   // Stream handler helpers
   // ---------------------------------------------------------------------------
   /** Build the shared StreamHandlerDeps for the current server configuration. */
   buildStreamHandlerDeps(bridge) {
+    const [wrappedStart, wrappedMetrics, wrappedEnd] = this.wrapLoggingCallbacks(bridge);
     return {
       config: this.config,
       agent: this.agent,
@@ -3824,17 +6235,84 @@ var EmbeddedServer = class {
       metricsStore: this.metricsStore,
       pricing: this.pricing,
       remoteHandler: this.remoteHandler,
-      onCallStart: this.onCallStart,
-      onCallEnd: this.onCallEnd,
+      onCallStart: wrappedStart,
+      onCallEnd: wrappedEnd,
       onTranscript: this.onTranscript,
       onMessage: this.onMessage,
-      onMetrics: this.onMetrics,
+      onMetrics: wrappedMetrics,
       recording: this.recording,
       buildAIAdapter: (resolvedPrompt) => buildAIAdapter(this.config, this.agent, resolvedPrompt),
       sanitizeVariables,
       resolveVariables
     };
   }
+  /**
+   * Produce logging-aware versions of the user's call lifecycle callbacks.
+   * Each wrapper first hands the event to the CallLogger, but only when the
+   * logger reports itself enabled (it is opt-in via PATTER_LOG_DIR). Log
+   * writes are fire-and-forget: they are not awaited and a rejected write
+   * is reported through the process logger. Afterwards the wrapper awaits
+   * the user-supplied callback, if one was registered.
+   *
+   * @param bridge Telephony bridge; its ``telephonyProvider`` value is
+   *   recorded in the call-start log entry.
+   * @returns ``[onCallStart, onMetrics, onCallEnd]`` wrappers, in that order.
+   */
+  wrapLoggingCallbacks(bridge) {
+    const { callLogger, agent, onCallStart, onMetrics, onCallEnd } = this;
+    // Non-string values are logged as the empty string.
+    const textOrEmpty = (value) => typeof value === "string" ? value : "";
+    // Compact description of the agent; fields that are undefined are dropped.
+    const describeAgent = () => {
+      const fields = [
+        ["provider", agent.provider],
+        ["model", agent.model],
+        ["voice", agent.voice],
+        ["language", agent.language]
+      ];
+      if (agent.stt && agent.tts && !("engine" in agent && agent.engine)) {
+        fields.push(["mode", "pipeline"]);
+      }
+      return Object.fromEntries(fields.filter(([, value]) => value !== void 0));
+    };
+    const startWithLog = async (data) => {
+      if (callLogger.enabled) {
+        const callId = textOrEmpty(data.call_id);
+        const details = {
+          caller: textOrEmpty(data.caller),
+          callee: textOrEmpty(data.callee),
+          telephonyProvider: bridge.telephonyProvider,
+          providerMode: agent.provider ?? "",
+          agent: describeAgent()
+        };
+        void callLogger.logCallStart(callId, details).catch((err) => getLogger().error(`call_log start error: ${String(err)}`));
+      }
+      if (onCallStart) await onCallStart(data);
+    };
+    const metricsWithLog = async (data) => {
+      if (callLogger.enabled) {
+        const callId = textOrEmpty(data.call_id);
+        const { turn } = data;
+        // Only object-shaped turn payloads are written to the call log.
+        if (turn && typeof turn === "object") {
+          void callLogger.logTurn(callId, turn).catch((err) => getLogger().error(`call_log turn error: ${String(err)}`));
+        }
+      }
+      if (onMetrics) await onMetrics(data);
+    };
+    const endWithLog = async (data) => {
+      if (callLogger.enabled) {
+        const callId = textOrEmpty(data.call_id);
+        const summary = data.metrics ?? null;
+        let latency = null;
+        if (summary) {
+          latency = {
+            p50_ms: summary.latency_p50?.total_ms ?? null,
+            p95_ms: summary.latency_p95?.total_ms ?? null,
+            p99_ms: summary.latency_p99?.total_ms ?? null
+          };
+        }
+        const outcome = {
+          durationSeconds: summary?.duration_seconds,
+          turns: summary?.turns?.length,
+          cost: summary?.cost ?? null,
+          latency
+        };
+        void callLogger.logCallEnd(callId, outcome).catch((err) => getLogger().error(`call_log end error: ${String(err)}`));
+      }
+      if (onCallEnd) await onCallEnd(data);
+    };
+    return [startWithLog, metricsWithLog, endWithLog];
+  }
   // ---------------------------------------------------------------------------
   // Twilio WebSocket message parser (thin layer)
   // ---------------------------------------------------------------------------
@@ -3863,6 +6341,8 @@ var EmbeddedServer = class {
           const payload = data.media?.payload ?? "";
           handler.handleAudio(Buffer.from(payload, "base64"));
         } else if (event === "mark") {
+          const markName = String(data.mark?.name ?? "");
+          if (markName) await handler.onMark(markName);
         } else if (event === "dtmf") {
           const digit = data.dtmf?.digit ?? "";
           await handler.handleDtmf(digit);
@@ -3998,19 +6478,145 @@ var EmbeddedServer = class {
 };
 // src/llm-loop.ts
+var DEFAULT_TOOL_MAX_RETRIES = 2; // Default retries after the first webhook attempt (attempts = retries + 1).
+var DEFAULT_TOOL_RETRY_DELAY_MS = 500; // Default fixed pause between webhook attempts, in milliseconds.
+var DEFAULT_TOOL_TIMEOUT_MS = 1e4; // Default per-attempt webhook timeout in milliseconds, passed to AbortSignal.timeout.
+var TOOL_MAX_RESPONSE_BYTES = 1 * 1024 * 1024; // Upper bound (1 MiB) on the serialized webhook response; larger results become an error payload.
+/**
+ * Default strategy for running a tool on behalf of the LLM loop.
+ *
+ * A tool definition with a ``handler`` is invoked in-process. Otherwise a
+ * tool with a ``webhookUrl`` is called over HTTP POST with retries and a
+ * per-attempt timeout. Failures are reported as JSON error strings (mostly
+ * flagged ``fallback: true``) rather than thrown.
+ */
+var DefaultToolExecutor = class {
+  /** Number of retries after the first webhook attempt. */
+  maxRetries;
+  /** Pause between webhook attempts, in milliseconds. */
+  retryDelayMs;
+  /** Timeout applied to every single webhook attempt, in milliseconds. */
+  requestTimeoutMs;
+  /**
+   * @param opts Optional overrides: ``maxRetries``, ``retryDelayMs`` and
+   *   ``requestTimeoutMs``. Missing (null/undefined) values fall back to
+   *   the module defaults.
+   */
+  constructor(opts = {}) {
+    this.maxRetries = opts.maxRetries ?? DEFAULT_TOOL_MAX_RETRIES;
+    this.retryDelayMs = opts.retryDelayMs ?? DEFAULT_TOOL_RETRY_DELAY_MS;
+    this.requestTimeoutMs = opts.requestTimeoutMs ?? DEFAULT_TOOL_TIMEOUT_MS;
+  }
+  /**
+   * Execute one tool call.
+   *
+   * @param toolDef Tool definition; ``handler`` takes precedence over
+   *   ``webhookUrl``.
+   * @param args Arguments forwarded to the handler or sent to the webhook.
+   * @param callContext Call metadata; passed to the handler as-is and
+   *   spread into the webhook request body.
+   * @returns The handler's return value, the JSON-serialized webhook
+   *   response, or a JSON string with an ``error`` field on failure.
+   */
+  async execute(toolDef, args, callContext) {
+    if (toolDef.handler) {
+      try {
+        return await toolDef.handler(args, callContext);
+      } catch (e) {
+        return JSON.stringify({
+          error: `Tool handler error: ${String(e)}`,
+          fallback: true
+        });
+      }
+    }
+    if (toolDef.webhookUrl) {
+      try {
+        validateWebhookUrl(toolDef.webhookUrl);
+      } catch (e) {
+        return JSON.stringify({ error: `Tool webhook URL rejected: ${String(e)}` });
+      }
+      // Optional chaining: a missing call context must not crash the tool
+      // call (the body spread below already tolerates undefined).
+      const callId = typeof callContext?.call_id === "string" ? callContext.call_id : "";
+      return await withSpan(
+        SPAN_TOOL,
+        {
+          "patter.tool.name": toolDef.name,
+          "patter.tool.transport": "webhook",
+          "patter.call.id": callId
+        },
+        async (span) => {
+          const totalAttempts = this.maxRetries + 1;
+          for (let attempt = 0; attempt < totalAttempts; attempt++) {
+            span.setAttribute("patter.tool.attempt", attempt + 1);
+            try {
+              const resp = await fetch(toolDef.webhookUrl, {
+                method: "POST",
+                headers: { "Content-Type": "application/json" },
+                body: JSON.stringify({
+                  tool: toolDef.name,
+                  arguments: args,
+                  ...callContext,
+                  attempt: attempt + 1
+                }),
+                signal: AbortSignal.timeout(this.requestTimeoutMs)
+              });
+              if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
+              const result = JSON.stringify(await resp.json());
+              // Measure UTF-8 bytes: String#length counts UTF-16 code units
+              // and under-reports payloads containing multi-byte characters.
+              const resultBytes = Buffer.byteLength(result, "utf8");
+              if (resultBytes > TOOL_MAX_RESPONSE_BYTES) {
+                return JSON.stringify({
+                  error: `Webhook response too large: ${resultBytes} bytes (max ${TOOL_MAX_RESPONSE_BYTES})`,
+                  fallback: true
+                });
+              }
+              return result;
+            } catch (e) {
+              if (attempt < totalAttempts - 1) {
+                getLogger().warn(
+                  `Tool webhook '${toolDef.name}' failed (attempt ${attempt + 1}), retrying: ${String(e)}`
+                );
+                await new Promise((r) => setTimeout(r, this.retryDelayMs));
+              } else {
+                // Final attempt: record the failure on the span and give up.
+                span.recordException(e);
+                return JSON.stringify({
+                  error: `Tool failed after ${totalAttempts} attempts: ${String(e)}`,
+                  fallback: true
+                });
+              }
+            }
+          }
+          // Reached only when the loop body never ran (totalAttempts < 1).
+          return JSON.stringify({
+            error: `Tool '${toolDef.name}' exited retry loop unexpectedly`,
+            fallback: true
+          });
+        }
+      );
+    }
+    return JSON.stringify({
+      error: `No handler or webhookUrl for tool '${toolDef.name}'`,
+      fallback: true
+    });
+  }
+};
 var OpenAILLMProvider = class {
   apiKey;
   model;
-  constructor(apiKey, model) {
+  temperature;
+  maxTokens;
+  responseFormat;
+  parallelToolCalls;
+  toolChoice;
+  seed;
+  topP;
+  frequencyPenalty;
+  presencePenalty;
+  stop;
+  constructor(apiKey, model, sampling = {}) {
     this.apiKey = apiKey;
     this.model = model;
+    this.temperature = sampling.temperature;
+    this.maxTokens = sampling.maxTokens;
+    this.responseFormat = sampling.responseFormat;
+    this.parallelToolCalls = sampling.parallelToolCalls;
+    this.toolChoice = sampling.toolChoice;
+    this.seed = sampling.seed;
+    this.topP = sampling.topP;
+    this.frequencyPenalty = sampling.frequencyPenalty;
+    this.presencePenalty = sampling.presencePenalty;
+    this.stop = sampling.stop;
   }
   async *stream(messages, tools) {
     const body = {
       model: this.model,
       messages,
-      stream: true
+      stream: true,
+      // Ask OpenAI to include a final usage chunk so we can attribute token
+      // cost. Without this the dashboard shows LLM cost = 0 for OpenAI.
+      stream_options: { include_usage: true }
     };
+    if (this.temperature !== void 0) body.temperature = this.temperature;
+    if (this.maxTokens !== void 0) {
+      body.max_completion_tokens = this.maxTokens;
+    }
+    if (this.responseFormat !== void 0) body.response_format = this.responseFormat;
+    if (this.parallelToolCalls !== void 0) body.parallel_tool_calls = this.parallelToolCalls;
+    if (this.toolChoice !== void 0) body.tool_choice = this.toolChoice;
+    if (this.seed !== void 0) body.seed = this.seed;
+    if (this.topP !== void 0) body.top_p = this.topP;
+    if (this.frequencyPenalty !== void 0) body.frequency_penalty = this.frequencyPenalty;
+    if (this.presencePenalty !== void 0) body.presence_penalty = this.presencePenalty;
+    if (this.stop !== void 0) body.stop = this.stop;
     if (tools) {
       body.tools = tools;
     }
@@ -4049,6 +6655,16 @@ var OpenAILLMProvider = class {
         } catch {
           continue;
         }
+        if (chunk.usage) {
+          const cached = chunk.usage.prompt_tokens_details?.cached_tokens ?? 0;
+          const uncachedInput = Math.max(0, (chunk.usage.prompt_tokens ?? 0) - cached);
+          yield {
+            type: "usage",
+            inputTokens: uncachedInput,
+            outputTokens: chunk.usage.completion_tokens,
+            cacheReadInputTokens: cached
+          };
+        }
         const delta = chunk.choices?.[0]?.delta;
         if (!delta) continue;
         if (delta.content) {
@@ -4075,10 +6691,28 @@ var LLMLoop = class {
   tools;
   openaiTools;
   toolMap;
+  toolExecutor;
+  eventBus;
+  // Fix 10: track provider/model so usage chunks can be attributed for billing.
+  _providerName;
+  _modelName;
   constructor(apiKey, model, systemPrompt, tools, llmProvider) {
     this.provider = llmProvider ?? new OpenAILLMProvider(apiKey, model);
     this.systemPrompt = systemPrompt;
+    if (llmProvider) {
+      const key = llmProvider.constructor?.providerKey;
+      if (key) {
+        this._providerName = key;
+      } else {
+        const stripped = (llmProvider.constructor?.name ?? "custom").replace(/LLMProvider$/i, "").replace(/LLM$/i, "").replace(/Provider$/i, "").toLowerCase();
+        this._providerName = stripped || "custom";
+      }
+    } else {
+      this._providerName = "openai";
+    }
+    this._modelName = model;
     this.tools = tools ?? null;
+    this.toolExecutor = new DefaultToolExecutor();
     this.toolMap = /* @__PURE__ */ new Map();
     this.openaiTools = null;
     if (this.tools && this.tools.length > 0) {
@@ -4096,13 +6730,40 @@ var LLMLoop = class {
       }
     }
   }
+  /**
+   * Swap in a custom tool executor (e.g. different retry policy, metrics
+   * wrapping, tenant-aware fan-out). The default is ``DefaultToolExecutor``.
+   *
+   * @param executor Replacement executor. The loop calls
+   *   ``executor.execute(toolDef, args, callContext)`` for every tool it
+   *   knows and returns that result as the tool output.
+   */
+  setToolExecutor(executor) {
+    this.toolExecutor = executor;
+  }
+  /**
+   * Wire an ``EventBus`` so the loop emits ``llm_chunk`` per text
+   * token and ``tool_call_started`` the first time each tool-call index
+   * appears in an LLM iteration. Set to ``undefined`` to disable.
+   *
+   * @param bus Object exposing ``emit(eventName, payload)``, or
+   *   ``undefined`` to stop emitting events.
+   */
+  setEventBus(bus) {
+    this.eventBus = bus;
+  }
   /**
    * Stream LLM response tokens, handling tool calls automatically.
    * Yields text tokens as they arrive from the LLM.
+   *
+   * @param metrics Optional usage recorder — when provided, usage chunks
+   *   from the provider are forwarded to {@link LlmUsageRecorder.recordLlmUsage}
+   *   so token costs are included in the call cost breakdown (fix 10).
    */
-  async *run(userText, history, callContext) {
-    const messages = this.buildMessages(history, userText);
+  async *run(userText, history, callContext, metrics, hookExecutor, hookCtx) {
+    let messages = this.buildMessages(history, userText);
     const maxIterations = 10;
+    if (hookExecutor && hookCtx) {
+      messages = await hookExecutor.runBeforeLlm(
+        messages,
+        hookCtx
+      );
+    }
+    const hasAfterLlm = Boolean(hookExecutor?.hasAfterLlm() && hookCtx);
+    const allEmittedText = [];
     for (let iter = 0; iter < maxIterations; iter++) {
       const toolCallsAccumulated = /* @__PURE__ */ new Map();
       const textParts = [];
@@ -4110,12 +6771,31 @@ var LLMLoop = class {
       for await (const chunk of this.provider.stream(messages, this.openaiTools)) {
         if (chunk.type === "text" && chunk.content) {
           textParts.push(chunk.content);
-          yield chunk.content;
+          this.eventBus?.emit("llm_chunk", { text: chunk.content, iteration: iter });
+          if (hasAfterLlm) {
+            allEmittedText.push(chunk.content);
+          } else {
+            yield chunk.content;
+          }
+        } else if (chunk.type === "usage") {
+          metrics?.recordLlmUsage(
+            this._providerName,
+            this._modelName,
+            chunk.inputTokens ?? 0,
+            chunk.outputTokens ?? 0,
+            chunk.cacheReadInputTokens ?? 0,
+            chunk.cacheCreationInputTokens ?? 0
+          );
         } else if (chunk.type === "tool_call") {
           hasToolCalls = true;
           const idx = chunk.index ?? 0;
           if (!toolCallsAccumulated.has(idx)) {
             toolCallsAccumulated.set(idx, { id: "", name: "", arguments: "" });
+            this.eventBus?.emit("tool_call_started", {
+              index: idx,
+              name: chunk.name ?? "",
+              args: chunk.arguments ?? ""
+            });
           }
           const acc = toolCallsAccumulated.get(idx);
           if (chunk.id) acc.id = chunk.id;
@@ -4123,7 +6803,14 @@ var LLMLoop = class {
           if (chunk.arguments) acc.arguments += chunk.arguments;
         }
       }
-      if (!hasToolCalls) return;
+      if (!hasToolCalls) {
+        if (hasAfterLlm && hookExecutor && hookCtx) {
+          const finalText = allEmittedText.join("");
+          const rewritten = await hookExecutor.runAfterLlm(finalText, hookCtx);
+          if (rewritten) yield rewritten;
+        }
+        return;
+      }
       const assistantMsg = {
         role: "assistant",
         content: textParts.join("") || null,
@@ -4162,49 +6849,7 @@ var LLMLoop = class {
     if (!toolDef) {
       return JSON.stringify({ error: `Unknown tool: ${toolName}` });
     }
-    if (toolDef.handler) {
-      try {
-        return await toolDef.handler(args, callContext);
-      } catch (e) {
-        return JSON.stringify({ error: `Tool handler error: ${String(e)}` });
-      }
-    }
-    if (toolDef.webhookUrl) {
-      try {
-        validateWebhookUrl(toolDef.webhookUrl);
-      } catch (e) {
-        return JSON.stringify({ error: `Tool webhook URL rejected: ${String(e)}` });
-      }
-      for (let attempt = 0; attempt < 3; attempt++) {
-        try {
-          const resp = await fetch(toolDef.webhookUrl, {
-            method: "POST",
-            headers: { "Content-Type": "application/json" },
-            body: JSON.stringify({
-              tool: toolName,
-              arguments: args,
-              ...callContext,
-              attempt: attempt + 1
-            }),
-            signal: AbortSignal.timeout(1e4)
-          });
-          if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
-          const result = JSON.stringify(await resp.json());
-          const MAX_RESPONSE_BYTES2 = 1 * 1024 * 1024;
-          if (result.length > MAX_RESPONSE_BYTES2) {
-            return JSON.stringify({ error: `Webhook response too large: ${result.length} bytes (max ${MAX_RESPONSE_BYTES2})`, fallback: true });
-          }
-          return result;
-        } catch (e) {
-          if (attempt < 2) {
-            await new Promise((r) => setTimeout(r, 500));
-          } else {
-            return JSON.stringify({ error: `Tool failed after 3 attempts: ${String(e)}` });
-          }
-        }
-      }
-    }
-    return JSON.stringify({ error: `No handler or webhookUrl for tool '${toolName}'` });
+    return this.toolExecutor.execute(toolDef, args, callContext);
   }
   buildMessages(history, userText) {
     const messages = [
@@ -4391,6 +7036,11 @@ var TestSession = class {
 };
 export {
+  PatterError,
+  PatterConnectionError,
+  AuthenticationError,
+  ProvisionError,
+  RateLimitError,
   OpenAIRealtimeAdapter,
   ElevenLabsConvAIAdapter,
   DEFAULT_PRICING,
@@ -4412,14 +7062,31 @@ export {
   CallMetricsAccumulator,
   mulawToPcm16,
   pcm16ToMulaw,
+  PcmCarry,
+  StatefulResampler,
+  createResampler16kTo8k,
+  createResampler8kTo16k,
+  createResampler24kTo16k,
   resample8kTo16k,
   resample16kTo8k,
   resample24kTo16k,
+  SPAN_CALL,
+  SPAN_STT,
+  SPAN_LLM,
+  SPAN_TTS,
+  SPAN_TOOL,
+  SPAN_ENDPOINT,
+  SPAN_BARGEIN,
+  initTracing,
+  isTracingEnabled,
+  startSpan,
+  DefaultToolExecutor,
   OpenAILLMProvider,
   LLMLoop,
   DEFAULT_MIN_SENTENCE_LEN,
   SentenceChunker,
   PipelineHookExecutor,
+  EventBus,
   EmbeddedServer,
   TestSession
 };