npm - getpatter - Versions diffs - 0.5.3 → 0.6.0 - Mend

getpatter 0.5.3 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32)

package/LICENSE +1 -1
package/README.md +5 -2
package/dist/aec-PJJMUM5E.mjs +228 -0
package/dist/{banner-3GNZ6VQK.mjs → banner-UYW6UM3J.mjs} +4 -1
package/dist/{carrier-config-33HQ2W4V.mjs → carrier-config-4ZKVYAWV.mjs} +5 -2
package/dist/{chunk-AFUYSNDH.mjs → chunk-6GR5MHHQ.mjs} +9 -0
package/dist/chunk-CYLJVT5G.mjs +7031 -0
package/dist/{chunk-FIFIWBL7.mjs → chunk-JUQ5WQTQ.mjs} +2157 -883
package/dist/{chunk-VJVDG4V5.mjs → chunk-MVOQFAEO.mjs} +5 -0
package/dist/chunk-N565J3CF.mjs +69 -0
package/dist/chunk-X3364LSI.mjs +363 -0
package/dist/{chunk-SEMKNPCD.mjs → chunk-XS45BAQL.mjs} +5 -1
package/dist/cli.js +32 -621
package/dist/client-2GJVZT42.mjs +8935 -0
package/dist/dashboard/ui.html +63 -0
package/dist/{dist-YRCCJQ26.mjs → dist-RYMPCILF.mjs} +28 -2
package/dist/index.d.mts +2199 -240
package/dist/index.d.ts +2199 -240
package/dist/index.js +28942 -7073
package/dist/index.mjs +2337 -447
package/dist/{node-cron-6PRPSBG5.mjs → node-cron-JFWQQRBU.mjs} +23 -2
package/dist/persistence-LVIAHESK.mjs +7 -0
package/dist/silero-vad-YLCXT5GQ.mjs +7 -0
package/dist/streamableHttp-WKNGHDVO.mjs +1496 -0
package/dist/test-mode-Y7YG5LFZ.mjs +8 -0
package/dist/tunnel-43CHWPVQ.mjs +8 -0
package/package.json +7 -7
package/src/dashboard/ui.html +63 -0
package/dist/chunk-QHHBUCMT.mjs +0 -25
package/dist/persistence-LQBYQPQQ.mjs +0 -7
package/dist/test-mode-MVJ3SKG4.mjs +0 -8
package/dist/tunnel-UVR3PPAU.mjs +0 -8

package/dist/{chunk-FIFIWBL7.mjs → chunk-JUQ5WQTQ.mjs} (renamed)

@@ -1,23 +1,66 @@
 import {
   getLogger
-} from "./chunk-VJVDG4V5.mjs";
+} from "./chunk-MVOQFAEO.mjs";
 import {
-  __require
-} from "./chunk-QHHBUCMT.mjs";
+  __dirname,
+  __require,
+  init_esm_shims
+} from "./chunk-N565J3CF.mjs";
 // src/test-mode.ts
+init_esm_shims();
 import { createInterface } from "readline";
+// src/llm-loop.ts
+init_esm_shims();
 // src/server.ts
+init_esm_shims();
 import crypto4 from "crypto";
 import express from "express";
 import { createServer } from "http";
 import { WebSocketServer } from "ws";
 // src/providers/openai-realtime.ts
+init_esm_shims();
 import WebSocket from "ws";
+var OpenAIRealtimeAudioFormat = {
+  G711_ULAW: "g711_ulaw",
+  G711_ALAW: "g711_alaw",
+  PCM16: "pcm16"
+};
+var OpenAIRealtimeModel = {
+  GPT_REALTIME: "gpt-realtime",
+  GPT_REALTIME_2: "gpt-realtime-2",
+  GPT_REALTIME_MINI: "gpt-realtime-mini",
+  GPT_4O_REALTIME_PREVIEW: "gpt-4o-realtime-preview",
+  GPT_4O_MINI_REALTIME_PREVIEW: "gpt-4o-mini-realtime-preview"
+};
+var OpenAIVoice = {
+  ALLOY: "alloy",
+  ASH: "ash",
+  BALLAD: "ballad",
+  CORAL: "coral",
+  ECHO: "echo",
+  FABLE: "fable",
+  NOVA: "nova",
+  ONYX: "onyx",
+  SAGE: "sage",
+  SHIMMER: "shimmer",
+  VERSE: "verse"
+};
+var OpenAITranscriptionModel = {
+  WHISPER_1: "whisper-1",
+  GPT_4O_TRANSCRIBE: "gpt-4o-transcribe",
+  GPT_4O_MINI_TRANSCRIBE: "gpt-4o-mini-transcribe",
+  GPT_REALTIME_WHISPER: "gpt-realtime-whisper"
+};
+var OpenAIRealtimeVADType = {
+  SERVER_VAD: "server_vad",
+  SEMANTIC_VAD: "semantic_vad"
+};
 var OpenAIRealtimeAdapter = class {
-  constructor(apiKey, model = "gpt-realtime-mini", voice = "alloy", instructions = "", tools, audioFormat = "g711_ulaw", options = {}) {
+  constructor(apiKey, model = OpenAIRealtimeModel.GPT_REALTIME_MINI, voice = OpenAIVoice.ALLOY, instructions = "", tools, audioFormat = OpenAIRealtimeAudioFormat.G711_ULAW, options = {}) {
     this.apiKey = apiKey;
     this.model = model;
     this.voice = voice;
@@ -26,6 +69,12 @@ var OpenAIRealtimeAdapter = class {
     this.audioFormat = audioFormat;
     this.options = options;
   }
+  apiKey;
+  model;
+  voice;
+  instructions;
+  tools;
+  audioFormat;
   ws = null;
   eventCallbacks = /* @__PURE__ */ new Set();
   messageListenerAttached = false;
@@ -34,7 +83,17 @@ var OpenAIRealtimeAdapter = class {
   // barge-in (see ``cancelResponse``) — matches the Python adapter.
   currentResponseItemId = null;
   currentResponseAudioMs = 0;
+  // Wall-clock timestamp (Date.now()) of the first ``response.audio.delta``
+  // received since the current response item started. ``cancelResponse``
+  // uses this to bound ``audio_end_ms`` to what the caller could plausibly
+  // have heard — generated audio frequently arrives 5-10x real-time, so
+  // ``audio_end_ms`` driven purely by the per-chunk byte counter overshoots
+  // reality and leaves phantom assistant text on the conversation. The
+  // wall-clock cap corresponds to the maximum playback that real-time TTS
+  // could have produced, which is what the user actually heard.
+  currentResponseFirstAudioAt = null;
   options;
+  /** Open the Realtime WebSocket and apply the session configuration. */
   async connect() {
     const url = `wss://api.openai.com/v1/realtime?model=${encodeURIComponent(this.model)}`;
     this.ws = new WebSocket(url, {
@@ -63,12 +122,14 @@ var OpenAIRealtimeAdapter = class {
             voice: this.voice,
             instructions: this.instructions || "You are a helpful voice assistant. Be concise.",
             turn_detection: {
-              type: this.options.vadType ?? "server_vad",
+              type: this.options.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
               threshold: 0.5,
               prefix_padding_ms: 300,
               silence_duration_ms: this.options.silenceDurationMs ?? 300
             },
-            input_audio_transcription: { model: this.options.inputAudioTranscriptionModel ?? "whisper-1" }
+            input_audio_transcription: {
+              model: this.options.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
+            }
           };
           if (this.options.temperature !== void 0) config.temperature = this.options.temperature;
           if (this.options.maxResponseOutputTokens !== void 0) {
@@ -76,13 +137,22 @@ var OpenAIRealtimeAdapter = class {
           }
           if (this.options.modalities !== void 0) config.modalities = this.options.modalities;
           if (this.options.toolChoice !== void 0) config.tool_choice = this.options.toolChoice;
+          if (this.options.reasoningEffort !== void 0) {
+            config.reasoning = { effort: this.options.reasoningEffort };
+          }
           if (this.tools?.length) {
-            config.tools = this.tools.map((t) => ({
-              type: "function",
-              name: t.name,
-              description: t.description,
-              parameters: t.parameters
-            }));
+            config.tools = this.tools.map((t) => {
+              const def = {
+                type: "function",
+                name: t.name,
+                description: t.description,
+                parameters: t.parameters
+              };
+              if (t.strict === true) {
+                def.strict = true;
+              }
+              return def;
+            });
           }
           ws.send(JSON.stringify({ type: "session.update", session: config }));
         } else if (msg.type === "session.updated") {
@@ -124,6 +194,7 @@ var OpenAIRealtimeAdapter = class {
     }, 2e4);
     this.ensureMessageListener();
   }
+  /** Append a base64-encoded audio chunk to the realtime input buffer. */
   sendAudio(mulawAudio) {
     if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
     this.ws.send(JSON.stringify({ type: "input_audio_buffer.append", audio: mulawAudio.toString("base64") }));
@@ -140,6 +211,7 @@ var OpenAIRealtimeAdapter = class {
     this.eventCallbacks.add(callback);
     this.ensureMessageListener();
   }
+  /** Remove a previously registered {@link onEvent} callback. */
   offEvent(callback) {
     this.eventCallbacks.delete(callback);
   }
@@ -166,6 +238,9 @@ var OpenAIRealtimeAdapter = class {
       if (t === "response.audio.delta") {
         const buf = Buffer.from(data.delta ?? "", "base64");
         this.currentResponseAudioMs += estimateAudioMs(buf, this.audioFormat);
+        if (this.currentResponseFirstAudioAt === null) {
+          this.currentResponseFirstAudioAt = Date.now();
+        }
         dispatch("audio", buf);
       } else if (t === "response.audio_transcript.delta") {
         dispatch("transcript_output", data.delta);
@@ -174,6 +249,7 @@ var OpenAIRealtimeAdapter = class {
         if (itemId) {
           this.currentResponseItemId = itemId;
           this.currentResponseAudioMs = 0;
+          this.currentResponseFirstAudioAt = null;
         }
       } else if (t === "input_audio_buffer.speech_started") {
         dispatch("speech_started", null);
@@ -186,6 +262,7 @@ var OpenAIRealtimeAdapter = class {
       } else if (t === "response.done") {
         this.currentResponseItemId = null;
         this.currentResponseAudioMs = 0;
+        this.currentResponseFirstAudioAt = null;
         dispatch("response_done", data.response ?? null);
       } else if (t === "error") {
         dispatch("error", data.error);
@@ -204,22 +281,44 @@ var OpenAIRealtimeAdapter = class {
       dispatch("error", { type: "socket_error", message: err?.message ?? String(err) });
     });
   }
+  /** Truncate the in-flight assistant turn and cancel the active response.
+   *
+   * ``audio_end_ms`` MUST reflect what the caller actually heard, not what
+   * the server generated. OpenAI streams audio at 5-10x real-time, so the
+   * byte-derived counter overstates playback whenever the consumer cleared
+   * its playout buffer (e.g. ``send_clear``) before the audio reached the
+   * speaker. We bound the truncate point by wall-clock time since the first
+   * chunk of this response — that's the physical maximum a 1x real-time
+   * playback could have produced. Without this cap, OpenAI keeps the full
+   * generated assistant text on the transcript, and the model replays /
+   * resumes from it on the next turn — manifesting as re-greetings and
+   * mid-sentence fragments after a barge-in storm.
+   */
   cancelResponse() {
     if (!this.ws) return;
     if (this.currentResponseItemId) {
+      let audioEndMs = this.currentResponseAudioMs;
+      if (this.currentResponseFirstAudioAt !== null) {
+        const elapsedMs = Date.now() - this.currentResponseFirstAudioAt;
+        audioEndMs = Math.min(audioEndMs, Math.max(elapsedMs, 0));
+      }
       try {
         this.ws.send(JSON.stringify({
           type: "conversation.item.truncate",
           item_id: this.currentResponseItemId,
           content_index: 0,
-          audio_end_ms: this.currentResponseAudioMs
+          audio_end_ms: audioEndMs
         }));
       } catch (err) {
         getLogger().debug?.(`conversation.item.truncate failed: ${String(err)}`);
       }
     }
     this.ws.send(JSON.stringify({ type: "response.cancel" }));
+    this.currentResponseItemId = null;
+    this.currentResponseAudioMs = 0;
+    this.currentResponseFirstAudioAt = null;
   }
+  /** Inject a user text turn and request a new response. */
   async sendText(text) {
     this.ws?.send(JSON.stringify({
       type: "conversation.item.create",
@@ -227,6 +326,30 @@ var OpenAIRealtimeAdapter = class {
     }));
     this.ws?.send(JSON.stringify({ type: "response.create" }));
   }
+  /**
+   * Make the AI speak ``text`` as its opening line.
+   *
+   * Triggers ``response.create`` with explicit ``instructions`` that force
+   * the model to render ``text`` verbatim as its first audio utterance.
+   * This is the correct semantics for ``Agent.firstMessage`` per its
+   * docstring ("What the AI says when the callee answers").
+   *
+   * Without this, ``sendText(firstMessage)`` would inject ``text`` as
+   * ``role: user`` and the AI would *reply* to its own greeting, producing
+   * role-confused openings (e.g. a receptionist agent responding "I'd like
+   * to schedule a haircut" because it took its own first_message as a
+   * customer cue).
+   */
+  async sendFirstMessage(text) {
+    this.ws?.send(JSON.stringify({
+      type: "response.create",
+      response: {
+        modalities: ["audio", "text"],
+        instructions: `Say exactly the following sentence as your first turn and nothing else: "${text}"`
+      }
+    }));
+  }
+  /** Submit a tool/function-call result and request the next response. */
   async sendFunctionResult(callId, result) {
     this.ws?.send(JSON.stringify({
       type: "conversation.item.create",
@@ -234,6 +357,7 @@ var OpenAIRealtimeAdapter = class {
     }));
     this.ws?.send(JSON.stringify({ type: "response.create" }));
   }
+  /** Stop the heartbeat, drop listeners, and close the Realtime WebSocket. */
   close() {
     if (this.heartbeat) {
       clearInterval(this.heartbeat);
@@ -247,14 +371,16 @@ var OpenAIRealtimeAdapter = class {
 };
 function estimateAudioMs(chunk, format) {
   if (chunk.length === 0) return 0;
-  if (format === "g711_ulaw" || format === "g711_alaw") return Math.floor(chunk.length / 8);
-  if (format === "pcm16") {
+  if (format === OpenAIRealtimeAudioFormat.G711_ULAW || format === OpenAIRealtimeAudioFormat.G711_ALAW)
+    return Math.floor(chunk.length / 8);
+  if (format === OpenAIRealtimeAudioFormat.PCM16) {
     return Math.floor(chunk.length / 48);
   }
   return 0;
 }
 // src/providers/elevenlabs-convai.ts
+init_esm_shims();
 import WebSocket2 from "ws";
 var ELEVENLABS_CONVAI_URL = "wss://api.elevenlabs.io/v1/convai/conversation";
 var ELEVENLABS_SIGNED_URL = "https://api.elevenlabs.io/v1/convai/conversation/get-signed-url";
@@ -366,6 +492,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
     }
     return data.signed_url;
   }
+  /** Open the ConvAI WebSocket and send the conversation init payload. */
   async connect() {
     let wsUrl;
     let wsOptions;
@@ -533,6 +660,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
       return;
     }
   }
+  /** Send a caller-side audio chunk to ConvAI as a base64 `user_audio_chunk`. */
   sendAudio(audioBytes) {
     if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) return;
     this.ws.send(
@@ -541,9 +669,11 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
       })
     );
   }
+  /** Register the event callback that receives ConvAI server messages. */
   onEvent(callback) {
     this.eventCallback = callback;
   }
+  /** Close the ConvAI WebSocket and release the event callback. */
   async close() {
     this.clearSilenceTimer();
     if (!this.ws) {
@@ -582,6 +712,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
 };
 // src/provider-factory.ts
+init_esm_shims();
 async function createSTT(agent) {
   return agent.stt ?? null;
 }
@@ -590,44 +721,166 @@ async function createTTS(agent) {
 }
 // src/pricing.ts
+init_esm_shims();
+var PricingUnit = {
+  MINUTE: "minute",
+  THOUSAND_CHARS: "1k_chars",
+  TOKEN: "token"
+};
+function resolveProviderRates(providerConfig, model) {
+  if (!providerConfig) return { unit: "" };
+  const { models, ...base } = providerConfig;
+  if (!model || !models) return { ...base };
+  let override = models[model];
+  if (!override) {
+    let bestKey = "";
+    for (const key of Object.keys(models)) {
+      if (model.startsWith(key) && key.length > bestKey.length) {
+        bestKey = key;
+      }
+    }
+    if (bestKey) override = models[bestKey];
+  }
+  if (override) {
+    return { ...base, ...override };
+  }
+  return { ...base };
+}
 var DEFAULT_PRICING = {
-  // STT — per minute of audio processed
-  // Deepgram Nova-3 streaming (monolingual) — the default model Patter ships.
-  // The previous $0.0043/min was the batch rate; streaming is $0.0077/min per
-  // deepgram.com/pricing. For multilingual Nova-3 ($0.0092/min) override.
-  deepgram: { unit: "minute", price: 77e-4 },
-  whisper: { unit: "minute", price: 6e-3 },
+  // STT — per minute of audio processed.
+  deepgram: {
+    unit: PricingUnit.MINUTE,
+    // Default = Nova-3 streaming monolingual ($0.0077/min). Previous $0.0043
+    // was the batch rate; streaming is ~80% more expensive.
+    price: 77e-4,
+    models: {
+      "nova-3": { price: 77e-4 },
+      "nova-3-multilingual": { price: 92e-4 },
+      "nova-2": { price: 58e-4 },
+      nova: { price: 43e-4 },
+      "whisper-large": { price: 48e-4 },
+      "whisper-medium": { price: 48e-4 }
+    }
+  },
+  whisper: {
+    unit: PricingUnit.MINUTE,
+    // Default = whisper-1 REST ($0.006/min).
+    price: 6e-3,
+    models: {
+      "whisper-1": { price: 6e-3 },
+      "gpt-4o-transcribe": { price: 6e-3 },
+      "gpt-4o-mini-transcribe": { price: 3e-3 },
+      // Streaming Whisper variant for Realtime sessions.
+      "gpt-realtime-whisper": { price: 0.017 }
+    }
+  },
+  // OpenAI standalone transcription endpoint (separate provider_key from
+  // ``whisper`` so the dashboard can distinguish them).
+  openai_transcribe: {
+    unit: PricingUnit.MINUTE,
+    price: 6e-3,
+    models: {
+      "gpt-4o-transcribe": { price: 6e-3 },
+      "gpt-4o-mini-transcribe": { price: 3e-3 },
+      "whisper-1": { price: 6e-3 }
+    }
+  },
   // AssemblyAI Universal-Streaming — $0.15/hr = $0.0025/min
-  assemblyai: { unit: "minute", price: 25e-4 },
+  assemblyai: { unit: PricingUnit.MINUTE, price: 25e-4 },
   // Cartesia ink-whisper streaming STT — ~$0.15/hr on usage plans
-  cartesia_stt: { unit: "minute", price: 25e-4 },
+  cartesia_stt: { unit: PricingUnit.MINUTE, price: 25e-4 },
   // Soniox real-time STT — $0.12/hr = $0.002/min
-  soniox: { unit: "minute", price: 2e-3 },
+  soniox: { unit: PricingUnit.MINUTE, price: 2e-3 },
   // Speechmatics Pro tier — $0.24/hr = $0.0040/min (new users land here).
   // Previous $0.0173 default reflected a legacy Standard tier that was
   // retired; users were being over-billed ~4.3x.
-  speechmatics: { unit: "minute", price: 4e-3 },
+  speechmatics: { unit: PricingUnit.MINUTE, price: 4e-3 },
   // TTS — per 1,000 characters synthesized.
-  // ElevenLabs default model is eleven_flash_v2_5 billed at $0.06/1k via the
-  // direct API. The previous $0.18 matched only the Creator plan overage.
-  elevenlabs: { unit: "1k_chars", price: 0.06 },
-  openai_tts: { unit: "1k_chars", price: 0.015 },
-  openai_tts_hd: { unit: "1k_chars", price: 0.03 },
-  // Cartesia Sonic TTS — ~1 credit/char, effective $0.030/1k chars on usage plans
-  cartesia_tts: { unit: "1k_chars", price: 0.03 },
-  // Rime mist v2 — $0.030/1k chars pay-as-you-go
-  rime: { unit: "1k_chars", price: 0.03 },
-  // LMNT aurora/blizzard — $0.050/1k chars Indie overage
-  lmnt: { unit: "1k_chars", price: 0.05 },
-  // OpenAI Realtime — per token.
-  // Calibrated for gpt-4o-mini-realtime-preview (the Patter default):
-  //   audio  input  $10 / M  ->  0.00001 per token
-  //   audio  output $20 / M  ->  0.00002 per token
-  //   text   input  $0.60/ M ->  0.0000006 per token
-  //   text   output $2.40/ M ->  0.0000024 per token
-  // For gpt-4o-realtime-preview multiply by ~10, for gpt-realtime by ~3.
+  elevenlabs: {
+    unit: PricingUnit.THOUSAND_CHARS,
+    // Default = eleven_flash_v2_5 (Patter's default model) at $0.06/1k.
+    price: 0.06,
+    models: {
+      eleven_flash_v2_5: { price: 0.06 },
+      eleven_turbo_v2_5: { price: 0.05 },
+      eleven_multilingual_v2: { price: 0.18 },
+      eleven_monolingual_v1: { price: 0.18 },
+      eleven_v3: { price: 0.3 }
+    }
+  },
+  // ElevenLabs WebSocket streaming TTS shares pricing with REST.
+  elevenlabs_ws: {
+    unit: PricingUnit.THOUSAND_CHARS,
+    price: 0.06,
+    models: {
+      eleven_flash_v2_5: { price: 0.06 },
+      eleven_turbo_v2_5: { price: 0.05 },
+      eleven_multilingual_v2: { price: 0.18 },
+      eleven_v3: { price: 0.3 }
+    }
+  },
+  openai_tts: {
+    unit: PricingUnit.THOUSAND_CHARS,
+    // Default = tts-1 ($0.015/1k chars).
+    price: 0.015,
+    models: {
+      "tts-1": { price: 0.015 },
+      "tts-1-hd": { price: 0.03 },
+      // gpt-4o-mini-tts is billed by tokens upstream but published per
+      // 1k chars equivalent here for parity with the rest of the table.
+      "gpt-4o-mini-tts": { price: 0.012 }
+    }
+  },
+  // Legacy alias preserved for backward compat with users who set
+  // provider_key="openai_tts_hd" in their own adapters.
+  openai_tts_hd: { unit: PricingUnit.THOUSAND_CHARS, price: 0.03 },
+  cartesia_tts: {
+    unit: PricingUnit.THOUSAND_CHARS,
+    // Default = Sonic-2 (current Cartesia flagship) at ~$0.030/1k chars.
+    price: 0.03,
+    models: {
+      "sonic-2": { price: 0.03 },
+      "sonic-1": { price: 0.03 },
+      "sonic-english": { price: 0.03 },
+      "sonic-multilingual": { price: 0.03 }
+    }
+  },
+  rime: {
+    unit: PricingUnit.THOUSAND_CHARS,
+    // Default = mistv2 ($0.030/1k chars).
+    price: 0.03,
+    models: {
+      mistv2: { price: 0.03 },
+      mist: { price: 0.03 },
+      arcana: { price: 0.04 }
+    }
+  },
+  lmnt: {
+    unit: PricingUnit.THOUSAND_CHARS,
+    // Default = aurora ($0.050/1k chars).
+    price: 0.05,
+    models: {
+      aurora: { price: 0.05 },
+      blizzard: { price: 0.05 }
+    }
+  },
+  inworld: {
+    unit: PricingUnit.THOUSAND_CHARS,
+    // Default = inworld-tts-2 (placeholder rate — verify against tier).
+    price: 0.02,
+    models: {
+      "inworld-tts-2": { price: 0.02 },
+      "inworld-tts-1.5-max": { price: 0.025 },
+      "inworld-tts-1.5": { price: 0.025 }
+    }
+  },
+  // OpenAI Realtime — per token. Provider defaults match
+  // gpt-realtime-mini / gpt-4o-mini-realtime-preview (Patter's default).
+  // Per-model overrides under ``models`` are auto-resolved when the
+  // realtime adapter's model is threaded through ``calculateRealtimeCost``.
   openai_realtime: {
-    unit: "token",
+    unit: PricingUnit.TOKEN,
+    // Default rates: gpt-realtime-mini / gpt-4o-mini-realtime-preview
     audio_input_per_token: 1e-5,
     audio_output_per_token: 2e-5,
     text_input_per_token: 6e-7,
@@ -636,47 +889,119 @@ var DEFAULT_PRICING = {
     // text cached $0.06/M = 10% of full. OpenAI bills the cached portion of
     // input_token_details.audio_tokens / text_tokens at these reduced rates.
     cached_audio_input_per_token: 3e-7,
-    cached_text_input_per_token: 6e-8
+    cached_text_input_per_token: 6e-8,
+    models: {
+      // gpt-realtime (GA, August 2025): audio in $32/M, audio out $64/M,
+      // text in $4/M, text out $16/M, cached $0.40/M (audio + text).
+      "gpt-realtime": {
+        audio_input_per_token: 32e-6,
+        audio_output_per_token: 64e-6,
+        text_input_per_token: 4e-6,
+        text_output_per_token: 16e-6,
+        cached_audio_input_per_token: 4e-7,
+        cached_text_input_per_token: 4e-7
+      },
+      // gpt-realtime-2 (most-capable): audio in $32/M, audio out $64/M,
+      // text in $4/M, text out $24/M, cached $0.40/M (audio + text).
+      "gpt-realtime-2": {
+        audio_input_per_token: 32e-6,
+        audio_output_per_token: 64e-6,
+        text_input_per_token: 4e-6,
+        text_output_per_token: 24e-6,
+        cached_audio_input_per_token: 4e-7,
+        cached_text_input_per_token: 4e-7
+      },
+      // gpt-realtime-mini and gpt-4o-mini-realtime-preview share the
+      // provider defaults. Listed explicitly so tooling can introspect.
+      "gpt-realtime-mini": {
+        audio_input_per_token: 1e-5,
+        audio_output_per_token: 2e-5,
+        text_input_per_token: 6e-7,
+        text_output_per_token: 24e-7,
+        cached_audio_input_per_token: 3e-7,
+        cached_text_input_per_token: 6e-8
+      },
+      "gpt-4o-mini-realtime-preview": {
+        audio_input_per_token: 1e-5,
+        audio_output_per_token: 2e-5,
+        text_input_per_token: 6e-7,
+        text_output_per_token: 24e-7,
+        cached_audio_input_per_token: 3e-7,
+        cached_text_input_per_token: 6e-8
+      },
+      // gpt-4o-realtime-preview (legacy preview, ~10x mini for audio):
+      // audio in $100/M, audio out $200/M, text in $5/M, text out $20/M.
+      "gpt-4o-realtime-preview": {
+        audio_input_per_token: 1e-4,
+        audio_output_per_token: 2e-4,
+        text_input_per_token: 5e-6,
+        text_output_per_token: 2e-5,
+        cached_audio_input_per_token: 2e-6,
+        cached_text_input_per_token: 25e-7
+      }
+    }
   },
   // Telephony — per minute of call duration.
   // twilio default = US inbound local (the 99% case for voice agents receiving
   // calls on a local number). For US toll-free inbound ($0.022/min) or US
   // outbound local ($0.0140/min), override via Patter({ pricing: { twilio: {...} } }).
-  twilio: { unit: "minute", price: 85e-4 },
-  telnyx: { unit: "minute", price: 7e-3 }
+  twilio: { unit: PricingUnit.MINUTE, price: 85e-4 },
+  telnyx: { unit: PricingUnit.MINUTE, price: 7e-3 }
 };
+function cloneProviderEntry(entry) {
+  const out = { ...entry };
+  if (entry.models) {
+    const models = {};
+    for (const [mk, mv] of Object.entries(entry.models)) {
+      models[mk] = { ...mv };
+    }
+    out.models = models;
+  }
+  return out;
+}
 function mergePricing(overrides) {
   const merged = {};
   for (const [k, v] of Object.entries(DEFAULT_PRICING)) {
-    merged[k] = { ...v };
+    merged[k] = cloneProviderEntry(v);
   }
   if (!overrides) return merged;
   for (const [provider2, values] of Object.entries(overrides)) {
-    if (merged[provider2]) {
-      merged[provider2] = { ...merged[provider2], ...values };
-    } else {
-      merged[provider2] = { ...values };
+    if (!merged[provider2]) {
+      merged[provider2] = cloneProviderEntry(values);
+      continue;
+    }
+    const target = merged[provider2];
+    for (const [k, v] of Object.entries(values)) {
+      if (k === "models" && v && typeof v === "object" && target.models && typeof target.models === "object") {
+        const mergedModels = { ...target.models };
+        for (const [mk, mv] of Object.entries(v)) {
+          mergedModels[mk] = { ...mv };
+        }
+        target.models = mergedModels;
+      } else {
+        target[k] = v;
+      }
     }
   }
   return merged;
 }
-function calculateSttCost(provider2, audioSeconds, pricing) {
-  const config = pricing[provider2];
-  if (!config || config.unit !== "minute") return 0;
-  return audioSeconds / 60 * (config.price ?? 0);
+function calculateSttCost(provider2, audioSeconds, pricing, model) {
+  const rates = resolveProviderRates(pricing[provider2], model);
+  if (rates.unit !== "minute") return 0;
+  return audioSeconds / 60 * (rates.price ?? 0);
 }
-function calculateTtsCost(provider2, characterCount, pricing) {
-  const config = pricing[provider2];
-  if (!config || config.unit !== "1k_chars") return 0;
-  return characterCount / 1e3 * (config.price ?? 0);
+function calculateTtsCost(provider2, characterCount, pricing, model) {
+  const rates = resolveProviderRates(pricing[provider2], model);
+  if (rates.unit !== "1k_chars") return 0;
+  return characterCount / 1e3 * (rates.price ?? 0);
 }
-function calculateRealtimeCost(usage, pricing) {
-  const config = pricing.openai_realtime;
-  if (!config || config.unit !== "token") return 0;
+function calculateRealtimeCost(usage, pricing, model) {
+  const rates = resolveProviderRates(pricing.openai_realtime, model);
+  if (rates.unit !== "token") return 0;
   const input = usage.input_token_details ?? {};
   const output = usage.output_token_details ?? {};
-  const cachedAudioRate = config.cached_audio_input_per_token ?? config.audio_input_per_token ?? 0;
-  const cachedTextRate = config.cached_text_input_per_token ?? config.text_input_per_token ?? 0;
+  const cachedAudioRate = rates.cached_audio_input_per_token ?? rates.audio_input_per_token ?? 0;
+  const cachedTextRate = rates.cached_text_input_per_token ?? rates.text_input_per_token ?? 0;
   const totalAudioIn = input.audio_tokens ?? 0;
   const totalTextIn = input.text_tokens ?? 0;
   let cachedAudioIn;
@@ -695,25 +1020,25 @@ function calculateRealtimeCost(usage, pricing) {
     cachedTextIn = 0;
   }
   let cost = 0;
-  cost += (totalAudioIn - cachedAudioIn) * (config.audio_input_per_token ?? 0);
+  cost += (totalAudioIn - cachedAudioIn) * (rates.audio_input_per_token ?? 0);
   cost += cachedAudioIn * cachedAudioRate;
-  cost += (totalTextIn - cachedTextIn) * (config.text_input_per_token ?? 0);
+  cost += (totalTextIn - cachedTextIn) * (rates.text_input_per_token ?? 0);
   cost += cachedTextIn * cachedTextRate;
-  cost += (output.audio_tokens ?? 0) * (config.audio_output_per_token ?? 0);
-  cost += (output.text_tokens ?? 0) * (config.text_output_per_token ?? 0);
+  cost += (output.audio_tokens ?? 0) * (rates.audio_output_per_token ?? 0);
+  cost += (output.text_tokens ?? 0) * (rates.text_output_per_token ?? 0);
   return Math.max(0, cost);
 }
-function calculateRealtimeCachedSavings(usage, pricing) {
-  const config = pricing.openai_realtime;
-  if (!config || config.unit !== "token") return 0;
+function calculateRealtimeCachedSavings(usage, pricing, model) {
+  const rates = resolveProviderRates(pricing.openai_realtime, model);
+  if (rates.unit !== "token") return 0;
   const input = usage.input_token_details ?? {};
   const cached = input.cached_tokens_details ?? {};
-  const cachedAudioRate = config.cached_audio_input_per_token ?? config.audio_input_per_token ?? 0;
-  const cachedTextRate = config.cached_text_input_per_token ?? config.text_input_per_token ?? 0;
+  const cachedAudioRate = rates.cached_audio_input_per_token ?? rates.audio_input_per_token ?? 0;
+  const cachedTextRate = rates.cached_text_input_per_token ?? rates.text_input_per_token ?? 0;
   const cachedAudio = Math.min(cached.audio_tokens ?? 0, input.audio_tokens ?? 0);
   const cachedText = Math.min(cached.text_tokens ?? 0, input.text_tokens ?? 0);
-  const fullAudio = cachedAudio * (config.audio_input_per_token ?? 0);
-  const fullText = cachedText * (config.text_input_per_token ?? 0);
+  const fullAudio = cachedAudio * (rates.audio_input_per_token ?? 0);
+  const fullText = cachedText * (rates.text_input_per_token ?? 0);
   const discountedAudio = cachedAudio * cachedAudioRate;
   const discountedText = cachedText * cachedTextRate;
   return Math.max(0, fullAudio + fullText - (discountedAudio + discountedText));
@@ -745,14 +1070,32 @@ var llmPricing = {
     "gemini-live-2.5-flash-native-audio": { input: 0.3, output: 2.5 }
   },
   groq: {
+    // Rates as of 2026-05-08; verify against groq.com/pricing.
+    // ``llama-3.3-70b-versatile`` is the Patter default for Groq. The
+    // remaining models are reachable via ``model: "..."`` and were silently
+    // billing $0 before this entry was added (silent under-billing).
     "llama-3.3-70b-versatile": { input: 0.59, output: 0.79 },
-    "llama-3.1-8b-instant": { input: 0.05, output: 0.08 }
+    "llama-3.1-8b-instant": { input: 0.05, output: 0.08 },
+    "llama-3.3-70b-specdec": { input: 0.59, output: 0.99 },
+    "llama3-70b-8192": { input: 0.59, output: 0.79 },
+    "llama3-8b-8192": { input: 0.05, output: 0.08 },
+    "mixtral-8x7b-32768": { input: 0.27, output: 0.27 },
+    "gemma2-9b-it": { input: 0.2, output: 0.2 }
   },
   cerebras: {
+    // Rates as of 2026-05-08; verify against cerebras.net/inference.
+    // ``gpt-oss-120b`` is the Patter default for Cerebras (set in 0.5.4).
+    // On WSE-3 hardware every model size saturates the downstream TTS
+    // consumption rate (~150-300 tok/sec), so the 120B price stays in line
+    // with the 70B tier rather than scaling with weight count.
+    "gpt-oss-120b": { input: 0.85, output: 1.2 },
+    "llama3.1-8b": { input: 0.1, output: 0.2 },
     "llama-3.3-70b": { input: 0.85, output: 1.2 },
-    "qwen-3-32b": { input: 0.4, output: 0.8 }
+    "qwen-3-32b": { input: 0.4, output: 0.8 },
+    "qwen-3-235b-a22b-instruct-2507": { input: 1, output: 1.5 },
+    "zai-glm-4.7": { input: 0.85, output: 1.2 }
   },
-  // OpenAI Chat Completions (non-Realtime) — mirrors sdk-py pricing table.
+  // OpenAI Chat Completions (non-Realtime) — mirrors the Python SDK pricing table.
   // Rates are per 1M tokens (USD), cache_read = cached input rate.
   openai: {
     "gpt-4o": { input: 2.5, output: 10, cache_read: 1.25 },
@@ -792,6 +1135,7 @@ function calculateTelephonyCost(provider2, durationSeconds, pricing) {
 }
 // src/dashboard/store.ts
+init_esm_shims();
 import { EventEmitter } from "events";
 import * as fs from "fs";
 import * as path from "path";
@@ -812,6 +1156,7 @@ var MetricsStore = class extends EventEmitter {
   publish(eventType, data) {
     this.emit("sse", { type: eventType, data });
   }
+  /** Mark a call as in-progress (creates the row if it does not yet exist). */
   recordCallStart(data) {
     const callId = data.call_id || "";
     if (!callId) return;
@@ -909,6 +1254,7 @@ var MetricsStore = class extends EventEmitter {
     }
     this.publish("call_status", { call_id: callId, status, ...extra });
   }
+  /** Append a single conversation turn to an active call and broadcast it via SSE. */
   recordTurn(data) {
     const callId = data.call_id || "";
     const turn = data.turn;
@@ -920,6 +1266,7 @@ var MetricsStore = class extends EventEmitter {
     }
     this.publish("turn_complete", { call_id: callId, turn });
   }
+  /** Move a call from active to completed and persist its final metrics. */
   recordCallEnd(data, metrics) {
     const callId = data.call_id || "";
     if (!callId) return;
@@ -947,10 +1294,12 @@ var MetricsStore = class extends EventEmitter {
       metrics: entry.metrics ?? null
     });
   }
+  /** Return a window of completed calls in newest-first order. */
   getCalls(limit = 50, offset = 0) {
     const ordered = [...this.calls].reverse();
     return ordered.slice(offset, offset + limit);
   }
+  /** Look up a completed call by id (newest match wins). */
   getCall(callId) {
     for (let i = this.calls.length - 1; i >= 0; i--) {
       if (this.calls[i].call_id === callId) return this.calls[i];
@@ -961,9 +1310,11 @@ var MetricsStore = class extends EventEmitter {
   getActive(callId) {
     return this.activeCalls.get(callId);
   }
+  /** Return all currently active (not yet ended) calls. */
   getActiveCalls() {
     return Array.from(this.activeCalls.values());
   }
+  /** Compute summary statistics across the buffered call history. */
   getAggregates() {
     const totalCalls = this.calls.length;
     if (totalCalls === 0) {
@@ -1015,6 +1366,7 @@ var MetricsStore = class extends EventEmitter {
       active_calls: this.activeCalls.size
     };
   }
+  /** Return calls whose `started_at` falls within `[fromTs, toTs]` (Unix seconds). */
   getCallsInRange(fromTs = 0, toTs = 0) {
     return this.calls.filter((call) => {
       const started = call.started_at || 0;
@@ -1023,6 +1375,7 @@ var MetricsStore = class extends EventEmitter {
       return true;
     });
   }
+  /** Number of completed calls currently in the ring buffer. */
   get callCount() {
     return this.calls.length;
   }
@@ -1123,7 +1476,11 @@ function parseTimestamp(raw) {
   return null;
 }
+// src/dashboard/routes.ts
+init_esm_shims();
 // src/dashboard/auth.ts
+init_esm_shims();
 import crypto from "crypto";
 function timingSafeCompare(a, b) {
   const aBuf = Buffer.from(a);
@@ -1156,6 +1513,7 @@ function makeAuthMiddleware(token = "") {
 }
 // src/dashboard/export.ts
+init_esm_shims();
 function callsToCsv(calls) {
   const header = [
     "call_id",
@@ -1213,630 +1571,33 @@ function csvEscape(value) {
 }
 // src/dashboard/ui.ts
-var DASHBOARD_HTML = `<!DOCTYPE html>
-<html lang="en">
-<head>
-<meta charset="utf-8">
-<meta name="viewport" content="width=device-width, initial-scale=1">
-<title>Patter | Dashboard</title>
-<link rel="icon" type="image/svg+xml" href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 1188 1773' fill='none'%3E%3Cstyle%3Epath%7Bstroke:%2309090b%7D@media(prefers-color-scheme:dark)%7Bpath%7Bstroke:%23e4e4e7%7D%7D%3C/style%3E%3Cpath d='M25 561L245 694M25 561V818M245 694V951M25 961V1218M25 1357V1614M245 1489V1747M245 1093V1351M942 823V1080M1161 955V1213M1162 555V812M942 422V679M669 585V843L787 913M942 25V282M1162 158V415M25 818L245 951M244 1094L464 962M25 961L143 890M244 1352L464 1219M942 823L1162 956M942 679L1162 812M721 811L942 679M669 842L724 809M669 586L724 553M1041 883L1162 812M245 1747L1161 1213M244 1490L942 1080M25 1357L142 1289M518 1071L942 823M721 555L942 422M942 422L1162 556M942 282L1162 415M942 25L1162 158M942 1080L1161 1213M25 1218L245 1351M25 961L245 1094M464 962L519 929M464 1219L519 1186V928L403 859M25 1357L245 1490M25 1614L245 1747M25 561L942 25M244 694L941 282M1043 484L1162 415M245 951L668 704' stroke-width='50' stroke-linecap='round'/%3E%3C/svg%3E">
-<link rel="preconnect" href="https://fonts.googleapis.com">
-<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
-<link href="https://fonts.googleapis.com/css2?family=Instrument+Sans:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
-<style>
-  :root {
-    --bg: #fdfcfc;
-    --fg: #09090b;
-    --card: #ffffff;
-    --primary: #18181b;
-    --primary-fg: #fafafa;
-    --secondary: #f4f4f5;
-    --muted: #71717b;
-    --border: #e4e4e7;
-    --border-d: #d4d4d8;
-    --green: #22c55e;
-    --red: #ef4444;
-    --blue: #3b82f6;
-    --purple: #a78bfa;
-    --orange: #fb923c;
-    --yellow: #eab308;
-    --radius: 12px;
-    --font: 'Instrument Sans', ui-sans-serif, system-ui, sans-serif;
-    --mono: 'JetBrains Mono', ui-monospace, SFMono-Regular, Menlo, monospace;
-    --header-bg: #fff;
-    --assistant-bubble: #f0eeff;
-  }
-  @media (prefers-color-scheme: dark) {
-    :root {
-      --bg: #151518;
-      --fg: #e4e4e7;
-      --card: #1c1c21;
-      --primary: #e4e4e7;
-      --primary-fg: #18181b;
-      --secondary: #232329;
-      --muted: #8b8b95;
-      --border: #2c2c33;
-      --border-d: #3a3a44;
-      --green: #34d399;
-      --red: #f87171;
-      --blue: #60a5fa;
-      --purple: #c4b5fd;
-      --orange: #fdba74;
-      --yellow: #fbbf24;
-      --header-bg: #1a1a1f;
-      --assistant-bubble: #252230;
-    }
-  }
-  * { margin:0; padding:0; box-sizing:border-box; }
-  html { -webkit-font-smoothing: antialiased; }
-  body {
-    font-family: var(--font);
-    font-size: 15px;
-    line-height: 1.6;
-    color: var(--fg);
-    background: var(--bg);
-    min-height: 100vh;
-  }
-  /* Header */
-  header {
-    position: sticky; top: 0; z-index: 100;
-    background: var(--header-bg);
-    border-bottom: 1px solid var(--border);
-    padding: 0 24px;
-    height: 56px;
-    display: flex; align-items: center; gap: 14px;
-  }
-  .logo {
-    display: flex; align-items: center; gap: 10px;
-    font-weight: 700; font-size: 18px; letter-spacing: -0.02em;
-    text-decoration: none; color: var(--fg);
-  }
-  .logo svg { width: 22px; height: 22px; }
-  .header-sep {
-    width: 1px; height: 20px; background: var(--border-d); margin: 0 2px;
-  }
-  .header-title {
-    font-size: 14px; font-weight: 500; color: var(--muted);
-  }
-  .badge-beta {
-    font-size: 10px; font-weight: 600; letter-spacing: 0.5px;
-    color: #e67e22; background: rgba(230,126,34,0.1);
-    border: 1px solid rgba(230,126,34,0.25);
-    padding: 2px 8px; border-radius: 100px; text-transform: uppercase;
-  }
-  .status {
-    margin-left: auto; font-size: 13px; color: var(--muted);
-    display: flex; align-items: center; gap: 6px;
-  }
-  .dot {
-    width: 7px; height: 7px; border-radius: 50%;
-    background: var(--green); display: inline-block;
-  }
-  /* Layout */
-  .container { max-width: 1200px; margin: 0 auto; padding: 24px; }
-  /* Stat cards */
-  .cards {
-    display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
-    gap: 14px; margin-bottom: 28px;
-  }
-  .card {
-    background: var(--card);
-    border: 1px solid var(--border);
-    border-radius: var(--radius);
-    padding: 18px 20px;
-  }
-  .card .label {
-    font-size: 12px; color: var(--muted);
-    text-transform: uppercase; letter-spacing: 0.5px; font-weight: 500;
-  }
-  .card .value {
-    font-size: 28px; font-weight: 700; margin-top: 4px;
-    font-family: var(--mono); letter-spacing: -0.02em;
-  }
-  .card .sub { font-size: 12px; color: var(--muted); margin-top: 2px; }
-  /* Tabs */
-  .nav-tabs {
-    display: flex; gap: 0; margin-bottom: 16px;
-    border-bottom: 1px solid var(--border);
-  }
-  .nav-tab {
-    padding: 10px 20px; font-size: 13px; font-weight: 500;
-    color: var(--muted); cursor: pointer;
-    border: none; background: none;
-    border-bottom: 2px solid transparent;
-    margin-bottom: -1px; font-family: var(--font);
-    transition: color .15s;
-  }
-  .nav-tab:hover { color: var(--fg); }
-  .nav-tab.active { color: var(--fg); border-bottom-color: var(--primary); }
-  .tab-content { display: none; }
-  .tab-content.active { display: block; }
-  /* Tables */
-  table {
-    width: 100%; border-collapse: collapse;
-    background: var(--card);
-    border: 1px solid var(--border);
-    border-radius: var(--radius);
-    overflow: hidden;
-  }
-  th {
-    text-align: left; font-size: 11px; text-transform: uppercase;
-    color: var(--muted); padding: 12px 16px;
-    border-bottom: 1px solid var(--border);
-    letter-spacing: 0.5px; font-weight: 600;
-    background: var(--secondary);
-  }
-  td {
-    padding: 12px 16px; border-bottom: 1px solid var(--border);
-    font-size: 13px;
-  }
-  tr:last-child td { border-bottom: none; }
-  tr.clickable { cursor: pointer; transition: background .1s; }
-  tr.clickable:hover { background: var(--secondary); }
-  code {
-    font-family: var(--mono); font-size: 12px;
-    background: var(--secondary); padding: 2px 6px;
-    border-radius: 4px;
-  }
-  /* Badges */
-  .badge {
-    display: inline-block; padding: 3px 10px; border-radius: 100px;
-    font-size: 11px; font-weight: 600;
-  }
-  .badge-active { background: rgba(34,197,94,0.1); color: #16a34a; }
-  .badge-ended { background: var(--secondary); color: var(--muted); }
-  .badge-pipeline { background: rgba(167,139,250,0.1); color: #7c3aed; }
-  .badge-realtime { background: rgba(59,130,246,0.1); color: #2563eb; }
-  .cost { color: #16a34a; font-family: var(--mono); font-size: 13px; }
-  .latency { color: #ca8a04; font-family: var(--mono); font-size: 13px; }
-  @media (prefers-color-scheme: dark) {
-    .cost { color: var(--green); }
-    .latency { color: var(--yellow); }
-    code { background: var(--secondary); color: var(--fg); }
-  }
-  .empty {
-    text-align: center; padding: 48px; color: var(--muted);
-    font-size: 14px;
-  }
-  /* Modal */
-  .modal-overlay {
-    display: none; position: fixed; inset: 0;
-    background: rgba(0,0,0,0.4); backdrop-filter: blur(6px);
-    z-index: 200;
-    justify-content: center; align-items: flex-start;
-    padding: 48px 20px; overflow-y: auto;
-  }
-  .modal-overlay.open { display: flex; }
-  .modal {
-    background: var(--card);
-    border: 1px solid var(--border);
-    border-radius: 16px;
-    max-width: 820px; width: 100%;
-    padding: 0;
-    box-shadow: 0 24px 64px rgba(0,0,0,0.12), 0 0 0 1px rgba(0,0,0,0.03);
-    overflow: hidden;
-  }
-  .modal-header {
-    display: flex; justify-content: space-between; align-items: center;
-    padding: 20px 28px;
-    border-bottom: 1px solid var(--border);
-    background: var(--bg);
-  }
-  .modal-header h2 { font-size: 15px; font-weight: 600; display: flex; align-items: center; gap: 10px; }
-  .modal-close {
-    background: none; border: 1px solid var(--border);
-    color: var(--muted); width: 30px; height: 30px;
-    border-radius: 8px; font-size: 16px; cursor: pointer;
-    display: flex; align-items: center; justify-content: center;
-    transition: all .15s;
-  }
-  .modal-close:hover { background: var(--secondary); color: var(--fg); }
-  .modal-body { padding: 24px 28px; }
-  .detail-grid {
-    display: grid; grid-template-columns: 1fr 1fr;
-    gap: 14px; margin-bottom: 20px;
-  }
-  .detail-card {
-    background: var(--bg);
-    border: 1px solid var(--border);
-    border-radius: var(--radius); padding: 16px 18px;
-  }
-  .detail-card h3 {
-    font-size: 11px; color: var(--muted);
-    text-transform: uppercase; letter-spacing: 0.5px;
-    margin-bottom: 10px; font-weight: 600;
-  }
-  .detail-row {
-    display: flex; justify-content: space-between; align-items: baseline;
-    font-size: 13px; padding: 5px 0;
-  }
-  .detail-row .k { color: var(--muted); font-weight: 500; }
-  .detail-row span:last-child { font-weight: 500; text-align: right; }
-  .detail-row .mono { font-family: var(--mono); font-size: 12px; }
-  .detail-sep {
-    border-top: 1px solid var(--border); padding-top: 8px; margin-top: 6px;
-  }
-  .transcript-box {
-    border: 1px solid var(--border);
-    border-radius: var(--radius);
-    padding: 16px; max-height: 340px; overflow-y: auto;
-    background: var(--bg);
-  }
-  .transcript-box .msg {
-    padding: 8px 12px; border-radius: 10px; font-size: 13px;
-    max-width: 85%; margin-bottom: 6px; line-height: 1.5;
-  }
-  .transcript-box .msg.user {
-    background: var(--secondary); margin-left: auto;
-    border-bottom-right-radius: 4px;
-  }
-  .transcript-box .msg.assistant {
-    background: var(--assistant-bubble); margin-right: auto;
-    border-bottom-left-radius: 4px;
-  }
-  .transcript-box .role {
-    font-weight: 600; font-size: 11px; text-transform: uppercase;
-    letter-spacing: 0.3px; display: block; margin-bottom: 2px;
-  }
-  .transcript-box .msg.user .role { color: var(--blue); }
-  .transcript-box .msg.assistant .role { color: #7c3aed; }
-  /* Turn bars */
-  .turns-table { margin-top: 16px; }
-  .turns-table table { border: 1px solid var(--border); }
-  .bar-container { display: flex; height: 14px; border-radius: 4px; overflow: hidden; min-width: 120px; }
-  .bar-stt { background: var(--blue); }
-  .bar-llm { background: var(--purple); }
-  .bar-tts { background: var(--orange); }
-</style>
-</head>
-<body>
-<header>
-  <a href="/" class="logo">
-    <svg viewBox="0 0 1188 1773" fill="none" xmlns="http://www.w3.org/2000/svg">
-      <path d="M25 561L245 694M25 561V818M245 694V951M25 961V1218M25 1357V1614M245 1489V1747M245 1093V1351M942 823V1080M1161 955V1213M1162 555V812M942 422V679M669 585V843L787 913M942 25V282M1162 158V415M25 818L245 951M244 1094L464 962M25 961L143 890M244 1352L464 1219M942 823L1162 956M942 679L1162 812M721 811L942 679M669 842L724 809M669 586L724 553M1041 883L1162 812M245 1747L1161 1213M244 1490L942 1080M25 1357L142 1289M518 1071L942 823M721 555L942 422M942 422L1162 556M942 282L1162 415M942 25L1162 158M942 1080L1161 1213M25 1218L245 1351M25 961L245 1094M464 962L519 929M464 1219L519 1186V928L403 859M25 1357L245 1490M25 1614L245 1747M25 561L942 25M244 694L941 282M1043 484L1162 415M245 951L668 704" stroke="currentColor" stroke-width="50" stroke-linecap="round"/>
-    </svg>
-    Patter
-  </a>
-  <div class="header-sep"></div>
-  <span class="header-title">Dashboard</span>
-  <span class="badge-beta">Beta</span>
-  <div class="status"><span class="dot"></span> <span id="status-text">Listening</span></div>
-</header>
-<div class="container">
-  <div class="cards">
-    <div class="card">
-      <div class="label">Total Calls</div>
-      <div class="value" id="stat-total">0</div>
-      <div class="sub"><span id="stat-active">0</span> active</div>
-    </div>
-    <div class="card">
-      <div class="label">Total Cost</div>
-      <div class="value cost" id="stat-cost">$0.00</div>
-      <div class="sub" id="stat-cost-breakdown">-</div>
-    </div>
-    <div class="card">
-      <div class="label">Avg Duration</div>
-      <div class="value" id="stat-duration">0s</div>
-    </div>
-    <div class="card">
-      <div class="label">Avg Latency</div>
-      <div class="value latency" id="stat-latency">0ms</div>
-      <div class="sub">end-to-end response</div>
-    </div>
-  </div>
-  <div class="nav-tabs">
-    <button class="nav-tab active" data-tab="calls">Calls</button>
-    <button class="nav-tab" data-tab="active">Active</button>
-  </div>
-  <div class="tab-content active" id="tab-calls">
-    <div class="section">
-      <table id="calls-table">
-        <thead>
-          <tr>
-            <th>Call ID</th><th>Direction</th><th>From / To</th>
-            <th>Duration</th><th>Mode</th><th>Cost</th><th>Avg Latency</th><th>Turns</th>
-          </tr>
-        </thead>
-        <tbody id="calls-body">
-          <tr><td colspan="8" class="empty">No calls yet. Waiting for incoming calls...</td></tr>
-        </tbody>
-      </table>
-    </div>
-  </div>
-  <div class="tab-content" id="tab-active">
-    <div class="section">
-      <table>
-        <thead>
-          <tr><th>Call ID</th><th>Caller</th><th>Callee</th><th>Direction</th><th>Duration</th><th>Turns</th></tr>
-        </thead>
-        <tbody id="active-body">
-          <tr><td colspan="6" class="empty">No active calls</td></tr>
-        </tbody>
-      </table>
-    </div>
-  </div>
-</div>
-<div class="modal-overlay" id="modal">
-  <div class="modal">
-    <div class="modal-header">
-      <h2 id="modal-title">Call Detail</h2>
-      <button class="modal-close" onclick="closeModal()">&times;</button>
-    </div>
-    <div class="modal-body" id="modal-body"></div>
-  </div>
-</div>
-<script>
-var _$ = function(s) { return document.querySelector(s); };
-var _$$ = function(s) { return document.querySelectorAll(s); };
-_$$('.nav-tab').forEach(function(tab) {
-  tab.addEventListener('click', function() {
-    _$$('.nav-tab').forEach(function(t) { t.classList.remove('active'); });
-    _$$('.tab-content').forEach(function(t) { t.classList.remove('active'); });
-    tab.classList.add('active');
-    document.querySelector('#tab-'+tab.dataset.tab).classList.add('active');
-  });
-});
-function esc(s) {
-  if (!s) return '';
-  return String(s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;').replace(/'/g,'&#39;');
-}
-function fmtCost(v) { return v >= 0.01 ? '$'+v.toFixed(4) : v > 0 ? '$'+v.toFixed(6) : '$0.00'; }
-function fmtMs(v) { return v != null && v >= 0 ? Math.round(v)+'ms' : '-'; }
-function fmtDur(s) {
-  if (s == null || s < 0) return '-';
-  if (s < 60) return Math.round(s)+'s';
-  return Math.floor(s/60)+'m '+Math.round(s%60)+'s';
-}
-function shortId(id) { return id ? esc(id.length > 16 ? id.slice(0,8)+'...'+id.slice(-4) : id) : '-'; }
-function fetchJSON(url) {
-  return fetch(url).then(function(r) { return r.json(); });
-}
-function refreshAggregates() {
-  return fetchJSON('/api/dashboard/aggregates').then(function(d) {
-    _$('#stat-total').textContent = d.total_calls;
-    _$('#stat-active').textContent = d.active_calls;
-    _$('#stat-cost').textContent = fmtCost(d.total_cost);
-    var cb = d.cost_breakdown;
-    _$('#stat-cost-breakdown').textContent =
-      'STT '+fmtCost(cb.stt)+' | LLM '+fmtCost(cb.llm)+' | TTS '+fmtCost(cb.tts)+' | Tel '+fmtCost(cb.telephony);
-    _$('#stat-duration').textContent = fmtDur(d.avg_duration);
-    _$('#stat-latency').textContent = fmtMs(d.avg_latency_ms);
-  });
-}
-function refreshCalls() {
-  return fetchJSON('/api/dashboard/calls?limit=50').then(function(calls) {
-    var body = _$('#calls-body');
-    if (!calls.length) {
-      body.innerHTML = '<tr><td colspan="8" class="empty">No calls yet. Waiting for incoming calls...</td></tr>';
-      return;
-    }
-    body.innerHTML = calls.map(function(c) {
-      var m = c.metrics || {};
-      var cost = m.cost || {};
-      var lat = m.latency_avg || {};
-      var mode = m.provider_mode || '-';
-      var turns = m.turns ? m.turns.length : 0;
-      var modeClass = mode === 'pipeline' ? 'badge-pipeline' : 'badge-realtime';
-      return '<tr class="clickable" onclick="showCall(\\''+esc(c.call_id)+'\\')">'+
-        '<td><code>'+shortId(c.call_id)+'</code></td>'+
-        '<td>'+(esc(c.direction) || '-')+'</td>'+
-        '<td>'+(esc(c.caller) || '-')+' &rarr; '+(esc(c.callee) || '-')+'</td>'+
-        '<td>'+fmtDur(m.duration_seconds)+'</td>'+
-        '<td><span class="badge '+modeClass+'">'+esc(mode)+'</span></td>'+
-        '<td class="cost">'+fmtCost(cost.total || 0)+'</td>'+
-        '<td class="latency">'+fmtMs(lat.total_ms || 0)+'</td>'+
-        '<td>'+turns+'</td></tr>';
-    }).join('');
-  });
-}
-function refreshActive() {
-  return fetchJSON('/api/dashboard/active').then(function(active) {
-    var body = _$('#active-body');
-    if (!active.length) {
-      body.innerHTML = '<tr><td colspan="6" class="empty">No active calls</td></tr>';
-      return;
-    }
-    var now = Date.now() / 1000;
-    body.innerHTML = active.map(function(c) {
-      var dur = c.started_at ? Math.round(now - c.started_at) : 0;
-      var turns = c.turns ? c.turns.length : 0;
-      return '<tr>'+
-        '<td><code>'+shortId(c.call_id)+'</code></td>'+
-        '<td>'+(esc(c.caller) || '-')+'</td>'+
-        '<td>'+(esc(c.callee) || '-')+'</td>'+
-        '<td>'+(esc(c.direction) || '-')+'</td>'+
-        '<td data-started="'+(c.started_at || 0)+'">'+fmtDur(dur)+'</td>'+
-        '<td>'+turns+'</td></tr>';
-    }).join('');
-  });
-}
-function showCall(callId) {
-  fetchJSON('/api/dashboard/calls/'+encodeURIComponent(callId)).then(function(c) {
-    if (c.error) return;
-    var m = c.metrics || {};
-    var cost = m.cost || {};
-    var latAvg = m.latency_avg || {};
-    var latP95 = m.latency_p95 || {};
-    var turns = m.turns || [];
-    var modeLabel = (m.provider_mode || '').replace(/_/g, ' ');
-    var modeBadgeClass = (m.provider_mode || '').indexOf('pipeline') !== -1 ? 'badge-pipeline' : 'badge-realtime';
-    _$('#modal-title').innerHTML = 'Call <code>'+shortId(c.call_id)+'</code> <span class="badge '+modeBadgeClass+'" style="font-size:10px">'+esc(modeLabel)+'</span>';
-    var isRealtime = (m.provider_mode || '').indexOf('realtime') !== -1;
-    var html = '<div class="detail-grid">'+
-      '<div class="detail-card">'+
-        '<h3>Overview</h3>'+
-        '<div class="detail-row"><span class="k">Direction</span><span>'+(esc(c.direction) || '-')+'</span></div>'+
-        '<div class="detail-row"><span class="k">From</span><span class="mono">'+(esc(c.caller) || '-')+'</span></div>'+
-        '<div class="detail-row"><span class="k">To</span><span class="mono">'+(esc(c.callee) || '-')+'</span></div>'+
-        '<div class="detail-row"><span class="k">Duration</span><span style="font-weight:600">'+fmtDur(m.duration_seconds)+'</span></div>'+
-        (isRealtime ? '' :
-          '<div class="detail-row"><span class="k">STT</span><span>'+(esc(m.stt_provider) || '-')+'</span></div>'+
-          '<div class="detail-row"><span class="k">TTS</span><span>'+(esc(m.tts_provider) || '-')+'</span></div>'+
-          '<div class="detail-row"><span class="k">LLM</span><span>'+(esc(m.llm_provider) || '-')+'</span></div>'
-        )+
-        '<div class="detail-row"><span class="k">Telephony</span><span>'+(esc(m.telephony_provider) || '-')+'</span></div>'+
-      '</div>'+
-      '<div class="detail-card">'+
-        '<h3>Cost</h3>'+
-        (isRealtime ?
-          '<div class="detail-row"><span class="k">OpenAI</span><span class="cost">'+fmtCost(cost.llm || 0)+'</span></div>' :
-          '<div class="detail-row"><span class="k">STT</span><span class="cost">'+fmtCost(cost.stt || 0)+'</span></div>'+
-          '<div class="detail-row"><span class="k">LLM</span><span class="cost">'+fmtCost(cost.llm || 0)+'</span></div>'+
-          '<div class="detail-row"><span class="k">TTS</span><span class="cost">'+fmtCost(cost.tts || 0)+'</span></div>'
-        )+
-        '<div class="detail-row"><span class="k">Telephony</span><span class="cost">'+fmtCost(cost.telephony || 0)+'</span></div>'+
-        '<div class="detail-row detail-sep">'+
-          '<span class="k" style="font-weight:600">Total</span><span class="cost" style="font-weight:700;font-size:14px">'+fmtCost(cost.total || 0)+'</span>'+
-        '</div>'+
-        '<h3 style="margin-top:16px">Latency <span style="font-weight:400;text-transform:none;letter-spacing:0;color:var(--muted)">(avg / p95)</span></h3>'+
-        (isRealtime ? '' :
-          '<div class="detail-row"><span class="k">STT</span><span class="latency">'+fmtMs(latAvg.stt_ms)+' / '+fmtMs(latP95.stt_ms)+'</span></div>'+
-          '<div class="detail-row"><span class="k">LLM</span><span class="latency">'+fmtMs(latAvg.llm_ms)+' / '+fmtMs(latP95.llm_ms)+'</span></div>'+
-          '<div class="detail-row"><span class="k">TTS</span><span class="latency">'+fmtMs(latAvg.tts_ms)+' / '+fmtMs(latP95.tts_ms)+'</span></div>'
-        )+
-        '<div class="detail-row"><span class="k">'+(isRealtime ? 'End-to-end' : 'Total')+'</span><span class="latency" style="font-weight:700;font-size:14px">'+fmtMs(latAvg.total_ms)+' / '+fmtMs(latP95.total_ms)+'</span></div>'+
-      '</div></div>';
-    if (turns.length) {
-      var maxMs = Math.max.apply(null, turns.map(function(t) {
-        var l = t.latency || {};
-        return (l.stt_ms||0) + (l.llm_ms||0) + (l.tts_ms||0) + (l.total_ms||0);
-      }).concat([1]));
-      html += '<div class="detail-card turns-table"><h3>Turns ('+turns.length+')</h3>'+
-        '<table><thead><tr><th>#</th><th>User</th><th>Agent</th><th>Latency</th><th>Breakdown</th></tr></thead><tbody>';
-      turns.forEach(function(t, i) {
-        var l = t.latency || {};
-        var total = l.total_ms || ((l.stt_ms||0) + (l.llm_ms||0) + (l.tts_ms||0));
-        var scale = total > 0 ? 120 / maxMs : 0;
-        var sttW = (l.stt_ms||0) * scale;
-        var llmW = (l.llm_ms||0) * scale;
-        var ttsW = (l.tts_ms||0) * scale;
-        var totalW = total > 0 && sttW === 0 && llmW === 0 && ttsW === 0 ? total * scale : 0;
-        html += '<tr>'+
-          '<td>'+(t.turn_index !== undefined ? t.turn_index : i)+'</td>'+
-          '<td title="'+esc(t.user_text||'')+'">'+esc((t.user_text||'').slice(0,40))+((t.user_text||'').length>40?'...':'')+'</td>'+
-          '<td title="'+esc(t.agent_text||'')+'">'+esc((t.agent_text||'').slice(0,40))+((t.agent_text||'').length>40?'...':'')+'</td>'+
-          '<td class="latency">'+fmtMs(total)+'</td>'+
-          '<td><div class="bar-container">'+
-            (sttW > 0 ? '<div class="bar-stt" style="width:'+sttW+'px" title="STT '+fmtMs(l.stt_ms)+'"></div>' : '')+
-            (llmW > 0 ? '<div class="bar-llm" style="width:'+llmW+'px" title="LLM '+fmtMs(l.llm_ms)+'"></div>' : '')+
-            (ttsW > 0 ? '<div class="bar-tts" style="width:'+ttsW+'px" title="TTS '+fmtMs(l.tts_ms)+'"></div>' : '')+
-            (totalW > 0 ? '<div class="bar-llm" style="width:'+totalW+'px" title="Total '+fmtMs(total)+'"></div>' : '')+
-          '</div></td></tr>';
-      });
-      html += '</tbody></table>'+
-        '<div style="margin-top:10px;font-size:11px;color:var(--muted)">'+
-          (isRealtime ?
-            '<span style="color:var(--purple)">&#9632;</span> End-to-end' :
-            '<span style="color:var(--blue)">&#9632;</span> STT &nbsp;'+
-            '<span style="color:var(--purple)">&#9632;</span> LLM &nbsp;'+
-            '<span style="color:var(--orange)">&#9632;</span> TTS'
-          )+
-        '</div></div>';
-    }
-    var transcript = c.transcript || [];
-    if (transcript.length) {
-      html += '<div class="detail-card" style="margin-top:16px"><h3>Transcript</h3><div class="transcript-box">';
-      transcript.forEach(function(msg) {
-        var role = esc(msg.role || 'unknown');
-        html += '<div class="msg '+role+'"><span class="role">'+role+'</span>'+esc(msg.text || '')+'</div>';
-      });
-      html += '</div></div>';
+init_esm_shims();
+import { readFileSync as readFileSync2 } from "fs";
+import { join as join2, dirname } from "path";
+var FALLBACK_HTML = `<!doctype html>
+<html><head><meta charset="utf-8"><title>Patter dashboard</title></head>
+<body style="font-family:ui-sans-serif,system-ui;padding:2rem;color:#1a1a1a">
+<h1>Dashboard asset missing</h1>
+<p>The bundled <code>ui.html</code> was not found alongside this module.
+Run <code>cd dashboard-app &amp;&amp; npm run build &amp;&amp; npm run sync</code>
+from the repo root to regenerate it.</p>
+</body></html>`;
+function loadDashboardHtml() {
+  const here = typeof __dirname !== "undefined" ? __dirname : dirname(".");
+  const candidates = [
+    join2(here, "ui.html"),
+    join2(here, "dashboard", "ui.html"),
+    join2(here, "..", "dashboard", "ui.html")
+  ];
+  for (const path3 of candidates) {
+    try {
+      return readFileSync2(path3, "utf8");
+    } catch {
     }
-    _$('#modal-body').innerHTML = html;
-    _$('#modal').classList.add('open');
-  });
-}
-function closeModal() { _$('#modal').classList.remove('open'); }
-_$('#modal').addEventListener('click', function(e) { if (e.target === _$('#modal')) closeModal(); });
-document.addEventListener('keydown', function(e) { if (e.key === 'Escape') closeModal(); });
-function refresh() {
-  return Promise.all([refreshAggregates(), refreshCalls(), refreshActive()]).then(function() {
-    _$('#status-text').textContent = 'Listening';
-  }).catch(function() {
-    _$('#status-text').textContent = 'Connection error';
-  });
-}
-refresh();
-// Update active call durations every second
-setInterval(function() {
-  var cells = document.querySelectorAll('#active-body td[data-started]');
-  if (!cells.length) return;
-  var now = Date.now() / 1000;
-  cells.forEach(function(td) {
-    var started = parseFloat(td.getAttribute('data-started'));
-    if (started) td.textContent = fmtDur(Math.round(now - started));
-  });
-}, 1000);
-if (typeof EventSource !== 'undefined') {
-  var sseUrl = '/api/dashboard/events';
-  var sseBackoff = 1000;
-  var sseFailures = 0;
-  var SSE_MAX_BACKOFF = 30000;
-  var SSE_MAX_FAILURES = 5;
-  function connectSSE() {
-    var es = new EventSource(sseUrl);
-    function onEvent() { sseBackoff = 1000; sseFailures = 0; }
-    es.addEventListener('call_start', function() { onEvent(); refresh(); });
-    es.addEventListener('turn_complete', function() { onEvent(); refreshAggregates(); });
-    es.addEventListener('call_end', function() { onEvent(); refresh(); });
-    es.onerror = function() {
-      es.close();
-      sseFailures++;
-      if (sseFailures >= SSE_MAX_FAILURES) {
-        _$('#status-text').textContent = 'Polling';
-        setInterval(refresh, 5000);
-        return;
-      }
-      _$('#status-text').textContent = 'Reconnecting...';
-      setTimeout(connectSSE, sseBackoff);
-      sseBackoff = Math.min(sseBackoff * 2, SSE_MAX_BACKOFF);
-    };
   }
-  connectSSE();
-} else {
-  setInterval(refresh, 3000);
+  return FALLBACK_HTML;
 }
-</script>
-</body>
-</html>`;
+var DASHBOARD_HTML = loadDashboardHtml();
 // src/dashboard/routes.ts
 function mountDashboard(app, store, token = "") {
@@ -1996,6 +1757,7 @@ function mountApi(app, store, token = "") {
 }
 // src/remote-message.ts
+init_esm_shims();
 import crypto2 from "crypto";
 var MAX_RESPONSE_BYTES = 64 * 1024;
 function validateWebSocketUrl(url) {
@@ -2200,43 +1962,99 @@ function isWebSocketUrl(url) {
   return url.startsWith("ws://") || url.startsWith("wss://");
 }
+// src/stream-handler.ts
+init_esm_shims();
 // src/providers/deepgram-stt.ts
+init_esm_shims();
 import WebSocket3 from "ws";
 // src/errors.ts
+init_esm_shims();
+var ErrorCode = {
+  /** Invalid constructor args, missing required env var, frozen-config violation. */
+  CONFIG: "CONFIG",
+  /** WebSocket connect failure, HTTP 5xx from provider, network error. */
+  CONNECTION: "CONNECTION",
+  /** Provider rejected our credentials (HTTP 401/403, invalid signature). */
+  AUTH: "AUTH",
+  /** Provider response, voicemail post, or other awaited operation timed out. */
+  TIMEOUT: "TIMEOUT",
+  /** Provider returned HTTP 429. */
+  RATE_LIMIT: "RATE_LIMIT",
+  /** Twilio / Telnyx webhook signature verification failed. */
+  WEBHOOK_VERIFICATION: "WEBHOOK_VERIFICATION",
+  /** Caller passed a malformed phone number, tool arg, etc. */
+  INPUT_VALIDATION: "INPUT_VALIDATION",
+  /** Generic catch-all for unexpected upstream provider failures. */
+  PROVIDER_ERROR: "PROVIDER_ERROR",
+  /** Phone number provisioning, webhook configuration, or carrier setup failed. */
+  PROVISION: "PROVISION",
+  /** Assertion failed / unexpected internal state. Likely a Patter bug. */
+  INTERNAL: "INTERNAL"
+};
 var PatterError = class extends Error {
-  constructor(message) {
+  /** Stable, machine-readable error code. Subclasses set the default. */
+  code;
+  constructor(message, options) {
     super(message);
     this.name = "PatterError";
+    this.code = options?.code ?? ErrorCode.INTERNAL;
   }
 };
 var PatterConnectionError = class extends PatterError {
-  constructor(message) {
-    super(message);
+  constructor(message, options) {
+    super(message, { code: options?.code ?? ErrorCode.CONNECTION });
     this.name = "PatterConnectionError";
   }
 };
 var AuthenticationError = class extends PatterError {
-  constructor(message) {
-    super(message);
+  constructor(message, options) {
+    super(message, { code: options?.code ?? ErrorCode.AUTH });
     this.name = "AuthenticationError";
   }
 };
 var ProvisionError = class extends PatterError {
-  constructor(message) {
-    super(message);
+  constructor(message, options) {
+    super(message, { code: options?.code ?? ErrorCode.PROVISION });
     this.name = "ProvisionError";
   }
 };
 var RateLimitError = class extends PatterConnectionError {
-  constructor(message) {
-    super(message);
+  constructor(message, options) {
+    super(message, { code: options?.code ?? ErrorCode.RATE_LIMIT });
     this.name = "RateLimitError";
   }
 };
 // src/providers/deepgram-stt.ts
 var DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/listen";
+var DeepgramModel = {
+  NOVA_3: "nova-3",
+  NOVA_2: "nova-2",
+  NOVA_2_PHONECALL: "nova-2-phonecall",
+  NOVA_2_GENERAL: "nova-2-general",
+  NOVA_2_MEETING: "nova-2-meeting",
+  NOVA: "nova",
+  ENHANCED: "enhanced",
+  BASE: "base"
+};
+var DeepgramEncoding = {
+  LINEAR16: "linear16",
+  MULAW: "mulaw",
+  ALAW: "alaw",
+  OPUS: "opus",
+  FLAC: "flac",
+  AMR_NB: "amr-nb",
+  AMR_WB: "amr-wb"
+};
+var DeepgramSampleRate = {
+  HZ_8000: 8e3,
+  HZ_16000: 16e3,
+  HZ_24000: 24e3,
+  HZ_44100: 44100,
+  HZ_48000: 48e3
+};
 var KEEPALIVE_INTERVAL_MS = 4e3;
 var FINALIZE_DRAIN_MS = 100;
 var CLOSE_LATENCY_BUDGET_MS = 500;
@@ -2264,9 +2082,9 @@ var DeepgramSTT = class _DeepgramSTT {
     this.apiKey = apiKey;
     const opts = typeof languageOrOptions === "object" && languageOrOptions !== null ? languageOrOptions : options ?? {};
     this.language = (typeof languageOrOptions === "string" ? languageOrOptions : opts.language) ?? "en";
-    this.model = model ?? opts.model ?? "nova-3";
-    this.encoding = encoding ?? opts.encoding ?? "linear16";
-    this.sampleRate = sampleRate ?? opts.sampleRate ?? 16e3;
+    this.model = model ?? opts.model ?? DeepgramModel.NOVA_3;
+    this.encoding = encoding ?? opts.encoding ?? DeepgramEncoding.LINEAR16;
+    this.sampleRate = sampleRate ?? opts.sampleRate ?? DeepgramSampleRate.HZ_16000;
     this.endpointingMs = opts.endpointingMs ?? 150;
     this.utteranceEndMs = opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3;
     this.smartFormat = opts.smartFormat ?? false;
@@ -2274,8 +2092,15 @@ var DeepgramSTT = class _DeepgramSTT {
     this.vadEvents = opts.vadEvents ?? true;
   }
   /** Factory for Twilio calls — mulaw 8 kHz. Forwards tuning options through. */
-  static forTwilio(apiKey, language = "en", model = "nova-3", options = {}) {
-    return new _DeepgramSTT(apiKey, language, model, "mulaw", 8e3, options);
+  static forTwilio(apiKey, language = "en", model = DeepgramModel.NOVA_3, options = {}) {
+    return new _DeepgramSTT(
+      apiKey,
+      language,
+      model,
+      DeepgramEncoding.MULAW,
+      DeepgramSampleRate.HZ_8000,
+      options
+    );
   }
   buildUrl() {
     const params = new URLSearchParams({
@@ -2295,6 +2120,7 @@ var DeepgramSTT = class _DeepgramSTT {
     }
     return `${DEEPGRAM_WS_URL}?${params.toString()}`;
   }
+  /** Open the streaming WebSocket and arm message + keepalive handlers. */
   async connect() {
     await this.openSocket();
     this.running = true;
@@ -2360,6 +2186,18 @@ var DeepgramSTT = class _DeepgramSTT {
     } catch {
       return;
     }
+    const dataType = String(data.type ?? "unknown");
+    if (dataType === "Results") {
+      const transcript2 = (data.channel?.alternatives?.[0]?.transcript ?? "").trim();
+      const isFinal = Boolean(data.is_final);
+      const speechFinal2 = Boolean(data.speech_final);
+      const fromFinalize = Boolean(data.from_finalize);
+      getLogger().info(
+        `[DIAG] DG Results text=${JSON.stringify(transcript2.slice(0, 60))} isFinal=${isFinal} speechFinal=${speechFinal2} fromFinalize=${fromFinalize}`
+      );
+    } else if (dataType !== "Metadata") {
+      getLogger().info(`[DIAG] DG event type=${dataType}`);
+    }
     if (data.type === "Metadata" && data.request_id) {
       this.requestId = data.request_id;
       return;
@@ -2444,23 +2282,71 @@ var DeepgramSTT = class _DeepgramSTT {
       this.running = false;
     }
   }
+  /** Send a binary audio chunk to Deepgram for transcription. */
   sendAudio(audio) {
-    if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
+    if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) {
+      this.audioDroppedCount++;
+      if (this.audioDroppedCount === 1 || this.audioDroppedCount % 50 === 0) {
+        getLogger().info(
+          `[DIAG] DeepgramSTT.sendAudio dropped (ws state=${this.ws?.readyState ?? "null"}) \u2014 total dropped=${this.audioDroppedCount}`
+        );
+      }
+      return;
+    }
     if (audio.length === 0) return;
+    this.audioSentCount++;
+    if (this.audioSentCount === 1 || this.audioSentCount % 100 === 0) {
+      getLogger().info(
+        `[DIAG] DeepgramSTT.sendAudio: total chunks sent=${this.audioSentCount} (last=${audio.length} bytes)`
+      );
+    }
     this.ws.send(audio);
   }
+  audioSentCount = 0;
+  audioDroppedCount = 0;
+  /** Register a transcript listener. */
   onTranscript(callback) {
     this.transcriptCallbacks.add(callback);
   }
+  /** Remove a previously registered transcript listener. */
   offTranscript(callback) {
     this.transcriptCallbacks.delete(callback);
   }
+  /** Register an error listener for socket / API failures. */
   onError(callback) {
     this.errorCallbacks.add(callback);
   }
+  /** Remove a previously registered error listener. */
   offError(callback) {
     this.errorCallbacks.delete(callback);
   }
+  /**
+   * Force Deepgram to immediately emit a final ``Results`` frame for the
+   * in-flight utterance, rather than waiting for its own endpoint
+   * heuristic (utterance_end_ms ~1 s + natural-pause endpointing).
+   * Called by the SDK on VAD ``speech_end`` and after barge-in cancel —
+   * both moments where the SDK already knows the user has stopped
+   * speaking and waiting for Deepgram's own endpointing only adds
+   * dead air.
+   *
+   * Idempotent: safe to call when the socket is closed/closing.
+   */
+  finalize() {
+    const ws = this.ws;
+    if (!ws || ws.readyState !== WebSocket3.OPEN) {
+      getLogger().info(
+        `[DIAG] DeepgramSTT.finalize SKIPPED (ws state=${ws?.readyState ?? "null"})`
+      );
+      return;
+    }
+    try {
+      ws.send(JSON.stringify({ type: "Finalize" }));
+      getLogger().info("[DIAG] DeepgramSTT.finalize sent {type:Finalize}");
+    } catch (err) {
+      getLogger().info(`[DIAG] DeepgramSTT.finalize send failed: ${String(err)}`);
+    }
+  }
+  /** Send Finalize, briefly drain trailing transcripts, then close the socket. */
   close() {
     this.running = false;
     this.clearKeepalive();
@@ -2492,6 +2378,7 @@ var DeepgramSTT = class _DeepgramSTT {
 };
 // src/metrics.ts
+init_esm_shims();
 function round(value, decimals) {
   const factor = 10 ** decimals;
   return Math.round(value * factor) / factor;
@@ -2518,6 +2405,14 @@ var CallMetricsAccumulator = class {
   sttProvider;
   ttsProvider;
   llmProvider;
+  /**
+   * Model identifiers for per-model rate resolution (see pricing.ts). Empty
+   * string means "not known" → cost calc falls back to provider defaults,
+   * matching pre-2026.3 behaviour.
+   */
+  sttModel;
+  ttsModel;
+  realtimeModel;
   _pricing;
   _callStart;
   _turns = [];
@@ -2579,6 +2474,9 @@ var CallMetricsAccumulator = class {
     this.sttProvider = opts.sttProvider ?? "";
     this.ttsProvider = opts.ttsProvider ?? "";
     this.llmProvider = opts.llmProvider ?? "";
+    this.sttModel = opts.sttModel ?? "";
+    this.ttsModel = opts.ttsModel ?? "";
+    this.realtimeModel = opts.realtimeModel ?? "";
     this._pricing = mergePricing(opts.pricing);
     this._callStart = hrTimeMs();
     this._eventBus = opts.eventBus;
@@ -2601,6 +2499,7 @@ var CallMetricsAccumulator = class {
   get turnActive() {
     return this._turnStart !== null;
   }
+  /** Begin a new turn — stamps the turn start timestamp and resets per-turn state. */
   startTurn() {
     this._turnStart = hrTimeMs();
     this._sttComplete = null;
@@ -2631,6 +2530,7 @@ var CallMetricsAccumulator = class {
       this.startTurn();
     }
   }
+  /** Stamp end-of-STT, capture the user's transcript, and accrue billed STT seconds. */
   recordSttComplete(text, audioSeconds = 0) {
     this._sttComplete = hrTimeMs();
     this._sttFinalAt = this._sttComplete;
@@ -2640,11 +2540,30 @@ var CallMetricsAccumulator = class {
     this._turnUserText = text;
     this._turnSttAudioSeconds = audioSeconds;
     this._totalSttAudioSeconds += audioSeconds;
+    if (this._eventBus) {
+      const valueSec = this._turnStart !== null ? (this._sttComplete - this._turnStart) / 1e3 : 0;
+      const payload = {
+        timestamp: Date.now() / 1e3,
+        processor: "stt",
+        model: null,
+        value: valueSec
+      };
+      this._eventBus.emit("stt_metrics", payload);
+    }
   }
   /** Record the timestamp of the first LLM token (TTFT). No-op after first call. */
   recordLlmFirstToken() {
     if (this._llmFirstToken === null) {
       this._llmFirstToken = hrTimeMs();
+      if (this._eventBus && this._sttComplete !== null && (!this._reportOnlyInitialTtfb || !this._initialTtfbEmitted)) {
+        const payload = {
+          timestamp: Date.now() / 1e3,
+          processor: "llm",
+          model: null,
+          value: (this._llmFirstToken - this._sttComplete) / 1e3
+        };
+        this._eventBus.emit("llm_metrics", payload);
+      }
     }
   }
   /**
@@ -2658,9 +2577,11 @@ var CallMetricsAccumulator = class {
       this._llmFirstSentenceComplete = hrTimeMs();
     }
   }
+  /** Stamp end-of-LLM (last token received). */
   recordLlmComplete() {
     this._llmComplete = hrTimeMs();
   }
+  /** Stamp first TTS audio byte sent on the wire (used to compute TTS TTFB). */
   recordTtsFirstByte() {
     if (this._ttsFirstByte === null) {
       this._ttsFirstByte = hrTimeMs();
@@ -2669,7 +2590,20 @@ var CallMetricsAccumulator = class {
       return;
     }
     this._initialTtfbEmitted = true;
+    if (this._eventBus && this._ttsFirstByte !== null) {
+      const ttsRef = this._llmFirstSentenceComplete !== null ? this._llmFirstSentenceComplete : this._llmComplete;
+      if (ttsRef !== null) {
+        const payload = {
+          timestamp: Date.now() / 1e3,
+          processor: "tts",
+          model: null,
+          value: (this._ttsFirstByte - ttsRef) / 1e3
+        };
+        this._eventBus.emit("tts_metrics", payload);
+      }
+    }
   }
+  /** Record final TTS text length and stamp the last-byte timestamp. */
   recordTtsComplete(text) {
     this._totalTtsCharacters += text.length;
     if (this._ttsLastByte === null) {
@@ -2700,6 +2634,7 @@ var CallMetricsAccumulator = class {
   recordTtsStopped(ts) {
     this._bargeinStoppedAt = ts ?? hrTimeMs();
   }
+  /** Close the current turn cleanly and append a `TurnMetrics` record. */
   recordTurnComplete(agentText) {
     const latency = this._computeTurnLatency();
     const turn = {
@@ -2717,6 +2652,7 @@ var CallMetricsAccumulator = class {
     this._eventBus?.emit("metrics_collected", { callId: this.callId, turn });
     return turn;
   }
+  /** Close the current turn as interrupted (barge-in) and return the recorded metrics. */
   recordTurnInterrupted() {
     if (this._turnStart === null) return null;
     const latency = this._computeTurnLatency();
@@ -2782,6 +2718,7 @@ var CallMetricsAccumulator = class {
    * ``transcriptionDelay``       = turnCommitted − vadStopped  (ms)
    * ``onUserTurnCompletedDelay`` = caller-supplied delta (ms) or 0
    */
+  /** Emit `EOUMetrics` once VAD-stop, STT-final, and turn-committed timestamps are all known. */
   emitEouMetrics() {
     if (this._vadStoppedAt === null || this._sttFinalAt === null || this._turnCommittedAt === null) {
       return;
@@ -2832,16 +2769,32 @@ var CallMetricsAccumulator = class {
     this._eventBus?.emit("interruption", payload);
   }
   // ---- Usage tracking ----
+  /** Accumulate inbound STT audio bytes for cost calculation when seconds are unknown. */
   addSttAudioBytes(byteCount) {
     this._sttByteCount += byteCount;
   }
-  recordRealtimeUsage(usage) {
-    this._totalRealtimeCost += calculateRealtimeCost(usage, this._pricing);
-    this._totalRealtimeCachedSavings += calculateRealtimeCachedSavings(usage, this._pricing);
+  /**
+   * Record an OpenAI Realtime usage payload and roll up its cost + cached-savings.
+   *
+   * `model` allows the cost calc to pick the per-model rate (e.g.
+   * `gpt-realtime-2`). Defaults to whatever was supplied at construction
+   * time (`this.realtimeModel`); pass an explicit value to override per-call
+   * (the `response.done` payload carries the model used).
+   */
+  recordRealtimeUsage(usage, model) {
+    const resolvedModel = model || this.realtimeModel || null;
+    this._totalRealtimeCost += calculateRealtimeCost(usage, this._pricing, resolvedModel);
+    this._totalRealtimeCachedSavings += calculateRealtimeCachedSavings(
+      usage,
+      this._pricing,
+      resolvedModel
+    );
   }
+  /** Override the carrier-billed telephony cost (e.g. exact value reported via Twilio API). */
   setActualTelephonyCost(cost) {
     this._actualTelephonyCost = cost;
   }
+  /** Override the provider-billed STT cost when an exact figure is available. */
   setActualSttCost(cost) {
     this._actualSttCost = cost;
   }
@@ -2869,6 +2822,7 @@ var CallMetricsAccumulator = class {
     );
   }
   // ---- Finalize ----
+  /** Finalize the call: flush any in-flight turn, compute aggregates, and return `CallMetrics`. */
   endCall() {
     const duration = (hrTimeMs() - this._callStart) / 1e3;
     if (this.turnActive) {
@@ -2902,6 +2856,7 @@ var CallMetricsAccumulator = class {
     this._eventBus?.emit("call_ended", { callId: this.callId, metrics });
     return metrics;
   }
+  /** Return the cost breakdown for the call so far without ending it. */
   getCostSoFar() {
     const duration = (hrTimeMs() - this._callStart) / 1e3;
     return this._computeCost(duration);
@@ -2962,6 +2917,10 @@ var CallMetricsAccumulator = class {
     if (ttsTotalRef !== null && this._ttsLastByte !== null) {
       tts_total_ms = Math.max(0, this._ttsLastByte - ttsTotalRef);
     }
+    let agent_response_ms;
+    if (endpoint_ms !== void 0 && llm_ttft_ms !== void 0 && tts_ms > 0) {
+      agent_response_ms = round(endpoint_ms + llm_ttft_ms + tts_ms, 1);
+    }
     return {
       stt_ms: round(stt_ms, 1),
       llm_ms: round(llm_ms, 1),
@@ -2971,7 +2930,8 @@ var CallMetricsAccumulator = class {
       total_ms: round(total_ms, 1),
       ...endpoint_ms !== void 0 ? { endpoint_ms: round(endpoint_ms, 1) } : {},
       ...bargein_ms !== void 0 ? { bargein_ms: round(bargein_ms, 1) } : {},
-      ...tts_total_ms !== void 0 ? { tts_total_ms: round(tts_total_ms, 1) } : {}
+      ...tts_total_ms !== void 0 ? { tts_total_ms: round(tts_total_ms, 1) } : {},
+      ...agent_response_ms !== void 0 ? { agent_response_ms } : {}
     };
   }
   _computeCost(durationSeconds) {
@@ -2987,8 +2947,18 @@ var CallMetricsAccumulator = class {
       tts = 0;
       llm = 0;
     } else {
-      stt = this._actualSttCost !== null ? this._actualSttCost : calculateSttCost(this.sttProvider, this._totalSttAudioSeconds, this._pricing);
-      tts = calculateTtsCost(this.ttsProvider, this._totalTtsCharacters, this._pricing);
+      stt = this._actualSttCost !== null ? this._actualSttCost : calculateSttCost(
+        this.sttProvider,
+        this._totalSttAudioSeconds,
+        this._pricing,
+        this.sttModel || null
+      );
+      tts = calculateTtsCost(
+        this.ttsProvider,
+        this._totalTtsCharacters,
+        this._pricing,
+        this.ttsModel || null
+      );
       llm = this._totalLlmCost;
     }
     const telephony = this._actualTelephonyCost !== null ? this._actualTelephonyCost : calculateTelephonyCost(this.telephonyProvider, durationSeconds, this._pricing);
@@ -3074,7 +3044,8 @@ var CallMetricsAccumulator = class {
   }
 };
-// src/transcoding.ts
+// src/audio/transcoding.ts
+init_esm_shims();
 var MULAW_TO_PCM16_TABLE = (() => {
   const table = new Int16Array(256);
   for (let i = 0; i < 256; i++) {
@@ -3189,9 +3160,9 @@ var StatefulResampler = class {
       throw new Error("StatefulResampler: only mono (channels=1) is supported");
     }
     const key = `${this.srcRate}->${this.dstRate}`;
-    if (key !== "16000->8000" && key !== "8000->16000" && key !== "24000->16000") {
+    if (key !== "16000->8000" && key !== "8000->16000" && key !== "24000->16000" && key !== "24000->8000") {
       throw new Error(
-        `StatefulResampler: unsupported conversion ${key}. Supported: 16000->8000, 8000->16000, 24000->16000`
+        `StatefulResampler: unsupported conversion ${key}. Supported: 16000->8000, 8000->16000, 24000->16000, 24000->8000`
       );
     }
   }
@@ -3211,6 +3182,9 @@ var StatefulResampler = class {
     if (this.srcRate === 8e3 && this.dstRate === 16e3) {
       return this._upsample8kTo16k(aligned);
     }
+    if (this.srcRate === 24e3 && this.dstRate === 8e3) {
+      return this._resample24kTo8k(aligned);
+    }
     return this._resample24kTo16k(aligned);
   }
   /**
@@ -3356,7 +3330,7 @@ var StatefulResampler = class {
     return outBuf;
   }
   // ---------------------------------------------------------------------------
-  // Private: 24 kHz → 16 kHz
+  // Private: 24 kHz → 16 kHz / 8 kHz
   // ---------------------------------------------------------------------------
   /**
    * 3:2 linear-interpolation decimator (ratio srcRate/dstRate = 1.5).
@@ -3367,6 +3341,14 @@ var StatefulResampler = class {
    * handled using `resample24Last`.
    */
   _resample24kTo16k(buf) {
+    return this._resample24kStep(buf, 24e3 / 16e3);
+  }
+  /** 3:1 decimation — collapses the 24k→16k→8k chain into a single step. */
+  _resample24kTo8k(buf) {
+    return this._resample24kStep(buf, 24e3 / 8e3);
+  }
+  /** Shared phase-stepping resampler used by 24→16 (step 1.5) and 24→8 (step 3). */
+  _resample24kStep(buf, step) {
     const sampleCount = buf.length >> 1;
     if (sampleCount === 0) return Buffer.alloc(0);
     const outArr = [];
@@ -3386,7 +3368,7 @@ var StatefulResampler = class {
       }
       const interp = Math.round(s0 + (s1 - s0) * frac);
       outArr.push(Math.max(-32768, Math.min(32767, interp)));
-      phase += 24e3 / 16e3;
+      phase += step;
     }
     this.resample24Last = buf.readInt16LE((sampleCount - 1) * 2);
     this.resample24HasHistory = true;
@@ -3405,6 +3387,9 @@ function createResampler8kTo16k() {
 function createResampler24kTo16k() {
   return new StatefulResampler({ srcRate: 24e3, dstRate: 16e3 });
 }
+function createResampler24kTo8k() {
+  return new StatefulResampler({ srcRate: 24e3, dstRate: 8e3 });
+}
 var _warnedResample8kTo16k = false;
 var _warnedResample16kTo8k = false;
 var _warnedResample24kTo16k = false;
@@ -3458,6 +3443,7 @@ function resample24kTo16k(pcm24k) {
 }
 // src/handler-utils.ts
+init_esm_shims();
 function createHistoryManager(maxSize) {
   const entries = [];
   const push = (entry) => {
@@ -3467,59 +3453,239 @@ function createHistoryManager(maxSize) {
   const getHistory = () => [...entries];
   return { push, getHistory, entries };
 }
-async function executeToolWebhook(webhookUrl, toolName, parsedArgs, context, label = "") {
-  try {
-    validateWebhookUrl(webhookUrl);
-  } catch (e) {
-    const tag = label ? ` (${label})` : "";
-    getLogger().error(`Tool webhook URL rejected${tag}: ${String(e)}`);
-    return JSON.stringify({ error: String(e), fallback: true });
+// src/tools/mcp-client.ts
+init_esm_shims();
+function resolveConfig(input, index) {
+  if (typeof input === "string") {
+    return { url: input, headers: {}, name: `mcp[${index}]` };
+  }
+  if (!input.url) {
+    throw new Error(`mcpServers[${index}]: missing required 'url' field`);
+  }
+  return {
+    url: input.url,
+    headers: input.headers ?? {},
+    name: input.name ?? `mcp[${index}]`
+  };
+}
+var MCPManager = class {
+  configs;
+  connected = [];
+  constructor(servers) {
+    this.configs = (servers ?? []).map((s, i) => resolveConfig(s, i));
+  }
+  get hasServers() {
+    return this.configs.length > 0;
   }
-  let result = "";
-  for (let attempt = 0; attempt < 3; attempt++) {
+  /** Connect to every configured server and discover their tools.
+   *  Returns the discovered tools wrapped as Patter ``ToolDefinition``s. */
+  async connect() {
+    if (this.configs.length === 0) return [];
+    let mcpModule;
+    let transportModule;
     try {
-      const resp = await fetch(webhookUrl, {
-        method: "POST",
-        headers: { "Content-Type": "application/json" },
-        body: JSON.stringify({
-          tool: toolName,
-          arguments: parsedArgs,
-          call_id: context.callId,
-          caller: context.caller,
-          attempt: attempt + 1
-        }),
-        signal: AbortSignal.timeout(1e4)
+      mcpModule = await import("./client-2GJVZT42.mjs");
+      transportModule = await import("./streamableHttp-WKNGHDVO.mjs");
+    } catch (e) {
+      throw new Error(
+        `mcpServers configured but \`@modelcontextprotocol/sdk\` is not installed. Run \`npm install @modelcontextprotocol/sdk\` to enable MCP support. (import error: ${String(e)})`
+      );
+    }
+    const aggregatedTools = [];
+    for (const cfg of this.configs) {
+      const transport = new transportModule.StreamableHTTPClientTransport(new URL(cfg.url), {
+        requestInit: { headers: cfg.headers }
       });
-      if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
-      result = JSON.stringify(await resp.json());
-      const MAX_RESPONSE_BYTES2 = 1 * 1024 * 1024;
-      if (result.length > MAX_RESPONSE_BYTES2) {
-        const tag = label ? ` (${label})` : "";
-        getLogger().warn(`Tool webhook response too large: ${result.length} bytes (max ${MAX_RESPONSE_BYTES2})${tag}`);
-        return JSON.stringify({ error: `Webhook response too large: ${result.length} bytes (max ${MAX_RESPONSE_BYTES2})`, fallback: true });
+      const client = new mcpModule.Client({ name: "patter", version: "0.6.0" });
+      try {
+        await client.connect(transport);
+      } catch (e) {
+        getLogger().error(`MCP server '${cfg.name}' (${cfg.url}) connect failed: ${String(e)}`);
+        try {
+          await transport.close?.();
+        } catch {
+        }
+        continue;
       }
-      return result;
-    } catch (e) {
-      if (attempt < 2) {
-        const tag = label ? ` (${label})` : "";
-        getLogger().info(`Tool webhook retry ${attempt + 1}${tag}: ${String(e)}`);
-        await new Promise((r) => setTimeout(r, 500));
-      } else {
-        result = JSON.stringify({ error: `Tool failed after 3 attempts: ${String(e)}`, fallback: true });
+      this.connected.push({ config: cfg, client, transport });
+      let listed;
+      try {
+        listed = await client.listTools();
+      } catch (e) {
+        getLogger().error(`MCP server '${cfg.name}' tools/list failed: ${String(e)}`);
+        continue;
       }
+      const tools = Array.isArray(listed?.tools) ? listed.tools : [];
+      for (const t of tools) {
+        if (!t?.name) continue;
+        aggregatedTools.push({
+          name: t.name,
+          description: t.description ?? "",
+          parameters: t.inputSchema ?? { type: "object", properties: {} },
+          handler: async (args) => {
+            const callResult = await client.callTool({
+              name: t.name,
+              arguments: args
+            });
+            const text = (callResult.content ?? []).map((c) => c.type === "text" ? c.text ?? "" : JSON.stringify(c)).join("\n");
+            if (callResult.isError) {
+              return JSON.stringify({ error: text || "MCP tool error", fallback: true });
+            }
+            return text || "{}";
+          }
+        });
+      }
+      getLogger().info(`MCP server '${cfg.name}' registered ${tools.length} tool(s)`);
     }
+    return aggregatedTools;
   }
-  return result;
-}
+  /** Validate no tool name collides between MCP-discovered and
+   *  user-supplied tools. Throws on conflict so the user fixes it. */
+  static assertNoConflicts(userTools, mcpTools) {
+    if (!userTools || userTools.length === 0 || mcpTools.length === 0) return;
+    const userNames = new Set(userTools.map((t) => t.name));
+    for (const mcp of mcpTools) {
+      if (userNames.has(mcp.name)) {
+        throw new Error(
+          `MCP tool '${mcp.name}' collides with a user-supplied tool of the same name. Rename one of them or remove the duplicate from agent.tools.`
+        );
+      }
+    }
+  }
+  /** Close every open MCP connection. Idempotent; logs but does not
+   *  throw on individual failures (we don't want a flaky shutdown to
+   *  derail the call-end teardown). */
+  async close() {
+    const conns = this.connected;
+    this.connected = [];
+    for (const conn of conns) {
+      try {
+        await conn.client.close?.();
+      } catch (e) {
+        getLogger().debug(`MCP server '${conn.config.name}' close error (ignored): ${String(e)}`);
+      }
+    }
+  }
+};
 // src/sentence-chunker.ts
+init_esm_shims();
 var DEFAULT_MIN_SENTENCE_LEN = 20;
-var DEFAULT_MIN_WORDS_FOR_SHORT_FLUSH = 2;
-var SENTENCE_TERMINATORS = ".!?\u3002\uFF01\uFF1F";
+var DEFAULT_MIN_WORDS_FOR_SHORT_FLUSH = 1;
+var HONORIFICS_EN = [
+  "Mr",
+  "St",
+  "Mrs",
+  "Ms",
+  "Dr",
+  "Prof",
+  "Gen",
+  "Sen",
+  "Rep",
+  "Lt",
+  "Cpt",
+  "Capt",
+  "Col",
+  "Cmdr",
+  "Adm"
+];
+var HONORIFICS_IT = [
+  "Sig",
+  "Sgr",
+  "Dott",
+  "Prof",
+  "Avv",
+  "Ing",
+  "Geom",
+  "Rag",
+  "Arch",
+  "On",
+  "Egr",
+  "Spett",
+  "Gent",
+  "Ill"
+];
+var HONORIFICS_ES = [
+  "Sr",
+  "Sra",
+  "Sres",
+  "Sras",
+  "Srta",
+  "Srtas",
+  "Dr",
+  "Dra",
+  "Dres",
+  "Lic",
+  "Licda",
+  "Ing",
+  "Prof",
+  "Profa",
+  "Arq",
+  "Mtro",
+  "Mtra"
+];
+var HONORIFICS_DE = [
+  "Hr",
+  "Fr",
+  "Frl",
+  "Dr",
+  "Prof",
+  "Dipl",
+  "Mag"
+];
+var HONORIFICS_FR = [
+  "Mme",
+  "Mmes",
+  "Mlle",
+  "Mlles",
+  "MM",
+  "Dr",
+  "Pr",
+  "Mgr",
+  "Me"
+];
+var HONORIFICS_PT = [
+  "Sr",
+  "Sra",
+  "Srs",
+  "Sras",
+  "Srta",
+  "Srtas",
+  "Dr",
+  "Dra",
+  "Eng",
+  "Enga",
+  "Prof",
+  "Profa"
+];
+var HONORIFICS_BY_LANGUAGE = {
+  en: HONORIFICS_EN,
+  it: HONORIFICS_IT,
+  es: HONORIFICS_ES,
+  de: HONORIFICS_DE,
+  fr: HONORIFICS_FR,
+  pt: HONORIFICS_PT
+};
+var HONORIFICS_ALL = Array.from(
+  new Set(Object.values(HONORIFICS_BY_LANGUAGE).flat())
+).sort((a, b) => b.length - a.length || a.localeCompare(b));
+var SENTENCE_TERMINATORS = ".!?\u2026;\u3002\uFF01\uFF1F\uFF1B\uFF0E\uFF61";
+var UNAMBIGUOUS_NON_LATIN_TERMINATORS = "\u0964\u0965\u061F\u061B\u06D4\u060F\u0589\u1367\u1362\u17D4\u17D5\u104B\u0F0E\u0F0F";
+var TERMINATOR_REGEX_CLASS = Array.from(
+  new Set(SENTENCE_TERMINATORS + UNAMBIGUOUS_NON_LATIN_TERMINATORS)
+).map((c) => c.replace(/[\\^$.|?*+()[\]{}]/g, "\\$&")).sort().join("");
+var SOFT_TERMINATORS = ",\u2014\u2013";
+var DEFAULT_AGGRESSIVE_FIRST_MIN_LEN = 40;
+var CURRENCY_SYMBOLS = "$\u20AC\xA3\xA5\u20B9\u20A9";
+var HONORIFICS_REGEX_ALT = HONORIFICS_ALL.map(
+  (p) => p.replace(/[\\^$.|?*+()[\]{}]/g, "\\$&")
+).join("|");
+var HONORIFICS_SET = new Set(HONORIFICS_ALL);
 function splitSentences(text, minSentenceLen = DEFAULT_MIN_SENTENCE_LEN) {
   const alphabets = "([A-Za-z])";
-  const prefixes = "(Mr|St|Mrs|Ms|Dr)[.]";
-  const suffixes = "(Inc|Ltd|Jr|Sr|Co)";
+  const prefixes = `(${HONORIFICS_REGEX_ALT})[.]`;
+  const suffixes = "(Inc|Ltd|Jr|Sr|Co|ecc|cit|cap|sez|art|pag|fig|tab|cfr|vol|ed|vs|etc|No|Vol|pp|cf|ca|op|Mt|Hwy|Rt|Pl|Ave|Blvd|Sq)";
   const starters = "(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\\s|She\\s|It\\s|They\\s|Their\\s|Our\\s|We\\s|But\\s|However\\s|That\\s|This\\s|Wherever)";
   const acronyms = "([A-Z][.][A-Z][.](?:[A-Z][.])?)";
   const websites = "[.](com|net|org|io|gov|edu|me)";
@@ -3543,14 +3709,20 @@ function splitSentences(text, minSentenceLen = DEFAULT_MIN_SENTENCE_LEN) {
     new RegExp(alphabets + "[.]" + alphabets + "[.]", "g"),
     "$1<prd>$2<prd>"
   );
-  text = text.replace(new RegExp(" " + suffixes + "[.] " + starters, "g"), " $1<stop> $2");
+  text = text.replace(new RegExp(" " + suffixes + "[.] " + starters, "g"), " $1.<stop> $2");
   text = text.replace(new RegExp(" " + suffixes + "[.]", "g"), " $1<prd>");
   text = text.replace(new RegExp(" " + alphabets + "[.]", "g"), " $1<prd>");
-  text = text.replace(/([.!?\u3002\uff01\uff1f])(["\u201d])/g, "$1$2<stop>");
-  text = text.replace(/([.!?\u3002\uff01\uff1f])(?!["\u201d])/g, "$1<stop>");
-  text = text.replace(/<prd>/g, ".");
-  const splitted = text.split("<stop>");
-  text = text.replace(/<stop>/g, "");
+  text = text.replace(
+    new RegExp(`([${TERMINATOR_REGEX_CLASS}])(["\u201D])`, "g"),
+    "$1$2<stop>"
+  );
+  text = text.replace(
+    new RegExp(`([${TERMINATOR_REGEX_CLASS}])(?!["\u201D])`, "g"),
+    "$1<stop>"
+  );
+  text = text.replace(/<prd>/g, ".");
+  const splitted = text.split("<stop>");
+  text = text.replace(/<stop>/g, "");
   const sentences = [];
   let buff = "";
   let startPos = 0;
@@ -3575,9 +3747,16 @@ var SentenceChunker = class {
   buffer = "";
   minSentenceLen;
   minWordsForShortFlush;
+  aggressiveFirstMinLen;
+  aggressiveFirstFlush;
+  language;
+  isFirstFlush = true;
   constructor(options) {
     this.minSentenceLen = options?.minSentenceLen ?? DEFAULT_MIN_SENTENCE_LEN;
     this.minWordsForShortFlush = options?.minWordsForShortFlush ?? DEFAULT_MIN_WORDS_FOR_SHORT_FLUSH;
+    this.aggressiveFirstMinLen = options?.aggressiveFirstMinLen ?? DEFAULT_AGGRESSIVE_FIRST_MIN_LEN;
+    this.language = (options?.language ?? "en").toLowerCase();
+    this.aggressiveFirstFlush = (options?.aggressiveFirstFlush ?? false) && !this.language.startsWith("it");
   }
   /**
    * Feed a token. Returns zero or more complete sentences.
@@ -3588,13 +3767,21 @@ var SentenceChunker = class {
    *   sentence, all but the last (potentially incomplete) are emitted.
    * - **Short-flush path** — when the buffer is shorter than `minSentenceLen`
    *   but ends with a sentence terminator AND has at least
-   *   `minWordsForShortFlush` whitespace-separated words, emit it
-   *   immediately. This drops TTS TTFB on short greetings like `"Hi there!"`
-   *   while keeping single-word utterances (`"Sì."`) buffered until
-   *   `flush()`.
+   *   `minWordsForShortFlush` whitespace-separated words (default 1 — a
+   *   single-word reply like `"Yes."` flushes immediately for low TTS
+   *   TTFB). Acronym ("U.S.") and decimal ("f(x) = 2.") guards still block
+   *   dangerous cases. Bump `minWordsForShortFlush` to 2+ to keep
+   *   single-word utterances buffered until `flush()`.
    */
   push(token) {
     this.buffer += token;
+    if (this.aggressiveFirstFlush && this.isFirstFlush) {
+      const flushed = this.maybeAggressiveFirstFlush();
+      if (flushed !== null) {
+        this.isFirstFlush = false;
+        return [flushed];
+      }
+    }
     if (this.buffer.length < this.minSentenceLen) {
       return this.maybeShortFlush();
     }
@@ -3615,16 +3802,19 @@ var SentenceChunker = class {
    *
    * A buffer qualifies when **all** of these hold:
    * 1. Last non-whitespace char is a sentence terminator.
-   * 2. Word count is at least `minWordsForShortFlush` (default 2 — keeps
-   *    single-word "Sì." / "Yes." buffered until `flush()`).
+   * 2. Word count is at least `minWordsForShortFlush` (default 1 —
+   *    single-word replies like `"Yes."` flush immediately).
    * 3. The buffer contains exactly one terminator (the trailing one).
    *    Multiple terminators mean we may be mid-stream of a longer merged
    *    utterance like `"Hey! Hi! Hello! This is a sentence."` — let the
    *    standard path keep merging.
    * 4. The char immediately before the terminator is NOT a digit (avoids
    *    decimal mid-stream like `"f(x) = x * 2."` flushing before `54`).
-   * 5. The char immediately before the terminator is NOT an uppercase
-   *    ASCII letter (avoids acronym patterns like `"U.S."` / `"U."`).
+   * 5. The trailing word is NOT a short ASCII all-caps acronym of 1-3 letters
+   *    (`"U."` / `"US."` / `"USA."`); dotted forms like `"U.S."` rely on rule 3.
+   * 6. The trailing word is NOT a known honorific from any of the
+   *    per-language `HONORIFICS_*` constants (`"Mr."`, `"Sr."`, `"Dr."`,
+   *    `"Hr."`, `"Mme."`, ...).
    */
   maybeShortFlush() {
     const stripped = this.buffer.replace(/\s+$/, "");
@@ -3640,29 +3830,109 @@ var SentenceChunker = class {
     if (wordCount < this.minWordsForShortFlush) return [];
     if (stripped.length >= 2) {
       const prev = stripped[stripped.length - 2];
-      if (/\d/.test(prev) || /[A-Z]/.test(prev)) return [];
+      if (/\d/.test(prev)) return [];
+      const terminator = stripped[stripped.length - 1];
+      if (terminator === ".") {
+        const stripTerm = stripped.replace(
+          new RegExp(`[${TERMINATOR_REGEX_CLASS}]+$`),
+          ""
+        );
+        const tokens = stripTerm.split(/\s+/).filter((w) => w.length > 0);
+        const lastWord = tokens.length > 0 ? tokens[tokens.length - 1] : "";
+        if (/^[A-Z]{1,3}$/.test(lastWord)) return [];
+        if (HONORIFICS_SET.has(lastWord)) return [];
+      }
     }
     this.buffer = "";
     return [stripped];
   }
+  /**
+   * Try to flush the first clause of the response on a soft punctuation
+   * boundary (comma / em-dash / en-dash) to minimise TTFA.
+   *
+   * Returns the flushed clause text (with terminator) or `null` if no safe
+   * boundary is found. All of these guards must pass:
+   *
+   * 1. **Min length** — buffer ≥ `aggressiveFirstMinLen` (default 40).
+   * 2. **Trailing terminator** — last non-whitespace char in `SOFT_TERMINATORS`.
+   * 3. **Decimal/thousands guard** — refuse if comma is between two digits
+   *    or surrounded by digit-thousands grouping.
+   * 4. **Currency guard** — refuse if a currency symbol appears in the
+   *    preceding 8 characters.
+   * 5. **Balanced delimiter** — refuse if open parens/brackets/braces or
+   *    unmatched double-quotes still pending.
+   * 6. **Ellipsis** — refuse if buffer ends with `...` or `…`.
+   * 7. **Sub-token ambiguity** — only fire when at least one trailing char
+   *    after the terminator has arrived.
+   */
+  maybeAggressiveFirstFlush() {
+    const rstripped = this.buffer.replace(/\s+$/, "");
+    if (rstripped.length < this.aggressiveFirstMinLen) return null; // guard 1: too short to be worth an early flush
+    const lastChar = rstripped[rstripped.length - 1] ?? "";
+    if (!SOFT_TERMINATORS.includes(lastChar)) return null; // guard 2: must end on a soft terminator
+    const pos = rstripped.length - 1; // index of the candidate terminator
+    if (pos + 1 >= this.buffer.length) return null; // guard 7: nothing has arrived after the terminator yet
+    const nextChar = this.buffer[pos + 1] ?? ""; // NOTE(review): always whitespace here, since it is the first char removed by the trailing-whitespace strip
+    if (lastChar === ",") {
+      const prevChar = pos >= 1 ? rstripped[pos - 1] ?? "" : "";
+      if (/\d/.test(prevChar) && /\d/.test(nextChar)) return null; // NOTE(review): nextChar is whitespace, so this digit-comma-digit check looks unreachable - confirm
+      const tail = rstripped.slice(Math.max(0, pos - 6), pos); // up to 6 chars before the comma
+      if (/\d/.test(prevChar) && tail.includes(",") && /\d/.test(tail)) {
+        return null; // digit before the comma plus an earlier comma nearby: treat as thousands grouping
+      }
+    }
+    const snippet = rstripped.slice(Math.max(0, pos - 8), pos); // up to 8 chars before the terminator
+    for (const c of CURRENCY_SYMBOLS) {
+      if (snippet.includes(c)) return null; // guard 4: currency amount close to the boundary
+    }
+    const opens = (rstripped.match(/[([{]/g) ?? []).length;
+    const closes = (rstripped.match(/[)\]}]/g) ?? []).length;
+    if (opens > closes) return null; // guard 5: counts only, bracket kinds are not matched pairwise
+    const dquoteCount = (rstripped.match(/"/g) ?? []).length;
+    if (dquoteCount % 2 !== 0) return null; // odd number of straight double quotes means one is still open
+    if (rstripped.endsWith("...") || rstripped.endsWith("\u2026")) return null; // guard 6: ellipsis
+    if (lastChar === "," && nextChar === '"') return null; // NOTE(review): same nextChar caveat as above - likely unreachable
+    const flushed = rstripped;
+    this.buffer = this.buffer.slice(rstripped.length).replace(/^\s+/, ""); // keep only what follows the clause, minus leading whitespace
+    return flushed;
+  }
   /** Flush remaining buffer as final sentence(s). Call at end of stream. */
   flush() {
     const remaining = this.buffer.trim();
     this.buffer = "";
+    this.isFirstFlush = true;
     if (!remaining) return [];
     return [remaining];
   }
   /** Discard buffered text. Call on interrupt. */
   reset() {
     this.buffer = "";
+    this.isFirstFlush = true;
   }
 };
 // src/pipeline-hooks.ts
+init_esm_shims();
+var legacyAfterLlmWarned = false;
+function normaliseAfterLlm(hook) {
+  if (hook === void 0) return void 0;
+  if (typeof hook === "function") {
+    if (!legacyAfterLlmWarned) {
+      legacyAfterLlmWarned = true;
+      getLogger().warn(
+        "[patter] afterLlm: (text, ctx) => string is deprecated; pass an object with { onResponse } instead. The legacy form maps to onResponse and blocks streaming TTS. Will be removed in v0.7.0."
+      );
+    }
+    return { onResponse: hook };
+  }
+  return hook;
+}
 var PipelineHookExecutor = class {
   hooks;
+  afterLlm;
   constructor(hooks) {
     this.hooks = hooks;
+    this.afterLlm = normaliseAfterLlm(hooks?.afterLlm);
   }
   /**
    * Run beforeSendToStt hook. Returns null to drop the audio chunk.
@@ -3708,26 +3978,87 @@ var PipelineHookExecutor = class {
     }
   }
   /**
-   * Run afterLlm hook. Returns a possibly-modified assistant text.
-   * Returning ``null`` from the hook means "keep the original".
-   * Fail-open: on exception, the original text passes through.
+   * Tier 1 — per-token sync transform. Returns the (possibly transformed)
+   * chunk. Fail-open: on exception or non-string return, the original chunk
+   * passes through unchanged. Must be cheap (~0 ms budget).
    */
-  async runAfterLlm(text, ctx) {
-    if (!this.hooks?.afterLlm) return text;
+  runAfterLlmChunk(chunk) {
+    if (!this.afterLlm?.onChunk) return chunk;
     try {
-      const result = await this.hooks.afterLlm(text, ctx);
+      const result = this.afterLlm.onChunk(chunk);
+      return typeof result === "string" ? result : chunk;
+    } catch (e) {
+      getLogger().error("Pipeline hook afterLlm.onChunk threw:", e);
+      return chunk;
+    }
+  }
+  /**
+   * Tier 2 — per-sentence rewrite. Returns rewritten sentence text, the
+   * original sentence (if hook returned `null`), or `null` to drop the
+   * sentence entirely (empty string is treated as drop). Fail-open.
+   */
+  async runAfterLlmSentence(sentence, ctx) {
+    if (!this.afterLlm?.onSentence) return sentence; // no tier-2 hook configured: pass through
+    try {
+      const result = await this.afterLlm.onSentence(sentence, ctx);
+      if (result === null) return sentence; // null from the hook means keep the original sentence
+      if (result === "") return null; // empty string means drop the sentence; callers see null
+      return result; // NOTE(review): undefined or non-string results are returned as-is, unlike the typeof check in runAfterLlmChunk - confirm callers tolerate this
+    } catch (e) {
+      getLogger().error("Pipeline hook afterLlm.onSentence threw:", e);
+      return sentence; // fail-open: a throwing hook never blocks the sentence
+    }
+  }
+  /**
+   * Tier 3 — per-response rewrite. Returns the (possibly rewritten) full
+   * response text. Triggered after the LLM stream completes. Caller is
+   * responsible for buffering tokens before invocation. Fail-open.
+   */
+  async runAfterLlmResponse(text, ctx) {
+    if (!this.afterLlm?.onResponse) return text;
+    try {
+      const result = await this.afterLlm.onResponse(text, ctx);
       return result ?? text;
     } catch (e) {
-      getLogger().error("Pipeline hook afterLlm threw:", e);
+      getLogger().error("Pipeline hook afterLlm.onResponse threw:", e);
       return text;
     }
   }
   /**
-   * Whether ``afterLlm`` is configured. Used by the LLM loop to decide
-   * whether to buffer streaming tokens before yielding them.
+   * Backward-compatible alias for `runAfterLlmResponse`. Existing call sites
+   * in the LLM loop continue to work unchanged.
+   *
+   * @deprecated Use `runAfterLlmResponse` directly.
+   */
+  async runAfterLlm(text, ctx) {
+    return this.runAfterLlmResponse(text, ctx);
+  }
+  /**
+   * Whether a per-response (tier 3) `onResponse` transform is configured.
+   * The LLM loop uses this to decide whether to buffer streaming tokens
+   * before yielding them. Per-token (tier 1) and per-sentence (tier 2)
+   * transforms do NOT require buffering.
+   */
+  hasAfterLlmResponse() {
+    return Boolean(this.afterLlm?.onResponse);
+  }
+  /** Whether a per-sentence (tier 2) transform is configured. */
+  hasAfterLlmSentence() {
+    return Boolean(this.afterLlm?.onSentence);
+  }
+  /** Whether a per-token (tier 1) transform is configured. */
+  hasAfterLlmChunk() {
+    return Boolean(this.afterLlm?.onChunk);
+  }
+  /**
+   * Backward-compatible alias for `hasAfterLlmResponse`. The legacy callable
+   * form maps to `onResponse`, so this preserves the original semantic for
+   * existing call sites.
+   *
+   * @deprecated Use `hasAfterLlmResponse` directly.
    */
   hasAfterLlm() {
-    return Boolean(this.hooks?.afterLlm);
+    return this.hasAfterLlmResponse();
   }
   /**
    * Run beforeSynthesize hook. Returns null if hook vetoes TTS for this sentence.
@@ -3758,6 +4089,7 @@ var PipelineHookExecutor = class {
 };
 // src/observability/event-bus.ts
+init_esm_shims();
 var EventBus = class {
   listeners = /* @__PURE__ */ new Map();
   /**
@@ -3784,17 +4116,18 @@ var EventBus = class {
         const res = cb(payload);
         if (res && typeof res.catch === "function") {
           res.catch(
-            (e) => getLogger().warn(`[EventBus] listener for "${event}" rejected:`, e)
+            (e) => getLogger().error(`[EventBus] listener for "${event}" rejected:`, e)
           );
         }
       } catch (e) {
-        getLogger().warn(`[EventBus] listener for "${event}" threw:`, e);
+        getLogger().error(`[EventBus] listener for "${event}" threw:`, e);
       }
     }
   }
 };
 // src/observability/tracing.ts
+init_esm_shims();
 var ENV_FLAG = "PATTER_OTEL_ENABLED";
 var SERVICE_NAME = "patter";
 var SPAN_CALL = "getpatter.call";
@@ -3982,7 +4315,7 @@ var HALLUCINATIONS = /* @__PURE__ */ new Set([
   "right",
   "cool"
 ]);
-var StreamHandler = class {
+var StreamHandler = class _StreamHandler {
   deps;
   ws;
   caller;
@@ -3994,8 +4327,50 @@ var StreamHandler = class {
   stt = null;
   tts = null;
   isSpeaking = false;
+  /**
+   * Ring buffer of inbound PCM16 16 kHz frames captured while the agent
+   * is speaking and the self-hearing guard is dropping audio. On
+   * barge-in we flush this buffer to STT so Deepgram (or any other
+   * streaming STT) receives the user's first ~500 ms of speech — which
+   * would otherwise be lost while the VAD's `minSpeechDuration` window
+   * accumulated and fired `speech_start`. Each frame is 20 ms × 32 bytes/ms
+   * (16 kHz × 16-bit mono) = 640 bytes.
+   *
+   * Capped to ``INBOUND_AUDIO_RING_FRAMES`` to recover only the
+   * VAD-missed leading edge of the user's speech (default 250 ms,
+   * matching SileroVAD ``minSpeechDuration``). Earlier values up to
+   * 600 ms were including ~350 ms of pre-speech silence/agent-bleed in
+   * the replay; on PSTN (where AEC is a no-op) Deepgram trained on
+   * English happily transcribes that bleed as English garbage
+   * (``"The same as Edgar,"``, ``"Permadees."``) and commits it to
+   * the LLM as a phantom user transcript. See BUGS.md 2026-05-05
+   * post-barge-in bleed-transcription entry.
+   */
+  inboundAudioRing = [];
+  static INBOUND_AUDIO_RING_FRAMES = 13;
+  /**
+   * Cached LLM provider tag used by speech-event payloads. Mirrors the
+   * value passed to the metrics accumulator at construction time so the
+   * speech-edge events report the same provider classification as
+   * dashboard / pricing rows.
+   */
+  llmProviderTag = "openai";
   /** Set to true after a VAD error to suppress log spam for the rest of the call. */
   vadDisabled = false;
+  /**
+   * Auto-loaded SileroVAD when ``agent.vad`` is undefined. Populated by
+   * ``initPipeline`` and queried alongside ``agent.vad`` on every audio frame.
+   * Stays null when ``onnxruntime-node`` is not installed — the pipeline
+   * then falls back to the STT-endpoint heuristic (legacy behaviour).
+   */
+  autoVad = null;
+  /**
+   * Acoustic echo canceller (NLMS adaptive filter). Lazily instantiated in
+   * ``initPipeline`` when ``agent.echoCancellation`` is true. ``null``
+   * otherwise — the mic path stays a pure pass-through for handset /
+   * headset deployments that don't have TTS bleed.
+   */
+  aec = null;
   /**
    * Monotonic counter incremented on every TTS-start. The grace timer
    * scheduled by ``endSpeakingWithGrace`` only flips ``isSpeaking=false``
@@ -4004,20 +4379,97 @@ var StreamHandler = class {
    * own ``isSpeaking=true``.
    */
   speakingGeneration = 0;
+  /**
+   * Wall-clock timestamp (ms since epoch) when the current TTS turn
+   * started — captured by ``beginSpeaking`` and cleared by
+   * ``cancelSpeaking`` / the grace flip. Used to gate barge-in: we
+   * suppress the cancel for the first
+   * ``MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_AEC`` of every turn (when AEC
+   * is on) so the AEC filter has time to converge — otherwise residual
+   * TTS bleed in the mic stream looks like user speech to VAD and
+   * triggers an immediate self-cancellation of the agent's first
+   * sentence.
+   */
+  speakingStartedAt = null;
+  /**
+   * Minimum wall-clock duration (ms) the agent must have been speaking
+   * before barge-in is allowed to fire when AEC is active. Covers the
+   * AEC warmup window (~500 ms) plus a safety margin so residual bleed
+   * during the convergence period does not self-trigger barge-in.
+   */
+  static MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_AEC = 1e3;
+  /**
+   * Same as the AEC variant but for deployments where AEC is OFF
+   * (default on PSTN — Twilio/Telnyx). Without an adaptive filter to
+   * converge, the only justification for a gate is anti-flicker on
+   * micro-events (cough, click). A short 250 ms window keeps real-user
+   * barge-in responsive while still filtering tiny noise spikes.
+   */
+  static MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_NO_AEC = 250;
   /** Handle for the pending grace-period timer, so it can be cleared on cleanup. */
   graceTimer = null;
-  /** Mark the start of a TTS span. Use instead of setting isSpeaking directly. */
-  beginSpeaking() {
+  /**
+   * AbortController for the current LLM streaming consumption.  Aborted by
+   * ``cancelSpeaking`` so the in-flight LLM stream stops generating tokens
+   * we will never speak — saves provider cost and frees the connection
+   * earlier.  Mirrors Python ``_llm_cancel_event``.
+   */
+  llmAbort = null;
+  /**
+   * Wall-clock timestamp of the most recent ``cancelSpeaking`` call, or
+   * ``null`` if no cancel has fired since the call started. Used by
+   * ``beginSpeaking`` to enforce a short post-cancel drain window so the
+   * remote PSTN player finishes flushing the previous turn's in-flight
+   * audio before the next TTS chunk lands on top of it. Without this,
+   * the first sentence of a post-barge-in turn audibly overlaps with
+   * the tail of the cancelled turn (~50-200 ms of doubled audio).
+   */
+  lastCancelAt = null;
+  /**
+   * Minimum drain window (ms) between a ``cancelSpeaking`` and the next
+   * ``beginSpeaking``. 150 ms covers a typical PSTN jitter buffer drain
+   * + Twilio Media Stream clear propagation. Lower values risk audio
+   * overlap on the first chunk; higher values increase the perceived
+   * "agent ack" latency after a barge-in. 150 ms is the smallest value
+   * that consistently eliminated the overlap during 0.6.0 acceptance.
+   */
+  static POST_CANCEL_DRAIN_MS = 150;
+  /**
+   * Mark the start of a TTS span. Use instead of setting isSpeaking
+   * directly. Awaits the post-cancel drain window before flipping state
+   * so the remote player has time to flush the cancelled turn's tail.
+   */
+  async beginSpeaking() {
+    if (this.lastCancelAt !== null) {
+      const elapsed = Date.now() - this.lastCancelAt;
+      const remaining = _StreamHandler.POST_CANCEL_DRAIN_MS - elapsed;
+      if (remaining > 0) {
+        await new Promise((r) => setTimeout(r, remaining));
+      }
+    }
     this.speakingGeneration++;
     this.isSpeaking = true;
+    this.speakingStartedAt = Date.now();
+    this.inboundAudioRing = [];
   }
   /**
    * Atomically end speaking AND invalidate any pending grace timer.
    * Use instead of ``this.isSpeaking = false`` at barge-in sites.
+   *
+   * Also aborts the in-flight LLM stream (if any) so the provider stops
+   * billing tokens we will never speak.
    */
   cancelSpeaking() {
     this.speakingGeneration++;
     this.isSpeaking = false;
+    this.speakingStartedAt = null;
+    this.lastCancelAt = Date.now();
+    if (this.llmAbort !== null) {
+      try {
+        this.llmAbort.abort();
+      } catch {
+      }
+    }
   }
   /** Cancel and clear the pending grace timer, if any. */
   clearGraceTimer() {
@@ -4040,18 +4492,102 @@ var StreamHandler = class {
       this.clearGraceTimer();
       this.graceTimer = setTimeout(() => {
         this.graceTimer = null;
-        if (this.speakingGeneration === gen) this.isSpeaking = false;
+        if (this.speakingGeneration === gen) {
+          this.isSpeaking = false;
+          this.speakingStartedAt = null;
+        }
       }, grace);
     } else {
       this.isSpeaking = false;
+      this.speakingStartedAt = null;
     }
   }
+  /**
+   * Whether barge-in is allowed to fire right now. Gate length depends
+   * on whether AEC is active: 1 s with AEC (covers filter warmup),
+   * 250 ms without (anti-flicker only — keeps PSTN barge-in responsive).
+   */
+  canBargeIn() {
+    if (this.speakingStartedAt === null) return true;
+    const elapsed = Date.now() - this.speakingStartedAt;
+    const gate = this.aec ? _StreamHandler.MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_AEC : _StreamHandler.MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_NO_AEC;
+    return elapsed >= gate;
+  }
+  /**
+   * Replay the audio captured by the self-hearing guard right before a
+   * confirmed barge-in. VAD's ``minSpeechDuration`` window (default
+   * 250 ms) means ``speech_start`` fires only AFTER the user has been
+   * talking for that long; without this replay STT sees only the tail
+   * of the user's interruption and produces "the line is breaking up"
+   * partial transcripts. We deliberately do NOT call this on natural
+   * turn end — see the comment in ``endSpeakingWithGrace`` for why.
+   */
+  flushInboundAudioRing() {
+    if (!this.stt || this.inboundAudioRing.length === 0) return; // nothing to replay, or no STT to receive it
+    const replayed = this.inboundAudioRing.length; // captured before the ring is cleared, for the log line below
+    for (const buf of this.inboundAudioRing) {
+      try {
+        this.stt.sendAudio(buf); // frames are sent in capture order
+      } catch (err) {
+        getLogger().debug(`sendAudio replay failed: ${String(err)}`); // best-effort: one failed frame does not stop the replay
+      }
+    }
+    this.inboundAudioRing = []; // cleared even if some frames failed to send
+    getLogger().info(
+      `[DIAG] Flushed ${replayed} pre-barge-in frame(s) (~${replayed * 20} ms) to STT`
+    ); // the ms figure assumes 20 ms per buffered frame
+  }
   llmLoop = null;
+  /**
+   * Per-call tool executor — provides retry-with-exponential-backoff and a
+   * per-tool circuit breaker for Realtime function calls. Pipeline mode
+   * uses its own executor inside ``LLMLoop``; this one is dedicated to
+   * the Realtime path so a flaky downstream (DB outage, vendor rate
+   * limit) returns a structured ``{ error, fallback: true }`` instead of
+   * hanging the model on retries that will keep failing.
+   */
+  toolExecutor = new DefaultToolExecutor();
+  /**
+   * MCP server connection manager — populated lazily in
+   * ``initMcpTools()`` when the agent declares ``mcpServers``. Holds
+   * the open MCP client connections for the lifetime of the call so
+   * we can dispatch ``tools/call`` without re-handshaking on every
+   * function invocation. Cleared in ``fireCallEnd``.
+   */
+  mcpManager = null;
   chunkCount = 0;
   callEndFired = false;
   sttClosed = false;
   currentAgentText = "";
   responseAudioStarted = false;
+  /**
+   * Realtime turn ordering buffer. OpenAI Realtime emits
+   * `input_audio_transcription.completed` (user transcript) AFTER
+   * `response.done` (assistant complete) because Whisper transcription
+   * runs in parallel with — and slower than — model response. Without
+   * this buffer the pushed `history` order is [assistant, user, ...]
+   * which renders out-of-order in the dashboard.
+   *
+   * Behaviour:
+   *  - `onAdapterSpeechStopped` flips `userTranscriptPending = true`
+   *  - `onAdapterResponseDone` checks the flag; if set, stashes the
+   *    assistant text + a fallback timer
+   *  - `onAdapterTranscriptInput` clears the flag, pushes user, then
+   *    flushes any pending assistant turn
+   *  - The fallback timer flushes the assistant alone if the user
+   *    transcript never arrives (silence misclassified as speech, etc.)
+   */
+  userTranscriptPending = false;
+  pendingAssistantTurn = null;
+  pendingAssistantTimer = null;
+  /**
+   * Hard cap on how long we wait for the user transcript before flushing
+   * the buffered assistant turn alone. 3 s covers OpenAI Whisper's typical
+   * 200-800 ms post-response delay with substantial headroom for slow
+   * cellular audio uploads. Beyond this we accept the order will look
+   * "assistant-only" rather than block the call's transcript display.
+   */
+  static REALTIME_USER_TRANSCRIPT_WAIT_MS = 3e3;
   maxDurationTimer = null;
   transcriptProcessing = false;
   transcriptQueue = [];
@@ -4080,9 +4616,12 @@ var StreamHandler = class {
     this.history = createHistoryManager(200);
     const sttKey = deps.agent.stt?.constructor?.providerKey;
     const sttProviderName = deps.agent.stt ? sttKey ?? deps.agent.stt.constructor?.name ?? "custom" : void 0;
+    const sttModelName = String((deps.agent.stt?.model ?? "") || "");
     const ttsKey = deps.agent.tts?.constructor?.providerKey;
     const ttsProviderName = deps.agent.tts ? ttsKey ?? deps.agent.tts.constructor?.name ?? "custom" : void 0;
+    const ttsModelName = String((deps.agent.tts?.model ?? "") || "");
     const providerMode = deps.agent.provider ?? "openai_realtime";
+    const realtimeModelName = providerMode === "openai_realtime" ? String((deps.agent.model ?? "") || "") || "gpt-realtime-mini" : "";
     const llmKey = deps.agent.llm?.constructor?.providerKey;
     let llmProviderName;
     if (deps.agent.llm) {
@@ -4095,6 +4634,7 @@ var StreamHandler = class {
     } else {
       llmProviderName = providerMode === "openai_realtime" ? "openai_realtime" : "openai";
     }
+    this.llmProviderTag = llmProviderName;
     this._eventBus = new EventBus();
     this.metricsAcc = new CallMetricsAccumulator({
       callId: "",
@@ -4103,6 +4643,9 @@ var StreamHandler = class {
       sttProvider: sttProviderName,
       ttsProvider: ttsProviderName,
       llmProvider: llmProviderName,
+      sttModel: sttModelName,
+      ttsModel: ttsModelName,
+      realtimeModel: realtimeModelName,
       pricing: deps.pricing,
       eventBus: this._eventBus,
       reportOnlyInitialTtfb: deps.reportOnlyInitialTtfb ?? false
@@ -4213,6 +4756,7 @@ var StreamHandler = class {
    * @param callId       Call SID (Twilio) or call_control_id (Telnyx)
    * @param customParams TwiML custom parameters (Twilio only, empty for Telnyx)
    */
+  /** Initialize per-call state, build the AI adapter, and dispatch the `onCallStart` callback. */
   async handleCallStart(callId, customParams = {}) {
     this.callId = callId;
     this.metricsAcc.callId = callId;
@@ -4239,7 +4783,7 @@ var StreamHandler = class {
       }
     }, MAX_CALL_DURATION_MS);
     try {
-      const { notifyDashboard } = await import("./persistence-LQBYQPQQ.mjs");
+      const { notifyDashboard } = await import("./persistence-LVIAHESK.mjs");
       notifyDashboard({
         call_id: callId,
         caller: this.caller,
@@ -4264,25 +4808,58 @@ var StreamHandler = class {
     const allVars = { ...agentVars, ...safeCustomParams };
     const resolvedPrompt = Object.keys(allVars).length > 0 ? this.deps.resolveVariables(this.deps.agent.systemPrompt, allVars) : this.deps.agent.systemPrompt;
     const provider2 = this.deps.agent.provider ?? "openai_realtime";
+    await this.initMcpTools();
     if (provider2 === "pipeline") {
       await this.initPipeline(resolvedPrompt);
     } else {
       await this.initRealtimeAdapter(resolvedPrompt);
     }
   }
+  /**
+   * Connect to every configured MCP server, discover their tools via
+   * ``tools/list``, and merge them into ``agent.tools`` before the
+   * adapter is built. The synthetic handlers dispatch back through the
+   * MCP client so ``DefaultToolExecutor`` can invoke them like any
+   * other handler-tool. No-op when ``agent.mcpServers`` is empty or the
+   * optional ``@modelcontextprotocol/sdk`` is not installed.
+   */
+  async initMcpTools() {
+    const servers = this.deps.agent.mcpServers;
+    if (!servers || servers.length === 0) return; // no MCP servers configured: agent.tools stays untouched
+    this.mcpManager = new MCPManager(servers);
+    let discovered;
+    try {
+      discovered = await this.mcpManager.connect();
+    } catch (e) {
+      getLogger().error(`MCP connect failed (continuing without MCP tools): ${String(e)}`);
+      this.mcpManager = null; // drop the manager so later code sees MCP as unavailable
+      return; // connect failure is non-fatal: the call proceeds without MCP tools
+    }
+    if (discovered.length === 0) return; // connected, but no tools were discovered
+    MCPManager.assertNoConflicts(this.deps.agent.tools, discovered); // NOTE(review): runs outside the try above, so anything it throws propagates to the caller - confirm intended
+    const mutableAgent = this.deps.agent;
+    mutableAgent.tools = [...mutableAgent.tools ?? [], ...discovered]; // NOTE(review): reassigns tools on this.deps.agent itself; if that object outlives this call the merged tools persist - verify
+    getLogger().info(`MCP: merged ${discovered.length} tool(s) into agent`);
+  }
   /** Set the stream SID (Twilio only, called after parsing 'start' event). */
+  /** Set the carrier-side stream id (Twilio `streamSid` / Telnyx stream identifier). */
   setStreamSid(sid) {
     this.streamSid = sid;
   }
   /** Handle an incoming audio chunk (already decoded from base64). */
+  /** Forward inbound audio bytes to the AI adapter and (in pipeline mode) the STT provider. */
   async handleAudio(audioBuffer) {
     const provider2 = this.deps.agent.provider ?? "openai_realtime";
     if (provider2 === "pipeline" && this.stt) {
       const pcm8k = mulawToPcm16(audioBuffer);
-      const pcm16k = this.inboundResampler.process(pcm8k);
-      if (this.deps.agent.vad && !this.vadDisabled) {
+      let pcm16k = this.inboundResampler.process(pcm8k);
+      if (this.aec) {
+        pcm16k = this.aec.processNearEnd(pcm16k);
+      }
+      const activeVad = this.deps.agent.vad ?? this.autoVad;
+      if (activeVad && !this.vadDisabled) {
         try {
-          const vadPromise = this.deps.agent.vad.processFrame(pcm16k, 16e3);
+          const vadPromise = activeVad.processFrame(pcm16k, 16e3);
           const timeoutPromise = new Promise((resolve) => setTimeout(() => resolve(null), 25));
           const evt = await Promise.race([vadPromise, timeoutPromise]);
           if (evt) {
@@ -4291,7 +4868,11 @@ var StreamHandler = class {
             );
           }
           if (evt?.type === "speech_start") {
-            if (this.isSpeaking) {
+            if (this.isSpeaking && !this.canBargeIn()) {
+              getLogger().info(
+                `[VAD] speech_start suppressed (agent speaking < gate, aec=${this.aec ? "on" : "off"})`
+              );
+            } else if (this.isSpeaking) {
               getLogger().info("[VAD] speech_start during TTS \u2192 BARGE-IN");
               this.metricsAcc.recordOverlapStart();
               this.metricsAcc.recordBargeinDetected();
@@ -4303,6 +4884,7 @@ var StreamHandler = class {
                 } catch (err) {
                   getLogger().debug(`sendClear during VAD barge-in failed: ${String(err)}`);
                 }
+                this.flushInboundAudioRing();
                 this.metricsAcc.recordTtsStopped();
                 this.metricsAcc.recordTurnInterrupted();
                 this.metricsAcc.recordOverlapEnd(true);
@@ -4316,6 +4898,16 @@ var StreamHandler = class {
             this.metricsAcc.startTurnIfIdle();
           } else if (evt?.type === "speech_end") {
             this.metricsAcc.recordVadStop();
+            try {
+              const ret = this.stt?.finalize?.();
+              if (ret instanceof Promise) {
+                ret.catch(
+                  (err) => getLogger().debug(`STT finalize threw: ${String(err)}`)
+                );
+              }
+            } catch (err) {
+              getLogger().debug(`STT finalize threw: ${String(err)}`);
+            }
           }
         } catch (err) {
           this.vadDisabled = true;
@@ -4323,7 +4915,13 @@ var StreamHandler = class {
         }
       }
       if (this.isSpeaking) {
-        if (this.deps.agent.vad) return;
+        if (this.deps.agent.vad ?? this.autoVad) {
+          this.inboundAudioRing.push(pcm16k);
+          if (this.inboundAudioRing.length > _StreamHandler.INBOUND_AUDIO_RING_FRAMES) {
+            this.inboundAudioRing.shift();
+          }
+          return;
+        }
         if ((this.deps.agent.bargeInThresholdMs ?? 300) === 0) return;
       }
       const hooks = this.deps.agent.hooks;
@@ -4349,6 +4947,7 @@ var StreamHandler = class {
     }
   }
   /** Handle a DTMF keypress event (Twilio only). */
+  /** Handle an inbound DTMF tone from the caller. */
   async handleDtmf(digit) {
     getLogger().debug(`DTMF: ${digit}`);
     if (this.adapter instanceof OpenAIRealtimeAdapter) {
@@ -4371,12 +4970,14 @@ var StreamHandler = class {
    * ``twilio_handler.py``: ``audio_sender.on_mark_confirmed(mark_name)`` +
    * ``handler.on_mark(mark_name)``.
    */
+  /** Handle a Twilio Media Streams `mark` event acknowledging audio playback boundaries. */
   async onMark(markName) {
     if (markName) {
       this.lastConfirmedMark = markName;
     }
   }
   /** Handle call stop / stream end. */
+  /** Handle a carrier-emitted `stop` event signalling the call has ended. */
   async handleStop() {
     this.clearGraceTimer();
     this.flushResamplers();
@@ -4388,6 +4989,7 @@ var StreamHandler = class {
     await this.fireCallEnd();
   }
   /** Handle WebSocket close event. */
+  /** Tear down adapter, STT/TTS, and per-call state when the carrier WebSocket closes. */
   async handleWsClose() {
     this.clearGraceTimer();
     this.flushResamplers();
@@ -4422,7 +5024,7 @@ var StreamHandler = class {
    * (server.ts) requests ``stream_bidirectional_codec=PCMU`` at 8 kHz. So
    * the wire format for both providers is mulaw 8 kHz; we resample 16 kHz
    * PCM16 → 8 kHz then encode to mulaw. Mirrors the Python pipeline path
-   * (sdk-py/getpatter/handlers/telnyx_handler.py::TelnyxAudioSender).
+   * (libraries/python/getpatter/handlers/telnyx_handler.py::TelnyxAudioSender).
    *
    * Maintains a 1-byte carry across calls so unaligned HTTP chunks from
    * streaming TTS providers never byte-swap the PCM16 samples downstream.
@@ -4451,12 +5053,61 @@ var StreamHandler = class {
     const label = this.deps.bridge.label;
     this.stt = await this.deps.bridge.createStt(this.deps.agent);
     this.tts = await createTTS(this.deps.agent);
+    if (this.tts) {
+      const carrierAware = this.tts;
+      if (typeof carrierAware.setTelephonyCarrier === "function") {
+        try {
+          carrierAware.setTelephonyCarrier(this.deps.bridge.telephonyProvider);
+        } catch (e) {
+          getLogger().debug(`TTS setTelephonyCarrier failed (${label}): ${String(e)}`);
+        }
+      }
+    }
     if (!this.stt) {
       getLogger().debug(`Pipeline mode (${label}): no STT configured`);
     }
     if (!this.tts) {
       getLogger().debug(`Pipeline mode (${label}): no TTS configured`);
     }
+    if (!this.deps.agent.vad) {
+      try {
+        const { SileroVAD } = await import("./silero-vad-YLCXT5GQ.mjs");
+        this.autoVad = await SileroVAD.forPhoneCall();
+        getLogger().info(
+          `auto-VAD enabled (SileroVAD, phone preset). Pass agent.vad=\u2026 to override.`
+        );
+      } catch (e) {
+        const msg = e?.message ?? String(e);
+        if (/Cannot find module|onnxruntime-node/i.test(msg)) {
+          getLogger().info(
+            "auto-VAD unavailable: onnxruntime-node not installed. Run `npm install onnxruntime-node@~1.18.0` for fast barge-in."
+          );
+        } else {
+          getLogger().warn(
+            `auto-VAD load failed (${msg}); falling back to STT-endpoint heuristic`
+          );
+        }
+      }
+    }
+    if (this.deps.agent.echoCancellation) {
+      const carrier = this.deps.bridge.telephonyProvider;
+      if (carrier === "twilio" || carrier === "telnyx") {
+        getLogger().warn(
+          `echoCancellation: true on ${carrier} (PSTN). Server-side NLMS cannot model PSTN's ~250\u20131500 ms round-trip echo with a 32 ms filter window \u2014 it will silently no-op. Best practice: keep echoCancellation: false; rely on the carrier + caller device's built-in echo suppression and Patter's self-hearing guard. Enable AEC only for browser/native deployments where the SDK owns the audio path end-to-end.`
+        );
+      }
+      try {
+        const { NlmsEchoCanceller } = await import("./aec-PJJMUM5E.mjs");
+        this.aec = new NlmsEchoCanceller({ sampleRate: 16e3 });
+        getLogger().info(
+          "echo cancellation enabled (NLMS, 512 taps + 0.5 s warmup \u03BC=0.5); filter converges within ~250 ms of TTS playback in low-latency loops."
+        );
+      } catch (e) {
+        getLogger().warn(
+          `echo cancellation requested but failed to load: ${String(e)}; falling back to pass-through.`
+        );
+      }
+    }
     try {
       if (this.stt) await this.stt.connect();
       getLogger().debug(`Pipeline mode (${label}): STT + TTS connected`);
@@ -4470,13 +5121,19 @@ var StreamHandler = class {
     }
     if (this.deps.agent.firstMessage && !this.deps.onMessage && this.tts) {
       this.metricsAcc.startTurn();
+      await this.beginSpeaking();
       let firstChunkSent = false;
       this.resetTtsCarry();
       try {
         for await (const chunk of this.tts.synthesizeStream(this.deps.agent.firstMessage)) {
+          if (!this.isSpeaking) break;
           if (!firstChunkSent) {
             firstChunkSent = true;
             this.metricsAcc.recordTtsFirstByte();
+            await this.emitAudioOut();
+          }
+          if (this.aec) {
+            this.aec.pushFarEnd(chunk);
           }
           const encoded = this.encodePipelineAudio(chunk);
           this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
@@ -4485,6 +5142,7 @@ var StreamHandler = class {
         getLogger().error(`First message TTS error (${label}):`, e);
       } finally {
         this.resetTtsCarry();
+        this.endSpeakingWithGrace();
       }
       if (firstChunkSent) {
         await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(this.deps.agent.firstMessage));
@@ -4505,9 +5163,11 @@ var StreamHandler = class {
         // propagate so calculateLlmCost can match the price row
         resolvedPrompt,
         this.deps.agent.tools,
-        this.deps.agent.llm
+        this.deps.agent.llm,
+        this.deps.agent.disablePhonePreamble ?? false
       );
       this.llmLoop.setEventBus(this._eventBus);
+      this.llmLoop.setOnToolCall((n, a, r) => this.recordToolCall(n, a, r));
       const llmLabel = this.deps.agent.llm.constructor?.name ?? "custom";
       getLogger().debug(`Built-in LLM loop active (pipeline, ${label}, llm=${llmLabel})`);
     } else if (!this.deps.onMessage && this.deps.config.openaiKey) {
@@ -4517,9 +5177,12 @@ var StreamHandler = class {
         this.deps.config.openaiKey,
         llmModel,
         resolvedPrompt,
-        this.deps.agent.tools
+        this.deps.agent.tools,
+        void 0,
+        this.deps.agent.disablePhonePreamble ?? false
       );
       this.llmLoop.setEventBus(this._eventBus);
+      this.llmLoop.setOnToolCall((n, a, r) => this.recordToolCall(n, a, r));
       getLogger().debug(`Built-in LLM loop active (pipeline, ${label})`);
     }
     if (this.stt) {
@@ -4555,9 +5218,14 @@ var StreamHandler = class {
         if (!this.isSpeaking) break;
         const processedAudio = await hookExecutor.runAfterSynthesize(chunk, processedText, hookCtx);
         if (processedAudio === null) continue;
+        if (!this.isSpeaking) break;
         if (!ttsFirstByteSent.value) {
           ttsFirstByteSent.value = true;
           this.metricsAcc.recordTtsFirstByte();
+          await this.emitAudioOut();
+        }
+        if (this.aec) {
+          this.aec.pushFarEnd(processedAudio);
         }
         const encoded = this.encodePipelineAudio(processedAudio);
         this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
@@ -4583,6 +5251,9 @@ var StreamHandler = class {
     }
   }
   async processTranscript(transcript) {
+    getLogger().info(
+      `[DIAG] processTranscript text=${JSON.stringify((transcript.text ?? "").slice(0, 60))} isFinal=${transcript.isFinal} speechFinal=${transcript.speechFinal} isSpeaking=${this.isSpeaking}`
+    );
     let interrupted = this.handleBargeIn(transcript);
     if (transcript.text) {
       this.metricsAcc.startTurnIfIdle();
@@ -4593,6 +5264,9 @@ var StreamHandler = class {
     if (!transcript.isFinal || !transcript.text) return;
     if (!this.commitTranscript(transcript.text)) return;
     const label = this.deps.bridge.label;
+    getLogger().info(
+      `[DIAG] processTranscript COMMITTED \u2192 LLM (${label} pipeline): ${sanitizeLogValue(transcript.text.slice(0, 80))}`
+    );
     getLogger().debug(`User (${label} pipeline): ${sanitizeLogValue(transcript.text)}`);
     this.metricsAcc.startTurnIfIdle();
     this.metricsAcc.recordSttComplete(transcript.text);
@@ -4672,7 +5346,7 @@ var StreamHandler = class {
     }
     if (!responseText) return;
     if (this.llmLoop) {
-      this.history.push({ role: "assistant", text: responseText, timestamp: Date.now() });
+      await this.emitAssistantTranscript(responseText);
       this.metricsAcc.recordTtsComplete(responseText);
     } else {
       interrupted = await this.runRegularLlm(responseText, hookExecutor, hookCtx) || interrupted;
@@ -4690,6 +5364,12 @@ var StreamHandler = class {
    */
   handleBargeIn(transcript) {
     if (!transcript.text || !this.isSpeaking) return false;
+    if (!this.canBargeIn()) {
+      getLogger().info(
+        `Barge-in transcript suppressed (agent speaking < gate, aec=${this.aec ? "on" : "off"})`
+      );
+      return false;
+    }
     getLogger().debug(
       `Barge-in: caller spoke over agent (${sanitizeLogValue(transcript.text.slice(0, 40))})`
     );
@@ -4755,16 +5435,26 @@ var StreamHandler = class {
   async runPipelineLlm(filteredTranscript, hookExecutor, hookCtx) {
     const label = this.deps.bridge.label;
     const callCtx = { call_id: this.callId, caller: this.caller, callee: this.callee };
-    const chunker = new SentenceChunker();
+    const chunker = new SentenceChunker({
+      aggressiveFirstFlush: this.deps.agent.aggressiveFirstFlush ?? false,
+      language: this.deps.agent.language
+    });
     const allParts = [];
     const ttsFirstByteSent = { value: false };
-    this.beginSpeaking();
+    await this.beginSpeaking();
+    this.llmAbort = new AbortController();
+    const llmSignal = this.llmAbort.signal;
     let llmError = false;
     const llmSpan = startSpan(SPAN_LLM, { "patter.call.id": this.callId });
     const guardAndSpeak = async (sentence, isFirst) => {
       if (isFirst) this.metricsAcc.recordLlmFirstSentenceComplete();
       const guard = checkGuardrails(sentence, this.deps.agent.guardrails);
-      const sentenceText = guard ? guard.replacement ?? "I'm sorry, I can't respond to that." : sentence;
+      let sentenceText = guard ? guard.replacement ?? "I'm sorry, I can't respond to that." : sentence;
+      if (hookExecutor.hasAfterLlmSentence()) {
+        const transformed = await hookExecutor.runAfterLlmSentence(sentenceText, hookCtx);
+        if (transformed === null) return;
+        sentenceText = transformed;
+      }
       await this.synthesizeSentence(sentenceText, hookExecutor, hookCtx, ttsFirstByteSent);
     };
     let firstSentenceEmitted = false;
@@ -4776,22 +5466,28 @@ var StreamHandler = class {
           callCtx,
           this.metricsAcc,
           hookExecutor,
-          hookCtx
+          hookCtx,
+          { signal: llmSignal }
         )) {
+          if (llmSignal.aborted) break;
           this.metricsAcc.recordLlmFirstToken();
+          await this.emitLlmFirstToken();
           allParts.push(token);
           for (const sentence of chunker.push(token)) {
             if (!this.isSpeaking) break;
             await guardAndSpeak(sentence, !firstSentenceEmitted);
             firstSentenceEmitted = true;
           }
-          if (!this.isSpeaking) break;
+          if (!this.isSpeaking || llmSignal.aborted) break;
         }
       } catch (e) {
-        llmError = true;
-        chunker.reset();
-        getLogger().error(`LLM loop error (${label}):`, e);
-        this.metricsAcc.recordTurnInterrupted();
+        const isAbort = e?.name === "AbortError" || llmSignal.aborted;
+        if (!isAbort) {
+          llmError = true;
+          chunker.reset();
+          getLogger().error(`LLM loop error (${label}):`, e);
+          this.metricsAcc.recordTurnInterrupted();
+        }
       }
       this.metricsAcc.recordLlmComplete();
       if (!llmError && this.isSpeaking) {
@@ -4803,6 +5499,7 @@ var StreamHandler = class {
       }
     } finally {
       this.endSpeakingWithGrace();
+      this.llmAbort = null;
       try {
         llmSpan.end();
       } catch {
@@ -4823,11 +5520,11 @@ var StreamHandler = class {
       text = guard.replacement ?? "I'm sorry, I can't respond to that.";
     }
     this.metricsAcc.recordLlmComplete();
-    this.history.push({ role: "assistant", text, timestamp: Date.now() });
+    await this.emitAssistantTranscript(text);
     const chunker = new SentenceChunker();
     const sentences = [...chunker.push(text), ...chunker.flush()];
     const ttsFirstByteSent = { value: false };
-    this.beginSpeaking();
+    await this.beginSpeaking();
     let interrupted = false;
     try {
       for (const sentence of sentences) {
@@ -4835,7 +5532,13 @@ var StreamHandler = class {
           interrupted = true;
           break;
         }
-        await this.synthesizeSentence(sentence, hookExecutor, hookCtx, ttsFirstByteSent);
+        let sentenceText = sentence;
+        if (hookExecutor.hasAfterLlmSentence()) {
+          const transformed = await hookExecutor.runAfterLlmSentence(sentenceText, hookCtx);
+          if (transformed === null) continue;
+          sentenceText = transformed;
+        }
+        await this.synthesizeSentence(sentenceText, hookExecutor, hookCtx, ttsFirstByteSent);
       }
     } finally {
       this.endSpeakingWithGrace();
@@ -4848,7 +5551,7 @@ var StreamHandler = class {
     const onMessage = this.deps.onMessage;
     const parts = [];
     this.metricsAcc.recordLlmComplete();
-    this.beginSpeaking();
+    await this.beginSpeaking();
     let wsTtsStarted = false;
     try {
       for await (const chunk of this.deps.remoteHandler.callWebSocket(onMessage, msgData)) {
@@ -4860,6 +5563,7 @@ var StreamHandler = class {
             if (!wsTtsStarted) {
               wsTtsStarted = true;
               this.metricsAcc.recordTtsFirstByte();
+              await this.emitAudioOut();
             }
             const encoded = this.encodePipelineAudio(audioChunk);
             this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
@@ -4875,7 +5579,7 @@ var StreamHandler = class {
     const responseText = parts.join("");
     this.metricsAcc.recordTtsComplete(responseText);
     await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(responseText));
-    if (responseText) this.history.push({ role: "assistant", text: responseText, timestamp: Date.now() });
+    if (responseText) await this.emitAssistantTranscript(responseText);
   }
   // ---------------------------------------------------------------------------
   // Private: OpenAI Realtime / ElevenLabs ConvAI mode
@@ -4897,7 +5601,8 @@ var StreamHandler = class {
     if (this.deps.agent.firstMessage) {
       this.metricsAcc.startTurn();
       if (this.adapter instanceof OpenAIRealtimeAdapter) {
-        await this.adapter.sendText(this.deps.agent.firstMessage);
+        const sender = typeof this.adapter.sendFirstMessage === "function" ? this.adapter.sendFirstMessage.bind(this.adapter) : this.adapter.sendText.bind(this.adapter);
+        await sender(this.deps.agent.firstMessage);
       }
     }
     this.adapter.onEvent(async (type, eventData) => {
@@ -4927,21 +5632,87 @@ var StreamHandler = class {
       }
     }
   };
+  // ---- Speech-event helpers ------------------------------------------
+  // No-op when the deps don't include a SpeechEvents dispatcher. Tracks
+  // wall-clock for `speech_duration_ms` payloads.
+  userSpeechStartMs = null;
+  agentTurnStartMs = null;
+  async emitUserSpeechStarted() {
+    if (!this.deps.speechEvents) return;
+    this.userSpeechStartMs = Date.now();
+    await this.deps.speechEvents.fireUserSpeechStarted();
+  }
+  async emitUserSpeechEnded() {
+    if (!this.deps.speechEvents) return;
+    const duration = this.userSpeechStartMs !== null ? Math.max(0, Date.now() - this.userSpeechStartMs) : 0;
+    this.userSpeechStartMs = null;
+    await this.deps.speechEvents.fireUserSpeechEnded({
+      speechDurationMs: duration
+    });
+  }
+  async emitUserSpeechEos(transcriptSoFar) {
+    if (!this.deps.speechEvents) return;
+    await this.deps.speechEvents.fireUserSpeechEos({
+      trigger: "vad_silence",
+      transcriptSoFar
+    });
+  }
+  async emitAgentSpeechStarted() {
+    if (!this.deps.speechEvents) return;
+    this.agentTurnStartMs = Date.now();
+    const ttsKey = this.deps.agent.tts?.constructor?.providerKey;
+    await this.deps.speechEvents.fireAgentSpeechStarted({
+      ttsProvider: ttsKey,
+      engine: this.deps.agent.provider ?? "openai_realtime"
+    });
+  }
+  async emitAgentSpeechEnded(interrupted) {
+    if (!this.deps.speechEvents) return;
+    if (this.agentTurnStartMs === null) return;
+    const duration = Math.max(0, Date.now() - this.agentTurnStartMs);
+    this.agentTurnStartMs = null;
+    await this.deps.speechEvents.fireAgentSpeechEnded({
+      speechDurationMs: duration,
+      interrupted
+    });
+  }
+  /** Fire the per-turn LLM TTFT marker. Idempotent in the dispatcher
+   * — guarded by `firstTokenForTurn` on the SpeechEvents instance. */
+  async emitLlmFirstToken() {
+    if (!this.deps.speechEvents) return;
+    await this.deps.speechEvents.fireLlmFirstToken({
+      llmProvider: this.llmProviderTag,
+      model: this.deps.agent.model ?? ""
+    });
+  }
+  /** Fire the per-turn first-TTS-audio marker. Idempotent in the
+   * dispatcher — guarded by `firstAudioForTurn`. The provider tag falls
+   * back to the engine name for Realtime / ConvAI (no separate TTS). */
+  async emitAudioOut() {
+    if (!this.deps.speechEvents) return;
+    const ttsKey = this.deps.agent.tts?.constructor?.providerKey;
+    const provider2 = ttsKey ?? this.deps.agent.provider ?? "openai_realtime";
+    await this.deps.speechEvents.fireAudioOut({ ttsProvider: provider2 });
+  }
   async onAdapterAudio(eventData) {
     if (!this.responseAudioStarted) {
       this.responseAudioStarted = true;
       if (this.metricsAcc.turnActive === false) this.metricsAcc.startTurn();
       this.metricsAcc.recordTtsFirstByte();
+      await this.emitAgentSpeechStarted();
+      await this.emitAudioOut();
     }
     const outAudio = eventData;
     this.deps.bridge.sendAudio(this.ws, outAudio.toString("base64"), this.streamSid);
     this.chunkCount++;
     this.deps.bridge.sendMark(this.ws, `audio_${this.chunkCount}`, this.streamSid);
   }
-  onAdapterSpeechStopped() {
+  async onAdapterSpeechStopped() {
     if (!this.metricsAcc.turnActive) this.metricsAcc.startTurn();
     this.currentAgentText = "";
     this.responseAudioStarted = false;
+    this.userTranscriptPending = true;
+    await this.emitUserSpeechEnded();
   }
   async onAdapterTranscriptInput(inputText) {
     getLogger().debug(`User (${this.deps.bridge.label}): ${sanitizeLogValue(inputText)}`);
@@ -4951,6 +5722,7 @@ var StreamHandler = class {
       this.currentAgentText = "";
       this.responseAudioStarted = false;
     }
+    await this.emitUserSpeechEos(inputText);
     this.metricsAcc.recordSttComplete(inputText);
     if (this.deps.onTranscript) {
       await this.deps.onTranscript({
@@ -4960,9 +5732,97 @@ var StreamHandler = class {
         history: [...this.history.entries]
       });
     }
+    this.userTranscriptPending = false;
+    if (this.pendingAssistantTurn !== null) {
+      const buffered = this.pendingAssistantTurn;
+      this.pendingAssistantTurn = null;
+      if (this.pendingAssistantTimer) {
+        clearTimeout(this.pendingAssistantTimer);
+        this.pendingAssistantTimer = null;
+      }
+      await this.flushAssistantTurn(buffered);
+    }
+  }
+  /**
+   * Push an assistant turn into history, fire `onTranscript`, and emit
+   * turn-complete metrics. Shared between the immediate path (no user
+   * transcript pending) and the buffered path (flushed after user
+   * transcript arrives or fallback timer fires).
+   */
+  async flushAssistantTurn(text) {
+    this.history.push({ role: "assistant", text, timestamp: Date.now() });
+    if (this.deps.onTranscript) {
+      await this.deps.onTranscript({
+        role: "assistant",
+        text,
+        call_id: this.callId,
+        history: [...this.history.entries]
+      });
+    }
+    this.responseAudioStarted = false;
+    await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(text));
+  }
+  /**
+   * Push an assistant turn into history and fire `onTranscript` so host
+   * applications observe pipeline-mode replies the same way they observe
+   * realtime-mode replies. Mirrors `_emit_assistant_transcript` in the
+   * Python SDK and parallels `flushAssistantTurn` (realtime path).
+   * Caller is responsible for filtering empty strings.
+   */
+  async emitAssistantTranscript(text) {
+    this.history.push({ role: "assistant", text, timestamp: Date.now() });
+    if (this.deps.onTranscript) {
+      await this.deps.onTranscript({
+        role: "assistant",
+        text,
+        call_id: this.callId,
+        history: [...this.history.entries]
+      });
+    }
+  }
+  /**
+   * Surface a tool invocation from pipeline mode into the transcript
+   * timeline. Emits TWO events: one for the call (`name(argsJson)`) and
+   * one for the result (`name(...) → result`, truncated to 200 chars).
+   * Mirrors realtime mode's two `emitToolEvent` calls in
+   * `handleFunctionCall`. Wired as the `LLMLoop` `onToolCall` observer.
+   */
+  async recordToolCall(name, args, result) {
+    let argsText;
+    try {
+      argsText = JSON.stringify(args ?? {});
+    } catch {
+      argsText = "{}";
+    }
+    const callText = `${name}(${argsText})`;
+    this.history.push({ role: "tool", text: callText, timestamp: Date.now() });
+    if (this.deps.onTranscript) {
+      await this.deps.onTranscript({
+        role: "tool",
+        text: callText,
+        call_id: this.callId,
+        tool_name: name,
+        tool_args: args ?? {},
+        tool_result: null
+      });
+    }
+    const displayed = result.length > 200 ? result.slice(0, 200) + "\u2026" : result;
+    const resText = `${name}(...) \u2192 ${displayed}`;
+    this.history.push({ role: "tool", text: resText, timestamp: Date.now() });
+    if (this.deps.onTranscript) {
+      await this.deps.onTranscript({
+        role: "tool",
+        text: resText,
+        call_id: this.callId,
+        tool_name: name,
+        tool_args: args ?? {},
+        tool_result: result
+      });
+    }
   }
   async onAdapterTranscriptOutput(outputText) {
     if (!outputText) return;
+    await this.emitLlmFirstToken();
     const triggered = checkGuardrails(outputText, this.deps.agent.guardrails);
     if (triggered) {
       getLogger().debug(`Guardrail '${triggered.name}' triggered`);
@@ -4976,24 +5836,75 @@ var StreamHandler = class {
   async onAdapterResponseDone(responseData) {
     if (responseData) {
       const usage = responseData.usage;
-      if (usage) this.metricsAcc.recordRealtimeUsage(usage);
+      if (usage) {
+        const turnModel = typeof responseData.model === "string" ? responseData.model : null;
+        this.metricsAcc.recordRealtimeUsage(usage, turnModel);
+      }
     }
-    if (this.currentAgentText) {
-      this.history.push({ role: "assistant", text: this.currentAgentText, timestamp: Date.now() });
-      this.responseAudioStarted = false;
-      await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(this.currentAgentText));
-      this.currentAgentText = "";
-    } else {
+    if (!this.currentAgentText) {
       this.metricsAcc.recordTurnInterrupted();
       this.responseAudioStarted = false;
+      await this.emitAgentSpeechEnded(true);
+      return;
     }
+    await this.emitAgentSpeechEnded(false);
+    const text = this.currentAgentText;
+    this.currentAgentText = "";
+    if (this.userTranscriptPending) {
+      this.pendingAssistantTurn = text;
+      if (this.pendingAssistantTimer) clearTimeout(this.pendingAssistantTimer);
+      this.pendingAssistantTimer = setTimeout(() => {
+        const buffered = this.pendingAssistantTurn;
+        this.pendingAssistantTurn = null;
+        this.pendingAssistantTimer = null;
+        this.userTranscriptPending = false;
+        if (buffered !== null) {
+          void this.flushAssistantTurn(buffered);
+        }
+      }, _StreamHandler.REALTIME_USER_TRANSCRIPT_WAIT_MS);
+      this.responseAudioStarted = false;
+      return;
+    }
+    await this.flushAssistantTurn(text);
   }
-  onAdapterSpeechInterrupt() {
+  async onAdapterSpeechInterrupt() {
     this.deps.bridge.sendClear(this.ws, this.streamSid);
     if (this.adapter instanceof OpenAIRealtimeAdapter) this.adapter.cancelResponse();
     this.metricsAcc.recordTurnInterrupted();
+    if (this.responseAudioStarted) {
+      await this.emitAgentSpeechEnded(true);
+    }
+    await this.emitUserSpeechStarted();
     this.currentAgentText = "";
     this.responseAudioStarted = false;
+    this.pendingAssistantTurn = null;
+    if (this.pendingAssistantTimer) {
+      clearTimeout(this.pendingAssistantTimer);
+      this.pendingAssistantTimer = null;
+    }
+    this.userTranscriptPending = false;
+  }
+  /**
+   * Emit a tool-invocation event into the transcript timeline. Pushes a
+   * `role=tool` entry into `history` (so it appears in the dashboard
+   * transcript next to user/assistant turns) AND fires `onTranscript` so
+   * the host application can log / persist / render it. `result` is
+   * truncated for log readability — the full payload is in history.
+   */
+  async emitToolEvent(name, args, result) {
+    const argsText = JSON.stringify(args);
+    const text = result === null ? `${name}(${argsText})` : `${name}(${argsText}) \u2192 ${result.length > 200 ? result.slice(0, 200) + "\u2026" : result}`;
+    this.history.push({ role: "tool", text, timestamp: Date.now() });
+    if (this.deps.onTranscript) {
+      await this.deps.onTranscript({
+        role: "tool",
+        text,
+        call_id: this.callId,
+        tool_name: name,
+        tool_args: args,
+        tool_result: result
+      });
+    }
   }
   async handleFunctionCall(fc) {
     const adapter = this.adapter;
@@ -5007,11 +5918,15 @@ var StreamHandler = class {
       const transferTo = transferArgs.number ?? "";
       if (!isValidE164(transferTo)) {
         getLogger().warn(`transfer_call rejected (${this.deps.bridge.label}): invalid number ${JSON.stringify(transferTo)}`);
-        await adapter.sendFunctionResult(fc.call_id, JSON.stringify({ error: "Invalid phone number format", status: "rejected" }));
+        const rejection = JSON.stringify({ error: "Invalid phone number format", status: "rejected" });
+        await adapter.sendFunctionResult(fc.call_id, rejection);
+        await this.emitToolEvent("transfer_call", transferArgs, rejection);
         return;
       }
       getLogger().debug(`Transferring call to ${transferTo}`);
-      await adapter.sendFunctionResult(fc.call_id, JSON.stringify({ status: "transferring", to: transferTo }));
+      const result2 = JSON.stringify({ status: "transferring", to: transferTo });
+      await adapter.sendFunctionResult(fc.call_id, result2);
+      await this.emitToolEvent("transfer_call", transferArgs, result2);
       await this.deps.bridge.transferCall(this.callId, transferTo);
       if (this.deps.onTranscript) {
         await this.deps.onTranscript({ role: "system", text: `Call transferred to ${transferTo}`, call_id: this.callId });
@@ -5027,7 +5942,9 @@ var StreamHandler = class {
       }
       const reason = endArgs.reason ?? "conversation_complete";
       getLogger().debug(`Ending call (${this.deps.bridge.label}): ${reason}`);
-      await adapter.sendFunctionResult(fc.call_id, JSON.stringify({ status: "ending", reason }));
+      const result2 = JSON.stringify({ status: "ending", reason });
+      await adapter.sendFunctionResult(fc.call_id, result2);
+      await this.emitToolEvent("end_call", endArgs, result2);
       await this.deps.bridge.endCall(this.callId, this.ws);
       if (this.deps.onTranscript) {
         await this.deps.onTranscript({ role: "system", text: `Call ended: ${reason}`, call_id: this.callId });
@@ -5035,22 +5952,57 @@ var StreamHandler = class {
       return;
     }
     const toolDef = this.deps.agent.tools?.find((t) => t.name === fc.name);
-    if (toolDef?.webhookUrl) {
-      let parsedArgs;
+    if (!toolDef) {
+      getLogger().warn(`Realtime tool '${fc.name}' not found in agent.tools \u2014 skipping`);
+      const result2 = JSON.stringify({ error: `Tool '${fc.name}' not registered`, fallback: true });
+      await adapter.sendFunctionResult(fc.call_id, result2);
+      await this.emitToolEvent(fc.name, {}, result2);
+      return;
+    }
+    let parsedArgs;
+    try {
+      parsedArgs = JSON.parse(fc.arguments || "{}");
+    } catch {
+      parsedArgs = {};
+    }
+    await this.emitToolEvent(fc.name, parsedArgs, null);
+    const reassurance = toolDef.reassurance;
+    let reassuranceTimer = null;
+    if (reassurance) {
+      const msg = typeof reassurance === "string" ? reassurance : reassurance.message;
+      const afterMs = typeof reassurance === "string" ? 1500 : reassurance.afterMs ?? 1500;
+      if (msg && this.adapter instanceof OpenAIRealtimeAdapter) {
+        const realtimeAdapter = this.adapter;
+        reassuranceTimer = setTimeout(() => {
+          realtimeAdapter.sendText(msg).catch((e) => {
+            getLogger().warn(`Reassurance message failed for tool '${fc.name}': ${String(e)}`);
+          });
+        }, afterMs);
+      }
+    }
+    const onProgress = this.adapter instanceof OpenAIRealtimeAdapter ? async (text) => {
       try {
-        parsedArgs = JSON.parse(fc.arguments || "{}");
-      } catch {
-        parsedArgs = {};
+        await this.adapter.sendText(text);
+      } catch (e) {
+        getLogger().warn(`Tool progress message failed for '${fc.name}': ${String(e)}`);
       }
-      const result = await executeToolWebhook(
-        toolDef.webhookUrl,
-        fc.name,
+    } : void 0;
+    let result;
+    try {
+      result = await this.toolExecutor.execute(
+        toolDef,
         parsedArgs,
-        { callId: this.callId, caller: this.caller },
-        this.deps.bridge.label === "Twilio" ? "" : this.deps.bridge.label
+        {
+          call_id: this.callId,
+          caller: this.caller
+        },
+        onProgress
       );
-      await adapter.sendFunctionResult(fc.call_id, result);
+    } finally {
+      if (reassuranceTimer) clearTimeout(reassuranceTimer);
     }
+    await adapter.sendFunctionResult(fc.call_id, result);
+    await this.emitToolEvent(fc.name, parsedArgs, result);
   }
   // ---------------------------------------------------------------------------
   // Private: call end / metrics finalization
@@ -5062,6 +6014,25 @@ var StreamHandler = class {
       clearTimeout(this.maxDurationTimer);
       this.maxDurationTimer = null;
     }
+    if (this.pendingAssistantTimer) {
+      clearTimeout(this.pendingAssistantTimer);
+      this.pendingAssistantTimer = null;
+    }
+    if (this.pendingAssistantTurn !== null) {
+      const buffered = this.pendingAssistantTurn;
+      this.pendingAssistantTurn = null;
+      try {
+        await this.flushAssistantTurn(buffered);
+      } catch {
+      }
+    }
+    if (this.mcpManager) {
+      try {
+        await this.mcpManager.close();
+      } catch {
+      }
+      this.mcpManager = null;
+    }
     await this.deps.bridge.queryTelephonyCost(this.metricsAcc, this.callId);
     if (this.stt instanceof DeepgramSTT && this.stt.requestId) {
       const dgKey = this.stt.apiKey;
@@ -5088,7 +6059,7 @@ var StreamHandler = class {
       finalMetrics
     );
     try {
-      const { notifyDashboard } = await import("./persistence-LQBYQPQQ.mjs");
+      const { notifyDashboard } = await import("./persistence-LVIAHESK.mjs");
       notifyDashboard(callEndData);
     } catch {
     }
@@ -5129,6 +6100,7 @@ async function queryDeepgramCost(metricsAcc, deepgramKey, deepgramRequestId) {
 }
 // src/services/call-log.ts
+init_esm_shims();
 import * as crypto3 from "crypto";
 import * as fs2 from "fs";
 import { promises as fsp } from "fs";
@@ -5226,6 +6198,7 @@ var CallLogger = class {
       this.root = null;
     }
   }
+  /** True when a log root was configured and is writable. */
   get enabled() {
     return this.root !== null;
   }
@@ -5239,6 +6212,7 @@ var CallLogger = class {
     const safeId = sanitizeLogValue(callId, 64).replace(/\//g, "_") || "unknown";
     return path2.join(this.root, "calls", year, month, day, safeId);
   }
+  /** Write the initial `metadata.json` for a new call. */
   async logCallStart(callId, input = {}) {
     if (!this.enabled) return;
     const startedAt = Date.now() / 1e3;
@@ -5271,6 +6245,7 @@ var CallLogger = class {
       this.sweepOldDays();
     }
   }
+  /** Append a single turn record to the call's `transcript.jsonl`. */
   async logTurn(callId, turn) {
     if (!this.enabled) return;
     const dir = this.callDir(callId);
@@ -5288,6 +6263,7 @@ var CallLogger = class {
       );
     }
   }
+  /** Append an operational event (tool_call, barge_in, error, …) to `events.jsonl`. */
   async logEvent(callId, eventType, payload = {}) {
     if (!this.enabled) return;
     const dir = this.callDir(callId);
@@ -5306,6 +6282,7 @@ var CallLogger = class {
       );
     }
   }
+  /** Merge end-of-call fields into the existing `metadata.json`. */
   async logCallEnd(callId, input = {}) {
     if (!this.enabled) return;
     const dir = this.callDir(callId);
@@ -5432,6 +6409,18 @@ var END_CALL_TOOL = {
 function xmlEscape(s) {
   return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&apos;");
 }
+function classifyTwilioAmd(answeredBy) {
+  if (answeredBy === "human") return "human";
+  if (answeredBy.startsWith("machine_")) return "machine";
+  if (answeredBy === "fax") return "fax";
+  return "unknown";
+}
+function classifyTelnyxAmd(result) {
+  if (result === "human") return "human";
+  if (result === "machine" || result === "machine_detected") return "machine";
+  if (result === "fax") return "fax";
+  return "unknown";
+}
 function validateWebhookUrl(url) {
   const parsed = new URL(url);
   if (!["http:", "https:"].includes(parsed.protocol)) {
@@ -5561,22 +6550,35 @@ function buildAIAdapter(config, agent, resolvedPrompt) {
   const agentTools = agent.tools?.map((t) => ({
     name: t.name,
     description: t.description,
-    parameters: t.parameters
+    parameters: t.parameters,
+    strict: t.strict
   })) ?? [];
   const tools = [...agentTools, TRANSFER_CALL_TOOL, END_CALL_TOOL];
   const openaiKey = engine && engine.kind === "openai_realtime" ? engine.apiKey : config.openaiKey ?? "";
+  const adapterOptions = {};
+  if (engine && engine.kind === "openai_realtime") {
+    if (engine.reasoningEffort !== void 0) {
+      adapterOptions.reasoningEffort = engine.reasoningEffort;
+    }
+    if (engine.inputAudioTranscriptionModel !== void 0) {
+      adapterOptions.inputAudioTranscriptionModel = engine.inputAudioTranscriptionModel;
+    }
+  }
   return new OpenAIRealtimeAdapter(
     openaiKey,
     agent.model,
     agent.voice,
     resolvedPrompt ?? agent.systemPrompt,
-    tools
+    tools,
+    void 0,
+    adapterOptions
   );
 }
 var TwilioBridge = class {
   constructor(config) {
     this.config = config;
   }
+  config;
   label = "Twilio";
   telephonyProvider = "twilio";
   sendAudio(ws, audioBase64, streamSid) {
@@ -5649,7 +6651,10 @@ var TwilioBridge = class {
             getLogger().info(`Twilio actual cost: $${Math.abs(parseFloat(data.price))}`);
           }
         }
-      } catch {
+      } catch (err) {
+        getLogger().debug(
+          `queryTelephonyCost(twilio) failed: ${err?.message ?? err}`
+        );
       }
     }
   }
@@ -5669,6 +6674,7 @@ var TelnyxBridge = class {
   constructor(config) {
     this.config = config;
   }
+  config;
   label = "Telnyx";
   telephonyProvider = "telnyx";
   sendAudio(ws, audioBase64, _streamSid) {
@@ -5790,7 +6796,10 @@ var TelnyxBridge = class {
             getLogger().info(`Telnyx actual cost: $${Math.abs(parseFloat(amount))}`);
           }
         }
-      } catch {
+      } catch (err) {
+        getLogger().debug(
+          `queryTelephonyCost(telnyx) failed: ${err?.message ?? err}`
+        );
       }
     }
   }
@@ -5811,7 +6820,8 @@ var EmbeddedServer = class {
     this.dashboardToken = dashboardToken;
     this.metricsStore = new MetricsStore();
     this.pricing = mergePricing(pricingOverrides);
-    const logRoot = resolveLogRoot();
+    const logRoot = config.persistRoot === void 0 ? resolveLogRoot() : config.persistRoot;
+    this.callLogger = new CallLogger(logRoot);
     if (logRoot) {
       try {
         const restored = this.metricsStore.hydrate(logRoot);
@@ -5823,6 +6833,17 @@ var EmbeddedServer = class {
       }
     }
   }
+  config;
+  agent;
+  onCallStart;
+  onCallEnd;
+  onTranscript;
+  onMessage;
+  recording;
+  voicemailMessage;
+  onMetrics;
+  dashboard;
+  dashboardToken;
   server = null;
   wss = null;
   twilioTokenWarningLogged = false;
@@ -5830,11 +6851,25 @@ var EmbeddedServer = class {
   metricsStore;
   pricing;
   remoteHandler = new RemoteMessageHandler();
-  /** Opt-in per-call filesystem logger (set via PATTER_LOG_DIR). */
-  callLogger = new CallLogger(resolveLogRoot());
+  /**
+   * Opt-in per-call filesystem logger. Path is resolved by ``client.ts``
+   * from the public ``LocalOptions.persist`` option (with the legacy
+   * ``PATTER_LOG_DIR`` env var as fallback). Initialised in the ctor
+   * because ``resolveLogRoot`` cannot see ``this.config`` from a field
+   * default expression.
+   */
+  callLogger;
   /** Active WebSocket connections tracked for graceful shutdown. */
   activeConnections = /* @__PURE__ */ new Set();
   activeCallIds = /* @__PURE__ */ new Map();
+  /**
+   * Per-call AMD result callback set by ``Patter.call()`` for the most
+   * recent outbound call. Public so ``client.ts`` can populate it after
+   * server start. Cleared after firing once per call to avoid leaking
+   * across calls.
+   */
+  onMachineDetection;
+  /** Bind HTTP + WebSocket listeners on `port`, mount carrier webhooks and dashboard routes. */
   async start(port = 8e3) {
     const webhookUrlPattern = /^[a-zA-Z0-9][a-zA-Z0-9.\-]+[a-zA-Z0-9]$/;
     if (!webhookUrlPattern.test(this.config.webhookUrl)) {
@@ -5950,6 +6985,20 @@ var EmbeddedServer = class {
       const answeredBy = body["AnsweredBy"] ?? "";
       const callSid = body["CallSid"] ?? "";
       getLogger().info(`AMD result for ${sanitizeLogValue(callSid)}: ${sanitizeLogValue(answeredBy)}`);
+      const cb = this.onMachineDetection;
+      if (cb && callSid) {
+        try {
+          await cb({
+            call_id: callSid,
+            carrier: "twilio",
+            classification: classifyTwilioAmd(answeredBy),
+            raw: answeredBy,
+            detected_at: Date.now() / 1e3
+          });
+        } catch (err) {
+          getLogger().warn(`onMachineDetection callback threw: ${sanitizeLogValue(String(err))}`);
+        }
+      }
       if ((answeredBy === "machine_end_beep" || answeredBy === "machine_end_silence") && this.voicemailMessage && this.config.twilioSid && this.config.twilioToken) {
         if (!validateTwilioSid(callSid)) {
           getLogger().warn(`AMD webhook rejected: invalid CallSid ${JSON.stringify(sanitizeLogValue(callSid))}`);
@@ -5965,7 +7014,8 @@ var EmbeddedServer = class {
               "Content-Type": "application/x-www-form-urlencoded",
               "Authorization": `Basic ${Buffer.from(`${this.config.twilioSid}:${this.config.twilioToken}`).toString("base64")}`
             },
-            body: new URLSearchParams({ Twiml: twiml }).toString()
+            body: new URLSearchParams({ Twiml: twiml }).toString(),
+            signal: AbortSignal.timeout(1e4)
           });
           if (vmResp.ok) {
             getLogger().info(`Voicemail dropped for ${sanitizeLogValue(callSid)}`);
@@ -6053,6 +7103,20 @@ var EmbeddedServer = class {
         getLogger().info(
           `Telnyx AMD result for ${sanitizeLogValue(amdCallId)}: ${sanitizeLogValue(amdResult)}`
         );
+        const cbTx = this.onMachineDetection;
+        if (cbTx && amdCallId) {
+          try {
+            await cbTx({
+              call_id: amdCallId,
+              carrier: "telnyx",
+              classification: classifyTelnyxAmd(amdResult),
+              raw: amdResult,
+              detected_at: Date.now() / 1e3
+            });
+          } catch (err) {
+            getLogger().warn(`onMachineDetection callback threw: ${sanitizeLogValue(String(err))}`);
+          }
+        }
         if (amdCallId && (amdResult === "machine" || amdResult === "machine_detected")) {
           await this.handleTelnyxAmdVoicemail(amdCallId);
         }
@@ -6147,7 +7211,8 @@ var EmbeddedServer = class {
       }
     });
     await new Promise((resolve) => {
-      this.server.listen(port, "127.0.0.1", () => {
+      const bindHost = process.env.PATTER_BIND_HOST ?? "127.0.0.1";
+      this.server.listen(port, bindHost, () => {
         getLogger().info(`Server on port ${port}`);
         getLogger().info(`Webhook: https://${this.config.webhookUrl}`);
         getLogger().info(`Phone:   ${this.config.phoneNumber}`);
@@ -6477,30 +7542,168 @@ var EmbeddedServer = class {
   }
 };
+// src/tools/circuit-breaker.ts
+init_esm_shims();
+var CircuitBreakerState = {
+  CLOSED: "closed",
+  OPEN: "open",
+  HALF_OPEN: "half_open"
+};
+var DEFAULT_FAILURE_THRESHOLD = 5;
+var DEFAULT_COOLDOWN_MS = 3e4;
+var CircuitBreakerRegistry = class {
+  threshold;
+  cooldownMs;
+  state = /* @__PURE__ */ new Map();
+  /** Inject for deterministic tests; defaults to ``Date.now()``. */
+  clock;
+  constructor(opts = {}, clock = Date.now) {
+    this.threshold = opts.failureThreshold ?? DEFAULT_FAILURE_THRESHOLD;
+    this.cooldownMs = opts.cooldownMs ?? DEFAULT_COOLDOWN_MS;
+    this.clock = clock;
+  }
+  /** Returns ``true`` when this tool is currently allowed to run. */
+  allow(toolName) {
+    if (this.threshold <= 0) return true;
+    const s = this.state.get(toolName);
+    if (!s) return true;
+    if (s.state === CircuitBreakerState.CLOSED) return true;
+    if (s.state === CircuitBreakerState.OPEN) {
+      if (this.clock() - s.openedAt >= this.cooldownMs) {
+        s.state = CircuitBreakerState.HALF_OPEN;
+        return true;
+      }
+      return false;
+    }
+    return true;
+  }
+  /** Mark a successful execution. Resets the breaker to CLOSED. */
+  recordSuccess(toolName) {
+    const s = this.state.get(toolName);
+    if (!s) return;
+    s.state = CircuitBreakerState.CLOSED;
+    s.consecutiveFailures = 0;
+    s.openedAt = 0;
+  }
+  /** Mark a failed execution; trips OPEN once threshold is reached. */
+  recordFailure(toolName) {
+    if (this.threshold <= 0) return;
+    let s = this.state.get(toolName);
+    if (!s) {
+      s = { state: CircuitBreakerState.CLOSED, consecutiveFailures: 0, openedAt: 0 };
+      this.state.set(toolName, s);
+    }
+    s.consecutiveFailures += 1;
+    if (s.consecutiveFailures >= this.threshold) {
+      s.state = CircuitBreakerState.OPEN;
+      s.openedAt = this.clock();
+    }
+  }
+  /**
+   * Time until the breaker transitions OPEN → HALF_OPEN, in ms. Returns
+   * ``0`` when the breaker is currently allowing calls. Useful for
+   * tests and the structured rejection JSON.
+   */
+  timeUntilHalfOpen(toolName) {
+    const s = this.state.get(toolName);
+    if (!s || s.state !== CircuitBreakerState.OPEN) return 0;
+    const elapsed = this.clock() - s.openedAt;
+    return Math.max(0, this.cooldownMs - elapsed);
+  }
+  /** Snapshot for debugging / metrics. */
+  snapshot(toolName) {
+    const s = this.state.get(toolName);
+    return s ? { ...s } : null;
+  }
+};
 // src/llm-loop.ts
 var DEFAULT_TOOL_MAX_RETRIES = 2;
 var DEFAULT_TOOL_RETRY_DELAY_MS = 500;
 var DEFAULT_TOOL_TIMEOUT_MS = 1e4;
 var TOOL_MAX_RESPONSE_BYTES = 1 * 1024 * 1024;
+async function invokeHandler(handler, args, callContext, onProgress) {
+  const invoked = handler(args, callContext);
+  if (invoked && typeof invoked === "object" && typeof invoked[Symbol.asyncIterator] === "function" && typeof invoked.next === "function") {
+    let lastResult = "";
+    while (true) {
+      const step = await invoked.next();
+      if (step.done) {
+        const ret = typeof step.value === "string" ? step.value : "";
+        return ret || lastResult || "{}";
+      }
+      const yielded = step.value;
+      if (yielded && typeof yielded === "object") {
+        if (typeof yielded.progress === "string") {
+          if (onProgress) await onProgress(yielded.progress);
+          continue;
+        }
+        if (typeof yielded.result === "string") {
+          lastResult = yielded.result;
+          continue;
+        }
+      }
+      if (onProgress && yielded != null) {
+        const text = typeof yielded === "string" ? yielded : JSON.stringify(yielded);
+        await onProgress(text);
+      }
+    }
+  }
+  return await invoked;
+}
+function backoffDelayMs(baseMs, attempt) {
+  const cap = 5e3;
+  const exp = Math.min(cap, baseMs * Math.pow(2, attempt));
+  return Math.round(exp + Math.random() * 60);
+}
 var DefaultToolExecutor = class {
   maxRetries;
   retryDelayMs;
   requestTimeoutMs;
+  breaker;
   constructor(opts = {}) {
     this.maxRetries = opts.maxRetries ?? DEFAULT_TOOL_MAX_RETRIES;
     this.retryDelayMs = opts.retryDelayMs ?? DEFAULT_TOOL_RETRY_DELAY_MS;
     this.requestTimeoutMs = opts.requestTimeoutMs ?? DEFAULT_TOOL_TIMEOUT_MS;
-  }
-  async execute(toolDef, args, callContext) {
+    this.breaker = new CircuitBreakerRegistry(opts.circuitBreaker ?? {});
+  }
+  /** Expose the breaker for tests + dashboard observability. */
+  get circuitBreaker() {
+    return this.breaker;
+  }
+  async execute(toolDef, args, callContext, onProgress) {
+    if (!this.breaker.allow(toolDef.name)) {
+      const cooldown = this.breaker.timeUntilHalfOpen(toolDef.name);
+      return JSON.stringify({
+        error: `Tool '${toolDef.name}' is temporarily unavailable (circuit open).`,
+        fallback: true,
+        circuit_state: "open",
+        retry_after_ms: cooldown
+      });
+    }
     if (toolDef.handler) {
-      try {
-        return await toolDef.handler(args, callContext);
-      } catch (e) {
-        return JSON.stringify({
-          error: `Tool handler error: ${String(e)}`,
-          fallback: true
-        });
+      const totalAttempts = this.maxRetries + 1;
+      let lastErr = null;
+      for (let attempt = 0; attempt < totalAttempts; attempt++) {
+        try {
+          const result = await invokeHandler(toolDef.handler, args, callContext, onProgress);
+          this.breaker.recordSuccess(toolDef.name);
+          return result;
+        } catch (e) {
+          lastErr = e;
+          if (attempt < totalAttempts - 1) {
+            getLogger().warn(
+              `Tool handler '${toolDef.name}' failed (attempt ${attempt + 1}/${totalAttempts}), retrying: ${String(e)}`
+            );
+            await new Promise((r) => setTimeout(r, backoffDelayMs(this.retryDelayMs, attempt)));
+          }
+        }
       }
+      this.breaker.recordFailure(toolDef.name);
+      return JSON.stringify({
+        error: `Tool handler error after ${totalAttempts} attempts: ${String(lastErr)}`,
+        fallback: true
+      });
     }
     if (toolDef.webhookUrl) {
       try {
@@ -6535,20 +7738,23 @@ var DefaultToolExecutor = class {
               if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
               const result = JSON.stringify(await resp.json());
               if (result.length > TOOL_MAX_RESPONSE_BYTES) {
+                this.breaker.recordFailure(toolDef.name);
                 return JSON.stringify({
                   error: `Webhook response too large: ${result.length} bytes (max ${TOOL_MAX_RESPONSE_BYTES})`,
                   fallback: true
                 });
               }
+              this.breaker.recordSuccess(toolDef.name);
               return result;
             } catch (e) {
               if (attempt < totalAttempts - 1) {
                 getLogger().warn(
-                  `Tool webhook '${toolDef.name}' failed (attempt ${attempt + 1}), retrying: ${String(e)}`
+                  `Tool webhook '${toolDef.name}' failed (attempt ${attempt + 1}/${totalAttempts}), retrying: ${String(e)}`
                 );
-                await new Promise((r) => setTimeout(r, this.retryDelayMs));
+                await new Promise((r) => setTimeout(r, backoffDelayMs(this.retryDelayMs, attempt)));
               } else {
                 span.recordException(e);
+                this.breaker.recordFailure(toolDef.name);
                 return JSON.stringify({
                   error: `Tool failed after ${totalAttempts} attempts: ${String(e)}`,
                   fallback: true
@@ -6569,6 +7775,30 @@ var DefaultToolExecutor = class {
     });
   }
 };
+function mergeAbortSignals(...signals) {
+  const filtered = signals.filter(
+    (s) => s != null
+  );
+  if (filtered.length === 1) return filtered[0];
+  if (typeof AbortSignal.any === "function") {
+    return AbortSignal.any(
+      filtered
+    );
+  }
+  const controller = new AbortController();
+  for (const sig of filtered) {
+    if (sig.aborted) {
+      controller.abort(sig.reason);
+      return controller.signal;
+    }
+    sig.addEventListener(
+      "abort",
+      () => controller.abort(sig.reason),
+      { once: true }
+    );
+  }
+  return controller.signal;
+}
 var OpenAILLMProvider = class {
   apiKey;
   model;
@@ -6596,7 +7826,8 @@ var OpenAILLMProvider = class {
     this.presencePenalty = sampling.presencePenalty;
     this.stop = sampling.stop;
   }
-  async *stream(messages, tools) {
+  /** Stream OpenAI Chat Completions chunks for the given messages/tools. */
+  async *stream(messages, tools, opts) {
     const body = {
       model: this.model,
       messages,
@@ -6620,6 +7851,7 @@ var OpenAILLMProvider = class {
     if (tools) {
       body.tools = tools;
     }
+    const signal = mergeAbortSignals(opts?.signal, AbortSignal.timeout(3e4));
     const response = await fetch("https://api.openai.com/v1/chat/completions", {
       method: "POST",
       headers: {
@@ -6627,12 +7859,14 @@ var OpenAILLMProvider = class {
         "Authorization": `Bearer ${this.apiKey}`
       },
       body: JSON.stringify(body),
-      signal: AbortSignal.timeout(3e4)
+      signal
     });
     if (!response.ok) {
       const errText = await response.text();
       getLogger().error(`LLM API error: ${response.status} ${errText}`);
-      return;
+      throw new PatterConnectionError(
+        `LLM API returned ${response.status}: ${errText.slice(0, 200)}`
+      );
     }
     const reader = response.body?.getReader();
     if (!reader) return;
@@ -6685,6 +7919,7 @@ var OpenAILLMProvider = class {
     }
   }
 };
+var DEFAULT_PHONE_PREAMBLE = "You are speaking on a live phone call. Respond concisely. Do not use markdown, headers, bullet lists, code fences, or emojis. Spell out numbers, currencies, dates, and units in natural spoken language. Keep replies under 2 sentences unless the caller asks for detail.";
 var LLMLoop = class {
   provider;
   systemPrompt;
@@ -6696,9 +7931,20 @@ var LLMLoop = class {
   // Fix 10: track provider/model so usage chunks can be attributed for billing.
   _providerName;
   _modelName;
-  constructor(apiKey, model, systemPrompt, tools, llmProvider) {
+  // Optional async observer fired after a successful tool execution so
+  // the host SDK (StreamHandler in pipeline mode) can surface tool calls
+  // into the transcript timeline / `onTranscript` callback. Mirrors the
+  // Python `on_tool_call` parameter on `LLMLoop.__init__`.
+  onToolCall;
+  constructor(apiKey, model, systemPrompt, tools, llmProvider, disablePhonePreamble = false) {
     this.provider = llmProvider ?? new OpenAILLMProvider(apiKey, model);
-    this.systemPrompt = systemPrompt;
+    if (disablePhonePreamble) {
+      this.systemPrompt = systemPrompt;
+    } else {
+      this.systemPrompt = systemPrompt ? `${DEFAULT_PHONE_PREAMBLE}
+${systemPrompt}` : DEFAULT_PHONE_PREAMBLE;
+    }
     if (llmProvider) {
       const key = llmProvider.constructor?.providerKey;
       if (key) {
@@ -6745,6 +7991,16 @@ var LLMLoop = class {
   setEventBus(bus) {
     this.eventBus = bus;
   }
+  /**
+   * Set or replace the post-tool-execution observer. The callback is
+   * awaited after every successful tool execution with
+   * `(name, args, result)`. Pass `undefined` to disable. Mirrors the
+   * Python `LLMLoop.set_on_tool_call` setter so callers (e.g. the
+   * pipeline `StreamHandler`) can wire the loop after construction.
+   */
+  setOnToolCall(callback) {
+    this.onToolCall = callback;
+  }
   /**
    * Stream LLM response tokens, handling tool calls automatically.
    * Yields text tokens as they arrive from the LLM.
@@ -6753,7 +8009,7 @@ var LLMLoop = class {
    *   from the provider are forwarded to {@link LlmUsageRecorder.recordLlmUsage}
    *   so token costs are included in the call cost breakdown (fix 10).
    */
-  async *run(userText, history, callContext, metrics, hookExecutor, hookCtx) {
+  async *run(userText, history, callContext, metrics, hookExecutor, hookCtx, opts) {
     let messages = this.buildMessages(history, userText);
     const maxIterations = 10;
     if (hookExecutor && hookCtx) {
@@ -6762,20 +8018,22 @@ var LLMLoop = class {
         hookCtx
       );
     }
-    const hasAfterLlm = Boolean(hookExecutor?.hasAfterLlm() && hookCtx);
+    const hasAfterLlmResponse = Boolean(hookExecutor?.hasAfterLlmResponse() && hookCtx);
+    const hasAfterLlmChunk = Boolean(hookExecutor?.hasAfterLlmChunk());
     const allEmittedText = [];
     for (let iter = 0; iter < maxIterations; iter++) {
       const toolCallsAccumulated = /* @__PURE__ */ new Map();
       const textParts = [];
       let hasToolCalls = false;
-      for await (const chunk of this.provider.stream(messages, this.openaiTools)) {
+      for await (const chunk of this.provider.stream(messages, this.openaiTools, opts)) {
         if (chunk.type === "text" && chunk.content) {
-          textParts.push(chunk.content);
-          this.eventBus?.emit("llm_chunk", { text: chunk.content, iteration: iter });
-          if (hasAfterLlm) {
-            allEmittedText.push(chunk.content);
+          const content = hasAfterLlmChunk && hookExecutor ? hookExecutor.runAfterLlmChunk(chunk.content) : chunk.content;
+          textParts.push(content);
+          this.eventBus?.emit("llm_chunk", { text: content, iteration: iter });
+          if (hasAfterLlmResponse) {
+            allEmittedText.push(content);
           } else {
-            yield chunk.content;
+            yield content;
           }
         } else if (chunk.type === "usage") {
           metrics?.recordLlmUsage(
@@ -6804,9 +8062,9 @@ var LLMLoop = class {
         }
       }
       if (!hasToolCalls) {
-        if (hasAfterLlm && hookExecutor && hookCtx) {
+        if (hasAfterLlmResponse && hookExecutor && hookCtx) {
           const finalText = allEmittedText.join("");
-          const rewritten = await hookExecutor.runAfterLlm(finalText, hookCtx);
+          const rewritten = await hookExecutor.runAfterLlmResponse(finalText, hookCtx);
           if (rewritten) yield rewritten;
         }
         return;
@@ -6840,6 +8098,15 @@ var LLMLoop = class {
           tool_call_id: tcData.id,
           content: result
         });
+        if (this.onToolCall) {
+          try {
+            await this.onToolCall(toolName, args, result);
+          } catch (err) {
+            getLogger().error(
+              `onToolCall observer failed for tool '${toolName}': ${String(err)}`
+            );
+          }
+        }
       }
     }
     getLogger().warn(`LLM loop hit max iterations (${maxIterations})`);
@@ -6868,6 +8135,7 @@ var LLMLoop = class {
 // src/test-mode.ts
 var TestSession = class {
+  /** Run a REPL-style session that loops user input through the agent's LLM/onMessage handler. */
   async run(opts) {
     const { agent, openaiKey, onMessage, onCallStart, onCallEnd } = opts;
     const callId = `test_${Date.now().toString(36)}${Math.random().toString(36).slice(2, 8)}`;
@@ -6918,7 +8186,9 @@ var TestSession = class {
         openaiKey,
         llmModel,
         resolvedPrompt,
-        agent.tools
+        agent.tools,
+        void 0,
+        agent.disablePhonePreamble ?? false
       );
     }
     let ended = false;
@@ -7036,6 +8306,7 @@ var TestSession = class {
 };
 export {
+  ErrorCode,
   PatterError,
   PatterConnectionError,
   AuthenticationError,
@@ -7067,6 +8338,7 @@ export {
   createResampler16kTo8k,
   createResampler8kTo16k,
   createResampler24kTo16k,
+  createResampler24kTo8k,
   resample8kTo16k,
   resample16kTo8k,
   resample24kTo16k,
@@ -7081,12 +8353,14 @@ export {
   isTracingEnabled,
   startSpan,
   DefaultToolExecutor,
+  mergeAbortSignals,
   OpenAILLMProvider,
   LLMLoop,
   DEFAULT_MIN_SENTENCE_LEN,
   SentenceChunker,
   PipelineHookExecutor,
   EventBus,
+  resolveLogRoot,
   EmbeddedServer,
   TestSession
 };