npm - getpatter - Versions diffs - 0.6.0 → 0.6.2 - Mend

getpatter 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/dist/barge-in-strategies-X6ARMGIQ.mjs +12 -0
package/dist/chunk-CL2U3YET.mjs +1429 -0
package/dist/chunk-D4424JZR.mjs +71 -0
package/dist/{chunk-JUQ5WQTQ.mjs → chunk-LE63CSOB.mjs} +1424 -969
package/dist/{chunk-X3364LSI.mjs → chunk-R2T4JABZ.mjs} +49 -2
package/dist/cli.js +315 -37
package/dist/dashboard/ui.html +13 -13
package/dist/index.d.mts +2136 -709
package/dist/index.d.ts +2136 -709
package/dist/index.js +5674 -2233
package/dist/index.mjs +2338 -915
package/dist/openai-realtime-2-CNFARP25.mjs +8 -0
package/dist/{silero-vad-YLCXT5GQ.mjs → silero-vad-LNDFGIY7.mjs} +1 -1
package/dist/{test-mode-Y7YG5LFZ.mjs → test-mode-RS57BDM6.mjs} +2 -1
package/package.json +1 -1
package/src/dashboard/ui.html +13 -13

package/dist/index.mjs CHANGED Viewed

@@ -6,6 +6,7 @@ import {
   CallMetricsAccumulator,
   DEFAULT_MIN_SENTENCE_LEN,
   DEFAULT_PRICING,
+  DeepgramModel,
   DeepgramSTT,
   DefaultToolExecutor,
   ElevenLabsConvAIAdapter,
@@ -15,11 +16,12 @@ import {
   LLMLoop,
   MetricsStore,
   OpenAILLMProvider,
-  OpenAIRealtimeAdapter,
+  PRICING_LAST_UPDATED,
+  PRICING_VERSION,
   PatterConnectionError,
   PatterError,
-  PcmCarry,
   PipelineHookExecutor,
+  PricingUnit,
   ProvisionError,
   RateLimitError,
   RemoteMessageHandler,
@@ -31,18 +33,14 @@ import {
   SPAN_TOOL,
   SPAN_TTS,
   SentenceChunker,
-  StatefulResampler,
   TestSession,
+  VERSION,
   calculateRealtimeCost,
   calculateSttCost,
   calculateTelephonyCost,
   calculateTtsCost,
   callsToCsv,
   callsToJson,
-  createResampler16kTo8k,
-  createResampler24kTo16k,
-  createResampler24kTo8k,
-  createResampler8kTo16k,
   initTracing,
   isRemoteUrl,
   isTracingEnabled,
@@ -52,14 +50,34 @@ import {
   mergePricing,
   mountApi,
   mountDashboard,
+  resolveLogRoot,
+  startSpan
+} from "./chunk-LE63CSOB.mjs";
+import {
+  OpenAIRealtime2Adapter,
+  OpenAIRealtimeAdapter,
+  OpenAIRealtimeAudioFormat,
+  OpenAIRealtimeModel,
+  OpenAIRealtimeVADType,
+  OpenAITranscriptionModel,
+  OpenAIVoice,
+  PcmCarry,
+  StatefulResampler,
+  createResampler16kTo8k,
+  createResampler24kTo16k,
+  createResampler24kTo8k,
+  createResampler8kTo16k,
   mulawToPcm16,
   pcm16ToMulaw,
   resample16kTo8k,
   resample24kTo16k,
-  resample8kTo16k,
-  resolveLogRoot,
-  startSpan
-} from "./chunk-JUQ5WQTQ.mjs";
+  resample8kTo16k
+} from "./chunk-CL2U3YET.mjs";
+import {
+  MinWordsStrategy,
+  evaluateStrategies,
+  resetStrategies
+} from "./chunk-D4424JZR.mjs";
 import {
   getLogger,
   setLogger
@@ -69,7 +87,7 @@ import {
 } from "./chunk-6GR5MHHQ.mjs";
 import {
   SileroVAD
-} from "./chunk-X3364LSI.mjs";
+} from "./chunk-R2T4JABZ.mjs";
 import {
   __dirname,
   __require,
@@ -99,7 +117,31 @@ var Realtime = class {
       );
     }
     this.apiKey = key;
-    this.model = opts.model ?? "gpt-4o-mini-realtime-preview";
+    this.model = opts.model ?? "gpt-realtime-mini";
+    this.voice = opts.voice ?? "alloy";
+    this.reasoningEffort = opts.reasoningEffort;
+    this.inputAudioTranscriptionModel = opts.inputAudioTranscriptionModel;
+  }
+};
+// src/engines/openai-2.ts
+init_esm_shims();
+var Realtime2 = class {
+  kind = "openai_realtime_2";
+  apiKey;
+  model;
+  voice;
+  reasoningEffort;
+  inputAudioTranscriptionModel;
+  constructor(opts = {}) {
+    const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
+    if (!key) {
+      throw new Error(
+        "OpenAI Realtime 2 requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
+      );
+    }
+    this.apiKey = key;
+    this.model = opts.model ?? "gpt-realtime-2";
     this.voice = opts.voice ?? "alloy";
     this.reasoningEffort = opts.reasoningEffort;
     this.inputAudioTranscriptionModel = opts.inputAudioTranscriptionModel;
@@ -520,11 +562,41 @@ function filterUndef(obj) {
 }
 // src/client.ts
+var PREWARM_CACHE_MAX = 200;
+var PREWARM_TTL_GRACE_MS = 5e3;
+var PARKED_CONN_TTL_MS = 3e4;
 function resolvePersistRoot(persist) {
   if (persist === false) return null;
   if (persist === true) return resolveLogRoot("auto");
   if (typeof persist === "string") return resolveLogRoot(persist);
-  return resolveLogRoot();
+  const envRoot = resolveLogRoot();
+  if (envRoot !== null) return envRoot;
+  return resolveLogRoot("auto");
+}
+function closeParkedConnections(slot) {
+  if (slot.stt) {
+    try {
+      slot.stt.close();
+    } catch {
+    }
+  }
+  if (slot.tts) {
+    try {
+      slot.tts.ws.close();
+    } catch {
+    }
+  }
+  if (slot.openaiRealtime) {
+    const wsAny = slot.openaiRealtime;
+    if (wsAny._parkedKeepalive) {
+      clearInterval(wsAny._parkedKeepalive);
+      delete wsAny._parkedKeepalive;
+    }
+    try {
+      slot.openaiRealtime.close();
+    } catch {
+    }
+  }
 }
 var Patter = class {
   localConfig;
@@ -546,6 +618,65 @@ var Patter = class {
    * ``Cannot use both tunnel: true and webhookUrl``.
    */
   tunnelOwnsWebhookUrl = false;
+  /**
+   * Pre-rendered first-message TTS audio per outbound call_id. Populated
+   * by :meth:`call` when ``agent.prewarmFirstMessage`` is true; consumed
+   * by the StreamHandler firstMessage emit so the greeting streams
+   * instantly on ``start`` instead of paying the 200-700 ms TTS first-byte
+   * latency. See ``AgentOptions.prewarmFirstMessage``.
+   *
+   * Stores raw bytes in the TTS provider's native sample rate; the
+   * carrier-side audio sender resamples on emit.
+   */
+  prewarmAudio = /* @__PURE__ */ new Map();
+  /**
+   * Call IDs whose prewarm cache slot has already been consumed —
+   * either by ``popPrewarmAudio`` (cache hit OR miss on the firstMessage
+   * emit path) or by ``recordPrewarmWaste`` (call ended before pickup).
+   * The prewarm task checks this set BEFORE writing bytes so a slow
+   * synth that finishes after the consumer already polled doesn't
+   * orphan bytes in ``prewarmAudio``. See FIX #92 in the parity audit.
+   */
+  prewarmConsumed = /* @__PURE__ */ new Set();
+  /**
+   * Background tasks tracked so :meth:`disconnect` can wait on / drop any
+   * still-running prewarm-first-message synth before tearing down.
+   */
+  prewarmTasks = /* @__PURE__ */ new Set();
+  /**
+   * TTL eviction timers keyed by call_id so :meth:`disconnect` (and
+   * normal consumption / waste-record paths) can cancel any pending
+   * timer when the slot drains naturally. Without this, the timer
+   * would WARN spuriously after the cache was already emptied.
+   */
+  prewarmTtlTimers = /* @__PURE__ */ new Map();
+  /**
+   * Pre-opened, fully-handshaked provider WebSockets keyed by
+   * carrier-issued call_id. Populated by ``parkProviderConnections``
+   * during the carrier ringing window; consumed by the per-call
+   * StreamHandler at ``start`` via ``adoptWebSocket(...)`` so STT / TTS
+   * / Realtime audio can flow on the first turn without paying the
+   * 150-900 ms TLS + WS-upgrade + protocol-handshake round-trip again.
+   *
+   * Distinct from ``prewarmAudio`` (which holds pre-rendered TTS bytes
+   * for the first message); the two features are complementary and
+   * orthogonal — both can be active for the same call.
+   *
+   * Each slot may hold up to three parked connections (STT, TTS,
+   * Realtime). Drained by:
+   *   - {@link popPrewarmedConnections} on the carrier ``start`` event
+   *     (consumed normally — the handles transfer to the StreamHandler)
+   *   - {@link recordPrewarmWaste} on call-termination paths (no-answer,
+   *     busy, failed, canceled, AMD voicemail). Closes parked sockets.
+   *   - {@link disconnect} on Patter teardown. Closes all parked sockets.
+   */
+  prewarmedConnections = /* @__PURE__ */ new Map();
+  /**
+   * TTL eviction handles keyed by call_id for connections that are never
+   * adopted (e.g. a carrier that swallows ``start``). Closes the parked
+   * sockets so they don't leak past the safety window.
+   */
+  prewarmedConnTimers = /* @__PURE__ */ new Map();
   /**
    * Speech-edge events for turn-taking instrumentation. Public surface: the
    * seven `on*` proxy accessors below plus the `conversationState` snapshot.
@@ -553,13 +684,15 @@ var Patter = class {
    * the previous behaviour.
    *
    * See `src/_speech-events.ts` for the full event taxonomy and the
-   * industry-alignment table (LiveKit / Pipecat / OpenAI Realtime).
+   * OpenAI Realtime alignment table.
    */
   speechEvents = new SpeechEvents();
   // ---- Speech-edge event callback proxies ------------------------------
-  // The seven `on*` properties below mirror the public APIs of LiveKit
-  // Agents, Pipecat and OpenAI Realtime. They proxy to `speechEvents` so
-  // the dispatcher remains the single source of truth (state + OTel).
+  // The seven `on*` properties below follow the canonical voice-agent
+  // metric set (user/agent state transitions, turn boundaries, TTFT, audio
+  // first-byte) and align with OpenAI Realtime where applicable. They
+  // proxy to `speechEvents` so the dispatcher remains the single source of
+  // truth (state + OTel).
   get onUserSpeechStarted() {
     return this.speechEvents.onUserSpeechStarted;
   }
@@ -604,8 +737,8 @@ var Patter = class {
   }
   /**
    * Snapshot of the current per-side state of the call.
-   * Mirrors LiveKit's `user_state_changed` / `agent_state_changed`
-   * payloads. Read-only and safe to call at any time.
+   * Returns the user_state / agent_state payload shape — read-only and
+   * safe to call at any time.
    */
   get conversationState() {
     return this.speechEvents.conversationState;
@@ -717,7 +850,7 @@ var Patter = class {
         );
       }
       const engine = opts.engine;
-      if (engine instanceof Realtime) {
+      if (engine instanceof Realtime || engine instanceof Realtime2) {
         working = {
           ...working,
           provider: "openai_realtime",
@@ -735,7 +868,7 @@ var Patter = class {
         };
       } else {
         throw new Error(
-          "Unknown engine. Expected OpenAIRealtime or ElevenLabsConvAI instance."
+          "Unknown engine. Expected OpenAIRealtime, OpenAIRealtime2, or ElevenLabsConvAI instance."
         );
       }
     } else if (!working.provider && (working.stt !== void 0 || working.tts !== void 0 || working.llm !== void 0)) {
@@ -795,6 +928,13 @@ var Patter = class {
     if (!opts.agent.systemPrompt && opts.agent.provider !== "pipeline") {
       throw new Error("agent.systemPrompt is required");
     }
+    if (opts.agent.echoCancellation) {
+      try {
+        await import("./aec-PJJMUM5E.mjs");
+      } catch (err) {
+        getLogger().debug(`AEC pre-import failed at serve(): ${String(err)}`);
+      }
+    }
     if (opts.port !== void 0) {
       if (typeof opts.port !== "number" || opts.port < 1 || opts.port > 65535) {
         throw new RangeError(`port must be between 1 and 65535, got ${opts.port}`);
@@ -876,6 +1016,9 @@ var Patter = class {
       opts.dashboard ?? true,
       opts.dashboardToken ?? ""
     );
+    this.embeddedServer.popPrewarmAudio = this.popPrewarmAudio;
+    this.embeddedServer.popPrewarmedConnections = this.popPrewarmedConnections;
+    this.embeddedServer.recordPrewarmWaste = this.recordPrewarmWaste;
     try {
       await this.embeddedServer.start(port);
       if (this.tunnelHandle) {
@@ -890,7 +1033,7 @@ var Patter = class {
   }
   /** Run the agent in interactive terminal-test mode (no real telephony). */
   async test(opts) {
-    const { TestSession: TestSession2 } = await import("./test-mode-Y7YG5LFZ.mjs");
+    const { TestSession: TestSession2 } = await import("./test-mode-RS57BDM6.mjs");
     const session = new TestSession2();
     await session.run({
       agent: opts.agent,
@@ -900,6 +1043,376 @@ var Patter = class {
       onCallEnd: opts.onCallEnd
     });
   }
+  /**
+   * Pop and return the pre-synthesised first-message audio for ``callId``.
+   *
+   * Returns ``undefined`` when ``agent.prewarmFirstMessage`` was not set
+   * for the originating outbound call, or when the synth was still in
+   * flight at the moment the carrier emitted ``start`` (cache miss — the
+   * StreamHandler falls back to live TTS).
+   *
+   * Called by the per-call StreamHandler at the start of the firstMessage
+   * emit. Returning bytes here lets the handler skip the live TTS
+   * synthesis and stream the cached buffer directly.
+   *
+   * Marks ``callId`` as consumed regardless of cache hit/miss so a slow
+   * synth task that finishes after this call drops its bytes instead of
+   * orphaning them in ``prewarmAudio``. See FIX #92.
+   */
+  popPrewarmAudio = (callId) => {
+    this.prewarmConsumed.add(callId);
+    const ttl = this.prewarmTtlTimers.get(callId);
+    if (ttl !== void 0) {
+      clearTimeout(ttl);
+      this.prewarmTtlTimers.delete(callId);
+    }
+    const buf = this.prewarmAudio.get(callId);
+    if (buf !== void 0) this.prewarmAudio.delete(callId);
+    return buf;
+  };
+  /**
+   * Log a warning if a prewarmed greeting was paid for but never used.
+   * The TTS bill for ``agent.firstMessage`` has already been incurred by
+   * the background synth task, so the user should know — opt-in feature
+   * with a known cost surface.
+   *
+   * Idempotent: the second call for the same ``callId`` is a no-op, so
+   * the status callback firing first and ``endCall`` running afterwards
+   * (or vice-versa) does not double-WARN. Public so the embedded
+   * server's webhook handlers can invoke it on no-answer / busy /
+   * failed / canceled / AMD-machine paths. See FIX #91.
+   */
+  recordPrewarmWaste = (callId) => {
+    this.closePrewarmedConnections(callId);
+    if (this.prewarmConsumed.has(callId)) {
+      this.prewarmAudio.delete(callId);
+      return;
+    }
+    this.prewarmConsumed.add(callId);
+    const ttl = this.prewarmTtlTimers.get(callId);
+    if (ttl !== void 0) {
+      clearTimeout(ttl);
+      this.prewarmTtlTimers.delete(callId);
+    }
+    const buf = this.prewarmAudio.get(callId);
+    if (buf !== void 0) {
+      this.prewarmAudio.delete(callId);
+      getLogger().warn(
+        `Prewarm wasted for call ${callId} \u2014 first-message TTS already paid (~${buf.byteLength} bytes synthesised) but call ended before pickup.`
+      );
+    }
+  };
+  /**
+   * Pop and return the parked provider WebSockets for ``callId``, or
+   * ``undefined`` when no parked connections exist.
+   *
+   * Wired into ``EmbeddedServer.popPrewarmedConnections`` so the
+   * per-call ``StreamHandler`` can adopt the parked sockets at the
+   * carrier ``start`` event instead of opening fresh ones — saving
+   * ~150-900 ms of cold-start handshake on the first turn.
+   */
+  popPrewarmedConnections = (callId) => {
+    const slot = this.prewarmedConnections.get(callId);
+    if (slot === void 0) return void 0;
+    this.prewarmedConnections.delete(callId);
+    const ttl = this.prewarmedConnTimers.get(callId);
+    if (ttl !== void 0) {
+      clearTimeout(ttl);
+      this.prewarmedConnTimers.delete(callId);
+    }
+    return slot;
+  };
+  /**
+   * Close any parked provider WebSockets for ``callId``. Wired into
+   * ``EmbeddedServer.closePrewarmedConnections`` so call-termination
+   * paths (no-answer, busy, failed, canceled, AMD voicemail) drop the
+   * sockets cleanly instead of leaving them to the upstream timeout.
+   */
+  closePrewarmedConnections = (callId) => {
+    const slot = this.prewarmedConnections.get(callId);
+    if (slot === void 0) return;
+    this.prewarmedConnections.delete(callId);
+    const ttl = this.prewarmedConnTimers.get(callId);
+    if (ttl !== void 0) {
+      clearTimeout(ttl);
+      this.prewarmedConnTimers.delete(callId);
+    }
+    closeParkedConnections(slot);
+  };
+  /**
+   * Open and park provider WebSockets in parallel with the carrier-side
+   * ``initiateCall``. Unlike :meth:`spawnProviderWarmup` (which closes
+   * the WS after a brief idle), the sockets opened here stay OPEN and
+   * are handed off to the per-call ``StreamHandler`` on ``start``.
+   *
+   * This is the structural fix for first-turn cold-start: on Node's
+   * ``ws`` package, opening + closing a WS does NOT warm TLS for the
+   * next open — every fresh ``new WebSocket()`` re-pays the full
+   * TCP + TLS + HTTP-101 round-trip. By keeping the WS open and
+   * adopting it directly, the live first turn skips the handshake
+   * entirely (saves ~150-900 ms depending on provider).
+   *
+   * Best-effort: each provider's parking task is wrapped in
+   * ``Promise.allSettled`` so a slow or failing endpoint cannot block
+   * the others. Providers without ``openParkedConnection`` contribute
+   * nothing — the call falls through to the cold ``connect()`` path
+   * for that provider.
+   */
+  parkProviderConnections(agent, callId) {
+    const stt = agent.stt;
+    const tts = agent.tts;
+    const sttOpen = typeof stt?.openParkedConnection === "function" ? stt.openParkedConnection.bind(stt) : null;
+    const ttsOpen = typeof tts?.openParkedConnection === "function" ? tts.openParkedConnection.bind(tts) : null;
+    const providerStr = agent.provider ?? "";
+    const wantsRealtimePark = providerStr === "openai_realtime" || providerStr === "openai_realtime_2";
+    if (!sttOpen && !ttsOpen && !wantsRealtimePark) return;
+    const slot = {};
+    this.prewarmedConnections.set(callId, slot);
+    const startedAt = Date.now();
+    const tasks = [];
+    if (sttOpen) {
+      tasks.push((async () => {
+        try {
+          const ws = await sttOpen();
+          if (this.prewarmedConnections.get(callId) !== slot) {
+            try {
+              ws.close();
+            } catch {
+            }
+            return;
+          }
+          slot.stt = ws;
+          getLogger().info(
+            `[PREWARM] callId=${callId} provider=stt ms=${Date.now() - startedAt}`
+          );
+        } catch (err) {
+          getLogger().debug(`Park STT failed for ${callId}: ${String(err)}`);
+        }
+      })());
+    }
+    if (ttsOpen) {
+      tasks.push((async () => {
+        try {
+          const parked = await ttsOpen();
+          if (this.prewarmedConnections.get(callId) !== slot) {
+            try {
+              parked.ws.close();
+            } catch {
+            }
+            return;
+          }
+          slot.tts = parked;
+          getLogger().info(
+            `[PREWARM] callId=${callId} provider=tts ms=${Date.now() - startedAt}`
+          );
+        } catch (err) {
+          getLogger().debug(`Park TTS failed for ${callId}: ${String(err)}`);
+        }
+      })());
+    }
+    if (wantsRealtimePark) {
+      tasks.push((async () => {
+        const { OpenAIRealtime2Adapter: OpenAIRealtime2Adapter2 } = await import("./openai-realtime-2-CNFARP25.mjs");
+        const apiKey = process.env.OPENAI_API_KEY ?? "";
+        if (!apiKey) {
+          getLogger().debug(`Park OpenAI Realtime skipped for ${callId}: no OPENAI_API_KEY`);
+          return;
+        }
+        try {
+          const tmpAdapter = new OpenAIRealtime2Adapter2(
+            apiKey,
+            agent.model ?? "gpt-realtime-mini",
+            agent.voice ?? "alloy",
+            agent.systemPrompt ?? "",
+            [],
+            // audioFormat — the GA adapter always emits audio/pcm@24000
+            // internally regardless of this value, but it's a required
+            // positional param. Default to g711_ulaw (Twilio wire format).
+            void 0
+          );
+          const ws = await tmpAdapter.openParkedConnection();
+          if (this.prewarmedConnections.get(callId) !== slot) {
+            try {
+              ws.close();
+            } catch {
+            }
+            return;
+          }
+          slot.openaiRealtime = ws;
+          getLogger().info(
+            `[PREWARM] callId=${callId} provider=openai_realtime ms=${Date.now() - startedAt}`
+          );
+        } catch (err) {
+          getLogger().debug(`Park OpenAI Realtime failed for ${callId}: ${String(err)}`);
+        }
+      })());
+    }
+    const task = (async () => {
+      await Promise.allSettled(tasks);
+    })();
+    this.prewarmTasks.add(task);
+    void task.finally(() => {
+      this.prewarmTasks.delete(task);
+      if (!this.prewarmedConnections.has(callId)) return;
+      const handle = setTimeout(() => {
+        this.prewarmedConnTimers.delete(callId);
+        const orphan = this.prewarmedConnections.get(callId);
+        if (orphan === void 0) return;
+        this.prewarmedConnections.delete(callId);
+        closeParkedConnections(orphan);
+        getLogger().warn(
+          `[PREWARM] parked connections evicted by TTL for ${callId} \u2014 call never reached start (~${(PARKED_CONN_TTL_MS / 1e3).toFixed(0)}s).`
+        );
+      }, PARKED_CONN_TTL_MS);
+      handle.unref?.();
+      this.prewarmedConnTimers.set(callId, handle);
+    });
+  }
+  /**
+   * Spawn a fire-and-forget task that warms up STT / TTS / LLM in
+   * parallel with the carrier-side ``initiateCall``.
+   *
+   * Best-effort: each provider's optional ``warmup()`` is wrapped in
+   * ``Promise.allSettled`` so a slow or failing endpoint cannot block
+   * the others. Providers without ``warmup`` contribute nothing.
+   */
+  spawnProviderWarmup(agent) {
+    const targets = [];
+    const collect = (provider, label) => {
+      if (!provider || typeof provider !== "object") return;
+      const fn = provider.warmup;
+      if (typeof fn !== "function") return;
+      targets.push({
+        name: label,
+        fn: fn.bind(provider)
+      });
+    };
+    collect(agent.stt, "stt");
+    collect(agent.tts, "tts");
+    collect(agent.llm, "llm");
+    if (targets.length === 0) return;
+    const task = (async () => {
+      const results = await Promise.allSettled(targets.map((t) => t.fn()));
+      results.forEach((r, i) => {
+        if (r.status === "rejected") {
+          getLogger().debug(
+            `Provider warmup failed (${targets[i].name}): ${String(r.reason)}`
+          );
+        }
+      });
+    })();
+    this.prewarmTasks.add(task);
+    void task.finally(() => this.prewarmTasks.delete(task));
+  }
+  /**
+   * Pre-render ``agent.firstMessage`` to TTS bytes during the ringing
+   * window and stash them in ``prewarmAudio.set(callId, buf)``.
+   *
+   * Skipped silently when ``agent.prewarmFirstMessage`` is false or
+   * when ``agent.tts`` / ``agent.firstMessage`` is missing. The synth
+   * is bounded by ``ringTimeout`` (default 25 s) so a never-answered
+   * call doesn't tie up the TTS connection. On timeout / error the
+   * cache is left empty and the StreamHandler falls back to live TTS.
+   *
+   * **Pipeline mode only.** Realtime / ConvAI provider modes never
+   * consume the prewarm cache (the StreamHandler for those modes runs
+   * its first-message emit through the provider's own audio path).
+   * Spawning the prewarm in those modes pays the TTS bill for nothing
+   * — refused with a warn.
+   *
+   * **Capped at ``PREWARM_CACHE_MAX`` concurrent entries.** Refused
+   * with a warn when the cap is reached (the call still proceeds —
+   * StreamHandler falls back to live TTS).
+   */
+  spawnPrewarmFirstMessage(agent, callId, ringTimeout, carrier) {
+    if (!agent.prewarmFirstMessage) return;
+    const providerMode = agent.provider ?? "openai_realtime";
+    if (providerMode !== "pipeline") {
+      getLogger().warn(
+        `agent.prewarmFirstMessage=true is only supported in pipeline mode (provider=${providerMode}); skipping pre-synth to avoid wasted TTS spend.`
+      );
+      return;
+    }
+    const firstMessage = agent.firstMessage ?? "";
+    const tts = agent.tts;
+    if (!firstMessage || !tts) return;
+    if (typeof tts.synthesizeStream !== "function") return;
+    if (carrier) {
+      const carrierAware = tts;
+      if (typeof carrierAware.setTelephonyCarrier === "function") {
+        try {
+          carrierAware.setTelephonyCarrier(carrier);
+        } catch (err) {
+          getLogger().debug(
+            `Prewarm TTS setTelephonyCarrier failed for ${callId}: ${String(err)}`
+          );
+        }
+      }
+    }
+    const inFlight = this.prewarmAudio.size + this.prewarmTasks.size;
+    if (inFlight >= PREWARM_CACHE_MAX) {
+      getLogger().warn(
+        `Prewarm cache full (${inFlight}/${PREWARM_CACHE_MAX} in-flight) \u2014 skipping pre-synth for call ${callId}; falling back to live TTS at pickup.`
+      );
+      return;
+    }
+    const timeoutMs = (typeof ringTimeout === "number" ? ringTimeout : 25) * 1e3;
+    const task = (async () => {
+      try {
+        const accumulate = async () => {
+          const chunks = [];
+          for await (const chunk of tts.synthesizeStream(firstMessage)) {
+            const u = chunk;
+            if (Buffer.isBuffer(u)) chunks.push(u);
+            else if (ArrayBuffer.isView(u))
+              chunks.push(Buffer.from(u.buffer, u.byteOffset, u.byteLength));
+          }
+          return Buffer.concat(chunks);
+        };
+        const timer = new Promise(
+          (_resolve, reject) => setTimeout(
+            () => reject(new Error("prewarm-first-message timeout")),
+            timeoutMs
+          ).unref?.()
+        );
+        const buf = await Promise.race([accumulate(), timer]);
+        if (buf.byteLength > 0) {
+          if (this.prewarmConsumed.has(callId)) {
+            getLogger().warn(
+              `Prewarm orphaned for call ${callId} \u2014 synth completed (~${buf.byteLength} bytes) AFTER consumer polled; bytes dropped, TTS bill already paid.`
+            );
+            return;
+          }
+          this.prewarmAudio.set(callId, buf);
+          getLogger().debug(
+            `Prewarm first-message ready for call ${callId} (${buf.byteLength} bytes)`
+          );
+        }
+      } catch (err) {
+        getLogger().debug(
+          `Prewarm first-message failed for call ${callId}: ${String(err)}`
+        );
+      }
+    })();
+    this.prewarmTasks.add(task);
+    void task.finally(() => {
+      this.prewarmTasks.delete(task);
+      if (!this.prewarmAudio.has(callId)) return;
+      const ttlMs = timeoutMs + PREWARM_TTL_GRACE_MS;
+      const handle = setTimeout(() => {
+        this.prewarmTtlTimers.delete(callId);
+        const orphan = this.prewarmAudio.get(callId);
+        if (orphan === void 0) return;
+        this.prewarmAudio.delete(callId);
+        this.prewarmConsumed.add(callId);
+        getLogger().warn(
+          `Prewarm bytes evicted by TTL \u2014 call ${callId} never consumed them (~${orphan.byteLength} bytes synthesised, ${(ttlMs / 1e3).toFixed(1)}s after ringTimeout).`
+        );
+      }, ttlMs);
+      handle.unref?.();
+      this.prewarmTtlTimers.set(callId, handle);
+    });
+  }
   /** Place an outbound call via the configured carrier. */
   async call(options) {
     if (!options.to) {
@@ -914,6 +1427,9 @@ var Patter = class {
     if (this.embeddedServer) {
       this.embeddedServer.onMachineDetection = options.onMachineDetection;
     }
+    if (options.agent.prewarm !== false) {
+      this.spawnProviderWarmup(options.agent);
+    }
     if (carrier.kind === "telnyx") {
       const telnyxKey = carrier.apiKey;
       const connectionId = carrier.connectionId;
@@ -939,21 +1455,35 @@ var Patter = class {
       if (!response2.ok) {
         throw new ProvisionError(`Failed to initiate Telnyx call: ${await response2.text()}`);
       }
-      if (this.embeddedServer) {
+      let telnyxCallId;
+      try {
+        const body = await response2.clone().json();
+        telnyxCallId = body.data?.call_control_id;
+      } catch {
+      }
+      if (telnyxCallId) {
+        const initiatedPayload = {
+          call_id: telnyxCallId,
+          caller: phoneNumber,
+          callee: options.to,
+          direction: "outbound",
+          status: "initiated"
+        };
+        if (this.embeddedServer) {
+          this.embeddedServer.metricsStore.recordCallInitiated(initiatedPayload);
+        }
         try {
-          const body = await response2.clone().json();
-          const callId = body.data?.call_control_id;
-          if (callId) {
-            this.embeddedServer.metricsStore.recordCallInitiated({
-              call_id: callId,
-              caller: phoneNumber,
-              callee: options.to,
-              direction: "outbound"
-            });
-          }
+          const { notifyDashboard: notifyDashboard2 } = await import("./persistence-LVIAHESK.mjs");
+          notifyDashboard2(initiatedPayload);
         } catch {
         }
       }
+      if (telnyxCallId) {
+        this.spawnPrewarmFirstMessage(options.agent, telnyxCallId, effectiveRingTimeout, "telnyx");
+        if (options.agent.prewarm !== false) {
+          this.parkProviderConnections(options.agent, telnyxCallId);
+        }
+      }
       return;
     }
     const twilioSid = carrier.accountSid;
@@ -994,34 +1524,77 @@ var Patter = class {
     if (!response.ok) {
       throw new ProvisionError(`Failed to initiate call: ${await response.text()}`);
     }
-    if (this.embeddedServer) {
-      try {
-        const body = await response.clone().json();
-        const callSid = body.sid;
-        if (callSid) {
-          this.embeddedServer.metricsStore.recordCallInitiated({
-            call_id: callSid,
-            caller: phoneNumber,
-            callee: options.to,
-            direction: "outbound"
-          });
-          const notificationsPath = body.subresource_uris?.notifications;
-          if (notificationsPath) {
-            getLogger().info(
-              `Outbound call ${callSid} placed. Twilio notifications: https://api.twilio.com${notificationsPath} (check here if the call drops with no audio).`
-            );
-          }
+    let twilioCallSid;
+    let twilioNotificationsPath;
+    try {
+      const body = await response.clone().json();
+      twilioCallSid = body.sid;
+      twilioNotificationsPath = body.subresource_uris?.notifications;
+    } catch {
+    }
+    if (twilioCallSid) {
+      const initiatedPayload = {
+        call_id: twilioCallSid,
+        caller: phoneNumber,
+        callee: options.to,
+        direction: "outbound",
+        status: "initiated"
+      };
+      if (this.embeddedServer) {
+        this.embeddedServer.metricsStore.recordCallInitiated(initiatedPayload);
+        if (twilioNotificationsPath) {
+          getLogger().info(
+            `Outbound call ${twilioCallSid} placed. Twilio notifications: https://api.twilio.com${twilioNotificationsPath} (check here if the call drops with no audio).`
+          );
         }
+      }
+      try {
+        const { notifyDashboard: notifyDashboard2 } = await import("./persistence-LVIAHESK.mjs");
+        notifyDashboard2(initiatedPayload);
       } catch {
       }
     }
+    if (twilioCallSid) {
+      this.spawnPrewarmFirstMessage(options.agent, twilioCallSid, effectiveRingTimeout, "twilio");
+      if (options.agent.prewarm !== false) {
+        this.parkProviderConnections(options.agent, twilioCallSid);
+      }
+    }
   }
   /**
    * Stop the embedded server and any running tunnel. Safe to call multiple
    * times. Leaves the instance reusable: a subsequent ``serve()`` works as
    * if the previous lifecycle never happened.
+   *
+   * Also clears any pending TTL eviction timers, awaits in-flight
+   * prewarm-first-message synth tasks (best-effort, with a 1 s safety
+   * timeout), and clears the prewarm cache. Without this a still-running
+   * TTS WS keeps the user billed long after SDK teardown, and stale
+   * entries leak across ``serve`` / ``disconnect`` cycles. See FIX #93.
    */
   async disconnect() {
+    for (const handle of this.prewarmTtlTimers.values()) {
+      clearTimeout(handle);
+    }
+    this.prewarmTtlTimers.clear();
+    if (this.prewarmTasks.size > 0) {
+      const drain = Promise.allSettled(Array.from(this.prewarmTasks));
+      const timer = new Promise(
+        (resolve) => setTimeout(resolve, 1e3).unref?.()
+      );
+      await Promise.race([drain, timer]);
+    }
+    this.prewarmTasks.clear();
+    this.prewarmAudio.clear();
+    this.prewarmConsumed.clear();
+    for (const handle of this.prewarmedConnTimers.values()) {
+      clearTimeout(handle);
+    }
+    this.prewarmedConnTimers.clear();
+    for (const slot of this.prewarmedConnections.values()) {
+      closeParkedConnections(slot);
+    }
+    this.prewarmedConnections.clear();
     if (this.tunnelHandle) {
       this.tunnelHandle.stop();
       this.tunnelHandle = null;
@@ -1072,6 +1645,7 @@ var Patter = class {
     if (!callSid) {
       throw new Error("callSid must be a non-empty string");
     }
+    this.recordPrewarmWaste(callSid);
     const carrier = this.localConfig.carrier;
     if (carrier.kind === "twilio") {
       const auth = Buffer.from(`${carrier.accountSid}:${carrier.authToken}`).toString("base64");
@@ -1107,7 +1681,7 @@ var Patter = class {
   }
 };
 async function waitForTunnelPubliclyReachable(hostname, totalTimeoutMs = 6e4, graceMs = 5e3) {
-  const log = getLogger();
+  const log2 = getLogger();
   const { Resolver } = await import("dns/promises");
   const resolver = new Resolver({ timeout: 1500, tries: 1 });
   resolver.setServers(["1.1.1.1", "8.8.8.8"]);
@@ -1119,7 +1693,7 @@ async function waitForTunnelPubliclyReachable(hostname, totalTimeoutMs = 6e4, gr
     try {
       const records = await resolver.resolve4(hostname);
       const first = records[0] ?? "<unknown>";
-      log.info(
+      log2.info(
         "Tunnel DNS resolved \u2192 %s (attempt %d); waiting %d ms grace",
         first,
         attempt,
@@ -2278,48 +2852,633 @@ function scheduleInterval(intervalOrOpts, callback) {
   };
 }
-// src/stt/deepgram.ts
-init_esm_shims();
-var STT = class extends DeepgramSTT {
-  static providerKey = "deepgram";
-  constructor(opts = {}) {
-    const key = opts.apiKey ?? process.env.DEEPGRAM_API_KEY;
-    if (!key) {
-      throw new Error(
-        "Deepgram STT requires an apiKey. Pass { apiKey: 'dg_...' } or set DEEPGRAM_API_KEY in the environment."
-      );
-    }
-    super(
-      key,
-      opts.language ?? "en",
-      opts.model ?? "nova-3",
-      opts.encoding ?? "linear16",
-      opts.sampleRate ?? 16e3,
-      {
-        endpointingMs: opts.endpointingMs ?? 150,
-        utteranceEndMs: opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3,
-        smartFormat: opts.smartFormat ?? true,
-        interimResults: opts.interimResults ?? true,
-        ...opts.vadEvents !== void 0 ? { vadEvents: opts.vadEvents } : {}
-      }
-    );
-  }
-};
-// src/stt/whisper.ts
-init_esm_shims();
-// src/providers/whisper-stt.ts
+// src/providers/elevenlabs-tts.ts
 init_esm_shims();
-var OPENAI_TRANSCRIPTION_URL = "https://api.openai.com/v1/audio/transcriptions";
-var DEFAULT_BUFFER_SIZE = 16e3 * 2;
-var ALLOWED_MODELS = /* @__PURE__ */ new Set(["whisper-1", "gpt-4o-transcribe", "gpt-4o-mini-transcribe"]);
-function wrapPcmInWav(pcm, sampleRate = 16e3, channels = 1, bitsPerSample = 16) {
-  const dataSize = pcm.length;
-  const header = Buffer.alloc(44);
-  header.write("RIFF", 0);
-  header.writeUInt32LE(36 + dataSize, 4);
-  header.write("WAVE", 8);
+var ELEVENLABS_BASE_URL = "https://api.elevenlabs.io/v1";
+// Friendly voice name -> ElevenLabs voice ID. Keys are lowercase because
+// ``resolveVoiceId`` lowercases the requested name before looking it up.
+// ``bella``, ``sarah`` and ``alloy`` intentionally share one voice ID.
+var ELEVENLABS_VOICE_ID_BY_NAME = {
+  rachel: "21m00Tcm4TlvDq8ikWAM",
+  drew: "29vD33N1CtxCmqQRPOHJ",
+  clyde: "2EiwWnXFnvU5JabPnv8n",
+  paul: "5Q0t7uMcjvnagumLfvZi",
+  domi: "AZnzlk1XvdvUeBnXmlld",
+  dave: "CYw3kZ02Hs0563khs1Fj",
+  fin: "D38z5RcWu1voky8WS1ja",
+  bella: "EXAVITQu4vr4xnSDxMaL",
+  antoni: "ErXwobaYiN019PkySvjV",
+  thomas: "GBv7mTt0atIp3Br8iCZE",
+  charlie: "IKne3meq5aSn9XLyUdCD",
+  george: "JBFqnCBsd6RMkjVDRZzb",
+  emily: "LcfcDJNUP1GQjkzn1xUU",
+  elli: "MF3mGyEYCl7XYWbV9V6O",
+  callum: "N2lVS1w4EtoT3dr4eOWO",
+  patrick: "ODq5zmih8GrVes37Dizd",
+  harry: "SOYHLrjzK2X1ezoPC6cr",
+  liam: "TX3LPaxmHKxFdv7VOQHJ",
+  dorothy: "ThT5KcBeYPX3keUQqHPh",
+  josh: "TxGEqnHWrfWFTfGW9XjX",
+  arnold: "VR6AewLTigWG4xSOukaG",
+  charlotte: "XB0fDUnXU5powFXDhCwa",
+  matilda: "XrExE9yKIg1WjnnlVkGX",
+  matthew: "Yko7PKHZNXotIFUBG7I9",
+  james: "ZQe5CZNOzWyzPSCn5a3c",
+  joseph: "Zlb1dXrM653N07WRdFW3",
+  jeremy: "bVMeCyTHy58xNoL34h3p",
+  michael: "flq6f7yk4E4fJM5XTYuZ",
+  ethan: "g5CIjZEefAph4nQFvHAz",
+  gigi: "jBpfuIE2acCO8z3wKNLl",
+  freya: "jsCqWAovK2LkecY7zXl4",
+  brian: "nPczCjzI2devNBz1zQrb",
+  grace: "oWAxZDx7w5VEj9dCyTzz",
+  daniel: "onwK4e9ZLuTAKqWW03F9",
+  lily: "pFZP5JQG7iQjIQuC4Bku",
+  serena: "pMsXgVXv3BLzUgSXRplE",
+  adam: "pNInz6obpgDQGcFmaJgB",
+  nicole: "piTKgcLEGmPE4e6mEKli",
+  bill: "pqHfZKP75CvOlQylNhV4",
+  jessie: "t0jbNlBVZ17f02VDIeMI",
+  ryan: "wViXBPUzp2ZZixB1xQuM",
+  sam: "yoZ06aMxZJJ28mfd3POQ",
+  glinda: "z9fAnlkpzviPz146aGWa",
+  giovanni: "zcAOhNBS3c14rBihAFp1",
+  mimi: "zrHiDhphv9ZnVXBqCLjz",
+  sarah: "EXAVITQu4vr4xnSDxMaL",
+  alloy: "EXAVITQu4vr4xnSDxMaL"
+};
+// A string of exactly 20 ASCII alphanumerics is treated as an ID already
+// and is passed through without consulting the name table.
+var VOICE_ID_PATTERN = /^[A-Za-z0-9]{20}$/;
+/**
+ * Map a friendly voice name to its ElevenLabs voice ID.
+ *
+ * @param voice Voice name (case-insensitive) or raw voice ID.
+ * @returns The mapped ID for a known name; otherwise ``voice`` unchanged
+ *   (falsy input, 20-character IDs and unknown names all pass through).
+ */
+function resolveVoiceId(voice) {
+  if (!voice) return voice;
+  if (VOICE_ID_PATTERN.test(voice)) return voice;
+  const key = voice.toLowerCase();
+  // Own-property check: a bare ``table[key]`` also finds inherited members,
+  // so "constructor" / "__proto__" would return a function / object that
+  // then gets interpolated into the request URL as the voice ID.
+  return Object.hasOwn(ELEVENLABS_VOICE_ID_BY_NAME, key) ? ELEVENLABS_VOICE_ID_BY_NAME[key] : voice;
+}
+// Named constants for ElevenLabs model identifiers. ``ElevenLabsTTS`` sends
+// the chosen value as ``model_id`` in the request body and defaults to
+// ``FLASH_V2_5``.
+var ElevenLabsModel = {
+  V3: "eleven_v3",
+  FLASH_V2_5: "eleven_flash_v2_5",
+  TURBO_V2_5: "eleven_turbo_v2_5",
+  MULTILINGUAL_V2: "eleven_multilingual_v2",
+  MONOLINGUAL_V1: "eleven_monolingual_v1"
+};
+// Named constants for the ``output_format`` query parameter of the streaming
+// endpoint. ``ElevenLabsTTS`` defaults to ``PCM_16000`` and switches to
+// ``ULAW_8000`` for Twilio.
+var ElevenLabsOutputFormat = {
+  MP3_22050_32: "mp3_22050_32",
+  MP3_44100_32: "mp3_44100_32",
+  MP3_44100_64: "mp3_44100_64",
+  MP3_44100_96: "mp3_44100_96",
+  MP3_44100_128: "mp3_44100_128",
+  MP3_44100_192: "mp3_44100_192",
+  PCM_8000: "pcm_8000",
+  PCM_16000: "pcm_16000",
+  PCM_22050: "pcm_22050",
+  PCM_24000: "pcm_24000",
+  PCM_44100: "pcm_44100",
+  ULAW_8000: "ulaw_8000"
+};
+/**
+ * ElevenLabs text-to-speech over the REST streaming endpoint
+ * (``POST /text-to-speech/{voiceId}/stream``).
+ *
+ * Accepts either positional arguments ``(apiKey, voiceId, modelId,
+ * outputFormat)`` or ``(apiKey, options)`` where ``options`` may carry
+ * ``voiceId``, ``modelId``, ``outputFormat``, ``voiceSettings``,
+ * ``languageCode`` and ``chunkSize``.
+ */
+var ElevenLabsTTS = class _ElevenLabsTTS {
+  // Stable pricing/dashboard key — read by stream-handler / metrics via
+  // ``(agent.tts.constructor as any).providerKey``. Without this the cost
+  // calculator falls back to ``constructor.name`` ("ElevenLabsTTS") which
+  // does NOT match the pricing table key "elevenlabs", silently zeroing
+  // TTS cost for callers that construct the raw REST class directly
+  // (exposed at top level as ``ElevenLabsRestTTS``).
+  static providerKey = "elevenlabs";
+  apiKey;
+  voiceId;
+  modelId;
+  _outputFormat;
+  _outputFormatExplicit;
+  voiceSettings;
+  languageCode;
+  chunkSize;
+  /**
+   * Public view of the (possibly auto-flipped) wire format. Read by the
+   * stream-handler to decide whether to skip the client-side resample +
+   * mulaw encode when the bytes are already in the carrier's wire codec.
+   */
+  get outputFormat() {
+    return this._outputFormat;
+  }
+  /**
+   * @param apiKey ElevenLabs API key, sent as the ``xi-api-key`` header.
+   * @param voiceIdOrOptions Voice name / voice ID, or an options object.
+   *   ``null`` is treated like an omitted voice (default voice).
+   * @param modelId Model identifier (positional form only).
+   * @param outputFormat Output format (positional form only).
+   * @throws {RangeError} If ``options.chunkSize`` is not a number greater
+   *   than zero.
+   */
+  constructor(apiKey, voiceIdOrOptions = "21m00Tcm4TlvDq8ikWAM", modelId = ElevenLabsModel.FLASH_V2_5, outputFormat = ElevenLabsOutputFormat.PCM_16000) {
+    this.apiKey = apiKey;
+    // ``typeof null`` is "object"; without the explicit null check a null
+    // second argument entered the options branch and threw on ``o.voiceId``.
+    if (voiceIdOrOptions !== null && typeof voiceIdOrOptions === "object") {
+      const o = voiceIdOrOptions;
+      const chunkSize = o.chunkSize ?? 4096;
+      // ``synthesizeStream`` advances its slice offset by ``chunkSize``.
+      // Zero or a negative value never terminates that loop and NaN drops
+      // audio, so reject those up front instead of failing mid-call.
+      if (typeof chunkSize !== "number" || !(chunkSize > 0)) {
+        throw new RangeError(`ElevenLabs TTS: chunkSize must be a number greater than 0, got ${String(chunkSize)}`);
+      }
+      this.voiceId = resolveVoiceId(o.voiceId ?? "21m00Tcm4TlvDq8ikWAM");
+      this.modelId = o.modelId ?? ElevenLabsModel.FLASH_V2_5;
+      this._outputFormatExplicit = o.outputFormat !== void 0;
+      this._outputFormat = o.outputFormat ?? ElevenLabsOutputFormat.PCM_16000;
+      this.voiceSettings = o.voiceSettings;
+      this.languageCode = o.languageCode;
+      this.chunkSize = chunkSize;
+    } else {
+      this.voiceId = resolveVoiceId(voiceIdOrOptions ?? "21m00Tcm4TlvDq8ikWAM");
+      this.modelId = modelId;
+      // Positional form cannot tell "omitted" from an explicitly passed
+      // ``pcm_16000``; only a non-default value counts as explicit here.
+      this._outputFormatExplicit = outputFormat !== ElevenLabsOutputFormat.PCM_16000;
+      this._outputFormat = outputFormat;
+      this.voiceSettings = void 0;
+      this.languageCode = void 0;
+      this.chunkSize = 4096;
+    }
+  }
+  /**
+   * Hook called by ``StreamHandler.initPipeline`` to advise the carrier
+   * wire format. When the user did NOT pass an explicit ``outputFormat``,
+   * auto-flip to the carrier's native codec so the audio bytes ElevenLabs
+   * returns are already in Twilio/Telnyx wire format — eliminating the
+   * client-side 16 kHz → 8 kHz resample and PCM → μ-law encode. The
+   * resample/encode chain was a source of audible artifacts on the
+   * prewarmed firstMessage (see 0.6.2 acceptance notes — burst delivery
+   * of resampled audio crackled on the carrier-side jitter buffer).
+   *
+   * No-op when the caller passed an explicit ``outputFormat`` (incl. via
+   * the ``forTwilio`` / ``forTelnyx`` factories) — user wins. Carriers
+   * other than "twilio" / "telnyx" leave the format untouched.
+   *
+   * Parity with {@link ElevenLabsWebSocketTTS.setTelephonyCarrier}.
+   */
+  setTelephonyCarrier(carrier) {
+    if (this._outputFormatExplicit) return;
+    if (carrier === "twilio") {
+      this._outputFormat = ElevenLabsOutputFormat.ULAW_8000;
+    } else if (carrier === "telnyx") {
+      this._outputFormat = ElevenLabsOutputFormat.PCM_16000;
+    }
+  }
+  /**
+   * Construct an instance pre-configured for Twilio Media Streams.
+   *
+   * Sets `outputFormat='ulaw_8000'` so ElevenLabs emits μ-law @ 8 kHz
+   * directly — the exact wire format Twilio's media stream uses — letting
+   * the SDK skip the 16 kHz→8 kHz resample and PCM→μ-law conversion in
+   * `TwilioAudioSender`. Saves ~30–80 ms first-byte and per-frame CPU,
+   * and removes a potential aliasing source.
+   *
+   * `voiceSettings` defaults to a low-bandwidth-friendly profile
+   * (speaker boost off, modest stability) which sounds cleaner at 8 kHz
+   * μ-law than the studio default. Pass an explicit object to override.
+   */
+  static forTwilio(apiKey, options = {}) {
+    const voiceSettings = options.voiceSettings ?? {
+      // Speaker boost adds high-frequency emphasis that aliases ugly over an
+      // 8 kHz μ-law line. Slightly higher stability tames the excursions
+      // that compander quantization noise can amplify.
+      stability: 0.6,
+      similarity_boost: 0.75,
+      use_speaker_boost: false
+    };
+    return new _ElevenLabsTTS(apiKey, {
+      ...options,
+      voiceSettings,
+      outputFormat: ElevenLabsOutputFormat.ULAW_8000
+    });
+  }
+  /**
+   * Construct an instance pre-configured for Telnyx bidirectional media.
+   *
+   * Telnyx's default media-streaming codec is L16 PCM @ 16 kHz, which
+   * matches our default Telnyx handler. We pick `pcm_16000` so the audio
+   * flows end-to-end with zero resampling or transcoding.
+   *
+   * Trade-off: if your Telnyx profile is pinned to PCMU/8000 (μ-law),
+   * construct `ElevenLabsTTS` directly with `outputFormat: 'ulaw_8000'`
+   * — Telnyx supports that natively too.
+   */
+  static forTelnyx(apiKey, options = {}) {
+    return new _ElevenLabsTTS(apiKey, {
+      ...options,
+      outputFormat: ElevenLabsOutputFormat.PCM_16000
+    });
+  }
+  /**
+   * Synthesise text to speech and return the full audio as a single Buffer.
+   *
+   * For large chunks (or when latency matters) call `synthesizeStream` instead.
+   */
+  async synthesize(text) {
+    const chunks = [];
+    for await (const chunk of this.synthesizeStream(text)) {
+      chunks.push(chunk);
+    }
+    return Buffer.concat(chunks);
+  }
+  /**
+   * Synthesise text and yield audio chunks as they arrive (streaming).
+   *
+   * The yielded buffers are raw PCM at 16 kHz (or whatever `outputFormat` is
+   * configured to). `chunkSize` controls the maximum yield size — 512 is a
+   * good choice for low-latency telephony.
+   *
+   * The whole request, including reading the body, is bounded by a 30 s
+   * abort timeout.
+   *
+   * @throws {Error} On a non-2xx response (status and body included in the
+   *   message) or when the response carries no body.
+   */
+  async *synthesizeStream(text) {
+    const url = `${ELEVENLABS_BASE_URL}/text-to-speech/${encodeURIComponent(this.voiceId)}/stream?output_format=${encodeURIComponent(this._outputFormat)}`;
+    const body = {
+      text,
+      model_id: this.modelId
+    };
+    // Optional fields are only sent when configured.
+    if (this.voiceSettings) body["voice_settings"] = this.voiceSettings;
+    if (this.languageCode) body["language_code"] = this.languageCode;
+    const response = await fetch(url, {
+      method: "POST",
+      headers: {
+        "xi-api-key": this.apiKey,
+        "Content-Type": "application/json"
+      },
+      body: JSON.stringify(body),
+      signal: AbortSignal.timeout(3e4)
+    });
+    if (!response.ok) {
+      const errBody = await response.text();
+      throw new Error(`ElevenLabs TTS error ${response.status}: ${errBody}`);
+    }
+    if (!response.body) {
+      throw new Error("ElevenLabs TTS: no response body");
+    }
+    const reader = response.body.getReader();
+    try {
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+        if (!value || value.length === 0) continue;
+        const buf = Buffer.from(value);
+        // Re-slice each network read so no yielded chunk exceeds chunkSize.
+        for (let offset = 0; offset < buf.length; offset += this.chunkSize) {
+          yield buf.subarray(offset, Math.min(offset + this.chunkSize, buf.length));
+        }
+      }
+    } finally {
+      // Runs on normal completion, on error, and when the consumer stops
+      // iterating early; cancel failures are deliberately ignored.
+      if (typeof reader.cancel === "function") await reader.cancel().catch(() => {
+      });
+      reader.releaseLock();
+    }
+  }
+};
+// src/providers/cartesia-tts.ts
+init_esm_shims();
+// Default REST origin for Cartesia; ``CartesiaTTS`` appends ``/voices`` and
+// ``/tts/bytes`` to it unless ``opts.baseUrl`` overrides it.
+var CARTESIA_BASE_URL = "https://api.cartesia.ai";
+// Default value sent in the ``Cartesia-Version`` request header.
+var CARTESIA_API_VERSION = "2025-04-16";
+// Voice used when the caller does not pass ``opts.voice``.
+var CARTESIA_DEFAULT_VOICE_ID = "f786b574-daa5-4673-aa0c-cbe3e8534c02";
+// Model identifiers; ``CartesiaTTS`` defaults to ``SONIC_3``.
+var CartesiaTTSModel = {
+  SONIC_3: "sonic-3",
+  SONIC_2: "sonic-2",
+  SONIC: "sonic"
+};
+// ``output_format.container`` values; ``CartesiaTTS.buildPayload`` always
+// requests ``RAW``.
+var CartesiaTTSContainer = {
+  RAW: "raw",
+  WAV: "wav",
+  MP3: "mp3"
+};
+// ``output_format.encoding`` values; ``CartesiaTTS.buildPayload`` always
+// requests ``PCM_S16LE``.
+var CartesiaTTSEncoding = {
+  PCM_S16LE: "pcm_s16le",
+  PCM_F32LE: "pcm_f32le",
+  PCM_MULAW: "pcm_mulaw",
+  PCM_ALAW: "pcm_alaw"
+};
+// Sample rates in Hz; ``CartesiaTTS`` defaults to ``HZ_16000`` and
+// ``CartesiaTTS.forTwilio`` uses ``HZ_8000``.
+var CartesiaTTSSampleRate = {
+  HZ_8000: 8e3,
+  HZ_16000: 16e3,
+  HZ_22050: 22050,
+  HZ_24000: 24e3,
+  HZ_44100: 44100
+};
+// ``voice.mode`` values; ``CartesiaTTS.buildPayload`` always sends ``ID``.
+var CartesiaTTSVoiceMode = {
+  ID: "id",
+  EMBEDDING: "embedding"
+};
+/**
+ * Cartesia text-to-speech over the HTTP ``/tts/bytes`` endpoint.
+ *
+ * Always requests raw PCM_S16LE at the configured ``sampleRate``.
+ * ``opts`` may carry ``model``, ``voice``, ``language``, ``sampleRate``,
+ * ``speed``, ``emotion`` (string or array), ``volume``, ``baseUrl`` and
+ * ``apiVersion``.
+ */
+var CartesiaTTS = class _CartesiaTTS {
+  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+  static providerKey = "cartesia_tts";
+  apiKey;
+  model;
+  voice;
+  language;
+  sampleRate;
+  speed;
+  emotion;
+  volume;
+  baseUrl;
+  apiVersion;
+  constructor(apiKey, opts = {}) {
+    this.apiKey = apiKey;
+    this.model = opts.model ?? CartesiaTTSModel.SONIC_3;
+    this.voice = opts.voice ?? CARTESIA_DEFAULT_VOICE_ID;
+    this.language = opts.language ?? "en";
+    this.sampleRate = opts.sampleRate ?? CartesiaTTSSampleRate.HZ_16000;
+    this.speed = opts.speed;
+    // Normalise a single emotion string to a one-element array.
+    this.emotion = typeof opts.emotion === "string" ? [opts.emotion] : opts.emotion;
+    this.volume = opts.volume;
+    this.baseUrl = opts.baseUrl ?? CARTESIA_BASE_URL;
+    this.apiVersion = opts.apiVersion ?? CARTESIA_API_VERSION;
+  }
+  /**
+   * Construct an instance pre-configured for Twilio Media Streams.
+   *
+   * Sets `sampleRate=8000` so Cartesia emits PCM_S16LE @ 8 kHz directly.
+   * Twilio's media stream uses μ-law @ 8 kHz so the SDK still does the
+   * PCM → μ-law transcode client-side, but the 16 kHz → 8 kHz resample
+   * step is skipped. Saves ~10–30 ms first-byte plus per-frame CPU and
+   * removes a potential aliasing source.
+   */
+  static forTwilio(apiKey, options = {}) {
+    return new _CartesiaTTS(apiKey, {
+      ...options,
+      sampleRate: CartesiaTTSSampleRate.HZ_8000
+    });
+  }
+  /**
+   * Construct an instance pre-configured for Telnyx bidirectional media.
+   *
+   * Sets `sampleRate=16000` to match Telnyx's L16/16000 default codec —
+   * audio flows end-to-end with zero resampling or transcoding. Same as
+   * the bare-constructor default; exists for API symmetry with
+   * {@link CartesiaTTS.forTwilio}.
+   */
+  static forTelnyx(apiKey, options = {}) {
+    return new _CartesiaTTS(apiKey, {
+      ...options,
+      sampleRate: CartesiaTTSSampleRate.HZ_16000
+    });
+  }
+  /**
+   * Build the JSON payload for the Cartesia bytes endpoint.
+   *
+   * ``generation_config`` is attached only when at least one of speed,
+   * emotion or volume is set; only the first emotion entry is sent.
+   */
+  buildPayload(text) {
+    const payload = {
+      model_id: this.model,
+      voice: { mode: CartesiaTTSVoiceMode.ID, id: this.voice },
+      transcript: text,
+      output_format: {
+        container: CartesiaTTSContainer.RAW,
+        encoding: CartesiaTTSEncoding.PCM_S16LE,
+        sample_rate: this.sampleRate
+      },
+      language: this.language
+    };
+    const generationConfig = {};
+    if (this.speed !== void 0) generationConfig.speed = this.speed;
+    if (this.emotion && this.emotion.length > 0)
+      generationConfig.emotion = this.emotion[0];
+    if (this.volume !== void 0) generationConfig.volume = this.volume;
+    if (Object.keys(generationConfig).length > 0) {
+      payload.generation_config = generationConfig;
+    }
+    return payload;
+  }
+  /**
+   * Pre-call HTTP warmup for the Cartesia `/tts/bytes` endpoint.
+   *
+   * Issues a lightweight `GET <baseUrl>/voices` so DNS, TLS and the HTTP
+   * connection are already up by the time the first `synthesizeStream()`
+   * POST lands. Best-effort: 5 s timeout, all exceptions swallowed at
+   * debug level.
+   *
+   * Billing safety: `GET /voices` is a free metadata read on
+   * Cartesia's REST surface (per https://docs.cartesia.ai). It does
+   * not consume synthesis credits. The actual synthesis is billed
+   * only when `POST /tts/bytes` runs with a non-empty `transcript`.
+   *
+   * Note: Cartesia TTS uses the HTTP path (vs the WebSocket variant
+   * Cartesia also exposes) — connection warmup is therefore HTTP-GET
+   * based, not WebSocket pre-handshake. The latency win is smaller
+   * (~50-150 ms vs the ~200-500 ms of a WS prewarm) but still real.
+   */
+  async warmup() {
+    try {
+      const response = await fetch(`${this.baseUrl}/voices`, {
+        method: "GET",
+        headers: {
+          "X-API-Key": this.apiKey,
+          "Cartesia-Version": this.apiVersion
+        },
+        signal: AbortSignal.timeout(5e3)
+      });
+      // Read the body to completion. An unread fetch body keeps the
+      // underlying connection occupied, so the follow-up POST could not
+      // reuse the socket this warmup just paid to open. The 5 s abort
+      // signal above also bounds this read.
+      await response.arrayBuffer();
+    } catch (err) {
+      getLogger().debug(`Cartesia TTS warmup failed (best-effort): ${String(err)}`);
+    }
+  }
+  /** Synthesize text and return the concatenated audio buffer. */
+  async synthesize(text) {
+    const chunks = [];
+    for await (const chunk of this.synthesizeStream(text)) {
+      chunks.push(chunk);
+    }
+    return Buffer.concat(chunks);
+  }
+  /**
+   * Synthesize text and yield raw PCM_S16LE chunks at the configured
+   * `sampleRate` as they arrive from Cartesia.
+   *
+   * The whole request, including reading the body, is bounded by a 30 s
+   * abort timeout.
+   *
+   * @throws {Error} On a non-2xx response (status and body included in the
+   *   message) or when the response carries no body.
+   */
+  async *synthesizeStream(text) {
+    const response = await fetch(`${this.baseUrl}/tts/bytes`, {
+      method: "POST",
+      headers: {
+        "X-API-Key": this.apiKey,
+        "Cartesia-Version": this.apiVersion,
+        "Content-Type": "application/json"
+      },
+      body: JSON.stringify(this.buildPayload(text)),
+      signal: AbortSignal.timeout(3e4)
+    });
+    if (!response.ok) {
+      const body = await response.text();
+      throw new Error(`Cartesia TTS error ${response.status}: ${body}`);
+    }
+    if (!response.body) {
+      throw new Error("Cartesia TTS: no response body");
+    }
+    const reader = response.body.getReader();
+    try {
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+        // Skip empty reads; forward everything else as-is.
+        if (value && value.length > 0) {
+          yield Buffer.from(value);
+        }
+      }
+    } finally {
+      // Runs on completion, error, or early consumer exit; cancel failures
+      // are deliberately ignored.
+      if (typeof reader.cancel === "function")
+        await reader.cancel().catch(() => {
+        });
+      reader.releaseLock();
+    }
+  }
+};
+// src/providers/rime-tts.ts
+init_esm_shims();
+// Default Rime synthesis endpoint; overridable via ``opts.baseUrl``.
+var RIME_BASE_URL = "https://users.rime.ai/v1/rime-tts";
+// Model identifiers accepted by ``RimeTTS``.
+var RimeModel = {
+  ARCANA: "arcana",
+  MIST: "mist",
+  MIST_V2: "mistv2"
+};
+// MIME types usable in the ``accept`` request header.
+var RimeAudioFormat = {
+  PCM: "audio/pcm",
+  MP3: "audio/mp3",
+  WAV: "audio/wav",
+  MULAW: "audio/mulaw"
+};
+// Whole-request timeouts: four minutes for arcana, thirty seconds otherwise.
+var ARCANA_MODEL_TIMEOUT_MS = 4 * 60 * 1e3;
+var MIST_MODEL_TIMEOUT_MS = 1e3 * 30;
+/** True when the model id contains "mist" (covers both "mist" and "mistv2"). */
+function isMistModel(model) {
+  const mistMarker = RimeModel.MIST;
+  return model.includes(mistMarker);
+}
+/** Pick the request timeout (ms) for a model id; every non-arcana id gets the mist value. */
+function timeoutForModel(model) {
+  return model === RimeModel.ARCANA ? ARCANA_MODEL_TIMEOUT_MS : MIST_MODEL_TIMEOUT_MS;
+}
+/**
+ * Rime text-to-speech over HTTP, requesting ``audio/pcm``.
+ *
+ * Which optional tuning fields reach the wire depends on the model family:
+ * arcana gets the sampling controls, mist models get the pacing/bracket
+ * controls (see ``buildPayload``).
+ */
+var RimeTTS = class {
+  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+  static providerKey = "rime";
+  apiKey;
+  model;
+  speaker;
+  lang;
+  sampleRate;
+  repetitionPenalty;
+  temperature;
+  topP;
+  maxTokens;
+  speedAlpha;
+  reduceLatency;
+  pauseBetweenBrackets;
+  phonemizeBetweenBrackets;
+  baseUrl;
+  totalTimeoutMs;
+  constructor(apiKey, opts = {}) {
+    const model = opts.model ?? RimeModel.ARCANA;
+    // Default speaker depends on the model family.
+    const fallbackSpeaker = isMistModel(model) ? "cove" : "astra";
+    Object.assign(this, {
+      apiKey,
+      model,
+      speaker: opts.speaker ?? fallbackSpeaker,
+      lang: opts.lang ?? "eng",
+      sampleRate: opts.sampleRate ?? 16e3,
+      repetitionPenalty: opts.repetitionPenalty,
+      temperature: opts.temperature,
+      topP: opts.topP,
+      maxTokens: opts.maxTokens,
+      speedAlpha: opts.speedAlpha,
+      reduceLatency: opts.reduceLatency,
+      pauseBetweenBrackets: opts.pauseBetweenBrackets,
+      phonemizeBetweenBrackets: opts.phonemizeBetweenBrackets,
+      baseUrl: opts.baseUrl ?? RIME_BASE_URL,
+      totalTimeoutMs: timeoutForModel(model)
+    });
+  }
+  /**
+   * Build the JSON request body for ``text``.
+   *
+   * Optional fields are included only when configured. Models that are
+   * neither arcana nor mist get just ``speaker``, ``text`` and ``modelId``.
+   */
+  buildPayload(text) {
+    const payload = { speaker: this.speaker, text, modelId: this.model };
+    const addWhenSet = (key, value) => {
+      if (value !== void 0) payload[key] = value;
+    };
+    if (this.model === RimeModel.ARCANA) {
+      addWhenSet("repetition_penalty", this.repetitionPenalty);
+      addWhenSet("temperature", this.temperature);
+      addWhenSet("top_p", this.topP);
+      addWhenSet("max_tokens", this.maxTokens);
+      payload.lang = this.lang;
+      payload.samplingRate = this.sampleRate;
+      return payload;
+    }
+    if (!isMistModel(this.model)) return payload;
+    payload.lang = this.lang;
+    payload.samplingRate = this.sampleRate;
+    addWhenSet("speedAlpha", this.speedAlpha);
+    // ``reduceLatency`` is only forwarded for the mistv2 model.
+    if (this.model === RimeModel.MIST_V2) addWhenSet("reduceLatency", this.reduceLatency);
+    addWhenSet("pauseBetweenBrackets", this.pauseBetweenBrackets);
+    addWhenSet("phonemizeBetweenBrackets", this.phonemizeBetweenBrackets);
+    return payload;
+  }
+  /** Run the streaming synthesis to completion and return one joined Buffer. */
+  async synthesize(text) {
+    const parts = [];
+    for await (const part of this.synthesizeStream(text)) parts.push(part);
+    return Buffer.concat(parts);
+  }
+  /**
+   * Stream synthesized audio for ``text``, yielding each non-empty network
+   * read as a Buffer (raw PCM_S16LE at the configured `sampleRate`).
+   *
+   * Throws on a non-2xx status, on a response whose content type does not
+   * start with "audio", and on a missing body.
+   */
+  async *synthesizeStream(text) {
+    const requestInit = {
+      method: "POST",
+      headers: {
+        accept: RimeAudioFormat.PCM,
+        Authorization: `Bearer ${this.apiKey}`,
+        "content-type": "application/json"
+      },
+      body: JSON.stringify(this.buildPayload(text)),
+      signal: AbortSignal.timeout(this.totalTimeoutMs)
+    };
+    const response = await fetch(this.baseUrl, requestInit);
+    if (!response.ok) {
+      const body = await response.text();
+      throw new Error(`Rime TTS error ${response.status}: ${body}`);
+    }
+    const contentType = response.headers.get("content-type") ?? "";
+    if (!contentType.startsWith("audio")) {
+      // A 2xx with a non-audio body is still a failure; surface a bounded excerpt.
+      const body = await response.text();
+      throw new Error(`Rime returned non-audio response: ${body.slice(0, 500)}`);
+    }
+    if (!response.body) {
+      throw new Error("Rime TTS: no response body");
+    }
+    const reader = response.body.getReader();
+    try {
+      let step = await reader.read();
+      while (!step.done) {
+        const bytes = step.value;
+        if (bytes && bytes.length > 0) {
+          yield Buffer.from(bytes);
+        }
+        step = await reader.read();
+      }
+    } finally {
+      // Always release the stream; cancel failures are deliberately ignored.
+      if (typeof reader.cancel === "function")
+        await reader.cancel().catch(() => {
+        });
+      reader.releaseLock();
+    }
+  }
+};
+// src/stt/deepgram.ts
+init_esm_shims();
+/**
+ * Options-object front end for ``DeepgramSTT``.
+ *
+ * Resolves the API key from ``opts.apiKey`` or ``DEEPGRAM_API_KEY`` and
+ * fills in defaults before delegating to the positional base constructor.
+ * Passing ``utteranceEndMs: null`` is forwarded as ``null``; omitting it
+ * yields 1000.
+ */
+var STT = class extends DeepgramSTT {
+  static providerKey = "deepgram";
+  constructor(opts = {}) {
+    const apiKey = opts.apiKey ?? process.env.DEEPGRAM_API_KEY;
+    if (!apiKey) {
+      throw new Error(
+        "Deepgram STT requires an apiKey. Pass { apiKey: 'dg_...' } or set DEEPGRAM_API_KEY in the environment."
+      );
+    }
+    const language = opts.language ?? "en";
+    const model = opts.model ?? "nova-3";
+    const encoding = opts.encoding ?? "linear16";
+    const sampleRate = opts.sampleRate ?? 16e3;
+    const tuning = {
+      endpointingMs: opts.endpointingMs ?? 150,
+      // Only ``undefined`` falls back to the default; an explicit null survives.
+      utteranceEndMs: opts.utteranceEndMs === void 0 ? 1e3 : opts.utteranceEndMs,
+      smartFormat: opts.smartFormat ?? true,
+      interimResults: opts.interimResults ?? true
+    };
+    // ``vadEvents`` is added only when the caller supplied it.
+    if (opts.vadEvents !== void 0) {
+      tuning.vadEvents = opts.vadEvents;
+    }
+    super(apiKey, language, model, encoding, sampleRate, tuning);
+  }
+};
+// src/stt/whisper.ts
+init_esm_shims();
+// src/providers/whisper-stt.ts
+init_esm_shims();
+var OPENAI_TRANSCRIPTION_URL = "https://api.openai.com/v1/audio/transcriptions";
+var DEFAULT_BUFFER_SIZE = 16e3 * 2;
+var ALLOWED_MODELS = /* @__PURE__ */ new Set(["whisper-1", "gpt-4o-transcribe", "gpt-4o-mini-transcribe"]);
+function wrapPcmInWav(pcm, sampleRate = 16e3, channels = 1, bitsPerSample = 16) {
+  const dataSize = pcm.length;
+  const header = Buffer.alloc(44);
+  header.write("RIFF", 0);
+  header.writeUInt32LE(36 + dataSize, 4);
+  header.write("WAVE", 8);
   header.write("fmt ", 12);
   header.writeUInt32LE(16, 16);
   header.writeUInt16LE(1, 20);
@@ -2333,6 +3492,8 @@ function wrapPcmInWav(pcm, sampleRate = 16e3, channels = 1, bitsPerSample = 16)
   return Buffer.concat([header, pcm]);
 }
 var WhisperSTT = class _WhisperSTT {
+  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+  static providerKey = "whisper";
   apiKey;
   model;
   language;
@@ -2501,6 +3662,8 @@ init_esm_shims();
 var ALLOWED_MODELS2 = /* @__PURE__ */ new Set(["gpt-4o-transcribe", "gpt-4o-mini-transcribe"]);
 var DEFAULT_BUFFER_SIZE2 = 16e3 * 2;
 var OpenAITranscribeSTT = class extends WhisperSTT {
+  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+  static providerKey = "openai_transcribe";
   /**
    * @param apiKey OpenAI API key.
    * @param language ISO-639-1 language code (e.g. ``"en"``, ``"it"``). Optional.
@@ -2576,6 +3739,8 @@ var CartesiaSTT = class {
   }
   apiKey;
   options;
+  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+  static providerKey = "cartesia_stt";
   ws = null;
   callbacks = /* @__PURE__ */ new Set();
   keepaliveTimer = null;
@@ -2584,6 +3749,37 @@ var CartesiaSTT = class {
    * `null` until the first transcript event arrives (matches Python's `None`).
    */
   requestId = null;
+  /**
+   * Open a fresh WebSocket without arming any message / keepalive handlers
+   * and without taking ownership on `this.ws`. Returns the OPEN socket so
+   * the caller (the prewarm pipeline) can park it for later adoption via
+   * `adoptWebSocket`. Bounded by `CONNECT_TIMEOUT_MS`.
+   *
+   * On timeout the half-open socket is closed before the promise settles
+   * as rejected, so a late handshake cannot leave an unowned connection
+   * behind.
+   *
+   * Billing safety: opening + parking the WS does not stream audio
+   * (Cartesia STT bills on streamed audio seconds), so no charge is
+   * incurred. Close the returned WS yourself if it is never adopted.
+   *
+   * @returns The connected WebSocket.
+   * @throws {Error} On connect timeout or a socket error during handshake.
+   */
+  async openParkedConnection() {
+    const url = this.buildWsUrl();
+    const ws = new WebSocket2(url, {
+      headers: { "User-Agent": USER_AGENT }
+    });
+    await new Promise((resolve, reject) => {
+      const timer = setTimeout(() => {
+        reject(new Error("Cartesia STT park connect timeout"));
+        // The caller never receives this socket after a timeout, so nobody
+        // else can close it. Without this, a handshake completing later
+        // would leave an open connection that is never adopted or released.
+        try {
+          ws.close();
+        } catch {
+        }
+      }, CONNECT_TIMEOUT_MS);
+      ws.once("open", () => {
+        clearTimeout(timer);
+        resolve();
+      });
+      // Also absorbs the error emitted when close() aborts a handshake.
+      ws.once("error", (err) => {
+        clearTimeout(timer);
+        reject(err);
+      });
+    });
+    return ws;
+  }
   buildWsUrl() {
     const opts = this.options;
     const rawBase = opts.baseUrl ?? DEFAULT_BASE_URL;
@@ -2608,6 +3804,57 @@ var CartesiaSTT = class {
     });
     return `${base}/stt/websocket?${params.toString()}`;
   }
+  /**
+   * Best-effort connection warmup for the Cartesia STT `/stt/websocket`
+   * endpoint.
+   *
+   * Connects (5 s cap), holds the socket idle for ~250 ms, then closes it,
+   * so that DNS and TLS are already warm when `connect()` runs at
+   * call-pickup — the expected saving is 200-500 ms of wire time.
+   *
+   * Billing safety: Cartesia STT bills on streamed audio seconds (per
+   * https://docs.cartesia.ai/2025-04-16/api-reference/stt/stt). No audio
+   * is sent here, so nothing billable is consumed. Failures are logged at
+   * debug level and never thrown.
+   */
+  async warmup() {
+    const endpoint = this.buildWsUrl();
+    // Resolves with the socket once it is open; rejects on error or timeout.
+    const openSocket = () => new Promise((resolve, reject) => {
+      const sock = new WebSocket2(endpoint, {
+        headers: { "User-Agent": USER_AGENT }
+      });
+      const deadline = setTimeout(() => {
+        try {
+          sock.close();
+        } catch {
+        }
+        reject(new Error("Cartesia STT warmup connect timeout"));
+      }, 5e3);
+      sock.once("open", () => {
+        clearTimeout(deadline);
+        resolve(sock);
+      });
+      sock.once("error", (err) => {
+        clearTimeout(deadline);
+        reject(err);
+      });
+    });
+    let warmSocket = null;
+    try {
+      warmSocket = await openSocket();
+      // Brief idle before closing.
+      await new Promise((done) => setTimeout(done, 250));
+    } catch (err) {
+      getLogger().debug(
+        `Cartesia STT warmup failed (best-effort): ${describeWarmupError(err)}`
+      );
+    } finally {
+      try {
+        warmSocket?.close();
+      } catch {
+      }
+    }
+  }
   /** Open the streaming WebSocket and arm message + keepalive handlers. */
   async connect() {
     const url = this.buildWsUrl();
@@ -2628,6 +3875,24 @@ var CartesiaSTT = class {
         reject(err);
       });
     });
+    this.armMessageAndKeepalive();
+  }
+  /**
+   * Adopt a pre-opened, already-OPEN WebSocket produced by the prewarm
+   * pipeline (see `Patter.parkProviderConnections`). Skips the fresh
+   * `new WebSocket()` + handshake — the WS is already through DNS, TLS
+   * and HTTP-101 so audio frames can flow on this turn instead of
+   * paying ~150-400 ms of handshake.
+   *
+   * Caller MUST verify `ws.readyState === OPEN` before calling. If the
+   * parked WS died between park and adopt, fall back to `connect()`.
+   *
+   * This method performs no state check of its own: it stores `ws` on
+   * `this.ws`, replacing whatever was there, and then arms the message
+   * and keepalive handlers.
+   *
+   * @param ws The parked WebSocket to take ownership of.
+   */
+  adoptWebSocket(ws) {
+    this.ws = ws;
+    this.armMessageAndKeepalive();
+  }
+  armMessageAndKeepalive() {
+    if (!this.ws) return;
     this.ws.on("message", (raw) => {
       let event;
       try {
@@ -2675,6 +3940,31 @@ var CartesiaSTT = class {
     if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) return;
     this.ws.send(audio);
   }
+  /**
+   * Ask Cartesia to finalise the in-flight utterance right now.
+   *
+   * Sends the ``finalize`` text frame on the live WebSocket. Cartesia
+   * answers with the final transcript followed by ``flush_done``, which
+   * sidesteps its conservative internal silence heuristic (that can wait
+   * 2-7 s on PSTN audio before finalising on its own). ``StreamHandler``
+   * calls this on the VAD ``speech_end`` event so end-of-speech detected
+   * by the SDK drives STT finalisation deterministically — parity with
+   * the Deepgram fast-path and with Python ``CartesiaSTT.finalize``.
+   *
+   * Does nothing when the socket is missing or not OPEN. A send failure
+   * is logged at debug level and the returned promise still resolves.
+   */
+  async finalize() {
+    const socket = this.ws;
+    const isOpen = socket && socket.readyState === WebSocket2.OPEN;
+    if (!isOpen) return;
+    const onSent = (done) => (err) => {
+      if (err) {
+        getLogger().debug(`Cartesia finalize send failed: ${String(err)}`);
+      }
+      done();
+    };
+    await new Promise((resolve) => {
+      socket.send(CartesiaSTTClientFrame.FINALIZE, onSent(resolve));
+    });
+  }
   /** Register a transcript listener. */
   onTranscript(callback) {
     this.callbacks.add(callback);
@@ -2748,6 +4038,17 @@ var CartesiaSTT = class {
     }
   }
 };
+/**
+ * Reduce a warmup failure to a short label for debug logging.
+ *
+ * Checked in order: numeric ``statusCode``, numeric ``code`` in
+ * [100, 600), the constructor name unless it is "Object", a string
+ * ``name``. Anything else — including non-objects and null — yields
+ * ``typeof err``. The error message itself is never included.
+ */
+function describeWarmupError(err) {
+  const isObjectLike = err !== null && typeof err === "object";
+  if (!isObjectLike) return typeof err;
+  if (typeof err.statusCode === "number") return `HTTP ${err.statusCode}`;
+  const codeIsHttpStatus = typeof err.code === "number" && err.code >= 100 && err.code < 600;
+  if (codeIsHttpStatus) return `HTTP ${err.code}`;
+  const ctorName = err.constructor?.name;
+  const hasSpecificCtor = typeof ctorName === "string" && ctorName !== "Object";
+  if (hasSpecificCtor) return ctorName;
+  return typeof err.name === "string" ? err.name : typeof err;
+}
 // src/stt/cartesia.ts
 var STT4 = class extends CartesiaSTT {
@@ -2826,6 +4127,8 @@ var TokenAccumulator = class {
   }
 };
 var SonioxSTT = class _SonioxSTT {
+  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+  static providerKey = "soniox";
   ws = null;
   callbacks = [];
   final = new TokenAccumulator();
@@ -3103,6 +4406,8 @@ var AssemblyAISTT = class _AssemblyAISTT {
   }
   apiKey;
   options;
+  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+  static providerKey = "assemblyai";
   ws = null;
   callbacks = /* @__PURE__ */ new Set();
   closing = false;
@@ -3192,6 +4497,62 @@ var AssemblyAISTT = class _AssemblyAISTT {
     }
     return headers;
   }
+  /**
+   * Pre-call WebSocket warmup for the AssemblyAI v3 `/v3/ws` endpoint.
+   *
+   * Opens the WS (DNS + TLS + auth handshake), idles ~250 ms so the
+   * AssemblyAI edge keeps the session state warm, then sends Terminate
+   * and closes. By the time `connect()` is invoked at call-pickup the
+   * resolver and TLS session are hot — net wire time saving of
+   * 200-500 ms.
+   *
+   * Billing safety: AssemblyAI Universal Streaming bills on streamed
+   * audio seconds (per https://www.assemblyai.com/pricing). Opening +
+   * closing the WebSocket without forwarding any audio frames does
+   * not consume billable seconds. Best-effort: failures logged at
+   * debug level.
+   */
+  async warmup() {
+    const url = this.buildUrl();
+    const headers = this.buildHeaders();
+    let ws = null;
+    try {
+      ws = await new Promise((resolve, reject) => {
+        const sock = new WebSocket4(url, { headers });
+        const timer = setTimeout(() => {
+          try {
+            sock.close();
+          } catch {
+          }
+          reject(new Error("AssemblyAI STT warmup connect timeout"));
+        }, 5e3);
+        sock.once("open", () => {
+          clearTimeout(timer);
+          resolve(sock);
+        });
+        sock.once("error", (err) => {
+          clearTimeout(timer);
+          reject(err);
+        });
+      });
+      await new Promise((r) => setTimeout(r, 250));
+      try {
+        ws.send(JSON.stringify({ type: AssemblyAIClientFrame.TERMINATE }));
+      } catch {
+      }
+    } catch (err) {
+      getLogger().debug(
+        `AssemblyAI STT warmup failed (best-effort): ${describeWarmupError2(err)}`
+      );
+    } finally {
+      if (ws) {
+        try {
+          ws.close();
+        } catch {
+        }
+      }
+    }
+  }
   /** Open the streaming WebSocket and arm message handlers. */
   async connect() {
     this.closing = false;
@@ -3420,6 +4781,17 @@ function averageConfidence(words) {
   }
   return total / words.length;
 }
+function describeWarmupError2(err) {
+  if (typeof err === "object" && err !== null) {
+    const e = err;
+    if (typeof e.statusCode === "number") return `HTTP ${e.statusCode}`;
+    if (typeof e.code === "number" && e.code >= 100 && e.code < 600) return `HTTP ${e.code}`;
+    const ctor = e.constructor?.name;
+    if (typeof ctor === "string" && ctor !== "Object") return ctor;
+    if (typeof e.name === "string") return e.name;
+  }
+  return typeof err;
+}
 // src/stt/assemblyai.ts
 var STT6 = class extends AssemblyAISTT {
@@ -3476,6 +4848,8 @@ var SpeechmaticsServerMessage = {
   ERROR: "Error"
 };
 var SpeechmaticsSTT = class {
+  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+  static providerKey = "speechmatics";
   ws = null;
   transcriptCallbacks = /* @__PURE__ */ new Set();
   errorCallbacks = /* @__PURE__ */ new Set();
@@ -3729,275 +5103,60 @@ var SpeechmaticsSTT = class {
   emitError(err) {
     for (const cb of this.errorCallbacks) {
       try {
-        cb(err);
-      } catch (cbErr) {
-        getLogger().error(`SpeechmaticsSTT error callback threw: ${String(cbErr)}`);
-      }
-    }
-  }
-  handleError(err) {
-    getLogger().error(`SpeechmaticsSTT WebSocket error: ${err.message}`);
-    this.emitError(err);
-  }
-  handleClose() {
-    if (!this.running) return;
-    this.running = false;
-  }
-  /** Send `EndOfStream` and close the WebSocket. Idempotent. */
-  close() {
-    this.running = false;
-    const ws = this.ws;
-    if (!ws) return;
-    this.ws = null;
-    const sendSafe = (payload) => {
-      if (ws.readyState === WebSocket5.OPEN) {
-        try {
-          ws.send(payload);
-        } catch {
-        }
-      }
-    };
-    sendSafe(
-      JSON.stringify({ message: "EndOfStream", last_seq_no: this.lastSeqNo })
-    );
-    try {
-      ws.close();
-    } catch {
-    }
-  }
-};
-// src/stt/speechmatics.ts
-var STT7 = class extends SpeechmaticsSTT {
-  static providerKey = "speechmatics";
-  constructor(opts = {}) {
-    const key = opts.apiKey ?? process.env.SPEECHMATICS_API_KEY;
-    if (!key) {
-      throw new Error(
-        "Speechmatics STT requires an apiKey. Pass { apiKey: 'sm_...' } or set SPEECHMATICS_API_KEY in the environment."
-      );
-    }
-    super(key, opts);
-  }
-};
-// src/tts/elevenlabs.ts
-init_esm_shims();
-// src/providers/elevenlabs-tts.ts
-init_esm_shims();
-var ELEVENLABS_BASE_URL = "https://api.elevenlabs.io/v1";
-var ELEVENLABS_VOICE_ID_BY_NAME = {
-  rachel: "21m00Tcm4TlvDq8ikWAM",
-  drew: "29vD33N1CtxCmqQRPOHJ",
-  clyde: "2EiwWnXFnvU5JabPnv8n",
-  paul: "5Q0t7uMcjvnagumLfvZi",
-  domi: "AZnzlk1XvdvUeBnXmlld",
-  dave: "CYw3kZ02Hs0563khs1Fj",
-  fin: "D38z5RcWu1voky8WS1ja",
-  bella: "EXAVITQu4vr4xnSDxMaL",
-  antoni: "ErXwobaYiN019PkySvjV",
-  thomas: "GBv7mTt0atIp3Br8iCZE",
-  charlie: "IKne3meq5aSn9XLyUdCD",
-  george: "JBFqnCBsd6RMkjVDRZzb",
-  emily: "LcfcDJNUP1GQjkzn1xUU",
-  elli: "MF3mGyEYCl7XYWbV9V6O",
-  callum: "N2lVS1w4EtoT3dr4eOWO",
-  patrick: "ODq5zmih8GrVes37Dizd",
-  harry: "SOYHLrjzK2X1ezoPC6cr",
-  liam: "TX3LPaxmHKxFdv7VOQHJ",
-  dorothy: "ThT5KcBeYPX3keUQqHPh",
-  josh: "TxGEqnHWrfWFTfGW9XjX",
-  arnold: "VR6AewLTigWG4xSOukaG",
-  charlotte: "XB0fDUnXU5powFXDhCwa",
-  matilda: "XrExE9yKIg1WjnnlVkGX",
-  matthew: "Yko7PKHZNXotIFUBG7I9",
-  james: "ZQe5CZNOzWyzPSCn5a3c",
-  joseph: "Zlb1dXrM653N07WRdFW3",
-  jeremy: "bVMeCyTHy58xNoL34h3p",
-  michael: "flq6f7yk4E4fJM5XTYuZ",
-  ethan: "g5CIjZEefAph4nQFvHAz",
-  gigi: "jBpfuIE2acCO8z3wKNLl",
-  freya: "jsCqWAovK2LkecY7zXl4",
-  brian: "nPczCjzI2devNBz1zQrb",
-  grace: "oWAxZDx7w5VEj9dCyTzz",
-  daniel: "onwK4e9ZLuTAKqWW03F9",
-  lily: "pFZP5JQG7iQjIQuC4Bku",
-  serena: "pMsXgVXv3BLzUgSXRplE",
-  adam: "pNInz6obpgDQGcFmaJgB",
-  nicole: "piTKgcLEGmPE4e6mEKli",
-  bill: "pqHfZKP75CvOlQylNhV4",
-  jessie: "t0jbNlBVZ17f02VDIeMI",
-  ryan: "wViXBPUzp2ZZixB1xQuM",
-  sam: "yoZ06aMxZJJ28mfd3POQ",
-  glinda: "z9fAnlkpzviPz146aGWa",
-  giovanni: "zcAOhNBS3c14rBihAFp1",
-  mimi: "zrHiDhphv9ZnVXBqCLjz",
-  sarah: "EXAVITQu4vr4xnSDxMaL",
-  alloy: "EXAVITQu4vr4xnSDxMaL"
-};
-var VOICE_ID_PATTERN = /^[A-Za-z0-9]{20}$/;
-function resolveVoiceId(voice) {
-  if (!voice) return voice;
-  if (VOICE_ID_PATTERN.test(voice)) return voice;
-  return ELEVENLABS_VOICE_ID_BY_NAME[voice.toLowerCase()] ?? voice;
-}
-var ElevenLabsModel = {
-  V3: "eleven_v3",
-  FLASH_V2_5: "eleven_flash_v2_5",
-  TURBO_V2_5: "eleven_turbo_v2_5",
-  MULTILINGUAL_V2: "eleven_multilingual_v2",
-  MONOLINGUAL_V1: "eleven_monolingual_v1"
-};
-var ElevenLabsOutputFormat = {
-  MP3_22050_32: "mp3_22050_32",
-  MP3_44100_32: "mp3_44100_32",
-  MP3_44100_64: "mp3_44100_64",
-  MP3_44100_96: "mp3_44100_96",
-  MP3_44100_128: "mp3_44100_128",
-  MP3_44100_192: "mp3_44100_192",
-  PCM_8000: "pcm_8000",
-  PCM_16000: "pcm_16000",
-  PCM_22050: "pcm_22050",
-  PCM_24000: "pcm_24000",
-  PCM_44100: "pcm_44100",
-  ULAW_8000: "ulaw_8000"
-};
-var ElevenLabsTTS = class _ElevenLabsTTS {
-  apiKey;
-  voiceId;
-  modelId;
-  outputFormat;
-  voiceSettings;
-  languageCode;
-  chunkSize;
-  constructor(apiKey, voiceIdOrOptions = "21m00Tcm4TlvDq8ikWAM", modelId = ElevenLabsModel.FLASH_V2_5, outputFormat = ElevenLabsOutputFormat.PCM_16000) {
-    this.apiKey = apiKey;
-    if (typeof voiceIdOrOptions === "object") {
-      const o = voiceIdOrOptions;
-      this.voiceId = resolveVoiceId(o.voiceId ?? "21m00Tcm4TlvDq8ikWAM");
-      this.modelId = o.modelId ?? ElevenLabsModel.FLASH_V2_5;
-      this.outputFormat = o.outputFormat ?? ElevenLabsOutputFormat.PCM_16000;
-      this.voiceSettings = o.voiceSettings;
-      this.languageCode = o.languageCode;
-      this.chunkSize = o.chunkSize ?? 4096;
-    } else {
-      this.voiceId = resolveVoiceId(voiceIdOrOptions);
-      this.modelId = modelId;
-      this.outputFormat = outputFormat;
-      this.voiceSettings = void 0;
-      this.languageCode = void 0;
-      this.chunkSize = 4096;
-    }
-  }
-  /**
-   * Construct an instance pre-configured for Twilio Media Streams.
-   *
-   * Sets `outputFormat='ulaw_8000'` so ElevenLabs emits μ-law @ 8 kHz
-   * directly — the exact wire format Twilio's media stream uses — letting
-   * the SDK skip the 16 kHz→8 kHz resample and PCM→μ-law conversion in
-   * `TwilioAudioSender`. Saves ~30–80 ms first-byte and per-frame CPU,
-   * and removes a potential aliasing source.
-   *
-   * `voiceSettings` defaults to a low-bandwidth-friendly profile
-   * (speaker boost off, modest stability) which sounds cleaner at 8 kHz
-   * μ-law than the studio default. Pass an explicit object to override.
-   */
-  static forTwilio(apiKey, options = {}) {
-    const voiceSettings = options.voiceSettings ?? {
-      // Speaker boost adds high-frequency emphasis that aliases ugly over an
-      // 8 kHz μ-law line. Slightly higher stability tames the excursions
-      // that compander quantization noise can amplify.
-      stability: 0.6,
-      similarity_boost: 0.75,
-      use_speaker_boost: false
-    };
-    return new _ElevenLabsTTS(apiKey, {
-      ...options,
-      voiceSettings,
-      outputFormat: ElevenLabsOutputFormat.ULAW_8000
-    });
-  }
-  /**
-   * Construct an instance pre-configured for Telnyx bidirectional media.
-   *
-   * Telnyx's default media-streaming codec is L16 PCM @ 16 kHz, which
-   * matches our default Telnyx handler. We pick `pcm_16000` so the audio
-   * flows end-to-end with zero resampling or transcoding.
-   *
-   * Trade-off: if your Telnyx profile is pinned to PCMU/8000 (μ-law),
-   * construct `ElevenLabsTTS` directly with `outputFormat: 'ulaw_8000'`
-   * — Telnyx supports that natively too.
-   */
-  static forTelnyx(apiKey, options = {}) {
-    return new _ElevenLabsTTS(apiKey, {
-      ...options,
-      outputFormat: ElevenLabsOutputFormat.PCM_16000
-    });
-  }
-  /**
-   * Synthesise text to speech and return the full audio as a single Buffer.
-   *
-   * For large chunks (or when latency matters) call `synthesizeStream` instead.
-   */
-  async synthesize(text) {
-    const chunks = [];
-    for await (const chunk of this.synthesizeStream(text)) {
-      chunks.push(chunk);
-    }
-    return Buffer.concat(chunks);
-  }
-  /**
-   * Synthesise text and yield audio chunks as they arrive (streaming).
-   *
-   * The yielded buffers are raw PCM at 16 kHz (or whatever `outputFormat` is
-   * configured to). `chunkSize` controls the maximum yield size — 512 is a
-   * good choice for low-latency telephony.
-   */
-  async *synthesizeStream(text) {
-    const url = `${ELEVENLABS_BASE_URL}/text-to-speech/${encodeURIComponent(this.voiceId)}/stream?output_format=${encodeURIComponent(this.outputFormat)}`;
-    const body = {
-      text,
-      model_id: this.modelId
-    };
-    if (this.voiceSettings) body["voice_settings"] = this.voiceSettings;
-    if (this.languageCode) body["language_code"] = this.languageCode;
-    const response = await fetch(url, {
-      method: "POST",
-      headers: {
-        "xi-api-key": this.apiKey,
-        "Content-Type": "application/json"
-      },
-      body: JSON.stringify(body),
-      signal: AbortSignal.timeout(3e4)
-    });
-    if (!response.ok) {
-      const errBody = await response.text();
-      throw new Error(`ElevenLabs TTS error ${response.status}: ${errBody}`);
-    }
-    if (!response.body) {
-      throw new Error("ElevenLabs TTS: no response body");
-    }
-    const reader = response.body.getReader();
-    try {
-      while (true) {
-        const { done, value } = await reader.read();
-        if (done) break;
-        if (!value || value.length === 0) continue;
-        const buf = Buffer.from(value);
-        for (let offset = 0; offset < buf.length; offset += this.chunkSize) {
-          yield buf.subarray(offset, Math.min(offset + this.chunkSize, buf.length));
+        cb(err);
+      } catch (cbErr) {
+        getLogger().error(`SpeechmaticsSTT error callback threw: ${String(cbErr)}`);
+      }
+    }
+  }
+  handleError(err) {
+    getLogger().error(`SpeechmaticsSTT WebSocket error: ${err.message}`);
+    this.emitError(err);
+  }
+  handleClose() {
+    if (!this.running) return;
+    this.running = false;
+  }
+  /** Send `EndOfStream` and close the WebSocket. Idempotent. */
+  close() {
+    this.running = false;
+    const ws = this.ws;
+    if (!ws) return; // no socket held, so there is nothing to tear down
+    this.ws = null; // drop the reference first so a repeated close() returns early above
+    const sendSafe = (payload) => { // sends only while the socket is OPEN; send failures are ignored
+      if (ws.readyState === WebSocket5.OPEN) {
+        try {
+          ws.send(payload);
+        } catch {
         }
       }
+    };
+    sendSafe(
+      JSON.stringify({ message: "EndOfStream", last_seq_no: this.lastSeqNo }) // final frame carries the last sequence number
+    );
+    try {
+      ws.close();
+    } catch {
+    }
+  }
+};
+// src/stt/speechmatics.ts
+var STT7 = class extends SpeechmaticsSTT { // wrapper that resolves the API key before delegating to SpeechmaticsSTT
+  static providerKey = "speechmatics";
+  constructor(opts = {}) {
+    const key = opts.apiKey ?? process.env.SPEECHMATICS_API_KEY; // explicit option wins; the environment variable is the fallback
+    if (!key) { // also rejects an empty-string key
+      throw new Error(
+        "Speechmatics STT requires an apiKey. Pass { apiKey: 'sm_...' } or set SPEECHMATICS_API_KEY in the environment."
+      );
     }
+    super(key, opts); // the full options object is forwarded unchanged
   }
 };
 // src/tts/elevenlabs.ts
+init_esm_shims();
 function resolveApiKey(apiKey) {
   const key = apiKey ?? process.env.ELEVENLABS_API_KEY;
   if (!key) {
@@ -4013,7 +5172,7 @@ var TTS = class _TTS extends ElevenLabsTTS {
     super(resolveApiKey(opts.apiKey), {
       voiceId: opts.voiceId ?? "EXAVITQu4vr4xnSDxMaL",
       modelId: opts.modelId ?? "eleven_flash_v2_5",
-      outputFormat: opts.outputFormat ?? "pcm_16000",
+      ...opts.outputFormat !== void 0 ? { outputFormat: opts.outputFormat } : {},
       languageCode: opts.languageCode,
       voiceSettings: opts.voiceSettings
     });
@@ -4052,7 +5211,7 @@ var ElevenLabsPlanError = class extends ElevenLabsTTSError {
     this.name = "ElevenLabsPlanError";
   }
 };
-var PLAN_REQUIRED_MSG = "ElevenLabs WS streaming requires a Pro plan or higher (the WS endpoint returned `payment_required`). Either upgrade at https://elevenlabs.io/pricing, or use the HTTP `ElevenLabsTTS` class which works on all plans (drop-in API).";
+var PLAN_REQUIRED_MSG = "ElevenLabs WS streaming requires a Pro plan or higher (the WS endpoint returned `payment_required`). Either upgrade at https://elevenlabs.io/pricing, or use `ElevenLabsRestTTS` for HTTP REST instead which works on all plans (drop-in API).";
 // Coerce `value` to a string, replace CR, LF and NUL with single spaces,
 // and cap the result at `limit` characters.
 function sanitiseLogStr(value, limit = 200) {
   const flattened = String(value).replace(/[\r\n\x00]/g, " ");
   return flattened.slice(0, limit);
 }
@@ -4071,6 +5230,33 @@ var ElevenLabsWebSocketTTS = class _ElevenLabsWebSocketTTS {
   inactivityTimeout;
   chunkLengthSchedule;
   chunkSize;
+  /**
+   * Single-slot adoption queue. The prewarm pipeline parks one WS per
+   * outbound call here; the next `synthesizeStream` call consumes it
+   * (skipping `new WebSocket()` and the BOS send) instead of opening
+   * a fresh socket. The slot is consumed exactly once: if two
+   * `synthesizeStream` calls overlap, only the first to start benefits.
+   *
+   * We keep this on the adapter (not in a parameter) so the existing
+   * `for await (const chunk of agent.tts.synthesizeStream(...))` call
+   * site in `StreamHandler` continues to work without signature
+   * changes.
+   */
+  adoptedConnection = null;
+  /**
+   * Active WS for the in-flight ``synthesizeStream`` call, if any. Set
+   * when a stream starts, cleared in its ``finally`` block. The
+   * stream-handler calls ``cancelActiveStream()`` from ``cancelSpeaking``
+   * to unblock the generator's inner ``await Promise<frame>`` — without
+   * it, a barge-in on the firstMessage live path leaves the for-await
+   * stuck waiting for the next frame; ElevenLabs never sends
+   * ``isFinal=true`` after the consumer breaks, the 30 s frame timeout
+   * fires post-call, and meanwhile ``initPipeline`` never returns so
+   * the STT ``onTranscript`` callback never registers and subsequent
+   * user turns are silently dropped (root cause of the 2026-05-20
+   * "first message OK, then no response" symptom).
+   */
+  activeStreamWs = null;
   /**
    * The wire format requested over the ElevenLabs WS. Initially set from
    * the constructor; ``setTelephonyCarrier`` may auto-flip it to the
@@ -4086,7 +5272,7 @@ var ElevenLabsWebSocketTTS = class _ElevenLabsWebSocketTTS {
   constructor(opts) {
     if (opts.modelId === "eleven_v3") {
       throw new Error(
-        "eleven_v3 is not supported by the WebSocket stream-input endpoint \u2014 use the HTTP ElevenLabsTTS class instead."
+        "eleven_v3 is not supported by the WebSocket stream-input endpoint \u2014 use `ElevenLabsRestTTS` for HTTP REST instead."
       );
     }
     this.apiKey = opts.apiKey;
@@ -4119,6 +5305,32 @@ var ElevenLabsWebSocketTTS = class _ElevenLabsWebSocketTTS {
     if (!native) return;
     this._outputFormat = native;
   }
+  /**
+   * Force-close the WebSocket of any in-flight ``synthesizeStream`` call.
+   * Called by the stream-handler from ``cancelSpeaking`` (barge-in) so
+   * the generator's inner ``await Promise<frame>`` loop unblocks cleanly
+   * via the ``onClose`` handler — instead of waiting up to 30 s for the
+   * ``FRAME_TIMEOUT_MS`` watchdog to fire. No-op when no stream is in
+   * flight or when the WS is already closing.
+   *
+   * Without this, a barge-in during the firstMessage live path left the
+   * for-await stuck (ElevenLabs never sends ``isFinal=true`` after the
+   * consumer breaks), ``initPipeline`` never returned, the STT
+   * ``onTranscript`` callback never registered, and the entire remainder
+   * of the call was silent for the user. Surfaced during the 2026-05-20
+   * acceptance run.
+   */
+  cancelActiveStream() {
+    const ws = this.activeStreamWs;
+    if (!ws) return;
+    this.activeStreamWs = null;
+    try {
+      if (ws.readyState === WebSocket6.OPEN || ws.readyState === WebSocket6.CONNECTING) {
+        ws.close();
+      }
+    } catch {
+    }
+  }
   /** Pre-configured for Twilio Media Streams (`ulaw_8000`). */
   static forTwilio(opts) {
     return new _ElevenLabsWebSocketTTS({
@@ -4148,6 +5360,24 @@ var ElevenLabsWebSocketTTS = class _ElevenLabsWebSocketTTS {
     if (this.languageCode) params.set("language_code", this.languageCode);
     return `${WS_BASE}/${encodeURIComponent(this.voiceId)}/stream-input?${params.toString()}`;
   }
+  /**
+   * Build the protocol-required BOS frame sent on every fresh WS.
+   *
+   * The single-space `{"text": " "}` keep-alive establishes the session
+   * without committing any synthesis (no `flush: true`, no real text).
+   * Production `synthesizeStream()` and `warmup()` share this exact
+   * construction so the upstream worker chooses the same per-session
+   * config in both cases — otherwise the warm session is on a different
+   * worker than the live request, which defeats the warmup goal.
+   */
+  buildBosFrame() {
+    const init = { text: " " };
+    if (this.voiceSettings) init["voice_settings"] = this.voiceSettings;
+    if (!this.autoMode && this.chunkLengthSchedule) {
+      init["generation_config"] = { chunk_length_schedule: this.chunkLengthSchedule };
+    }
+    return init;
+  }
   /**
    * Single-shot synthesis: open WS, send text, yield bytes, close.
    *
@@ -4166,9 +5396,27 @@ var ElevenLabsWebSocketTTS = class _ElevenLabsWebSocketTTS {
    *   after flush — auto_mode could otherwise truncate the tail audio).
    */
   async *synthesizeStream(text) {
-    const ws = new WebSocket6(this.buildUrl(), {
-      headers: { "xi-api-key": this.apiKey }
-    });
+    let ws;
+    let bosAlreadySent = false;
+    let adopted = false;
+    const parked = this.adoptedConnection;
+    this.adoptedConnection = null;
+    if (parked && parked.ws.readyState === WebSocket6.OPEN) {
+      ws = parked.ws;
+      bosAlreadySent = parked.bosSent;
+      adopted = true;
+    } else {
+      if (parked) {
+        try {
+          parked.ws.close();
+        } catch {
+        }
+      }
+      ws = new WebSocket6(this.buildUrl(), {
+        headers: { "xi-api-key": this.apiKey }
+      });
+    }
+    this.activeStreamWs = ws;
     const queue = [];
     let done = false;
     let pendingError = null;
@@ -4238,28 +5486,27 @@ var ElevenLabsWebSocketTTS = class _ElevenLabsWebSocketTTS {
     };
     ws.on("error", onError);
     try {
-      await new Promise((resolve, reject) => {
-        connectTimer = setTimeout(
-          () => reject(new Error("ElevenLabs WS connect timeout")),
-          CONNECT_TIMEOUT_MS4
-        );
-        ws.once("open", () => {
-          if (connectTimer) clearTimeout(connectTimer);
-          connectTimer = void 0;
-          resolve();
-        });
-        ws.once("error", (err) => {
-          if (connectTimer) clearTimeout(connectTimer);
-          connectTimer = void 0;
-          reject(err);
+      if (!adopted) {
+        await new Promise((resolve, reject) => {
+          connectTimer = setTimeout(
+            () => reject(new Error("ElevenLabs WS connect timeout")),
+            CONNECT_TIMEOUT_MS4
+          );
+          ws.once("open", () => {
+            if (connectTimer) clearTimeout(connectTimer);
+            connectTimer = void 0;
+            resolve();
+          });
+          ws.once("error", (err) => {
+            if (connectTimer) clearTimeout(connectTimer);
+            connectTimer = void 0;
+            reject(err);
+          });
         });
-      });
-      const init = { text: " " };
-      if (this.voiceSettings) init["voice_settings"] = this.voiceSettings;
-      if (!this.autoMode && this.chunkLengthSchedule) {
-        init["generation_config"] = { chunk_length_schedule: this.chunkLengthSchedule };
       }
-      ws.send(JSON.stringify(init));
+      if (!bosAlreadySent) {
+        ws.send(JSON.stringify(this.buildBosFrame()));
+      }
       ws.send(JSON.stringify({ text: text + " ", flush: true }));
       ws.on("message", onMessage);
       ws.on("close", onClose);
@@ -4290,6 +5537,7 @@ var ElevenLabsWebSocketTTS = class _ElevenLabsWebSocketTTS {
       }
     } finally {
       if (connectTimer) clearTimeout(connectTimer);
+      if (this.activeStreamWs === ws) this.activeStreamWs = null;
       try {
         if (ws.readyState === WebSocket6.OPEN) {
           ws.send(JSON.stringify({ text: "" }));
@@ -4305,387 +5553,227 @@ var ElevenLabsWebSocketTTS = class _ElevenLabsWebSocketTTS {
       ws.removeAllListeners();
     }
   }
-  /** No-op — connections are per-utterance and torn down inside synthesizeStream. */
-  async close() {
-  }
-};
-function looksLikeJson(buf) {
-  if (buf.length === 0) return false;
-  const b = buf[0];
-  return b === 123 || b === 91;
-}
-// src/tts/elevenlabs-ws.ts
-function resolveApiKey2(apiKey) {
-  const key = apiKey ?? process.env.ELEVENLABS_API_KEY;
-  if (!key) {
-    throw new Error(
-      "ElevenLabs WebSocket TTS requires an apiKey. Pass { apiKey: '...' } or set ELEVENLABS_API_KEY in the environment."
-    );
-  }
-  return key;
-}
-function buildOpts(opts) {
-  const out = {
-    apiKey: resolveApiKey2(opts.apiKey),
-    modelId: opts.modelId ?? "eleven_flash_v2_5",
-    outputFormat: opts.outputFormat ?? "pcm_16000",
-    autoMode: opts.autoMode ?? true
-  };
-  if (opts.voiceId !== void 0) out.voiceId = opts.voiceId;
-  if (opts.voiceSettings !== void 0) out.voiceSettings = opts.voiceSettings;
-  if (opts.languageCode !== void 0) out.languageCode = opts.languageCode;
-  if (opts.inactivityTimeout !== void 0) out.inactivityTimeout = opts.inactivityTimeout;
-  if (opts.chunkLengthSchedule !== void 0) out.chunkLengthSchedule = opts.chunkLengthSchedule;
-  return out;
-}
-var TTS2 = class _TTS extends ElevenLabsWebSocketTTS {
-  static providerKey = "elevenlabs_ws";
-  constructor(opts = {}) {
-    super(buildOpts(opts));
-  }
-  /** WebSocket TTS pre-configured for Twilio Media Streams (`ulaw_8000`). */
-  static forTwilio(opts = {}) {
-    return new _TTS({ ...opts, outputFormat: "ulaw_8000" });
-  }
-  /** WebSocket TTS pre-configured for Telnyx (`pcm_16000`). */
-  static forTelnyx(opts = {}) {
-    return new _TTS({ ...opts, outputFormat: "pcm_16000" });
-  }
-};
-// src/tts/openai.ts
-init_esm_shims();
-// src/providers/openai-tts.ts
-init_esm_shims();
-var OPENAI_TTS_URL = "https://api.openai.com/v1/audio/speech";
-var INSTRUCTIONS_PREFIX = "gpt-4o-mini-tts";
-var LPF_ALPHA = 0.78;
-var LPF_ALPHA_8K = 0.45;
-var OpenAITTS = class _OpenAITTS {
-  constructor(apiKey, voice = "alloy", model = "gpt-4o-mini-tts", instructions = null, speed = null, antiAlias = true, targetSampleRate = 16e3) {
-    this.apiKey = apiKey;
-    this.voice = voice;
-    this.model = model;
-    this.instructions = instructions;
-    this.speed = speed;
-    this.antiAlias = antiAlias;
-    this.targetSampleRate = targetSampleRate;
-    if (speed !== null && speed !== void 0 && (speed < 0.25 || speed > 4)) {
-      throw new Error("OpenAITTS: speed must be in [0.25, 4.0]");
-    }
-    if (targetSampleRate !== 8e3 && targetSampleRate !== 16e3) {
-      throw new Error("OpenAITTS: targetSampleRate must be 8000 or 16000");
-    }
-  }
-  apiKey;
-  voice;
-  model;
-  instructions;
-  speed;
-  antiAlias;
-  targetSampleRate;
-  /**
-   * Synthesise text to speech and return the full audio as a single Buffer.
-   *
-   * For large chunks (or when latency matters) call `synthesizeStream` instead.
-   */
-  async synthesize(text) {
-    const chunks = [];
-    for await (const chunk of this.synthesizeStream(text)) {
-      chunks.push(chunk);
-    }
-    return Buffer.concat(chunks);
-  }
   /**
-   * Synthesise text and yield audio chunks as they arrive (streaming).
+   * Pre-call WebSocket warmup for the ElevenLabs `/stream-input` endpoint.
    *
-   * OpenAI returns 24 kHz PCM16; each chunk is lowpass-filtered then
-   * decimated 3:2 to 16 kHz before yielding so the output is ready for
-   * telephony pipelines.
+   * Opens the WS (DNS + TLS + auth handshake), sends the EXACT same BOS
+   * frame the production `synthesizeStream()` path sends — including
+   * `voice_settings` and (when configured) `generation_config` — so
+   * ElevenLabs instantiates the same per-session worker for both
+   * warmup and the live request. If the BOS frames differ, the server
+   * may route warmup and the real call to two different workers, and
+   * the warmed worker is wasted. Idles ~250 ms, then closes. By the
+   * time the first `synthesizeStream()` call lands during the call,
+   * the connection pool has the upstream warm — net wire time saving
+   * of 200-500 ms.
    *
-   * The resampler carries state (filter memory + buffered samples + odd
-   * trailing byte) between chunks so cross-chunk sample alignment and
-   * filter phase don't reset on every network read.
+   * Billing safety: ElevenLabs bills on synthesised characters
+   * delivered via `audio` frames (per https://elevenlabs.io/pricing).
+   * The keepalive (single-space `text`, no `flush: true`, no real
+   * transcript) is documented as the session-establishment frame and
+   * does NOT generate synthesis. Closing without sending the actual
+   * transcript does not consume billable characters. Best-effort:
+   * failures logged at debug level.
    */
-  async *synthesizeStream(text) {
-    const body = {
-      model: this.model,
-      input: text,
-      voice: this.voice,
-      response_format: "pcm"
-    };
-    if (this.instructions !== null && this.model.startsWith(INSTRUCTIONS_PREFIX)) {
-      body.instructions = this.instructions;
-    }
-    if (this.speed !== null) {
-      body.speed = this.speed;
-    }
-    const response = await fetch(OPENAI_TTS_URL, {
-      method: "POST",
-      headers: {
-        "Authorization": `Bearer ${this.apiKey}`,
-        "Content-Type": "application/json"
-      },
-      body: JSON.stringify(body)
+  async warmup() {
+    const ws = new WebSocket6(this.buildUrl(), {
+      headers: { "xi-api-key": this.apiKey }
     });
-    if (!response.ok) {
-      const errBody = await response.text();
-      throw new Error(`OpenAI TTS error ${response.status}: ${errBody}`);
-    }
-    if (!response.body) {
-      throw new Error("OpenAI TTS: no response body");
-    }
-    const ctx = {
-      carryByte: null,
-      leftover: [],
-      lpfPrev: 0,
-      lpfEnabled: this.antiAlias,
-      targetSampleRate: this.targetSampleRate
-    };
-    const reader = response.body.getReader();
     try {
-      while (true) {
-        const { done, value } = await reader.read();
-        if (done) break;
-        if (value && value.length > 0) {
-          const out = _OpenAITTS.resampleStreaming(Buffer.from(value), ctx);
-          if (out.length > 0) yield out;
-        }
-      }
-      if (ctx.leftover.length > 0) {
-        const tail = Buffer.alloc(ctx.leftover.length * 2);
-        for (let i = 0; i < ctx.leftover.length; i++) {
-          tail.writeInt16LE(ctx.leftover[i], i * 2);
-        }
-        yield tail;
+      await new Promise((resolve, reject) => {
+        const timer = setTimeout(
+          () => reject(new Error("ElevenLabs WS TTS warmup connect timeout")),
+          CONNECT_TIMEOUT_MS4
+        );
+        ws.once("open", () => {
+          clearTimeout(timer);
+          resolve();
+        });
+        ws.once("error", (err) => {
+          clearTimeout(timer);
+          reject(err);
+        });
+      });
+      try {
+        ws.send(JSON.stringify(this.buildBosFrame()));
+      } catch {
       }
+      await new Promise((r) => setTimeout(r, 250));
+    } catch (err) {
+      getLogger().debug(`ElevenLabs WS TTS warmup failed (best-effort): ${String(err)}`);
     } finally {
-      if (typeof reader.cancel === "function") await reader.cancel().catch(() => {
-      });
-      reader.releaseLock();
+      try {
+        if (ws.readyState === WebSocket6.OPEN || ws.readyState === WebSocket6.CONNECTING) {
+          ws.close();
+        }
+      } catch {
+      }
+      ws.removeAllListeners();
     }
   }
   /**
-   * Streaming 24 kHz → {16, 8} kHz resampler (PCM16-LE). Applies a single-pole
-   * lowpass ahead of the decimation and carries filter + sample state across
-   * chunks so the cadence doesn't reset at every network read.
+   * Open a fresh WS, send the EXACT BOS frame the live `synthesizeStream`
+   * sends, and return the OPEN socket without closing it. Used by the
+   * prewarm pipeline to park a TTS connection during the carrier ringing
+   * window so the next `synthesizeStream` call can adopt it via
+   * {@link adoptWebSocket} and skip ~400-900 ms of TLS + BOS round-trip.
    *
-   * Output rate is selected by ``ctx.targetSampleRate``:
-   *   16000 → 3:2 decimation (sample 0 + mid(1,2))   [default]
-   *    8000 → 3:1 decimation (sample 0 only)         [fix #46]
+   * Returns a parked-handle the caller stashes; the next
+   * `synthesizeStream` will detect the adoption queue and skip its own
+   * `new WebSocket()` + BOS send.
    *
-   * ``ctx.lpfEnabled`` controls whether the LPF is engaged — kept disabled
-   * for the legacy static helper so the bit-exact downsample-only tests
-   * remain valid; the real streaming path always engages it.
+   * Billing safety: BOS is the documented session-establishment frame
+   * (single space `text`, no `flush: true`) and does not generate
+   * synthesis. ElevenLabs bills on `audio` frames received from the
+   * server, not on BOS bytes sent by the client.
    */
-  static resampleStreaming(audio, ctx) {
-    let buf;
-    if (ctx.carryByte !== null) {
-      buf = Buffer.concat([Buffer.from([ctx.carryByte]), audio]);
-      ctx.carryByte = null;
-    } else {
-      buf = audio;
-    }
-    if (buf.length % 2 === 1) {
-      ctx.carryByte = buf[buf.length - 1];
-      buf = buf.subarray(0, buf.length - 1);
-    }
-    if (buf.length === 0 && ctx.leftover.length === 0) {
-      return Buffer.alloc(0);
+  async openParkedConnection() {
+    const ws = new WebSocket6(this.buildUrl(), {
+      headers: { "xi-api-key": this.apiKey }
+    });
+    await new Promise((resolve, reject) => {
+      const timer = setTimeout(
+        () => reject(new Error("ElevenLabs WS park connect timeout")),
+        CONNECT_TIMEOUT_MS4
+      );
+      ws.once("open", () => {
+        clearTimeout(timer);
+        resolve();
+      });
+      ws.once("error", (err) => {
+        clearTimeout(timer);
+        reject(err);
+      });
+    });
+    let bosSent = false;
+    try {
+      ws.send(JSON.stringify(this.buildBosFrame()));
+      bosSent = true;
+    } catch {
     }
-    const direct8k = ctx.targetSampleRate === 8e3;
-    const lpfAlpha = direct8k ? LPF_ALPHA_8K : LPF_ALPHA;
-    const sampleCount = buf.length / 2;
-    const samples = ctx.leftover.slice();
-    const lpf = ctx.lpfEnabled !== false;
-    let y = ctx.lpfPrev;
-    for (let i2 = 0; i2 < sampleCount; i2++) {
-      const x = buf.readInt16LE(i2 * 2);
-      if (lpf) {
-        y = lpfAlpha * x + (1 - lpfAlpha) * y;
-        let s = Math.round(y);
-        if (s > 32767) s = 32767;
-        else if (s < -32768) s = -32768;
-        samples.push(s);
-      } else {
-        samples.push(x);
+    return { ws, bosSent };
+  }
+  /**
+   * Stash a parked WS handle so the next `synthesizeStream` call adopts
+   * it instead of opening a fresh socket. Caller is responsible for
+   * holding the handle alive until either the live request consumes it
+   * or the call ends (in which case `discardAdoptedConnection()`
+   * cleans it up).
+   */
+  adoptWebSocket(parked) {
+    const prev = this.adoptedConnection;
+    this.adoptedConnection = parked;
+    if (prev && prev !== parked) {
+      try {
+        prev.ws.close();
+      } catch {
       }
     }
-    if (lpf) ctx.lpfPrev = y;
-    const out = [];
-    let i = 0;
-    if (direct8k) {
-      while (i + 2 < samples.length) {
-        out.push(samples[i]);
-        i += 3;
-      }
-    } else {
-      while (i + 2 < samples.length) {
-        out.push(samples[i]);
-        out.push(Math.round((samples[i + 1] + samples[i + 2]) / 2));
-        i += 3;
+  }
+  /**
+   * Drop and close any pending parked WS without consuming it. Used on
+   * call-failure paths so a never-started call does not leak a TTS WS
+   * that ElevenLabs will close after its inactivity timeout anyway.
+   */
+  discardAdoptedConnection() {
+    const parked = this.adoptedConnection;
+    this.adoptedConnection = null;
+    if (parked) {
+      try {
+        parked.ws.close();
+      } catch {
       }
     }
-    ctx.leftover = samples.slice(i);
-    const buffer = Buffer.alloc(out.length * 2);
-    for (let j = 0; j < out.length; j++) {
-      buffer.writeInt16LE(out[j], j * 2);
-    }
-    return buffer;
   }
-  /** @deprecated use {@link resampleStreaming} with persistent state. */
-  static resample24kTo16k(audio) {
-    const ctx = {
-      carryByte: null,
-      leftover: [],
-      lpfPrev: 0,
-      lpfEnabled: false,
-      targetSampleRate: 16e3
-    };
-    const out = _OpenAITTS.resampleStreaming(audio, ctx);
-    if (ctx.leftover.length === 0) return out;
-    const tail = Buffer.alloc(ctx.leftover.length * 2);
-    for (let i = 0; i < ctx.leftover.length; i++) {
-      tail.writeInt16LE(ctx.leftover[i], i * 2);
-    }
-    return Buffer.concat([out, tail]);
+  /** Discards (and closes) any parked WebSocket that was never adopted; live connections are per-utterance and torn down inside synthesizeStream. */
+  async close() {
+    this.discardAdoptedConnection();
   }
 };
+function looksLikeJson(buf) { // Cheap sniff on the first element only; nothing is parsed or validated. Assumes buf is a Buffer/Uint8Array — TODO confirm against callers.
+  if (buf.length === 0) return false; // empty input is never treated as JSON
+  const b = buf[0];
+  return b === 123 || b === 91; // 123 is ASCII '{' and 91 is ASCII '['
+}
-// src/tts/openai.ts
-var TTS3 = class extends OpenAITTS {
-  static providerKey = "openai_tts";
-  constructor(opts = {}) {
-    const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
-    if (!key) {
-      throw new Error(
-        "OpenAI TTS requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
-      );
-    }
-    super(
-      key,
-      opts.voice ?? "alloy",
-      opts.model ?? "gpt-4o-mini-tts",
-      opts.instructions ?? null,
-      opts.speed ?? null,
-      opts.antiAlias ?? false
+// src/tts/elevenlabs-ws.ts
+function resolveApiKey2(apiKey) { // Returns the explicit key, else ELEVENLABS_API_KEY from the environment; throws when neither yields a usable value.
+  const key = apiKey ?? process.env.ELEVENLABS_API_KEY; // `??` falls back only on null/undefined, so an explicit "" does not consult the environment
+  if (!key) { // an empty-string key is rejected here as well
+    throw new Error(
+      "ElevenLabs WebSocket TTS requires an apiKey. Pass { apiKey: '...' } or set ELEVENLABS_API_KEY in the environment."
+    );
+  }
+  return key;
+}
+function buildOpts(opts) { // Builds the options object that TTS2 hands to the ElevenLabsWebSocketTTS constructor.
+  const out = {
+    apiKey: resolveApiKey2(opts.apiKey), // throws when no key is available
+    modelId: opts.modelId ?? "eleven_flash_v2_5", // default model when the caller gives none
+    autoMode: opts.autoMode ?? true // enabled unless the caller passes an explicit value
+  };
+  if (opts.outputFormat !== void 0) out.outputFormat = opts.outputFormat; // optional keys are copied only when defined — presumably so the base class applies its own defaults otherwise (TODO confirm)
+  if (opts.voiceId !== void 0) out.voiceId = opts.voiceId;
+  if (opts.voiceSettings !== void 0) out.voiceSettings = opts.voiceSettings;
+  if (opts.languageCode !== void 0) out.languageCode = opts.languageCode;
+  if (opts.inactivityTimeout !== void 0) out.inactivityTimeout = opts.inactivityTimeout;
+  if (opts.chunkLengthSchedule !== void 0) out.chunkLengthSchedule = opts.chunkLengthSchedule;
+  return out;
+}
+var TTS2 = class _TTS extends ElevenLabsWebSocketTTS { // Options-object wrapper around ElevenLabsWebSocketTTS.
+  static providerKey = "elevenlabs_ws";
+  constructor(opts = {}) {
+    super(buildOpts(opts)); // buildOpts resolves the API key (throws if missing) and fills the modelId/autoMode defaults
+  }
+  /** WebSocket TTS pre-configured for Twilio Media Streams (`ulaw_8000`). */
+  static forTwilio(opts = {}) {
+    return new _TTS({ ...opts, outputFormat: "ulaw_8000" }); // opts is spread first, so the preset overrides any caller-supplied outputFormat
+  }
+  /** WebSocket TTS pre-configured for Telnyx (`pcm_16000`). */
+  static forTelnyx(opts = {}) {
+    return new _TTS({ ...opts, outputFormat: "pcm_16000" }); // preset likewise wins over opts.outputFormat
+  }
 };
-// src/tts/cartesia.ts
+// src/tts/openai.ts
 init_esm_shims();
-// src/providers/cartesia-tts.ts
+// src/providers/openai-tts.ts
 init_esm_shims();
-var CARTESIA_BASE_URL = "https://api.cartesia.ai";
-var CARTESIA_API_VERSION = "2025-04-16";
-var CARTESIA_DEFAULT_VOICE_ID = "f786b574-daa5-4673-aa0c-cbe3e8534c02";
-var CartesiaTTSModel = {
-  SONIC_3: "sonic-3",
-  SONIC_2: "sonic-2",
-  SONIC: "sonic"
-};
-var CartesiaTTSContainer = {
-  RAW: "raw",
-  WAV: "wav",
-  MP3: "mp3"
-};
-var CartesiaTTSEncoding = {
-  PCM_S16LE: "pcm_s16le",
-  PCM_F32LE: "pcm_f32le",
-  PCM_MULAW: "pcm_mulaw",
-  PCM_ALAW: "pcm_alaw"
-};
-var CartesiaTTSSampleRate = {
-  HZ_8000: 8e3,
-  HZ_16000: 16e3,
-  HZ_22050: 22050,
-  HZ_24000: 24e3,
-  HZ_44100: 44100
-};
-var CartesiaTTSVoiceMode = {
-  ID: "id",
-  EMBEDDING: "embedding"
-};
-var CartesiaTTS = class _CartesiaTTS {
-  apiKey;
-  model;
-  voice;
-  language;
-  sampleRate;
-  speed;
-  emotion;
-  volume;
-  baseUrl;
-  apiVersion;
-  constructor(apiKey, opts = {}) {
+var OPENAI_TTS_URL = "https://api.openai.com/v1/audio/speech";
+var INSTRUCTIONS_PREFIX = "gpt-4o-mini-tts";
+var LPF_ALPHA = 0.78;
+var LPF_ALPHA_8K = 0.45;
+var OpenAITTS = class _OpenAITTS {
+  constructor(apiKey, voice = "alloy", model = "gpt-4o-mini-tts", instructions = null, speed = null, antiAlias = true, targetSampleRate = 16e3) {
     this.apiKey = apiKey;
-    this.model = opts.model ?? CartesiaTTSModel.SONIC_3;
-    this.voice = opts.voice ?? CARTESIA_DEFAULT_VOICE_ID;
-    this.language = opts.language ?? "en";
-    this.sampleRate = opts.sampleRate ?? CartesiaTTSSampleRate.HZ_16000;
-    this.speed = opts.speed;
-    this.emotion = typeof opts.emotion === "string" ? [opts.emotion] : opts.emotion;
-    this.volume = opts.volume;
-    this.baseUrl = opts.baseUrl ?? CARTESIA_BASE_URL;
-    this.apiVersion = opts.apiVersion ?? CARTESIA_API_VERSION;
-  }
-  /**
-   * Construct an instance pre-configured for Twilio Media Streams.
-   *
-   * Sets `sampleRate=8000` so Cartesia emits PCM_S16LE @ 8 kHz directly.
-   * Twilio's media stream uses μ-law @ 8 kHz so the SDK still does the
-   * PCM → μ-law transcode client-side, but the 16 kHz → 8 kHz resample
-   * step is skipped. Saves ~10–30 ms first-byte plus per-frame CPU and
-   * removes a potential aliasing source.
-   */
-  static forTwilio(apiKey, options = {}) {
-    return new _CartesiaTTS(apiKey, {
-      ...options,
-      sampleRate: CartesiaTTSSampleRate.HZ_8000
-    });
+    this.voice = voice;
+    this.model = model;
+    this.instructions = instructions;
+    this.speed = speed;
+    this.antiAlias = antiAlias;
+    this.targetSampleRate = targetSampleRate;
+    if (speed !== null && speed !== void 0 && (speed < 0.25 || speed > 4)) {
+      throw new Error("OpenAITTS: speed must be in [0.25, 4.0]");
+    }
+    if (targetSampleRate !== 8e3 && targetSampleRate !== 16e3) {
+      throw new Error("OpenAITTS: targetSampleRate must be 8000 or 16000");
+    }
   }
+  apiKey;
+  voice;
+  model;
+  instructions;
+  speed;
+  antiAlias;
+  targetSampleRate;
+  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+  static providerKey = "openai_tts";
   /**
-   * Construct an instance pre-configured for Telnyx bidirectional media.
+   * Synthesise text to speech and return the full audio as a single Buffer.
    *
-   * Sets `sampleRate=16000` to match Telnyx's L16/16000 default codec —
-   * audio flows end-to-end with zero resampling or transcoding. Same as
-   * the bare-constructor default; exists for API symmetry with
-   * {@link CartesiaTTS.forTwilio}.
+   * For large chunks (or when latency matters) call `synthesizeStream` instead.
    */
-  static forTelnyx(apiKey, options = {}) {
-    return new _CartesiaTTS(apiKey, {
-      ...options,
-      sampleRate: CartesiaTTSSampleRate.HZ_16000
-    });
-  }
-  /** Build the JSON payload for the Cartesia bytes endpoint. */
-  buildPayload(text) {
-    const payload = {
-      model_id: this.model,
-      voice: { mode: CartesiaTTSVoiceMode.ID, id: this.voice },
-      transcript: text,
-      output_format: {
-        container: CartesiaTTSContainer.RAW,
-        encoding: CartesiaTTSEncoding.PCM_S16LE,
-        sample_rate: this.sampleRate
-      },
-      language: this.language
-    };
-    const generationConfig = {};
-    if (this.speed !== void 0) generationConfig.speed = this.speed;
-    if (this.emotion && this.emotion.length > 0)
-      generationConfig.emotion = this.emotion[0];
-    if (this.volume !== void 0) generationConfig.volume = this.volume;
-    if (Object.keys(generationConfig).length > 0) {
-      payload.generation_config = generationConfig;
-    }
-    return payload;
-  }
-  /** Synthesize text and return the concatenated audio buffer. */
   async synthesize(text) {
     const chunks = [];
     for await (const chunk of this.synthesizeStream(text)) {
@@ -4694,217 +5782,213 @@ var CartesiaTTS = class _CartesiaTTS {
     return Buffer.concat(chunks);
   }
   /**
-   * Synthesize text and yield raw PCM_S16LE chunks at the configured
-   * `sampleRate` as they arrive from Cartesia.
+   * Synthesise text and yield audio chunks as they arrive (streaming).
+   *
+   * OpenAI returns 24 kHz PCM16; each chunk is lowpass-filtered (unless
+   * `antiAlias` is false) then decimated to `targetSampleRate` — 3:2 to
+   * 16 kHz by default, or 3:1 to 8 kHz — before yielding so the output is
+   * ready for telephony pipelines.
+   *
+   * The resampler carries state (filter memory + buffered samples + odd
+   * trailing byte) between chunks so cross-chunk sample alignment and
+   * filter phase don't reset on every network read.
    */
   async *synthesizeStream(text) {
-    const response = await fetch(`${this.baseUrl}/tts/bytes`, {
+    const body = {
+      model: this.model,
+      input: text,
+      voice: this.voice,
+      response_format: "pcm"
+    };
+    if (this.instructions !== null && this.model.startsWith(INSTRUCTIONS_PREFIX)) {
+      body.instructions = this.instructions;
+    }
+    if (this.speed !== null) {
+      body.speed = this.speed;
+    }
+    const response = await fetch(OPENAI_TTS_URL, {
       method: "POST",
       headers: {
-        "X-API-Key": this.apiKey,
-        "Cartesia-Version": this.apiVersion,
+        "Authorization": `Bearer ${this.apiKey}`,
         "Content-Type": "application/json"
       },
-      body: JSON.stringify(this.buildPayload(text)),
-      signal: AbortSignal.timeout(3e4)
+      body: JSON.stringify(body)
     });
     if (!response.ok) {
-      const body = await response.text();
-      throw new Error(`Cartesia TTS error ${response.status}: ${body}`);
+      const errBody = await response.text();
+      throw new Error(`OpenAI TTS error ${response.status}: ${errBody}`);
     }
     if (!response.body) {
-      throw new Error("Cartesia TTS: no response body");
+      throw new Error("OpenAI TTS: no response body");
     }
+    const ctx = {
+      carryByte: null,
+      leftover: [],
+      lpfPrev: 0,
+      lpfEnabled: this.antiAlias,
+      targetSampleRate: this.targetSampleRate
+    };
     const reader = response.body.getReader();
     try {
       while (true) {
         const { done, value } = await reader.read();
         if (done) break;
         if (value && value.length > 0) {
-          yield Buffer.from(value);
+          const out = _OpenAITTS.resampleStreaming(Buffer.from(value), ctx);
+          if (out.length > 0) yield out;
+        }
+      }
+      if (ctx.leftover.length > 0) {
+        const tail = Buffer.alloc(ctx.leftover.length * 2);
+        for (let i = 0; i < ctx.leftover.length; i++) {
+          tail.writeInt16LE(ctx.leftover[i], i * 2);
         }
+        yield tail;
       }
     } finally {
-      if (typeof reader.cancel === "function")
-        await reader.cancel().catch(() => {
-        });
+      if (typeof reader.cancel === "function") await reader.cancel().catch(() => {
+      });
       reader.releaseLock();
     }
   }
-};
-// src/tts/cartesia.ts
-function resolveApiKey3(apiKey) {
-  const key = apiKey ?? process.env.CARTESIA_API_KEY;
-  if (!key) {
-    throw new Error(
-      "Cartesia TTS requires an apiKey. Pass { apiKey: '...' } or set CARTESIA_API_KEY in the environment."
-    );
-  }
-  return key;
-}
-var TTS4 = class _TTS extends CartesiaTTS {
-  static providerKey = "cartesia_tts";
-  constructor(opts = {}) {
-    const key = resolveApiKey3(opts.apiKey);
-    const { apiKey: _ignored, ...rest } = opts;
-    void _ignored;
-    super(key, rest);
-  }
-  static forTwilio(arg1, arg2) {
-    const opts = typeof arg1 === "string" ? { apiKey: arg1, ...arg2 ?? {} } : arg1 ?? {};
-    return new _TTS({ ...opts, sampleRate: 8e3 });
-  }
-  static forTelnyx(arg1, arg2) {
-    const opts = typeof arg1 === "string" ? { apiKey: arg1, ...arg2 ?? {} } : arg1 ?? {};
-    return new _TTS({ ...opts, sampleRate: 16e3 });
-  }
-};
-// src/tts/rime.ts
-init_esm_shims();
-// src/providers/rime-tts.ts
-init_esm_shims();
-var RIME_BASE_URL = "https://users.rime.ai/v1/rime-tts";
-var RimeModel = {
-  ARCANA: "arcana",
-  MIST: "mist",
-  MIST_V2: "mistv2"
-};
-var RimeAudioFormat = {
-  PCM: "audio/pcm",
-  MP3: "audio/mp3",
-  WAV: "audio/wav",
-  MULAW: "audio/mulaw"
-};
-var ARCANA_MODEL_TIMEOUT_MS = 60 * 4 * 1e3;
-var MIST_MODEL_TIMEOUT_MS = 30 * 1e3;
-function isMistModel(model) {
-  return model.includes(RimeModel.MIST);
-}
-function timeoutForModel(model) {
-  if (model === RimeModel.ARCANA) return ARCANA_MODEL_TIMEOUT_MS;
-  return MIST_MODEL_TIMEOUT_MS;
-}
-var RimeTTS = class {
-  apiKey;
-  model;
-  speaker;
-  lang;
-  sampleRate;
-  repetitionPenalty;
-  temperature;
-  topP;
-  maxTokens;
-  speedAlpha;
-  reduceLatency;
-  pauseBetweenBrackets;
-  phonemizeBetweenBrackets;
-  baseUrl;
-  totalTimeoutMs;
-  constructor(apiKey, opts = {}) {
-    this.apiKey = apiKey;
-    this.model = opts.model ?? RimeModel.ARCANA;
-    const defaultSpeaker = isMistModel(this.model) ? "cove" : "astra";
-    this.speaker = opts.speaker ?? defaultSpeaker;
-    this.lang = opts.lang ?? "eng";
-    this.sampleRate = opts.sampleRate ?? 16e3;
-    this.repetitionPenalty = opts.repetitionPenalty;
-    this.temperature = opts.temperature;
-    this.topP = opts.topP;
-    this.maxTokens = opts.maxTokens;
-    this.speedAlpha = opts.speedAlpha;
-    this.reduceLatency = opts.reduceLatency;
-    this.pauseBetweenBrackets = opts.pauseBetweenBrackets;
-    this.phonemizeBetweenBrackets = opts.phonemizeBetweenBrackets;
-    this.baseUrl = opts.baseUrl ?? RIME_BASE_URL;
-    this.totalTimeoutMs = timeoutForModel(this.model);
-  }
-  buildPayload(text) {
-    const payload = {
-      speaker: this.speaker,
-      text,
-      modelId: this.model
-    };
-    if (this.model === RimeModel.ARCANA) {
-      if (this.repetitionPenalty !== void 0)
-        payload.repetition_penalty = this.repetitionPenalty;
-      if (this.temperature !== void 0) payload.temperature = this.temperature;
-      if (this.topP !== void 0) payload.top_p = this.topP;
-      if (this.maxTokens !== void 0) payload.max_tokens = this.maxTokens;
-      payload.lang = this.lang;
-      payload.samplingRate = this.sampleRate;
-    } else if (isMistModel(this.model)) {
-      payload.lang = this.lang;
-      payload.samplingRate = this.sampleRate;
-      if (this.speedAlpha !== void 0) payload.speedAlpha = this.speedAlpha;
-      if (this.model === RimeModel.MIST_V2 && this.reduceLatency !== void 0) {
-        payload.reduceLatency = this.reduceLatency;
+  /**
+   * Streaming 24 kHz → {16, 8} kHz resampler (PCM16-LE). Applies a single-pole
+   * lowpass ahead of the decimation and carries filter + sample state across
+   * chunks so the cadence doesn't reset at every network read.
+   *
+   * Output rate is selected by ``ctx.targetSampleRate``:
+   *   16000 → 3:2 decimation (sample 0 + mid(1,2))   [default]
+   *    8000 → 3:1 decimation (sample 0 only)         [fix #46]
+   *
+   * ``ctx.lpfEnabled`` controls whether the LPF is engaged — kept disabled
+   * for the legacy static helper so the bit-exact downsample-only tests
+   * remain valid; the streaming path (`synthesizeStream`) engages it unless
+   * the instance's `antiAlias` flag is false.
+   */
+  static resampleStreaming(audio, ctx) {
+    let buf;
+    if (ctx.carryByte !== null) {
+      buf = Buffer.concat([Buffer.from([ctx.carryByte]), audio]);
+      ctx.carryByte = null;
+    } else {
+      buf = audio;
+    }
+    if (buf.length % 2 === 1) {
+      ctx.carryByte = buf[buf.length - 1];
+      buf = buf.subarray(0, buf.length - 1);
+    }
+    if (buf.length === 0 && ctx.leftover.length === 0) {
+      return Buffer.alloc(0);
+    }
+    const direct8k = ctx.targetSampleRate === 8e3;
+    const lpfAlpha = direct8k ? LPF_ALPHA_8K : LPF_ALPHA;
+    const sampleCount = buf.length / 2;
+    const samples = ctx.leftover.slice();
+    const lpf = ctx.lpfEnabled !== false;
+    let y = ctx.lpfPrev;
+    for (let i2 = 0; i2 < sampleCount; i2++) {
+      const x = buf.readInt16LE(i2 * 2);
+      if (lpf) {
+        y = lpfAlpha * x + (1 - lpfAlpha) * y;
+        let s = Math.round(y);
+        if (s > 32767) s = 32767;
+        else if (s < -32768) s = -32768;
+        samples.push(s);
+      } else {
+        samples.push(x);
       }
-      if (this.pauseBetweenBrackets !== void 0) {
-        payload.pauseBetweenBrackets = this.pauseBetweenBrackets;
+    }
+    if (lpf) ctx.lpfPrev = y;
+    const out = [];
+    let i = 0;
+    if (direct8k) {
+      while (i + 2 < samples.length) {
+        out.push(samples[i]);
+        i += 3;
       }
-      if (this.phonemizeBetweenBrackets !== void 0) {
-        payload.phonemizeBetweenBrackets = this.phonemizeBetweenBrackets;
+    } else {
+      while (i + 2 < samples.length) {
+        out.push(samples[i]);
+        out.push(Math.round((samples[i + 1] + samples[i + 2]) / 2));
+        i += 3;
       }
     }
-    return payload;
-  }
-  /** Synthesize text and return the concatenated audio buffer. */
-  async synthesize(text) {
-    const chunks = [];
-    for await (const chunk of this.synthesizeStream(text)) {
-      chunks.push(chunk);
+    ctx.leftover = samples.slice(i);
+    const buffer = Buffer.alloc(out.length * 2);
+    for (let j = 0; j < out.length; j++) {
+      buffer.writeInt16LE(out[j], j * 2);
     }
-    return Buffer.concat(chunks);
+    return buffer;
   }
-  /**
-   * Synthesize text and yield raw PCM_S16LE chunks at the configured
-   * `sampleRate` as they stream in.
-   */
-  async *synthesizeStream(text) {
-    const response = await fetch(this.baseUrl, {
-      method: "POST",
-      headers: {
-        accept: RimeAudioFormat.PCM,
-        Authorization: `Bearer ${this.apiKey}`,
-        "content-type": "application/json"
-      },
-      body: JSON.stringify(this.buildPayload(text)),
-      signal: AbortSignal.timeout(this.totalTimeoutMs)
-    });
-    if (!response.ok) {
-      const body = await response.text();
-      throw new Error(`Rime TTS error ${response.status}: ${body}`);
-    }
-    const contentType = response.headers.get("content-type") ?? "";
-    if (!contentType.startsWith("audio")) {
-      const body = await response.text();
-      throw new Error(`Rime returned non-audio response: ${body.slice(0, 500)}`);
-    }
-    if (!response.body) {
-      throw new Error("Rime TTS: no response body");
+  /** @deprecated use {@link resampleStreaming} with persistent state. */
+  static resample24kTo16k(audio) {
+    const ctx = {
+      carryByte: null,
+      leftover: [],
+      lpfPrev: 0,
+      lpfEnabled: false,
+      targetSampleRate: 16e3
+    };
+    const out = _OpenAITTS.resampleStreaming(audio, ctx);
+    if (ctx.leftover.length === 0) return out;
+    const tail = Buffer.alloc(ctx.leftover.length * 2);
+    for (let i = 0; i < ctx.leftover.length; i++) {
+      tail.writeInt16LE(ctx.leftover[i], i * 2);
     }
-    const reader = response.body.getReader();
-    try {
-      while (true) {
-        const { done, value } = await reader.read();
-        if (done) break;
-        if (value && value.length > 0) {
-          yield Buffer.from(value);
-        }
-      }
-    } finally {
-      if (typeof reader.cancel === "function")
-        await reader.cancel().catch(() => {
-        });
-      reader.releaseLock();
+    return Buffer.concat([out, tail]);
+  }
+};
+// src/tts/openai.ts
+var TTS3 = class extends OpenAITTS { // Options-object wrapper around the positional OpenAITTS constructor.
+  static providerKey = "openai_tts";
+  constructor(opts = {}) {
+    const key = opts.apiKey ?? process.env.OPENAI_API_KEY; // explicit key wins; the environment variable is the fallback
+    if (!key) { // also rejects an empty-string key
+      throw new Error(
+        "OpenAI TTS requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
+      );
+    }
+    super(
+      key,
+      opts.voice ?? "alloy",
+      opts.model ?? "gpt-4o-mini-tts",
+      opts.instructions ?? null,
+      opts.speed ?? null,
+      opts.antiAlias ?? false // NOTE(review): defaults to false here although OpenAITTS itself defaults antiAlias to true; targetSampleRate is not forwarded, so it stays at the base default of 16000 — confirm both are intended
+    );
+  }
+};
+// src/tts/cartesia.ts
+init_esm_shims();
+function resolveApiKey3(apiKey) { // Returns the explicit key, else CARTESIA_API_KEY from the environment; throws when neither yields a usable value.
+  const key = apiKey ?? process.env.CARTESIA_API_KEY; // `??` falls back only on null/undefined, so an explicit "" does not consult the environment
+  if (!key) { // an empty-string key is rejected here as well
+    throw new Error(
+      "Cartesia TTS requires an apiKey. Pass { apiKey: '...' } or set CARTESIA_API_KEY in the environment."
+    );
+  }
+  return key;
+}
+var TTS4 = class _TTS extends CartesiaTTS { // Options-object wrapper around CartesiaTTS, which takes (apiKey, options).
+  static providerKey = "cartesia_tts";
+  constructor(opts = {}) {
+    const key = resolveApiKey3(opts.apiKey); // throws when no key is available
+    const { apiKey: _ignored, ...rest } = opts; // strip apiKey so it is passed positionally rather than inside the options bag
+    void _ignored; // marks the binding as intentionally unused
+    super(key, rest);
+  }
+  static forTwilio(arg1, arg2) { // accepts either (apiKey, options) or a single options object
+    const opts = typeof arg1 === "string" ? { apiKey: arg1, ...arg2 ?? {} } : arg1 ?? {}; // normalise both call shapes into one options object
+    return new _TTS({ ...opts, sampleRate: 8e3 }); // preset sample rate overrides any caller-supplied sampleRate
+  }
+  static forTelnyx(arg1, arg2) { // same two call shapes as forTwilio
+    const opts = typeof arg1 === "string" ? { apiKey: arg1, ...arg2 ?? {} } : arg1 ?? {};
+    return new _TTS({ ...opts, sampleRate: 16e3 }); // preset sample rate overrides any caller-supplied sampleRate
   }
 };
 // src/tts/rime.ts
+init_esm_shims();
 var TTS5 = class extends RimeTTS {
   static providerKey = "rime";
   constructor(opts = {}) {
@@ -4943,6 +6027,8 @@ var LMNTSampleRate = {
   HZ_24000: 24e3
 };
 var LMNTTTS = class {
+  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+  static providerKey = "lmnt";
   apiKey;
   model;
   voice;
@@ -5041,6 +6127,7 @@ init_esm_shims();
 // src/providers/inworld-tts.ts
 init_esm_shims();
 var INWORLD_BASE_URL = "https://api.inworld.ai/tts/v1/voice:stream";
+var INWORLD_VOICES_URL = "https://api.inworld.ai/tts/v1/voices";
 var InworldModel = {
   TTS_2: "inworld-tts-2",
   TTS_1_5_MAX: "inworld-tts-1.5-max",
@@ -5055,6 +6142,8 @@ var InworldAudioEncoding = {
   MP3: "MP3"
 };
 var InworldTTS = class {
+  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+  static providerKey = "inworld";
   authToken;
   model;
   voice;
@@ -5099,6 +6188,45 @@ var InworldTTS = class {
     if (this.deliveryMode !== void 0) payload.deliveryMode = this.deliveryMode;
     return payload;
   }
+  /**
+   * Pre-call HTTP warmup for the Inworld TTS API.
+   *
+   * Issues a lightweight `GET /tts/v1/voices` against the API host so
+   * DNS + TLS + HTTP/2 connection are already up by the time the first
+   * `synthesizeStream()` POST lands. Best-effort: 5 s timeout, all
+   * exceptions swallowed at debug level.
+   *
+   * Earlier revisions issued `HEAD` against the streaming endpoint
+   * (`/tts/v1/voice:stream`). That endpoint is POST-only so HEAD
+   * returns `405 Method Not Allowed` — the warmup still completed the
+   * TLS handshake but spammed 405 errors into Inworld's audit logs and
+   * into our own logs. Switching to a documented `GET /tts/v1/voices`
+   * metadata read is a 2xx-clean equivalent.
+   *
+   * Billing safety: `GET /tts/v1/voices` is a free metadata endpoint
+   * (per https://docs.inworld.ai/). It returns the voice catalogue
+   * without invoking the synthesis pipeline. The actual synthesis is
+   * billed only when `POST /tts/v1/voice:stream` runs with a non-empty
+   * `text`.
+   *
+   * Note: Inworld TTS uses the HTTP NDJSON streaming path rather than
+   * a persistent WebSocket — connection warmup is therefore HTTP-based,
+   * not WebSocket pre-handshake. The latency win is smaller (~50-150 ms)
+   * than the WS-based prewarms but still real on cold-start calls.
+   */
+  async warmup() {
+    try {
+      await fetch(INWORLD_VOICES_URL, {
+        method: "GET",
+        headers: {
+          Authorization: `Basic ${this.authToken}`
+        },
+        signal: AbortSignal.timeout(5e3) // abort the probe after 5 s
+      }); // the response is discarded; fetch resolves on HTTP error statuses too, so only network/abort failures reach the catch
+    } catch (err) {
+      getLogger().debug(`Inworld TTS warmup failed (best-effort): ${String(err)}`); // never rethrown — warmup must not fail the call
+    }
+  }
   /** Synthesize text and return the concatenated audio buffer. */
   async synthesize(text) {
     const chunks = [];
@@ -5238,6 +6366,8 @@ var DEFAULT_MODEL = AnthropicModel.CLAUDE_HAIKU_4_5_20251001;
 var DEFAULT_MAX_TOKENS = 1024;
 var PROMPT_CACHING_BETA = "prompt-caching-2024-07-31";
 var AnthropicLLMProvider = class {
+  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+  static providerKey = "anthropic";
   apiKey;
   model;
   maxTokens;
@@ -5259,6 +6389,27 @@ var AnthropicLLMProvider = class {
     this.anthropicVersion = options.anthropicVersion ?? DEFAULT_ANTHROPIC_VERSION;
     this.promptCaching = options.promptCaching ?? true;
   }
+  /**
+   * Pre-call DNS / TLS warmup for the Anthropic Messages API.
+   * Issues a lightweight ``GET https://api.anthropic.com/v1/models`` so
+   * DNS, TLS and HTTP/2 are already up by the time the first ``messages``
+   * call lands. Best-effort: 5 s timeout, exceptions swallowed at debug.
+   */
+  async warmup() {
+    try {
+      const modelsUrl = this.url.replace(/\/messages\/?$/, "/models"); // swaps a trailing /messages for /models; a URL that does not end in /messages is left unchanged
+      await fetch(modelsUrl, {
+        method: "GET",
+        headers: {
+          "x-api-key": this.apiKey,
+          "anthropic-version": this.anthropicVersion
+        },
+        signal: AbortSignal.timeout(5e3) // abort the probe after 5 s
+      }); // the response is discarded; fetch resolves on HTTP error statuses too, so only network/abort failures reach the catch
+    } catch (err) {
+      getLogger().debug(`Anthropic LLM warmup failed (best-effort): ${String(err)}`); // never rethrown — warmup must not fail the call
+    }
+  }
   /** Stream Patter-format LLM chunks for the given OpenAI-style chat history. */
   async *stream(messages, tools, opts) {
     const { system, messages: anthropicMessages } = toAnthropicMessages(messages);
@@ -5476,12 +6627,6 @@ init_esm_shims();
 // src/providers/groq-llm.ts
 init_esm_shims();
-// src/version.ts
-init_esm_shims();
-var VERSION = "0.5.5";
-// src/providers/groq-llm.ts
 var GROQ_BASE_URL = "https://api.groq.com/openai/v1";
 var GroqModel = {
   LLAMA_3_3_70B_VERSATILE: "llama-3.3-70b-versatile",
@@ -5494,6 +6639,8 @@ var GroqModel = {
 };
 var DEFAULT_MODEL2 = GroqModel.LLAMA_3_3_70B_VERSATILE;
 var GroqLLMProvider = class {
+  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+  static providerKey = "groq";
   apiKey;
   model;
   baseUrl;
@@ -5527,6 +6674,21 @@ var GroqLLMProvider = class {
     this.presencePenalty = options.presencePenalty;
     this.stop = options.stop;
   }
+  /**
+   * Pre-call DNS / TLS warmup for the Groq inference endpoint.
+   * Best-effort: 5 s timeout, all exceptions swallowed at debug level.
+   */
+  async warmup() {
+    try {
+      await fetch(`${this.baseUrl}/models`, {
+        method: "GET",
+        headers: { Authorization: `Bearer ${this.apiKey}` },
+        signal: AbortSignal.timeout(5e3) // abort the probe after 5 s
+      }); // the response is discarded; fetch resolves on HTTP error statuses too, so only network/abort failures reach the catch
+    } catch (err) {
+      getLogger().debug(`Groq LLM warmup failed (best-effort): ${String(err)}`); // never rethrown — warmup must not fail the call
+    }
+  }
   /** Stream Patter-format LLM chunks from the Groq chat completions API. */
   async *stream(messages, tools, opts) {
     const body = {
@@ -5662,6 +6824,8 @@ var CerebrasModel = {
 var DEFAULT_MODEL3 = CerebrasModel.GPT_OSS_120B;
 var RETRY_BACKOFF_BASE_MS = 500;
 var CerebrasLLMProvider = class {
+  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+  static providerKey = "cerebras";
   apiKey;
   model;
   baseUrl;
@@ -5697,6 +6861,21 @@ var CerebrasLLMProvider = class {
     this.presencePenalty = options.presencePenalty;
     this.stop = options.stop;
   }
+  /**
+   * Pre-call DNS / TLS warmup for the Cerebras inference endpoint.
+   * Best-effort: 5 s timeout, all exceptions swallowed at debug level.
+   */
+  async warmup() {
+    try {
+      await fetch(`${this.baseUrl}/models`, {
+        method: "GET",
+        headers: { Authorization: `Bearer ${this.apiKey}` },
+        signal: AbortSignal.timeout(5e3) // abort the probe after 5 s
+      }); // the response is discarded; fetch resolves on HTTP error statuses too, so only network/abort failures reach the catch
+    } catch (err) {
+      getLogger().debug(`Cerebras LLM warmup failed (best-effort): ${String(err)}`); // never rethrown — warmup must not fail the call
+    }
+  }
   /** Stream Patter-format LLM chunks from the Cerebras chat completions API. */
   async *stream(messages, tools, opts) {
     const body = {
@@ -5859,6 +7038,8 @@ var GoogleModel = {
 var DEFAULT_MODEL4 = GoogleModel.GEMINI_2_5_FLASH;
 var DEFAULT_BASE_URL3 = "https://generativelanguage.googleapis.com/v1beta";
 var GoogleLLMProvider = class {
+  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+  static providerKey = "google";
   apiKey;
   model;
   baseUrl;
@@ -5876,6 +7057,23 @@ var GoogleLLMProvider = class {
     this.temperature = options.temperature;
     this.maxOutputTokens = options.maxOutputTokens;
   }
+  /**
+   * Pre-call DNS / TLS warmup for the Gemini API.
+   * Issues a lightweight ``GET ${baseUrl}/models?key=...`` so DNS, TLS
+   * and HTTP/2 are already up by the time the first
+   * ``streamGenerateContent`` call lands. Best-effort: 5 s timeout, all
+   * exceptions swallowed at debug level.
+   */
+  async warmup() {
+    try {
+      await fetch(`${this.baseUrl}/models?key=${encodeURIComponent(this.apiKey)}`, { // the API key travels URL-encoded in the query string; no auth header is sent
+        method: "GET",
+        signal: AbortSignal.timeout(5e3) // abort the probe after 5 s
+      }); // the response is discarded; fetch resolves on HTTP error statuses too, so only network/abort failures reach the catch
+    } catch (err) {
+      getLogger().debug(`Google LLM warmup failed (best-effort): ${String(err)}`); // never rethrown — warmup must not fail the call
+    }
+  }
   /** Stream Patter-format LLM chunks from the Gemini SSE endpoint. */
   async *stream(messages, tools, opts) {
     const { systemInstruction, contents } = toGeminiContents(messages);
@@ -6065,6 +7263,186 @@ var LLM5 = class extends GoogleLLMProvider {
   }
 };
+// src/providers/deepfilternet-filter.ts
+init_esm_shims();
+function log() { // Module-local shorthand: returns whatever getLogger() returns, looked up on every call.
+  return getLogger();
+}
+var DEEPFILTERNET_SR = 48e3;
+async function loadOnnxRuntime() { // Try to load "onnxruntime-node" dynamically; resolves to the imported module, or null if the import throws.
+  try {
+    const specifier = "onnxruntime-node"; // held in a variable rather than written inline in import(); presumably so bundlers do not resolve it statically (TODO confirm)
+    const mod = await import(specifier);
+    return mod;
+  } catch { // any import failure (e.g. package not installed) is reported as null, without logging
+    return null;
+  }
+}
+function pcm16ToFloat32(pcm) { // Convert 16-bit PCM bytes into a new Float32Array by dividing every sample by 32768.
+  const view = new Int16Array(pcm.buffer, pcm.byteOffset, Math.floor(pcm.byteLength / 2)); // a trailing odd byte is ignored. NOTE(review): an Int16Array view reads in platform byte order and requires an even byteOffset; confirm callers never pass an unaligned slice
+  const out = new Float32Array(view.length);
+  for (let i = 0; i < view.length; i += 1) {
+    out[i] = view[i] / 32768; // maps -32768..32767 onto [-1, 1)
+  }
+  return out;
+}
+function float32ToPcm16(samples) { // Convert float samples into a new Buffer of 16-bit little-endian PCM, two bytes per sample.
+  const out = Buffer.alloc(samples.length * 2);
+  for (let i = 0; i < samples.length; i += 1) {
+    const clamped = Math.max(-1, Math.min(1, samples[i])); // limit to [-1, 1] before scaling
+    out.writeInt16LE(Math.round(clamped * 32767), i * 2); // scale factor is 32767, whereas pcm16ToFloat32 divides by 32768
+  }
+  return out;
+}
+var DeepFilterNetFilter = class { // Audio filter that runs PCM16 chunks through an ONNX model at DEEPFILTERNET_SR; returns the input unchanged whenever no model or runtime is available.
+  modelPath; // options.modelPath as given; a falsy value means "no model" and forces pass-through
+  silenceWarnings; // true only when options.silenceWarnings === true; suppresses the one-time warn() messages
+  session = null; // cached inference session, set after the first successful ensureSession()
+  ort = null; // cached result of loadOnnxRuntime(); null until loaded or when unavailable
+  warned = false; // set once a warning or a model-load error has been logged
+  closed = false; // set by close(); process() throws afterwards
+  // Fix 5: stateful resamplers for src_sr↔48k conversions so chunk-boundary
+  // samples are not discarded. Lazy-created and torn down on rate change.
+  _resamplerSrcRate = null; // sample rate the two resamplers below were built for
+  _upsamplerInst = null; // converts sampleRate -> DEEPFILTERNET_SR
+  _downsamplerInst = null; // converts DEEPFILTERNET_SR -> sampleRate
+  constructor(options = {}) { // reads options.modelPath and options.silenceWarnings; performs no I/O
+    this.modelPath = options.modelPath;
+    this.silenceWarnings = options.silenceWarnings === true;
+  }
+  async ensureSession() { // Lazily create the inference session; resolves to the session, or null when pass-through should be used.
+    if (this.session !== null) {
+      return this.session;
+    }
+    if (!this.modelPath) {
+      if (!this.warned && !this.silenceWarnings) { // warn at most once per instance
+        log().warn(
+          "DeepFilterNetFilter: no modelPath provided; audio will pass through unmodified. Provide a DeepFilterNet ONNX model to enable noise suppression."
+        );
+        this.warned = true;
+      }
+      return null;
+    }
+    if (this.ort === null) {
+      this.ort = await loadOnnxRuntime(); // stays null if the import fails, so the import is retried on the next call
+    }
+    if (this.ort === null) {
+      if (!this.warned && !this.silenceWarnings) { // warn at most once per instance
+        log().warn(
+          "DeepFilterNetFilter: onnxruntime-node is not installed; audio will pass through unmodified. Run `npm install onnxruntime-node` to enable noise suppression."
+        );
+        this.warned = true;
+      }
+      return null;
+    }
+    try {
+      this.session = await this.ort.InferenceSession.create(this.modelPath);
+      return this.session;
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      log().error(`DeepFilterNetFilter: failed to load model: ${message}`); // NOTE(review): logged unconditionally; session stays null, so every later call retries the load and logs again
+      this.warned = true;
+      return null;
+    }
+  }
+  /**
+   * Run noise suppression on a PCM16 chunk; pass-through when no model is loaded.
+   *
+   * The chunk is resampled from ``sampleRate`` to DEEPFILTERNET_SR, fed to the
+   * session's first input as a ``[1, N]`` float32 tensor, and the session's
+   * first output is resampled back to ``sampleRate``.
+   *
+   * @param pcmChunk PCM16 audio bytes (assumed to be a little-endian Buffer; TODO confirm with callers).
+   * @param sampleRate Sample rate of ``pcmChunk``; a change between calls rebuilds both resamplers.
+   * @returns The processed PCM16 data, or the original ``pcmChunk`` when it is
+   *   empty, no session is available, the model returns no output data, or any
+   *   step inside the ``try`` throws (the error is logged).
+   * @throws Error when the filter has already been closed.
+   */
+  async process(pcmChunk, sampleRate) {
+    if (this.closed) {
+      throw new Error("DeepFilterNetFilter is closed");
+    }
+    if (pcmChunk.length === 0) {
+      return pcmChunk;
+    }
+    const session = await this.ensureSession();
+    if (session === null || this.ort === null) {
+      return pcmChunk;
+    }
+    try {
+      if (this._resamplerSrcRate !== sampleRate) { // first call, or the caller switched sample rate: replace both resamplers
+        this._resamplerSrcRate = sampleRate;
+        this._upsamplerInst = new StatefulResampler({ srcRate: sampleRate, dstRate: DEEPFILTERNET_SR });
+        this._downsamplerInst = new StatefulResampler({ srcRate: DEEPFILTERNET_SR, dstRate: sampleRate });
+      }
+      const samples = pcm16ToFloat32(pcmChunk);
+      const pcm16Up = this._upsamplerInst.process(float32ToPcm16(new Float32Array(samples))); // NOTE(review): samples go PCM16 -> float -> PCM16 before upsampling; confirm this round trip is intended rather than passing pcmChunk directly
+      const upsampled = pcm16ToFloat32(pcm16Up);
+      const inputName = session.inputNames[0]; // only the model's first input is used
+      const outputName = session.outputNames[0]; // only the model's first output is used
+      const tensor = new this.ort.Tensor("float32", upsampled, [1, upsampled.length]);
+      const feeds = { [inputName]: tensor };
+      const results = await session.run(feeds);
+      const output = results[outputName];
+      if (!output || !output.data) {
+        return pcmChunk;
+      }
+      const enhanced = output.data instanceof Float32Array ? output.data : new Float32Array(output.data);
+      const pcm16Enhanced = float32ToPcm16(enhanced);
+      const pcm16Restored = this._downsamplerInst.process(pcm16Enhanced);
+      return pcm16Restored;
+    } catch (error) { // any failure degrades to pass-through for this chunk
+      const message = error instanceof Error ? error.message : String(error);
+      log().error(`DeepFilterNetFilter.process failed: ${message}`);
+      return pcmChunk;
+    }
+  }
+  /**
+   * Flush resamplers, release the ONNX session, and mark the filter closed.
+   *
+   * Errors from either ``flush()`` are ignored; an error from
+   * ``session.release()`` is logged as a warning. In every case the
+   * resamplers and session are cleared and ``closed`` is set, after which
+   * ``process()`` throws.
+   *
+   * NOTE(review): the return values of both ``flush()`` calls are discarded;
+   * if flush returns buffered samples they are dropped. Confirm that is acceptable.
+   */
+  async close() {
+    try {
+      this._upsamplerInst?.flush();
+    } catch {
+    }
+    try {
+      this._downsamplerInst?.flush();
+    } catch {
+    }
+    this._upsamplerInst = null;
+    this._downsamplerInst = null;
+    if (this.session !== null && typeof this.session.release === "function") {
+      try {
+        await this.session.release();
+      } catch (error) {
+        const message = error instanceof Error ? error.message : String(error);
+        log().warn(`DeepFilterNetFilter.close: release failed: ${message}`);
+      }
+    }
+    this.session = null;
+    this.closed = true;
+  }
+};
+// src/providers/krisp-filter.ts
+init_esm_shims();
+var KrispSampleRate = { // Named sample-rate constants; each HZ_* key maps to the same number it names.
+  HZ_8000: 8e3,
+  HZ_16000: 16e3,
+  HZ_32000: 32e3,
+  HZ_44100: 44100,
+  HZ_48000: 48e3
+};
+var KrispFrameDuration = { // Named frame-duration constants; each MS_* key maps to the same number it names.
+  MS_10: 10,
+  MS_15: 15,
+  MS_20: 20,
+  MS_30: 30,
+  MS_32: 32
+};
+var NODE_SDK_UNAVAILABLE_MESSAGE = "Krisp VIVA Filter is not yet available for the Patter TypeScript SDK.\n\nAs of 2026-05, Krisp does not publish an official Node.js (server) SDK. The Patter TypeScript SDK ships only the AudioFilter interface scaffold (this file) for parity with the Python implementation, since Patter runs server-side on a real-time audio stream from the telephony carrier.\n\nAvailable paths today:\n  1. Use the Python SDK: `from getpatter.providers.krisp_filter import KrispVivaFilter` \u2014 fully implemented, requires `pip install getpatter[krisp]` + `KRISP_VIVA_SDK_LICENSE_KEY` + `KRISP_VIVA_FILTER_MODEL_PATH`.\n  2. Use DeepFilterNet on TS: `new DeepFilterNetFilter({ modelPath: '.../DeepFilterNet3.onnx' })` \u2014 community ONNX export, no license needed.\n\nBrowser/React Native (not applicable to Patter server-side, listed for completeness):\n  - Browser WASM wrappers (various third-party packages) process local microphone capture, not server-received PCM/mulaw audio.\n  - Mobile client wrappers (iOS/Android, various third-party packages) are likewise client-side only.\n\nTrack Node SDK status:\n  - https://krisp.ai/developers/\n  - Patter backlog: task #38 \"Krisp TS port decision\"\n";
+var KrispVivaFilter = class { // Placeholder class: constructing it always throws an Error carrying NODE_SDK_UNAVAILABLE_MESSAGE.
+  static providerKey = "krisp_viva"; // identifier string exposed on the class itself
+  constructor(_options = {}) { // _options is accepted but never read
+    throw new Error(NODE_SDK_UNAVAILABLE_MESSAGE);
+  }
+  // The two methods below are unreachable at runtime (constructor throws)
+  // but kept so the class structurally satisfies `AudioFilter`. When the
+  // Node binding lands, replace constructor + these stubs with the real
+  // implementation.
+  async process(pcmChunk, _sampleRate) { // stub: resolves to the input chunk unchanged
+    return pcmChunk;
+  }
+  async close() { // stub: no-op
+  }
+};
 // src/telephony/twilio.ts
 init_esm_shims();
 var Carrier = class {
@@ -6905,12 +8283,28 @@ var TwilioAdapter = class _TwilioAdapter {
     return { callSid: call.sid };
   }
   /**
-   * Build a minimal ``<Response><Connect><Stream url="..."/></Connect></Response>``
-   * TwiML document. Mirrors the Python adapter's ``generate_stream_twiml``.
+   * Build a ``<Response><Connect><Stream url="...">`` TwiML document.
+   *
+   * ``parameters`` is forwarded as ``<Parameter name="..." value="..."/>``
+   * children of ``<Stream>``. Twilio Media Streams strips query-string params
+   * from the ``<Stream url=...>`` before the WS handshake, so
+   * ``<Parameter>`` tags are the supported way to pre-populate
+   * ``start.customParameters`` on the WS ``start`` frame. Used by the
+   * inbound path to carry caller / callee through to the bridge.
+   *
+   * Mirrors the Python adapter's ``generate_stream_twiml``.
+   *
+   * All five XML special characters (``&``, ``<``, ``>``, ``"``, ``'``) are
+   * escaped in the URL and in every parameter name and value.
+   *
+   * @param streamUrl URL placed in the ``url`` attribute of ``<Stream>``; must be a string, since ``replace`` is called on it.
+   * @param parameters Optional object of name/value pairs. Entries whose value is ``null`` or ``undefined`` are skipped; other values are converted with ``String()``.
+   * @returns The complete TwiML document as a single-line string, including the XML declaration.
    */
-  static generateStreamTwiml(streamUrl) {
-    const escaped = streamUrl.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&apos;");
-    return `<?xml version="1.0" encoding="UTF-8"?><Response><Connect><Stream url="${escaped}"/></Connect></Response>`;
+  static generateStreamTwiml(streamUrl, parameters) {
+    const esc = (s) => s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&apos;"); // "&" is replaced first so the entities produced by later replacements are not double-escaped
+    const escapedUrl = esc(streamUrl);
+    let paramTags = "";
+    if (parameters) {
+      for (const [name, value] of Object.entries(parameters)) {
+        if (value == null) continue; // loose equality: skips both null and undefined
+        paramTags += `<Parameter name="${esc(name)}" value="${esc(String(value))}"/>`;
+      }
+    }
+    return `<?xml version="1.0" encoding="UTF-8"?><Response><Connect><Stream url="${escapedUrl}">${paramTags}</Stream></Connect></Response>`;
   }
   /** Force-complete an in-progress call. */
   async endCall(callSid) {
@@ -7100,6 +8494,8 @@ var TelnyxSTT = class {
   transcriptionEngine;
   sampleRate;
   baseUrl;
+  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+  static providerKey = "telnyx_stt";
   ws = null;
   callbacks = [];
   headerSent = false;
@@ -7204,6 +8600,8 @@ var TelnyxTTS = class {
   apiKey;
   voice;
   baseUrl;
+  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+  static providerKey = "telnyx_tts";
   /** Collect every audio chunk into a single Buffer. */
   async synthesize(text) {
     const chunks = [];
@@ -7299,16 +8697,23 @@ export {
   CallMetricsAccumulator,
   STT4 as CartesiaSTT,
   TTS4 as CartesiaTTS,
+  CartesiaTTSModel,
+  CartesiaTTSVoiceMode,
   LLM4 as CerebrasLLM,
   ChatContext,
   CloudflareTunnel,
   DEFAULT_MIN_SENTENCE_LEN,
   DEFAULT_PRICING,
   DTMF_EVENTS,
+  DeepFilterNetFilter,
+  DeepgramModel,
   STT as DeepgramSTT,
   DefaultToolExecutor,
   ConvAI as ElevenLabsConvAI,
   ElevenLabsConvAIAdapter,
+  ElevenLabsModel,
+  ElevenLabsOutputFormat,
+  ElevenLabsTTS as ElevenLabsRestTTS,
   TTS as ElevenLabsTTS,
   TTS2 as ElevenLabsWebSocketTTS,
   ErrorCode,
@@ -7322,16 +8727,29 @@ export {
   Guardrail,
   IVRActivity,
   TTS7 as InworldTTS,
+  KrispFrameDuration,
+  KrispSampleRate,
+  KrispVivaFilter,
   LLMLoop,
   TTS6 as LMNTTTS,
   MetricsStore,
+  MinWordsStrategy,
   Ngrok,
   LLM as OpenAILLM,
   OpenAILLMProvider,
   Realtime as OpenAIRealtime,
+  Realtime2 as OpenAIRealtime2,
+  OpenAIRealtime2Adapter,
   OpenAIRealtimeAdapter,
+  OpenAIRealtimeAudioFormat,
+  OpenAIRealtimeModel,
+  OpenAIRealtimeVADType,
   TTS3 as OpenAITTS,
   STT3 as OpenAITranscribeSTT,
+  OpenAITranscriptionModel,
+  OpenAIVoice,
+  PRICING_LAST_UPDATED,
+  PRICING_VERSION,
   PartialStreamError,
   Patter,
   PatterConnectionError,
@@ -7339,9 +8757,12 @@ export {
   PatterTool,
   PcmCarry,
   PipelineHookExecutor,
+  PricingUnit,
   ProvisionError,
   RateLimitError,
   RemoteMessageHandler,
+  RimeAudioFormat,
+  RimeModel,
   TTS5 as RimeTTS,
   SPAN_BARGEIN,
   SPAN_CALL,
@@ -7395,6 +8816,7 @@ export {
   deepgram,
   defineTool,
   elevenlabs,
+  evaluateStrategies as evaluateBargeInStrategies,
   filterEmoji,
   filterForTTS,
   filterMarkdown,
@@ -7420,6 +8842,7 @@ export {
   resample24kTo16k,
   resample8kTo16k,
   resamplePcm,
+  resetStrategies as resetBargeInStrategies,
   rime,
   scheduleCron,
   scheduleInterval,