npm - getpatter - Versions diffs - 0.6.1 → 0.6.2 - Mend

getpatter 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/dist/chunk-CL2U3YET.mjs +1429 -0
package/dist/{chunk-TEW3NAZJ.mjs → chunk-LE63CSOB.mjs} +371 -1486
package/dist/{chunk-RV7APPYE.mjs → chunk-R2T4JABZ.mjs} +13 -0
package/dist/cli.js +48 -23
package/dist/dashboard/ui.html +8 -8
package/dist/index.d.mts +452 -186
package/dist/index.d.ts +452 -186
package/dist/index.js +1485 -979
package/dist/index.mjs +973 -790
package/dist/openai-realtime-2-CNFARP25.mjs +8 -0
package/dist/{silero-vad-NSEXI4XS.mjs → silero-vad-LNDFGIY7.mjs} +1 -1
package/dist/{test-mode-WEKKNBLD.mjs → test-mode-RS57BDM6.mjs} +2 -1
package/package.json +1 -1
package/src/dashboard/ui.html +8 -8

package/dist/index.mjs CHANGED Viewed

@@ -6,6 +6,7 @@ import {
   CallMetricsAccumulator,
   DEFAULT_MIN_SENTENCE_LEN,
   DEFAULT_PRICING,
+  DeepgramModel,
   DeepgramSTT,
   DefaultToolExecutor,
   ElevenLabsConvAIAdapter,
@@ -15,12 +16,12 @@ import {
   LLMLoop,
   MetricsStore,
   OpenAILLMProvider,
-  OpenAIRealtime2Adapter,
-  OpenAIRealtimeAdapter,
+  PRICING_LAST_UPDATED,
+  PRICING_VERSION,
   PatterConnectionError,
   PatterError,
-  PcmCarry,
   PipelineHookExecutor,
+  PricingUnit,
   ProvisionError,
   RateLimitError,
   RemoteMessageHandler,
@@ -32,18 +33,14 @@ import {
   SPAN_TOOL,
   SPAN_TTS,
   SentenceChunker,
-  StatefulResampler,
   TestSession,
+  VERSION,
   calculateRealtimeCost,
   calculateSttCost,
   calculateTelephonyCost,
   calculateTtsCost,
   callsToCsv,
   callsToJson,
-  createResampler16kTo8k,
-  createResampler24kTo16k,
-  createResampler24kTo8k,
-  createResampler8kTo16k,
   initTracing,
   isRemoteUrl,
   isTracingEnabled,
@@ -53,14 +50,29 @@ import {
   mergePricing,
   mountApi,
   mountDashboard,
+  resolveLogRoot,
+  startSpan
+} from "./chunk-LE63CSOB.mjs";
+import {
+  OpenAIRealtime2Adapter,
+  OpenAIRealtimeAdapter,
+  OpenAIRealtimeAudioFormat,
+  OpenAIRealtimeModel,
+  OpenAIRealtimeVADType,
+  OpenAITranscriptionModel,
+  OpenAIVoice,
+  PcmCarry,
+  StatefulResampler,
+  createResampler16kTo8k,
+  createResampler24kTo16k,
+  createResampler24kTo8k,
+  createResampler8kTo16k,
   mulawToPcm16,
   pcm16ToMulaw,
   resample16kTo8k,
   resample24kTo16k,
-  resample8kTo16k,
-  resolveLogRoot,
-  startSpan
-} from "./chunk-TEW3NAZJ.mjs";
+  resample8kTo16k
+} from "./chunk-CL2U3YET.mjs";
 import {
   MinWordsStrategy,
   evaluateStrategies,
@@ -75,7 +87,7 @@ import {
 } from "./chunk-6GR5MHHQ.mjs";
 import {
   SileroVAD
-} from "./chunk-RV7APPYE.mjs";
+} from "./chunk-R2T4JABZ.mjs";
 import {
   __dirname,
   __require,
@@ -105,7 +117,7 @@ var Realtime = class {
       );
     }
     this.apiKey = key;
-    this.model = opts.model ?? "gpt-4o-mini-realtime-preview";
+    this.model = opts.model ?? "gpt-realtime-mini";
     this.voice = opts.voice ?? "alloy";
     this.reasoningEffort = opts.reasoningEffort;
     this.inputAudioTranscriptionModel = opts.inputAudioTranscriptionModel;
@@ -557,7 +569,9 @@ function resolvePersistRoot(persist) {
   if (persist === false) return null;
   if (persist === true) return resolveLogRoot("auto");
   if (typeof persist === "string") return resolveLogRoot(persist);
-  return resolveLogRoot();
+  const envRoot = resolveLogRoot();
+  if (envRoot !== null) return envRoot;
+  return resolveLogRoot("auto");
 }
 function closeParkedConnections(slot) {
   if (slot.stt) {
@@ -573,6 +587,11 @@ function closeParkedConnections(slot) {
     }
   }
   if (slot.openaiRealtime) {
+    const wsAny = slot.openaiRealtime;
+    if (wsAny._parkedKeepalive) {
+      clearInterval(wsAny._parkedKeepalive);
+      delete wsAny._parkedKeepalive;
+    }
     try {
       slot.openaiRealtime.close();
     } catch {
@@ -1014,7 +1033,7 @@ var Patter = class {
   }
   /** Run the agent in interactive terminal-test mode (no real telephony). */
   async test(opts) {
-    const { TestSession: TestSession2 } = await import("./test-mode-WEKKNBLD.mjs");
+    const { TestSession: TestSession2 } = await import("./test-mode-RS57BDM6.mjs");
     const session = new TestSession2();
     await session.run({
       agent: opts.agent,
@@ -1144,7 +1163,9 @@ var Patter = class {
     const tts = agent.tts;
     const sttOpen = typeof stt?.openParkedConnection === "function" ? stt.openParkedConnection.bind(stt) : null;
     const ttsOpen = typeof tts?.openParkedConnection === "function" ? tts.openParkedConnection.bind(tts) : null;
-    if (!sttOpen && !ttsOpen) return;
+    const providerStr = agent.provider ?? "";
+    const wantsRealtimePark = providerStr === "openai_realtime" || providerStr === "openai_realtime_2";
+    if (!sttOpen && !ttsOpen && !wantsRealtimePark) return;
     const slot = {};
     this.prewarmedConnections.set(callId, slot);
     const startedAt = Date.now();
@@ -1189,6 +1210,43 @@ var Patter = class {
         }
       })());
     }
+    if (wantsRealtimePark) {
+      tasks.push((async () => {
+        const { OpenAIRealtime2Adapter: OpenAIRealtime2Adapter2 } = await import("./openai-realtime-2-CNFARP25.mjs");
+        const apiKey = process.env.OPENAI_API_KEY ?? "";
+        if (!apiKey) {
+          getLogger().debug(`Park OpenAI Realtime skipped for ${callId}: no OPENAI_API_KEY`);
+          return;
+        }
+        try {
+          const tmpAdapter = new OpenAIRealtime2Adapter2(
+            apiKey,
+            agent.model ?? "gpt-realtime-mini",
+            agent.voice ?? "alloy",
+            agent.systemPrompt ?? "",
+            [],
+            // audioFormat — positional param, passed as undefined here so the
+            // adapter falls back to its own default (g711_ulaw, the Twilio wire
+            // format — TODO confirm). The GA adapter emits audio/pcm@24000 regardless.
+            void 0
+          );
+          const ws = await tmpAdapter.openParkedConnection();
+          if (this.prewarmedConnections.get(callId) !== slot) {
+            try {
+              ws.close();
+            } catch {
+            }
+            return;
+          }
+          slot.openaiRealtime = ws;
+          getLogger().info(
+            `[PREWARM] callId=${callId} provider=openai_realtime ms=${Date.now() - startedAt}`
+          );
+        } catch (err) {
+          getLogger().debug(`Park OpenAI Realtime failed for ${callId}: ${String(err)}`);
+        }
+      })());
+    }
     const task = (async () => {
       await Promise.allSettled(tasks);
     })();
@@ -1266,7 +1324,7 @@ var Patter = class {
    * with a warn when the cap is reached (the call still proceeds —
    * StreamHandler falls back to live TTS).
    */
-  spawnPrewarmFirstMessage(agent, callId, ringTimeout) {
+  spawnPrewarmFirstMessage(agent, callId, ringTimeout, carrier) {
     if (!agent.prewarmFirstMessage) return;
     const providerMode = agent.provider ?? "openai_realtime";
     if (providerMode !== "pipeline") {
@@ -1279,6 +1337,18 @@ var Patter = class {
     const tts = agent.tts;
     if (!firstMessage || !tts) return;
     if (typeof tts.synthesizeStream !== "function") return;
+    if (carrier) {
+      const carrierAware = tts;
+      if (typeof carrierAware.setTelephonyCarrier === "function") {
+        try {
+          carrierAware.setTelephonyCarrier(carrier);
+        } catch (err) {
+          getLogger().debug(
+            `Prewarm TTS setTelephonyCarrier failed for ${callId}: ${String(err)}`
+          );
+        }
+      }
+    }
     const inFlight = this.prewarmAudio.size + this.prewarmTasks.size;
     if (inFlight >= PREWARM_CACHE_MAX) {
       getLogger().warn(
@@ -1391,16 +1461,25 @@ var Patter = class {
         telnyxCallId = body.data?.call_control_id;
       } catch {
       }
-      if (this.embeddedServer && telnyxCallId) {
-        this.embeddedServer.metricsStore.recordCallInitiated({
+      if (telnyxCallId) {
+        const initiatedPayload = {
           call_id: telnyxCallId,
           caller: phoneNumber,
           callee: options.to,
-          direction: "outbound"
-        });
+          direction: "outbound",
+          status: "initiated"
+        };
+        if (this.embeddedServer) {
+          this.embeddedServer.metricsStore.recordCallInitiated(initiatedPayload);
+        }
+        try {
+          const { notifyDashboard: notifyDashboard2 } = await import("./persistence-LVIAHESK.mjs");
+          notifyDashboard2(initiatedPayload);
+        } catch {
+        }
       }
       if (telnyxCallId) {
-        this.spawnPrewarmFirstMessage(options.agent, telnyxCallId, effectiveRingTimeout);
+        this.spawnPrewarmFirstMessage(options.agent, telnyxCallId, effectiveRingTimeout, "telnyx");
         if (options.agent.prewarm !== false) {
           this.parkProviderConnections(options.agent, telnyxCallId);
         }
@@ -1453,21 +1532,30 @@ var Patter = class {
       twilioNotificationsPath = body.subresource_uris?.notifications;
     } catch {
     }
-    if (this.embeddedServer && twilioCallSid) {
-      this.embeddedServer.metricsStore.recordCallInitiated({
+    if (twilioCallSid) {
+      const initiatedPayload = {
         call_id: twilioCallSid,
         caller: phoneNumber,
         callee: options.to,
-        direction: "outbound"
-      });
-      if (twilioNotificationsPath) {
-        getLogger().info(
-          `Outbound call ${twilioCallSid} placed. Twilio notifications: https://api.twilio.com${twilioNotificationsPath} (check here if the call drops with no audio).`
-        );
+        direction: "outbound",
+        status: "initiated"
+      };
+      if (this.embeddedServer) {
+        this.embeddedServer.metricsStore.recordCallInitiated(initiatedPayload);
+        if (twilioNotificationsPath) {
+          getLogger().info(
+            `Outbound call ${twilioCallSid} placed. Twilio notifications: https://api.twilio.com${twilioNotificationsPath} (check here if the call drops with no audio).`
+          );
+        }
+      }
+      try {
+        const { notifyDashboard: notifyDashboard2 } = await import("./persistence-LVIAHESK.mjs");
+        notifyDashboard2(initiatedPayload);
+      } catch {
       }
     }
     if (twilioCallSid) {
-      this.spawnPrewarmFirstMessage(options.agent, twilioCallSid, effectiveRingTimeout);
+      this.spawnPrewarmFirstMessage(options.agent, twilioCallSid, effectiveRingTimeout, "twilio");
       if (options.agent.prewarm !== false) {
         this.parkProviderConnections(options.agent, twilioCallSid);
       }
@@ -2764,109 +2852,694 @@ function scheduleInterval(intervalOrOpts, callback) {
   };
 }
-// src/stt/deepgram.ts
+// src/providers/elevenlabs-tts.ts
 init_esm_shims();
-var STT = class extends DeepgramSTT {
-  static providerKey = "deepgram";
-  constructor(opts = {}) {
-    const key = opts.apiKey ?? process.env.DEEPGRAM_API_KEY;
-    if (!key) {
-      throw new Error(
-        "Deepgram STT requires an apiKey. Pass { apiKey: 'dg_...' } or set DEEPGRAM_API_KEY in the environment."
-      );
-    }
-    super(
-      key,
-      opts.language ?? "en",
-      opts.model ?? "nova-3",
-      opts.encoding ?? "linear16",
-      opts.sampleRate ?? 16e3,
-      {
-        endpointingMs: opts.endpointingMs ?? 150,
-        utteranceEndMs: opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3,
-        smartFormat: opts.smartFormat ?? true,
-        interimResults: opts.interimResults ?? true,
-        ...opts.vadEvents !== void 0 ? { vadEvents: opts.vadEvents } : {}
-      }
-    );
-  }
+var ELEVENLABS_BASE_URL = "https://api.elevenlabs.io/v1";
+var ELEVENLABS_VOICE_ID_BY_NAME = {
+  rachel: "21m00Tcm4TlvDq8ikWAM",
+  drew: "29vD33N1CtxCmqQRPOHJ",
+  clyde: "2EiwWnXFnvU5JabPnv8n",
+  paul: "5Q0t7uMcjvnagumLfvZi",
+  domi: "AZnzlk1XvdvUeBnXmlld",
+  dave: "CYw3kZ02Hs0563khs1Fj",
+  fin: "D38z5RcWu1voky8WS1ja",
+  bella: "EXAVITQu4vr4xnSDxMaL",
+  antoni: "ErXwobaYiN019PkySvjV",
+  thomas: "GBv7mTt0atIp3Br8iCZE",
+  charlie: "IKne3meq5aSn9XLyUdCD",
+  george: "JBFqnCBsd6RMkjVDRZzb",
+  emily: "LcfcDJNUP1GQjkzn1xUU",
+  elli: "MF3mGyEYCl7XYWbV9V6O",
+  callum: "N2lVS1w4EtoT3dr4eOWO",
+  patrick: "ODq5zmih8GrVes37Dizd",
+  harry: "SOYHLrjzK2X1ezoPC6cr",
+  liam: "TX3LPaxmHKxFdv7VOQHJ",
+  dorothy: "ThT5KcBeYPX3keUQqHPh",
+  josh: "TxGEqnHWrfWFTfGW9XjX",
+  arnold: "VR6AewLTigWG4xSOukaG",
+  charlotte: "XB0fDUnXU5powFXDhCwa",
+  matilda: "XrExE9yKIg1WjnnlVkGX",
+  matthew: "Yko7PKHZNXotIFUBG7I9",
+  james: "ZQe5CZNOzWyzPSCn5a3c",
+  joseph: "Zlb1dXrM653N07WRdFW3",
+  jeremy: "bVMeCyTHy58xNoL34h3p",
+  michael: "flq6f7yk4E4fJM5XTYuZ",
+  ethan: "g5CIjZEefAph4nQFvHAz",
+  gigi: "jBpfuIE2acCO8z3wKNLl",
+  freya: "jsCqWAovK2LkecY7zXl4",
+  brian: "nPczCjzI2devNBz1zQrb",
+  grace: "oWAxZDx7w5VEj9dCyTzz",
+  daniel: "onwK4e9ZLuTAKqWW03F9",
+  lily: "pFZP5JQG7iQjIQuC4Bku",
+  serena: "pMsXgVXv3BLzUgSXRplE",
+  adam: "pNInz6obpgDQGcFmaJgB",
+  nicole: "piTKgcLEGmPE4e6mEKli",
+  bill: "pqHfZKP75CvOlQylNhV4",
+  jessie: "t0jbNlBVZ17f02VDIeMI",
+  ryan: "wViXBPUzp2ZZixB1xQuM",
+  sam: "yoZ06aMxZJJ28mfd3POQ",
+  glinda: "z9fAnlkpzviPz146aGWa",
+  giovanni: "zcAOhNBS3c14rBihAFp1",
+  mimi: "zrHiDhphv9ZnVXBqCLjz",
+  sarah: "EXAVITQu4vr4xnSDxMaL",
+  alloy: "EXAVITQu4vr4xnSDxMaL"
 };
-// src/stt/whisper.ts
-init_esm_shims();
-// src/providers/whisper-stt.ts
-init_esm_shims();
-var OPENAI_TRANSCRIPTION_URL = "https://api.openai.com/v1/audio/transcriptions";
-var DEFAULT_BUFFER_SIZE = 16e3 * 2;
-var ALLOWED_MODELS = /* @__PURE__ */ new Set(["whisper-1", "gpt-4o-transcribe", "gpt-4o-mini-transcribe"]);
-function wrapPcmInWav(pcm, sampleRate = 16e3, channels = 1, bitsPerSample = 16) {
-  const dataSize = pcm.length;
-  const header = Buffer.alloc(44);
-  header.write("RIFF", 0);
-  header.writeUInt32LE(36 + dataSize, 4);
-  header.write("WAVE", 8);
-  header.write("fmt ", 12);
-  header.writeUInt32LE(16, 16);
-  header.writeUInt16LE(1, 20);
-  header.writeUInt16LE(channels, 22);
-  header.writeUInt32LE(sampleRate, 24);
-  header.writeUInt32LE(sampleRate * channels * (bitsPerSample / 8), 28);
-  header.writeUInt16LE(channels * (bitsPerSample / 8), 32);
-  header.writeUInt16LE(bitsPerSample, 34);
-  header.write("data", 36);
-  header.writeUInt32LE(dataSize, 40);
-  return Buffer.concat([header, pcm]);
+var VOICE_ID_PATTERN = /^[A-Za-z0-9]{20}$/;
+function resolveVoiceId(voice) {
+  if (!voice) return voice;
+  if (VOICE_ID_PATTERN.test(voice)) return voice;
+  return ELEVENLABS_VOICE_ID_BY_NAME[voice.toLowerCase()] ?? voice;
 }
-var WhisperSTT = class _WhisperSTT {
-  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
-  static providerKey = "whisper";
+var ElevenLabsModel = {
+  V3: "eleven_v3",
+  FLASH_V2_5: "eleven_flash_v2_5",
+  TURBO_V2_5: "eleven_turbo_v2_5",
+  MULTILINGUAL_V2: "eleven_multilingual_v2",
+  MONOLINGUAL_V1: "eleven_monolingual_v1"
+};
+var ElevenLabsOutputFormat = {
+  MP3_22050_32: "mp3_22050_32",
+  MP3_44100_32: "mp3_44100_32",
+  MP3_44100_64: "mp3_44100_64",
+  MP3_44100_96: "mp3_44100_96",
+  MP3_44100_128: "mp3_44100_128",
+  MP3_44100_192: "mp3_44100_192",
+  PCM_8000: "pcm_8000",
+  PCM_16000: "pcm_16000",
+  PCM_22050: "pcm_22050",
+  PCM_24000: "pcm_24000",
+  PCM_44100: "pcm_44100",
+  ULAW_8000: "ulaw_8000"
+};
+var ElevenLabsTTS = class _ElevenLabsTTS {
+  // Stable pricing/dashboard key — read by stream-handler / metrics via
+  // ``(agent.tts.constructor as any).providerKey``. Without this the cost
+  // calculator falls back to ``constructor.name`` ("ElevenLabsTTS") which
+  // does NOT match the pricing table key "elevenlabs", silently zeroing
+  // TTS cost for callers that construct the raw REST class directly
+  // (exposed at top level as ``ElevenLabsRestTTS``).
+  static providerKey = "elevenlabs";
   apiKey;
-  model;
-  language;
-  bufferSize;
-  responseFormat;
-  // Accumulate chunks in an array and concat once on flush — avoids the
-  // per-``sendAudio`` O(n) ``Buffer.concat([buffer, chunk])`` that quickly
-  // dominates CPU when the phone leg delivers 20 ms frames.
-  chunks = [];
-  bufferedBytes = 0;
-  callbacks = /* @__PURE__ */ new Set();
-  running = false;
-  pendingTranscriptions = [];
+  voiceId;
+  modelId;
+  _outputFormat;
+  _outputFormatExplicit;
+  voiceSettings;
+  languageCode;
+  chunkSize;
   /**
-   * @param apiKey OpenAI API key.
-   * @param language ISO-639-1 language code (e.g. ``"en"``, ``"it"``). Optional.
-   * @param model One of ``whisper-1``, ``gpt-4o-transcribe``, ``gpt-4o-mini-transcribe``.
-   * @param bufferSize Bytes of PCM16 to buffer before each transcription request.
-   * @param responseFormat ``"json"`` (default) or ``"verbose_json"``.
-   *
-   * Argument order matches the Python SDK's ``WhisperSTT(api_key, language, model, response_format)``
-   * for cross-language parity. Pre-0.5.3 the TS positional order was
-   * ``(apiKey, model, language, bufferSize, responseFormat)`` — callers using
-   * the old order will need to swap ``language`` and ``model``.
+   * Public view of the (possibly auto-flipped) wire format. Read by the
+   * stream-handler to decide whether to skip the client-side resample +
+   * mulaw encode when the bytes are already in the carrier's wire codec.
    */
-  constructor(apiKey, language, model = "whisper-1", bufferSize = DEFAULT_BUFFER_SIZE, responseFormat = "json") {
-    if (!ALLOWED_MODELS.has(model)) {
-      throw new Error(
-        `WhisperSTT: unsupported model "${model}". Expected one of ${[...ALLOWED_MODELS].join(", ")}.`
-      );
-    }
-    this.apiKey = apiKey;
-    this.model = model;
-    this.language = language;
-    this.bufferSize = bufferSize;
-    this.responseFormat = responseFormat;
-  }
-  /** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
-  static forTwilio(apiKey, language = "en", model = "whisper-1") {
-    return new _WhisperSTT(apiKey, language, model);
-  }
-  /** Reset the audio buffer and arm the adapter for incoming chunks. */
-  async connect() {
-    this.running = true;
-    this.chunks = [];
-    this.bufferedBytes = 0;
+  get outputFormat() {
+    return this._outputFormat;
+  }
+  constructor(apiKey, voiceIdOrOptions = "21m00Tcm4TlvDq8ikWAM", modelId = ElevenLabsModel.FLASH_V2_5, outputFormat = ElevenLabsOutputFormat.PCM_16000) {
+    this.apiKey = apiKey;
+    if (typeof voiceIdOrOptions === "object") {
+      const o = voiceIdOrOptions;
+      this.voiceId = resolveVoiceId(o.voiceId ?? "21m00Tcm4TlvDq8ikWAM");
+      this.modelId = o.modelId ?? ElevenLabsModel.FLASH_V2_5;
+      this._outputFormatExplicit = o.outputFormat !== void 0;
+      this._outputFormat = o.outputFormat ?? ElevenLabsOutputFormat.PCM_16000;
+      this.voiceSettings = o.voiceSettings;
+      this.languageCode = o.languageCode;
+      this.chunkSize = o.chunkSize ?? 4096;
+    } else {
+      this.voiceId = resolveVoiceId(voiceIdOrOptions);
+      this.modelId = modelId;
+      this._outputFormatExplicit = outputFormat !== ElevenLabsOutputFormat.PCM_16000;
+      this._outputFormat = outputFormat;
+      this.voiceSettings = void 0;
+      this.languageCode = void 0;
+      this.chunkSize = 4096;
+    }
+  }
+  /**
+   * Hook called by ``StreamHandler.initPipeline`` to advise the carrier
+   * wire format. When the user did NOT pass an explicit ``outputFormat``,
+   * auto-flip to the carrier's native codec so the audio bytes ElevenLabs
+   * returns are already in Twilio/Telnyx wire format — eliminating the
+   * client-side 16 kHz → 8 kHz resample and PCM → μ-law encode. The
+   * resample/encode chain was a source of audible artifacts on the
+   * prewarmed firstMessage (see 0.6.2 acceptance notes — burst delivery
+   * of resampled audio crackled on the carrier-side jitter buffer).
+   *
+   * No-op when the caller passed an explicit ``outputFormat`` (incl. via
+   * the ``forTwilio`` / ``forTelnyx`` factories) — user wins.
+   *
+   * Parity with {@link ElevenLabsWebSocketTTS.setTelephonyCarrier}.
+   */
+  setTelephonyCarrier(carrier) {
+    if (this._outputFormatExplicit) return;
+    if (carrier === "twilio") {
+      this._outputFormat = ElevenLabsOutputFormat.ULAW_8000;
+    } else if (carrier === "telnyx") {
+      this._outputFormat = ElevenLabsOutputFormat.PCM_16000;
+    }
+  }
+  /**
+   * Construct an instance pre-configured for Twilio Media Streams.
+   *
+   * Sets `outputFormat='ulaw_8000'` so ElevenLabs emits μ-law @ 8 kHz
+   * directly — the exact wire format Twilio's media stream uses — letting
+   * the SDK skip the 16 kHz→8 kHz resample and PCM→μ-law conversion in
+   * `TwilioAudioSender`. Saves ~30–80 ms first-byte and per-frame CPU,
+   * and removes a potential aliasing source.
+   *
+   * `voiceSettings` defaults to a low-bandwidth-friendly profile
+   * (speaker boost off, modest stability) which sounds cleaner at 8 kHz
+   * μ-law than the studio default. Pass an explicit object to override.
+   */
+  static forTwilio(apiKey, options = {}) {
+    const voiceSettings = options.voiceSettings ?? {
+      // Speaker boost adds high-frequency emphasis that aliases badly over an
+      // 8 kHz μ-law line. Slightly higher stability tames the excursions
+      // that compander quantization noise can amplify.
+      stability: 0.6,
+      similarity_boost: 0.75,
+      use_speaker_boost: false
+    };
+    return new _ElevenLabsTTS(apiKey, {
+      ...options,
+      voiceSettings,
+      outputFormat: ElevenLabsOutputFormat.ULAW_8000
+    });
+  }
+  /**
+   * Construct an instance pre-configured for Telnyx bidirectional media.
+   *
+   * Telnyx's default media-streaming codec is L16 PCM @ 16 kHz, which
+   * matches our default Telnyx handler. We pick `pcm_16000` so the audio
+   * flows end-to-end with zero resampling or transcoding.
+   *
+   * Trade-off: if your Telnyx profile is pinned to PCMU/8000 (μ-law),
+   * construct `ElevenLabsTTS` directly with `outputFormat: 'ulaw_8000'`
+   * — Telnyx supports that natively too.
+   */
+  static forTelnyx(apiKey, options = {}) {
+    return new _ElevenLabsTTS(apiKey, {
+      ...options,
+      outputFormat: ElevenLabsOutputFormat.PCM_16000
+    });
+  }
+  /**
+   * Synthesise text to speech and return the full audio as a single Buffer.
+   *
+   * For large chunks (or when latency matters) call `synthesizeStream` instead.
+   */
+  async synthesize(text) {
+    const chunks = [];
+    for await (const chunk of this.synthesizeStream(text)) {
+      chunks.push(chunk);
+    }
+    return Buffer.concat(chunks);
+  }
+  /**
+   * Synthesise text and yield audio chunks as they arrive (streaming).
+   *
+   * The yielded buffers are in the configured `outputFormat` (raw PCM at
+   * 16 kHz by default). `chunkSize` caps the size of each yielded buffer
+   * (default 4096 bytes) — 512 is a good choice for low-latency telephony.
+   */
+  async *synthesizeStream(text) {
+    const url = `${ELEVENLABS_BASE_URL}/text-to-speech/${encodeURIComponent(this.voiceId)}/stream?output_format=${encodeURIComponent(this._outputFormat)}`;
+    const body = {
+      text,
+      model_id: this.modelId
+    };
+    if (this.voiceSettings) body["voice_settings"] = this.voiceSettings;
+    if (this.languageCode) body["language_code"] = this.languageCode;
+    const response = await fetch(url, {
+      method: "POST",
+      headers: {
+        "xi-api-key": this.apiKey,
+        "Content-Type": "application/json"
+      },
+      body: JSON.stringify(body),
+      signal: AbortSignal.timeout(3e4)
+    });
+    if (!response.ok) {
+      const errBody = await response.text();
+      throw new Error(`ElevenLabs TTS error ${response.status}: ${errBody}`);
+    }
+    if (!response.body) {
+      throw new Error("ElevenLabs TTS: no response body");
+    }
+    const reader = response.body.getReader();
+    try {
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+        if (!value || value.length === 0) continue;
+        const buf = Buffer.from(value);
+        for (let offset = 0; offset < buf.length; offset += this.chunkSize) {
+          yield buf.subarray(offset, Math.min(offset + this.chunkSize, buf.length));
+        }
+      }
+    } finally {
+      if (typeof reader.cancel === "function") await reader.cancel().catch(() => {
+      });
+      reader.releaseLock();
+    }
+  }
+};
+// src/providers/cartesia-tts.ts
+init_esm_shims();
+var CARTESIA_BASE_URL = "https://api.cartesia.ai";
+var CARTESIA_API_VERSION = "2025-04-16";
+var CARTESIA_DEFAULT_VOICE_ID = "f786b574-daa5-4673-aa0c-cbe3e8534c02";
+var CartesiaTTSModel = {
+  SONIC_3: "sonic-3",
+  SONIC_2: "sonic-2",
+  SONIC: "sonic"
+};
+var CartesiaTTSContainer = {
+  RAW: "raw",
+  WAV: "wav",
+  MP3: "mp3"
+};
+var CartesiaTTSEncoding = {
+  PCM_S16LE: "pcm_s16le",
+  PCM_F32LE: "pcm_f32le",
+  PCM_MULAW: "pcm_mulaw",
+  PCM_ALAW: "pcm_alaw"
+};
+var CartesiaTTSSampleRate = {
+  HZ_8000: 8e3,
+  HZ_16000: 16e3,
+  HZ_22050: 22050,
+  HZ_24000: 24e3,
+  HZ_44100: 44100
+};
+var CartesiaTTSVoiceMode = {
+  ID: "id",
+  EMBEDDING: "embedding"
+};
+var CartesiaTTS = class _CartesiaTTS {
+  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+  static providerKey = "cartesia_tts";
+  apiKey;
+  model;
+  voice;
+  language;
+  sampleRate;
+  speed;
+  emotion;
+  volume;
+  baseUrl;
+  apiVersion;
+  constructor(apiKey, opts = {}) {
+    this.apiKey = apiKey;
+    this.model = opts.model ?? CartesiaTTSModel.SONIC_3;
+    this.voice = opts.voice ?? CARTESIA_DEFAULT_VOICE_ID;
+    this.language = opts.language ?? "en";
+    this.sampleRate = opts.sampleRate ?? CartesiaTTSSampleRate.HZ_16000;
+    this.speed = opts.speed;
+    this.emotion = typeof opts.emotion === "string" ? [opts.emotion] : opts.emotion;
+    this.volume = opts.volume;
+    this.baseUrl = opts.baseUrl ?? CARTESIA_BASE_URL;
+    this.apiVersion = opts.apiVersion ?? CARTESIA_API_VERSION;
+  }
+  /**
+   * Construct an instance pre-configured for Twilio Media Streams.
+   *
+   * Sets `sampleRate=8000` so Cartesia emits PCM_S16LE @ 8 kHz directly.
+   * Twilio's media stream uses μ-law @ 8 kHz so the SDK still does the
+   * PCM → μ-law transcode client-side, but the 16 kHz → 8 kHz resample
+   * step is skipped. Saves ~10–30 ms first-byte plus per-frame CPU and
+   * removes a potential aliasing source.
+   */
+  static forTwilio(apiKey, options = {}) {
+    return new _CartesiaTTS(apiKey, {
+      ...options,
+      sampleRate: CartesiaTTSSampleRate.HZ_8000
+    });
+  }
+  /**
+   * Construct an instance pre-configured for Telnyx bidirectional media.
+   *
+   * Sets `sampleRate=16000` to match Telnyx's L16/16000 default codec —
+   * audio flows end-to-end with zero resampling or transcoding. Same as
+   * the bare-constructor default; exists for API symmetry with
+   * {@link CartesiaTTS.forTwilio}.
+   */
+  static forTelnyx(apiKey, options = {}) {
+    return new _CartesiaTTS(apiKey, {
+      ...options,
+      sampleRate: CartesiaTTSSampleRate.HZ_16000
+    });
+  }
+  /** Build the JSON payload for the Cartesia bytes endpoint. */
+  buildPayload(text) {
+    const payload = {
+      model_id: this.model,
+      voice: { mode: CartesiaTTSVoiceMode.ID, id: this.voice },
+      transcript: text,
+      output_format: {
+        container: CartesiaTTSContainer.RAW,
+        encoding: CartesiaTTSEncoding.PCM_S16LE,
+        sample_rate: this.sampleRate
+      },
+      language: this.language
+    };
+    const generationConfig = {};
+    if (this.speed !== void 0) generationConfig.speed = this.speed;
+    if (this.emotion && this.emotion.length > 0)
+      generationConfig.emotion = this.emotion[0];
+    if (this.volume !== void 0) generationConfig.volume = this.volume;
+    if (Object.keys(generationConfig).length > 0) {
+      payload.generation_config = generationConfig;
+    }
+    return payload;
+  }
+  /**
+   * Pre-call HTTP warmup for the Cartesia `/tts/bytes` endpoint.
+   *
+   * Issues a lightweight `GET <baseUrl>/voices` so DNS, TLS, and HTTP/2
+   * are already up by the time the first `synthesizeStream()` POST
+   * lands. Best-effort: 5 s timeout, all exceptions swallowed at
+   * debug level.
+   *
+   * Billing safety: `GET /voices` is a free metadata read on
+   * Cartesia's REST surface (per https://docs.cartesia.ai). It does
+   * not consume synthesis credits. The actual synthesis is billed
+   * only when `POST /tts/bytes` runs with a non-empty `transcript`.
+   *
+   * Note: Cartesia TTS uses the HTTP path (vs the WebSocket variant
+   * Cartesia also exposes) — connection warmup is therefore HTTP-GET
+   * based, not WebSocket pre-handshake. The latency win is smaller
+   * (~50-150 ms vs the ~200-500 ms of a WS prewarm) but still real.
+   */
+  async warmup() {
+    try {
+      await fetch(`${this.baseUrl}/voices`, {
+        method: "GET",
+        headers: {
+          "X-API-Key": this.apiKey,
+          "Cartesia-Version": this.apiVersion
+        },
+        signal: AbortSignal.timeout(5e3)
+      });
+    } catch (err) {
+      getLogger().debug(`Cartesia TTS warmup failed (best-effort): ${String(err)}`);
+    }
+  }
+  /** Synthesize text and return the concatenated audio buffer. */
+  async synthesize(text) {
+    const chunks = [];
+    for await (const chunk of this.synthesizeStream(text)) {
+      chunks.push(chunk);
+    }
+    return Buffer.concat(chunks);
+  }
+  /**
+   * Synthesize text and yield raw PCM_S16LE chunks at the configured
+   * `sampleRate` as they arrive from Cartesia.
+   */
+  async *synthesizeStream(text) {
+    const response = await fetch(`${this.baseUrl}/tts/bytes`, {
+      method: "POST",
+      headers: {
+        "X-API-Key": this.apiKey,
+        "Cartesia-Version": this.apiVersion,
+        "Content-Type": "application/json"
+      },
+      body: JSON.stringify(this.buildPayload(text)),
+      signal: AbortSignal.timeout(3e4)
+    });
+    if (!response.ok) {
+      const body = await response.text();
+      throw new Error(`Cartesia TTS error ${response.status}: ${body}`);
+    }
+    if (!response.body) {
+      throw new Error("Cartesia TTS: no response body");
+    }
+    const reader = response.body.getReader();
+    try {
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+        if (value && value.length > 0) {
+          yield Buffer.from(value);
+        }
+      }
+    } finally {
+      if (typeof reader.cancel === "function")
+        await reader.cancel().catch(() => {
+        });
+      reader.releaseLock();
+    }
+  }
+};
+// src/providers/rime-tts.ts
+init_esm_shims();
+var RIME_BASE_URL = "https://users.rime.ai/v1/rime-tts";
+var RimeModel = {
+  ARCANA: "arcana",
+  MIST: "mist",
+  MIST_V2: "mistv2"
+};
+var RimeAudioFormat = {
+  PCM: "audio/pcm",
+  MP3: "audio/mp3",
+  WAV: "audio/wav",
+  MULAW: "audio/mulaw"
+};
+var ARCANA_MODEL_TIMEOUT_MS = 60 * 4 * 1e3;
+var MIST_MODEL_TIMEOUT_MS = 30 * 1e3;
+function isMistModel(model) {
+  return model.includes(RimeModel.MIST);
+}
+function timeoutForModel(model) {
+  if (model === RimeModel.ARCANA) return ARCANA_MODEL_TIMEOUT_MS;
+  return MIST_MODEL_TIMEOUT_MS;
+}
+var RimeTTS = class {
+  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+  static providerKey = "rime";
+  apiKey;
+  model;
+  speaker;
+  lang;
+  sampleRate;
+  repetitionPenalty;
+  temperature;
+  topP;
+  maxTokens;
+  speedAlpha;
+  reduceLatency;
+  pauseBetweenBrackets;
+  phonemizeBetweenBrackets;
+  baseUrl;
+  totalTimeoutMs;
+  constructor(apiKey, opts = {}) {
+    this.apiKey = apiKey;
+    this.model = opts.model ?? RimeModel.ARCANA;
+    const defaultSpeaker = isMistModel(this.model) ? "cove" : "astra";
+    this.speaker = opts.speaker ?? defaultSpeaker;
+    this.lang = opts.lang ?? "eng";
+    this.sampleRate = opts.sampleRate ?? 16e3;
+    this.repetitionPenalty = opts.repetitionPenalty;
+    this.temperature = opts.temperature;
+    this.topP = opts.topP;
+    this.maxTokens = opts.maxTokens;
+    this.speedAlpha = opts.speedAlpha;
+    this.reduceLatency = opts.reduceLatency;
+    this.pauseBetweenBrackets = opts.pauseBetweenBrackets;
+    this.phonemizeBetweenBrackets = opts.phonemizeBetweenBrackets;
+    this.baseUrl = opts.baseUrl ?? RIME_BASE_URL;
+    this.totalTimeoutMs = timeoutForModel(this.model);
+  }
+  buildPayload(text) {
+    const payload = {
+      speaker: this.speaker,
+      text,
+      modelId: this.model
+    };
+    if (this.model === RimeModel.ARCANA) {
+      if (this.repetitionPenalty !== void 0)
+        payload.repetition_penalty = this.repetitionPenalty;
+      if (this.temperature !== void 0) payload.temperature = this.temperature;
+      if (this.topP !== void 0) payload.top_p = this.topP;
+      if (this.maxTokens !== void 0) payload.max_tokens = this.maxTokens;
+      payload.lang = this.lang;
+      payload.samplingRate = this.sampleRate;
+    } else if (isMistModel(this.model)) {
+      payload.lang = this.lang;
+      payload.samplingRate = this.sampleRate;
+      if (this.speedAlpha !== void 0) payload.speedAlpha = this.speedAlpha;
+      if (this.model === RimeModel.MIST_V2 && this.reduceLatency !== void 0) {
+        payload.reduceLatency = this.reduceLatency;
+      }
+      if (this.pauseBetweenBrackets !== void 0) {
+        payload.pauseBetweenBrackets = this.pauseBetweenBrackets;
+      }
+      if (this.phonemizeBetweenBrackets !== void 0) {
+        payload.phonemizeBetweenBrackets = this.phonemizeBetweenBrackets;
+      }
+    }
+    return payload;
+  }
+  /** Synthesize text and return the concatenated audio buffer. */
+  async synthesize(text) {
+    const chunks = [];
+    for await (const chunk of this.synthesizeStream(text)) {
+      chunks.push(chunk);
+    }
+    return Buffer.concat(chunks);
+  }
+  /**
+   * Synthesize text and yield raw PCM_S16LE chunks at the configured
+   * `sampleRate` as they stream in.
+   */
+  async *synthesizeStream(text) {
+    const response = await fetch(this.baseUrl, {
+      method: "POST",
+      headers: {
+        accept: RimeAudioFormat.PCM,
+        Authorization: `Bearer ${this.apiKey}`,
+        "content-type": "application/json"
+      },
+      body: JSON.stringify(this.buildPayload(text)),
+      signal: AbortSignal.timeout(this.totalTimeoutMs)
+    });
+    if (!response.ok) {
+      const body = await response.text();
+      throw new Error(`Rime TTS error ${response.status}: ${body}`);
+    }
+    const contentType = response.headers.get("content-type") ?? "";
+    if (!contentType.startsWith("audio")) {
+      const body = await response.text();
+      throw new Error(`Rime returned non-audio response: ${body.slice(0, 500)}`);
+    }
+    if (!response.body) {
+      throw new Error("Rime TTS: no response body");
+    }
+    const reader = response.body.getReader();
+    try {
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+        if (value && value.length > 0) {
+          yield Buffer.from(value);
+        }
+      }
+    } finally {
+      if (typeof reader.cancel === "function")
+        await reader.cancel().catch(() => {
+        });
+      reader.releaseLock();
+    }
+  }
+};
+// src/stt/deepgram.ts
+init_esm_shims();
+var STT = class extends DeepgramSTT {
+  static providerKey = "deepgram";
+  constructor(opts = {}) {
+    const key = opts.apiKey ?? process.env.DEEPGRAM_API_KEY;
+    if (!key) {
+      throw new Error(
+        "Deepgram STT requires an apiKey. Pass { apiKey: 'dg_...' } or set DEEPGRAM_API_KEY in the environment."
+      );
+    }
+    super(
+      key,
+      opts.language ?? "en",
+      opts.model ?? "nova-3",
+      opts.encoding ?? "linear16",
+      opts.sampleRate ?? 16e3,
+      {
+        endpointingMs: opts.endpointingMs ?? 150,
+        utteranceEndMs: opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3,
+        smartFormat: opts.smartFormat ?? true,
+        interimResults: opts.interimResults ?? true,
+        ...opts.vadEvents !== void 0 ? { vadEvents: opts.vadEvents } : {}
+      }
+    );
+  }
+};
+// src/stt/whisper.ts
+init_esm_shims();
+// src/providers/whisper-stt.ts
+init_esm_shims();
+/** OpenAI audio transcription endpoint URL. */
+var OPENAI_TRANSCRIPTION_URL = "https://api.openai.com/v1/audio/transcriptions";
+/**
+ * Default `bufferSize` for `WhisperSTT`, in bytes (32000).
+ * NOTE(review): presumably one second of 16 kHz mono PCM16 — confirm against the caller's audio format.
+ */
+var DEFAULT_BUFFER_SIZE = 16e3 * 2;
+/** Model ids the `WhisperSTT` constructor accepts; any other value makes it throw. */
+var ALLOWED_MODELS = /* @__PURE__ */ new Set(["whisper-1", "gpt-4o-transcribe", "gpt-4o-mini-transcribe"]);
+function wrapPcmInWav(pcm, sampleRate = 16e3, channels = 1, bitsPerSample = 16) {
+  const dataSize = pcm.length;
+  const header = Buffer.alloc(44);
+  header.write("RIFF", 0);
+  header.writeUInt32LE(36 + dataSize, 4);
+  header.write("WAVE", 8);
+  header.write("fmt ", 12);
+  header.writeUInt32LE(16, 16);
+  header.writeUInt16LE(1, 20);
+  header.writeUInt16LE(channels, 22);
+  header.writeUInt32LE(sampleRate, 24);
+  header.writeUInt32LE(sampleRate * channels * (bitsPerSample / 8), 28);
+  header.writeUInt16LE(channels * (bitsPerSample / 8), 32);
+  header.writeUInt16LE(bitsPerSample, 34);
+  header.write("data", 36);
+  header.writeUInt32LE(dataSize, 40);
+  return Buffer.concat([header, pcm]);
+}
+var WhisperSTT = class _WhisperSTT {
+  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+  static providerKey = "whisper";
+  apiKey;
+  model;
+  language;
+  bufferSize;
+  responseFormat;
+  // Accumulate chunks in an array and concat once on flush — avoids the
+  // per-``sendAudio`` O(n) ``Buffer.concat([buffer, chunk])`` that quickly
+  // dominates CPU when the phone leg delivers 20 ms frames.
+  chunks = [];
+  bufferedBytes = 0;
+  callbacks = /* @__PURE__ */ new Set();
+  running = false;
+  pendingTranscriptions = [];
+  /**
+   * @param apiKey OpenAI API key.
+   * @param language ISO-639-1 language code (e.g. ``"en"``, ``"it"``). Optional.
+   * @param model One of ``whisper-1``, ``gpt-4o-transcribe``, ``gpt-4o-mini-transcribe``.
+   * @param bufferSize Bytes of PCM16 to buffer before each transcription request.
+   * @param responseFormat ``"json"`` (default) or ``"verbose_json"``.
+   *
+   * Argument order matches the Python SDK's ``WhisperSTT(api_key, language, model, response_format)``
+   * for cross-language parity. Pre-0.5.3 the TS positional order was
+   * ``(apiKey, model, language, bufferSize, responseFormat)`` — callers using
+   * the old order will need to swap ``language`` and ``model``.
+   */
+  constructor(apiKey, language, model = "whisper-1", bufferSize = DEFAULT_BUFFER_SIZE, responseFormat = "json") {
+    if (!ALLOWED_MODELS.has(model)) {
+      throw new Error(
+        `WhisperSTT: unsupported model "${model}". Expected one of ${[...ALLOWED_MODELS].join(", ")}.`
+      );
+    }
+    this.apiKey = apiKey;
+    this.model = model;
+    this.language = language;
+    this.bufferSize = bufferSize;
+    this.responseFormat = responseFormat;
+  }
+  /** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
+  static forTwilio(apiKey, language = "en", model = "whisper-1") {
+    return new _WhisperSTT(apiKey, language, model);
+  }
+  /** Reset the audio buffer and arm the adapter for incoming chunks. */
+  async connect() {
+    this.running = true;
+    this.chunks = [];
+    this.bufferedBytes = 0;
   }
   /** Buffer a PCM16 chunk; flushes to Whisper once `bufferSize` bytes are reached. */
   sendAudio(audio) {
@@ -4448,264 +5121,42 @@ var SpeechmaticsSTT = class {
   close() {
     this.running = false;
     const ws = this.ws;
-    if (!ws) return;
-    this.ws = null;
-    const sendSafe = (payload) => {
-      if (ws.readyState === WebSocket5.OPEN) {
-        try {
-          ws.send(payload);
-        } catch {
-        }
-      }
-    };
-    sendSafe(
-      JSON.stringify({ message: "EndOfStream", last_seq_no: this.lastSeqNo })
-    );
-    try {
-      ws.close();
-    } catch {
-    }
-  }
-};
-// src/stt/speechmatics.ts
-var STT7 = class extends SpeechmaticsSTT {
-  static providerKey = "speechmatics";
-  constructor(opts = {}) {
-    const key = opts.apiKey ?? process.env.SPEECHMATICS_API_KEY;
-    if (!key) {
-      throw new Error(
-        "Speechmatics STT requires an apiKey. Pass { apiKey: 'sm_...' } or set SPEECHMATICS_API_KEY in the environment."
-      );
-    }
-    super(key, opts);
-  }
-};
-// src/tts/elevenlabs.ts
-init_esm_shims();
-// src/providers/elevenlabs-tts.ts
-init_esm_shims();
-var ELEVENLABS_BASE_URL = "https://api.elevenlabs.io/v1";
-var ELEVENLABS_VOICE_ID_BY_NAME = {
-  rachel: "21m00Tcm4TlvDq8ikWAM",
-  drew: "29vD33N1CtxCmqQRPOHJ",
-  clyde: "2EiwWnXFnvU5JabPnv8n",
-  paul: "5Q0t7uMcjvnagumLfvZi",
-  domi: "AZnzlk1XvdvUeBnXmlld",
-  dave: "CYw3kZ02Hs0563khs1Fj",
-  fin: "D38z5RcWu1voky8WS1ja",
-  bella: "EXAVITQu4vr4xnSDxMaL",
-  antoni: "ErXwobaYiN019PkySvjV",
-  thomas: "GBv7mTt0atIp3Br8iCZE",
-  charlie: "IKne3meq5aSn9XLyUdCD",
-  george: "JBFqnCBsd6RMkjVDRZzb",
-  emily: "LcfcDJNUP1GQjkzn1xUU",
-  elli: "MF3mGyEYCl7XYWbV9V6O",
-  callum: "N2lVS1w4EtoT3dr4eOWO",
-  patrick: "ODq5zmih8GrVes37Dizd",
-  harry: "SOYHLrjzK2X1ezoPC6cr",
-  liam: "TX3LPaxmHKxFdv7VOQHJ",
-  dorothy: "ThT5KcBeYPX3keUQqHPh",
-  josh: "TxGEqnHWrfWFTfGW9XjX",
-  arnold: "VR6AewLTigWG4xSOukaG",
-  charlotte: "XB0fDUnXU5powFXDhCwa",
-  matilda: "XrExE9yKIg1WjnnlVkGX",
-  matthew: "Yko7PKHZNXotIFUBG7I9",
-  james: "ZQe5CZNOzWyzPSCn5a3c",
-  joseph: "Zlb1dXrM653N07WRdFW3",
-  jeremy: "bVMeCyTHy58xNoL34h3p",
-  michael: "flq6f7yk4E4fJM5XTYuZ",
-  ethan: "g5CIjZEefAph4nQFvHAz",
-  gigi: "jBpfuIE2acCO8z3wKNLl",
-  freya: "jsCqWAovK2LkecY7zXl4",
-  brian: "nPczCjzI2devNBz1zQrb",
-  grace: "oWAxZDx7w5VEj9dCyTzz",
-  daniel: "onwK4e9ZLuTAKqWW03F9",
-  lily: "pFZP5JQG7iQjIQuC4Bku",
-  serena: "pMsXgVXv3BLzUgSXRplE",
-  adam: "pNInz6obpgDQGcFmaJgB",
-  nicole: "piTKgcLEGmPE4e6mEKli",
-  bill: "pqHfZKP75CvOlQylNhV4",
-  jessie: "t0jbNlBVZ17f02VDIeMI",
-  ryan: "wViXBPUzp2ZZixB1xQuM",
-  sam: "yoZ06aMxZJJ28mfd3POQ",
-  glinda: "z9fAnlkpzviPz146aGWa",
-  giovanni: "zcAOhNBS3c14rBihAFp1",
-  mimi: "zrHiDhphv9ZnVXBqCLjz",
-  sarah: "EXAVITQu4vr4xnSDxMaL",
-  alloy: "EXAVITQu4vr4xnSDxMaL"
-};
-var VOICE_ID_PATTERN = /^[A-Za-z0-9]{20}$/;
-function resolveVoiceId(voice) {
-  if (!voice) return voice;
-  if (VOICE_ID_PATTERN.test(voice)) return voice;
-  return ELEVENLABS_VOICE_ID_BY_NAME[voice.toLowerCase()] ?? voice;
-}
-var ElevenLabsModel = {
-  V3: "eleven_v3",
-  FLASH_V2_5: "eleven_flash_v2_5",
-  TURBO_V2_5: "eleven_turbo_v2_5",
-  MULTILINGUAL_V2: "eleven_multilingual_v2",
-  MONOLINGUAL_V1: "eleven_monolingual_v1"
-};
-var ElevenLabsOutputFormat = {
-  MP3_22050_32: "mp3_22050_32",
-  MP3_44100_32: "mp3_44100_32",
-  MP3_44100_64: "mp3_44100_64",
-  MP3_44100_96: "mp3_44100_96",
-  MP3_44100_128: "mp3_44100_128",
-  MP3_44100_192: "mp3_44100_192",
-  PCM_8000: "pcm_8000",
-  PCM_16000: "pcm_16000",
-  PCM_22050: "pcm_22050",
-  PCM_24000: "pcm_24000",
-  PCM_44100: "pcm_44100",
-  ULAW_8000: "ulaw_8000"
-};
-var ElevenLabsTTS = class _ElevenLabsTTS {
-  // Stable pricing/dashboard key — read by stream-handler / metrics via
-  // ``(agent.tts.constructor as any).providerKey``. Without this the cost
-  // calculator falls back to ``constructor.name`` ("ElevenLabsTTS") which
-  // does NOT match the pricing table key "elevenlabs", silently zeroing
-  // TTS cost for callers that construct the raw REST class directly
-  // (exposed at top level as ``ElevenLabsRestTTS``).
-  static providerKey = "elevenlabs";
-  apiKey;
-  voiceId;
-  modelId;
-  outputFormat;
-  voiceSettings;
-  languageCode;
-  chunkSize;
-  constructor(apiKey, voiceIdOrOptions = "21m00Tcm4TlvDq8ikWAM", modelId = ElevenLabsModel.FLASH_V2_5, outputFormat = ElevenLabsOutputFormat.PCM_16000) {
-    this.apiKey = apiKey;
-    if (typeof voiceIdOrOptions === "object") {
-      const o = voiceIdOrOptions;
-      this.voiceId = resolveVoiceId(o.voiceId ?? "21m00Tcm4TlvDq8ikWAM");
-      this.modelId = o.modelId ?? ElevenLabsModel.FLASH_V2_5;
-      this.outputFormat = o.outputFormat ?? ElevenLabsOutputFormat.PCM_16000;
-      this.voiceSettings = o.voiceSettings;
-      this.languageCode = o.languageCode;
-      this.chunkSize = o.chunkSize ?? 4096;
-    } else {
-      this.voiceId = resolveVoiceId(voiceIdOrOptions);
-      this.modelId = modelId;
-      this.outputFormat = outputFormat;
-      this.voiceSettings = void 0;
-      this.languageCode = void 0;
-      this.chunkSize = 4096;
-    }
-  }
-  /**
-   * Construct an instance pre-configured for Twilio Media Streams.
-   *
-   * Sets `outputFormat='ulaw_8000'` so ElevenLabs emits μ-law @ 8 kHz
-   * directly — the exact wire format Twilio's media stream uses — letting
-   * the SDK skip the 16 kHz→8 kHz resample and PCM→μ-law conversion in
-   * `TwilioAudioSender`. Saves ~30–80 ms first-byte and per-frame CPU,
-   * and removes a potential aliasing source.
-   *
-   * `voiceSettings` defaults to a low-bandwidth-friendly profile
-   * (speaker boost off, modest stability) which sounds cleaner at 8 kHz
-   * μ-law than the studio default. Pass an explicit object to override.
-   */
-  static forTwilio(apiKey, options = {}) {
-    const voiceSettings = options.voiceSettings ?? {
-      // Speaker boost adds high-frequency emphasis that aliases ugly over an
-      // 8 kHz μ-law line. Slightly higher stability tames the excursions
-      // that compander quantization noise can amplify.
-      stability: 0.6,
-      similarity_boost: 0.75,
-      use_speaker_boost: false
-    };
-    return new _ElevenLabsTTS(apiKey, {
-      ...options,
-      voiceSettings,
-      outputFormat: ElevenLabsOutputFormat.ULAW_8000
-    });
-  }
-  /**
-   * Construct an instance pre-configured for Telnyx bidirectional media.
-   *
-   * Telnyx's default media-streaming codec is L16 PCM @ 16 kHz, which
-   * matches our default Telnyx handler. We pick `pcm_16000` so the audio
-   * flows end-to-end with zero resampling or transcoding.
-   *
-   * Trade-off: if your Telnyx profile is pinned to PCMU/8000 (μ-law),
-   * construct `ElevenLabsTTS` directly with `outputFormat: 'ulaw_8000'`
-   * — Telnyx supports that natively too.
-   */
-  static forTelnyx(apiKey, options = {}) {
-    return new _ElevenLabsTTS(apiKey, {
-      ...options,
-      outputFormat: ElevenLabsOutputFormat.PCM_16000
-    });
-  }
-  /**
-   * Synthesise text to speech and return the full audio as a single Buffer.
-   *
-   * For large chunks (or when latency matters) call `synthesizeStream` instead.
-   */
-  async synthesize(text) {
-    const chunks = [];
-    for await (const chunk of this.synthesizeStream(text)) {
-      chunks.push(chunk);
-    }
-    return Buffer.concat(chunks);
-  }
-  /**
-   * Synthesise text and yield audio chunks as they arrive (streaming).
-   *
-   * The yielded buffers are raw PCM at 16 kHz (or whatever `outputFormat` is
-   * configured to). `chunkSize` controls the maximum yield size — 512 is a
-   * good choice for low-latency telephony.
-   */
-  async *synthesizeStream(text) {
-    const url = `${ELEVENLABS_BASE_URL}/text-to-speech/${encodeURIComponent(this.voiceId)}/stream?output_format=${encodeURIComponent(this.outputFormat)}`;
-    const body = {
-      text,
-      model_id: this.modelId
-    };
-    if (this.voiceSettings) body["voice_settings"] = this.voiceSettings;
-    if (this.languageCode) body["language_code"] = this.languageCode;
-    const response = await fetch(url, {
-      method: "POST",
-      headers: {
-        "xi-api-key": this.apiKey,
-        "Content-Type": "application/json"
-      },
-      body: JSON.stringify(body),
-      signal: AbortSignal.timeout(3e4)
-    });
-    if (!response.ok) {
-      const errBody = await response.text();
-      throw new Error(`ElevenLabs TTS error ${response.status}: ${errBody}`);
-    }
-    if (!response.body) {
-      throw new Error("ElevenLabs TTS: no response body");
-    }
-    const reader = response.body.getReader();
-    try {
-      while (true) {
-        const { done, value } = await reader.read();
-        if (done) break;
-        if (!value || value.length === 0) continue;
-        const buf = Buffer.from(value);
-        for (let offset = 0; offset < buf.length; offset += this.chunkSize) {
-          yield buf.subarray(offset, Math.min(offset + this.chunkSize, buf.length));
+    if (!ws) return;
+    this.ws = null;
+    const sendSafe = (payload) => {
+      if (ws.readyState === WebSocket5.OPEN) {
+        try {
+          ws.send(payload);
+        } catch {
         }
       }
-    } finally {
-      if (typeof reader.cancel === "function") await reader.cancel().catch(() => {
-      });
-      reader.releaseLock();
+    };
+    sendSafe(
+      JSON.stringify({ message: "EndOfStream", last_seq_no: this.lastSeqNo })
+    );
+    try {
+      ws.close();
+    } catch {
+    }
+  }
+};
+// src/stt/speechmatics.ts
+/**
+ * Speechmatics STT wrapper that resolves the API key before delegating to
+ * `SpeechmaticsSTT`.
+ */
+var STT7 = class extends SpeechmaticsSTT {
+  /** Provider identifier for this class. */
+  static providerKey = "speechmatics";
+  /**
+   * @param opts Options object. `opts.apiKey` takes precedence over the
+   *   `SPEECHMATICS_API_KEY` environment variable; the whole object is
+   *   forwarded to the base constructor alongside the resolved key.
+   * @throws Error when neither source yields a non-empty key.
+   */
+  constructor(opts = {}) {
+    const key = opts.apiKey ?? process.env.SPEECHMATICS_API_KEY;
+    if (!key) {
+      throw new Error(
+        "Speechmatics STT requires an apiKey. Pass { apiKey: 'sm_...' } or set SPEECHMATICS_API_KEY in the environment."
+      );
     }
+    super(key, opts);
   }
 };
 // src/tts/elevenlabs.ts
+init_esm_shims();
 function resolveApiKey(apiKey) {
   const key = apiKey ?? process.env.ELEVENLABS_API_KEY;
   if (!key) {
@@ -4721,7 +5172,7 @@ var TTS = class _TTS extends ElevenLabsTTS {
     super(resolveApiKey(opts.apiKey), {
       voiceId: opts.voiceId ?? "EXAVITQu4vr4xnSDxMaL",
       modelId: opts.modelId ?? "eleven_flash_v2_5",
-      outputFormat: opts.outputFormat ?? "pcm_16000",
+      ...opts.outputFormat !== void 0 ? { outputFormat: opts.outputFormat } : {},
       languageCode: opts.languageCode,
       voiceSettings: opts.voiceSettings
     });
@@ -4792,6 +5243,20 @@ var ElevenLabsWebSocketTTS = class _ElevenLabsWebSocketTTS {
    * changes.
    */
   adoptedConnection = null;
+  /**
+   * Active WS for the in-flight ``synthesizeStream`` call, if any. Set
+   * when a stream starts, cleared in its ``finally`` block. The
+   * stream-handler calls ``cancelActiveStream()`` from ``cancelSpeaking``
+   * to unblock the generator's inner ``await Promise<frame>`` — without
+   * it, a barge-in on the firstMessage live path leaves the for-await
+   * stuck waiting for the next frame; ElevenLabs never sends
+   * ``isFinal=true`` after the consumer breaks, the 30 s frame timeout
+   * fires post-call, and meanwhile ``initPipeline`` never returns so
+   * the STT ``onTranscript`` callback never registers and subsequent
+   * user turns are silently dropped (root cause of the 2026-05-20
+   * "first message OK, then no response" symptom).
+   */
+  activeStreamWs = null;
   /**
    * The wire format requested over the ElevenLabs WS. Initially set from
    * the constructor; ``setTelephonyCarrier`` may auto-flip it to the
@@ -4840,6 +5305,32 @@ var ElevenLabsWebSocketTTS = class _ElevenLabsWebSocketTTS {
     if (!native) return;
     this._outputFormat = native;
   }
+  /**
+   * Force-close the WebSocket of any in-flight ``synthesizeStream`` call.
+   * Called by the stream-handler from ``cancelSpeaking`` (barge-in) so
+   * the generator's inner ``await Promise<frame>`` loop unblocks cleanly
+   * via the ``onClose`` handler — instead of waiting up to 30 s for the
+   * ``FRAME_TIMEOUT_MS`` watchdog to fire. No-op when no stream is in
+   * flight or when the WS is already closing.
+   *
+   * Without this, a barge-in during the firstMessage live path left the
+   * for-await stuck (ElevenLabs never sends ``isFinal=true`` after the
+   * consumer breaks), ``initPipeline`` never returned, the STT
+   * ``onTranscript`` callback never registered, and the entire remainder
+   * of the call was silent for the user. Surfaced during the 2026-05-20
+   * acceptance run.
+   */
+  cancelActiveStream() {
+    const ws = this.activeStreamWs;
+    if (!ws) return;
+    this.activeStreamWs = null;
+    try {
+      if (ws.readyState === WebSocket6.OPEN || ws.readyState === WebSocket6.CONNECTING) {
+        ws.close();
+      }
+    } catch {
+    }
+  }
   /** Pre-configured for Twilio Media Streams (`ulaw_8000`). */
   static forTwilio(opts) {
     return new _ElevenLabsWebSocketTTS({
@@ -4925,6 +5416,7 @@ var ElevenLabsWebSocketTTS = class _ElevenLabsWebSocketTTS {
         headers: { "xi-api-key": this.apiKey }
       });
     }
+    this.activeStreamWs = ws;
     const queue = [];
     let done = false;
     let pendingError = null;
@@ -5045,6 +5537,7 @@ var ElevenLabsWebSocketTTS = class _ElevenLabsWebSocketTTS {
       }
     } finally {
       if (connectTimer) clearTimeout(connectTimer);
+      if (this.activeStreamWs === ws) this.activeStreamWs = null;
       try {
         if (ws.readyState === WebSocket6.OPEN) {
           ws.send(JSON.stringify({ text: "" }));
@@ -5217,9 +5710,9 @@ function buildOpts(opts) {
   const out = {
     apiKey: resolveApiKey2(opts.apiKey),
     modelId: opts.modelId ?? "eleven_flash_v2_5",
-    outputFormat: opts.outputFormat ?? "pcm_16000",
     autoMode: opts.autoMode ?? true
   };
+  if (opts.outputFormat !== void 0) out.outputFormat = opts.outputFormat;
   if (opts.voiceId !== void 0) out.voiceId = opts.voiceId;
   if (opts.voiceSettings !== void 0) out.voiceSettings = opts.voiceSettings;
   if (opts.languageCode !== void 0) out.languageCode = opts.languageCode;
@@ -5396,268 +5889,77 @@ var OpenAITTS = class _OpenAITTS {
       if (lpf) {
         y = lpfAlpha * x + (1 - lpfAlpha) * y;
         let s = Math.round(y);
-        if (s > 32767) s = 32767;
-        else if (s < -32768) s = -32768;
-        samples.push(s);
-      } else {
-        samples.push(x);
-      }
-    }
-    if (lpf) ctx.lpfPrev = y;
-    const out = [];
-    let i = 0;
-    if (direct8k) {
-      while (i + 2 < samples.length) {
-        out.push(samples[i]);
-        i += 3;
-      }
-    } else {
-      while (i + 2 < samples.length) {
-        out.push(samples[i]);
-        out.push(Math.round((samples[i + 1] + samples[i + 2]) / 2));
-        i += 3;
-      }
-    }
-    ctx.leftover = samples.slice(i);
-    const buffer = Buffer.alloc(out.length * 2);
-    for (let j = 0; j < out.length; j++) {
-      buffer.writeInt16LE(out[j], j * 2);
-    }
-    return buffer;
-  }
-  /** @deprecated use {@link resampleStreaming} with persistent state. */
-  static resample24kTo16k(audio) {
-    const ctx = {
-      carryByte: null,
-      leftover: [],
-      lpfPrev: 0,
-      lpfEnabled: false,
-      targetSampleRate: 16e3
-    };
-    const out = _OpenAITTS.resampleStreaming(audio, ctx);
-    if (ctx.leftover.length === 0) return out;
-    const tail = Buffer.alloc(ctx.leftover.length * 2);
-    for (let i = 0; i < ctx.leftover.length; i++) {
-      tail.writeInt16LE(ctx.leftover[i], i * 2);
-    }
-    return Buffer.concat([out, tail]);
-  }
-};
-// src/tts/openai.ts
-var TTS3 = class extends OpenAITTS {
-  static providerKey = "openai_tts";
-  constructor(opts = {}) {
-    const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
-    if (!key) {
-      throw new Error(
-        "OpenAI TTS requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
-      );
-    }
-    super(
-      key,
-      opts.voice ?? "alloy",
-      opts.model ?? "gpt-4o-mini-tts",
-      opts.instructions ?? null,
-      opts.speed ?? null,
-      opts.antiAlias ?? false
-    );
-  }
-};
-// src/tts/cartesia.ts
-init_esm_shims();
-// src/providers/cartesia-tts.ts
-init_esm_shims();
-var CARTESIA_BASE_URL = "https://api.cartesia.ai";
-var CARTESIA_API_VERSION = "2025-04-16";
-var CARTESIA_DEFAULT_VOICE_ID = "f786b574-daa5-4673-aa0c-cbe3e8534c02";
-var CartesiaTTSModel = {
-  SONIC_3: "sonic-3",
-  SONIC_2: "sonic-2",
-  SONIC: "sonic"
-};
-var CartesiaTTSContainer = {
-  RAW: "raw",
-  WAV: "wav",
-  MP3: "mp3"
-};
-var CartesiaTTSEncoding = {
-  PCM_S16LE: "pcm_s16le",
-  PCM_F32LE: "pcm_f32le",
-  PCM_MULAW: "pcm_mulaw",
-  PCM_ALAW: "pcm_alaw"
-};
-var CartesiaTTSSampleRate = {
-  HZ_8000: 8e3,
-  HZ_16000: 16e3,
-  HZ_22050: 22050,
-  HZ_24000: 24e3,
-  HZ_44100: 44100
-};
-var CartesiaTTSVoiceMode = {
-  ID: "id",
-  EMBEDDING: "embedding"
-};
-var CartesiaTTS = class _CartesiaTTS {
-  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
-  static providerKey = "cartesia_tts";
-  apiKey;
-  model;
-  voice;
-  language;
-  sampleRate;
-  speed;
-  emotion;
-  volume;
-  baseUrl;
-  apiVersion;
-  constructor(apiKey, opts = {}) {
-    this.apiKey = apiKey;
-    this.model = opts.model ?? CartesiaTTSModel.SONIC_3;
-    this.voice = opts.voice ?? CARTESIA_DEFAULT_VOICE_ID;
-    this.language = opts.language ?? "en";
-    this.sampleRate = opts.sampleRate ?? CartesiaTTSSampleRate.HZ_16000;
-    this.speed = opts.speed;
-    this.emotion = typeof opts.emotion === "string" ? [opts.emotion] : opts.emotion;
-    this.volume = opts.volume;
-    this.baseUrl = opts.baseUrl ?? CARTESIA_BASE_URL;
-    this.apiVersion = opts.apiVersion ?? CARTESIA_API_VERSION;
-  }
-  /**
-   * Construct an instance pre-configured for Twilio Media Streams.
-   *
-   * Sets `sampleRate=8000` so Cartesia emits PCM_S16LE @ 8 kHz directly.
-   * Twilio's media stream uses μ-law @ 8 kHz so the SDK still does the
-   * PCM → μ-law transcode client-side, but the 16 kHz → 8 kHz resample
-   * step is skipped. Saves ~10–30 ms first-byte plus per-frame CPU and
-   * removes a potential aliasing source.
-   */
-  static forTwilio(apiKey, options = {}) {
-    return new _CartesiaTTS(apiKey, {
-      ...options,
-      sampleRate: CartesiaTTSSampleRate.HZ_8000
-    });
-  }
-  /**
-   * Construct an instance pre-configured for Telnyx bidirectional media.
-   *
-   * Sets `sampleRate=16000` to match Telnyx's L16/16000 default codec —
-   * audio flows end-to-end with zero resampling or transcoding. Same as
-   * the bare-constructor default; exists for API symmetry with
-   * {@link CartesiaTTS.forTwilio}.
-   */
-  static forTelnyx(apiKey, options = {}) {
-    return new _CartesiaTTS(apiKey, {
-      ...options,
-      sampleRate: CartesiaTTSSampleRate.HZ_16000
-    });
-  }
-  /** Build the JSON payload for the Cartesia bytes endpoint. */
-  buildPayload(text) {
-    const payload = {
-      model_id: this.model,
-      voice: { mode: CartesiaTTSVoiceMode.ID, id: this.voice },
-      transcript: text,
-      output_format: {
-        container: CartesiaTTSContainer.RAW,
-        encoding: CartesiaTTSEncoding.PCM_S16LE,
-        sample_rate: this.sampleRate
-      },
-      language: this.language
-    };
-    const generationConfig = {};
-    if (this.speed !== void 0) generationConfig.speed = this.speed;
-    if (this.emotion && this.emotion.length > 0)
-      generationConfig.emotion = this.emotion[0];
-    if (this.volume !== void 0) generationConfig.volume = this.volume;
-    if (Object.keys(generationConfig).length > 0) {
-      payload.generation_config = generationConfig;
-    }
-    return payload;
-  }
-  /**
-   * Pre-call HTTP warmup for the Cartesia `/tts/bytes` endpoint.
-   *
-   * Issues a lightweight `GET <baseUrl>/voices` so DNS, TLS, and HTTP/2
-   * are already up by the time the first `synthesizeStream()` POST
-   * lands. Best-effort: 5 s timeout, all exceptions swallowed at
-   * debug level.
-   *
-   * Billing safety: `GET /voices` is a free metadata read on
-   * Cartesia's REST surface (per https://docs.cartesia.ai). It does
-   * not consume synthesis credits. The actual synthesis is billed
-   * only when `POST /tts/bytes` runs with a non-empty `transcript`.
-   *
-   * Note: Cartesia TTS uses the HTTP path (vs the WebSocket variant
-   * Cartesia also exposes) — connection warmup is therefore HTTP-GET
-   * based, not WebSocket pre-handshake. The latency win is smaller
-   * (~50-150 ms vs the ~200-500 ms of a WS prewarm) but still real.
-   */
-  async warmup() {
-    try {
-      await fetch(`${this.baseUrl}/voices`, {
-        method: "GET",
-        headers: {
-          "X-API-Key": this.apiKey,
-          "Cartesia-Version": this.apiVersion
-        },
-        signal: AbortSignal.timeout(5e3)
-      });
-    } catch (err) {
-      getLogger().debug(`Cartesia TTS warmup failed (best-effort): ${String(err)}`);
+        if (s > 32767) s = 32767;
+        else if (s < -32768) s = -32768;
+        samples.push(s);
+      } else {
+        samples.push(x);
+      }
     }
-  }
-  /** Synthesize text and return the concatenated audio buffer. */
-  async synthesize(text) {
-    const chunks = [];
-    for await (const chunk of this.synthesizeStream(text)) {
-      chunks.push(chunk);
+    if (lpf) ctx.lpfPrev = y;
+    const out = [];
+    let i = 0;
+    if (direct8k) {
+      while (i + 2 < samples.length) {
+        out.push(samples[i]);
+        i += 3;
+      }
+    } else {
+      while (i + 2 < samples.length) {
+        out.push(samples[i]);
+        out.push(Math.round((samples[i + 1] + samples[i + 2]) / 2));
+        i += 3;
+      }
     }
-    return Buffer.concat(chunks);
-  }
-  /**
-   * Synthesize text and yield raw PCM_S16LE chunks at the configured
-   * `sampleRate` as they arrive from Cartesia.
-   */
-  async *synthesizeStream(text) {
-    const response = await fetch(`${this.baseUrl}/tts/bytes`, {
-      method: "POST",
-      headers: {
-        "X-API-Key": this.apiKey,
-        "Cartesia-Version": this.apiVersion,
-        "Content-Type": "application/json"
-      },
-      body: JSON.stringify(this.buildPayload(text)),
-      signal: AbortSignal.timeout(3e4)
-    });
-    if (!response.ok) {
-      const body = await response.text();
-      throw new Error(`Cartesia TTS error ${response.status}: ${body}`);
+    ctx.leftover = samples.slice(i);
+    const buffer = Buffer.alloc(out.length * 2);
+    for (let j = 0; j < out.length; j++) {
+      buffer.writeInt16LE(out[j], j * 2);
     }
-    if (!response.body) {
-      throw new Error("Cartesia TTS: no response body");
+    return buffer;
+  }
+  /** @deprecated use {@link resampleStreaming} with persistent state. */
+  static resample24kTo16k(audio) {
+    const ctx = {
+      carryByte: null,
+      leftover: [],
+      lpfPrev: 0,
+      lpfEnabled: false,
+      targetSampleRate: 16e3
+    };
+    const out = _OpenAITTS.resampleStreaming(audio, ctx);
+    if (ctx.leftover.length === 0) return out;
+    const tail = Buffer.alloc(ctx.leftover.length * 2);
+    for (let i = 0; i < ctx.leftover.length; i++) {
+      tail.writeInt16LE(ctx.leftover[i], i * 2);
     }
-    const reader = response.body.getReader();
-    try {
-      while (true) {
-        const { done, value } = await reader.read();
-        if (done) break;
-        if (value && value.length > 0) {
-          yield Buffer.from(value);
-        }
-      }
-    } finally {
-      if (typeof reader.cancel === "function")
-        await reader.cancel().catch(() => {
-        });
-      reader.releaseLock();
+    return Buffer.concat([out, tail]);
+  }
+};
+// src/tts/openai.ts
+/**
+ * OpenAI TTS wrapper with an options-object constructor; resolves the API
+ * key and defaults, then forwards them positionally to `OpenAITTS`.
+ */
+var TTS3 = class extends OpenAITTS {
+  /** Provider identifier for this class. */
+  static providerKey = "openai_tts";
+  /**
+   * @param opts Optional settings. Defaults applied here: voice `"alloy"`,
+   *   model `"gpt-4o-mini-tts"`, instructions `null`, speed `null`,
+   *   antiAlias `false`. `opts.apiKey` takes precedence over the
+   *   `OPENAI_API_KEY` environment variable.
+   * @throws Error when neither source yields a non-empty key.
+   */
+  constructor(opts = {}) {
+    const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
+    if (!key) {
+      throw new Error(
+        "OpenAI TTS requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
+      );
     }
+    super(
+      key,
+      opts.voice ?? "alloy",
+      opts.model ?? "gpt-4o-mini-tts",
+      opts.instructions ?? null,
+      opts.speed ?? null,
+      opts.antiAlias ?? false
+    );
   }
 };
 // src/tts/cartesia.ts
+init_esm_shims();
 function resolveApiKey3(apiKey) {
   const key = apiKey ?? process.env.CARTESIA_API_KEY;
   if (!key) {
@@ -5687,150 +5989,6 @@ var TTS4 = class _TTS extends CartesiaTTS {
 // src/tts/rime.ts
 init_esm_shims();
-// src/providers/rime-tts.ts
-init_esm_shims();
-var RIME_BASE_URL = "https://users.rime.ai/v1/rime-tts";
-var RimeModel = {
-  ARCANA: "arcana",
-  MIST: "mist",
-  MIST_V2: "mistv2"
-};
-var RimeAudioFormat = {
-  PCM: "audio/pcm",
-  MP3: "audio/mp3",
-  WAV: "audio/wav",
-  MULAW: "audio/mulaw"
-};
-var ARCANA_MODEL_TIMEOUT_MS = 60 * 4 * 1e3;
-var MIST_MODEL_TIMEOUT_MS = 30 * 1e3;
-function isMistModel(model) {
-  return model.includes(RimeModel.MIST);
-}
-function timeoutForModel(model) {
-  if (model === RimeModel.ARCANA) return ARCANA_MODEL_TIMEOUT_MS;
-  return MIST_MODEL_TIMEOUT_MS;
-}
-var RimeTTS = class {
-  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
-  static providerKey = "rime";
-  apiKey;
-  model;
-  speaker;
-  lang;
-  sampleRate;
-  repetitionPenalty;
-  temperature;
-  topP;
-  maxTokens;
-  speedAlpha;
-  reduceLatency;
-  pauseBetweenBrackets;
-  phonemizeBetweenBrackets;
-  baseUrl;
-  totalTimeoutMs;
-  constructor(apiKey, opts = {}) {
-    this.apiKey = apiKey;
-    this.model = opts.model ?? RimeModel.ARCANA;
-    const defaultSpeaker = isMistModel(this.model) ? "cove" : "astra";
-    this.speaker = opts.speaker ?? defaultSpeaker;
-    this.lang = opts.lang ?? "eng";
-    this.sampleRate = opts.sampleRate ?? 16e3;
-    this.repetitionPenalty = opts.repetitionPenalty;
-    this.temperature = opts.temperature;
-    this.topP = opts.topP;
-    this.maxTokens = opts.maxTokens;
-    this.speedAlpha = opts.speedAlpha;
-    this.reduceLatency = opts.reduceLatency;
-    this.pauseBetweenBrackets = opts.pauseBetweenBrackets;
-    this.phonemizeBetweenBrackets = opts.phonemizeBetweenBrackets;
-    this.baseUrl = opts.baseUrl ?? RIME_BASE_URL;
-    this.totalTimeoutMs = timeoutForModel(this.model);
-  }
-  buildPayload(text) {
-    const payload = {
-      speaker: this.speaker,
-      text,
-      modelId: this.model
-    };
-    if (this.model === RimeModel.ARCANA) {
-      if (this.repetitionPenalty !== void 0)
-        payload.repetition_penalty = this.repetitionPenalty;
-      if (this.temperature !== void 0) payload.temperature = this.temperature;
-      if (this.topP !== void 0) payload.top_p = this.topP;
-      if (this.maxTokens !== void 0) payload.max_tokens = this.maxTokens;
-      payload.lang = this.lang;
-      payload.samplingRate = this.sampleRate;
-    } else if (isMistModel(this.model)) {
-      payload.lang = this.lang;
-      payload.samplingRate = this.sampleRate;
-      if (this.speedAlpha !== void 0) payload.speedAlpha = this.speedAlpha;
-      if (this.model === RimeModel.MIST_V2 && this.reduceLatency !== void 0) {
-        payload.reduceLatency = this.reduceLatency;
-      }
-      if (this.pauseBetweenBrackets !== void 0) {
-        payload.pauseBetweenBrackets = this.pauseBetweenBrackets;
-      }
-      if (this.phonemizeBetweenBrackets !== void 0) {
-        payload.phonemizeBetweenBrackets = this.phonemizeBetweenBrackets;
-      }
-    }
-    return payload;
-  }
-  /** Synthesize text and return the concatenated audio buffer. */
-  async synthesize(text) {
-    const chunks = [];
-    for await (const chunk of this.synthesizeStream(text)) {
-      chunks.push(chunk);
-    }
-    return Buffer.concat(chunks);
-  }
-  /**
-   * Synthesize text and yield raw PCM_S16LE chunks at the configured
-   * `sampleRate` as they stream in.
-   */
-  async *synthesizeStream(text) {
-    const response = await fetch(this.baseUrl, {
-      method: "POST",
-      headers: {
-        accept: RimeAudioFormat.PCM,
-        Authorization: `Bearer ${this.apiKey}`,
-        "content-type": "application/json"
-      },
-      body: JSON.stringify(this.buildPayload(text)),
-      signal: AbortSignal.timeout(this.totalTimeoutMs)
-    });
-    if (!response.ok) {
-      const body = await response.text();
-      throw new Error(`Rime TTS error ${response.status}: ${body}`);
-    }
-    const contentType = response.headers.get("content-type") ?? "";
-    if (!contentType.startsWith("audio")) {
-      const body = await response.text();
-      throw new Error(`Rime returned non-audio response: ${body.slice(0, 500)}`);
-    }
-    if (!response.body) {
-      throw new Error("Rime TTS: no response body");
-    }
-    const reader = response.body.getReader();
-    try {
-      while (true) {
-        const { done, value } = await reader.read();
-        if (done) break;
-        if (value && value.length > 0) {
-          yield Buffer.from(value);
-        }
-      }
-    } finally {
-      if (typeof reader.cancel === "function")
-        await reader.cancel().catch(() => {
-        });
-      reader.releaseLock();
-    }
-  }
-};
-// src/tts/rime.ts
 var TTS5 = class extends RimeTTS {
   static providerKey = "rime";
   constructor(opts = {}) {
@@ -6469,12 +6627,6 @@ init_esm_shims();
 // src/providers/groq-llm.ts
 init_esm_shims();
-// src/version.ts
-init_esm_shims();
-var VERSION = "0.5.5";
-// src/providers/groq-llm.ts
 var GROQ_BASE_URL = "https://api.groq.com/openai/v1";
 var GroqModel = {
   LLAMA_3_3_70B_VERSATILE: "llama-3.3-70b-versatile",
@@ -8131,12 +8283,28 @@ var TwilioAdapter = class _TwilioAdapter {
     return { callSid: call.sid };
   }
   /**
-   * Build a minimal ``<Response><Connect><Stream url="..."/></Connect></Response>``
-   * TwiML document. Mirrors the Python adapter's ``generate_stream_twiml``.
+   * Build a ``<Response><Connect><Stream url="...">`` TwiML document.
+   *
+   * ``parameters`` is forwarded as ``<Parameter name="..." value="..."/>``
+   * children of ``<Stream>``. Twilio Media Streams strips query-string params
+   * from the ``<Stream url=...>`` before the WS handshake, so
+   * ``<Parameter>`` tags are the supported way to pre-populate
+   * ``start.customParameters`` on the WS ``start`` frame. Used by the
+   * inbound path to carry caller / callee through to the bridge.
+   *
+   * Mirrors the Python adapter's ``generate_stream_twiml``.
    */
-  static generateStreamTwiml(streamUrl) {
-    const escaped = streamUrl.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&apos;");
-    return `<?xml version="1.0" encoding="UTF-8"?><Response><Connect><Stream url="${escaped}"/></Connect></Response>`;
+  static generateStreamTwiml(streamUrl, parameters) {
+    const esc = (s) => s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&apos;");
+    const escapedUrl = esc(streamUrl);
+    let paramTags = "";
+    if (parameters) {
+      for (const [name, value] of Object.entries(parameters)) {
+        if (value == null) continue;
+        paramTags += `<Parameter name="${esc(name)}" value="${esc(String(value))}"/>`;
+      }
+    }
+    return `<?xml version="1.0" encoding="UTF-8"?><Response><Connect><Stream url="${escapedUrl}">${paramTags}</Stream></Connect></Response>`;
   }
   /** Force-complete an in-progress call. */
   async endCall(callSid) {
@@ -8529,6 +8697,8 @@ export {
   CallMetricsAccumulator,
   STT4 as CartesiaSTT,
   TTS4 as CartesiaTTS,
+  CartesiaTTSModel,
+  CartesiaTTSVoiceMode,
   LLM4 as CerebrasLLM,
   ChatContext,
   CloudflareTunnel,
@@ -8536,10 +8706,13 @@ export {
   DEFAULT_PRICING,
   DTMF_EVENTS,
   DeepFilterNetFilter,
+  DeepgramModel,
   STT as DeepgramSTT,
   DefaultToolExecutor,
   ConvAI as ElevenLabsConvAI,
   ElevenLabsConvAIAdapter,
+  ElevenLabsModel,
+  ElevenLabsOutputFormat,
   ElevenLabsTTS as ElevenLabsRestTTS,
   TTS as ElevenLabsTTS,
   TTS2 as ElevenLabsWebSocketTTS,
@@ -8568,8 +8741,15 @@ export {
   Realtime2 as OpenAIRealtime2,
   OpenAIRealtime2Adapter,
   OpenAIRealtimeAdapter,
+  OpenAIRealtimeAudioFormat,
+  OpenAIRealtimeModel,
+  OpenAIRealtimeVADType,
   TTS3 as OpenAITTS,
   STT3 as OpenAITranscribeSTT,
+  OpenAITranscriptionModel,
+  OpenAIVoice,
+  PRICING_LAST_UPDATED,
+  PRICING_VERSION,
   PartialStreamError,
   Patter,
   PatterConnectionError,
@@ -8577,9 +8757,12 @@ export {
   PatterTool,
   PcmCarry,
   PipelineHookExecutor,
+  PricingUnit,
   ProvisionError,
   RateLimitError,
   RemoteMessageHandler,
+  RimeAudioFormat,
+  RimeModel,
   TTS5 as RimeTTS,
   SPAN_BARGEIN,
   SPAN_CALL,