npm - getpatter - Versions diffs - 0.6.0 → 0.6.1 - Mend

getpatter 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/dist/barge-in-strategies-X6ARMGIQ.mjs +12 -0
package/dist/chunk-D4424JZR.mjs +71 -0
package/dist/{chunk-X3364LSI.mjs → chunk-RV7APPYE.mjs} +36 -2
package/dist/{chunk-JUQ5WQTQ.mjs → chunk-TEW3NAZJ.mjs} +3244 -1674
package/dist/cli.js +277 -24
package/dist/dashboard/ui.html +13 -13
package/dist/index.d.mts +1525 -364
package/dist/index.d.ts +1525 -364
package/dist/index.js +3921 -986
package/dist/index.mjs +1310 -70
package/dist/{silero-vad-YLCXT5GQ.mjs → silero-vad-NSEXI4XS.mjs} +1 -1
package/dist/{test-mode-Y7YG5LFZ.mjs → test-mode-WEKKNBLD.mjs} +1 -1
package/package.json +1 -1
package/src/dashboard/ui.html +13 -13

package/dist/index.d.ts CHANGED Viewed

@@ -1,3 +1,5 @@
+import * as WebSocket from 'ws';
+import WebSocket__default from 'ws';
 import { EventEmitter } from 'events';
 import { Request, Response, NextFunction, Express } from 'express';
@@ -104,6 +106,61 @@ declare class Realtime {
     constructor(opts?: RealtimeOptions);
 }
+/**
+ * OpenAI Realtime 2 engine — marker class for Patter client dispatch.
+ *
+ * Wraps `gpt-realtime-2` (GA Realtime API). Separate marker from
+ * {@link import('./openai').Realtime} because the GA endpoint speaks a
+ * different `session.update` wire shape; the client dispatches to
+ * `OpenAIRealtime2Adapter` when this marker is passed.
+ */
+/**
+ * Constructor options for the OpenAI `Realtime2` engine marker.
+ *
+ * Every field is optional; each field's comment states what applies when
+ * it is omitted.
+ */
+interface Realtime2Options {
+    /** API key. Falls back to the `OPENAI_API_KEY` env var when omitted. */
+    apiKey?: string;
+    /** GA Realtime model. Defaults to `gpt-realtime-2`. */
+    model?: string;
+    /** Voice preset. Defaults to `alloy`. */
+    voice?: string;
+    /**
+     * Reasoning-effort tier. When omitted the field is not sent and the
+     * server default applies. OpenAI recommends `"low"` for production
+     * voice flows — higher tiers add measurable per-turn latency.
+     */
+    reasoningEffort?: 'minimal' | 'low' | 'medium' | 'high';
+    /**
+     * Override for `audio.input.transcription.model`. Omit to keep the
+     * adapter default (`whisper-1`). Use `"gpt-realtime-whisper"` for
+     * low-latency transcript partials.
+     */
+    inputAudioTranscriptionModel?: string;
+}
+/**
+ * OpenAI Realtime 2 engine marker — selects `gpt-realtime-2` on the GA
+ * Realtime API.
+ *
+ * `kind` is the literal tag `"openai_realtime_2"`. `apiKey`, `model` and
+ * `voice` are always present on the instance even though they are
+ * optional in {@link Realtime2Options}; `reasoningEffort` and
+ * `inputAudioTranscriptionModel` stay optional.
+ *
+ * NOTE(review): the example imports `OpenAIRealtime2` while this
+ * declaration is named `Realtime2` — presumably the class is re-exported
+ * under that alias; confirm against the package's export list.
+ *
+ * @example
+ * ```ts
+ * import { Patter, Twilio, OpenAIRealtime2 } from "getpatter";
+ *
+ * const phone = new Patter({ carrier: new Twilio(), phoneNumber: "+1..." });
+ * const agent = phone.agent({
+ *   engine: new OpenAIRealtime2({ reasoningEffort: "low" }),
+ *   systemPrompt: "You are a friendly receptionist.",
+ *   firstMessage: "Hello! How can I help?",
+ * });
+ * ```
+ */
+declare class Realtime2 {
+    readonly kind: "openai_realtime_2";
+    readonly apiKey: string;
+    readonly model: string;
+    readonly voice: string;
+    readonly reasoningEffort?: 'minimal' | 'low' | 'medium' | 'high';
+    readonly inputAudioTranscriptionModel?: string;
+    constructor(opts?: Realtime2Options);
+}
 /** ElevenLabs ConvAI engine — marker class for Patter client dispatch. */
 /** Constructor options for the ElevenLabs `ConvAI` engine marker. */
 interface ConvAIOptions {
@@ -273,71 +330,6 @@ declare class Tool implements ToolDefinition {
 /** Factory helper mirroring Python's `tool(...)` function. */
 declare function tool(opts: ToolOptions): Tool;
-/**
- * Shared STT / TTS adapter dispatch.
- *
- * In v0.5.0+ callers always pass pre-instantiated adapters (``agent.stt`` /
- * ``agent.tts`` are ``STTAdapter`` / ``TTSAdapter`` instances), so these
- * helpers are thin pass-throughs that return the instance or null. Kept as
- * functions so the Twilio/Telnyx bridges have a single dispatch point.
- */
-/** Per-word timings / metadata (Deepgram-shaped). Optional on every adapter. */
-interface STTWord {
-    readonly word?: string;
-    readonly start?: number;
-    readonly end?: number;
-    readonly confidence?: number;
-    readonly punctuated_word?: string;
-    readonly speaker?: number;
-}
-/**
- * Facade transcript shape — widened to surface richer provider fields
- * (Deepgram emits all of them) without forcing adapters that only know
- * ``text``/``isFinal`` to change. All non-text fields are optional.
- */
-interface STTTranscript {
-    text: string;
-    isFinal?: boolean;
-    /** Overall transcript confidence in [0, 1]. */
-    confidence?: number;
-    /** Provider-side end-of-utterance hint (faster than ``isFinal``). */
-    speechFinal?: boolean;
-    /** True when the result was produced in response to a Finalize command. */
-    fromFinalize?: boolean;
-    /** Provider request id (Deepgram populates this from the Metadata frame). */
-    requestId?: string;
-    /** Per-word timings / metadata when the provider emits them. */
-    words?: ReadonlyArray<STTWord>;
-    /** Which provider event this transcript represents (e.g. ``Results``). */
-    eventType?: string;
-}
-/** Callback invoked by an `STTAdapter` for each (partial or final) transcript event. */
-type STTTranscriptCallback = (t: STTTranscript) => Promise<void> | void;
-/** Shape shared by every STT adapter in the SDK. */
-interface STTAdapter {
-    connect(): Promise<void>;
-    sendAudio(pcm: Buffer): void | Promise<void>;
-    onTranscript(cb: STTTranscriptCallback): void;
-    close(): void | Promise<void>;
-    /**
-     * Optional: ask the provider to immediately finalise the in-flight
-     * utterance (rather than waiting for its own endpoint timer). Called by
-     * ``StreamHandler`` whenever the SDK's VAD signals ``speech_end``, and
-     * after a barge-in cancel — both moments where waiting for the
-     * provider's endpoint heuristic stalls the next turn.
-     *
-     * Implementations that do not support utterance-level finalisation
-     * (e.g. one-shot transcribers like Whisper) should omit this method
-     * entirely; the stream handler does an optional-chained call.
-     */
-    finalize?(): void | Promise<void>;
-}
-/** Shape shared by every TTS adapter in the SDK. */
-interface TTSAdapter {
-    synthesizeStream(text: string): AsyncIterable<Buffer>;
-}
 /**
  * Pipeline hook executor for pipeline mode.
  *
@@ -616,6 +608,22 @@ interface LLMStreamOptions {
 }
 interface LLMProvider {
     stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<LLMChunk, void, unknown>;
+    /**
+     * Optional best-effort pre-call DNS / TLS / HTTP-keepalive warmup.
+     *
+     * Called once per outbound call from ``Patter.call`` when the agent has
+     * ``prewarm: true`` (the default). Concrete providers (OpenAI,
+     * Anthropic, Google, Cerebras, Groq) override this to issue a
+     * lightweight HTTPS GET to their inference endpoint so by the time the
+     * first ``stream()`` call lands, the connection pool already has a
+     * warm socket. Failures are logged at debug level and never abort the
+     * call — pure latency optimisation.
+     *
+     * Declared as an optional method (``warmup?()``) so providers without
+     * a warmup hook still satisfy the type. Detected via runtime
+     * ``typeof provider.warmup === 'function'`` in the client.
+     */
+    warmup?(): Promise<void>;
 }
 /** Optional sampling kwargs forwarded into the OpenAI Chat Completions body. */
 interface OpenAILLMSamplingOptions {
@@ -642,6 +650,8 @@ interface OpenAILLMSamplingOptions {
 }
 /** LLM provider backed by OpenAI Chat Completions (streaming). */
 declare class OpenAILLMProvider implements LLMProvider {
+    /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+    static readonly providerKey = "openai";
     private readonly apiKey;
     readonly model: string;
     private readonly temperature?;
@@ -655,6 +665,23 @@ declare class OpenAILLMProvider implements LLMProvider {
     private readonly presencePenalty?;
     private readonly stop?;
     constructor(apiKey: string, model: string, sampling?: OpenAILLMSamplingOptions);
+    /** Subclasses (Cerebras, Groq) override this with their own host. */
+    protected get baseUrl(): string;
+    /**
+     * Pre-call DNS / TLS / HTTP-keepalive warmup.
+     *
+     * Issues a lightweight ``GET ${baseUrl}/models`` so DNS, TLS and HTTP/2
+     * are already up by the time the first ``chat.completions`` call lands.
+     * Best-effort: 5 s timeout, all exceptions swallowed at debug level.
+     *
+     * Note: an HTTPS GET warms DNS + TLS + connection pool but does NOT
+     * warm the inference path itself; for true inference warmup a real
+     * low-token request is needed, left as a follow-up. STT / TTS providers ship concrete
+     * WebSocket-based prewarms (Cartesia / Deepgram / AssemblyAI for STT;
+     * ElevenLabs WS for TTS) which save 200-500 ms each — those dominate
+     * the cold-start latency budget.
+     */
+    warmup(): Promise<void>;
     /** Stream OpenAI Chat Completions chunks for the given messages/tools. */
     stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<LLMChunk, void, unknown>;
 }
@@ -669,6 +696,8 @@ declare class LLMLoop {
     private eventBus?;
     private readonly _providerName;
     private readonly _modelName;
+    private _usageMissingCount;
+    private _loggedUsageFallback;
     private onToolCall?;
     constructor(apiKey: string, model: string, systemPrompt: string, tools?: ToolDefinition[] | null, llmProvider?: LLMProvider, disablePhonePreamble?: boolean);
     /**
@@ -706,6 +735,87 @@ declare class LLMLoop {
     private buildMessages;
 }
+/**
+ * Barge-in confirmation strategies.
+ *
+ * When a caller starts speaking while the agent's TTS is in flight, the SDK
+ * has to decide whether the speech is a real interruption or just a brief
+ * backchannel ("uh-huh", "okay") / room noise / cough. The default
+ * behaviour is to treat any VAD speech_start as a confirmed barge-in and
+ * cancel the agent immediately. That is fine for clean inputs but
+ * produces frequent false positives on PSTN: the agent gets cut
+ * mid-sentence by background chatter, breath, or filler words and never
+ * recovers the conversational thread.
+ *
+ * Each ``BargeInStrategy`` is consulted on every STT transcript while a
+ * barge-in is *pending* (VAD fired, but the agent has not yet been
+ * cancelled). The first strategy that returns ``true`` confirms the
+ * barge-in; if none do within the configured timeout the pending state
+ * is dropped and the agent resumes streaming TTS as if nothing happened.
+ * With an empty ``bargeInStrategies`` array the SDK falls back to the
+ * legacy "interrupt immediately on VAD" path, so adding strategies is
+ * a strict opt-in.
+ *
+ * ``EvaluateContext`` below is the read-only snapshot passed to
+ * ``BargeInStrategy.evaluate`` on each consultation.
+ */
+interface EvaluateContext {
+    /** Latest STT output text (interim or final). */
+    readonly transcript: string;
+    /** ``true`` for interim partials, ``false`` for finals. */
+    readonly isInterim: boolean;
+    /** Whether the agent's TTS is currently in flight. */
+    readonly agentSpeaking: boolean;
+}
+/**
+ * Decides whether a pending barge-in should be confirmed.
+ *
+ * ``evaluate`` may be called any number of times per turn, so
+ * implementations must be safe under repeated calls; it may answer
+ * synchronously or via a promise. ``reset`` is optional and is invoked
+ * when the agent finishes speaking naturally and when a pending barge-in
+ * times out without confirmation.
+ */
+interface BargeInStrategy {
+    evaluate(ctx: EvaluateContext): Promise<boolean> | boolean;
+    reset?(): Promise<void> | void;
+}
+interface MinWordsStrategyOptions {
+    /**
+     * Minimum word count required while the agent is speaking. Reasonable
+     * values are 2-5; 3 is a good starting point for production phone
+     * agents. Must be ``>= 1``. This is the only mandatory option.
+     */
+    readonly minWords: number;
+    /**
+     * When ``true`` (default), interim STT partials are evaluated as soon
+     * as they arrive. Set to ``false`` to wait for finals only — slower
+     * but free of partial-word noise on jittery STT providers.
+     */
+    readonly useInterim?: boolean;
+}
+/**
+ * Confirm barge-in only after the caller has spoken ``minWords`` words.
+ *
+ * Filters short backchannels, single-word utterances, and stray
+ * transcription fragments that VAD picked up but were not real
+ * interruptions. While the agent is silent the strategy permits any
+ * speech to count (one word is enough), so the first user turn is not
+ * delayed.
+ *
+ * Note the asymmetry in the declared signatures: ``evaluate`` answers
+ * synchronously (``boolean``) while ``reset`` returns a promise.
+ */
+declare class MinWordsStrategy implements BargeInStrategy {
+    private readonly minWords;
+    private readonly useInterim;
+    constructor(options: MinWordsStrategyOptions);
+    evaluate(ctx: EvaluateContext): boolean;
+    reset(): Promise<void>;
+}
+/**
+ * Short-circuit-OR composition: first strategy that confirms wins.
+ * Returns ``false`` for an empty array so callers can use the empty
+ * default to mean "no opt-in confirmation, fall back to legacy
+ * interrupt-on-VAD".
+ *
+ * The result is always delivered as a promise, even though individual
+ * strategies may answer synchronously.
+ */
+declare function evaluateStrategies(strategies: readonly BargeInStrategy[], ctx: EvaluateContext): Promise<boolean>;
+/**
+ * Call ``reset()`` on every strategy, swallowing per-strategy errors.
+ *
+ * NOTE(review): ``reset`` is optional on ``BargeInStrategy``; strategies
+ * without it are presumably skipped — confirm in the implementation.
+ */
+declare function resetStrategies(strategies: readonly BargeInStrategy[]): Promise<void>;
 /**
  * Public type definitions for the Patter SDK — agent options, pipeline hooks,
  * provider config envelopes, and serve/call request/response shapes.
@@ -967,6 +1077,15 @@ interface VADEvent {
 interface VADProvider {
     processFrame(pcmChunk: Buffer, sampleRate: number): Promise<VADEvent | null>;
     close(): Promise<void>;
+    /**
+     * Optional: reset all per-utterance state so the next ``processFrame``
+     * starts from a clean SILENCE state. Useful between agent turns to
+     * prevent a "stuck SPEECH" condition where PSTN echo / loopback kept the
+     * detector's internal probability above the deactivation threshold for
+     * the full agent turn, leaving the VAD unable to emit ``speech_start``
+     * on the next user utterance (one-shot barge-in bug).
+     *
+     * May complete synchronously or via a promise. Because the method is
+     * optional, callers have to check for it before invoking it.
+     */
+    reset?(): Promise<void> | void;
 }
 /** Pre-STT audio filter — noise cancellation, gain, EQ. */
 interface AudioFilter {
@@ -1062,7 +1181,7 @@ interface AgentOptions {
      * matching mode (``openai_realtime`` or ``elevenlabs_convai``). When absent,
      * pipeline mode is selected if ``stt`` and ``tts`` are provided.
      */
-    engine?: Realtime | ConvAI;
+    engine?: Realtime | Realtime2 | ConvAI;
     /**
      * Provider mode. Normally derived from ``engine`` / ``stt`` + ``tts``. Pass
      * ``'pipeline'`` explicitly when building a pipeline-mode agent without
@@ -1103,6 +1222,59 @@ interface AgentOptions {
      * Default: 300.
      */
     bargeInThresholdMs?: number;
+    /**
+     * Opt-in barge-in confirmation strategies (pipeline mode). With the
+     * default empty array the SDK falls back to the legacy
+     * "interrupt immediately on VAD speech_start" behaviour. When at
+     * least one strategy is provided, a VAD speech_start during TTS
+     * marks the barge-in as *pending* — the agent's TTS continues
+     * streaming naturally and its in-flight LLM stream is preserved —
+     * and the strategies are consulted on every STT transcript. The first strategy that
+     * returns ``true`` confirms the barge-in (cancels TTS, flushes the
+     * inbound ring buffer); if none confirm within
+     * ``bargeInConfirmMs`` the pending state is dropped and TTS resumes.
+     *
+     * See ``getpatter`` exports ``BargeInStrategy`` /
+     * ``MinWordsStrategy`` for the protocol and a reference
+     * implementation.
+     */
+    bargeInStrategies?: readonly BargeInStrategy[];
+    /**
+     * Maximum time (ms) to wait for at least one strategy to confirm a
+     * pending barge-in before discarding the pending state and resuming
+     * TTS. Only consulted when ``bargeInStrategies`` is non-empty.
+     * Default: 1500.
+     */
+    bargeInConfirmMs?: number;
+    /**
+     * When ``true`` (default), ``Patter.call`` warms up the STT, TTS, and
+     * LLM provider connections in parallel with the carrier-side
+     * ``initiateCall`` request so DNS, TLS, and HTTP/2 handshakes are
+     * already complete by the time the callee answers. Adapters expose a
+     * ``warmup()`` method returning ``Promise<void>`` (default no-op) —
+     * providers can override it to open a persistent connection ahead
+     * of the WebSocket bridge. Best-effort: warmup failures are logged
+     * at debug level and never abort the call. Default: ``true``.
+     */
+    prewarm?: boolean;
+    /**
+     * When ``true`` (default ``false``), ``Patter.call`` also pre-renders
+     * ``firstMessage`` to TTS audio bytes during the ringing window and
+     * streams the cached buffer immediately when the carrier emits
+     * ``start``. Eliminates the 200-700 ms TTS first-byte latency on the
+     * greeting at the cost of paying the TTS bill even if the call is
+     * never answered (silently logged at warn level when the call
+     * fails). Off by default to preserve the prior cost surface; opt-in
+     * for production outbound where every millisecond of greeting
+     * latency hurts conversion. Default: ``false``.
+     *
+     * **Pipeline mode only.** Realtime / ConvAI provider modes never
+     * consume the prewarm cache (the StreamHandler for those modes runs
+     * its first-message emit through the provider's own audio path), so
+     * ``Patter.call`` refuses to spawn the prewarm task and logs a
+     * warning when ``provider !== 'pipeline'``.
+     */
+    prewarmFirstMessage?: boolean;
     /**
      * When true, the sentence chunker emits the first clause of each response
      * on a soft punctuation boundary (",", em-dash, en-dash) once ~40 chars
@@ -1194,36 +1366,449 @@ interface LocalCallOptions {
     to: string;
     agent: AgentOptions;
     /**
-     * Enable answering-machine detection. **Defaults to ``true``** — the SDK
-     * asks Twilio (``MachineDetection=DetectMessageEnd`` + Async AMD) or
-     * Telnyx (``answering_machine_detection=greeting_end``) to classify
-     * whoever picks up. Async AMD on Twilio adds ~0 answer-latency on human
-     * pickups (the call connects immediately and the result arrives via
-     * webhook 2-5 s later), so ON-by-default is safe. Pass ``false`` to
-     * disable when you want to skip per-call AMD billing or you already
-     * know the destination is a human.
+     * Enable answering-machine detection. **Defaults to ``true``** — the SDK
+     * asks Twilio (``MachineDetection=DetectMessageEnd`` + Async AMD) or
+     * Telnyx (``answering_machine_detection=greeting_end``) to classify
+     * whoever picks up. Async AMD on Twilio adds ~0 answer-latency on human
+     * pickups (the call connects immediately and the result arrives via
+     * webhook 2-5 s later), so ON-by-default is safe. Pass ``false`` to
+     * disable when you want to skip per-call AMD billing or you already
+     * know the destination is a human.
+     */
+    machineDetection?: boolean;
+    /**
+     * Called once when the carrier finishes the AMD check. Fires for both
+     * ``human`` and ``machine`` outcomes. Combine with ``voicemailMessage``
+     * to get both the legacy voicemail-drop AND a result callback (the SDK
+     * fires the callback after the drop is queued). Acceptance tests use
+     * this to mark a run INVALID when ``classification !== 'human'``.
+     */
+    onMachineDetection?: (result: MachineDetectionResult) => void | Promise<void>;
+    /** If set, spoken as a voicemail message when AMD detects a machine. Implicitly enables ``machineDetection``. */
+    voicemailMessage?: string;
+    /** Dynamic variables merged into agent.variables before call. Override agent-level variables. */
+    variables?: Record<string, string>;
+    /**
+     * Ring timeout in seconds. Forwarded to Twilio as `Timeout` and to Telnyx
+     * as `timeout_secs`. Defaults to **25 s** — the production-recommended
+     * value that limits phantom calls. Pass `60` for legacy carrier-default
+     * parity, or `null` to omit the parameter entirely (carrier picks its
+     * own default).
+     */
+    ringTimeout?: number | null;
+}
+/**
+ * Shared STT / TTS adapter dispatch.
+ *
+ * In v0.5.0+ callers always pass pre-instantiated adapters (``agent.stt`` /
+ * ``agent.tts`` are ``STTAdapter`` / ``TTSAdapter`` instances), so these
+ * helpers are thin pass-throughs that return the instance or null. Kept as
+ * functions so the Twilio/Telnyx bridges have a single dispatch point.
+ */
+/**
+ * Per-word timings / metadata (Deepgram-shaped). Optional on every adapter.
+ *
+ * Every field is read-only and optional. NOTE(review): ``start`` / ``end``
+ * are presumably offsets in seconds, as in Deepgram's word objects —
+ * confirm.
+ */
+interface STTWord {
+    readonly word?: string;
+    readonly start?: number;
+    readonly end?: number;
+    readonly confidence?: number;
+    readonly punctuated_word?: string;
+    readonly speaker?: number;
+}
+/**
+ * Facade transcript shape — widened to surface richer provider fields
+ * (Deepgram emits all of them) without forcing adapters that only know
+ * ``text``/``isFinal`` to change. All non-text fields are optional —
+ * including ``isFinal``, so consumers cannot assume it is set on every
+ * event.
+ */
+interface STTTranscript {
+    text: string;
+    isFinal?: boolean;
+    /** Overall transcript confidence in [0, 1]. */
+    confidence?: number;
+    /** Provider-side end-of-utterance hint (faster than ``isFinal``). */
+    speechFinal?: boolean;
+    /** True when the result was produced in response to a Finalize command. */
+    fromFinalize?: boolean;
+    /** Provider request id (Deepgram populates this from the Metadata frame). */
+    requestId?: string;
+    /** Per-word timings / metadata when the provider emits them. */
+    words?: ReadonlyArray<STTWord>;
+    /** Which provider event this transcript represents (e.g. ``Results``). */
+    eventType?: string;
+}
+/**
+ * Callback invoked by an `STTAdapter` for each (partial or final) transcript event.
+ *
+ * May return a promise or nothing. NOTE(review): whether adapters await a
+ * returned promise is not visible from this declaration — confirm.
+ */
+type STTTranscriptCallback = (t: STTTranscript) => Promise<void> | void;
+/** Shape shared by every STT adapter in the SDK. */
+interface STTAdapter {
+    connect(): Promise<void>;
+    sendAudio(pcm: Buffer): void | Promise<void>;
+    onTranscript(cb: STTTranscriptCallback): void;
+    close(): void | Promise<void>;
+    /**
+     * Optional: ask the provider to immediately finalise the in-flight
+     * utterance (rather than waiting for its own endpoint timer). Called by
+     * ``StreamHandler`` whenever the SDK's VAD signals ``speech_end``, and
+     * after a barge-in cancel — both moments where waiting for the
+     * provider's endpoint heuristic stalls the next turn.
+     *
+     * Implementations that do not support utterance-level finalisation
+     * (e.g. one-shot transcribers like Whisper) should omit this method
+     * entirely; the stream handler does an optional-chained call.
+     */
+    finalize?(): void | Promise<void>;
+    /**
+     * Optional best-effort pre-call DNS / TLS / HTTP-keepalive warmup.
+     * Default behaviour is a no-op — adapters that benefit (e.g. those
+     * whose provider WebSocket has a slow handshake) can override it.
+     * Failures must never abort the call.
+     */
+    warmup?(): Promise<void>;
+}
+/**
+ * Shape shared by every TTS adapter in the SDK.
+ *
+ * ``synthesizeStream`` is the only required member; it yields the audio
+ * for ``text`` as a sequence of ``Buffer`` chunks.
+ */
+interface TTSAdapter {
+    synthesizeStream(text: string): AsyncIterable<Buffer>;
+    /**
+     * Optional best-effort pre-call DNS / TLS / HTTP-keepalive warmup.
+     * Default behaviour is a no-op. Failures must never abort the call.
+     */
+    warmup?(): Promise<void>;
+}
+/**
+ * Known stable ElevenLabs voice models (from the official ElevenLabs API
+ * reference). Exposed as a typed `as const` object so callers can pass
+ * `ElevenLabsModel.FLASH_V2_5` and get autocomplete / static checking; the
+ * public `modelId` option also accepts an arbitrary `string` so users can
+ * pass forward-compat IDs we haven't enumerated yet.
+ *
+ * - `V3` — newest, highest quality (slower TTFT than Flash).
+ * - `FLASH_V2_5` — current default, fastest (~75 ms TTFT).
+ * - `TURBO_V2_5` — balanced quality/speed.
+ * - `MULTILINGUAL_V2` — best multilingual support.
+ * - `MONOLINGUAL_V1` — legacy English-only.
+ */
+declare const ElevenLabsModel: {
+    readonly V3: "eleven_v3";
+    readonly FLASH_V2_5: "eleven_flash_v2_5";
+    readonly TURBO_V2_5: "eleven_turbo_v2_5";
+    readonly MULTILINGUAL_V2: "eleven_multilingual_v2";
+    readonly MONOLINGUAL_V1: "eleven_monolingual_v1";
+};
+/**
+ * Union of {@link ElevenLabsModel} string values.
+ *
+ * Shares its name with the const object above, so `ElevenLabsModel` works
+ * both as a value (`ElevenLabsModel.V3`) and as a type.
+ */
+type ElevenLabsModel = (typeof ElevenLabsModel)[keyof typeof ElevenLabsModel];
+declare const ElevenLabsOutputFormat: {
+    readonly MP3_22050_32: "mp3_22050_32";
+    readonly MP3_44100_32: "mp3_44100_32";
+    readonly MP3_44100_64: "mp3_44100_64";
+    readonly MP3_44100_96: "mp3_44100_96";
+    readonly MP3_44100_128: "mp3_44100_128";
+    readonly MP3_44100_192: "mp3_44100_192";
+    readonly PCM_8000: "pcm_8000";
+    readonly PCM_16000: "pcm_16000";
+    readonly PCM_22050: "pcm_22050";
+    readonly PCM_24000: "pcm_24000";
+    readonly PCM_44100: "pcm_44100";
+    readonly ULAW_8000: "ulaw_8000";
+};
+/**
+ * Union of {@link ElevenLabsOutputFormat} string values.
+ *
+ * Shares its name with the const object above, so the identifier works
+ * both as a value (`ElevenLabsOutputFormat.PCM_16000`) and as a type.
+ * NOTE(review): the names presumably encode codec, sample rate in Hz and,
+ * for MP3, bitrate in kbps — confirm against the ElevenLabs API reference.
+ */
+type ElevenLabsOutputFormat = (typeof ElevenLabsOutputFormat)[keyof typeof ElevenLabsOutputFormat];
+/**
+ * ElevenLabs voice tuning knobs forwarded as `voice_settings` in the request.
+ *
+ * All fields are optional. NOTE(review): the snake_case keys presumably
+ * mirror the provider's request body verbatim — confirm.
+ */
+interface ElevenLabsVoiceSettings {
+    stability?: number;
+    similarity_boost?: number;
+    style?: number;
+    use_speaker_boost?: boolean;
+}
+/**
+ * Constructor options for {@link ElevenLabsTTS}.
+ *
+ * All fields are optional. Accepted by the options-object constructor
+ * overload and, minus `outputFormat`, by the `forTwilio` / `forTelnyx`
+ * factories.
+ */
+interface ElevenLabsTTSOptions$1 {
+    voiceId?: string;
+    /**
+     * ElevenLabs voice model ID. The default ``eleven_flash_v2_5`` has the
+     * lowest TTFT (~75 ms). Pass ``eleven_v3`` for highest quality, or any
+     * arbitrary string for forward-compat with future models.
+     */
+    modelId?: ElevenLabsModel | string;
+    outputFormat?: ElevenLabsOutputFormat;
+    voiceSettings?: ElevenLabsVoiceSettings;
+    languageCode?: string;
+    chunkSize?: number;
+}
+/**
+ * ElevenLabs streaming TTS adapter.
+ *
+ * Supported `modelId` values are autocompleted via {@link ElevenLabsModel}.
+ * Default is `eleven_flash_v2_5` (lowest TTFT, ~75 ms).
+ *
+ * **Telephony optimization** — the constructor default
+ * `outputFormat='pcm_16000'` is correct for web playback, dashboard
+ * previews, and 16 kHz pipelines. For real phone calls, use the
+ * carrier-specific factories instead:
+ *
+ * - {@link ElevenLabsTTS.forTwilio} emits `ulaw_8000` natively. Twilio's
+ *   media-stream WebSocket expects μ-law @ 8 kHz, so the SDK normally
+ *   resamples 16 kHz → 8 kHz and PCM → μ-law before sending. Asking
+ *   ElevenLabs to produce μ-law directly skips that step (saves
+ *   ~30–80 ms first-byte plus per-frame CPU and avoids any resampling
+ *   aliasing).
+ * - {@link ElevenLabsTTS.forTelnyx} emits `pcm_16000`. Telnyx negotiates
+ *   L16/16000 on its bidirectional media WebSocket, so 16 kHz PCM is
+ *   already the format used end-to-end and no transcoding happens.
+ *   ElevenLabs *also* supports `ulaw_8000` if your Telnyx profile is
+ *   pinned to PCMU/8000 — pass `outputFormat: 'ulaw_8000'` explicitly
+ *   in that case.
+ */
+declare class ElevenLabsTTS {
+    static readonly providerKey = "elevenlabs";
+    private readonly apiKey;
+    private readonly voiceId;
+    private readonly modelId;
+    private readonly outputFormat;
+    private readonly voiceSettings;
+    private readonly languageCode;
+    private readonly chunkSize;
+    constructor(apiKey: string, voiceId?: string, modelId?: string, outputFormat?: ElevenLabsOutputFormat | string);
+    constructor(apiKey: string, options: ElevenLabsTTSOptions$1);
+    /**
+     * Construct an instance pre-configured for Twilio Media Streams.
+     *
+     * Sets `outputFormat='ulaw_8000'` so ElevenLabs emits μ-law @ 8 kHz
+     * directly — the exact wire format Twilio's media stream uses — letting
+     * the SDK skip the 16 kHz→8 kHz resample and PCM→μ-law conversion in
+     * `TwilioAudioSender`. Saves ~30–80 ms first-byte and per-frame CPU,
+     * and removes a potential aliasing source.
+     *
+     * `voiceSettings` defaults to a low-bandwidth-friendly profile
+     * (speaker boost off, modest stability) which sounds cleaner at 8 kHz
+     * μ-law than the studio default. Pass an explicit object to override.
+     */
+    static forTwilio(apiKey: string, options?: Omit<ElevenLabsTTSOptions$1, 'outputFormat'>): ElevenLabsTTS;
+    /**
+     * Construct an instance pre-configured for Telnyx bidirectional media.
+     *
+     * Telnyx's default media-streaming codec is L16 PCM @ 16 kHz, which
+     * matches our default Telnyx handler. We pick `pcm_16000` so the audio
+     * flows end-to-end with zero resampling or transcoding.
+     *
+     * Trade-off: if your Telnyx profile is pinned to PCMU/8000 (μ-law),
+     * construct `ElevenLabsTTS` directly with `outputFormat: 'ulaw_8000'`
+     * — Telnyx supports that natively too.
+     */
+    static forTelnyx(apiKey: string, options?: Omit<ElevenLabsTTSOptions$1, 'outputFormat'>): ElevenLabsTTS;
+    /**
+     * Synthesise text to speech and return the full audio as a single Buffer.
+     *
+     * For large chunks (or when latency matters) call `synthesizeStream` instead.
+     */
+    synthesize(text: string): Promise<Buffer>;
+    /**
+     * Synthesise text and yield audio chunks as they arrive (streaming).
+     *
+     * The yielded buffers carry audio in the configured `outputFormat` —
+     * raw PCM at 16 kHz with the constructor default `pcm_16000`.
+     * `chunkSize` controls the maximum yield size — 512 is a good choice
+     * for low-latency telephony.
+     */
+    synthesizeStream(text: string): AsyncGenerator<Buffer>;
+}
+/**
+ * WebSocket-based ElevenLabs TTS provider — opt-in low-latency variant.
+ *
+ * Targets the ElevenLabs streaming-input WebSocket endpoint
+ * (`/v1/text-to-speech/{voice_id}/stream-input`) instead of the HTTP
+ * `/stream` endpoint used by `ElevenLabsTTS`. Saves the HTTP request setup
+ * time per utterance (~50 ms) and avoids the HTTP cold-start TLS handshake
+ * when calls are bursty.
+ *
+ * API matches `ElevenLabsTTS` (`synthesizeStream(text)` returns an
+ * `AsyncGenerator<Buffer>`) so it can be passed anywhere a TTSAdapter is
+ * expected.
+ *
+ * Behaviour notes
+ * - WebSocket is opened **per-utterance** (matches HTTP semantics). A
+ *   future revision may pool a WS across utterances of the same call
+ *   session — see roadmap Phase 5b.
+ * - `auto_mode=true` is enabled by default. Pass `autoMode: false` to
+ *   send a custom `chunk_length_schedule`.
+ * - `outputFormat` is exposed as a query parameter so `ulaw_8000` (Twilio
+ *   native) and `pcm_16000` (Telnyx native) work without resampling.
+ * - `eleven_v3` is **not** supported — the WS endpoint rejects it.
+ * - `optimize_streaming_latency` is officially deprecated and is not
+ *   exposed.
+ */
+/**
+ * Constructor options for {@link ElevenLabsWebSocketTTS}.
+ *
+ * `apiKey` is the only required field; everything else is optional.
+ */
+interface ElevenLabsWebSocketTTSOptions {
+    apiKey: string;
+    voiceId?: string;
+    modelId?: ElevenLabsModel | string;
+    outputFormat?: string;
+    voiceSettings?: Record<string, unknown>;
+    languageCode?: string;
+    /** Let the server pick chunk timing. Default true. */
+    autoMode?: boolean;
+    /** WS keep-alive timeout in seconds (5–180). Default 60. */
+    inactivityTimeout?: number;
+    /**
+     * Manual chunk schedule, only used when ``autoMode: false``. Each value
+     * must be 5–500. ElevenLabs default is ``[120, 160, 250, 290]``.
+     */
+    chunkLengthSchedule?: number[];
+    /** Outgoing audio re-chunk size in bytes. Default 4096. */
+    chunkSize?: number;
+}
+/**
+ * Parked WS handle returned by {@link ElevenLabsWebSocketTTS.openParkedConnection}.
+ *
+ * `bosSent` records whether the BOS frame (`{"text": " ", ...}`) has
+ * already been written to the wire. The prewarm pipeline always sends
+ * the BOS so the upstream worker is selected on the parked connection;
+ * `synthesizeStream` adopts the WS and SKIPS its own BOS send to avoid
+ * a protocol error.
+ */
+interface ElevenLabsParkedWS {
+    ws: WebSocket__default;
+    bosSent: boolean;
+}
+/** WebSocket-based ElevenLabs TTS adapter — opt-in low-latency variant. */
+declare class ElevenLabsWebSocketTTS implements TTSAdapter {
+    static readonly providerKey = "elevenlabs_ws";
+    readonly apiKey: string;
+    readonly voiceId: string;
+    readonly modelId: string;
+    readonly voiceSettings?: Record<string, unknown>;
+    readonly languageCode?: string;
+    readonly autoMode: boolean;
+    readonly inactivityTimeout: number;
+    readonly chunkLengthSchedule?: number[];
+    readonly chunkSize: number;
+    /**
+     * Single-slot adoption queue. The prewarm pipeline parks one WS per
+     * outbound call here; the next `synthesizeStream` call consumes it
+     * (skipping `new WebSocket()` and the BOS send) instead of opening
+     * a fresh socket. The slot is consumed exactly once: if two
+     * `synthesizeStream` calls overlap, only the first one benefits.
+     *
+     * We keep this on the adapter (not in a parameter) so the existing
+     * `for await (const chunk of agent.tts.synthesizeStream(...))` call
+     * site in `StreamHandler` continues to work without signature
+     * changes.
+     */
+    private adoptedConnection;
+    /**
+     * The wire format requested over the ElevenLabs WS. Initially set from
+     * the constructor; ``setTelephonyCarrier`` may auto-flip it to the
+     * carrier's native codec when the caller did NOT pass ``outputFormat``
+     * explicitly.
+     */
+    private _outputFormat;
+    private readonly _outputFormatExplicit;
+    /** Public read-only view of the (possibly auto-flipped) wire format. */
+    get outputFormat(): string;
+    constructor(opts: ElevenLabsWebSocketTTSOptions);
+    /**
+     * Hook called by ``StreamHandler`` to advise the carrier wire format.
+     *
+     * When the user did NOT pass an explicit ``outputFormat`` in the
+     * constructor options, this flips the format to the carrier's native
+     * wire codec — saving a client-side transcode step. Calling with an
+     * unknown carrier (``""`` / ``"custom"``) is a no-op.
+     *
+     * When ``outputFormat`` was explicitly passed (incl. via the
+     * ``forTwilio`` / ``forTelnyx`` factories), this method is a no-op —
+     * the user's choice always wins.
+     */
+    setTelephonyCarrier(carrier: string): void;
+    /** Pre-configured for Twilio Media Streams (`ulaw_8000`). */
+    static forTwilio(opts: Omit<ElevenLabsWebSocketTTSOptions, 'outputFormat'>): ElevenLabsWebSocketTTS;
+    /** Pre-configured for Telnyx (`pcm_16000`). */
+    static forTelnyx(opts: Omit<ElevenLabsWebSocketTTSOptions, 'outputFormat'>): ElevenLabsWebSocketTTS;
+    private buildUrl;
+    /**
+     * Build the protocol-required BOS frame sent on every fresh WS.
+     *
+     * The single-space `{"text": " "}` keep-alive establishes the session
+     * without committing any synthesis (no `flush: true`, no real text).
+     * Production `synthesizeStream()` and `warmup()` share this exact
+     * construction so the upstream worker chooses the same per-session
+     * config in both cases — otherwise the warm session is on a different
+     * worker than the live request, which defeats the warmup goal.
+     */
+    private buildBosFrame;
+    /**
+     * Single-shot synthesis: open WS, send text, yield bytes, close.
+     *
+     * Resilience contract:
+     * - Connection bounded by ``CONNECT_TIMEOUT_MS`` (5s, was 15s).
+     * - Each idle wait bounded by ``FRAME_TIMEOUT_MS`` (30s) so a stalled
+     *   server cannot keep the generator alive indefinitely.
+     * - Permanent error handler attached BEFORE the open await — prevents
+     *   ``uncaughtException`` if an error fires after the once-listener
+     *   resolves.
+     * - All event listeners removed in ``finally`` (no closure leak past
+     *   socket close).
+     * - Server-reported ``error`` raises ``ElevenLabsTTSError``.
+     * - Per-frame audio payload capped at ``MAX_AUDIO_B64_BYTES``.
+     * - Best-effort EOS ``{"text":""}`` sent in finally (not immediately
+     *   after flush — auto_mode could otherwise truncate the tail audio).
+     */
+    synthesizeStream(text: string): AsyncGenerator<Buffer>;
+    /**
+     * Pre-call WebSocket warmup for the ElevenLabs `/stream-input` endpoint.
+     *
+     * Opens the WS (DNS + TLS + auth handshake), sends the EXACT same BOS
+     * frame the production `synthesizeStream()` path sends — including
+     * `voice_settings` and (when configured) `generation_config` — so
+     * ElevenLabs instantiates the same per-session worker for both
+     * warmup and the live request. If the BOS frames differ, the server
+     * may route warmup and the real call to two different workers, and
+     * the warmed worker is wasted. Idles ~250 ms, then closes. By the
+     * time the first `synthesizeStream()` call lands during the call,
+     * the connection pool has the upstream warm — net wire time saving
+     * of 200-500 ms.
+     *
+     * Billing safety: ElevenLabs bills on synthesised characters
+     * delivered via `audio` frames (per https://elevenlabs.io/pricing).
+     * The keepalive (single-space `text`, no `flush: true`, no real
+     * transcript) is documented as the session-establishment frame and
+     * does NOT generate synthesis. Closing without sending the actual
+     * transcript does not consume billable characters. Best-effort:
+     * failures logged at debug level.
+     */
+    warmup(): Promise<void>;
+    /**
+     * Open a fresh WS, send the EXACT BOS frame the live `synthesizeStream`
+     * sends, and return the OPEN socket without closing it. Used by the
+     * prewarm pipeline to park a TTS connection during the carrier ringing
+     * window so the next `synthesizeStream` call can adopt it via
+     * {@link adoptWebSocket} and skip ~400-900 ms of TLS + BOS round-trip.
+     *
+     * Returns a parked-handle the caller stashes; the next
+     * `synthesizeStream` will detect the adoption queue and skip its own
+     * `new WebSocket()` + BOS send.
+     *
+     * Billing safety: BOS is the documented session-establishment frame
+     * (single space `text`, no `flush: true`) and does not generate
+     * synthesis. ElevenLabs bills on `audio` frames received from the
+     * server, not on BOS bytes sent by the client.
      */
-    machineDetection?: boolean;
+    openParkedConnection(): Promise<ElevenLabsParkedWS>;
     /**
-     * Called once when the carrier finishes the AMD check. Fires for both
-     * ``human`` and ``machine`` outcomes. Combine with ``voicemailMessage``
-     * to get both the legacy voicemail-drop AND a result callback (the SDK
-     * fires the callback after the drop is queued). Acceptance tests use
-     * this to mark a run INVALID when ``classification !== 'human'``.
+     * Stash a parked WS handle so the next `synthesizeStream` call adopts
+     * it instead of opening a fresh socket. Caller is responsible for
+     * holding the handle alive until either the live request consumes it
+     * or the call ends (in which case `discardAdoptedConnection()`
+     * cleans it up).
      */
-    onMachineDetection?: (result: MachineDetectionResult) => void | Promise<void>;
-    /** If set, spoken as a voicemail message when AMD detects a machine. Implicitly enables ``machineDetection``. */
-    voicemailMessage?: string;
-    /** Dynamic variables merged into agent.variables before call. Override agent-level variables. */
-    variables?: Record<string, string>;
+    adoptWebSocket(parked: ElevenLabsParkedWS): void;
     /**
-     * Ring timeout in seconds. Forwarded to Twilio as `Timeout` and to Telnyx
-     * as `timeout_secs`. Defaults to **25 s** — the production-recommended
-     * value that limits phantom calls. Pass `60` for legacy carrier-default
-     * parity, or `null` to omit the parameter entirely (carrier picks its
-     * own default).
+     * Drop and close any pending parked WS without consuming it. Used on
+     * call-failure paths so a never-started call does not leak a TTS WS
+     * that ElevenLabs will close after its inactivity timeout anyway.
      */
-    ringTimeout?: number | null;
+    discardAdoptedConnection(): void;
+    /** No-op — connections are per-utterance and torn down inside synthesizeStream. */
+    close(): Promise<void>;
 }
 /**
@@ -1272,6 +1857,19 @@ declare class MetricsStore extends EventEmitter {
     private readonly maxCalls;
     private calls;
     private activeCalls;
+    /**
+     * User-driven soft delete: call_ids the operator removed from the
+     * dashboard view. The on-disk artefacts written by ``CallLogger``
+     * (``metadata.json``, ``transcript.jsonl``) are intentionally NOT
+     * touched — they serve as the durable backup. All read paths
+     * (``getCalls`` / ``getCall`` / ``getAggregates`` / ``getCallsInRange``
+     * / ``hydrate``) filter against this set so the call is invisible
+     * to the UI and excluded from rolling metrics. Populated from
+     * ``<logRoot>/.deleted_call_ids.json`` on hydrate so deletions
+     * survive a process restart. Parity with Python.
+     */
+    private deletedCallIds;
+    private deletedIdsPath;
     /**
      * Accepts either a numeric ``maxCalls`` (legacy positional — matches the
      * original TS API) or an options object ``{ maxCalls }`` to align with the
@@ -1300,19 +1898,66 @@ declare class MetricsStore extends EventEmitter {
     recordTurn(data: Record<string, unknown>): void;
     /** Move a call from active to completed and persist its final metrics. */
     recordCallEnd(data: Record<string, unknown>, metrics?: Record<string, unknown> | null): void;
-    /** Return a window of completed calls in newest-first order. */
+    /**
+     * Return a window of completed calls in newest-first order.
+     *
+     * Soft-deleted call_ids (see ``deleteCalls``) are filtered out so the
+     * dashboard never re-shows a row the user removed. The on-disk
+     * artefacts are intentionally preserved as a backup.
+     */
     getCalls(limit?: number, offset?: number): CallRecord[];
-    /** Look up a completed call by id (newest match wins). */
+    /**
+     * Look up a completed call by id (newest match wins).
+     *
+     * Soft-deleted call_ids resolve to ``null`` so the SPA's detail pane
+     * cannot render a row the user removed.
+     */
     getCall(callId: string): CallRecord | null;
+    /**
+     * Soft-delete one or more calls from the dashboard view.
+     *
+     * Adds each ``call_id`` to an in-memory set. Subsequent reads via
+     * ``getCalls`` / ``getCall`` / ``getAggregates`` / ``getCallsInRange``
+     * exclude the deleted ids, so rolling metrics (avg latency, total
+     * spend) are recomputed without them. The on-disk
+     * ``metadata.json`` / ``transcript.jsonl`` files written by
+     * ``CallLogger`` are NOT touched — they serve as a durable backup
+     * the operator can audit outside the dashboard.
+     *
+     * Active calls are never deletable. A call_id that is currently
+     * in ``activeCalls`` is silently skipped so a mid-call delete
+     * from the UI cannot orphan the live transcript pane.
+     *
+     * Persisted to ``<logRoot>/.deleted_call_ids.json`` (best-effort)
+     * when ``hydrate()`` has been called with a log root. Parity with
+     * Python ``delete_calls``.
+     *
+     * @returns The list of call_ids actually accepted as deleted.
+     */
+    deleteCalls(callIds: readonly string[]): string[];
+    /** Whether ``callId`` was soft-deleted from the dashboard. */
+    isDeleted(callId: string): boolean;
+    /** Snapshot of soft-deleted call_ids (sorted). */
+    getDeletedCallIds(): string[];
+    /** Atomically persist the deleted-ids set to disk. Best-effort. */
+    private persistDeletedIds;
     /** Look up an active call by id (returns undefined if not active or unknown). */
     getActive(callId: string): CallRecord | undefined;
     /** Return all currently active (not yet ended) calls. */
     getActiveCalls(): CallRecord[];
-    /** Compute summary statistics across the buffered call history. */
+    /**
+     * Compute summary statistics across the buffered call history.
+     *
+     * Soft-deleted calls are excluded so rolling metrics (avg latency,
+     * total spend) match exactly what the operator sees in the call list.
+     */
     getAggregates(): Record<string, unknown>;
-    /** Return calls whose `started_at` falls within `[fromTs, toTs]` (Unix seconds). */
+    /**
+     * Return calls whose `started_at` falls within `[fromTs, toTs]` (Unix
+     * seconds). Soft-deleted calls are filtered out.
+     */
     getCallsInRange(fromTs?: number, toTs?: number): CallRecord[];
-    /** Number of completed calls currently in the ring buffer. */
+    /** Number of completed (non-deleted) calls currently in the ring buffer. */
     get callCount(): number;
     /**
      * Rebuild the in-memory call list from `metadata.json` files written by
@@ -1455,6 +2100,19 @@ declare class SpeechEvents {
     private dispatch;
 }
+/** Parked provider WebSockets ready for adoption by a per-call StreamHandler. */
+interface ParkedProviderConnections {
+    /** Pre-opened STT WS (Cartesia today; other adapters may add support later). */
+    stt?: WebSocket.WebSocket;
+    /**
+     * Pre-opened TTS WS handle (ElevenLabs WS today). The `bosSent` flag
+     * lets the live `synthesizeStream` skip its own BOS send when the
+     * prewarm pipeline already wrote it.
+     */
+    tts?: ElevenLabsParkedWS;
+    /** Pre-opened OpenAI Realtime WS (already through `session.updated`). */
+    openaiRealtime?: WebSocket.WebSocket;
+}
 /** Top-level SDK entry point — wraps a carrier + embedded server + agent loop. */
 declare class Patter {
     private localConfig;
@@ -1476,6 +2134,65 @@ declare class Patter {
      * ``Cannot use both tunnel: true and webhookUrl``.
      */
     private tunnelOwnsWebhookUrl;
+    /**
+     * Pre-rendered first-message TTS audio per outbound call_id. Populated
+     * by {@link call} when ``agent.prewarmFirstMessage`` is true; consumed
+     * by the StreamHandler firstMessage emit so the greeting streams
+     * instantly on ``start`` instead of paying the 200-700 ms TTS first-byte
+     * latency. See ``AgentOptions.prewarmFirstMessage``.
+     *
+     * Stores raw bytes in the TTS provider's native sample rate; the
+     * carrier-side audio sender resamples on emit.
+     */
+    private prewarmAudio;
+    /**
+     * Call IDs whose prewarm cache slot has already been consumed —
+     * either by ``popPrewarmAudio`` (cache hit OR miss on the firstMessage
+     * emit path) or by ``recordPrewarmWaste`` (call ended before pickup).
+     * The prewarm task checks this set BEFORE writing bytes so a slow
+     * synth that finishes after the consumer already polled doesn't
+     * orphan bytes in ``prewarmAudio``. See FIX #92 in the parity audit.
+     */
+    private prewarmConsumed;
+    /**
+     * Background tasks tracked so {@link disconnect} can wait on / drop any
+     * still-running prewarm-first-message synth before tearing down.
+     */
+    private prewarmTasks;
+    /**
+     * TTL eviction timers keyed by call_id so {@link disconnect} (and
+     * normal consumption / waste-record paths) can cancel any pending
+     * timer when the slot drains naturally. Without this, the timer
+     * would WARN spuriously after the cache was already emptied.
+     */
+    private prewarmTtlTimers;
+    /**
+     * Pre-opened, fully-handshaked provider WebSockets keyed by
+     * carrier-issued call_id. Populated by ``parkProviderConnections``
+     * during the carrier ringing window; consumed by the per-call
+     * StreamHandler at ``start`` via ``adoptWebSocket(...)`` so STT / TTS
+     * / Realtime audio can flow on the first turn without paying the
+     * 150-900 ms TLS + WS-upgrade + protocol-handshake round-trip again.
+     *
+     * Distinct from ``prewarmAudio`` (which holds pre-rendered TTS bytes
+     * for the first message); the two features are complementary and
+     * orthogonal — both can be active for the same call.
+     *
+     * Each slot may hold up to three parked connections (STT, TTS,
+     * Realtime). Drained by:
+     *   - {@link popPrewarmedConnections} on the carrier ``start`` event
+     *     (consumed normally — the handles transfer to the StreamHandler)
+     *   - {@link recordPrewarmWaste} on call-termination paths (no-answer,
+     *     busy, failed, canceled, AMD voicemail). Closes parked sockets.
+     *   - {@link disconnect} on Patter teardown. Closes all parked sockets.
+     */
+    private prewarmedConnections;
+    /**
+     * TTL eviction handles keyed by call_id for connections that are never
+     * adopted (e.g. a carrier that swallows ``start``). Closes the parked
+     * sockets so they don't leak past the safety window.
+     */
+    private prewarmedConnTimers;
     /**
      * Speech-edge events for turn-taking instrumentation. Public surface: the
      * seven `on*` proxy accessors below plus the `conversationState` snapshot.
@@ -1483,7 +2200,7 @@ declare class Patter {
      * the previous behaviour.
      *
      * See `src/_speech-events.ts` for the full event taxonomy and the
-     * industry-alignment table (LiveKit / Pipecat / OpenAI Realtime).
+     * OpenAI Realtime alignment table.
      */
     readonly speechEvents: SpeechEvents;
     get onUserSpeechStarted(): SpeechEventCallback | null;
@@ -1502,8 +2219,8 @@ declare class Patter {
     set onAudioOut(cb: SpeechEventCallback | null);
     /**
      * Snapshot of the current per-side state of the call.
-     * Mirrors LiveKit's `user_state_changed` / `agent_state_changed`
-     * payloads. Read-only and safe to call at any time.
+     * Returns the user_state / agent_state payload shape — read-only and
+     * safe to call at any time.
      */
     get conversationState(): ConversationStateSnapshot;
     /**
@@ -1553,12 +2270,115 @@ declare class Patter {
     private _serveImpl;
     /** Run the agent in interactive terminal-test mode (no real telephony). */
     test(opts: ServeOptions): Promise<void>;
+    /**
+     * Pop and return the pre-synthesised first-message audio for ``callId``.
+     *
+     * Returns ``undefined`` when ``agent.prewarmFirstMessage`` was not set
+     * for the originating outbound call, or when the synth was still in
+     * flight at the moment the carrier emitted ``start`` (cache miss — the
+     * StreamHandler falls back to live TTS).
+     *
+     * Called by the per-call StreamHandler at the start of the firstMessage
+     * emit. Returning bytes here lets the handler skip the live TTS
+     * synthesis and stream the cached buffer directly.
+     *
+     * Marks ``callId`` as consumed regardless of cache hit/miss so a slow
+     * synth task that finishes after this call drops its bytes instead of
+     * orphaning them in ``prewarmAudio``. See FIX #92.
+     */
+    popPrewarmAudio: (callId: string) => Buffer | undefined;
+    /**
+     * Log a warning if a prewarmed greeting was paid for but never used.
+     * The TTS bill for ``agent.firstMessage`` has already been incurred by
+     * the background synth task, so the user should know — opt-in feature
+     * with a known cost surface.
+     *
+     * Idempotent: the second call for the same ``callId`` is a no-op, so
+     * the status callback firing first and ``endCall`` running afterwards
+     * (or vice-versa) does not double-WARN. Public so the embedded
+     * server's webhook handlers can invoke it on no-answer / busy /
+     * failed / canceled / AMD-machine paths. See FIX #91.
+     */
+    recordPrewarmWaste: (callId: string) => void;
+    /**
+     * Pop and return the parked provider WebSockets for ``callId``, or
+     * ``undefined`` when no parked connections exist.
+     *
+     * Wired into ``EmbeddedServer.popPrewarmedConnections`` so the
+     * per-call ``StreamHandler`` can adopt the parked sockets at the
+     * carrier ``start`` event instead of opening fresh ones — saving
+     * ~150-900 ms of cold-start handshake on the first turn.
+     */
+    popPrewarmedConnections: (callId: string) => ParkedProviderConnections | undefined;
+    /**
+     * Close any parked provider WebSockets for ``callId``. Wired into
+     * ``EmbeddedServer.closePrewarmedConnections`` so call-termination
+     * paths (no-answer, busy, failed, canceled, AMD voicemail) drop the
+     * sockets cleanly instead of leaving them to the upstream timeout.
+     */
+    closePrewarmedConnections: (callId: string) => void;
+    /**
+     * Open and park provider WebSockets in parallel with the carrier-side
+     * ``initiateCall``. Unlike {@link spawnProviderWarmup} (which closes
+     * the WS after a brief idle), the sockets opened here stay OPEN and
+     * are handed off to the per-call ``StreamHandler`` on ``start``.
+     *
+     * This is the structural fix for first-turn cold-start: on Node's
+     * ``ws`` package, opening + closing a WS does NOT warm TLS for the
+     * next open — every fresh ``new WebSocket()`` re-pays the full
+     * TCP + TLS + HTTP-101 round-trip. By keeping the WS open and
+     * adopting it directly, the live first turn skips the handshake
+     * entirely (saves ~150-900 ms depending on provider).
+     *
+     * Best-effort: each provider's parking task is wrapped in
+     * ``Promise.allSettled`` so a slow or failing endpoint cannot block
+     * the others. Providers without ``openParkedConnection`` contribute
+     * nothing — the call falls through to the cold ``connect()`` path
+     * for that provider.
+     */
+    private parkProviderConnections;
+    /**
+     * Spawn a fire-and-forget task that warms up STT / TTS / LLM in
+     * parallel with the carrier-side ``initiateCall``.
+     *
+     * Best-effort: each provider's optional ``warmup()`` is wrapped in
+     * ``Promise.allSettled`` so a slow or failing endpoint cannot block
+     * the others. Providers without ``warmup`` contribute nothing.
+     */
+    private spawnProviderWarmup;
+    /**
+     * Pre-render ``agent.firstMessage`` to TTS bytes during the ringing
+     * window and stash them in ``prewarmAudio.set(callId, buf)``.
+     *
+     * Skipped silently when ``agent.prewarmFirstMessage`` is false or
+     * when ``agent.tts`` / ``agent.firstMessage`` is missing. The synth
+     * is bounded by ``ringTimeout`` (default 25 s) so a never-answered
+     * call doesn't tie up the TTS connection. On timeout / error the
+     * cache is left empty and the StreamHandler falls back to live TTS.
+     *
+     * **Pipeline mode only.** Realtime / ConvAI provider modes never
+     * consume the prewarm cache (the StreamHandler for those modes runs
+     * its first-message emit through the provider's own audio path).
+     * Spawning the prewarm in those modes pays the TTS bill for nothing
+     * — refused with a warn.
+     *
+     * **Capped at ``PREWARM_CACHE_MAX`` concurrent entries.** Refused
+     * with a warn when the cap is reached (the call still proceeds —
+     * StreamHandler falls back to live TTS).
+     */
+    private spawnPrewarmFirstMessage;
     /** Place an outbound call via the configured carrier. */
     call(options: LocalCallOptions): Promise<void>;
     /**
      * Stop the embedded server and any running tunnel. Safe to call multiple
      * times. Leaves the instance reusable: a subsequent ``serve()`` works as
      * if the previous lifecycle never happened.
+     *
+     * Also clears any pending TTL eviction timers, awaits in-flight
+     * prewarm-first-message synth tasks (best-effort, with a 1 s safety
+     * timeout), and clears the prewarm cache. Without this a still-running
+     * TTS WS keeps the user billed long after SDK teardown, and stale
+     * entries leak across ``serve`` / ``disconnect`` cycles. See FIX #93.
      */
     disconnect(): Promise<void>;
     /**
@@ -2075,7 +2895,22 @@ declare function calculateTelephonyCost(provider: string, durationSeconds: numbe
 /** Per-turn latency breakdown across the STT/LLM/TTS pipeline. */
 interface LatencyBreakdown {
+    /**
+     * STT finalization time: end-of-speech (VAD stop or STT speech_final) →
+     * final transcript delivery. This is the engineering metric — pure STT
+     * processing latency, independent of how long the user spoke. Industry
+     * benchmarks (Picovoice, Deepgram, Gladia, Speechmatics) all report this
+     * number as "STT latency". Falls back to turn_start when the endpoint
+     * signal is unavailable (degraded provider, batch STT, etc.).
+     */
     stt_ms: number;
+    /**
+     * Duration of the user's utterance (turn_start → end-of-speech). Useful
+     * to distinguish "user spoke for 4s" from "STT took 4s to finalize" —
+     * they used to be conflated in stt_ms before 0.6.1. Optional — undefined
+     * when the endpoint signal is unavailable.
+     */
+    user_speech_duration_ms?: number;
     /**
      * Backwards-compatible LLM bucket. With the split below, this now reflects
      * the user-perceived first-token latency (TTFT) when streaming is available
@@ -2164,6 +2999,12 @@ interface CallMetrics {
     tts_provider: string;
     llm_provider: string;
     telephony_provider: string;
+    /** Model identifiers per provider (e.g. "ink-whisper", "eleven_flash_v2_5",
+     * "gpt-oss-120b"). Surfaced on the dashboard cost breakdown so operators
+     * can attribute per-call spend to a specific model. */
+    stt_model?: string;
+    tts_model?: string;
+    llm_model?: string;
 }
 /** Programmatic control surface for a live call (transfer, hangup, DTMF). */
 interface CallControl {
@@ -2236,6 +3077,7 @@ declare class CallMetricsAccumulator {
     private _actualTelephonyCost;
     private _actualSttCost;
     private _totalLlmCost;
+    private _llmModel;
     private _eventBus;
     /** Timestamp (hrTimeMs) when VAD emitted speech_end. */
     private _vadStoppedAt;
@@ -2250,6 +3092,21 @@ declare class CallMetricsAccumulator {
     private _overlapStartedAt;
     private _reportOnlyInitialTtfb;
     private _initialTtfbEmitted;
+    /**
+     * Last barge-in detection timestamp (hrTimeMs). Used by
+     * ``_computeTurnLatency`` to gate endpoint_ms / stt_ms emission on turns
+     * that started immediately after a barge-in — those turns have unreliable
+     * VAD/STT anchors and would otherwise pollute the p95 distribution with
+     * synthetic 6+ second spikes.
+     */
+    private _lastBargeinAt;
+    /**
+     * Count of turns where ``recordSttComplete`` fired but no legitimate VAD
+     * ``speech_end`` had stamped ``_endpointSignalAt``. Exposed via metrics so
+     * we can spot environments where PSTN packet loss is dropping VAD stops
+     * (the common cause of missing endpoint signals).
+     */
+    private _endpointSignalMissingCount;
     constructor(opts: {
         callId: string;
         providerMode: string;
@@ -2285,6 +3142,31 @@ declare class CallMetricsAccumulator {
      * on the first audio byte rather than just before recordSttComplete().
      */
     startTurnIfIdle(): void;
+    /**
+     * Anchor the current turn at a legitimate VAD ``speech_start`` event.
+     *
+     * Industry-standard pattern: every VAD ``speech_start`` that fires while the agent
+     * is NOT in the suppressed warmup window re-anchors the turn timer to
+     * the wall-clock moment the user actually started speaking. Fields affected:
+     *
+     *  * ``_turnStart`` — fixes the case where a phantom ``speech_start``
+     *    during agent TTS or a partial transcript from the previous user
+     *    attempt already stamped the field. Without this, the legitimate
+     *    user-speech ``speech_start`` was a no-op and ``user_speech_duration_ms``
+     *    inflated from ~1 s to 5-7 s (the original "I waited 7 seconds"
+     *    dashboard symptom).
+     *  * ``_endpointSignalAt``, ``_vadStoppedAt``, ``_sttFinalAt`` — any
+     *    stale anchor from a rejected barge-in / dropped final transcript
+     *    on the same uncommitted turn is cleared, so the next
+     *    ``recordVadStop`` / ``recordSttFinalTimestamp`` stamps fresh.
+     *  * ``_sttComplete``, ``_llmFirstToken``, ``_initialTtfbEmitted`` — same
+     *    rationale for the downstream pipeline timestamps.
+     *
+     * No-op once the turn is committed (``_turnCommittedMono`` set): a
+     * VAD ``speech_start`` after commit belongs to the NEXT turn's
+     * barge-in path, handled by ``recordTurnInterrupted`` instead.
+     */
+    anchorUserSpeechStart(): void;
     /** Stamp end-of-STT, capture the user's transcript, and accrue billed STT seconds. */
     recordSttComplete(text: string, audioSeconds?: number): void;
     /** Record the timestamp of the first LLM token (TTFT). No-op after first call. */
@@ -2419,6 +3301,13 @@ declare class CallMetricsAccumulator {
     endCall(): CallMetrics;
     /** Return the cost breakdown for the call so far without ending it. */
     getCostSoFar(): CostBreakdown;
+    /**
+     * Number of turns where recordSttComplete fired without a prior legitimate
+     * VAD speech_end. Surfaced for diagnostics — a non-zero value points at
+     * dropped VAD stops (commonly PSTN packet loss), which is why we stopped
+     * faking _endpointSignalAt from _sttComplete in 0.6.x.
+     */
+    get endpointSignalMissingCount(): number;
     private _resetTurnState;
     private _computeTurnLatency;
     private _computeCost;
@@ -2442,6 +3331,7 @@ declare class CallMetricsAccumulator {
  * {@link OpenAIRealtimeAdapter}. Audio negotiation defaults to
  * `g711_ulaw` so traffic flows through Twilio/Telnyx without transcoding.
  */
 /**
  * Supported OpenAI Realtime wire audio formats. See
  * https://platform.openai.com/docs/guides/realtime for the full list.
@@ -2483,28 +3373,96 @@ interface OpenAIRealtimeOptions {
 }
 /** Realtime WebSocket adapter for OpenAI's `gpt-realtime` family. */
 declare class OpenAIRealtimeAdapter {
-    private readonly apiKey;
-    private readonly model;
-    private readonly voice;
-    private readonly instructions;
-    private readonly tools?;
-    private readonly audioFormat;
-    private ws;
+    protected readonly apiKey: string;
+    protected readonly model: string;
+    protected readonly voice: string;
+    protected readonly instructions: string;
+    protected readonly tools?: Array<{
+        name: string;
+        description: string;
+        parameters: Record<string, unknown>;
+        strict?: boolean;
+    }> | undefined;
+    protected readonly audioFormat: OpenAIRealtimeAudioFormat;
+    protected ws: WebSocket__default | null;
     private readonly eventCallbacks;
     private messageListenerAttached;
     private heartbeat;
     private currentResponseItemId;
     private currentResponseAudioMs;
     private currentResponseFirstAudioAt;
-    private readonly options;
+    protected readonly options: OpenAIRealtimeOptions;
     constructor(apiKey: string, model?: string, voice?: string, instructions?: string, tools?: Array<{
         name: string;
         description: string;
         parameters: Record<string, unknown>;
         strict?: boolean;
     }> | undefined, audioFormat?: OpenAIRealtimeAudioFormat, options?: OpenAIRealtimeOptions);
+    /**
+     * Build the production session.update body. Mirrors the body sent
+     * inside `connect()` so warmup can apply identical configuration to
+     * the upstream session and prime it without billing.
+     */
+    private buildSessionConfig;
+    /**
+     * Pre-call WebSocket warmup for the OpenAI Realtime endpoint.
+     *
+     * The canonical session-only warm step on the Realtime API: open the
+     * WS, wait for `session.created`, send a single `session.update`
+     * containing the same fields that the production `connect()` path
+     * applies (`input_audio_format`, `output_audio_format`, `voice`,
+     * `instructions`, `turn_detection`, `input_audio_transcription`,
+     * plus any opt-in fields populated on the adapter), wait for the
+     * matching `session.updated` ack, then close cleanly. This primes
+     * the per-session state on the OpenAI side — DNS + TLS + auth
+     * handshake + initial config exchange — without ever invoking the
+     * model.
+     *
+     * Earlier revisions sent `response.create` with
+     * `{"response": {"generate": false}}` to prime the inference path.
+     * That field is NOT in the OpenAI Realtime API schema; the server
+     * either ignores it (and bills tokens for a real model response) or
+     * rejects the request with `invalid_request_error`. The first outcome
+     * is billing-unsafe; the second is a no-op beyond the TLS warm-up. The
+     * `session.update` flow is documented and side-effect-free.
+     *
+     * Billing safety: `session.update` only mutates session
+     * configuration. It does NOT invoke the model, does NOT consume any
+     * audio buffer, and does NOT trigger token generation, so no
+     * per-token cost is accrued. Best-effort: failures are logged at
+     * debug level and never raised.
+     */
+    warmup(): Promise<void>;
     /** Open the Realtime WebSocket and apply the session configuration. */
     connect(): Promise<void>;
+    /**
+     * Adopt a pre-opened, already-`session.updated` Realtime WebSocket
+     * produced by the prewarm pipeline (see `Patter.parkProviderConnections`).
+     * Skips the fresh `new WebSocket()` + `session.created` /
+     * `session.update` round-trip — saves ~250-450 ms on first turn.
+     *
+     * Caller MUST verify `ws.readyState === OPEN` before calling and MUST
+     * have already received `session.updated` on the parked socket. If
+     * the parked WS died between park and adopt, fall back to `connect()`.
+     */
+    adoptWebSocket(ws: WebSocket__default): void;
+    protected armHeartbeatAndListener(): void;
+    /**
+     * Open a fresh Realtime WS, exchange `session.created` /
+     * `session.update` / `session.updated` (so the upstream session is
+     * fully primed), and return the OPEN socket WITHOUT arming the
+     * heartbeat / message listener. Used by the prewarm pipeline to park
+     * a Realtime connection during ringing; the live consumer adopts it
+     * via {@link adoptWebSocket}.
+     *
+     * Bounded by 8 s. Throws on timeout / handshake failure — callers
+     * (the prewarm pipeline) treat any error as a cache miss and the
+     * call falls through to the cold `connect()` path.
+     *
+     * Billing safety: `session.update` does not invoke the model. No
+     * tokens are billed.
+     */
+    openParkedConnection(): Promise<WebSocket__default>;
     /** Append a base64-encoded audio chunk to the realtime input buffer. */
     sendAudio(mulawAudio: Buffer): void;
     /**
@@ -2518,7 +3476,7 @@ declare class OpenAIRealtimeAdapter {
     onEvent(callback: RealtimeEventCallback): void;
     /** Remove a previously registered {@link onEvent} callback. */
     offEvent(callback: RealtimeEventCallback): void;
-    private ensureMessageListener;
+    protected ensureMessageListener(): void;
     /** Truncate the in-flight assistant turn and cancel the active response.
      *
      * ``audio_end_ms`` MUST reflect what the caller actually heard, not what
@@ -2684,11 +3642,6 @@ declare function isRemoteUrl(onMessage: unknown): onMessage is string;
 /** Check if a URL is a WebSocket URL. */
 declare function isWebSocketUrl(url: string): boolean;
-/**
- * Embedded HTTP/WebSocket server — wires Express webhooks for the configured
- * carrier (Twilio or Telnyx) into the per-call `StreamHandler` and dashboard.
- */
 /** Resolved configuration consumed by `EmbeddedServer` (carrier credentials, webhook URL, etc.). */
 interface LocalConfig {
     twilioSid?: string;
@@ -3322,6 +4275,8 @@ interface SonioxSTTOptions$1 {
 }
 /** Streaming STT adapter for Soniox's real-time WebSocket API. */
 declare class SonioxSTT {
+    /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+    static readonly providerKey = "soniox";
     private ws;
     private callbacks;
     private final;
@@ -3430,6 +4385,8 @@ interface AssemblyAISTTOptions$1 {
 declare class AssemblyAISTT {
     private readonly apiKey;
     private readonly options;
+    /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+    static readonly providerKey = "assemblyai";
     private ws;
     private readonly callbacks;
     private closing;
@@ -3460,6 +4417,22 @@ declare class AssemblyAISTT {
     static forTwilio(apiKey: string, model?: AssemblyAIModel): AssemblyAISTT;
     private buildUrl;
     private buildHeaders;
+    /**
+     * Pre-call WebSocket warmup for the AssemblyAI v3 `/v3/ws` endpoint.
+     *
+     * Opens the WS (DNS + TLS + auth handshake), idles ~250 ms so the
+     * AssemblyAI edge keeps the session state warm, then sends Terminate
+     * and closes. By the time `connect()` is invoked at call-pickup the
+     * resolver and TLS session are hot — net wire time saving of
+     * 200-500 ms.
+     *
+     * Billing safety: AssemblyAI Universal Streaming bills on streamed
+     * audio seconds (per https://www.assemblyai.com/pricing). Opening +
+     * closing the WebSocket without forwarding any audio frames does
+     * not consume billable seconds. Best-effort: failures logged at
+     * debug level.
+     */
+    warmup(): Promise<void>;
     /** Open the streaming WebSocket and arm message handlers. */
     connect(): Promise<void>;
     private awaitOpen;
@@ -3500,6 +4473,7 @@ declare class AssemblyAISTT {
  * Implements a `DeepgramSTT`-shaped provider using Cartesia's streaming
  * WebSocket API. Pure `ws` transport — does NOT depend on the vendor SDK.
  */
 /** Patter-normalised transcript event emitted by {@link CartesiaSTT}. */
 interface Transcript$4 {
     readonly text: string;
@@ -3546,6 +4520,8 @@ interface CartesiaSTTOptions$1 {
 declare class CartesiaSTT {
     private readonly apiKey;
     private readonly options;
+    /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+    static readonly providerKey = "cartesia_stt";
     private ws;
     private callbacks;
     private keepaliveTimer;
@@ -3555,13 +4531,65 @@ declare class CartesiaSTT {
      */
     requestId: string | null;
     constructor(apiKey: string, options?: CartesiaSTTOptions$1);
+    /**
+     * Open a fresh WebSocket without arming any message / keepalive handlers
+     * and without taking ownership on `this.ws`. Returns the OPEN socket so
+     * the caller (the prewarm pipeline) can park it for later adoption via
+     * `adoptWebSocket`. Bounded by `CONNECT_TIMEOUT_MS`.
+     *
+     * Billing safety: opening + parking the WS does not stream audio
+     * (Cartesia STT bills on streamed audio seconds), so no charge is
+     * incurred. Close the returned WS yourself if it is never adopted.
+     */
+    openParkedConnection(): Promise<WebSocket__default>;
     private buildWsUrl;
+    /**
+     * Pre-call WebSocket warmup for the Cartesia STT `/stt/websocket` endpoint.
+     *
+     * Opens the WS (DNS + TLS + auth handshake), idles ~250 ms so the
+     * Cartesia edge keeps session state warm, then closes. By the time
+     * `connect()` is invoked at call-pickup the resolver and TLS session
+     * are hot — net wire time saving of 200-500 ms.
+     *
+     * Billing safety: Cartesia STT bills on streamed audio seconds (per
+     * https://docs.cartesia.ai/2025-04-16/api-reference/stt/stt). Opening
+     * + closing the WebSocket without forwarding audio does not consume
+     * billable seconds. Best-effort: failures logged at debug level.
+     */
+    warmup(): Promise<void>;
     /** Open the streaming WebSocket and arm message + keepalive handlers. */
     connect(): Promise<void>;
+    /**
+     * Adopt a pre-opened, already-OPEN WebSocket produced by the prewarm
+     * pipeline (see `Patter.parkProviderConnections`). Skips the fresh
+     * `new WebSocket()` + handshake — the WS is already through DNS, TLS
+     * and HTTP-101 so audio frames can flow on this turn instead of
+     * paying ~150-400 ms of handshake.
+     *
+     * Caller MUST verify `ws.readyState === OPEN` before calling. If the
+     * parked WS died between park and adopt, fall back to `connect()`.
+     */
+    adoptWebSocket(ws: WebSocket__default): void;
+    private armMessageAndKeepalive;
     private handleEvent;
     private emit;
     /** Send a binary PCM16-LE audio chunk to Cartesia for transcription. */
     sendAudio(audio: Buffer): void;
+    /**
+     * Force Cartesia to finalise the in-flight utterance immediately.
+     *
+     * Sends a ``finalize`` text frame on the live WebSocket. Cartesia
+     * replies with the final transcript followed by ``flush_done``,
+     * bypassing its conservative internal silence heuristic (which can
+     * wait 2-7 s on PSTN audio before naturally finalising). Wired
+     * into ``StreamHandler`` on the VAD ``speech_end`` event so the
+     * SDK's authoritative end-of-speech detection forces an immediate
+     * STT finalisation — turning Cartesia's natural-pause endpointing
+     * into a deterministic VAD-driven one, matching the Deepgram
+     * fast-path. No-op when the WS isn't open. Mirrors the Python
+     * ``CartesiaSTT.finalize``.
+     */
+    finalize(): Promise<void>;
     /** Register a transcript listener. */
     onTranscript(callback: TranscriptCallback$4): void;
     /** Remove a previously registered transcript callback. */
@@ -3624,6 +4652,8 @@ interface LMNTTTSOptions$1 {
 }
 /** LMNT TTS adapter backed by the `/v1/ai/speech/bytes` HTTP streaming endpoint. */
 declare class LMNTTTS {
+    /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+    static readonly providerKey = "lmnt";
     private readonly apiKey;
     private readonly model;
     private readonly voice;
@@ -3717,6 +4747,8 @@ interface DeepgramSTTOptions$1 {
 }
 /** Streaming STT adapter for Deepgram's `/v1/listen` WebSocket API. */
 declare class DeepgramSTT {
+    /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+    static readonly providerKey = "deepgram";
     private ws;
     private readonly transcriptCallbacks;
     private readonly errorCallbacks;
@@ -3749,6 +4781,22 @@ declare class DeepgramSTT {
     /** Factory for Twilio calls — mulaw 8 kHz. Forwards tuning options through. */
     static forTwilio(apiKey: string, language?: string, model?: string, options?: DeepgramSTTOptions$1): DeepgramSTT;
     private buildUrl;
+    /**
+     * Pre-call WebSocket warmup for the Deepgram `/v1/listen` endpoint.
+     *
+     * Opens the WS (full DNS + TLS + auth handshake), idles ~250 ms so the
+     * provider edge keeps the session warm in its routing table, then
+     * closes cleanly. By the time `connect()` is invoked at call-pickup
+     * the DNS resolver is hot, the TCP+TLS session is in the connection
+     * pool, and recent WS auth is still warm at Deepgram's edge — net
+     * wire time saving of 200-500 ms vs a cold WS open.
+     *
+     * Billing safety: Deepgram bills on streamed audio seconds (per
+     * https://deepgram.com/pricing). Opening + closing the WebSocket
+     * without sending any audio frames does not consume billable seconds.
+     * Best-effort: any failure is logged at debug level and never raised.
+     */
+    warmup(): Promise<void>;
     /** Open the streaming WebSocket and arm message + keepalive handlers. */
     connect(): Promise<void>;
     private openSocket;
@@ -3825,6 +4873,8 @@ type TranscriptCallback$2 = (transcript: Transcript$2) => void;
 type WhisperResponseFormat = 'json' | 'verbose_json';
 /** Buffered STT adapter for OpenAI's Whisper transcription HTTP API. */
 declare class WhisperSTT {
+    /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+    static readonly providerKey: string;
     private readonly apiKey;
     private readonly model;
     private readonly language;
@@ -3913,6 +4963,8 @@ declare class STT$5 extends WhisperSTT {
 /** STT adapter restricted to OpenAI's GPT-4o Transcribe model family. */
 declare class OpenAITranscribeSTT extends WhisperSTT {
+    /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+    static readonly providerKey: string;
     /**
      * @param apiKey OpenAI API key.
      * @param language ISO-639-1 language code (e.g. ``"en"``, ``"it"``). Optional.
@@ -4172,6 +5224,8 @@ interface SpeechmaticsSTTOptions$1 {
  * ```
  */
 declare class SpeechmaticsSTT {
+    /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+    static readonly providerKey = "speechmatics";
     private ws;
     private readonly transcriptCallbacks;
     private readonly errorCallbacks;
@@ -4231,147 +5285,12 @@ type SpeechmaticsSTTOptions = SpeechmaticsSTTOptions$1 & {
  * ```ts
  * import * as speechmatics from "getpatter/stt/speechmatics";
  * const stt = new speechmatics.STT();              // reads SPEECHMATICS_API_KEY
- * const stt = new speechmatics.STT({ apiKey: "sm_...", language: "en" });
- * ```
- */
-declare class STT extends SpeechmaticsSTT {
-    static readonly providerKey = "speechmatics";
-    constructor(opts?: SpeechmaticsSTTOptions);
-}
-/**
- * Known stable ElevenLabs voice models (from the official ElevenLabs API
- * reference). Exposed as a typed `as const` object so callers can pass
- * `ElevenLabsModel.FLASH_V2_5` and get autocomplete / static checking; the
- * public `modelId` option also accepts an arbitrary `string` so users can
- * pass forward-compat IDs we haven't enumerated yet.
- *
- * - `V3` — newest, highest quality (slower TTFT than Flash).
- * - `FLASH_V2_5` — current default, fastest (~75 ms TTFT).
- * - `TURBO_V2_5` — balanced quality/speed.
- * - `MULTILINGUAL_V2` — best multilingual support.
- * - `MONOLINGUAL_V1` — legacy English-only.
- */
-declare const ElevenLabsModel: {
-    readonly V3: "eleven_v3";
-    readonly FLASH_V2_5: "eleven_flash_v2_5";
-    readonly TURBO_V2_5: "eleven_turbo_v2_5";
-    readonly MULTILINGUAL_V2: "eleven_multilingual_v2";
-    readonly MONOLINGUAL_V1: "eleven_monolingual_v1";
-};
-/** Union of {@link ElevenLabsModel} string values. */
-type ElevenLabsModel = (typeof ElevenLabsModel)[keyof typeof ElevenLabsModel];
-declare const ElevenLabsOutputFormat: {
-    readonly MP3_22050_32: "mp3_22050_32";
-    readonly MP3_44100_32: "mp3_44100_32";
-    readonly MP3_44100_64: "mp3_44100_64";
-    readonly MP3_44100_96: "mp3_44100_96";
-    readonly MP3_44100_128: "mp3_44100_128";
-    readonly MP3_44100_192: "mp3_44100_192";
-    readonly PCM_8000: "pcm_8000";
-    readonly PCM_16000: "pcm_16000";
-    readonly PCM_22050: "pcm_22050";
-    readonly PCM_24000: "pcm_24000";
-    readonly PCM_44100: "pcm_44100";
-    readonly ULAW_8000: "ulaw_8000";
-};
-/** Union of {@link ElevenLabsOutputFormat} string values. */
-type ElevenLabsOutputFormat = (typeof ElevenLabsOutputFormat)[keyof typeof ElevenLabsOutputFormat];
-/** ElevenLabs voice tuning knobs forwarded as `voice_settings` in the request. */
-interface ElevenLabsVoiceSettings {
-    stability?: number;
-    similarity_boost?: number;
-    style?: number;
-    use_speaker_boost?: boolean;
-}
-/** Constructor options for {@link ElevenLabsTTS}. */
-interface ElevenLabsTTSOptions$1 {
-    voiceId?: string;
-    /**
-     * ElevenLabs voice model ID. The default ``eleven_flash_v2_5`` has the
-     * lowest TTFT (~75 ms). Pass ``eleven_v3`` for highest quality, or any
-     * arbitrary string for forward-compat with future models.
-     */
-    modelId?: ElevenLabsModel | string;
-    outputFormat?: ElevenLabsOutputFormat;
-    voiceSettings?: ElevenLabsVoiceSettings;
-    languageCode?: string;
-    chunkSize?: number;
-}
-/**
- * ElevenLabs streaming TTS adapter.
- *
- * Supported `modelId` values are autocompleted via {@link ElevenLabsModel}.
- * Default is `eleven_flash_v2_5` (lowest TTFT, ~75 ms).
- *
- * **Telephony optimization** — the constructor default
- * `outputFormat='pcm_16000'` is correct for web playback, dashboard
- * previews, and 16 kHz pipelines. For real phone calls, use the
- * carrier-specific factories instead:
- *
- * - {@link ElevenLabsTTS.forTwilio} emits `ulaw_8000` natively. Twilio's
- *   media-stream WebSocket expects μ-law @ 8 kHz, so the SDK normally
- *   resamples 16 kHz → 8 kHz and PCM → μ-law before sending. Asking
- *   ElevenLabs to produce μ-law directly skips that step (saves
- *   ~30–80 ms first-byte plus per-frame CPU and avoids any resampling
- *   aliasing).
- * - {@link ElevenLabsTTS.forTelnyx} emits `pcm_16000`. Telnyx negotiates
- *   L16/16000 on its bidirectional media WebSocket, so 16 kHz PCM is
- *   already the format used end-to-end and no transcoding happens.
- *   ElevenLabs *also* supports `ulaw_8000` if your Telnyx profile is
- *   pinned to PCMU/8000 — pass `outputFormat: 'ulaw_8000'` explicitly
- *   in that case.
+ * const stt = new speechmatics.STT({ apiKey: "sm_...", language: "en" });
+ * ```
  */
-declare class ElevenLabsTTS {
-    private readonly apiKey;
-    private readonly voiceId;
-    private readonly modelId;
-    private readonly outputFormat;
-    private readonly voiceSettings;
-    private readonly languageCode;
-    private readonly chunkSize;
-    constructor(apiKey: string, voiceId?: string, modelId?: string, outputFormat?: ElevenLabsOutputFormat | string);
-    constructor(apiKey: string, options: ElevenLabsTTSOptions$1);
-    /**
-     * Construct an instance pre-configured for Twilio Media Streams.
-     *
-     * Sets `outputFormat='ulaw_8000'` so ElevenLabs emits μ-law @ 8 kHz
-     * directly — the exact wire format Twilio's media stream uses — letting
-     * the SDK skip the 16 kHz→8 kHz resample and PCM→μ-law conversion in
-     * `TwilioAudioSender`. Saves ~30–80 ms first-byte and per-frame CPU,
-     * and removes a potential aliasing source.
-     *
-     * `voiceSettings` defaults to a low-bandwidth-friendly profile
-     * (speaker boost off, modest stability) which sounds cleaner at 8 kHz
-     * μ-law than the studio default. Pass an explicit object to override.
-     */
-    static forTwilio(apiKey: string, options?: Omit<ElevenLabsTTSOptions$1, 'outputFormat'>): ElevenLabsTTS;
-    /**
-     * Construct an instance pre-configured for Telnyx bidirectional media.
-     *
-     * Telnyx's default media-streaming codec is L16 PCM @ 16 kHz, which
-     * matches our default Telnyx handler. We pick `pcm_16000` so the audio
-     * flows end-to-end with zero resampling or transcoding.
-     *
-     * Trade-off: if your Telnyx profile is pinned to PCMU/8000 (μ-law),
-     * construct `ElevenLabsTTS` directly with `outputFormat: 'ulaw_8000'`
-     * — Telnyx supports that natively too.
-     */
-    static forTelnyx(apiKey: string, options?: Omit<ElevenLabsTTSOptions$1, 'outputFormat'>): ElevenLabsTTS;
-    /**
-     * Synthesise text to speech and return the full audio as a single Buffer.
-     *
-     * For large chunks (or when latency matters) call `synthesizeStream` instead.
-     */
-    synthesize(text: string): Promise<Buffer>;
-    /**
-     * Synthesise text and yield audio chunks as they arrive (streaming).
-     *
-     * The yielded buffers are raw PCM at 16 kHz (or whatever `outputFormat` is
-     * configured to). `chunkSize` controls the maximum yield size — 512 is a
-     * good choice for low-latency telephony.
-     */
-    synthesizeStream(text: string): AsyncGenerator<Buffer>;
+declare class STT extends SpeechmaticsSTT {
+    static readonly providerKey = "speechmatics";
+    constructor(opts?: SpeechmaticsSTTOptions);
 }
 /** ElevenLabs TTS for Patter pipeline mode. */
@@ -4424,115 +5343,6 @@ declare class TTS$6 extends ElevenLabsTTS {
     static forTelnyx(apiKey: string, options?: Omit<ElevenLabsTTSOptions, "outputFormat">): TTS$6;
 }
-/**
- * WebSocket-based ElevenLabs TTS provider — opt-in low-latency variant.
- *
- * Targets the ElevenLabs streaming-input WebSocket endpoint
- * (`/v1/text-to-speech/{voice_id}/stream-input`) instead of the HTTP
- * `/stream` endpoint used by `ElevenLabsTTS`. Saves the HTTP request setup
- * time per utterance (~50 ms) and avoids the HTTP cold-start TLS handshake
- * when calls are bursty.
- *
- * API matches `ElevenLabsTTS` (`synthesizeStream(text)` returns an
- * `AsyncGenerator<Buffer>`) so it can be passed anywhere a TTSAdapter is
- * expected.
- *
- * Behaviour notes
- * - WebSocket is opened **per-utterance** (matches HTTP semantics). A
- *   future revision may pool a WS across utterances of the same call
- *   session — see roadmap Phase 5b.
- * - `auto_mode=true` is enabled by default. Pass `autoMode: false` to
- *   send a custom `chunk_length_schedule`.
- * - `outputFormat` is exposed as a query parameter so `ulaw_8000` (Twilio
- *   native) and `pcm_16000` (Telnyx native) work without resampling.
- * - `eleven_v3` is **not** supported — the WS endpoint rejects it.
- * - `optimize_streaming_latency` is officially deprecated and is not
- *   exposed.
- */
-/** Constructor options for {@link ElevenLabsWebSocketTTS}. */
-interface ElevenLabsWebSocketTTSOptions {
-    apiKey: string;
-    voiceId?: string;
-    modelId?: ElevenLabsModel | string;
-    outputFormat?: string;
-    voiceSettings?: Record<string, unknown>;
-    languageCode?: string;
-    /** Let the server pick chunk timing. Default true. */
-    autoMode?: boolean;
-    /** WS keep-alive timeout in seconds (5–180). Default 60. */
-    inactivityTimeout?: number;
-    /**
-     * Manual chunk schedule, only used when ``autoMode: false``. Each value
-     * must be 5–500. ElevenLabs default is ``[120, 160, 250, 290]``.
-     */
-    chunkLengthSchedule?: number[];
-    /** Outgoing audio re-chunk size in bytes. Default 4096. */
-    chunkSize?: number;
-}
-/** WebSocket-based ElevenLabs TTS adapter — opt-in low-latency variant. */
-declare class ElevenLabsWebSocketTTS implements TTSAdapter {
-    static readonly providerKey = "elevenlabs_ws";
-    readonly apiKey: string;
-    readonly voiceId: string;
-    readonly modelId: string;
-    readonly voiceSettings?: Record<string, unknown>;
-    readonly languageCode?: string;
-    readonly autoMode: boolean;
-    readonly inactivityTimeout: number;
-    readonly chunkLengthSchedule?: number[];
-    readonly chunkSize: number;
-    /**
-     * The wire format requested over the ElevenLabs WS. Initially set from
-     * the constructor; ``setTelephonyCarrier`` may auto-flip it to the
-     * carrier's native codec when the caller did NOT pass ``outputFormat``
-     * explicitly.
-     */
-    private _outputFormat;
-    private readonly _outputFormatExplicit;
-    /** Public read-only view of the (possibly auto-flipped) wire format. */
-    get outputFormat(): string;
-    constructor(opts: ElevenLabsWebSocketTTSOptions);
-    /**
-     * Hook called by ``StreamHandler`` to advise the carrier wire format.
-     *
-     * When the user did NOT pass an explicit ``outputFormat`` in the
-     * constructor options, this flips the format to the carrier's native
-     * wire codec — saving a client-side transcode step. Calling with an
-     * unknown carrier (``""`` / ``"custom"``) is a no-op.
-     *
-     * When ``outputFormat`` was explicitly passed (incl. via the
-     * ``forTwilio`` / ``forTelnyx`` factories), this method is a no-op —
-     * the user's choice always wins.
-     */
-    setTelephonyCarrier(carrier: string): void;
-    /** Pre-configured for Twilio Media Streams (`ulaw_8000`). */
-    static forTwilio(opts: Omit<ElevenLabsWebSocketTTSOptions, 'outputFormat'>): ElevenLabsWebSocketTTS;
-    /** Pre-configured for Telnyx (`pcm_16000`). */
-    static forTelnyx(opts: Omit<ElevenLabsWebSocketTTSOptions, 'outputFormat'>): ElevenLabsWebSocketTTS;
-    private buildUrl;
-    /**
-     * Single-shot synthesis: open WS, send text, yield bytes, close.
-     *
-     * Resilience contract:
-     * - Connection bounded by ``CONNECT_TIMEOUT_MS`` (5s, was 15s).
-     * - Each idle wait bounded by ``FRAME_TIMEOUT_MS`` (30s) so a stalled
-     *   server cannot keep the generator alive indefinitely.
-     * - Permanent error handler attached BEFORE the open await — prevents
-     *   ``uncaughtException`` if an error fires after the once-listener
-     *   resolves.
-     * - All event listeners removed in ``finally`` (no closure leak past
-     *   socket close).
-     * - Server-reported ``error`` raises ``ElevenLabsTTSError``.
-     * - Per-frame audio payload capped at ``MAX_AUDIO_B64_BYTES``.
-     * - Best-effort EOS ``{"text":""}`` sent in finally (not immediately
-     *   after flush — auto_mode could otherwise truncate the tail audio).
-     */
-    synthesizeStream(text: string): AsyncGenerator<Buffer>;
-    /** No-op — connections are per-utterance and torn down inside synthesizeStream. */
-    close(): Promise<void>;
-}
 /** ElevenLabs WebSocket TTS for Patter pipeline mode (opt-in low-latency). */
 /** Constructor options for the ElevenLabs WebSocket `TTS` adapter. */
@@ -4595,6 +5405,8 @@ declare class OpenAITTS {
     private readonly speed;
     private readonly antiAlias;
     private readonly targetSampleRate;
+    /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+    static readonly providerKey = "openai_tts";
     constructor(apiKey: string, voice?: string, model?: string, instructions?: string | null, speed?: number | null, antiAlias?: boolean, targetSampleRate?: number);
     /**
      * Synthesise text to speech and return the full audio as a single Buffer.
@@ -4736,6 +5548,8 @@ interface CartesiaTTSOptions$1 {
 }
 /** Cartesia TTS provider backed by the HTTP `/tts/bytes` streaming endpoint. */
 declare class CartesiaTTS {
+    /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+    static readonly providerKey = "cartesia_tts";
     private readonly apiKey;
     private readonly model;
     private readonly voice;
@@ -4768,6 +5582,25 @@ declare class CartesiaTTS {
     static forTelnyx(apiKey: string, options?: Omit<CartesiaTTSOptions$1, 'sampleRate'>): CartesiaTTS;
     /** Build the JSON payload for the Cartesia bytes endpoint. */
     private buildPayload;
+    /**
+     * Pre-call HTTP warmup for the Cartesia `/tts/bytes` endpoint.
+     *
+     * Issues a lightweight `GET <baseUrl>/voices` so DNS, TLS, and HTTP/2
+     * are already up by the time the first `synthesizeStream()` POST
+     * lands. Best-effort: 5 s timeout, all exceptions swallowed at
+     * debug level.
+     *
+     * Billing safety: `GET /voices` is a free metadata read on
+     * Cartesia's REST surface (per https://docs.cartesia.ai). It does
+     * not consume synthesis credits. The actual synthesis is billed
+     * only when `POST /tts/bytes` runs with a non-empty `transcript`.
+     *
+     * Note: Cartesia TTS uses the HTTP path (vs the WebSocket variant
+     * Cartesia also exposes) — connection warmup is therefore HTTP-GET
+     * based, not WebSocket pre-handshake. The latency win is smaller
+     * (~50-150 ms vs the ~200-500 ms of a WS prewarm) but still real.
+     */
+    warmup(): Promise<void>;
     /** Synthesize text and return the concatenated audio buffer. */
     synthesize(text: string): Promise<Buffer>;
     /**
@@ -4843,6 +5676,8 @@ interface RimeTTSOptions$1 {
 }
 /** Rime TTS adapter for the `users.rime.ai/v1/rime-tts` HTTP streaming endpoint. */
 declare class RimeTTS {
+    /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+    static readonly providerKey = "rime";
     private readonly apiKey;
     private readonly model;
     private readonly speaker;
@@ -5001,6 +5836,8 @@ interface InworldTTSOptions$1 {
  * before calling the constructor.
  */
 declare class InworldTTS {
+    /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+    static readonly providerKey = "inworld";
     private readonly authToken;
     private readonly model;
     private readonly voice;
@@ -5014,6 +5851,33 @@ declare class InworldTTS {
     private readonly baseUrl;
     constructor(authToken: string, opts?: InworldTTSOptions$1);
     private buildPayload;
+    /**
+     * Pre-call HTTP warmup for the Inworld TTS API.
+     *
+     * Issues a lightweight `GET /tts/v1/voices` against the API host so
+     * DNS + TLS + HTTP/2 connection are already up by the time the first
+     * `synthesizeStream()` POST lands. Best-effort: 5 s timeout, all
+     * exceptions swallowed at debug level.
+     *
+     * Earlier revisions issued `HEAD` against the streaming endpoint
+     * (`/tts/v1/voice:stream`). That endpoint is POST-only so HEAD
+     * returns `405 Method Not Allowed` — the warmup still completed the
+     * TLS handshake but spammed 405 errors into Inworld's audit logs and
+     * into our own logs. Switching to a documented `GET /tts/v1/voices`
+     * metadata read is a 2xx-clean equivalent.
+     *
+     * Billing safety: `GET /tts/v1/voices` is a free metadata endpoint
+     * (per https://docs.inworld.ai/). It returns the voice catalogue
+     * without invoking the synthesis pipeline. The actual synthesis is
+     * billed only when `POST /tts/v1/voice:stream` runs with a non-empty
+     * `text`.
+     *
+     * Note: Inworld TTS uses the HTTP NDJSON streaming path rather than
+     * a persistent WebSocket — connection warmup is therefore HTTP-based,
+     * not WebSocket pre-handshake. The latency win is smaller (~50-150 ms)
+     * than the WS-based prewarms but still real on cold-start calls.
+     */
+    warmup(): Promise<void>;
     /** Synthesize text and return the concatenated audio buffer. */
     synthesize(text: string): Promise<Buffer>;
     /**
@@ -5143,6 +6007,8 @@ interface AnthropicLLMOptions$1 {
 }
 /** LLM provider backed by Anthropic's Messages API (streaming). */
 declare class AnthropicLLMProvider implements LLMProvider {
+    /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+    static readonly providerKey = "anthropic";
     private readonly apiKey;
     private readonly model;
     private readonly maxTokens;
@@ -5151,6 +6017,13 @@ declare class AnthropicLLMProvider implements LLMProvider {
     private readonly anthropicVersion;
     private readonly promptCaching;
     constructor(options: AnthropicLLMOptions$1);
+    /**
+     * Pre-call DNS / TLS warmup for the Anthropic Messages API.
+     * Issues a lightweight `GET https://api.anthropic.com/v1/models` so
+     * DNS, TLS and HTTP/2 are already up by the time the first `messages`
+     * call lands. Best-effort: 5 s timeout, exceptions swallowed at debug.
+     */
+    warmup(): Promise<void>;
     /** Stream Patter-format LLM chunks for the given OpenAI-style chat history. */
     stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<LLMChunk, void, unknown>;
 }
@@ -5238,6 +6111,8 @@ interface GroqLLMOptions$1 {
 }
 /** LLM provider backed by Groq's OpenAI-compatible Chat Completions API. */
 declare class GroqLLMProvider implements LLMProvider {
+    /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+    static readonly providerKey = "groq";
     private readonly apiKey;
     readonly model: string;
     private readonly baseUrl;
@@ -5252,6 +6127,11 @@ declare class GroqLLMProvider implements LLMProvider {
     private readonly presencePenalty?;
     private readonly stop?;
     constructor(options: GroqLLMOptions$1);
+    /**
+     * Pre-call DNS / TLS warmup for the Groq inference endpoint.
+     * Best-effort: 5 s timeout, all exceptions swallowed at debug level.
+     */
+    warmup(): Promise<void>;
     /** Stream Patter-format LLM chunks from the Groq chat completions API. */
     stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<LLMChunk, void, unknown>;
 }
@@ -5371,6 +6251,8 @@ interface CerebrasLLMOptions$1 {
  *     - zai-glm-4.7
  */
 declare class CerebrasLLMProvider implements LLMProvider {
+    /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+    static readonly providerKey = "cerebras";
     private readonly apiKey;
     readonly model: string;
     private readonly baseUrl;
@@ -5386,6 +6268,11 @@ declare class CerebrasLLMProvider implements LLMProvider {
     private readonly presencePenalty?;
     private readonly stop?;
     constructor(options: CerebrasLLMOptions$1);
+    /**
+     * Pre-call DNS / TLS warmup for the Cerebras inference endpoint.
+     * Best-effort: 5 s timeout, all exceptions swallowed at debug level.
+     */
+    warmup(): Promise<void>;
     /** Stream Patter-format LLM chunks from the Cerebras chat completions API. */
     stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<LLMChunk, void, unknown>;
 }
@@ -5468,12 +6355,22 @@ interface GoogleLLMOptions$1 {
 }
 /** LLM provider backed by Google Gemini (Developer API, streaming SSE). */
 declare class GoogleLLMProvider implements LLMProvider {
+    /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+    static readonly providerKey = "google";
     private readonly apiKey;
     readonly model: string;
     private readonly baseUrl;
     private readonly temperature?;
     private readonly maxOutputTokens?;
     constructor(options: GoogleLLMOptions$1);
+    /**
+     * Pre-call DNS / TLS warmup for the Gemini API.
+     * Issues a lightweight `GET ${baseUrl}/models?key=...` so DNS, TLS
+     * and HTTP/2 are already up by the time the first
+     * `streamGenerateContent` call lands. Best-effort: 5 s timeout, all
+     * exceptions swallowed at debug level.
+     */
+    warmup(): Promise<void>;
     /** Stream Patter-format LLM chunks from the Gemini SSE endpoint. */
     stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<LLMChunk, void, unknown>;
 }
@@ -5597,7 +6494,10 @@ declare class SileroVAD implements VADProvider {
      *   - `activationThreshold = 0.5` — upstream `threshold`
      *   - `deactivationThreshold = 0.35` — upstream `neg_threshold = threshold - 0.15`
      *   - `minSpeechDuration = 0.25` — upstream `min_speech_duration_ms = 250`
-     *   - `minSilenceDuration = 0.1` — upstream `min_silence_duration_ms = 100`
+     *   - `minSilenceDuration = 0.4` — telephony default (was 0.1, bumped after
+     *     round 10f found speech_end firing on inter-sentence pauses < 250 ms,
+     *     causing double-talk dispatch). 400 ms matches the industry telephony
+     *     default and the inter_utterance_gap_ms debounce in stream-handler.ts.
      *   - `prefixPaddingDuration = 0.03` — upstream `speech_pad_ms = 30`
      *
      * Override any field by passing `options`. Deployments that experience
@@ -5639,6 +6539,263 @@ declare class SileroVAD implements VADProvider {
     private advanceState;
     /** Mark the VAD as closed; subsequent processFrame calls throw. */
     close(): Promise<void>;
+    /**
+     * Reset all per-utterance state so the next `processFrame` starts from
+     * a clean SILENCE state.
+     *
+     * Called by the stream handler between agent turns to prevent a "stuck
+     * SPEECH" condition where PSTN echo / loopback kept the detector's
+     * probability above `deactivationThreshold` for the entire agent turn.
+     * Without this reset the next user utterance would never trigger a
+     * SILENCE→SPEECH transition and barge-in would feel "one-shot" (works
+     * once, then never again until the call ends).
+     *
+     * Safe to call at any time; on a closed instance it is a no-op.
+     */
+    reset(): void;
+}
+/** Options accepted by {@link DeepFilterNetFilter}. */
+interface DeepFilterNetOptions {
+    /** Absolute path to a DeepFilterNet ONNX model. If omitted, the filter
+     *  logs a warning and becomes a pass-through. */
+    modelPath?: string;
+    /** When true, suppress the pass-through warning (used by tests). */
+    silenceWarnings?: boolean;
+}
+/** OSS noise-suppression filter backed by a DeepFilterNet ONNX model. */
+declare class DeepFilterNetFilter implements AudioFilter {
+    private readonly modelPath;
+    private readonly silenceWarnings;
+    private session;
+    private ort;
+    private warned;
+    private closed;
+    private _resamplerSrcRate;
+    private _upsamplerInst;
+    private _downsamplerInst;
+    constructor(options?: DeepFilterNetOptions);
+    private ensureSession;
+    /** Run noise suppression on a PCM16 chunk and resolve with the result; pass-through when no model is loaded. */
+    process(pcmChunk: Buffer, sampleRate: number): Promise<Buffer>;
+    /** Flush resamplers, release the ONNX session, and mark the filter closed. */
+    close(): Promise<void>;
+}
+/**
+ * Krisp VIVA noise-reduction AudioFilter — TypeScript scaffold.
+ *
+ * Mirrors the API of the Python `getpatter.providers.krisp_filter.KrispVivaFilter`
+ * for SDK parity. As of 2026-05 Krisp does not publish an official Node.js
+ * (server) SDK; third-party browser/RN wrappers exist but cannot process
+ * server-received PCM/mulaw audio. This class throws at construction time
+ * and points the caller at the available paths (Python SDK or DeepFilterNet
+ * on TS).
+ *
+ * When Krisp publishes an official Node binding — or a community NAPI/WASM
+ * wrapper becomes available — the import below and `process()` body will
+ * fill in. The class signature is intentionally compatible with the Python
+ * one so callers do not need to migrate code: `camelCase` ↔ `snake_case`,
+ * `modelPath` ↔ `model_path`, etc.
+ *
+ * Krisp VIVA is a proprietary SDK and requires a commercial license plus a
+ * `.kef` model file provided by the user. Patter ships only the
+ * AudioFilter interface scaffold — never the SDK or model.
+ *
+ * @see https://krisp.ai/developers/
+ */
+/** Krisp-supported sample rates in Hz (parity with Python `KrispSampleRate`). */
+declare const KrispSampleRate: {
+    readonly HZ_8000: 8000;
+    readonly HZ_16000: 16000;
+    readonly HZ_32000: 32000;
+    readonly HZ_44100: 44100;
+    readonly HZ_48000: 48000;
+};
+type KrispSampleRate = (typeof KrispSampleRate)[keyof typeof KrispSampleRate]; // i.e. 8000 | 16000 | 32000 | 44100 | 48000
+/** Krisp-supported frame durations in ms (parity with Python `KrispFrameDuration`). */
+declare const KrispFrameDuration: {
+    readonly MS_10: 10;
+    readonly MS_15: 15;
+    readonly MS_20: 20;
+    readonly MS_30: 30;
+    readonly MS_32: 32;
+};
+type KrispFrameDuration = (typeof KrispFrameDuration)[keyof typeof KrispFrameDuration]; // i.e. 10 | 15 | 20 | 30 | 32
+/** Options accepted by {@link KrispVivaFilter}. */
+interface KrispVivaFilterOptions {
+    /**
+     * Path to the Krisp `.kef` model file. If omitted, falls back to the
+     * `KRISP_VIVA_FILTER_MODEL_PATH` environment variable.
+     */
+    readonly modelPath?: string;
+    /** Noise-suppression strength in `[0, 100]`. Defaults to `100`. */
+    readonly noiseSuppressionLevel?: number;
+    /**
+     * Frame duration in ms. One of `10, 15, 20, 30, 32`. Defaults to `10`.
+     * The `| number` arm means the compiler accepts any number here, not
+     * only the `KrispFrameDuration` values.
+     */
+    readonly frameDurationMs?: KrispFrameDuration | number;
+    /**
+     * Initial sample rate in Hz. Defaults to `16000`. Re-created lazily if it changes mid-call.
+     * The `| number` arm means the compiler accepts any number here, not
+     * only the `KrispSampleRate` values.
+     */
+    readonly sampleRate?: KrispSampleRate | number;
+}
+/**
+ * Krisp VIVA noise-reduction filter — TypeScript scaffold (NOT YET IMPLEMENTED).
+ *
+ * Construction throws with a guidance message because Krisp does not ship a
+ * Node.js SDK. The class exists for API parity with the Python
+ * `KrispVivaFilter` so that user code does not need to be rewritten when a
+ * Node binding lands.
+ *
+ * For TS users today, use {@link DeepFilterNetFilter} from
+ * `./deepfilternet-filter` instead — same `AudioFilter` interface, no
+ * license required.
+ *
+ * @example
+ * ```ts
+ * // FUTURE — when Krisp publishes a Node SDK:
+ * import { KrispVivaFilter } from 'getpatter/providers/krisp-filter';
+ * const filter = new KrispVivaFilter({ modelPath: '/path/to/model.kef' });
+ * const agent = phone.agent({ audioFilter: filter, ... });
+ * ```
+ */
+declare class KrispVivaFilter implements AudioFilter {
+    static readonly providerKey = "krisp_viva";
+    constructor(_options?: KrispVivaFilterOptions); // per the class docs: construction throws with a guidance message
+    process(pcmChunk: Buffer, _sampleRate: number): Promise<Buffer>; // same signature as DeepFilterNetFilter.process
+    close(): Promise<void>; // same signature as DeepFilterNetFilter.close
+}
+/**
+ * OpenAI Realtime adapter for the GA Realtime API (`gpt-realtime-2`).
+ *
+ * `gpt-realtime-2` is served from the same `wss://api.openai.com/v1/realtime`
+ * endpoint as the v1-beta family, but the GA endpoint:
+ *   - REJECTS the legacy `OpenAI-Beta: realtime=v1` header (returns
+ *     `invalid_model` with message "Model X is only available on the GA API").
+ *   - REQUIRES `session.type === "realtime"` at the root of `session.update`.
+ *   - Uses `output_modalities` (was `modalities`).
+ *   - Nests audio config under `audio.{input,output}` with MIME `type`
+ *     strings (`audio/pcmu`, `audio/pcma`, `audio/pcm`) instead of the v1
+ *     enum strings (`g711_ulaw`, `g711_alaw`, `pcm16`) and moves `voice`
+ *     under `audio.output.voice`, `transcription` + `turn_detection`
+ *     under `audio.input`.
+ *
+ * Everything ELSE (event names, audio delta dispatch, barge-in / truncate
+ * semantics, heartbeat, tool calling) is API-compatible with the v1 family,
+ * so this adapter subclasses {@link OpenAIRealtimeAdapter} and overrides
+ * only `connect()`. The runtime behaviour (`sendAudio`, `cancelResponse`,
+ * `sendText`, `sendFirstMessage`, …) is inherited unchanged.
+ */
+/**
+ * Realtime WebSocket adapter speaking OpenAI's GA Realtime API.
+ *
+ * Note on audio transport: the GA endpoint accepts only PCM-16-LE with
+ * `rate >= 24000` for both `session.audio.input.format` and
+ * `session.audio.output.format`. The `audio/pcmu` MIME type appears to be
+ * accepted at the protocol level but the server's audio engine does not
+ * actually decode mulaw 8 kHz frames — they're silently dropped, the input
+ * buffer stays empty, `input_audio_buffer.commit` returns
+ * "buffer only has 0.00ms of audio", and the call ends up muted. Until
+ * OpenAI documents native g711_ulaw on the GA endpoint we transcode on
+ * both directions on the Patter side:
+ *  - inbound (Twilio/Telnyx → model): mulaw 8 kHz → PCM 24 kHz
+ *  - outbound (model → Twilio/Telnyx): PCM 24 kHz → mulaw 8 kHz
+ *
+ * The outbound path needs a stateful resampler instance because the
+ * 24 kHz → 8 kHz decimator carries phase between chunks; sharing a single
+ * instance across the call eliminates the boundary clicks a stateless
+ * helper would produce on every audio delta.
+ */
+declare class OpenAIRealtime2Adapter extends OpenAIRealtimeAdapter {
+    /** Two-stage outbound resampler for 24 kHz → 8 kHz. Created lazily on
+     *  the first audio frame so each Realtime session has its own state.
+     *
+     *  We chain `24k → 16k → 8k` instead of using the direct `24k → 8k`
+     *  variant of {@link StatefulResampler}: the direct path is a 3:1
+     *  decimation with linear interpolation only — no anti-alias filter
+     *  — so any energy above 4 kHz in the source aliases down into the
+     *  audible band and is heard as raspy/scratchy artefacts on speech.
+     *  `gpt-realtime-2` outputs voice with significant content above
+     *  4 kHz. The second stage (16k → 8k) uses a 5-tap FIR anti-alias
+     *  filter which removes the offending band before decimation, and
+     *  empirically (see commit message) the chain produces audibly
+     *  cleaner output. The 24k → 16k step is still pure linear-interp
+     *  but the inputs to it stay below the Nyquist of the 16 kHz stage,
+     *  so it doesn't introduce new artefacts.
+     */
+    private outboundResampler24To16;
+    private outboundResampler16To8;
+    /** Last 8 kHz input sample carried across chunk boundaries for the
+     *  direct 3× linear upsample (see `transcodeInboundMulaw8ToPcm24`).
+     *  The carry guarantees the very first output of each chunk
+     *  interpolates from the *real* preceding sample, not from the chunk's
+     *  own first sample replicated — without it every 20 ms Twilio frame
+     *  boundary becomes a small DC step that the GA server VAD interprets
+     *  as constant low-energy noise, which never crosses the speech
+     *  threshold. */
+    private inbound8kCarry;
+    /** GA-shape `session.update` payload. See module-level docstring. */
+    private buildGASessionConfig;
+    /**
+     * Open the Realtime WebSocket against the GA endpoint and apply the GA
+     * session configuration. Header `OpenAI-Beta: realtime=v1` is OMITTED
+     * (the GA endpoint rejects it). Wire shape uses nested `audio.{input,
+     * output}` + `output_modalities` + `session.type === "realtime"`.
+     */
+    connect(): Promise<void>;
+    /**
+     * Override the parent `sendAudio` to transcode inbound carrier audio
+     * (mulaw 8 kHz from Twilio/Telnyx) into PCM-16 24 kHz before sending
+     * `input_audio_buffer.append`. The GA server's audio engine ignores
+     * mulaw frames (commit returns "buffer only has 0.00ms of audio") even
+     * though it accepts `audio/pcmu` at the protocol level.
+     */
+    sendAudio(mulawAudio: Buffer): void;
+    /**
+     * mulaw 8 kHz Buffer → PCM-16-LE 24 kHz Buffer.
+     *
+     * Direct 3× linear-interpolation upsample with a one-sample carry
+     * across chunk boundaries. For every consecutive pair of 8 kHz
+     * samples `(s_a, s_b)` we emit three 24 kHz samples:
+     *
+     *     out_0 = s_a
+     *     out_1 = 2/3·s_a + 1/3·s_b
+     *     out_2 = 1/3·s_a + 2/3·s_b
+     *
+     * The carry stores the last 8 kHz sample of the chunk so the next
+     * chunk can start by pairing `(carry, firstNewSample)` — that's what
+     * keeps the output rate exact (each input sample → 3 output samples)
+     * and eliminates the chunk-boundary DC step that confused the GA
+     * server VAD. The first chunk has no carry and loses 3 samples at
+     * the leading edge (375 µs of audio); that's well below any audible
+     * artefact and well below the GA VAD's 300 ms prefix-padding window.
+     */
+    private transcodeInboundMulaw8ToPcm24;
+    /**
+     * Base64 PCM-16-LE 24 kHz → Base64 mulaw 8 kHz. Used by the WS
+     * translation shim on each `response.output_audio.delta`. The stateful
+     * resampler is created lazily and reused across all deltas in this
+     * session so the 3:1 decimator's phase carries across chunk
+     * boundaries — without that, every chunk boundary produces a click.
+     */
+    private transcodeOutboundPcm24ToMulaw8Buffer;
+    /**
+     * GA-API variant of {@link OpenAIRealtimeAdapter.sendFirstMessage}. Two
+     * differences from the v1 path:
+     *
+     * 1. The v1 implementation sends `response.modalities` which the GA
+     *    endpoint rejects with `Unknown parameter: 'response.modalities'`.
+     *    Use `output_modalities` to match the GA `session.update` shape.
+     *
+     * 2. The GA `response.create` does NOT inherit `audio.output.voice`
+     *    from the session — it falls back to the server-side default
+     *    (`marin`, female) when the field is omitted on the response
+     *    itself. Session-level `voice: "alloy"` only affects subsequent
+     *    server-VAD-triggered responses, NOT this explicit
+     *    `response.create`. We re-inject the configured voice here so the
+     *    first-message voice matches the rest of the call.
+     */
+    sendFirstMessage(text: string): Promise<void>;
 }
 /**
@@ -6379,6 +7536,8 @@ declare class TelnyxSTT {
     private readonly transcriptionEngine;
     private readonly sampleRate;
     private readonly baseUrl;
+    /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+    static readonly providerKey = "telnyx_stt";
     private ws;
     private callbacks;
     private headerSent;
@@ -6425,6 +7584,8 @@ declare class TelnyxTTS {
     private readonly apiKey;
     private readonly voice;
     private readonly baseUrl;
+    /** Stable pricing/dashboard key — read by stream-handler/metrics. */
+    static readonly providerKey = "telnyx_tts";
     constructor(apiKey: string, voice?: string, baseUrl?: string);
     /** Collect every audio chunk into a single Buffer. */
     synthesize(text: string): Promise<Buffer>;
@@ -6504,4 +7665,4 @@ interface CallEvent {
     readonly direction?: string;
 }
-export { type AgentOptions, type AgentState, AllProvidersFailedError, type AnthropicConversion, LLM$3 as AnthropicLLM, type AnthropicLLMOptions, type AnthropicMessage, AssemblyAIEncoding, AssemblyAIModel, STT$1 as AssemblyAISTT, type AssemblyAISTTOptions, type AudioConfig, type AudioSource, AuthenticationError, type BackgroundAudioOptions, BackgroundAudioPlayer, BuiltinAudioClip, type BuiltinAudioClipName, type BuiltinPcmSource, type CallControl, type CallEvent, type CallEventHandler, type CallMetrics, CallMetricsAccumulator, type CallRecord, type CartesiaEncoding, STT$3 as CartesiaSTT, type CartesiaSTTOptions, TTS$3 as CartesiaTTS, type CartesiaTTSOptions, LLM$1 as CerebrasLLM, type CerebrasLLMOptions, ChatContext, type ChatMessage, type ChatRole, CloudflareTunnel, type ConversationStateSnapshot, type CostBreakdown, DEFAULT_MIN_SENTENCE_LEN, DEFAULT_PRICING, DTMF_EVENTS, STT$6 as DeepgramSTT, type DeepgramSTTOptions, DefaultToolExecutor, type DefaultToolExecutorOptions, type DefineToolInput, type DtmfEvent, ConvAI as ElevenLabsConvAI, ElevenLabsConvAIAdapter, type ConvAIOptions as ElevenLabsConvAIOptions, TTS$6 as ElevenLabsTTS, type ElevenLabsTTSOptions, type ElevenLabsWebSocketOptions, TTS$5 as ElevenLabsWebSocketTTS, type EouTrigger, ErrorCode, EventBus, FallbackLLMProvider, type FallbackLLMProviderOptions, type FilePcmSource, GEMINI_DEFAULT_INPUT_SR, GEMINI_DEFAULT_OUTPUT_SR, GeminiLiveAdapter, type GeminiLiveEventHandler, LLM as GoogleLLM, type GoogleLLMOptions, LLM$2 as GroqLLM, type GroqLLMOptions, Guardrail$1 as Guardrail, type GuardrailOptions, type HookContext, IVRActivity, type IVRActivityOptions, type IVRToolDefinition, type IncomingMessage, type InitTracingOptions, TTS as InworldTTS, type InworldTTSOptions, type JobCallback, type LLMChunk, LLMLoop, type LLMProvider, LMNTAudioFormat, LMNTModel, LMNTSampleRate, TTS$1 as LMNTTTS, type LMNTTTSOptions, type LatencyBreakdown, type LocalCallOptions, type LocalConfig, type LocalOptions, type Logger, type 
LoopCallback, type MessageHandler, MetricsStore, Ngrok, LLM$4 as OpenAILLM, type OpenAILLMOptions, OpenAILLMProvider, type OpenAIMessage, Realtime as OpenAIRealtime, OpenAIRealtimeAdapter, type RealtimeOptions as OpenAIRealtimeOptions, TTS$4 as OpenAITTS, type OpenAITTSOptions, STT$4 as OpenAITranscribeSTT, type OpenAITranscribeSTTOptions, type ParamSpec, PartialStreamError, Patter, PatterConnectionError, PatterError, type PatterEventType, PatterTool, type PatterToolExecuteArgs, type PatterToolOptions, type PatterToolResult, PcmCarry, PipelineHookExecutor, type PipelineHooks, type PipelineMessageHandler, type ProviderPricing, ProvisionError, RateLimitError, type RawPcmSource, type RealtimeConfig, RemoteMessageHandler, TTS$2 as RimeTTS, type RimeTTSOptions, SPAN_BARGEIN, SPAN_CALL, SPAN_ENDPOINT, SPAN_LLM, SPAN_STT, SPAN_TOOL, SPAN_TTS, type SSEEvent, type STTConfig, type ScheduleHandle, SentenceChunker, type ServeOptions, type SilenceCallback, type SileroSampleRate, SileroVAD, type SileroVADOptions, STT$2 as SonioxSTT, type SonioxSTTOptions$1 as SonioxSTTOptions, type Span, type SpeechEventCallback, SpeechEvents, SpeechmaticsAudioEncoding, SpeechmaticsOperatingPoint, STT as SpeechmaticsSTT, type SpeechmaticsSTTOptions, SpeechmaticsSampleRate, SpeechmaticsServerMessage, TurnDetectionMode as SpeechmaticsTurnDetectionMode, StatefulResampler, type StatefulResamplerOptions, Static as StaticTunnel, type TTSConfig, Carrier as Telnyx, TelnyxAdapter, type TelnyxCarrierOptions, type ConfigureNumberOptions as TelnyxConfigureNumberOptions, type EndCallOptions as TelnyxEndCallOptions, type InitiateCallOptions as TelnyxInitiateCallOptions, type InitiateCallResult as TelnyxInitiateCallResult, type ProvisionNumberOptions as TelnyxProvisionNumberOptions, type ProvisionNumberResult as TelnyxProvisionNumberResult, TelnyxSTT, TelnyxSTTInputFormat, TelnyxSTTSampleRate, type Transcript as TelnyxSTTTranscript, TelnyxTTS, TelnyxTTSSampleRate, TelnyxTTSVoice, type 
TelnyxTranscriptionEngine, TestSession, TfidfLoopDetector, type TfidfLoopDetectorOptions, Tool, type ToolDefinition, type ToolExecutor, type ToolHandler, type ToolOptions, type TunnelHandle, type TurnMetrics, Carrier$1 as Twilio, TwilioAdapter, type TwilioAdapterOptions, type TwilioCarrierOptions, type ConfigureNumberOptions$1 as TwilioConfigureNumberOptions, type InitiateCallOptions$1 as TwilioInitiateCallOptions, type InitiateCallResult$1 as TwilioInitiateCallResult, type ProvisionNumberOptions$1 as TwilioProvisionNumberOptions, type ProvisionNumberResult$1 as TwilioProvisionNumberResult, ULTRAVOX_DEFAULT_API_BASE, ULTRAVOX_DEFAULT_SR, type UltravoxEventHandler, UltravoxRealtimeAdapter, type UserState, STT$5 as WhisperSTT, type WhisperSTTOptions, assemblyai, builtinClipPath, calculateRealtimeCost, calculateSttCost, calculateTelephonyCost, calculateTtsCost, callsToCsv, callsToJson, cartesia, createResampler16kTo8k, createResampler24kTo16k, createResampler24kTo8k, createResampler8kTo16k, deepgram, defineTool, elevenlabs, filterEmoji, filterForTTS, filterMarkdown, formatDtmf, geminiLive, getLogger, guardrail, initTracing, isRemoteUrl, isTracingEnabled, isWebSocketUrl, lmnt, makeAuthMiddleware, mergePricing, mixPcm, mountApi, mountDashboard, mulawToPcm16, notifyDashboard, openaiTts, pcm16ToMulaw, resample16kTo8k, resample24kTo16k, resample8kTo16k, resamplePcm, rime, scheduleCron, scheduleInterval, scheduleOnce, selectSoundFromList, setLogger, soniox, speechmatics, startSpan, startTunnel, tool, ultravox, whisper };
+export { type AgentOptions, type AgentState, AllProvidersFailedError, type AnthropicConversion, LLM$3 as AnthropicLLM, type AnthropicLLMOptions, type AnthropicMessage, AssemblyAIEncoding, AssemblyAIModel, STT$1 as AssemblyAISTT, type AssemblyAISTTOptions, type AudioConfig, type AudioSource, AuthenticationError, type BackgroundAudioOptions, BackgroundAudioPlayer, type EvaluateContext as BargeInEvaluateContext, type BargeInStrategy, BuiltinAudioClip, type BuiltinAudioClipName, type BuiltinPcmSource, type CallControl, type CallEvent, type CallEventHandler, type CallMetrics, CallMetricsAccumulator, type CallRecord, type CartesiaEncoding, STT$3 as CartesiaSTT, type CartesiaSTTOptions, TTS$3 as CartesiaTTS, type CartesiaTTSOptions, LLM$1 as CerebrasLLM, type CerebrasLLMOptions, ChatContext, type ChatMessage, type ChatRole, CloudflareTunnel, type ConversationStateSnapshot, type CostBreakdown, DEFAULT_MIN_SENTENCE_LEN, DEFAULT_PRICING, DTMF_EVENTS, DeepFilterNetFilter, type DeepFilterNetOptions, STT$6 as DeepgramSTT, type DeepgramSTTOptions, DefaultToolExecutor, type DefaultToolExecutorOptions, type DefineToolInput, type DtmfEvent, ConvAI as ElevenLabsConvAI, ElevenLabsConvAIAdapter, type ConvAIOptions as ElevenLabsConvAIOptions, ElevenLabsTTS as ElevenLabsRestTTS, TTS$6 as ElevenLabsTTS, type ElevenLabsTTSOptions, type ElevenLabsWebSocketOptions, TTS$5 as ElevenLabsWebSocketTTS, type EouTrigger, ErrorCode, EventBus, FallbackLLMProvider, type FallbackLLMProviderOptions, type FilePcmSource, GEMINI_DEFAULT_INPUT_SR, GEMINI_DEFAULT_OUTPUT_SR, GeminiLiveAdapter, type GeminiLiveEventHandler, LLM as GoogleLLM, type GoogleLLMOptions, LLM$2 as GroqLLM, type GroqLLMOptions, Guardrail$1 as Guardrail, type GuardrailOptions, type HookContext, IVRActivity, type IVRActivityOptions, type IVRToolDefinition, type IncomingMessage, type InitTracingOptions, TTS as InworldTTS, type InworldTTSOptions, type JobCallback, KrispFrameDuration, KrispSampleRate, KrispVivaFilter, type 
KrispVivaFilterOptions, type LLMChunk, LLMLoop, type LLMProvider, LMNTAudioFormat, LMNTModel, LMNTSampleRate, TTS$1 as LMNTTTS, type LMNTTTSOptions, type LatencyBreakdown, type LocalCallOptions, type LocalConfig, type LocalOptions, type Logger, type LoopCallback, type MessageHandler, MetricsStore, MinWordsStrategy, type MinWordsStrategyOptions, Ngrok, LLM$4 as OpenAILLM, type OpenAILLMOptions, OpenAILLMProvider, type OpenAIMessage, Realtime as OpenAIRealtime, Realtime2 as OpenAIRealtime2, OpenAIRealtime2Adapter, type Realtime2Options as OpenAIRealtime2Options, OpenAIRealtimeAdapter, type RealtimeOptions as OpenAIRealtimeOptions, TTS$4 as OpenAITTS, type OpenAITTSOptions, STT$4 as OpenAITranscribeSTT, type OpenAITranscribeSTTOptions, type ParamSpec, PartialStreamError, Patter, PatterConnectionError, PatterError, type PatterEventType, PatterTool, type PatterToolExecuteArgs, type PatterToolOptions, type PatterToolResult, PcmCarry, PipelineHookExecutor, type PipelineHooks, type PipelineMessageHandler, type ProviderPricing, ProvisionError, RateLimitError, type RawPcmSource, type RealtimeConfig, RemoteMessageHandler, TTS$2 as RimeTTS, type RimeTTSOptions, SPAN_BARGEIN, SPAN_CALL, SPAN_ENDPOINT, SPAN_LLM, SPAN_STT, SPAN_TOOL, SPAN_TTS, type SSEEvent, type STTConfig, type ScheduleHandle, SentenceChunker, type ServeOptions, type SilenceCallback, type SileroSampleRate, SileroVAD, type SileroVADOptions, STT$2 as SonioxSTT, type SonioxSTTOptions$1 as SonioxSTTOptions, type Span, type SpeechEventCallback, SpeechEvents, SpeechmaticsAudioEncoding, SpeechmaticsOperatingPoint, STT as SpeechmaticsSTT, type SpeechmaticsSTTOptions, SpeechmaticsSampleRate, SpeechmaticsServerMessage, TurnDetectionMode as SpeechmaticsTurnDetectionMode, StatefulResampler, type StatefulResamplerOptions, Static as StaticTunnel, type TTSConfig, Carrier as Telnyx, TelnyxAdapter, type TelnyxCarrierOptions, type ConfigureNumberOptions as TelnyxConfigureNumberOptions, type EndCallOptions as TelnyxEndCallOptions, 
type InitiateCallOptions as TelnyxInitiateCallOptions, type InitiateCallResult as TelnyxInitiateCallResult, type ProvisionNumberOptions as TelnyxProvisionNumberOptions, type ProvisionNumberResult as TelnyxProvisionNumberResult, TelnyxSTT, TelnyxSTTInputFormat, TelnyxSTTSampleRate, type Transcript as TelnyxSTTTranscript, TelnyxTTS, TelnyxTTSSampleRate, TelnyxTTSVoice, type TelnyxTranscriptionEngine, TestSession, TfidfLoopDetector, type TfidfLoopDetectorOptions, Tool, type ToolDefinition, type ToolExecutor, type ToolHandler, type ToolOptions, type TunnelHandle, type TurnMetrics, Carrier$1 as Twilio, TwilioAdapter, type TwilioAdapterOptions, type TwilioCarrierOptions, type ConfigureNumberOptions$1 as TwilioConfigureNumberOptions, type InitiateCallOptions$1 as TwilioInitiateCallOptions, type InitiateCallResult$1 as TwilioInitiateCallResult, type ProvisionNumberOptions$1 as TwilioProvisionNumberOptions, type ProvisionNumberResult$1 as TwilioProvisionNumberResult, ULTRAVOX_DEFAULT_API_BASE, ULTRAVOX_DEFAULT_SR, type UltravoxEventHandler, UltravoxRealtimeAdapter, type UserState, STT$5 as WhisperSTT, type WhisperSTTOptions, assemblyai, builtinClipPath, calculateRealtimeCost, calculateSttCost, calculateTelephonyCost, calculateTtsCost, callsToCsv, callsToJson, cartesia, createResampler16kTo8k, createResampler24kTo16k, createResampler24kTo8k, createResampler8kTo16k, deepgram, defineTool, elevenlabs, evaluateStrategies as evaluateBargeInStrategies, filterEmoji, filterForTTS, filterMarkdown, formatDtmf, geminiLive, getLogger, guardrail, initTracing, isRemoteUrl, isTracingEnabled, isWebSocketUrl, lmnt, makeAuthMiddleware, mergePricing, mixPcm, mountApi, mountDashboard, mulawToPcm16, notifyDashboard, openaiTts, pcm16ToMulaw, resample16kTo8k, resample24kTo16k, resample8kTo16k, resamplePcm, resetStrategies as resetBargeInStrategies, rime, scheduleCron, scheduleInterval, scheduleOnce, selectSoundFromList, setLogger, soniox, speechmatics, startSpan, startTunnel, tool, ultravox, 
whisper };