npm - getpatter - Versions diffs - 0.5.1 → 0.5.3 - Mend

getpatter 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

package/README.md +5 -5
package/dist/banner-3GNZ6VQK.mjs +19 -0
package/dist/{carrier-config-CPG5CROM.mjs → carrier-config-33HQ2W4V.mjs} +2 -2
package/dist/{chunk-B6C3KIBG.mjs → chunk-FIFIWBL7.mjs} +3226 -569
package/dist/chunk-QHHBUCMT.mjs +25 -0
package/dist/{chunk-AKQFOFLG.mjs → chunk-SEMKNPCD.mjs} +7 -2
package/dist/{chunk-FMNRCP5X.mjs → chunk-VJVDG4V5.mjs} +1 -1
package/dist/cli.js +133 -15
package/dist/dist-YRCCJQ26.mjs +1631 -0
package/dist/index.d.mts +2000 -289
package/dist/index.d.ts +2000 -289
package/dist/index.js +8019 -1984
package/dist/index.mjs +1885 -618
package/dist/node-cron-6PRPSBG5.mjs +1348 -0
package/dist/onnxruntime_binding-4Q2WV26X.node +0 -0
package/dist/onnxruntime_binding-5PVQ7RFC.node +0 -0
package/dist/onnxruntime_binding-FNOPH2XG.node +0 -0
package/dist/onnxruntime_binding-HSGOY4IT.node +0 -0
package/dist/onnxruntime_binding-OY2N3XIT.node +0 -0
package/dist/onnxruntime_binding-ZPEJPBCV.node +0 -0
package/dist/{persistence-CYIGNHSU.mjs → persistence-LQBYQPQQ.mjs} +1 -1
package/dist/test-mode-MVJ3SKG4.mjs +8 -0
package/dist/tunnel-UVR3PPAU.mjs +8 -0
package/package.json +10 -3
package/dist/chunk-OOIUSZB4.mjs +0 -37
package/dist/node-cron-373UVDIO.mjs +0 -935
package/dist/test-mode-JZMYE5HY.mjs +0 -8
package/dist/tunnel-O7ICMSTP.mjs +0 -8

package/dist/index.d.ts CHANGED

@@ -135,6 +135,35 @@ declare class Static {
         hostname: string;
     });
 }
+/**
+ * Ngrok tunnel marker — parity with the Python ``getpatter.tunnels.Ngrok``.
+ *
+ * Patter does not bundle the ngrok binary or auto-provision tunnels. This
+ * marker exists so applications can pass an existing ngrok hostname through
+ * the same code path as ``Static`` / ``CloudflareTunnel``. Constructing one
+ * without a hostname is allowed (mirrors the Python type), but ``start()``
+ * will throw — the user is expected to either pass a hostname or run the
+ * tunnel themselves and feed the resulting URL via ``Static``.
+ *
+ * @example
+ * ```ts
+ * import { Ngrok } from "getpatter/tunnels";
+ * const tunnel = new Ngrok({ hostname: "abc.ngrok.io" });
+ * ```
+ */
+declare class Ngrok {
+    readonly kind: "ngrok";
+    readonly hostname: string;
+    constructor(opts?: {
+        hostname?: string;
+    });
+    /**
+     * Returns the configured hostname or throws if the marker was constructed
+     * without one. Patter does not start ngrok itself — the user is expected
+     * to either supply a hostname or run ngrok out-of-band.
+     */
+    start(): string;
+}
 /**
  * Public API primitives — `Tool` and `Guardrail` classes, plus the
@@ -224,9 +253,35 @@ declare function tool(opts: ToolOptions): Tool;
  * functions so the Twilio/Telnyx bridges have a single dispatch point.
  */
+/** Per-word timings / metadata (Deepgram-shaped). Optional on every adapter. */
+interface STTWord {
+    readonly word?: string;
+    readonly start?: number;
+    readonly end?: number;
+    readonly confidence?: number;
+    readonly punctuated_word?: string;
+    readonly speaker?: number;
+}
+/**
+ * Facade transcript shape — widened to surface richer provider fields
+ * (Deepgram emits all of them) without forcing adapters that only know
+ * ``text``/``isFinal`` to change. All non-text fields are optional.
+ */
 interface STTTranscript {
     text: string;
     isFinal?: boolean;
+    /** Overall transcript confidence in [0, 1]. */
+    confidence?: number;
+    /** Provider-side end-of-utterance hint (faster than ``isFinal``). */
+    speechFinal?: boolean;
+    /** True when the result was produced in response to a Finalize command. */
+    fromFinalize?: boolean;
+    /** Provider request id (Deepgram populates this from the Metadata frame). */
+    requestId?: string;
+    /** Per-word timings / metadata when the provider emits them. */
+    words?: ReadonlyArray<STTWord>;
+    /** Which provider event this transcript represents (e.g. ``Results``). */
+    eventType?: string;
 }
 type STTTranscriptCallback = (t: STTTranscript) => Promise<void> | void;
 /** Shape shared by every STT adapter in the SDK. */
@@ -240,6 +295,81 @@ interface TTSAdapter {
     synthesizeStream(text: string): AsyncIterable<Buffer>;
 }
+/**
+ * Pipeline hook executor for pipeline mode.
+ *
+ * Runs user-defined hooks at each stage of the STT → LLM → TTS pipeline.
+ * Fail-open: if a hook throws, the error is logged and the original value
+ * passes through unchanged.
+ */
+declare class PipelineHookExecutor {
+    private readonly hooks;
+    constructor(hooks: PipelineHooks | undefined);
+    /**
+     * Run beforeSendToStt hook. Returns null to drop the audio chunk.
+     * If no hook is defined, returns the audio unchanged.
+     * Fail-open: on exception, the original audio passes through.
+     */
+    runBeforeSendToStt(audio: Buffer, ctx: HookContext): Promise<Buffer | null>;
+    /**
+     * Run afterTranscribe hook. Returns null if hook vetoes the turn.
+     * If no hook is defined, returns the transcript unchanged.
+     */
+    runAfterTranscribe(transcript: string, ctx: HookContext): Promise<string | null>;
+    /**
+     * Run beforeLlm hook. Returns a possibly-modified messages list.
+     * Returning ``null`` from the hook means "keep the original" — the LLM
+     * call is too important to be silently vetoed.
+     * Fail-open: on exception, the original messages pass through.
+     */
+    runBeforeLlm(messages: Array<Record<string, unknown>>, ctx: HookContext): Promise<Array<Record<string, unknown>>>;
+    /**
+     * Run afterLlm hook. Returns a possibly-modified assistant text.
+     * Returning ``null`` from the hook means "keep the original".
+     * Fail-open: on exception, the original text passes through.
+     */
+    runAfterLlm(text: string, ctx: HookContext): Promise<string>;
+    /**
+     * Whether ``afterLlm`` is configured. Used by the LLM loop to decide
+     * whether to buffer streaming tokens before yielding them.
+     */
+    hasAfterLlm(): boolean;
+    /**
+     * Run beforeSynthesize hook. Returns null if hook vetoes TTS for this sentence.
+     * If no hook is defined, returns the text unchanged.
+     */
+    runBeforeSynthesize(text: string, ctx: HookContext): Promise<string | null>;
+    /**
+     * Run afterSynthesize hook. Returns null if hook vetoes this audio chunk.
+     * If no hook is defined, returns the audio unchanged.
+     */
+    runAfterSynthesize(audio: Buffer, text: string, ctx: HookContext): Promise<Buffer | null>;
+}
+/**
+ * Lightweight in-process event bus for Patter call lifecycle events.
+ *
+ * Mirrors the Python ``PatterEventBus`` (sdk-py/getpatter/observability/event_bus.py).
+ * Consumers subscribe with ``on()`` and receive typed payloads.  ``emit()`` is
+ * synchronous but handles async listeners: rejections are surfaced via the
+ * Patter logger rather than being swallowed or crashing the call.
+ */
+type PatterEventType = 'turn_started' | 'turn_ended' | 'eou_metrics' | 'interruption' | 'llm_metrics' | 'tts_metrics' | 'stt_metrics' | 'metrics_collected' | 'call_ended' | 'transcript_partial' | 'transcript_final' | 'llm_chunk' | 'tts_chunk' | 'tool_call_started';
+type Listener<T = unknown> = (payload: T) => void | Promise<void>;
+declare class EventBus {
+    private readonly listeners;
+    /**
+     * Subscribe to an event type.  Returns an unsubscribe function.
+     */
+    on<T = unknown>(event: PatterEventType, cb: Listener<T>): () => void;
+    /**
+     * Emit an event synchronously.  Async listeners are fire-and-forget with
+     * rejection logging so a badly-behaved observer never stalls the call path.
+     */
+    emit<T = unknown>(event: PatterEventType, payload: T): void;
+}
 /**
  * Built-in LLM loop for pipeline mode when no onMessage handler is provided.
  *
@@ -248,14 +378,58 @@ interface TTSAdapter {
  * ``OpenAILLMProvider`` which preserves full backward compatibility.
  */
+/**
+ * Minimal interface for recording LLM usage chunks.
+ * Avoids a circular import from metrics.ts.
+ */
+interface LlmUsageRecorder {
+    recordLlmUsage(provider: string, model: string, inputTokens: number, outputTokens: number, cacheReadTokens?: number, cacheCreationTokens?: number): void;
+}
+/**
+ * Pluggable tool executor — mirrors the Python ``ToolExecutor`` in
+ * ``sdk-py/getpatter/services/tool_executor.py``.
+ *
+ * Implementors receive a fully-resolved ``ToolDefinition`` (handler and/or webhook
+ * URL already validated by the SDK) and MUST return a JSON-stringifiable
+ * result. Errors should be returned as JSON like
+ * ``{ error: "...", fallback: true }`` rather than thrown.
+ */
+interface ToolExecutor {
+    execute(toolDef: ToolDefinition, args: Record<string, unknown>, callContext: Record<string, unknown>): Promise<string>;
+}
+interface DefaultToolExecutorOptions {
+    /** Total attempts = maxRetries + 1. Default: 2 (i.e. 3 attempts). */
+    maxRetries?: number;
+    /** Delay between attempts, in ms. */
+    retryDelayMs?: number;
+    /** Per-request timeout for webhook calls, in ms. */
+    requestTimeoutMs?: number;
+}
+/**
+ * Default executor — webhook with retry/fallback and local handler preference.
+ *
+ * This is the out-of-the-box behavior and is 1:1 equivalent to the previous
+ * inline logic in ``LLMLoop.executeTool``.
+ */
+declare class DefaultToolExecutor implements ToolExecutor {
+    private readonly maxRetries;
+    private readonly retryDelayMs;
+    private readonly requestTimeoutMs;
+    constructor(opts?: DefaultToolExecutorOptions);
+    execute(toolDef: ToolDefinition, args: Record<string, unknown>, callContext: Record<string, unknown>): Promise<string>;
+}
 /** A single streaming chunk yielded by an LLM provider. */
 interface LLMChunk {
-    type: 'text' | 'tool_call' | 'done';
+    type: 'text' | 'tool_call' | 'done' | 'usage';
     content?: string;
     index?: number;
     id?: string;
     name?: string;
     arguments?: string;
+    inputTokens?: number;
+    outputTokens?: number;
+    cacheReadInputTokens?: number;
+    cacheCreationInputTokens?: number;
 }
 /**
  * Interface that any LLM provider must satisfy.
@@ -269,11 +443,44 @@ interface LLMChunk {
 interface LLMProvider {
     stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
 }
+/** Optional sampling kwargs forwarded into the OpenAI Chat Completions body. */
+interface OpenAILLMSamplingOptions {
+    /** Sampling temperature [0, 2]. */
+    temperature?: number;
+    /** Max tokens in the assistant response (sent as ``max_completion_tokens``). */
+    maxTokens?: number;
+    /** OpenAI-style ``response_format`` for JSON mode / structured outputs. */
+    responseFormat?: Record<string, unknown>;
+    /** Whether to allow parallel tool calls. */
+    parallelToolCalls?: boolean;
+    /** ``"auto" | "none" | "required"`` or a specific tool object. */
+    toolChoice?: string | Record<string, unknown>;
+    /** Sampling seed for reproducible outputs. */
+    seed?: number;
+    /** Nucleus sampling cutoff in [0, 1]. */
+    topP?: number;
+    /** Penalty in [-2, 2] applied to repeated tokens. */
+    frequencyPenalty?: number;
+    /** Penalty in [-2, 2] applied to seen tokens. */
+    presencePenalty?: number;
+    /** Stop sequence(s). */
+    stop?: string | string[];
+}
 /** LLM provider backed by OpenAI Chat Completions (streaming). */
 declare class OpenAILLMProvider implements LLMProvider {
     private readonly apiKey;
-    private readonly model;
-    constructor(apiKey: string, model: string);
+    readonly model: string;
+    private readonly temperature?;
+    private readonly maxTokens?;
+    private readonly responseFormat?;
+    private readonly parallelToolCalls?;
+    private readonly toolChoice?;
+    private readonly seed?;
+    private readonly topP?;
+    private readonly frequencyPenalty?;
+    private readonly presencePenalty?;
+    private readonly stop?;
+    constructor(apiKey: string, model: string, sampling?: OpenAILLMSamplingOptions);
     stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
 }
 declare class LLMLoop {
@@ -282,15 +489,34 @@ declare class LLMLoop {
     private readonly tools;
     private readonly openaiTools;
     private readonly toolMap;
+    private toolExecutor;
+    private eventBus?;
+    private readonly _providerName;
+    private readonly _modelName;
     constructor(apiKey: string, model: string, systemPrompt: string, tools?: ToolDefinition[] | null, llmProvider?: LLMProvider);
+    /**
+     * Swap in a custom tool executor (e.g. different retry policy, metrics
+     * wrapping, tenant-aware fan-out). The default is ``DefaultToolExecutor``.
+     */
+    setToolExecutor(executor: ToolExecutor): void;
+    /**
+     * Wire an {@link EventBus} so the loop emits ``llm_chunk`` per text
+     * token and ``tool_call_started`` the first time each tool-call index
+     * appears. Set to ``undefined`` to disable.
+     */
+    setEventBus(bus: EventBus | undefined): void;
     /**
      * Stream LLM response tokens, handling tool calls automatically.
      * Yields text tokens as they arrive from the LLM.
+     *
+     * @param metrics Optional usage recorder — when provided, usage chunks
+     *   from the provider are forwarded to {@link LlmUsageRecorder.recordLlmUsage}
+     *   so token costs are included in the call cost breakdown (fix 10).
      */
     run(userText: string, history: Array<{
         role: string;
         text: string;
-    }>, callContext: Record<string, unknown>): AsyncGenerator<string, void, unknown>;
+    }>, callContext: Record<string, unknown>, metrics?: LlmUsageRecorder, hookExecutor?: PipelineHookExecutor, hookCtx?: HookContext): AsyncGenerator<string, void, unknown>;
     private executeTool;
     private buildMessages;
 }
@@ -305,11 +531,11 @@ interface STTConfig {
     readonly apiKey: string;
     readonly language: string;
     /**
-     * Optional — when present, called by internal serialisation. Not required for
-     * callers that pass a plain object literal (``{ provider, apiKey, language }``)
-     * to maintain parity with the Python SDK, which accepts dataclass-like inputs.
+     * Serialise the config into a JSON-compatible dict for the wire protocol.
+     * Mandatory — matches Python's ``STTConfig.to_dict()``. Concrete classes
+     * returned by ``stt(...)``/``deepgram(...)`` etc. all implement it.
      */
-    toDict?(): Record<string, string | Record<string, unknown>>;
+    toDict(): Record<string, string | Record<string, unknown>>;
     /** Provider-specific knobs (e.g. Deepgram endpointing). */
     options?: Record<string, unknown>;
 }
@@ -317,36 +543,15 @@ interface TTSConfig {
     readonly provider: string;
     readonly apiKey: string;
     readonly voice: string;
-    toDict?(): Record<string, string | Record<string, unknown>>;
+    /**
+     * Serialise the config into a JSON-compatible dict for the wire protocol.
+     * Mandatory — matches Python's ``TTSConfig.to_dict()``.
+     */
+    toDict(): Record<string, string | Record<string, unknown>>;
     options?: Record<string, unknown>;
 }
 type MessageHandler = (msg: IncomingMessage) => Promise<string>;
 type CallEventHandler = (data: Record<string, unknown>) => Promise<void>;
-interface PatterOptions {
-    apiKey: string;
-    backendUrl?: string;
-    restUrl?: string;
-}
-interface ConnectOptions {
-    onMessage: MessageHandler;
-    onCallStart?: CallEventHandler;
-    onCallEnd?: CallEventHandler;
-    provider?: string;
-    providerKey?: string;
-    providerSecret?: string;
-    number?: string;
-    country?: string;
-    stt?: STTConfig;
-    tts?: TTSConfig;
-}
-interface CallOptions {
-    to: string;
-    onMessage?: MessageHandler;
-    firstMessage?: string;
-    fromNumber?: string;
-    agentId?: string;
-    machineDetection?: boolean;
-}
 interface ToolDefinition {
     name: string;
     description: string;
@@ -356,58 +561,9 @@ interface ToolDefinition {
     /** Local handler function — when provided, called instead of webhookUrl. */
     handler?: (args: Record<string, unknown>, context: Record<string, unknown>) => Promise<string>;
 }
-interface CreateAgentOptions {
-    name: string;
-    systemPrompt: string;
-    model?: string;
-    voice?: string;
-    voiceProvider?: string;
-    language?: string;
-    firstMessage?: string;
-    tools?: ToolDefinition[];
-}
-interface Agent {
-    id: string;
-    name: string;
-    systemPrompt: string;
-    model: string;
-    voice: string;
-    voiceProvider: string;
-    language: string;
-    firstMessage: string | null;
-    tools: ToolDefinition[] | null;
-}
-interface PhoneNumber {
-    id: string;
-    number: string;
-    provider: string;
-    country: string;
-    status: string;
-    agentId: string | null;
-}
-interface Call {
-    id: string;
-    direction: string;
-    caller: string;
-    callee: string;
-    startedAt: string;
-    endedAt: string | null;
-    durationSeconds: number | null;
-    status: string;
-    transcript: Array<{
-        role: string;
-        text: string;
-        timestamp: string;
-    }> | null;
-}
 interface LocalOptions {
     /**
-     * Local mode is auto-detected when a ``carrier`` is passed. Pass
-     * ``mode: 'local'`` to force local mode explicitly.
-     */
-    mode?: 'local';
-    /**
-     * Telephony carrier instance. Required for local mode.
+     * Telephony carrier instance. Required.
      *
      * @example
      * ```ts
@@ -456,6 +612,14 @@ interface PipelineHooks {
     beforeSendToStt?: (audio: Buffer, ctx: HookContext) => Buffer | null | Promise<Buffer | null>;
     /** Called after STT produces a transcript, before LLM. Return null to skip this turn. */
     afterTranscribe?: (transcript: string, ctx: HookContext) => string | null | Promise<string | null>;
+    /** Called with the messages list before the LLM call.
+     *  Return null to keep them, or return a new list to replace
+     *  (useful for prompt injection, message filtering, RAG augmentation). */
+    beforeLlm?: (messages: Array<Record<string, unknown>>, ctx: HookContext) => Array<Record<string, unknown>> | null | Promise<Array<Record<string, unknown>> | null>;
+    /** Called with the final assistant text after the LLM stream completes.
+     *  Return null to keep, or return a new string to replace
+     *  (useful for output validation, redaction, post-processing). */
+    afterLlm?: (text: string, ctx: HookContext) => string | null | Promise<string | null>;
     /** Called before TTS, per-sentence in streaming mode. Return null to skip TTS for this sentence. */
     beforeSynthesize?: (text: string, ctx: HookContext) => string | null | Promise<string | null>;
     /** Called after TTS produces an audio chunk. Return null to discard this chunk. */
@@ -585,38 +749,120 @@ interface LocalCallOptions {
     variables?: Record<string, string>;
     /**
      * Ring timeout in seconds. Forwarded to Twilio as `Timeout` and to Telnyx
-     * as `timeout_secs`. Defaults to the carrier default (~28 s on Twilio) when
-     * omitted. Increase for international routes where the remote carrier
-     * silences short US→IT rings.
+     * as `timeout_secs`. Defaults to **25 s** — the production-recommended
+     * value that limits phantom calls. Pass `60` for legacy carrier-default
+     * parity, or `null` to omit the parameter entirely (carrier picks its
+     * own default).
      */
-    ringTimeout?: number;
+    ringTimeout?: number | null;
+}
+/**
+ * In-memory metrics store for the local dashboard.
+ *
+ * Keeps the last `maxCalls` completed calls and tracks active calls.
+ * Supports SSE event subscribers for real-time updates.
+ *
+ * Optional disk hydration: when `CallLogger` writes per-call records under
+ * `<root>/calls/YYYY/MM/DD/<call_id>/metadata.json`, calling
+ * `hydrate(logRoot)` on a fresh store rebuilds the in-memory list from those
+ * files so the dashboard survives process restarts (the persistence is in
+ * the JSONL/JSON files, the store is just a cache on top).
+ */
+interface CallRecord {
+    call_id: string;
+    caller: string;
+    callee: string;
+    direction: string;
+    started_at: number;
+    ended_at?: number;
+    /**
+     * Current lifecycle state: ``initiated`` (pre-registered), ``ringing``,
+     * ``in-progress``, ``completed``, ``no-answer``, ``busy``, ``failed``,
+     * ``canceled``, or ``webhook_error``.
+     */
+    status?: string;
+    transcript?: Array<{
+        role: string;
+        text: string;
+        timestamp: number;
+    }>;
+    turns?: unknown[];
+    metrics?: Record<string, unknown> | null;
+    [key: string]: unknown;
+}
+interface SSEEvent {
+    type: string;
+    data: Record<string, unknown>;
+}
+declare class MetricsStore extends EventEmitter {
+    private readonly maxCalls;
+    private calls;
+    private activeCalls;
+    /**
+     * Accepts either a numeric ``maxCalls`` (legacy positional — matches the
+     * original TS API) or an options object ``{ maxCalls }`` to align with the
+     * Python SDK's keyword-argument style. Plain literals also work:
+     * ``new MetricsStore()`` / ``new MetricsStore(100)`` / ``new MetricsStore({ maxCalls: 100 })``.
+     */
+    constructor(maxCallsOrOpts?: number | {
+        maxCalls?: number;
+    });
+    private publish;
+    recordCallStart(data: Record<string, unknown>): void;
+    /**
+     * Pre-register an outbound call before any webhook fires. Lets the
+     * dashboard surface attempts that never reach media (no-answer, busy,
+     * carrier-rejected). Mirrors the Python ``record_call_initiated``.
+     */
+    recordCallInitiated(data: Record<string, unknown>): void;
+    /**
+     * Update the status of an active or completed call. Terminal states
+     * (completed, no-answer, busy, failed, canceled, webhook_error) move the
+     * row from active to completed so the UI freezes the live duration timer.
+     */
+    updateCallStatus(callId: string, status: string, extra?: Record<string, unknown>): void;
+    recordTurn(data: Record<string, unknown>): void;
+    recordCallEnd(data: Record<string, unknown>, metrics?: Record<string, unknown> | null): void;
+    getCalls(limit?: number, offset?: number): CallRecord[];
+    getCall(callId: string): CallRecord | null;
+    /** Look up an active call by id (returns undefined if not active or unknown). */
+    getActive(callId: string): CallRecord | undefined;
+    getActiveCalls(): CallRecord[];
+    getAggregates(): Record<string, unknown>;
+    getCallsInRange(fromTs?: number, toTs?: number): CallRecord[];
+    get callCount(): number;
+    /**
+     * Rebuild the in-memory call list from `metadata.json` files written by
+     * `CallLogger` under `<logRoot>/calls/YYYY/MM/DD/<call_id>/`. Idempotent:
+     * call_ids already in the store are skipped. Errors per file are logged
+     * and swallowed so a single corrupt entry doesn't block hydration.
+     *
+     * Returns the number of calls newly added to the store.
+     *
+     * Safe to call before any traffic; intended to run once at server startup.
+     */
+    hydrate(logRoot: string | null | undefined): number;
 }
 declare class Patter {
-    readonly apiKey: string;
-    private readonly backendUrl;
-    private readonly restUrl;
-    private readonly connection;
-    private readonly mode;
     private localConfig;
     private embeddedServer;
     private tunnelHandle;
-    constructor(options: PatterOptions | LocalOptions);
+    /**
+     * Live `MetricsStore` for the embedded server. Returns `null` before
+     * `serve()` is called. Exposed so integrations like `PatterTool` can
+     * subscribe to per-call lifecycle events (`call_initiated`,
+     * `call_start`, `call_end`).
+     */
+    get metricsStore(): MetricsStore | null;
+    constructor(options: LocalOptions);
     agent(opts: AgentOptions): AgentOptions;
     serve(opts: ServeOptions): Promise<void>;
     test(opts: ServeOptions): Promise<void>;
-    connect(options: ConnectOptions): Promise<void>;
-    call(options: CallOptions | LocalCallOptions): Promise<void>;
+    call(options: LocalCallOptions): Promise<void>;
     disconnect(): Promise<void>;
-    createAgent(opts: CreateAgentOptions): Promise<Agent>;
-    listAgents(): Promise<Agent[]>;
-    buyNumber(opts?: {
-        country?: string;
-        provider?: string;
-    }): Promise<PhoneNumber>;
-    assignAgent(numberId: string, agentId: string): Promise<void>;
-    listCalls(limit?: number): Promise<Call[]>;
-    private registerNumber;
 }
 /**
@@ -704,51 +950,49 @@ declare const DEFAULT_MIN_SENTENCE_LEN = 20;
 declare class SentenceChunker {
     private buffer;
     private readonly minSentenceLen;
+    private readonly minWordsForShortFlush;
     constructor(options?: {
         minSentenceLen?: number;
+        minWordsForShortFlush?: number;
     });
-    /** Feed a token. Returns zero or more complete sentences. */
+    /**
+     * Feed a token. Returns zero or more complete sentences.
+     *
+     * Two emission paths:
+     * - **Standard path** — when the buffer is at least `minSentenceLen`
+     *   characters long and the regex tokenizer reports more than one
+     *   sentence, all but the last (potentially incomplete) are emitted.
+     * - **Short-flush path** — when the buffer is shorter than `minSentenceLen`
+     *   but ends with a sentence terminator AND has at least
+     *   `minWordsForShortFlush` whitespace-separated words, emit it
+     *   immediately. This drops TTS TTFB on short greetings like `"Hi there!"`
+     *   while keeping single-word utterances (`"Sì."`) buffered until
+     *   `flush()`.
+     */
     push(token: string): string[];
+    /**
+     * Emit the buffer when it's a short, complete single-sentence utterance.
+     *
+     * A buffer qualifies when **all** of these hold:
+     * 1. Last non-whitespace char is a sentence terminator.
+     * 2. Word count is at least `minWordsForShortFlush` (default 2 — keeps
+     *    single-word "Sì." / "Yes." buffered until `flush()`).
+     * 3. The buffer contains exactly one terminator (the trailing one).
+     *    Multiple terminators mean we may be mid-stream of a longer merged
+     *    utterance like `"Hey! Hi! Hello! This is a sentence."` — let the
+     *    standard path keep merging.
+     * 4. The char immediately before the terminator is NOT a digit (avoids
+     *    flushing mid-decimal: `"f(x) = x * 2."` must not be emitted before
+     *    the trailing `54` of `2.54` arrives).
+     * 5. The char immediately before the terminator is NOT an uppercase
+     *    ASCII letter (avoids acronym patterns like `"U.S."` / `"U."`).
+     */
+    private maybeShortFlush;
     /** Flush remaining buffer as final sentence(s). Call at end of stream. */
     flush(): string[];
     /** Discard buffered text. Call on interrupt. */
     reset(): void;
 }
-/**
- * Pipeline hook executor for pipeline mode.
- *
- * Runs user-defined hooks at each stage of the STT → LLM → TTS pipeline.
- * Fail-open: if a hook throws, the error is logged and the original value
- * passes through unchanged.
- */
-declare class PipelineHookExecutor {
-    private readonly hooks;
-    constructor(hooks: PipelineHooks | undefined);
-    /**
-     * Run beforeSendToStt hook. Returns null to drop the audio chunk.
-     * If no hook is defined, returns the audio unchanged.
-     * Fail-open: on exception, the original audio passes through.
-     */
-    runBeforeSendToStt(audio: Buffer, ctx: HookContext): Promise<Buffer | null>;
-    /**
-     * Run afterTranscribe hook. Returns null if hook vetoes the turn.
-     * If no hook is defined, returns the transcript unchanged.
-     */
-    runAfterTranscribe(transcript: string, ctx: HookContext): Promise<string | null>;
-    /**
-     * Run beforeSynthesize hook. Returns null if hook vetoes TTS for this sentence.
-     * If no hook is defined, returns the text unchanged.
-     */
-    runBeforeSynthesize(text: string, ctx: HookContext): Promise<string | null>;
-    /**
-     * Run afterSynthesize hook. Returns null if hook vetoes this audio chunk.
-     * If no hook is defined, returns the audio unchanged.
-     */
-    runAfterSynthesize(audio: Buffer, text: string, ctx: HookContext): Promise<Buffer | null>;
-}
 /**
  * Built-in text transforms for cleaning LLM output before TTS synthesis.
  *
@@ -787,7 +1031,23 @@ declare class AuthenticationError extends PatterError {
 declare class ProvisionError extends PatterError {
     constructor(message: string);
 }
+/** Thrown when a provider returns HTTP 429 on connect/upgrade. */
+declare class RateLimitError extends PatterConnectionError {
+    constructor(message: string);
+}
+/**
+ * Config envelope for realtime / ConvAI pipelines — mirrors the wire-level
+ * shape consumed by the backend. Kept narrow on purpose so callers can pass a
+ * plain object literal if they prefer.
+ */
+interface RealtimeConfig {
+    readonly provider: string;
+    readonly apiKey: string;
+    readonly model?: string;
+    readonly voice?: string;
+    readonly options?: Record<string, unknown>;
+}
 /**
  * Deepgram STT config builder. Tune latency via ``endpointingMs`` /
  * ``utteranceEndMs``. Internal only — public code should use ``DeepgramSTT``
@@ -815,13 +1075,64 @@ declare function openaiTts(opts: {
     apiKey: string;
     voice?: string;
 }): TTSConfig;
+/** Soniox real-time STT config helper. */
+declare function soniox(opts: {
+    apiKey: string;
+    language?: string;
+}): STTConfig;
+/**
+ * Speechmatics STT config helper.
+ *
+ * NOTE: the Speechmatics adapter is currently Python-only. Calling this helper
+ * throws a clear error so callers can switch providers or use the Python SDK
+ * until the TS adapter ships.
+ */
+declare function speechmatics(_opts: {
+    apiKey: string;
+    language?: string;
+}): STTConfig;
+/** AssemblyAI real-time STT config helper. */
+declare function assemblyai(opts: {
+    apiKey: string;
+    language?: string;
+}): STTConfig;
+/** Cartesia TTS config helper. Default voice matches Python SDK. */
+declare function cartesia(opts: {
+    apiKey: string;
+    voice?: string;
+}): TTSConfig;
+/** Rime TTS config helper. */
+declare function rime(opts: {
+    apiKey: string;
+    voice?: string;
+}): TTSConfig;
+/** LMNT TTS config helper. */
+declare function lmnt(opts: {
+    apiKey: string;
+    voice?: string;
+}): TTSConfig;
 /**
- * Default provider pricing and merge utilities.
+ * Ultravox realtime engine config helper.
  *
- * Pricing is based on public provider rates (as of early 2025).
- * Developers can override any provider's pricing.
+ * Returns a ``RealtimeConfig`` envelope that the backend can dispatch. For
+ * programmatic control over a live session use ``UltravoxRealtimeAdapter``
+ * directly.
  */
+declare function ultravox(opts: {
+    apiKey: string;
+    model?: string;
+    voice?: string;
+}): RealtimeConfig;
+/**
+ * Google Gemini Live realtime engine config helper. See
+ * ``GeminiLiveAdapter`` for direct session control.
+ */
+declare function geminiLive(opts: {
+    apiKey: string;
+    model?: string;
+    voice?: string;
+}): RealtimeConfig;
 interface ProviderPricing {
     unit: string;
     price?: number;
@@ -829,6 +1140,8 @@ interface ProviderPricing {
     audio_output_per_token?: number;
     text_input_per_token?: number;
     text_output_per_token?: number;
+    cached_audio_input_per_token?: number;
+    cached_text_input_per_token?: number;
 }
 declare const DEFAULT_PRICING: Record<string, ProviderPricing>;
 /**
@@ -840,18 +1153,37 @@ declare function mergePricing(overrides?: Record<string, Partial<ProviderPricing
 declare function calculateSttCost(provider: string, audioSeconds: number, pricing: Record<string, ProviderPricing>): number;
 /** Calculate TTS cost from character count. */
 declare function calculateTtsCost(provider: string, characterCount: number, pricing: Record<string, ProviderPricing>): number;
-/** Calculate OpenAI Realtime cost from token usage. */
+/**
+ * Calculate OpenAI Realtime cost from token usage.
+ *
+ * OpenAI bills the cached portion of ``input_token_details.audio_tokens`` and
+ * ``.text_tokens`` at the reduced cached rate (typically ~3% of full for audio,
+ * ~10% of full for text on the mini model). ``cached_tokens_details`` is a
+ * nested breakdown of the same ``input_token_details`` totals — the cached
+ * counts are already INCLUDED in the top-level totals, so we subtract them
+ * out before applying the full rate and add them back at the cached rate.
+ */
 declare function calculateRealtimeCost(usage: {
     input_token_details?: {
         audio_tokens?: number;
         text_tokens?: number;
+        cached_tokens_details?: {
+            audio_tokens?: number;
+            text_tokens?: number;
+        };
     };
     output_token_details?: {
         audio_tokens?: number;
         text_tokens?: number;
     };
 }, pricing: Record<string, ProviderPricing>): number;
-/** Calculate telephony cost from call duration. */
+/**
+ * Calculate telephony cost from call duration.
+ *
+ * Twilio bills in whole-minute increments (any partial minute is rounded up
+ * to the next full minute per twilio.com/help/223132307). Telnyx bills
+ * per-second. We detect Twilio by provider name and apply the round-up.
+ */
 declare function calculateTelephonyCost(provider: string, durationSeconds: number, pricing: Record<string, ProviderPricing>): number;
 /**
@@ -862,9 +1194,40 @@ declare function calculateTelephonyCost(provider: string, durationSeconds: numbe
 interface LatencyBreakdown {
     stt_ms: number;
+    /**
+     * Backwards-compatible LLM bucket. With the split below, this now reflects
+     * the user-perceived first-token latency (TTFT) when streaming is available
+     * and the full generation time otherwise. Prefer ``llm_ttft_ms`` /
+     * ``llm_total_ms`` in new code.
+     */
     llm_ms: number;
+    /** Time-to-first-token (UX-facing latency): stt_complete → first LLM token. */
+    llm_ttft_ms?: number;
+    /**
+     * Total LLM generation time: stt_complete → last LLM token. Distinct from
+     * ``llm_ms`` so cost/throughput analysis and TTFT can be tracked separately.
+     */
+    llm_total_ms?: number;
     tts_ms: number;
     total_ms: number;
+    /**
+     * Endpoint latency: time from end-of-user-speech (VAD stop or STT
+     * ``speech_final``) to LLM dispatch. Captures the silence-detection +
+     * transcript-finalization gap. Optional — undefined when the source signal
+     * is missing.
+     */
+    endpoint_ms?: number;
+    /**
+     * Barge-in latency: time from user-interrupt detection to TTS playback
+     * actually halting (i.e. after ``sendClear`` returned). Optional — only
+     * populated on interrupted turns.
+     */
+    bargein_ms?: number;
+    /**
+     * Total TTS time: LLM-first-token (or first-sentence boundary) to last
+     * TTS audio byte sent. Optional — undefined when TTS never completed.
+     */
+    tts_total_ms?: number;
 }
 interface CostBreakdown {
     stt: number;
@@ -872,6 +1235,12 @@ interface CostBreakdown {
     llm: number;
     telephony: number;
     total: number;
+    /**
+     * Amount saved on LLM cost thanks to OpenAI Realtime prompt caching.
+     * ``llm`` above is the net cost AFTER this discount. Dashboards can
+     * render ``saved $X (pct%)`` next to the LLM line when > 0.
+     */
+    llm_cached_savings?: number;
 }
 interface TurnMetrics {
     turn_index: number;
@@ -889,6 +1258,9 @@ interface CallMetrics {
     cost: CostBreakdown;
     latency_avg: LatencyBreakdown;
     latency_p95: LatencyBreakdown;
+    latency_p50?: LatencyBreakdown;
+    latency_p90?: LatencyBreakdown;
+    latency_p99?: LatencyBreakdown;
     provider_mode: string;
     stt_provider: string;
     tts_provider: string;
@@ -928,18 +1300,48 @@ declare class CallMetricsAccumulator {
     private readonly _turns;
     private _turnStart;
     private _sttComplete;
+    private _llmFirstToken;
+    private _llmFirstSentenceComplete;
     private _llmComplete;
     private _ttsFirstByte;
+    /** Last TTS audio byte sent (hrTimeMs). Stamped by ``recordTtsComplete`` /
+     *  ``recordTtsCompleteTs``. Used to compute ``tts_total_ms``. */
+    private _ttsLastByte;
+    /** Endpoint signal (hrTimeMs) — VAD stop or STT speech_final, whichever
+     *  fires first. Used to compute ``endpoint_ms``. */
+    private _endpointSignalAt;
+    /** Monotonic stamp of LLM dispatch (paired with ``_endpointSignalAt``). */
+    private _turnCommittedMono;
+    /** Barge-in detected timestamp (hrTimeMs). */
+    private _bargeinDetectedAt;
+    /** TTS-stopped timestamp after barge-in (hrTimeMs). */
+    private _bargeinStoppedAt;
     private _turnUserText;
     private _turnSttAudioSeconds;
     private _totalSttAudioSeconds;
     private _totalTtsCharacters;
     private _totalRealtimeCost;
+    private _totalRealtimeCachedSavings;
     private _sttByteCount;
     private _sttSampleRate;
     private _sttBytesPerSample;
     private _actualTelephonyCost;
     private _actualSttCost;
+    private _totalLlmCost;
+    private _eventBus;
+    /** Timestamp (hrTimeMs) when VAD emitted speech_end. */
+    private _vadStoppedAt;
+    /** Timestamp (hrTimeMs) when STT emitted its final transcript. */
+    private _sttFinalAt;
+    /** Timestamp (hrTimeMs) when the transcript was committed to the LLM. */
+    private _turnCommittedAt;
+    /** Delta (ms) from turn-committed to on_user_turn_completed hook done. */
+    private _onUserTurnCompletedDelayMs;
+    private _numInterruptions;
+    private _numBackchannels;
+    private _overlapStartedAt;
+    private _reportOnlyInitialTtfb;
+    private _initialTtfbEmitted;
     constructor(opts: {
         callId: string;
         providerMode: string;
@@ -948,23 +1350,116 @@ declare class CallMetricsAccumulator {
         ttsProvider?: string;
         llmProvider?: string;
         pricing?: Record<string, Partial<ProviderPricing>> | null;
+        eventBus?: EventBus;
+        /** When true, only the first TTFB emission per call is forwarded to the event bus. */
+        reportOnlyInitialTtfb?: boolean;
     });
+    /**
+     * Attach (or replace) an EventBus after construction.
+     * Useful when the bus is created after the accumulator (e.g. in tests).
+     */
+    attachEventBus(bus: EventBus): void;
     /** Configure audio format for STT byte-to-seconds conversion. */
     configureSttFormat(sampleRate?: number, bytesPerSample?: number): void;
     /** Whether a turn is currently being measured (startTurn called, not yet completed). */
     get turnActive(): boolean;
     startTurn(): void;
+    /**
+     * Start a new turn only if no turn is currently open.
+     * Use this at inbound-audio ingestion points so the turn timer begins
+     * on the first audio byte rather than just before recordSttComplete().
+     */
+    startTurnIfIdle(): void;
     recordSttComplete(text: string, audioSeconds?: number): void;
+    /** Record the timestamp of the first LLM token (TTFT). No-op after first call. */
+    recordLlmFirstToken(): void;
+    /**
+     * Record when the sentence chunker emits the first complete sentence.
+     * Used as the TTS span start so tts_ms reflects true TTS-provider latency
+     * rather than the gap from llm_complete (which fires after the full response).
+     * No-op after first call.
+     */
+    recordLlmFirstSentenceComplete(): void;
     recordLlmComplete(): void;
     recordTtsFirstByte(): void;
     recordTtsComplete(text: string): void;
+    /**
+     * Capture the timestamp when the last TTS audio byte was sent on the wire.
+     * Useful when the caller wants to record the timing without bumping the
+     * character counter (e.g. interrupted turns where audio actually went out
+     * but synthesis was truncated).
+     */
+    recordTtsCompleteTs(ts?: number): void;
+    /**
+     * Mark the moment a user interrupt (barge-in) was detected. Pairs with
+     * ``recordTtsStopped`` to compute ``bargein_ms``.
+     */
+    recordBargeinDetected(ts?: number): void;
+    /**
+     * Mark the moment TTS playback was actually halted after a barge-in. Call
+     * this *after* ``sendClear`` returns. Pairs with ``recordBargeinDetected``
+     * to compute ``bargein_ms``.
+     */
+    recordTtsStopped(ts?: number): void;
     recordTurnComplete(agentText: string): TurnMetrics;
     recordTurnInterrupted(): TurnMetrics | null;
+    /**
+     * Record the moment VAD emitted speech_end for the current utterance.
+     * @param ts Optional override timestamp in hrTimeMs units (defaults to now).
+     */
+    recordVadStop(ts?: number): void;
+    /**
+     * Record the moment the STT provider delivered its final transcript.
+     * Aliased to the same instant as recordSttComplete() when called from
+     * the standard pipeline; can be called independently for custom pipelines.
+     * @param ts Optional override timestamp in hrTimeMs units.
+     */
+    recordSttFinalTimestamp(ts?: number): void;
+    /**
+     * Record the moment the transcript was committed to the LLM (turn start).
+     * After this call, ``emitEouMetrics()`` can produce a complete EOUMetrics payload.
+     * @param ts Optional override timestamp in hrTimeMs units.
+     */
+    recordTurnCommitted(ts?: number): void;
+    /**
+     * Record the delta (ms) between turn-committed and the moment the
+     * on_user_turn_completed pipeline hook finished. Stored for inclusion in
+     * the next ``emitEouMetrics`` call (or an explicit re-emit if desired).
+     */
+    recordOnUserTurnCompletedDelay(delayMs: number): void;
+    /**
+     * Compute and emit EOUMetrics when all three prerequisite timestamps are
+     * available (VAD stop, STT final, turn committed).
+     *
+     * ``endOfUtteranceDelay``     = sttFinal − vadStopped  (ms)
+     * ``transcriptionDelay``       = turnCommitted − vadStopped  (ms)
+     * ``onUserTurnCompletedDelay`` = caller-supplied delta (ms) or 0
+     */
+    emitEouMetrics(): void;
+    /**
+     * Record that a caller utterance started overlapping with agent speech.
+     * Call this when VAD detects speech_start during TTS playback.
+     * @param ts Optional override timestamp in hrTimeMs units.
+     */
+    recordOverlapStart(ts?: number): void;
+    /**
+     * Record that the overlap ended.  Emits ``InterruptionMetrics`` via the
+     * event bus.
+     *
+     * @param wasInterruption  true → barge-in (increments ``numInterruptions``),
+     *                         false → backchannel (increments ``numBackchannels``).
+     * @param ts Optional override timestamp in hrTimeMs units.
+     */
+    recordOverlapEnd(wasInterruption: boolean, ts?: number): void;
     addSttAudioBytes(byteCount: number): void;
     recordRealtimeUsage(usage: {
         input_token_details?: {
             audio_tokens?: number;
             text_tokens?: number;
+            cached_tokens_details?: {
+                audio_tokens?: number;
+                text_tokens?: number;
+            };
         };
         output_token_details?: {
             audio_tokens?: number;
@@ -973,118 +1468,161 @@ declare class CallMetricsAccumulator {
     }): void;
     setActualTelephonyCost(cost: number): void;
     setActualSttCost(cost: number): void;
+    /**
+     * Accumulate LLM token cost for pipeline mode (non-Realtime).
+     *
+     * Called by LLMLoop.run() when a usage chunk arrives from the provider.
+     * Mirrors Python's CallMetricsAccumulator.record_llm_usage().
+     *
+     * @param provider   LLM provider key (e.g. 'openai', 'anthropic')
+     * @param model      Model name (e.g. 'gpt-4o-mini')
+     * @param inputTokens       Total input tokens (includes cached)
+     * @param outputTokens      Total output tokens
+     * @param cacheReadTokens   Cached input tokens (subtracted from input before billing full rate)
+     * @param cacheWriteTokens  Cache write tokens (billed at cache_write rate if present)
+     */
+    recordLlmUsage(provider: string, model: string, inputTokens: number, outputTokens: number, cacheReadTokens?: number, cacheWriteTokens?: number): void;
     endCall(): CallMetrics;
     getCostSoFar(): CostBreakdown;
     private _resetTurnState;
     private _computeTurnLatency;
     private _computeCost;
+    /**
+     * Turns eligible for latency statistics.
+     *
+     * Excludes turns marked ``[interrupted]`` (barge-in, cancelled replacements)
+     * because their recorded latency reflects either partial state or zero —
+     * including them would drag every p95/avg bucket toward meaningless numbers.
+     */
+    private _completedTurns;
     private _computeAverageLatency;
-    private _computeP95Latency;
+    private _computePercentileLatency;
 }
+/**
+ * Supported OpenAI Realtime wire audio formats. See
+ * https://platform.openai.com/docs/guides/realtime for the full list.
+ * ``g711_ulaw`` matches what Twilio/Telnyx emit natively on the phone leg,
+ * so no transcoding is needed. ``pcm16`` is used in the terminal test-mode
+ * path and when the telephony provider negotiates L16/16000.
+ */
+type OpenAIRealtimeAudioFormat = 'g711_ulaw' | 'g711_alaw' | 'pcm16';
+type RealtimeEventCallback = (type: string, data: unknown) => void | Promise<void>;
+interface OpenAIRealtimeOptions {
+    temperature?: number;
+    maxResponseOutputTokens?: number | 'inf';
+    modalities?: string[];
+    toolChoice?: string | Record<string, unknown>;
+    inputAudioTranscriptionModel?: string;
+    vadType?: 'server_vad' | 'semantic_vad';
+    /**
+     * Trailing silence (ms) the server VAD waits for before treating the user's
+     * turn as complete. Defaults to 300 — OpenAI's documented sweet-spot for
+     * snappier turn-taking, ~200 ms faster than the previous 500 default.
+     * Increase for dictation-style flows where the user pauses mid-sentence.
+     */
+    silenceDurationMs?: number;
+}
 declare class OpenAIRealtimeAdapter {
     private readonly apiKey;
     private readonly model;
     private readonly voice;
     private readonly instructions;
     private readonly tools?;
+    private readonly audioFormat;
     private ws;
+    private readonly eventCallbacks;
+    private messageListenerAttached;
+    private heartbeat;
+    private currentResponseItemId;
+    private currentResponseAudioMs;
+    private readonly options;
     constructor(apiKey: string, model?: string, voice?: string, instructions?: string, tools?: Array<{
         name: string;
         description: string;
         parameters: Record<string, unknown>;
-    }> | undefined);
+    }> | undefined, audioFormat?: OpenAIRealtimeAudioFormat, options?: OpenAIRealtimeOptions);
     connect(): Promise<void>;
     sendAudio(mulawAudio: Buffer): void;
-    onEvent(callback: (type: string, data: unknown) => void | Promise<void>): void;
+    /**
+     * Register a listener for parsed realtime events.
+     *
+     * Previously every call attached a new ``ws.on('message')`` handler,
+     * which leaked listeners across retries and multi-consumer hooks. We now
+     * route all traffic through a single persistent handler that fans out to
+     * a Set of callbacks. Use {@link offEvent} to remove one.
+     */
+    onEvent(callback: RealtimeEventCallback): void;
+    offEvent(callback: RealtimeEventCallback): void;
+    private ensureMessageListener;
     cancelResponse(): void;
     sendText(text: string): Promise<void>;
     sendFunctionResult(callId: string, result: string): Promise<void>;
     close(): void;
 }
+interface ElevenLabsConvAIOptions {
+    apiKey: string;
+    agentId?: string;
+    voiceId?: string;
+    modelId?: string;
+    language?: string;
+    firstMessage?: string;
+    outputAudioFormat?: string;
+    inputAudioFormat?: string;
+    useSignedUrl?: boolean;
+}
+type EventCallback = (type: string, data: unknown) => void | Promise<void>;
 declare class ElevenLabsConvAIAdapter {
+    private ws;
+    private eventCallback;
     private readonly apiKey;
     private readonly agentId;
     private readonly voiceId;
+    readonly modelId: string;
+    private readonly language;
     private readonly firstMessage;
-    private ws;
-    private eventCallback;
-    constructor(apiKey: string, agentId?: string, voiceId?: string, _modelId?: string, _language?: string, firstMessage?: string);
-    connect(): Promise<void>;
-    sendAudio(audioBytes: Buffer): void;
-    onEvent(callback: (type: string, data: unknown) => void | Promise<void>): void;
-    close(): void;
-}
-/**
- * In-memory metrics store for the local dashboard.
- *
- * Keeps the last `maxCalls` completed calls and tracks active calls.
- * Supports SSE event subscribers for real-time updates.
- */
-interface CallRecord {
-    call_id: string;
-    caller: string;
-    callee: string;
-    direction: string;
-    started_at: number;
-    ended_at?: number;
-    /**
-     * Current lifecycle state: ``initiated`` (pre-registered), ``ringing``,
-     * ``in-progress``, ``completed``, ``no-answer``, ``busy``, ``failed``,
-     * ``canceled``, or ``webhook_error``.
-     */
-    status?: string;
-    transcript?: Array<{
-        role: string;
-        text: string;
-        timestamp: number;
-    }>;
-    turns?: unknown[];
-    metrics?: Record<string, unknown> | null;
-    [key: string]: unknown;
-}
-interface SSEEvent {
-    type: string;
-    data: Record<string, unknown>;
-}
-declare class MetricsStore extends EventEmitter {
-    private readonly maxCalls;
-    private calls;
-    private activeCalls;
-    /**
-     * Accepts either a numeric ``maxCalls`` (legacy positional — matches the
-     * original TS API) or an options object ``{ maxCalls }`` to align with the
-     * Python SDK's keyword-argument style. Plain literals also work:
-     * ``new MetricsStore()`` / ``new MetricsStore(100)`` / ``new MetricsStore({ maxCalls: 100 })``.
-     */
-    constructor(maxCallsOrOpts?: number | {
-        maxCalls?: number;
-    });
-    private publish;
-    recordCallStart(data: Record<string, unknown>): void;
+    readonly outputAudioFormat: string | undefined;
+    readonly inputAudioFormat: string | undefined;
+    private readonly useSignedUrl;
+    conversationId: string | null;
+    agentOutputAudioFormat: string | null;
+    userInputAudioFormat: string | null;
+    private agentSpeaking;
+    private silenceTimer;
+    private closePromise;
+    constructor(apiKey: string, agentId?: string, voiceId?: string, firstMessage?: string);
+    constructor(options: ElevenLabsConvAIOptions);
     /**
-     * Pre-register an outbound call before any webhook fires. Lets the
-     * dashboard surface attempts that never reach media (no-answer, busy,
-     * carrier-rejected). Mirrors the Python ``record_call_initiated``.
+     * Build an adapter pre-configured for Twilio Media Streams.
+     *
+     * Negotiates `ulaw_8000` for both `outputAudioFormat` and
+     * `inputAudioFormat`, matching Twilio's μ-law @ 8 kHz wire format. The
+     * SDK's stream handler detects this and skips the 8 kHz → 16 kHz inbound
+     * resample and the 16 kHz → 8 kHz / PCM → μ-law outbound transcode.
+     * Saves ~30–80 ms first-byte plus per-frame CPU on every turn.
      */
-    recordCallInitiated(data: Record<string, unknown>): void;
+    static forTwilio(apiKey: string, agentId: string, options?: Omit<ElevenLabsConvAIOptions, 'apiKey' | 'agentId' | 'outputAudioFormat' | 'inputAudioFormat'>): ElevenLabsConvAIAdapter;
     /**
-     * Update the status of an active or completed call. Terminal states
-     * (completed, no-answer, busy, failed, canceled, webhook_error) move the
-     * row from active to completed so the UI freezes the live duration timer.
+     * Build an adapter pre-configured for Telnyx bidirectional media.
+     *
+     * Telnyx negotiates PCMU @ 8 kHz when `streaming_start` sets
+     * `stream_bidirectional_codec=PCMU` (the SDK default). Picking
+     * `ulaw_8000` on both ConvAI directions removes every transcode on the
+     * audio path — same optimization as `forTwilio`.
      */
-    updateCallStatus(callId: string, status: string, extra?: Record<string, unknown>): void;
-    recordTurn(data: Record<string, unknown>): void;
-    recordCallEnd(data: Record<string, unknown>, metrics?: Record<string, unknown> | null): void;
-    getCalls(limit?: number, offset?: number): CallRecord[];
-    getCall(callId: string): CallRecord | null;
-    getActiveCalls(): CallRecord[];
-    getAggregates(): Record<string, unknown>;
-    getCallsInRange(fromTs?: number, toTs?: number): CallRecord[];
-    get callCount(): number;
+    static forTelnyx(apiKey: string, agentId: string, options?: Omit<ElevenLabsConvAIOptions, 'apiKey' | 'agentId' | 'outputAudioFormat' | 'inputAudioFormat'>): ElevenLabsConvAIAdapter;
+    private fetchSignedUrl;
+    connect(): Promise<void>;
+    private safeInvoke;
+    private respondToPing;
+    private clearSilenceTimer;
+    private finalizeAgentTurn;
+    private scheduleSilenceDone;
+    private handleMessage;
+    sendAudio(audioBytes: Buffer): void;
+    onEvent(callback: EventCallback): void;
+    close(): Promise<void>;
 }
 /**
@@ -1153,6 +1691,14 @@ interface LocalConfig {
      * are rejected with HTTP 403.
      */
     telnyxPublicKey?: string;
+    /**
+     * SECURITY: require valid webhook signatures on both Twilio and Telnyx
+     * inbound webhooks. When True (the default), a missing credential
+     * (twilioToken / telnyxPublicKey) causes the webhook to return
+     * 503 Service Unavailable instead of silently accepting the request.
+     * Set to false only for local development against mock providers.
+     */
+    requireSignature?: boolean;
 }
 /**
@@ -1210,6 +1756,12 @@ declare function mountApi(app: Express, store: MetricsStore, token?: string): vo
  * When the SDK completes a call, it fires a POST to the standalone dashboard
  * (if running) so calls appear in real time.  Data lives only in memory —
  * nothing is written to disk.
+ *
+ * TODO(parity): Python's `notify_dashboard` is now an async fire-and-forget
+ * coroutine (see sdk-py/getpatter/dashboard/persistence.py). This TS version
+ * uses `http.request` which is already non-blocking, but for parity consider
+ * exposing this as `async function notifyDashboard(...): Promise<void>` so
+ * call sites can `await` or `void` it explicitly, matching the Python API.
  */
 declare function notifyDashboard(callData: Record<string, unknown>, port?: number): void;
@@ -1275,6 +1827,215 @@ declare class FallbackLLMProvider implements LLMProvider {
     private stopRecovery;
 }
+/**
+ * PatterTool — wrap a live Patter instance as a tool callable from external
+ * agent frameworks (OpenAI Assistants, Anthropic Claude tool-use, LangChain,
+ * Hermes Agent, MCP, generic OpenAI-compatible endpoints).
+ *
+ * Pattern this enables: a customer already runs an agent in their existing
+ * stack (LangChain, OpenAI Assistant, Hermes Agent, …) and wants the agent
+ * to *make phone calls* during a conversation. With this tool, the customer
+ * registers `make_phone_call` and the agent's tool-call loop can dial out
+ * via Patter, get a transcript + cost back, and continue reasoning.
+ *
+ * ## Design
+ *
+ * Each `PatterTool` wraps one `Patter` instance (carrier + agent + serve).
+ * The tool exposes:
+ *
+ *   - `openaiSchema()`     — OpenAI / chat-completions tool spec
+ *   - `anthropicSchema()`  — Anthropic Claude tool spec
+ *   - `hermesSchema()`     — Hermes Agent / Nous registry schema (alias for
+ *                            anthropicSchema; same JSON-Schema shape)
+ *   - `execute(args)`      — dial outbound, await call end, return summary
+ *   - `hermesHandler()`    — returns an `(args) => Promise<string>` wrapper
+ *                            that yields a JSON string, or `{"error": "..."}`
+ *                            on failure (matches Hermes' tool contract)
+ *
+ * ## Usage (OpenAI / Anthropic)
+ *
+ * ```ts
+ * import { Patter, Twilio, DeepgramSTT, GroqLLM, ElevenLabsTTS } from 'getpatter';
+ * import { PatterTool } from 'getpatter/integrations';
+ *
+ * const phone = new Patter({
+ *   carrier: new Twilio(),
+ *   phoneNumber: process.env.TWILIO_PHONE_NUMBER!,
+ *   webhookUrl: 'agent.example.com',
+ * });
+ *
+ * const tool = new PatterTool({
+ *   phone,
+ *   agent: { stt: new DeepgramSTT(), llm: new GroqLLM(), tts: new ElevenLabsTTS() },
+ * });
+ *
+ * await tool.start();   // boots phone.serve() once
+ *
+ * // Register with your LLM
+ * const tools = [tool.openaiSchema()];
+ *
+ * // When the LLM emits a tool_call:
+ * const result = await tool.execute({
+ *   to: '+15551234567',
+ *   goal: 'Book a dentist appointment for next Tuesday afternoon.',
+ * });
+ * // → { call_id, status, duration_seconds, cost_usd, transcript, … }
+ * ```
+ *
+ * ## Usage (Hermes Agent)
+ *
+ * Hermes' contract: handler takes `args: dict` + kwargs, returns a JSON
+ * string. The TS SDK is meant to be invoked from Python via your own bridge
+ * (HTTP, MCP, subprocess); this `hermesSchema()` + `hermesHandler()` pair
+ * matches the Python adapter shipped under `getpatter.integrations` so the
+ * two SDKs stay in lockstep.
+ *
+ * For pure-Python Hermes setups, use `PatterTool` from `getpatter.integrations`
+ * directly inside a `tools/patter.py` module:
+ *
+ * ```python
+ * from tools.registry import registry
+ * from getpatter.integrations import PatterTool
+ *
+ * tool = PatterTool(phone=...)
+ * tool.register_hermes(registry)
+ * ```
+ */
+/** JSON-Schema of the call args. Identical wire shape across openai/anthropic/hermes. */
+declare const PARAMETERS_SCHEMA: {
+    readonly type: "object";
+    readonly properties: {
+        readonly to: {
+            readonly type: "string";
+            readonly description: "Destination phone number in E.164 format (e.g. \"+15551234567\"). Required.";
+        };
+        readonly goal: {
+            readonly type: "string";
+            readonly description: "What the agent should accomplish on the call. Becomes the in-call agent's system prompt for this single call.";
+        };
+        readonly first_message: {
+            readonly type: "string";
+            readonly description: "Optional first message the agent speaks when the callee answers. Defaults to a generic greeting.";
+        };
+        readonly max_duration_sec: {
+            readonly type: "integer";
+            readonly description: "Hard timeout for the call in seconds. Default 180. The call is force-ended at this deadline whether or not it has resolved.";
+            readonly minimum: 5;
+            readonly maximum: 1800;
+        };
+    };
+    readonly required: readonly ["to"];
+};
+interface PatterToolOptions {
+    /**
+     * Patter instance to dial through. Must be in local mode (have a `carrier`).
+     * The tool boots `phone.serve()` on `start()`; do not call `serve()` yourself.
+     */
+    phone: Patter;
+    /**
+     * Default agent config used for outbound calls. Per-call overrides come from
+     * `execute({ goal, first_message })`.
+     */
+    agent?: AgentOptions;
+    /** Tool name shown to the LLM. Default `'make_phone_call'`. */
+    name?: string;
+    /** Tool description for the LLM. Default tuned for English assistants. */
+    description?: string;
+    /** Default per-call timeout in seconds. Default 180. */
+    maxDurationSec?: number;
+    /**
+     * Optional pass-through for `phone.serve()`'s `recording` flag — record all
+     * outbound calls placed via this tool.
+     */
+    recording?: boolean;
+}
+interface PatterToolExecuteArgs {
+    to: string;
+    goal?: string;
+    first_message?: string;
+    max_duration_sec?: number;
+}
+interface PatterToolResult {
+    call_id: string;
+    status: string;
+    duration_seconds: number;
+    cost_usd?: number;
+    transcript: Array<{
+        role: string;
+        text: string;
+        timestamp?: number;
+    }>;
+    metrics?: Record<string, unknown> | null;
+}
+declare class PatterTool {
+    readonly name: string;
+    readonly description: string;
+    private readonly phone;
+    private readonly agent;
+    private readonly maxDurationSec;
+    private readonly recording;
+    private started;
+    /** Resolver for the next `call_initiated` SSE event. Only set inside the
+     *  dial mutex (`dialQueue`), so two parallel `execute()` calls never share
+     *  it and never lose a dispatch. */
+    private pendingDial;
+    /** Mutex that serializes the dial → call_id capture critical section.
+     *  Each `execute()` chains a continuation onto this promise so the
+     *  `pendingDial` slot is owned by exactly one caller at a time. */
+    private dialQueue;
+    /** Captured SSE listener so `stop()` can detach it (prevents leaks when
+     *  the underlying Patter instance outlives this tool). */
+    private sseListener;
+    /** Captured Patter metrics store, for cleanup in `stop()`. */
+    private metricsStoreRef;
+    /** call_id → pending promise machinery. */
+    private readonly pending;
+    private readonly bus;
+    /** How long to wait for the `call_initiated` SSE before failing the dial. */
+    private static readonly DIAL_CAPTURE_TIMEOUT_MS;
+    constructor(opts: PatterToolOptions);
+    /** OpenAI Chat Completions / Assistants tool spec. */
+    openaiSchema(): {
+        type: 'function';
+        function: {
+            name: string;
+            description: string;
+            parameters: typeof PARAMETERS_SCHEMA;
+        };
+    };
+    /** Anthropic Messages API tool spec. */
+    anthropicSchema(): {
+        name: string;
+        description: string;
+        input_schema: typeof PARAMETERS_SCHEMA;
+    };
+    /**
+     * Hermes Agent (Nous Research) registry schema. Same JSON-Schema shape as
+     * Anthropic's; Hermes consumes it via `registry.register({ schema: ... })`.
+     */
+    hermesSchema(): {
+        name: string;
+        description: string;
+        parameters: typeof PARAMETERS_SCHEMA;
+    };
+    /** Start the underlying Patter server. Idempotent. */
+    start(): Promise<void>;
+    /** Stop the underlying Patter server (and reject any pending calls). */
+    stop(): Promise<void>;
+    execute(args: PatterToolExecuteArgs): Promise<PatterToolResult>;
+    /** Issue the outbound dial under the mutex and return its assigned call_id. */
+    private acquireCallId;
+    /**
+     * Hermes-style handler: `(args) => Promise<string>` resolving to a JSON
+     * string with either the result envelope or an `{"error": "..."}` payload.
+     * Mirrors the Python `PatterTool.hermes_handler` so cross-SDK adapters share
+     * the same wire contract.
+     */
+    hermesHandler(): (args: PatterToolExecuteArgs) => Promise<string>;
+    private onCallEndHandler;
+}
 /**
  * Interactive terminal test mode for voice agents.
  *
@@ -1303,6 +2064,12 @@ declare class TestSession {
  * not use Gemini Live do not pay the load cost. Install with:
  *
  *    npm install @google/genai
+ *
+ * NOTE: Native-audio Gemini Live models are **v1alpha-only**. We pass
+ * `httpOptions: { apiVersion: 'v1alpha' }` when constructing the client.
+ * When Google promotes native audio to GA, switch to `v1beta` / `v1` and
+ * update the default model below.
+ * See: https://ai.google.dev/gemini-api/docs/live
  */
 declare const GEMINI_DEFAULT_INPUT_SR = 16000;
 declare const GEMINI_DEFAULT_OUTPUT_SR = 24000;
@@ -1337,6 +2104,12 @@ declare class GeminiLiveAdapter {
     private receiveLoop;
     private handlers;
     private running;
+    /**
+     * Tracks call_id -> function name so tool responses can be sent back with
+     * the correct `name` field (Gemini expects the original function name,
+     * not the call_id).
+     */
+    private pendingToolCalls;
     constructor(apiKey: string, options?: GeminiLiveOptions);
     connect(): Promise<void>;
     sendAudio(pcm: Buffer): void;
@@ -1514,22 +2287,19 @@ declare class SonioxSTT {
 /**
  * AssemblyAI Universal Streaming STT adapter for the Patter SDK pipeline mode.
  *
- * Implements a `DeepgramSTT`-shaped provider using AssemblyAI's v3 streaming
- * WebSocket API. Pure `ws` transport — does NOT depend on the vendor SDK.
- *
- * Algorithm adapted from LiveKit Agents (Apache 2.0):
- * https://github.com/livekit/agents
- * Source: livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py
- * Upstream ref SHA: 78a66bcf79c5cea82989401c408f1dff4b961a5b
+ * Pure `ws` transport — does NOT depend on the vendor SDK.
  */
 interface Transcript$3 {
     readonly text: string;
     readonly isFinal: boolean;
     readonly confidence: number;
+    /** Optional event hint, e.g. `"SpeechStarted"` for barge-in signals. */
+    readonly eventType?: string;
 }
 type TranscriptCallback$3 = (transcript: Transcript$3) => void;
 type AssemblyAIEncoding = 'pcm_s16le' | 'pcm_mulaw';
-type AssemblyAIModel = 'universal-streaming-english' | 'universal-streaming-multilingual' | 'u3-rt-pro';
+type AssemblyAIModel = 'universal-streaming-english' | 'universal-streaming-multilingual' | 'u3-rt-pro' | 'whisper-rt';
+type AssemblyAIDomain = 'general' | 'medical-v1';
 interface AssemblyAISTTOptions$1 {
     /** One of the AssemblyAI speech models. */
     readonly model?: AssemblyAIModel;
@@ -1539,6 +2309,11 @@ interface AssemblyAISTTOptions$1 {
     readonly sampleRate?: number;
     /** Override the streaming base URL (e.g. EU: `wss://streaming.eu.assemblyai.com`). */
     readonly baseUrl?: string;
+    /**
+     * Authenticate via `?token=<apiKey>` in the URL instead of the
+     * `Authorization` header. Default `false`.
+     */
+    readonly useQueryToken?: boolean;
     /** Enable automatic language detection (defaults: true for multilingual/u3-rt-pro). */
     readonly languageDetection?: boolean;
     /** 0..1 confidence required before end-of-turn is finalized. */
@@ -1553,34 +2328,54 @@ interface AssemblyAISTTOptions$1 {
     readonly keytermsPrompt?: readonly string[];
     /** Text prompt (u3-rt-pro only). */
     readonly prompt?: string;
-    /** VAD threshold (0..1). */
+    /** Accepted for backward compatibility but NOT sent — not a valid v3 param. */
     readonly vadThreshold?: number;
     /** Enable diarization / speaker labels. */
     readonly speakerLabels?: boolean;
     /** Max speakers for diarization. */
     readonly maxSpeakers?: number;
-    /** Domain hint (e.g. "medical"). */
-    readonly domain?: string;
+    /** Domain hint — must be `"general"` or `"medical-v1"`. */
+    readonly domain?: AssemblyAIDomain;
 }
 declare class AssemblyAISTT {
     private readonly apiKey;
     private readonly options;
     private ws;
-    private callbacks;
+    private readonly callbacks;
+    private closing;
+    private reconnectAttempts;
+    private terminationResolve;
     /** AssemblyAI session id — set when the `Begin` message arrives. */
-    sessionId: string;
+    sessionId: string | null;
     /** Unix timestamp when the AssemblyAI session expires. */
-    expiresAt: number;
+    expiresAt: number | null;
     constructor(apiKey: string, options?: AssemblyAISTTOptions$1);
     /** Factory for Twilio calls — mulaw 8 kHz. */
     static forTwilio(apiKey: string, model?: AssemblyAIModel): AssemblyAISTT;
     private buildUrl;
+    private buildHeaders;
     connect(): Promise<void>;
+    private awaitOpen;
+    private attachHandlers;
+    private reconnect;
     private handleEvent;
     private emit;
     sendAudio(audio: Buffer): void;
-    onTranscript(callback: TranscriptCallback$3): void;
-    close(): void;
+    private estimateChunkDurationMs;
+    /**
+     * Send an `UpdateConfiguration` frame to change settings mid-stream.
+     * Only defined fields are included.
+     */
+    updateConfiguration(params: {
+        keytermsPrompt?: readonly string[];
+        prompt?: string;
+        minTurnSilence?: number;
+        maxTurnSilence?: number;
+    }): void;
+    /** Force the server to finalize the current turn (for barge-in). */
+    forceEndpoint(): void;
+    onTranscript(callback: TranscriptCallback$3): () => void;
+    close(): Promise<void>;
 }
 /**
@@ -1620,8 +2415,11 @@ declare class CartesiaSTT {
     private ws;
     private callbacks;
     private keepaliveTimer;
-    /** Cartesia request id — set from the server transcript events. */
-    requestId: string;
+    /**
+     * Cartesia request id — set from the server transcript events.
+     * `null` until the first transcript event arrives (matches Python's `None`).
+     */
+    requestId: string | null;
     constructor(apiKey: string, options?: CartesiaSTTOptions$1);
     private buildWsUrl;
     connect(): Promise<void>;
@@ -1629,7 +2427,23 @@ declare class CartesiaSTT {
     private emit;
     sendAudio(audio: Buffer): void;
     onTranscript(callback: TranscriptCallback$2): void;
+    /** Remove a previously registered transcript callback. */
+    offTranscript(callback: TranscriptCallback$2): void;
+    /**
+     * Synchronous best-effort close. Sends `finalize` and closes the socket
+     * without waiting for the server to flush any remaining transcripts.
+     *
+     * Limitation: any transcript events produced between the `finalize` send
+     * and the socket close may be dropped. Callers that need to guarantee all
+     * transcripts are delivered should await :meth:`closeAsync` instead.
+     */
     close(): void;
+    /**
+     * Graceful close that awaits the `finalize` send and the socket closing
+     * handshake, matching the Python adapter's behavior. Use this when you
+     * need any in-flight transcripts to be flushed before teardown.
+     */
+    closeAsync(): Promise<void>;
 }
 type LMNTAudioFormat = 'aac' | 'mp3' | 'mulaw' | 'raw' | 'wav';
@@ -1662,12 +2476,32 @@ declare class LMNTTTS {
     synthesizeStream(text: string): AsyncGenerator<Buffer>;
 }
+type TranscriptEventType = 'Results' | 'UtteranceEnd' | 'SpeechStarted';
+interface DeepgramWord {
+    readonly word?: string;
+    readonly start?: number;
+    readonly end?: number;
+    readonly confidence?: number;
+    readonly punctuated_word?: string;
+    readonly speaker?: number;
+}
 interface Transcript$1 {
     readonly text: string;
     readonly isFinal: boolean;
     readonly confidence: number;
+    /** Deepgram VAD hint — faster end-of-utterance than ``isFinal``. */
+    readonly speechFinal?: boolean;
+    /** True when this Results frame was produced in response to a Finalize. */
+    readonly fromFinalize?: boolean;
+    /** Deepgram request id, populated from the initial Metadata frame. */
+    readonly requestId?: string;
+    /** Per-word timings/metadata when Deepgram emits them. */
+    readonly words?: ReadonlyArray<DeepgramWord>;
+    /** Which provider event this Transcript represents. Default ``Results``. */
+    readonly eventType?: TranscriptEventType;
 }
 type TranscriptCallback$1 = (transcript: Transcript$1) => void;
+type ErrorCallback = (error: Error) => void;
 /**
  * Optional tuning knobs for Deepgram live transcription.
  *
@@ -1692,7 +2526,13 @@ interface DeepgramSTTOptions$1 {
      * hard minimum of 1000 ms. Set to ``null`` to disable. Default ``1000``.
      */
     readonly utteranceEndMs?: number | null;
-    /** Enable smart formatting (punctuation + numerals). Default ``true``. */
+    /**
+     * Enable smart formatting (punctuation + numerals). Default ``false`` —
+     * smart formatting adds roughly 50–150 ms to TTFT on each final transcript
+     * and is rarely useful for telephony pipelines that pass the text straight
+     * to an LLM. Set to ``true`` for use cases (dashboards, raw transcripts)
+     * where the formatted text is surfaced directly to humans.
+     */
     readonly smartFormat?: boolean;
     /** Emit interim (non-final) transcripts. Default ``true``. */
     readonly interimResults?: boolean;
@@ -1701,7 +2541,11 @@ interface DeepgramSTTOptions$1 {
 }
 declare class DeepgramSTT {
     private ws;
-    private callbacks;
+    private readonly transcriptCallbacks;
+    private readonly errorCallbacks;
+    private keepaliveTimer;
+    private running;
+    private reconnectAttempted;
     /** Request ID from Deepgram — used to query actual cost post-call. */
     requestId: string;
     private readonly apiKey;
@@ -1727,27 +2571,30 @@ declare class DeepgramSTT {
     });
     /** Factory for Twilio calls — mulaw 8 kHz. Forwards tuning options through. */
     static forTwilio(apiKey: string, language?: string, model?: string, options?: DeepgramSTTOptions$1): DeepgramSTT;
+    private buildUrl;
     connect(): Promise<void>;
+    private openSocket;
+    private clearKeepalive;
+    private handleMessage;
+    private emitTranscript;
+    private emitError;
+    private handleError;
+    private handleClose;
     sendAudio(audio: Buffer): void;
     onTranscript(callback: TranscriptCallback$1): void;
+    offTranscript(callback: TranscriptCallback$1): void;
+    onError(callback: ErrorCallback): void;
+    offError(callback: ErrorCallback): void;
     close(): void;
 }
 /** Deepgram streaming STT for Patter pipeline mode. */
-interface DeepgramSTTOptions {
+type DeepgramSTTOptions = DeepgramSTTOptions$1 & {
     /** API key. Falls back to DEEPGRAM_API_KEY env var when omitted. */
     apiKey?: string;
     language?: string;
-    model?: string;
-    encoding?: string;
-    sampleRate?: number;
-    endpointingMs?: number;
-    utteranceEndMs?: number | null;
-    smartFormat?: boolean;
-    interimResults?: boolean;
-    vadEvents?: boolean;
-}
+};
 /**
  * Deepgram streaming STT.
  *
@@ -1758,7 +2605,8 @@ interface DeepgramSTTOptions {
  * const stt = new deepgram.STT({ apiKey: "dg_...", endpointingMs: 80 });
  * ```
  */
-declare class STT$4 extends DeepgramSTT {
+declare class STT$5 extends DeepgramSTT {
+    static readonly providerKey = "deepgram";
     constructor(opts?: DeepgramSTTOptions);
 }
@@ -1774,22 +2622,44 @@ interface Transcript {
     readonly confidence: number;
 }
 type TranscriptCallback = (transcript: Transcript) => void;
+type WhisperResponseFormat = 'json' | 'verbose_json';
 declare class WhisperSTT {
     private readonly apiKey;
     private readonly model;
     private readonly language;
     private readonly bufferSize;
-    private buffer;
+    private readonly responseFormat;
+    private chunks;
+    private bufferedBytes;
     private callbacks;
     private running;
     private pendingTranscriptions;
-    constructor(apiKey: string, model?: string, language?: string, bufferSize?: number);
+    /**
+     * @param apiKey OpenAI API key.
+     * @param language ISO-639-1 language code (e.g. ``"en"``, ``"it"``). Optional.
+     * @param model One of ``whisper-1``, ``gpt-4o-transcribe``, ``gpt-4o-mini-transcribe``.
+     * @param bufferSize Bytes of PCM16 to buffer before each transcription request.
+     * @param responseFormat ``"json"`` (default) or ``"verbose_json"``.
+     *
+     * Argument order matches the Python SDK's ``WhisperSTT(api_key, language, model, response_format)``
+     * for cross-language parity. Pre-0.5.3 the TS positional order was
+     * ``(apiKey, model, language, bufferSize, responseFormat)`` — callers using
+     * the old order will need to swap ``language`` and ``model``.
+     */
+    constructor(apiKey: string, language?: string, model?: string, bufferSize?: number, responseFormat?: WhisperResponseFormat);
     /** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
     static forTwilio(apiKey: string, language?: string, model?: string): WhisperSTT;
     connect(): Promise<void>;
     sendAudio(audio: Buffer): void;
+    private flushChunks;
     private trackTranscription;
+    /**
+     * Register a transcript listener. Unlike the previous implementation
+     * which capped at 10 and silently replaced the last one, we now keep all
+     * registered callbacks in a Set; use {@link offTranscript} to remove one.
+     */
     onTranscript(callback: TranscriptCallback): void;
+    offTranscript(callback: TranscriptCallback): void;
     close(): Promise<void>;
     private transcribeBuffer;
 }
@@ -1802,6 +2672,8 @@ interface WhisperSTTOptions {
     model?: string;
     language?: string;
     bufferSize?: number;
+    /** ``"verbose_json"`` exposes segment-level confidence / timestamps. */
+    responseFormat?: WhisperResponseFormat;
 }
 /**
  * OpenAI Whisper STT.
@@ -1813,10 +2685,68 @@ interface WhisperSTTOptions {
  * const stt = new whisper.STT({ apiKey: "sk-...", language: "en" });
  * ```
  */
-declare class STT$3 extends WhisperSTT {
+declare class STT$4 extends WhisperSTT {
+    static readonly providerKey = "whisper";
     constructor(opts?: WhisperSTTOptions);
 }
+/**
+ * OpenAI GPT-4o Transcribe STT adapter for the Patter SDK pipeline mode.
+ *
+ * First-class wrapper around OpenAI's ``gpt-4o-transcribe`` /
+ * ``gpt-4o-mini-transcribe`` models. They share the
+ * ``POST /v1/audio/transcriptions`` endpoint with Whisper-1 but offer ~10x
+ * lower latency and stronger multilingual quality, making them a drop-in
+ * replacement for ``WhisperSTT`` whenever speed matters.
+ *
+ * Use this class instead of ``WhisperSTT`` when you specifically want the
+ * GPT-4o Transcribe family — it restricts the accepted models so
+ * misconfigured calls fail fast instead of silently dropping back to
+ * ``whisper-1``.
+ */
+declare class OpenAITranscribeSTT extends WhisperSTT {
+    /**
+     * @param apiKey OpenAI API key.
+     * @param language ISO-639-1 language code (e.g. ``"en"``, ``"it"``). Optional.
+     * @param model One of ``gpt-4o-transcribe`` (default), ``gpt-4o-mini-transcribe``.
+     *   ``"whisper-1"`` is intentionally rejected here — use ``WhisperSTT`` for that.
+     * @param bufferSize Bytes of PCM16 to buffer before each transcription request.
+     * @param responseFormat ``"json"`` (default) or ``"verbose_json"``.
+     */
+    constructor(apiKey: string, language?: string, model?: string, bufferSize?: number, responseFormat?: WhisperResponseFormat);
+}
+/** OpenAI GPT-4o Transcribe STT for Patter pipeline mode. */
+interface OpenAITranscribeSTTOptions {
+    /** API key. Falls back to OPENAI_API_KEY env var when omitted. */
+    apiKey?: string;
+    /** ``gpt-4o-transcribe`` (default) or ``gpt-4o-mini-transcribe``. */
+    model?: string;
+    language?: string;
+    bufferSize?: number;
+    /** ``"verbose_json"`` exposes segment-level confidence / timestamps. */
+    responseFormat?: WhisperResponseFormat;
+}
+/**
+ * OpenAI GPT-4o Transcribe STT — ~10x faster than Whisper-1.
+ *
+ * Drop-in replacement for ``whisper.STT`` with stronger multilingual
+ * quality and significantly lower latency.
+ *
+ * @example
+ * ```ts
+ * import * as openaiTranscribe from "getpatter/stt/openai-transcribe";
+ * const stt = new openaiTranscribe.STT();              // reads OPENAI_API_KEY
+ * const stt = new openaiTranscribe.STT({ apiKey: "sk-...", language: "en" });
+ * ```
+ */
+declare class STT$3 extends OpenAITranscribeSTT {
+    static readonly providerKey = "openai_transcribe";
+    constructor(opts?: OpenAITranscribeSTTOptions);
+}
 /** Cartesia streaming STT for Patter pipeline mode. */
 interface CartesiaSTTOptions {
@@ -1839,6 +2769,7 @@ interface CartesiaSTTOptions {
  * ```
  */
 declare class STT$2 extends CartesiaSTT {
+    static readonly providerKey = "cartesia_stt";
     constructor(opts?: CartesiaSTTOptions);
 }
@@ -1869,6 +2800,7 @@ interface SonioxSTTOptions {
  * ```
  */
 declare class STT$1 extends SonioxSTT {
+    static readonly providerKey = "soniox";
     constructor(opts?: SonioxSTTOptions);
 }
@@ -1891,7 +2823,7 @@ interface AssemblyAISTTOptions {
     vadThreshold?: number;
     speakerLabels?: boolean;
     maxSpeakers?: number;
-    domain?: string;
+    domain?: AssemblyAIDomain;
 }
 /**
  * AssemblyAI Universal Streaming STT.
@@ -1904,15 +2836,103 @@ interface AssemblyAISTTOptions {
  * ```
  */
 declare class STT extends AssemblyAISTT {
+    static readonly providerKey = "assemblyai";
     constructor(opts?: AssemblyAISTTOptions);
 }
+/**
+ * Known stable ElevenLabs voice models (from the official ElevenLabs API
+ * reference). Provided as a string-literal union for autocomplete + type
+ * narrowing; the public ``modelId`` option also accepts ``string`` so
+ * users can pass forward-compat IDs we haven't enumerated yet.
+ *
+ * - ``eleven_v3`` — newest, highest quality (slower TTFT than Flash).
+ * - ``eleven_flash_v2_5`` — current default, fastest (~75 ms TTFT).
+ * - ``eleven_turbo_v2_5`` — balanced quality/speed.
+ * - ``eleven_multilingual_v2`` — best multilingual support.
+ * - ``eleven_monolingual_v1`` — legacy English-only.
+ */
+type ElevenLabsModel = 'eleven_v3' | 'eleven_flash_v2_5' | 'eleven_turbo_v2_5' | 'eleven_multilingual_v2' | 'eleven_monolingual_v1';
+type ElevenLabsOutputFormat = 'mp3_22050_32' | 'mp3_44100_32' | 'mp3_44100_64' | 'mp3_44100_96' | 'mp3_44100_128' | 'mp3_44100_192' | 'pcm_8000' | 'pcm_16000' | 'pcm_22050' | 'pcm_24000' | 'pcm_44100' | 'ulaw_8000';
+interface ElevenLabsVoiceSettings {
+    stability?: number;
+    similarity_boost?: number;
+    style?: number;
+    use_speaker_boost?: boolean;
+}
+interface ElevenLabsTTSOptions$1 {
+    voiceId?: string;
+    /**
+     * ElevenLabs voice model ID. The default ``eleven_flash_v2_5`` has the
+     * lowest TTFT (~75 ms). Pass ``eleven_v3`` for highest quality, or any
+     * arbitrary string for forward-compat with future models.
+     */
+    modelId?: ElevenLabsModel | string;
+    outputFormat?: ElevenLabsOutputFormat;
+    voiceSettings?: ElevenLabsVoiceSettings;
+    languageCode?: string;
+    chunkSize?: number;
+}
+/**
+ * ElevenLabs streaming TTS adapter.
+ *
+ * Supported `modelId` values are autocompleted via {@link ElevenLabsModel}.
+ * Default is `eleven_flash_v2_5` (lowest TTFT, ~75 ms).
+ *
+ * **Telephony optimization** — the constructor default
+ * `outputFormat='pcm_16000'` is correct for web playback, dashboard
+ * previews, and 16 kHz pipelines. For real phone calls, use the
+ * carrier-specific factories instead:
+ *
+ * - {@link ElevenLabsTTS.forTwilio} emits `ulaw_8000` natively. Twilio's
+ *   media-stream WebSocket expects μ-law @ 8 kHz, so the SDK normally
+ *   resamples 16 kHz → 8 kHz and PCM → μ-law before sending. Asking
+ *   ElevenLabs to produce μ-law directly skips that step (saves
+ *   ~30–80 ms first-byte plus per-frame CPU and avoids any resampling
+ *   aliasing).
+ * - {@link ElevenLabsTTS.forTelnyx} emits `pcm_16000`. Telnyx negotiates
+ *   L16/16000 on its bidirectional media WebSocket, so 16 kHz PCM is
+ *   already the format used end-to-end and no transcoding happens.
+ *   ElevenLabs *also* supports `ulaw_8000` if your Telnyx profile is
+ *   pinned to PCMU/8000 — pass `outputFormat: 'ulaw_8000'` explicitly
+ *   in that case.
+ */
 declare class ElevenLabsTTS {
     private readonly apiKey;
+    private readonly voiceId;
     private readonly modelId;
     private readonly outputFormat;
-    private readonly voiceId;
-    constructor(apiKey: string, voiceId?: string, modelId?: string, outputFormat?: string);
+    private readonly voiceSettings;
+    private readonly languageCode;
+    private readonly chunkSize;
+    constructor(apiKey: string, voiceId?: string, modelId?: string, outputFormat?: ElevenLabsOutputFormat | string);
+    constructor(apiKey: string, options: ElevenLabsTTSOptions$1);
+    /**
+     * Construct an instance pre-configured for Twilio Media Streams.
+     *
+     * Sets `outputFormat='ulaw_8000'` so ElevenLabs emits μ-law @ 8 kHz
+     * directly — the exact wire format Twilio's media stream uses — letting
+     * the SDK skip the 16 kHz→8 kHz resample and PCM→μ-law conversion in
+     * `TwilioAudioSender`. Saves ~30–80 ms first-byte and per-frame CPU,
+     * and removes a potential aliasing source.
+     *
+     * `voiceSettings` defaults to a low-bandwidth-friendly profile
+     * (speaker boost off, modest stability) which sounds cleaner at 8 kHz
+     * μ-law than the studio default. Pass an explicit object to override.
+     */
+    static forTwilio(apiKey: string, options?: Omit<ElevenLabsTTSOptions$1, 'outputFormat'>): ElevenLabsTTS;
+    /**
+     * Construct an instance pre-configured for Telnyx bidirectional media.
+     *
+     * Telnyx's default media-streaming codec is L16 PCM @ 16 kHz, which
+     * matches our default Telnyx handler. We pick `pcm_16000` so the audio
+     * flows end-to-end with zero resampling or transcoding.
+     *
+     * Trade-off: if your Telnyx profile is pinned to PCMU/8000 (μ-law),
+     * construct `ElevenLabsTTS` directly with `outputFormat: 'ulaw_8000'`
+     * — Telnyx supports that natively too.
+     */
+    static forTelnyx(apiKey: string, options?: Omit<ElevenLabsTTSOptions$1, 'outputFormat'>): ElevenLabsTTS;
     /**
      * Synthesise text to speech and return the full audio as a single Buffer.
      *
@@ -1923,7 +2943,8 @@ declare class ElevenLabsTTS {
      * Synthesise text and yield audio chunks as they arrive (streaming).
      *
      * The yielded buffers are raw PCM at 16 kHz (or whatever `outputFormat` is
-     * configured to).
+     * configured to). `chunkSize` controls the maximum yield size — 512 is a
+     * good choice for low-latency telephony.
      */
     synthesizeStream(text: string): AsyncGenerator<Buffer>;
 }
@@ -1934,9 +2955,15 @@ interface ElevenLabsTTSOptions {
     /** API key. Falls back to ELEVENLABS_API_KEY env var when omitted. */
     apiKey?: string;
     voiceId?: string;
-    modelId?: string;
+    /**
+     * ElevenLabs voice model ID. Default is ``eleven_flash_v2_5`` (lowest TTFT).
+     * Pass ``eleven_v3`` for highest quality, or any string for forward-compat.
+     */
+    modelId?: ElevenLabsModel | string;
     outputFormat?: string;
 }
+/** Options for the carrier-specific factories — same as the constructor minus `outputFormat`. */
+type ElevenLabsCarrierOptions = Omit<ElevenLabsTTSOptions, "outputFormat">;
 /**
  * ElevenLabs TTS.
  *
@@ -1946,16 +2973,31 @@ interface ElevenLabsTTSOptions {
  * const tts = new elevenlabs.TTS();              // reads ELEVENLABS_API_KEY
  * const tts = new elevenlabs.TTS({ apiKey: "...", voiceId: "rachel" });
  * ```
+ *
+ * **Telephony optimization** — use {@link TTS.forTwilio} (μ-law @ 8 kHz,
+ * native Twilio Media Streams format) or {@link TTS.forTelnyx} (PCM @
+ * 16 kHz, native Telnyx default) on phone calls to skip the SDK-side
+ * resampling / transcoding step.
  */
 declare class TTS$4 extends ElevenLabsTTS {
+    static readonly providerKey = "elevenlabs";
     constructor(opts?: ElevenLabsTTSOptions);
+    /** Pipeline TTS pre-configured for Twilio Media Streams (`ulaw_8000`). */
+    static forTwilio(opts?: ElevenLabsCarrierOptions): TTS$4;
+    static forTwilio(apiKey: string, options?: Omit<ElevenLabsTTSOptions, "outputFormat">): TTS$4;
+    /** Pipeline TTS pre-configured for Telnyx (`pcm_16000`). */
+    static forTelnyx(opts?: ElevenLabsCarrierOptions): TTS$4;
+    static forTelnyx(apiKey: string, options?: Omit<ElevenLabsTTSOptions, "outputFormat">): TTS$4;
 }
 declare class OpenAITTS {
     private readonly apiKey;
     private readonly voice;
     private readonly model;
-    constructor(apiKey: string, voice?: string, model?: string);
+    private readonly instructions;
+    private readonly speed;
+    private readonly antiAlias;
+    constructor(apiKey: string, voice?: string, model?: string, instructions?: string | null, speed?: number | null, antiAlias?: boolean);
     /**
      * Synthesise text to speech and return the full audio as a single Buffer.
      *
@@ -1965,26 +3007,36 @@ declare class OpenAITTS {
     /**
      * Synthesise text and yield audio chunks as they arrive (streaming).
      *
-     * OpenAI returns 24 kHz PCM16; each chunk is resampled to 16 kHz before
-     * yielding so the output is ready for telephony pipelines.
+     * OpenAI returns 24 kHz PCM16; each chunk is lowpass-filtered then
+     * decimated 3:2 to 16 kHz before yielding so the output is ready for
+     * telephony pipelines.
      *
-     * The resampler carries state (buffered samples + odd trailing byte)
-     * between chunks — without that state cross-chunk sample alignment drifts
-     * and the caller hears pops / dropped audio (BUG #23, mirror of the
-     * Python `audioop.ratecv` fix).
+     * The resampler carries state (filter memory + buffered samples + odd
+     * trailing byte) between chunks so cross-chunk sample alignment and
+     * filter phase don't reset on every network read.
      */
     synthesizeStream(text: string): AsyncGenerator<Buffer>;
     /**
-     * Streaming 24 kHz → 16 kHz resampler (PCM16-LE). Maintains cross-chunk
-     * state so the 3:2 pattern doesn't reset at every network read.
+     * Streaming 24 kHz → 16 kHz resampler (PCM16-LE). Applies a single-pole
+     * lowpass ahead of the 3:2 decimation and carries filter + sample state
+     * across chunks so the cadence doesn't reset at every network read.
+     *
+     * ``ctx.lpfEnabled`` (default true on the streaming path, false for the
+     * legacy static helper) controls whether the LPF is engaged — we keep
+     * the helper bit-exact for the downsample-only tests while the real
+     * streaming path gets anti-alias filtering.
      */
-    static resampleStreaming(audio: Buffer, ctx: {
-        carryByte: number | null;
-        leftover: number[];
-    }): Buffer;
+    static resampleStreaming(audio: Buffer, ctx: ResampleCtx): Buffer;
     /** @deprecated use {@link resampleStreaming} with persistent state. */
     static resample24kTo16k(audio: Buffer): Buffer;
 }
+interface ResampleCtx {
+    carryByte: number | null;
+    leftover: number[];
+    lpfPrev: number;
+    /** Enable the single-pole lowpass ahead of decimation. Default true. */
+    lpfEnabled?: boolean;
+}
 /** OpenAI TTS for Patter pipeline mode. */
@@ -1993,6 +3045,16 @@ interface OpenAITTSOptions {
     apiKey?: string;
     voice?: string;
     model?: string;
+    /** Voice-direction prompt (only honoured for gpt-4o-mini-tts and newer). */
+    instructions?: string;
+    /** Speech speed multiplier, must be in [0.25, 4.0] when set. */
+    speed?: number;
+    /**
+     * Enable anti-aliasing LPF ahead of the 3:2 decimation. Defaults to
+     * ``false`` for backwards-compatibility; set to ``true`` for cleaner
+     * audio on sibilants / fricatives.
+     */
+    antiAlias?: boolean;
 }
 /**
  * OpenAI TTS.
@@ -2005,6 +3067,7 @@ interface OpenAITTSOptions {
  * ```
  */
 declare class TTS$3 extends OpenAITTS {
+    static readonly providerKey = "openai_tts";
     constructor(opts?: OpenAITTSOptions);
 }
@@ -2031,6 +3094,25 @@ declare class CartesiaTTS {
     private readonly baseUrl;
     private readonly apiVersion;
     constructor(apiKey: string, opts?: CartesiaTTSOptions$1);
+    /**
+     * Construct an instance pre-configured for Twilio Media Streams.
+     *
+     * Sets `sampleRate=8000` so Cartesia emits PCM_S16LE @ 8 kHz directly.
+     * Twilio's media stream uses μ-law @ 8 kHz so the SDK still does the
+     * PCM → μ-law transcode client-side, but the 16 kHz → 8 kHz resample
+     * step is skipped. Saves ~10–30 ms first-byte plus per-frame CPU and
+     * removes a potential aliasing source.
+     */
+    static forTwilio(apiKey: string, options?: Omit<CartesiaTTSOptions$1, 'sampleRate'>): CartesiaTTS;
+    /**
+     * Construct an instance pre-configured for Telnyx bidirectional media.
+     *
+     * Sets `sampleRate=16000` to match Telnyx's L16/16000 default codec —
+     * audio flows end-to-end with zero resampling or transcoding. Same as
+     * the bare-constructor default; exists for API symmetry with
+     * {@link CartesiaTTS.forTwilio}.
+     */
+    static forTelnyx(apiKey: string, options?: Omit<CartesiaTTSOptions$1, 'sampleRate'>): CartesiaTTS;
     /** Build the JSON payload for the Cartesia bytes endpoint. */
     private buildPayload;
     /** Synthesize text and return the concatenated audio buffer. */
@@ -2057,8 +3139,14 @@ interface CartesiaTTSOptions {
     baseUrl?: string;
     apiVersion?: string;
 }
+/** Options for the carrier-specific factories — same as the constructor minus `sampleRate`. */
+type CartesiaCarrierOptions = Omit<CartesiaTTSOptions, "sampleRate">;
 /**
- * Cartesia TTS (sonic-2).
+ * Cartesia TTS (sonic-3 GA, ~90 ms TTFB).
+ *
+ * The default model is `sonic-3` — Cartesia's current GA model. Voice IDs
+ * from the previous `sonic-2` family (including the default Katie voice)
+ * remain compatible.
  *
  * @example
  * ```ts
@@ -2066,9 +3154,21 @@ interface CartesiaTTSOptions {
  * const tts = new cartesia.TTS();              // reads CARTESIA_API_KEY
  * const tts = new cartesia.TTS({ apiKey: "..." });
  * ```
+ *
+ * **Telephony optimization** — use {@link TTS.forTwilio} (PCM @ 8 kHz,
+ * skipping the SDK-side 16 kHz → 8 kHz resample before μ-law transcoding)
+ * or {@link TTS.forTelnyx} (PCM @ 16 kHz, native Telnyx default) on
+ * phone calls.
  */
 declare class TTS$2 extends CartesiaTTS {
+    static readonly providerKey = "cartesia_tts";
     constructor(opts?: CartesiaTTSOptions);
+    /** Pipeline TTS pre-configured for Twilio Media Streams (PCM @ 8 kHz). */
+    static forTwilio(opts?: CartesiaCarrierOptions): TTS$2;
+    static forTwilio(apiKey: string, options?: Omit<CartesiaTTSOptions, "sampleRate">): TTS$2;
+    /** Pipeline TTS pre-configured for Telnyx (PCM @ 16 kHz). */
+    static forTelnyx(opts?: CartesiaCarrierOptions): TTS$2;
+    static forTelnyx(apiKey: string, options?: Omit<CartesiaTTSOptions, "sampleRate">): TTS$2;
 }
 interface RimeTTSOptions$1 {
@@ -2142,6 +3242,7 @@ interface RimeTTSOptions {
  * ```
  */
 declare class TTS$1 extends RimeTTS {
+    static readonly providerKey = "rime";
     constructor(opts?: RimeTTSOptions);
 }
@@ -2170,6 +3271,7 @@ interface LMNTTTSOptions {
  * ```
  */
 declare class TTS extends LMNTTTS {
+    static readonly providerKey = "lmnt";
     constructor(opts?: LMNTTTSOptions);
 }
@@ -2180,6 +3282,26 @@ interface OpenAILLMOptions {
     apiKey?: string;
     /** Chat Completions model id. Defaults to ``"gpt-4o-mini"``. */
     model?: string;
+    /** Sampling temperature [0, 2]. */
+    temperature?: number;
+    /** Max tokens in the assistant response (sent as ``max_completion_tokens``). */
+    maxTokens?: number;
+    /** OpenAI-style ``response_format`` for JSON mode / structured outputs. */
+    responseFormat?: Record<string, unknown>;
+    /** Whether to allow parallel tool calls. */
+    parallelToolCalls?: boolean;
+    /** ``"auto" | "none" | "required"`` or a specific tool object. */
+    toolChoice?: string | Record<string, unknown>;
+    /** Sampling seed for reproducible outputs. */
+    seed?: number;
+    /** Nucleus sampling cutoff in [0, 1]. */
+    topP?: number;
+    /** Penalty in [-2, 2] applied to repeated tokens. */
+    frequencyPenalty?: number;
+    /** Penalty in [-2, 2] applied to seen tokens. */
+    presencePenalty?: number;
+    /** Stop sequence(s). */
+    stop?: string | string[];
 }
 /**
  * OpenAI Chat Completions LLM provider.
@@ -2188,10 +3310,11 @@ interface OpenAILLMOptions {
  * ```ts
  * import * as openai from "getpatter/llm/openai";
  * const llm = new openai.LLM();                           // reads OPENAI_API_KEY
- * const llm = new openai.LLM({ apiKey: "sk-...", model: "gpt-4o-mini" });
+ * const llm = new openai.LLM({ apiKey: "sk-...", model: "gpt-4o-mini", temperature: 0.4 });
  * ```
  */
 declare class LLM$4 extends OpenAILLMProvider {
+    static readonly providerKey = "openai";
     constructor(opts?: OpenAILLMOptions);
 }
@@ -2230,6 +3353,19 @@ interface AnthropicLLMOptions$1 {
     temperature?: number;
     baseUrl?: string;
     anthropicVersion?: string;
+    /**
+     * Enable Anthropic prompt caching for the system prompt and tools.
+     * Defaults to ``true`` — for voice agents with long instruction-dense
+     * system prompts, the cache saves ~100-400 ms TTFT and ~90% of input-
+     * token cost on every cached turn. The cache lives ~5 minutes; the
+     * first request writes it, subsequent requests within that window
+     * hit it.
+     *
+     * Disable when the system prompt + tools combined are smaller than
+     * Anthropic's minimum cacheable size (~1024 tokens for Sonnet/Opus,
+     * ~2048 for Haiku) — caching has no effect below that threshold.
+     */
+    promptCaching?: boolean;
 }
 /** LLM provider backed by Anthropic's Messages API (streaming). */
 declare class AnthropicLLMProvider implements LLMProvider {
@@ -2239,6 +3375,7 @@ declare class AnthropicLLMProvider implements LLMProvider {
     private readonly temperature?;
     private readonly url;
     private readonly anthropicVersion;
+    private readonly promptCaching;
     constructor(options: AnthropicLLMOptions$1);
     stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
 }
@@ -2248,7 +3385,7 @@ declare class AnthropicLLMProvider implements LLMProvider {
 interface AnthropicLLMOptions {
     /** API key. Falls back to ANTHROPIC_API_KEY env var when omitted. */
     apiKey?: string;
-    /** Anthropic Messages API model id (e.g. ``"claude-3-5-sonnet-20241022"``). */
+    /** Anthropic Messages API model id (e.g. ``"claude-haiku-4-5-20251001"``). */
     model?: string;
     /** Maximum number of tokens to sample. Defaults to the adapter default. */
     maxTokens?: number;
@@ -2258,18 +3395,33 @@ interface AnthropicLLMOptions {
     baseUrl?: string;
     /** ``anthropic-version`` header override. */
     anthropicVersion?: string;
+    /**
+     * Enable Anthropic prompt caching (default: ``true``). For voice
+     * agents with long instruction-dense system prompts, the cache saves
+     * ~100-400 ms TTFT and ~90% input-token cost per cached turn. Disable
+     * if your system prompt + tools are below Anthropic's minimum
+     * cacheable size (~1024 tokens for Sonnet/Opus, ~2048 for Haiku) —
+     * caching has no effect below that threshold.
+     */
+    promptCaching?: boolean;
 }
 /**
  * Anthropic Claude LLM provider (Messages API, streaming).
  *
+ * Prompt caching is **enabled by default**. The first request writes
+ * the cache; subsequent requests within ~5 minutes hit it. Pass
+ * ``{ promptCaching: false }`` to opt out.
+ *
  * @example
  * ```ts
  * import * as anthropic from "getpatter/llm/anthropic";
  * const llm = new anthropic.LLM();                                   // reads ANTHROPIC_API_KEY
- * const llm = new anthropic.LLM({ apiKey: "sk-ant-...", model: "claude-3-5-sonnet-20241022" });
+ * const llm = new anthropic.LLM({ apiKey: "sk-ant-...", model: "claude-haiku-4-5-20251001" });
+ * const llm = new anthropic.LLM({ promptCaching: false });           // opt out of caching
  * ```
  */
 declare class LLM$3 extends AnthropicLLMProvider {
+    static readonly providerKey = "anthropic";
     constructor(opts?: AnthropicLLMOptions);
 }
@@ -2296,12 +3448,42 @@ interface GroqLLMOptions$1 {
     apiKey: string;
     model?: string;
     baseUrl?: string;
+    /** Sampling temperature [0, 2]. */
+    temperature?: number;
+    /** Max tokens in the assistant response (sent as ``max_completion_tokens``). */
+    maxTokens?: number;
+    /** OpenAI-style ``response_format`` for JSON mode / structured outputs. */
+    responseFormat?: Record<string, unknown>;
+    /** Whether to allow parallel tool calls. */
+    parallelToolCalls?: boolean;
+    /** ``"auto" | "none" | "required"`` or a specific tool object. */
+    toolChoice?: string | Record<string, unknown>;
+    /** Sampling seed. */
+    seed?: number;
+    /** Nucleus sampling cutoff in [0, 1]. */
+    topP?: number;
+    /** Penalty in [-2, 2] applied to repeated tokens. */
+    frequencyPenalty?: number;
+    /** Penalty in [-2, 2] applied to seen tokens. */
+    presencePenalty?: number;
+    /** Stop sequence(s). */
+    stop?: string | string[];
 }
 /** LLM provider backed by Groq's OpenAI-compatible Chat Completions API. */
 declare class GroqLLMProvider implements LLMProvider {
     private readonly apiKey;
-    private readonly model;
+    readonly model: string;
     private readonly baseUrl;
+    private readonly temperature?;
+    private readonly maxTokens?;
+    private readonly responseFormat?;
+    private readonly parallelToolCalls?;
+    private readonly toolChoice?;
+    private readonly seed?;
+    private readonly topP?;
+    private readonly frequencyPenalty?;
+    private readonly presencePenalty?;
+    private readonly stop?;
     constructor(options: GroqLLMOptions$1);
     stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
 }
@@ -2315,6 +3497,26 @@ interface GroqLLMOptions {
     model?: string;
     /** Override the OpenAI-compatible base URL (rarely needed). */
     baseUrl?: string;
+    /** Sampling temperature [0, 2]. */
+    temperature?: number;
+    /** Max tokens in the assistant response (sent as ``max_completion_tokens``). */
+    maxTokens?: number;
+    /** OpenAI-style ``response_format`` for JSON mode / structured outputs. */
+    responseFormat?: Record<string, unknown>;
+    /** Whether to allow parallel tool calls. */
+    parallelToolCalls?: boolean;
+    /** ``"auto" | "none" | "required"`` or a specific tool object. */
+    toolChoice?: string | Record<string, unknown>;
+    /** Sampling seed. */
+    seed?: number;
+    /** Nucleus sampling cutoff in [0, 1]. */
+    topP?: number;
+    /** Penalty in [-2, 2] applied to repeated tokens. */
+    frequencyPenalty?: number;
+    /** Penalty in [-2, 2] applied to seen tokens. */
+    presencePenalty?: number;
+    /** Stop sequence(s). */
+    stop?: string | string[];
 }
 /**
  * Groq LLM provider (OpenAI-compatible Chat Completions, streaming).
@@ -2327,6 +3529,7 @@ interface GroqLLMOptions {
  * ```
  */
 declare class LLM$2 extends GroqLLMProvider {
+    static readonly providerKey = "groq";
     constructor(opts?: GroqLLMOptions);
 }
@@ -2358,15 +3561,68 @@ interface CerebrasLLMOptions$1 {
     apiKey: string;
     model?: string;
     baseUrl?: string;
-    /** Gzip request payloads for faster TTFT on large prompts. */
+    /**
+     * Gzip request payloads for faster TTFT on large prompts. Defaults to
+     * ``true`` (parity with Python SDK) — set ``false`` to disable.
+     *
+     * msgpack encoding is Python-only; TS uses gzip alone, which captures
+     * ~85% of the TTFT win.
+     */
     gzipCompression?: boolean;
+    /** Sampling temperature [0, 2]. */
+    temperature?: number;
+    /** Max tokens in the assistant response (sent as ``max_completion_tokens``). */
+    maxTokens?: number;
+    /**
+     * Optional OpenAI-style ``response_format`` for JSON mode / structured
+     * outputs, e.g. ``{ type: 'json_schema', json_schema: { ... } }``.
+     * See https://inference-docs.cerebras.ai/capabilities/structured-outputs.
+     */
+    responseFormat?: Record<string, unknown>;
+    /** Whether to allow parallel tool calls. */
+    parallelToolCalls?: boolean;
+    /** ``"auto" | "none" | "required"`` or a specific tool object. */
+    toolChoice?: string | Record<string, unknown>;
+    /** Sampling seed for reproducible outputs. */
+    seed?: number;
+    /** Nucleus sampling cutoff in [0, 1]. */
+    topP?: number;
+    /** Penalty in [-2, 2] applied to repeated tokens. */
+    frequencyPenalty?: number;
+    /** Penalty in [-2, 2] applied to seen tokens. */
+    presencePenalty?: number;
+    /** Stop sequence(s). */
+    stop?: string | string[];
 }
-/** LLM provider backed by Cerebras's OpenAI-compatible Inference API. */
+/**
+ * LLM provider backed by Cerebras's OpenAI-compatible Inference API.
+ *
+ * Available models on Cerebras (verified against
+ * https://inference-docs.cerebras.ai/models/overview):
+ *
+ *   Production:
+ *     - gpt-oss-120b                         (default — highest throughput on Cerebras, no deprecation)
+ *     - llama3.1-8b                          (smaller context alternative; deprecating 2026-05-27)
+ *
+ *   Preview (opt-in):
+ *     - qwen-3-235b-a22b-instruct-2507       (multilingual, strong on European languages)
+ *     - zai-glm-4.7
+ */
 declare class CerebrasLLMProvider implements LLMProvider {
     private readonly apiKey;
-    private readonly model;
+    readonly model: string;
     private readonly baseUrl;
     private readonly gzipCompression;
+    private readonly temperature?;
+    private readonly maxTokens?;
+    private readonly responseFormat?;
+    private readonly parallelToolCalls?;
+    private readonly toolChoice?;
+    private readonly seed?;
+    private readonly topP?;
+    private readonly frequencyPenalty?;
+    private readonly presencePenalty?;
+    private readonly stop?;
     constructor(options: CerebrasLLMOptions$1);
     stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
 }
@@ -2376,12 +3632,32 @@ declare class CerebrasLLMProvider implements LLMProvider {
 interface CerebrasLLMOptions {
     /** API key. Falls back to CEREBRAS_API_KEY env var when omitted. */
     apiKey?: string;
-    /** Model id (e.g. ``"llama3.1-8b"``). */
+    /** Model id (e.g. ``"gpt-oss-120b"``). */
     model?: string;
     /** Override the OpenAI-compatible base URL (rarely needed). */
     baseUrl?: string;
     /** Gzip request payloads for faster TTFT on large prompts. */
     gzipCompression?: boolean;
+    /** Sampling temperature [0, 2]. */
+    temperature?: number;
+    /** Max tokens in the assistant response (sent as ``max_completion_tokens``). */
+    maxTokens?: number;
+    /** OpenAI-style ``response_format`` for JSON mode / structured outputs. */
+    responseFormat?: Record<string, unknown>;
+    /** Whether to allow parallel tool calls. */
+    parallelToolCalls?: boolean;
+    /** ``"auto" | "none" | "required"`` or a specific tool object. */
+    toolChoice?: string | Record<string, unknown>;
+    /** Sampling seed for reproducible outputs. */
+    seed?: number;
+    /** Nucleus sampling cutoff in [0, 1]. */
+    topP?: number;
+    /** Penalty in [-2, 2] applied to repeated tokens. */
+    frequencyPenalty?: number;
+    /** Penalty in [-2, 2] applied to seen tokens. */
+    presencePenalty?: number;
+    /** Stop sequence(s). */
+    stop?: string | string[];
 }
 /**
  * Cerebras LLM provider (OpenAI-compatible Inference API, streaming).
@@ -2390,10 +3666,13 @@ interface CerebrasLLMOptions {
  * ```ts
  * import * as cerebras from "getpatter/llm/cerebras";
  * const llm = new cerebras.LLM();                              // reads CEREBRAS_API_KEY
+ * const llm = new cerebras.LLM({ apiKey: "csk-...", model: "gpt-oss-120b" });
+ * // smaller-context alternative:
  * const llm = new cerebras.LLM({ apiKey: "csk-...", model: "llama3.1-8b" });
  * ```
  */
 declare class LLM$1 extends CerebrasLLMProvider {
+    static readonly providerKey = "cerebras";
     constructor(opts?: CerebrasLLMOptions);
 }
@@ -2433,7 +3712,7 @@ interface GoogleLLMOptions$1 {
 /** LLM provider backed by Google Gemini (Developer API, streaming SSE). */
 declare class GoogleLLMProvider implements LLMProvider {
     private readonly apiKey;
-    private readonly model;
+    readonly model: string;
     private readonly baseUrl;
     private readonly temperature?;
     private readonly maxOutputTokens?;
@@ -2470,9 +3749,109 @@ interface GoogleLLMOptions {
  * ```
  */
 declare class LLM extends GoogleLLMProvider {
+    static readonly providerKey = "google";
     constructor(opts?: GoogleLLMOptions);
 }
+/**
+ * Silero VAD provider (TypeScript port).
+ *
+ * Acoustic voice activity detection backed by the Silero ONNX model. Buffers
+ * incoming int16 LE PCM frames, runs inference on fixed-size windows
+ * (256 samples at 8 kHz, 512 at 16 kHz), applies an exponential probability
+ * filter, and emits VADEvent transitions (speech_start / speech_end).
+ *
+ * Ported from LiveKit Agents (Apache 2.0):
+ *   https://github.com/livekit/agents
+ * Sources:
+ *   - livekit-plugins/livekit-plugins-silero/livekit/plugins/silero/vad.py
+ *   - livekit-plugins/livekit-plugins-silero/livekit/plugins/silero/onnx_model.py
+ *
+ * Adaptations for Patter:
+ *   - Input is raw PCM `Buffer` (int16 LE, mono) via
+ *     `processFrame(pcmChunk, sampleRate)`, not `livekit.rtc.AudioFrame`.
+ *   - onnxruntime-node is loaded lazily as an optional dependency.
+ *   - Emits `VADEvent` (Patter protocol) instead of LiveKit event types.
+ */
+declare const SUPPORTED_SAMPLE_RATES: readonly [8000, 16000];
+type SileroSampleRate = (typeof SUPPORTED_SAMPLE_RATES)[number];
+interface SileroVADOptions {
+    minSpeechDuration?: number;
+    minSilenceDuration?: number;
+    prefixPaddingDuration?: number;
+    activationThreshold?: number;
+    deactivationThreshold?: number;
+    sampleRate?: SileroSampleRate;
+    forceCpu?: boolean;
+    onnxFilePath?: string;
+}
+/**
+ * Minimal structural type for the subset of `onnxruntime-node` we depend on.
+ * Declared locally so consumers don't need the package installed at build time.
+ */
+interface OnnxInferenceSession {
+    run(feeds: Record<string, OnnxTensor>): Promise<Record<string, OnnxTensor>>;
+}
+interface OnnxTensor {
+    readonly data: Float32Array | BigInt64Array;
+    readonly dims: readonly number[];
+}
+interface OnnxRuntime {
+    InferenceSession: {
+        create(pathOrBuffer: string | Uint8Array, options?: Record<string, unknown>): Promise<OnnxInferenceSession>;
+    };
+    Tensor: new (type: 'float32' | 'int64', data: Float32Array | BigInt64Array, dims: readonly number[]) => OnnxTensor;
+}
+/**
+ * Silero-based `VADProvider`. Load via `SileroVAD.load()`:
+ *
+ *     const vad = await SileroVAD.load({ sampleRate: 16000 });
+ *     const evt = await vad.processFrame(pcm, 16000);
+ *     if (evt && evt.type === 'speech_start') { ... }
+ *     await vad.close();
+ */
+declare class SileroVAD implements VADProvider {
+    private readonly model;
+    private readonly opts;
+    private pending;
+    private expFilter;
+    private pubSpeaking;
+    private speechThresholdDuration;
+    private silenceThresholdDuration;
+    private closed;
+    private constructor();
+    /**
+     * Load the Silero VAD model. Defaults match the LiveKit Silero plugin.
+     * Throws if `onnxruntime-node` is not installed.
+     */
+    static load(options?: SileroVADOptions): Promise<SileroVAD>;
+    /**
+     * Internal factory used by tests — bypasses onnxruntime-node loading.
+     * @internal
+     */
+    static fromOnnxModel(runtime: OnnxRuntime, session: OnnxInferenceSession, options: Required<Omit<SileroVADOptions, 'onnxFilePath' | 'forceCpu'>>): SileroVAD;
+    get sampleRate(): SileroSampleRate;
+    /**
+     * Number of int16 PCM samples the model needs to run one inference
+     * window (the ideal chunk size to pass to processFrame per call).
+     *
+     * Constraint (ported from LiveKit Agents / Silero ONNX spec):
+     *   - 16 000 Hz → 512 samples (32 ms)
+     *   -  8 000 Hz → 256 samples (32 ms)
+     *
+     * Callers that feed raw audio in fixed-size chunks (e.g. WebSocket frames)
+     * should buffer incoming audio until at least numFramesRequired() int16
+     * samples are available before calling processFrame.  The provider
+     * internally buffers partial windows so smaller chunks are also safe, but
+     * passing exactly one window per call minimises heap allocation.
+     */
+    numFramesRequired(): number;
+    processFrame(pcmChunk: Buffer, sampleRate: number): Promise<VADEvent | null>;
+    private advanceState;
+    close(): Promise<void>;
+}
 /**
  * Audio transcoding utilities for Patter TypeScript SDK.
  *
@@ -2495,6 +3874,137 @@ declare function mulawToPcm16(mulawData: Buffer): Buffer;
  * If the input length is odd, the trailing byte is ignored.
  */
 declare function pcm16ToMulaw(pcmData: Buffer): Buffer;
+/**
+ * Buffers a trailing odd byte across chunk boundaries so that downstream
+ * consumers (resamplers, encoders) always receive even-length (2-byte-aligned)
+ * PCM16 buffers.
+ *
+ * Mirror of the Python-side PcmCarry helper. Typical usage:
+ *
+ * ```ts
+ * const carry = new PcmCarry();
+ * for (const raw of stream) {
+ *   const aligned = carry.push(raw);
+ *   if (aligned.length > 0) process(aligned);
+ * }
+ * const tail = carry.flush();
+ * if (tail.length > 0) process(tail);
+ * ```
+ */
+declare class PcmCarry {
+    private pending;
+    /**
+     * Prepend any carried odd byte, return the even-length prefix, and stash
+     * any new trailing odd byte for the next call.
+     *
+     * Returns a zero-length buffer when no complete sample is yet available.
+     */
+    push(chunk: Buffer): Buffer;
+    /**
+     * Return any pending byte as a 1-byte buffer (rare in practice — only if
+     * the entire stream had an odd byte count), then reset internal state.
+     */
+    flush(): Buffer;
+    /** Reset carry state without flushing. */
+    reset(): void;
+}
+/** Options for constructing a {@link StatefulResampler}. */
+interface StatefulResamplerOptions {
+    srcRate: number;
+    dstRate: number;
+    /** Number of channels (default 1 / mono). */
+    channels?: number;
+}
+/**
+ * Stateful PCM16 resampler that carries tail state across chunk boundaries,
+ * eliminating the boundary discontinuities present in the legacy one-shot
+ * helpers.
+ *
+ * Supported conversions:
+ * - 16 000 → 8 000 Hz  (2:1 decimation with 5-tap FIR anti-alias)
+ * - 8 000 → 16 000 Hz  (1:2 linear interpolation)
+ * - 24 000 → 16 000 Hz (3:2 linear interpolation)
+ *
+ * process() accepts, and process()/flush() return, Buffer (PCM16-LE, mono by default).
+ */
+declare class StatefulResampler {
+    private readonly srcRate;
+    private readonly dstRate;
+    private firHistory;
+    private firHistoryValid;
+    private firPendingSample;
+    private upsampleLast;
+    private upsampleHasHistory;
+    private resample24Last;
+    private resample24Phase;
+    private resample24HasHistory;
+    private readonly carry;
+    constructor(opts: StatefulResamplerOptions);
+    /**
+     * Process a chunk of PCM16-LE samples.
+     *
+     * Handles odd-byte inputs via an internal carry buffer. Returns an even-byte-
+     * aligned output buffer; may return a zero-length buffer if not enough
+     * aligned input is available yet.
+     */
+    process(pcm: Buffer): Buffer;
+    /**
+     * Flush internal state and return any remaining output samples.
+     *
+     * For 8k→16k: the deferred last sample is emitted duplicated (matching
+     * the stateless helper's end-of-stream behaviour).
+     * For 16k→8k: any pending odd sample is processed with edge-replication.
+     * Resets all state after flushing.
+     */
+    flush(): Buffer;
+    /** Reset all carried state (e.g. at call boundaries). */
+    reset(): void;
+    /**
+     * 2:1 decimation with a 5-tap binomial FIR anti-alias filter.
+     *
+     * FIR coefficients: [1, 4, 6, 4, 1] / 16 (cutoff ~Fs/4 = 4 kHz).
+     *
+     * Cross-chunk state:
+     * - `firHistory[0]` = s_{-2}, `firHistory[1]` = s_{-1} relative to the
+     *   virtual stream (seeded to first-sample on the very first call).
+     * - `firPendingSample` = a lone input sample carried from a chunk whose
+     *   sample count was odd; it will become the first input of the next chunk.
+     *
+     * Decimation: outputs are at even positions (0, 2, 4 …) in the virtual
+     * extended stream, so every 2 input samples yield 1 output. An odd-sample-
+     * count chunk leaves 1 sample in `firPendingSample`; the next chunk
+     * prepends it so the output cadence is unbroken.
+     */
+    private _downsample16kTo8k;
+    /**
+     * 1:2 linear-interpolation upsampler.
+     *
+     * For the first chunk (no history): emits 2*(N-1) samples and defers the
+     * last sample. For subsequent chunks (with history): emits the deferred
+     * sample + its interpolated midpoint THEN 2*(N-1) samples from the new
+     * chunk, deferring the new last sample. Total across K chunks + flush =
+     * 2*total_input_samples (correct output length).
+     *
+     * Call flush() after the final chunk to emit the last deferred sample
+     * pair (self-duplicate at end of stream).
+     */
+    private _upsample8kTo16k;
+    /**
+     * 3:2 linear-interpolation decimator (ratio srcRate/dstRate = 1.5).
+     *
+     * `resample24Phase` tracks the fractional input position of the next output
+     * sample relative to the START of the next chunk. Negative phase means the
+     * next output straddles the previous/current chunk boundary; those are
+     * handled using `resample24Last`.
+     */
+    private _resample24kTo16k;
+}
+/** Create a stateful 16 kHz → 8 kHz downsampling resampler. */
+declare function createResampler16kTo8k(): StatefulResampler;
+/** Create a stateful 8 kHz → 16 kHz upsampling resampler. */
+declare function createResampler8kTo16k(): StatefulResampler;
+/** Create a stateful 24 kHz → 16 kHz resampler (3:2 linear interpolation). */
+declare function createResampler24kTo16k(): StatefulResampler;
 /**
  * Upsample 8 kHz PCM16 to 16 kHz using linear interpolation.
  *
@@ -2503,21 +4013,33 @@ declare function pcm16ToMulaw(pcmData: Buffer): Buffer;
  * is duplicated to fill the final position.
  *
  * Output length = input length * 2.
+ *
+ * @deprecated Use {@link StatefulResampler} or {@link createResampler8kTo16k}
+ * for streaming pipelines where chunk-boundary continuity matters.
  */
 declare function resample8kTo16k(pcm8k: Buffer): Buffer;
 /**
- * Downsample 16 kHz PCM16 to 8 kHz by taking every 2nd sample.
+ * Downsample 16 kHz PCM16 to 8 kHz with anti-aliasing.
+ *
+ * Uses a 5-tap binomial low-pass FIR filter ([1, 4, 6, 4, 1] / 16) applied
+ * to every pair of input samples before decimating by 2.
  *
  * Output length = input length / 2.
+ *
+ * @deprecated Use {@link StatefulResampler} or {@link createResampler16kTo8k}
+ * for streaming pipelines where chunk-boundary continuity matters.
  */
 declare function resample16kTo8k(pcm16k: Buffer): Buffer;
 /**
- * Downsample 24 kHz PCM16 to 16 kHz by taking 2 of every 3 samples.
+ * Downsample 24 kHz PCM16 to 16 kHz with linear interpolation.
  *
- * Matches the Python backend approach: for every group of 3 input samples,
- * output the 1st and 2nd, skip the 3rd.
+ * For a 3:2 ratio, each output sample is a weighted blend of the two
+ * neighbouring input samples rather than simply dropping every third sample.
  *
  * Output length = floor(inputSamples * 2 / 3) * 2 bytes.
+ *
+ * @deprecated Use {@link StatefulResampler} (via {@link createResampler24kTo16k})
+ * or {@link OpenAITTS.resampleStreaming} for streaming pipelines.
  */
 declare function resample24kTo16k(pcm24k: Buffer): Buffer;
@@ -2834,4 +4356,193 @@ declare class BackgroundAudioPlayer implements BackgroundAudioPlayer$1 {
     private resampleTo;
 }
-export { type Agent, type AgentOptions, AllProvidersFailedError, type AnthropicConversion, LLM$3 as AnthropicLLM, type AnthropicLLMOptions, type AnthropicMessage, type AssemblyAIEncoding, type AssemblyAIModel, STT as AssemblyAISTT, type AssemblyAISTTOptions, type AudioConfig, type AudioSource, AuthenticationError, type BackgroundAudioOptions, BackgroundAudioPlayer, BuiltinAudioClip, type BuiltinAudioClipName, type BuiltinPcmSource, type Call, type CallControl, type CallEventHandler, type CallMetrics, CallMetricsAccumulator, type CallOptions, type CallRecord, type CartesiaEncoding, STT$2 as CartesiaSTT, type CartesiaSTTOptions, TTS$2 as CartesiaTTS, type CartesiaTTSOptions, LLM$1 as CerebrasLLM, type CerebrasLLMOptions, ChatContext, type ChatMessage, type ChatRole, CloudflareTunnel, type ConnectOptions, type CostBreakdown, type CreateAgentOptions, DEFAULT_MIN_SENTENCE_LEN, DEFAULT_PRICING, DTMF_EVENTS, STT$4 as DeepgramSTT, type DeepgramSTTOptions, type DefineToolInput, type DtmfEvent, ConvAI as ElevenLabsConvAI, ElevenLabsConvAIAdapter, type ConvAIOptions as ElevenLabsConvAIOptions, TTS$4 as ElevenLabsTTS, type ElevenLabsTTSOptions, FallbackLLMProvider, type FallbackLLMProviderOptions, type FilePcmSource, GEMINI_DEFAULT_INPUT_SR, GEMINI_DEFAULT_OUTPUT_SR, GeminiLiveAdapter, type GeminiLiveEventHandler, LLM as GoogleLLM, type GoogleLLMOptions, LLM$2 as GroqLLM, type GroqLLMOptions, Guardrail$1 as Guardrail, type GuardrailOptions, type HookContext, IVRActivity, type IVRActivityOptions, type IVRToolDefinition, type IncomingMessage, type JobCallback, type LLMChunk, LLMLoop, type LLMProvider, type LMNTAudioFormat, type LMNTModel, type LMNTSampleRate, TTS as LMNTTTS, type LMNTTTSOptions, type LatencyBreakdown, type LocalCallOptions, type LocalConfig, type LocalOptions, type Logger, type LoopCallback, type MessageHandler, MetricsStore, LLM$4 as OpenAILLM, type OpenAILLMOptions, OpenAILLMProvider, type OpenAIMessage, Realtime as OpenAIRealtime, OpenAIRealtimeAdapter, type 
RealtimeOptions as OpenAIRealtimeOptions, TTS$3 as OpenAITTS, type OpenAITTSOptions, type ParamSpec, PartialStreamError, Patter, PatterConnectionError, PatterError, type PatterOptions, type PhoneNumber, PipelineHookExecutor, type PipelineHooks, type PipelineMessageHandler, type ProviderPricing, ProvisionError, type RawPcmSource, RemoteMessageHandler, TTS$1 as RimeTTS, type RimeTTSOptions, type SSEEvent, type STTConfig, type ScheduleHandle, SentenceChunker, type ServeOptions, type SilenceCallback, STT$1 as SonioxSTT, type SonioxSTTOptions$1 as SonioxSTTOptions, Static as StaticTunnel, type TTSConfig, Carrier as Telnyx, type TelnyxCarrierOptions, TestSession, TfidfLoopDetector, type TfidfLoopDetectorOptions, Tool, type ToolDefinition, type ToolHandler, type ToolOptions, type TunnelHandle, type TurnMetrics, Carrier$1 as Twilio, type TwilioCarrierOptions, ULTRAVOX_DEFAULT_API_BASE, ULTRAVOX_DEFAULT_SR, type UltravoxEventHandler, UltravoxRealtimeAdapter, STT$3 as WhisperSTT, type WhisperSTTOptions, builtinClipPath, calculateRealtimeCost, calculateSttCost, calculateTelephonyCost, calculateTtsCost, callsToCsv, callsToJson, deepgram, defineTool, elevenlabs, filterEmoji, filterForTTS, filterMarkdown, formatDtmf, getLogger, guardrail, isRemoteUrl, isWebSocketUrl, makeAuthMiddleware, mergePricing, mixPcm, mountApi, mountDashboard, mulawToPcm16, notifyDashboard, openaiTts, pcm16ToMulaw, resample16kTo8k, resample24kTo16k, resample8kTo16k, resamplePcm, scheduleCron, scheduleInterval, scheduleOnce, selectSoundFromList, setLogger, startTunnel, tool, whisper };
+interface TwilioAdapterOptions {
+    /** Optional Twilio edge region (e.g. ``ie1`` for Ireland). */
+    region?: string;
+}
+interface ProvisionNumberOptions$1 {
+    /** ISO-3166-1 alpha-2 country code, e.g. ``"US"``. */
+    countryCode: string;
+    /** Optional North-American area code (e.g. ``"415"``). */
+    areaCode?: string;
+}
+interface ProvisionNumberResult$1 {
+    readonly phoneNumber: string;
+    readonly sid: string;
+}
+interface ConfigureNumberOptions$1 {
+    /** URL Twilio should hit when the number receives a call. */
+    voiceUrl: string;
+    /** Optional status callback URL for call lifecycle events. */
+    statusCallback?: string;
+}
+interface InitiateCallOptions$1 {
+    from: string;
+    to: string;
+    /**
+     * TwiML or absolute URL Twilio should request when the call connects.
+     * Mutually exclusive with ``streamUrl`` — provide exactly one.
+     */
+    url?: string;
+    /**
+     * Optional WebSocket stream URL. When provided (and ``url`` is not), the
+     * adapter auto-builds a ``<Response><Connect><Stream>`` TwiML document
+     * via {@link TwilioAdapter.generateStreamTwiml} and sends it as the ``Twiml`` form
+     * parameter. Mirrors the Python adapter's ``stream_url`` convenience path.
+     */
+    streamUrl?: string;
+    statusCallback?: string;
+    /** Value accepted by Twilio's ``MachineDetection`` parameter. */
+    machineDetection?: 'Enable' | 'DetectMessageEnd' | 'false';
+    /** Raw extra form parameters forwarded to the Calls endpoint. */
+    extraParams?: Record<string, string>;
+}
+interface InitiateCallResult$1 {
+    readonly callSid: string;
+}
+declare class TwilioAdapter {
+    readonly accountSid: string;
+    readonly region: string | undefined;
+    private readonly baseUrl;
+    private readonly authHeader;
+    constructor(accountSid: string, authToken: string, opts?: TwilioAdapterOptions);
+    private request;
+    /**
+     * Provision a local phone number in the given country.
+     *
+     * Lists available local numbers, then purchases the first match.
+     */
+    provisionNumber(opts: ProvisionNumberOptions$1): Promise<ProvisionNumberResult$1>;
+    /** Update an already-purchased number to point at our voice webhook. */
+    configureNumber(phoneNumberSid: string, opts: ConfigureNumberOptions$1): Promise<void>;
+    /** Place an outbound call. Returns the Twilio call SID. */
+    initiateCall(opts: InitiateCallOptions$1): Promise<InitiateCallResult$1>;
+    /**
+     * Build a minimal ``<Response><Connect><Stream url="..."/></Connect></Response>``
+     * TwiML document. Mirrors the Python adapter's ``generate_stream_twiml``.
+     */
+    static generateStreamTwiml(streamUrl: string): string;
+    /** Force-complete an in-progress call. */
+    endCall(callSid: string): Promise<void>;
+}
+interface ProvisionNumberOptions {
+    /** ISO-3166-1 alpha-2 country code (e.g. ``"US"``). */
+    countryCode: string;
+}
+interface ProvisionNumberResult {
+    readonly phoneNumber: string;
+    readonly orderId: string;
+}
+interface ConfigureNumberOptions {
+    /** Telnyx Call Control Application / Connection ID. */
+    connectionId: string;
+}
+interface InitiateCallOptions {
+    from: string;
+    to: string;
+    /** Override ``connectionId`` at dial time. Falls back to the adapter default. */
+    connectionId?: string;
+    /** Opaque state string that Telnyx echoes back on webhooks. Base64-encoded on the wire. */
+    clientState?: string;
+}
+interface InitiateCallResult {
+    readonly callControlId: string;
+}
+interface EndCallOptions {
+    /** Idempotency key for the hangup command. */
+    commandId?: string;
+}
+declare class TelnyxAdapter {
+    private readonly apiKey;
+    readonly connectionId: string | undefined;
+    private readonly baseUrl;
+    constructor(apiKey: string, connectionId?: string);
+    private request;
+    /**
+     * Search available numbers for ``countryCode`` and place an order for the
+     * first match. Returns both the reserved E.164 number and the order ID.
+     */
+    provisionNumber(opts: ProvisionNumberOptions): Promise<ProvisionNumberResult>;
+    /** Attach a number to a Call Control Application. */
+    configureNumber(phoneNumber: string, opts: ConfigureNumberOptions): Promise<void>;
+    /**
+     * Place an outbound call on the Call Control Application.
+     *
+     * Note: we intentionally do NOT pass ``stream_url`` here — audio streaming
+     * is configured on the Application itself (or started explicitly via a
+     * ``streaming_start`` command). Passing ``stream_url`` on dial is a
+     * deprecated code path that Telnyx rejects in newer API versions.
+     */
+    initiateCall(opts: InitiateCallOptions): Promise<InitiateCallResult>;
+    /** Hang up an in-progress call. */
+    endCall(callControlId: string, opts?: EndCallOptions): Promise<void>;
+}
+declare const SPAN_CALL = "getpatter.call";
+declare const SPAN_STT = "getpatter.stt";
+declare const SPAN_LLM = "getpatter.llm";
+declare const SPAN_TTS = "getpatter.tts";
+declare const SPAN_TOOL = "getpatter.tool";
+declare const SPAN_ENDPOINT = "getpatter.endpoint";
+declare const SPAN_BARGEIN = "getpatter.bargein";
+/**
+ * Minimal span surface area — subset of the OTel ``Span`` API the Patter SDK
+ * relies on. We keep this narrow so the no-op fallback stays trivial.
+ */
+interface Span {
+    setAttribute(key: string, value: unknown): void;
+    recordException(exception: unknown): void;
+    end(): void;
+}
+interface InitTracingOptions {
+    serviceName?: string;
+    otlpEndpoint?: string;
+    resourceAttributes?: Record<string, string>;
+}
+/**
+ * Initialize tracing. Returns ``true`` when OTel is wired, ``false`` otherwise
+ * (which covers both "env flag off" and "peer dep missing").
+ *
+ * If the optional SDK packages (``@opentelemetry/sdk-trace-node``,
+ * ``@opentelemetry/sdk-trace-base``, ``@opentelemetry/exporter-trace-otlp-http``)
+ * are installed, a ``NodeTracerProvider`` with OTLP/HTTP exporter is wired up
+ * automatically. Otherwise, spans produced via ``startSpan`` are still created
+ * against whatever global provider ``@opentelemetry/api`` resolves to (which
+ * may be a no-op if the host hasn't registered one).
+ */
+declare function initTracing(options?: InitTracingOptions): boolean;
+/** True only if the env flag is set AND the tracer initialized cleanly. */
+declare function isTracingEnabled(): boolean;
+/**
+ * Start a span. Callers must ``end()`` the returned span — use try/finally:
+ *
+ * ```ts
+ * const span = startSpan(SPAN_LLM, { 'llm.model': 'gpt-4o' });
+ * try { ... } finally { span.end(); }
+ * ```
+ *
+ * Returns a no-op span when tracing is disabled or unavailable.
+ */
+declare function startSpan(name: string, attrs?: Record<string, unknown>): Span;
+/**
+ * Observability entrypoint — re-exports the tracing API.
+ *
+ * See ``./tracing.ts`` for the implementation.
+ */
+/**
+ * Call lifecycle event — TS mirror of ``getpatter.models.CallEvent``.
+ *
+ * Kept in the observability namespace because the primary consumers are
+ * metrics/tracing sinks (e.g. dashboard ingestion).
+ */
+interface CallEvent {
+    readonly callId: string;
+    readonly caller?: string;
+    readonly callee?: string;
+    readonly direction?: string;
+}
+export { type AgentOptions, AllProvidersFailedError, type AnthropicConversion, LLM$3 as AnthropicLLM, type AnthropicLLMOptions, type AnthropicMessage, type AssemblyAIEncoding, type AssemblyAIModel, STT as AssemblyAISTT, type AssemblyAISTTOptions, type AudioConfig, type AudioSource, AuthenticationError, type BackgroundAudioOptions, BackgroundAudioPlayer, BuiltinAudioClip, type BuiltinAudioClipName, type BuiltinPcmSource, type CallControl, type CallEvent, type CallEventHandler, type CallMetrics, CallMetricsAccumulator, type CallRecord, type CartesiaEncoding, STT$2 as CartesiaSTT, type CartesiaSTTOptions, TTS$2 as CartesiaTTS, type CartesiaTTSOptions, LLM$1 as CerebrasLLM, type CerebrasLLMOptions, ChatContext, type ChatMessage, type ChatRole, CloudflareTunnel, type CostBreakdown, DEFAULT_MIN_SENTENCE_LEN, DEFAULT_PRICING, DTMF_EVENTS, STT$5 as DeepgramSTT, type DeepgramSTTOptions, DefaultToolExecutor, type DefaultToolExecutorOptions, type DefineToolInput, type DtmfEvent, ConvAI as ElevenLabsConvAI, ElevenLabsConvAIAdapter, type ConvAIOptions as ElevenLabsConvAIOptions, TTS$4 as ElevenLabsTTS, type ElevenLabsTTSOptions, EventBus, FallbackLLMProvider, type FallbackLLMProviderOptions, type FilePcmSource, GEMINI_DEFAULT_INPUT_SR, GEMINI_DEFAULT_OUTPUT_SR, GeminiLiveAdapter, type GeminiLiveEventHandler, LLM as GoogleLLM, type GoogleLLMOptions, LLM$2 as GroqLLM, type GroqLLMOptions, Guardrail$1 as Guardrail, type GuardrailOptions, type HookContext, IVRActivity, type IVRActivityOptions, type IVRToolDefinition, type IncomingMessage, type InitTracingOptions, type JobCallback, type LLMChunk, LLMLoop, type LLMProvider, type LMNTAudioFormat, type LMNTModel, type LMNTSampleRate, TTS as LMNTTTS, type LMNTTTSOptions, type LatencyBreakdown, type LocalCallOptions, type LocalConfig, type LocalOptions, type Logger, type LoopCallback, type MessageHandler, MetricsStore, Ngrok, LLM$4 as OpenAILLM, type OpenAILLMOptions, OpenAILLMProvider, type OpenAIMessage, Realtime as OpenAIRealtime, 
OpenAIRealtimeAdapter, type RealtimeOptions as OpenAIRealtimeOptions, TTS$3 as OpenAITTS, type OpenAITTSOptions, STT$3 as OpenAITranscribeSTT, type OpenAITranscribeSTTOptions, type ParamSpec, PartialStreamError, Patter, PatterConnectionError, PatterError, type PatterEventType, PatterTool, type PatterToolExecuteArgs, type PatterToolOptions, type PatterToolResult, PcmCarry, PipelineHookExecutor, type PipelineHooks, type PipelineMessageHandler, type ProviderPricing, ProvisionError, RateLimitError, type RawPcmSource, type RealtimeConfig, RemoteMessageHandler, TTS$1 as RimeTTS, type RimeTTSOptions, SPAN_BARGEIN, SPAN_CALL, SPAN_ENDPOINT, SPAN_LLM, SPAN_STT, SPAN_TOOL, SPAN_TTS, type SSEEvent, type STTConfig, type ScheduleHandle, SentenceChunker, type ServeOptions, type SilenceCallback, type SileroSampleRate, SileroVAD, type SileroVADOptions, STT$1 as SonioxSTT, type SonioxSTTOptions$1 as SonioxSTTOptions, type Span, StatefulResampler, type StatefulResamplerOptions, Static as StaticTunnel, type TTSConfig, Carrier as Telnyx, TelnyxAdapter, type TelnyxCarrierOptions, type ConfigureNumberOptions as TelnyxConfigureNumberOptions, type EndCallOptions as TelnyxEndCallOptions, type InitiateCallOptions as TelnyxInitiateCallOptions, type InitiateCallResult as TelnyxInitiateCallResult, type ProvisionNumberOptions as TelnyxProvisionNumberOptions, type ProvisionNumberResult as TelnyxProvisionNumberResult, TestSession, TfidfLoopDetector, type TfidfLoopDetectorOptions, Tool, type ToolDefinition, type ToolExecutor, type ToolHandler, type ToolOptions, type TunnelHandle, type TurnMetrics, Carrier$1 as Twilio, TwilioAdapter, type TwilioAdapterOptions, type TwilioCarrierOptions, type ConfigureNumberOptions$1 as TwilioConfigureNumberOptions, type InitiateCallOptions$1 as TwilioInitiateCallOptions, type InitiateCallResult$1 as TwilioInitiateCallResult, type ProvisionNumberOptions$1 as TwilioProvisionNumberOptions, type ProvisionNumberResult$1 as TwilioProvisionNumberResult, 
ULTRAVOX_DEFAULT_API_BASE, ULTRAVOX_DEFAULT_SR, type UltravoxEventHandler, UltravoxRealtimeAdapter, STT$4 as WhisperSTT, type WhisperSTTOptions, assemblyai, builtinClipPath, calculateRealtimeCost, calculateSttCost, calculateTelephonyCost, calculateTtsCost, callsToCsv, callsToJson, cartesia, createResampler16kTo8k, createResampler24kTo16k, createResampler8kTo16k, deepgram, defineTool, elevenlabs, filterEmoji, filterForTTS, filterMarkdown, formatDtmf, geminiLive, getLogger, guardrail, initTracing, isRemoteUrl, isTracingEnabled, isWebSocketUrl, lmnt, makeAuthMiddleware, mergePricing, mixPcm, mountApi, mountDashboard, mulawToPcm16, notifyDashboard, openaiTts, pcm16ToMulaw, resample16kTo8k, resample24kTo16k, resample8kTo16k, resamplePcm, rime, scheduleCron, scheduleInterval, scheduleOnce, selectSoundFromList, setLogger, soniox, speechmatics, startSpan, startTunnel, tool, ultravox, whisper };