npm - getpatter - Versions diffs - 0.6.3 → 0.6.5 - Mend

getpatter 0.6.3 → 0.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/README.md +5 -4
package/dist/{carrier-config-3WDQXP5J.mjs → carrier-config-7YGNRBPO.mjs} +17 -11
package/dist/{chunk-R2T4JABZ.mjs → chunk-3VVATR6A.mjs} +8 -6
package/dist/{chunk-CL2U3YET.mjs → chunk-BO227NTF.mjs} +271 -54
package/dist/{chunk-Z6W5XFWS.mjs → chunk-CRPJLVHB.mjs} +992 -197
package/dist/cli.js +63 -20
package/dist/dashboard/ui.html +10 -10
package/dist/index.d.mts +1250 -192
package/dist/index.d.ts +1250 -192
package/dist/index.js +2062 -518
package/dist/index.mjs +759 -250
package/dist/{openai-realtime-2-CNFARP25.mjs → openai-realtime-2-L5EKAAUH.mjs} +1 -1
package/dist/{silero-vad-LNDFGIY7.mjs → silero-vad-RGF5HCIR.mjs} +1 -1
package/dist/{test-mode-MDBQ4ECE.mjs → test-mode-HGHI2AUV.mjs} +2 -2
package/package.json +2 -1
package/src/dashboard/ui.html +10 -10

package/dist/index.d.ts (changed)

@@ -189,8 +189,17 @@ declare class SpeechEvents {
  *
  * Wraps `wss://api.openai.com/v1/realtime` and exposes the unified
  * Patter realtime contract (`connect / sendAudio / onEvent / close`) on
- * {@link OpenAIRealtimeAdapter}. Audio negotiation defaults to
- * `g711_ulaw` so traffic flows through Twilio/Telnyx without transcoding.
+ * {@link OpenAIRealtimeAdapter}.
+ *
+ * NOTE (issue #154): this class is no longer instantiated directly for the
+ * telephony bridge. OpenAI deprecated the Beta Realtime API, so its flat
+ * `output_audio_format: g711_ulaw` session shape is ignored by GA models —
+ * the server falls back to PCM16 @ 24 kHz, which this adapter would forward to
+ * Twilio framed as 8 kHz mulaw (static + broken STT). `buildAIAdapter` in
+ * `server.ts` now routes BOTH the `OpenAIRealtime` and `OpenAIRealtime2`
+ * engines through {@link OpenAIRealtime2Adapter} (GA session shape + internal
+ * PCM24→mulaw8 transcode). This class is retained as the shared base class
+ * that `OpenAIRealtime2Adapter` extends.
  */
 /**
@@ -292,6 +301,46 @@ interface OpenAIRealtimeOptions {
      * Has no effect on models that don't support the `reasoning` field.
      */
     reasoningEffort?: 'minimal' | 'low' | 'medium' | 'high';
+    /**
+     * Input noise reduction for speakerphone / conference audio. `undefined`
+     * (default) omits the field entirely (no reduction — today's behavior).
+     * `"far_field"` is recommended for phone / speakerphone calls;
+     * `"near_field"` for a handset close to the mouth.
+     *
+     * v1 wire shape: emitted at the top level of `session.update` as
+     * `input_audio_noise_reduction: { type }`. The GA adapter
+     * (`OpenAIRealtime2Adapter`) nests it under `audio.input` instead.
+     *
+     * Mirrors Python `noise_reduction` on `OpenAIRealtimeAdapter`.
+     */
+    noiseReduction?: 'near_field' | 'far_field';
+    /**
+     * Turn-detection tuning. `undefined` (default) keeps the adapter's current
+     * hardcoded `server_vad` / threshold `0.5` / silence 300 ms settings.
+     * Raise `threshold` or switch to `semantic_vad` with `eagerness: 'low'` to
+     * stop speakerphone / conference noise from triggering false barge-ins.
+     *
+     * Mirrors Python `turn_detection` on `OpenAIRealtimeAdapter` and
+     * `turn_detection` on the engine marker `engines.openai.Realtime`.
+     */
+    turnDetection?: RealtimeTurnDetection;
+    /**
+     * Gate the model's response on the Whisper transcript (legacy behavior).
+     *
+     * `false` (default) — the stream handler requests the response on
+     * `speech_stopped`, independently of the Whisper `transcript_input` event.
+     * The transcript is display-only (dashboard / history / `onTranscript`).
+     * `true` — the stream handler requests the response only after the
+     * `transcript_input` event passes the hallucination filter (prior
+     * behavior).
+     *
+     * The adapter itself does not act on this flag — it is read by the stream
+     * handler via {@link OpenAIRealtimeAdapter.getGateResponseOnTranscript} to
+     * decide WHEN to call {@link OpenAIRealtimeAdapter.requestResponse}.
+     *
+     * Mirrors Python `gate_response_on_transcript` on `OpenAIRealtimeAdapter`.
+     */
+    gateResponseOnTranscript?: boolean;
 }
 /** Realtime WebSocket adapter for OpenAI's `gpt-realtime` family. */
 declare class OpenAIRealtimeAdapter {
@@ -314,12 +363,22 @@ declare class OpenAIRealtimeAdapter {
     private currentResponseAudioMs;
     private currentResponseFirstAudioAt;
     protected readonly options: OpenAIRealtimeOptions;
+    private readonly gateResponseOnTranscript;
     constructor(apiKey: string, model?: string, voice?: string, instructions?: string, tools?: Array<{
         name: string;
         description: string;
         parameters: Record<string, unknown>;
         strict?: boolean;
     }> | undefined, audioFormat?: OpenAIRealtimeAudioFormat, options?: OpenAIRealtimeOptions);
+    /**
+     * Whether the stream handler should gate the model response on the Whisper
+     * transcript (legacy) or fire it on `speech_stopped` (default, decoupled).
+     *
+     * `false` (default) — the response is requested on `speech_stopped`,
+     * independently of Whisper. `true` — the response is requested only after
+     * `transcript_input` passes the hallucination filter.
+     */
+    getGateResponseOnTranscript(): boolean;
     /**
      * Build the production session.update body. Mirrors the body sent
      * inside `connect()` so warmup can apply identical configuration to
@@ -399,18 +458,45 @@ declare class OpenAIRealtimeAdapter {
     /** Remove a previously registered {@link onEvent} callback. */
     offEvent(callback: RealtimeEventCallback): void;
     protected ensureMessageListener(): void;
-    /** Truncate the in-flight assistant turn and cancel the active response.
+    /** Truncate the in-flight assistant turn's playback offset on the server.
+     *
+     * Sends ONLY ``conversation.item.truncate`` — no ``response.cancel``. This
+     * is the half of barge-in handling that a WebSocket transport MUST always
+     * perform: per OpenAI's docs, the GA server auto-truncates on barge-in only
+     * over WebRTC / SIP; on the WebSocket transport the client is responsible
+     * for telling the server how much of the assistant turn was actually heard.
+     * In server-managed mode (``interrupt_response: true``) the server already
+     * cancels the response itself, so issuing ``response.cancel`` here would be
+     * redundant / rejected — call this method, not {@link cancelResponse}.
      *
      * ``audio_end_ms`` MUST reflect what the caller actually heard, not what
      * the server generated. OpenAI streams audio at 5-10x real-time, so the
      * byte-derived counter overstates playback whenever the consumer cleared
-     * its playout buffer (e.g. ``send_clear``) before the audio reached the
+     * its playout buffer (e.g. ``sendClear``) before the audio reached the
      * speaker. We bound the truncate point by wall-clock time since the first
      * chunk of this response — that's the physical maximum a 1x real-time
      * playback could have produced. Without this cap, OpenAI keeps the full
      * generated assistant text on the transcript, and the model replays /
      * resumes from it on the next turn — manifesting as re-greetings and
      * mid-sentence fragments after a barge-in storm.
+     *
+     * No-op when no response is in flight, keeping it idempotent across stale
+     * callers. Resets per-response tracking so post-truncate late frames and
+     * the next response start clean.
+     */
+    truncate(): void;
+    /** Truncate the in-flight assistant turn AND cancel the active response.
+     *
+     * Sends BOTH ``conversation.item.truncate`` (the played-offset bookkeeping)
+     * AND ``response.cancel``. Use this on the LEGACY client-managed barge-in
+     * path (``gateResponseOnTranscript`` true → ``interrupt_response: false``,
+     * so the server does NOT cancel for us) and for explicit cancels driven by
+     * Patter (e.g. on transfer / hangup). In server-managed mode call
+     * {@link truncate} instead — the server already cancels the response, and an
+     * extra ``response.cancel`` would be redundant / rejected.
+     *
+     * Truncation bounding semantics are identical to {@link truncate}; see its
+     * doc comment for the ``audio_end_ms`` wall-clock cap rationale.
      */
     cancelResponse(): void;
     /** Inject a user text turn and request a new response. */
@@ -441,6 +527,24 @@ declare class OpenAIRealtimeAdapter {
      * customer cue).
      */
     sendFirstMessage(text: string): Promise<void>;
+    /**
+     * Speak a short reassurance filler WITHOUT injecting a `role:user` turn.
+     *
+     * Same no-fake-turn shape as {@link sendFirstMessage}: a bare
+     * `response.create` carrying explicit `instructions`, so the filler is the
+     * assistant's own in-band audio. The reassurance scheduler in the
+     * stream-handler routes here instead of {@link sendText} — which would emit
+     * a `conversation.item.create` with `role:'user'` and falsely show the
+     * caller saying "One moment." in the transcript. Fillers must not imply
+     * success or failure.
+     *
+     * Uses `modalities: ['audio', 'text']` (v1-beta shape). The GA subclass
+     * {@link OpenAIRealtime2Adapter} overrides this with `output_modalities`
+     * and re-injects `audio.output.voice` so the GA endpoint does not reject
+     * the request. Mirrors Python `OpenAIRealtimeAdapter.send_reassurance` in
+     * `providers/openai_realtime.py`.
+     */
+    sendReassurance(text: string): Promise<void>;
     /** Submit a tool/function-call result and request the next response. */
     sendFunctionResult(callId: string, result: string): Promise<void>;
     /** Stop the heartbeat, drop listeners, and close the Realtime WebSocket. */
@@ -703,48 +807,48 @@ interface LatencyBreakdown {
      * number as "STT latency". Falls back to turn_start when the endpoint
      * signal is unavailable (degraded provider, batch STT, etc.).
      */
-    stt_ms: number;
+    readonly stt_ms: number;
     /**
      * Duration of the user's utterance (turn_start → end-of-speech). Useful
      * to distinguish "user spoke for 4s" from "STT took 4s to finalize" —
      * they used to be conflated in stt_ms before 0.6.1. Optional — undefined
      * when the endpoint signal is unavailable.
      */
-    user_speech_duration_ms?: number;
+    readonly user_speech_duration_ms?: number;
     /**
      * Backwards-compatible LLM bucket. With the split below, this now reflects
      * the user-perceived first-token latency (TTFT) when streaming is available
      * and the full generation time otherwise. Prefer ``llm_ttft_ms`` /
      * ``llm_total_ms`` in new code.
      */
-    llm_ms: number;
+    readonly llm_ms: number;
     /** Time-to-first-token (UX-facing latency): stt_complete → first LLM token. */
-    llm_ttft_ms?: number;
+    readonly llm_ttft_ms?: number;
     /**
      * Total LLM generation time: stt_complete → last LLM token. Distinct from
      * ``llm_ms`` so cost/throughput analysis and TTFT can be tracked separately.
      */
-    llm_total_ms?: number;
-    tts_ms: number;
-    total_ms: number;
+    readonly llm_total_ms?: number;
+    readonly tts_ms: number;
+    readonly total_ms: number;
     /**
      * Endpoint latency: time from end-of-user-speech (VAD stop or STT
      * ``speech_final``) to LLM dispatch. Captures the silence-detection +
      * transcript-finalization gap. Optional — undefined when the source signal
      * is missing.
      */
-    endpoint_ms?: number;
+    readonly endpoint_ms?: number;
     /**
      * Barge-in latency: time from user-interrupt detection to TTS playback
      * actually halting (i.e. after ``sendClear`` returned). Optional — only
      * populated on interrupted turns.
      */
-    bargein_ms?: number;
+    readonly bargein_ms?: number;
     /**
      * Total TTS time: LLM-first-token (or first-sentence boundary) to last
      * TTS audio byte sent. Optional — undefined when TTS never completed.
      */
-    tts_total_ms?: number;
+    readonly tts_total_ms?: number;
     /**
      * **User-perceived agent response latency**: time from end-of-user-speech
      * (VAD stop or STT ``speech_final``) to the first audio byte the agent
@@ -757,54 +861,54 @@ interface LatencyBreakdown {
      * the system-controlled latency: silence detection + LLM TTFT + TTS
      * first byte.
      */
-    agent_response_ms?: number;
+    readonly agent_response_ms?: number;
 }
 /** Per-call cost breakdown by component (STT/TTS/LLM/telephony) plus the total. */
 interface CostBreakdown {
-    stt: number;
-    tts: number;
-    llm: number;
-    telephony: number;
-    total: number;
+    readonly stt: number;
+    readonly tts: number;
+    readonly llm: number;
+    readonly telephony: number;
+    readonly total: number;
     /**
      * Amount saved on LLM cost thanks to OpenAI Realtime prompt caching.
      * ``llm`` above is the net cost AFTER this discount. Dashboards can
      * render ``saved $X (pct%)`` next to the LLM line when > 0.
      */
-    llm_cached_savings?: number;
+    readonly llm_cached_savings: number;
 }
 /** Metrics captured for a single conversation turn. */
 interface TurnMetrics {
-    turn_index: number;
-    user_text: string;
-    agent_text: string;
-    latency: LatencyBreakdown;
-    stt_audio_seconds: number;
-    tts_characters: number;
-    timestamp: number;
+    readonly turn_index: number;
+    readonly user_text: string;
+    readonly agent_text: string;
+    readonly latency: LatencyBreakdown;
+    readonly stt_audio_seconds: number;
+    readonly tts_characters: number;
+    readonly timestamp: number;
 }
 /** Aggregated metrics for an entire call (turns, costs, latency percentiles). */
 interface CallMetrics {
-    call_id: string;
-    duration_seconds: number;
-    turns: TurnMetrics[];
-    cost: CostBreakdown;
-    latency_avg: LatencyBreakdown;
-    latency_p95: LatencyBreakdown;
-    latency_p50?: LatencyBreakdown;
-    latency_p90?: LatencyBreakdown;
-    latency_p99?: LatencyBreakdown;
-    provider_mode: string;
-    stt_provider: string;
-    tts_provider: string;
-    llm_provider: string;
-    telephony_provider: string;
+    readonly call_id: string;
+    readonly duration_seconds: number;
+    readonly turns: readonly TurnMetrics[];
+    readonly cost: CostBreakdown;
+    readonly latency_avg: LatencyBreakdown;
+    readonly latency_p95: LatencyBreakdown;
+    readonly latency_p50: LatencyBreakdown;
+    readonly latency_p90: LatencyBreakdown;
+    readonly latency_p99: LatencyBreakdown;
+    readonly provider_mode: string;
+    readonly stt_provider: string;
+    readonly tts_provider: string;
+    readonly llm_provider: string;
+    readonly telephony_provider: string;
     /** Model identifiers per provider (e.g. "ink-whisper", "eleven_flash_v2_5",
      * "gpt-oss-120b"). Surface on the dashboard cost breakdown so operators
      * can attribute per-call spend to a specific model. */
-    stt_model?: string;
-    tts_model?: string;
-    llm_model?: string;
+    readonly stt_model?: string;
+    readonly tts_model?: string;
+    readonly llm_model?: string;
 }
 /** Programmatic control surface for a live call (transfer, hangup, DTMF). */
 interface CallControl {
@@ -830,7 +934,7 @@ interface CallControl {
 }
 /** Mutable per-call accumulator that stamps timestamps and emits final `CallMetrics`. */
 declare class CallMetricsAccumulator {
-    callId: string;
+    readonly callId: string;
     readonly providerMode: string;
     readonly telephonyProvider: string;
     readonly sttProvider: string;
@@ -922,6 +1026,16 @@ declare class CallMetricsAccumulator {
      * (the common cause of missing endpoint signals).
      */
     private _endpointSignalMissingCount;
+    /**
+     * Monotonic per-call turn counter. Reserved at turn OPEN
+     * (``onAdapterSpeechStopped`` / ``speech_stopped``) via
+     * ``reserveTurnIndex()`` and threaded through the buffering pipeline into
+     * ``recordTurnComplete`` / ``recordTurnInterrupted`` as ``preReservedIndex``.
+     * This makes ``turn_index`` stable under drops / interrupts (previously it
+     * was assigned at completion as ``this._turns.length``, which shifted when a
+     * turn was dropped). Parity with Python ``_next_turn_index``.
+     */
+    private _nextTurnIndex;
     constructor(opts: {
         callId: string;
         providerMode: string;
@@ -951,6 +1065,18 @@ declare class CallMetricsAccumulator {
     get turnActive(): boolean;
     /** Begin a new turn — stamps the turn start timestamp and resets per-turn state. */
     startTurn(): void;
+    /**
+     * Reserve and return the next monotonic turn index.
+     *
+     * Called once per turn at the moment the turn OPENS (Realtime:
+     * ``onAdapterSpeechStopped``). The returned index is threaded through the
+     * buffering pipeline and handed back to ``recordTurnComplete`` /
+     * ``recordTurnInterrupted`` as ``preReservedIndex`` so the emitted
+     * ``turn_index`` matches the live per-line transcript ordering even when a
+     * turn is dropped or interrupted between open and close. Parity with Python
+     * ``reserve_turn_index``.
+     */
+    reserveTurnIndex(): number;
     /**
      * Start a new turn only if no turn is currently open.
      * Use this at inbound-audio ingestion points so the turn timer begins
@@ -1027,7 +1153,7 @@ declare class CallMetricsAccumulator {
      * ``user_text=''``. The caller treats ``null`` as "nothing to emit";
      * ``emitTurnMetrics`` is already null-safe.
      */
-    recordTurnComplete(agentText: string): TurnMetrics | null;
+    recordTurnComplete(agentText: string, preReservedIndex?: number): TurnMetrics | null;
     /**
      * Close the current turn as interrupted (barge-in) and return the
      * recorded metrics. Returns ``null`` when no turn is open, OR when
@@ -1037,7 +1163,7 @@ declare class CallMetricsAccumulator {
      * a future refactor that reorders the bargein + LLM-unwind paths)
      * from overwriting a turn that the complete path already emitted.
      */
-    recordTurnInterrupted(): TurnMetrics | null;
+    recordTurnInterrupted(preReservedIndex?: number): TurnMetrics | null;
     /**
      * Record the moment VAD emitted speech_end for the current utterance.
      * @param ts Optional override timestamp in hrTimeMs units (defaults to now).
@@ -1058,8 +1184,10 @@ declare class CallMetricsAccumulator {
     recordTurnCommitted(ts?: number): void;
     /**
     * Record the delta (ms) between turn-committed and when the on_user_turn_completed
-     * pipeline hook finished.  Stored for inclusion in the next ``emitEouMetrics``
-     * call (or an explicit re-emit if desired).
+     * pipeline hook finished. Does NOT re-emit: like Python's
+     * ``record_on_user_turn_completed_delay``, this only stores the value; the
+     * single EOU emission happens on ``recordTurnCommitted`` (3-timestamp guard,
+     * delay defaults to 0 if not yet recorded).
      */
     recordOnUserTurnCompletedDelay(delayMs: number): void;
     /**
@@ -1070,7 +1198,7 @@ declare class CallMetricsAccumulator {
      * ``transcriptionDelay``       = turnCommitted − vadStopped  (ms)
      * ``onUserTurnCompletedDelay`` = caller-supplied delta (ms) or 0
      */
-    /** Emit `EOUMetrics` once VAD-stop, STT-final, and turn-committed timestamps are all known. */
+    /** Emit `EOUMetrics` once the VAD-stop, STT-final, and turn-committed timestamps are all known; the on_user_turn_completed delay defaults to 0 if not yet recorded. */
     emitEouMetrics(): void;
     /**
      * Record that a caller utterance started overlapping with agent speech.
@@ -1221,31 +1349,32 @@ declare function isWebSocketUrl(url: string): boolean;
 /** Snapshot of a call as held by the dashboard store. */
 interface CallRecord {
-    call_id: string;
-    caller: string;
-    callee: string;
-    direction: string;
-    started_at: number;
-    ended_at?: number;
+    readonly call_id: string;
+    readonly caller: string;
+    readonly callee: string;
+    readonly direction: string;
+    readonly started_at: number;
+    readonly ended_at?: number;
     /**
      * Current lifecycle state: ``initiated`` (pre-registered), ``ringing``,
      * ``in-progress``, ``completed``, ``no-answer``, ``busy``, ``failed``,
      * ``canceled``, or ``webhook_error``.
      */
-    status?: string;
-    transcript?: Array<{
-        role: string;
-        text: string;
-        timestamp: number;
+    readonly status?: string;
+    readonly transcript?: ReadonlyArray<{
+        readonly role: string;
+        readonly text: string;
+        readonly timestamp: number;
+        readonly turnIndex?: number;
     }>;
-    turns?: unknown[];
-    metrics?: Record<string, unknown> | null;
-    [key: string]: unknown;
+    readonly turns?: readonly unknown[];
+    readonly metrics?: Record<string, unknown> | null;
+    readonly [key: string]: unknown;
 }
 /** Server-Sent-Event payload broadcast by `MetricsStore` for live UI updates. */
 interface SSEEvent {
-    type: string;
-    data: Record<string, unknown>;
+    readonly type: string;
+    readonly data: Readonly<Record<string, unknown>>;
 }
 /** In-memory bounded ring buffer of recent calls plus active-call tracking. */
 declare class MetricsStore extends EventEmitter {
@@ -1289,6 +1418,27 @@ declare class MetricsStore extends EventEmitter {
      * row from active to completed so the UI freezes the live duration timer.
      */
     updateCallStatus(callId: string, status: string, extra?: Record<string, unknown>): void;
+    /**
+     * Record a single transcript line (user/assistant) as it becomes known.
+     *
+     * FIX-5 (issue #154): the live forward path for the dashboard transcript.
+     * The Realtime stream handler calls this the moment each line is known — the
+     * user line right after the hallucination filter accepts it, the assistant
+     * line when its turn flushes — keyed by the monotonic ``turnIndex`` reserved
+     * at turn-open (``reserveTurnIndex``). Each line is appended to the active
+     * call's ``transcript`` array and broadcast over SSE as a ``transcript_line``
+     * event so the dashboard can render lines as they arrive and re-sort by
+     * ``(turnIndex, user<assistant)`` — making a late-arriving user line land
+     * ABOVE its agent line. ``recordTurn`` de-dups against the lines pushed here
+     * by ``(turnIndex, role)`` so the metrics path never double-pushes the same
+     * text. Parity with Python ``record_transcript_line``.
+     */
+    recordTranscriptLine(data: {
+        call_id: string;
+        turnIndex: number;
+        role: 'user' | 'assistant';
+        text: string;
+    }): void;
     /** Append a single conversation turn to an active call and broadcast it via SSE. */
     recordTurn(data: Record<string, unknown>): void;
     /** Move a call from active to completed and persist its final metrics. */
@@ -1334,7 +1484,7 @@ declare class MetricsStore extends EventEmitter {
     isDeleted(callId: string): boolean;
     /** Snapshot of soft-deleted call_ids (sorted). */
     getDeletedCallIds(): string[];
-    /** Atomically persist the deleted-ids set to disk. Best-effort. */
+    /** Atomically persist the deleted-ids set to disk. Best-effort async. */
     private persistDeletedIds;
     /** Look up an active call by id (returns undefined if not active or unknown). */
     getActive(callId: string): CallRecord | undefined;
@@ -1452,6 +1602,7 @@ declare class Carrier {
 }
 /** OpenAI Realtime engine — marker class for Patter client dispatch. */
 /** Constructor options for the OpenAI `Realtime` engine marker. */
 interface RealtimeOptions {
     /** API key. Falls back to OPENAI_API_KEY env var when omitted. */
@@ -1479,6 +1630,42 @@ interface RealtimeOptions {
      * `"gpt-4o-transcribe"` for higher accuracy.
      */
     inputAudioTranscriptionModel?: string;
+    /**
+     * Input noise reduction for speakerphone / conference audio. `undefined`
+     * (default) omits the field (no reduction). `"far_field"` recommended for
+     * phone / speakerphone calls; `"near_field"` for a handset close to the
+     * mouth. Mirrors `openai_realtime_noise_reduction` on `Patter.agent()`.
+     */
+    noiseReduction?: 'near_field' | 'far_field';
+    /**
+     * Turn-detection tuning. `undefined` (default) keeps the adapter's
+     * current hardcoded `server_vad` / threshold `0.5` / silence 300 ms.
+     * Raise threshold or switch to `semantic_vad` eagerness `'low'` to stop
+     * speakerphone noise from triggering false barge-ins.
+     *
+     * Maps to `turn_detection` on the Python `engines.openai.Realtime` marker;
+     * propagates to `realtimeTurnDetection` on `AgentOptions`.
+     */
+    turnDetection?: RealtimeTurnDetection;
+    /**
+     * Gate the model's response on the Whisper transcript (legacy behavior).
+     *
+     * `false` (default) — the speech-to-speech model responds as soon as the
+     * user stops speaking (on `speech_stopped`), independently of the Whisper
+     * input transcription. The transcript becomes a pure observability
+     * side-channel (dashboard / history / `onTranscript`) and never gates,
+     * triggers, or cancels the response. This reclaims ~500 ms of latency
+     * because the model no longer waits for Whisper.
+     *
+     * `true` — restores the prior behavior where the response is requested
+     * only after the Whisper `transcript_input` event arrives and passes the
+     * hallucination filter.
+     *
+     * Maps to `gate_response_on_transcript` on the Python
+     * `engines.openai.Realtime` marker; propagates to
+     * `openaiRealtimeGateResponseOnTranscript` on `AgentOptions`.
+     */
+    gateResponseOnTranscript?: boolean;
 }
 /**
  * OpenAI Realtime engine marker.
@@ -1502,6 +1689,9 @@ declare class Realtime {
     readonly voice: string;
     readonly reasoningEffort?: 'minimal' | 'low' | 'medium' | 'high';
     readonly inputAudioTranscriptionModel?: string;
+    readonly noiseReduction?: 'near_field' | 'far_field';
+    readonly turnDetection?: RealtimeTurnDetection;
+    readonly gateResponseOnTranscript?: boolean;
     constructor(opts?: RealtimeOptions);
 }
@@ -1513,6 +1703,7 @@ declare class Realtime {
  * different `session.update` wire shape; the client dispatches to
  * `OpenAIRealtime2Adapter` when this marker is passed.
  */
 /** Constructor options for the OpenAI `Realtime2` engine marker. */
 interface Realtime2Options {
     /** API key. Falls back to OPENAI_API_KEY env var when omitted. */
@@ -1533,6 +1724,44 @@ interface Realtime2Options {
      * low-latency transcript partials.
      */
     inputAudioTranscriptionModel?: string;
+    /**
+     * Input noise reduction for speakerphone / conference audio. `undefined`
+     * (default) omits the field (no reduction). `"far_field"` recommended for
+     * phone / speakerphone calls; `"near_field"` for a handset close to the
+     * mouth. On the GA endpoint this is nested under
+     * `audio.input.input_audio_noise_reduction: { type }`.
+     * Mirrors `openai_realtime_noise_reduction` on `Patter.agent()`.
+     */
+    noiseReduction?: 'near_field' | 'far_field';
+    /**
+     * Turn-detection tuning. `undefined` (default) keeps the adapter's
+     * current hardcoded `server_vad` / threshold `0.5` / silence 300 ms.
+     * Raise threshold or switch to `semantic_vad` eagerness `'low'` to stop
+     * speakerphone noise from triggering false barge-ins.
+     *
+     * Maps to `turn_detection` on the Python `engines.openai_realtime_2.Realtime2`
+     * marker; propagates to `realtimeTurnDetection` on `AgentOptions`.
+     */
+    turnDetection?: RealtimeTurnDetection;
+    /**
+     * Gate the model's response on the Whisper transcript (legacy behavior).
+     *
+     * `false` (default) — the speech-to-speech model responds as soon as the
+     * user stops speaking (on `speech_stopped`), independently of the Whisper
+     * input transcription. The transcript becomes a pure observability
+     * side-channel (dashboard / history / `onTranscript`) and never gates,
+     * triggers, or cancels the response. This reclaims ~500 ms of latency
+     * because the model no longer waits for Whisper.
+     *
+     * `true` — restores the prior behavior where the response is requested
+     * only after the Whisper `transcript_input` event arrives and passes the
+     * hallucination filter.
+     *
+     * Maps to `gate_response_on_transcript` on the Python
+     * `engines.openai_realtime_2.Realtime2` marker; propagates to
+     * `openaiRealtimeGateResponseOnTranscript` on `AgentOptions`.
+     */
+    gateResponseOnTranscript?: boolean;
 }
 /**
  * OpenAI Realtime 2 engine marker — selects `gpt-realtime-2` on the GA
@@ -1557,6 +1786,9 @@ declare class Realtime2 {
     readonly voice: string;
     readonly reasoningEffort?: 'minimal' | 'low' | 'medium' | 'high';
     readonly inputAudioTranscriptionModel?: string;
+    readonly noiseReduction?: 'near_field' | 'far_field';
+    readonly turnDetection?: RealtimeTurnDetection;
+    readonly gateResponseOnTranscript?: boolean;
     constructor(opts?: Realtime2Options);
 }
@@ -1701,6 +1933,33 @@ interface ToolOptions {
     handler?: ToolHandler;
     /** URL to POST to when the LLM invokes the tool. */
     webhookUrl?: string;
+    /**
+     * Optional reassurance filler the agent speaks while a slow tool call runs.
+     * Two forms:
+     *  - `string`: shorthand for `{ message: <string>, afterMs: 1500 }`.
+     *  - object: explicit `{ message, afterMs? }`.
+     * Currently honoured only in Realtime mode. Off by default.
+     *
+     * Mirrors Python `reassurance` on `Tool` / `tool()`.
+     */
+    reassurance?: string | {
+        message: string;
+        afterMs?: number;
+    };
+    /**
+     * Per-tool execution timeout in milliseconds, applied to BOTH the handler
+     * and webhook paths. `undefined` (default) uses the executor default
+     * (10 000 ms). Raise for long browser-automation / external-API tools
+     * (e.g. `60_000`). Clamped to a 300 000 ms ceiling by the executor.
+     *
+     * Mirrors Python `timeout_s` on `Tool` / `tool()`.
+     */
+    timeoutMs?: number;
+    /**
+     * Enable OpenAI strict mode for this tool's function schema. Mirrors
+     * Python `strict` on `Tool`. Off by default.
+     */
+    strict?: boolean;
 }
 /**
  * Tool definition. Structurally matches `ToolDefinition` so it drops
@@ -1724,6 +1983,20 @@ declare class Tool implements ToolDefinition {
     readonly parameters: Record<string, unknown>;
     readonly handler?: ToolHandler;
     readonly webhookUrl?: string;
+    readonly reassurance?: string | Readonly<{
+        message: string;
+        afterMs?: number;
+    }>;
+    /**
+     * Per-tool execution timeout in milliseconds. `undefined` uses the
+     * executor default (10 000 ms). Mirrors Python `timeout_s`.
+     */
+    readonly timeoutMs?: number;
+    /**
+     * Enable OpenAI strict mode for this tool's function schema. Off by
+     * default. Mirrors Python `strict` on `Tool`.
+     */
+    readonly strict?: boolean;
     constructor(opts: ToolOptions);
 }
 /** Factory helper mirroring Python's `tool(...)` function. */
@@ -1850,6 +2123,8 @@ interface PerToolState {
     state: CircuitBreakerState;
     consecutiveFailures: number;
     openedAt: number;
+    /** True while a HALF_OPEN probe call is already in-flight. */
+    probeInFlight: boolean;
 }
 /** Per-name registry tracking circuit state for a fleet of tools. */
 declare class CircuitBreakerRegistry {
@@ -1888,7 +2163,7 @@ declare class CircuitBreakerRegistry {
  * Avoids a circular import from metrics.ts.
  */
 interface LlmUsageRecorder {
-    recordLlmUsage(provider: string, model: string, inputTokens: number, outputTokens: number, cacheReadTokens?: number, cacheCreationTokens?: number): void;
+    recordLlmUsage(provider: string, model: string, inputTokens: number, outputTokens: number, cacheReadTokens?: number, cacheWriteTokens?: number): void;
 }
 /**
  * Pluggable tool executor — mirrors the Python ``ToolExecutor`` in
@@ -1956,7 +2231,7 @@ interface LLMChunk {
     inputTokens?: number;
     outputTokens?: number;
     cacheReadInputTokens?: number;
-    cacheCreationInputTokens?: number;
+    cacheWriteInputTokens?: number;
 }
 /**
  * Interface that any LLM provider must satisfy.
@@ -1979,6 +2254,18 @@ interface LLMChunk {
  */
 interface LLMStreamOptions {
     signal?: AbortSignal;
+    /**
+     * Stable per-call id (the same value the stream handler builds into
+     * ``callCtx.call_id``). Threaded through purely so session-aware providers
+     * — currently {@link OpenAICompatibleLLMProvider} and its Hermes / OpenClaw
+     * presets — can emit the OpenAI ``user`` field as ``patter-call-<callId>``,
+     * giving the upstream agent runtime one durable session per phone call.
+     *
+     * Additive and optional: every existing provider reads only ``signal`` and
+     * is unaffected. When unset (or when a provider has no session-continuity
+     * config) no ``user`` field is sent — fully backward compatible.
+     */
+    callId?: string;
 }
 interface LLMProvider {
     stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<LLMChunk, void, unknown>;
@@ -2250,13 +2537,55 @@ type MCPServerConfig = string | {
     /** Optional logical name for telemetry / log lines. */
     readonly name?: string;
 };
+/**
+ * OpenAI Realtime turn-detection tuning.
+ *
+ * Raise the VAD {@link threshold} (`server_vad`) or switch to
+ * `semantic_vad` with {@link eagerness} `'low'` to stop speakerphone /
+ * conference-room noise (mouse clicks, phone shifts, background chatter)
+ * from being mistaken for the caller speaking and cutting the agent off.
+ *
+ * Each unset field falls back to the adapter's current default
+ * (`server_vad`, threshold `0.5`, `prefixPaddingMs` `300`,
+ * `silenceDurationMs` `300`). `type === 'semantic_vad'` emits
+ * `{ type, eagerness }` only — OpenAI rejects `threshold` /
+ * `prefixPaddingMs` / `silenceDurationMs` on the semantic detector.
+ * `createResponse` / `interruptResponse` are NOT exposed (Patter keeps
+ * its client-gated barge-in safety values).
+ *
+ * Mirrors Python `RealtimeTurnDetection` dataclass in `models.py`.
+ */
+interface RealtimeTurnDetection {
+    /** `"server_vad"` (default) or `"semantic_vad"`. */
+    readonly type?: 'server_vad' | 'semantic_vad';
+    /**
+     * `server_vad` only — 0..1, higher rejects more background noise.
+     * `undefined` keeps the adapter default (`0.5`).
+     */
+    readonly threshold?: number;
+    /**
+     * `server_vad` only — milliseconds of audio to include before the
+     * point where VAD detected speech. `undefined` keeps the adapter
+     * default (`300`).
+     */
+    readonly prefixPaddingMs?: number;
+    /**
+     * `server_vad` only — trailing silence (ms) before the turn ends.
+     * `undefined` keeps the adapter default (`300`).
+     */
+    readonly silenceDurationMs?: number;
+    /**
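+     * `semantic_vad` only — how eagerly the model responds. `"low"` waits
+     * longest for the caller to finish (least likely to interrupt),
+     * `"high"` responds soonest, and `"auto"` lets OpenAI choose
+     * (documented as equivalent to `"medium"`).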
+     */
+    readonly eagerness?: 'low' | 'medium' | 'high' | 'auto';
+}
 /** Internal shape of a tool definition (matches `Tool` from `public-api.ts`). */
 interface ToolDefinition {
-    name: string;
-    description: string;
-    parameters: Record<string, unknown>;
+    readonly name: string;
+    readonly description: string;
+    readonly parameters: Readonly<Record<string, unknown>>;
     /** Webhook URL — called when the LLM invokes this tool. Mutually exclusive with handler. */
-    webhookUrl?: string;
+    readonly webhookUrl?: string;
     /**
      * Local handler — called instead of ``webhookUrl`` when present.
      *
@@ -2274,7 +2603,7 @@ interface ToolDefinition {
      *    ignores the progress yields — the final value is still used as
      *    the tool result.
      */
-    handler?: ((args: Record<string, unknown>, context: Record<string, unknown>) => Promise<string>) | ((args: Record<string, unknown>, context: Record<string, unknown>) => AsyncGenerator<{
+    readonly handler?: ((args: Record<string, unknown>, context: Record<string, unknown>) => Promise<string>) | ((args: Record<string, unknown>, context: Record<string, unknown>) => AsyncGenerator<{
         progress?: string;
         result?: string;
     }, string | void, unknown>);
@@ -2294,10 +2623,10 @@ interface ToolDefinition {
      * synthesises it inline. Pipeline mode has no clean injection point
      * mid-turn yet; the option is silently ignored there. Off by default.
      */
-    reassurance?: string | {
+    readonly reassurance?: string | Readonly<{
         message: string;
         afterMs?: number;
-    };
+    }>;
     /**
      * Enable OpenAI strict mode for this tool's function schema. When ``true``
      * the model is constrained to emit arguments that exactly match the
@@ -2318,7 +2647,123 @@ interface ToolDefinition {
      * Recommended for any tool whose handler/webhook can't safely tolerate
      * malformed arguments (DB writes, payment, transfers).
      */
-    strict?: boolean;
+    readonly strict?: boolean;
+    /**
+     * Per-tool execution timeout in milliseconds, applied to BOTH the handler
+     * and webhook paths. `undefined` (default) uses the executor default
+     * (10 000 ms). Raise for long browser-automation / external-API tools
+     * (e.g. `60_000`). Clamped to a 300 000 ms ceiling by the executor.
+     *
+     * Mirrors Python's `timeout_s` on `Tool` / `tool()`.
+     */
+    readonly timeoutMs?: number;
+}
+/**
+ * Configuration for the built-in ``consult`` escalation tool.
+ *
+ * When set on an agent, Patter auto-injects a tool (default name
+ * ``consult_agent``) that the in-call agent can invoke mid-call to reach the
+ * your own back-office agent over HTTP for deeper reasoning, fresh
+ * information, or an action beyond the call. Patter keeps STT + LLM/voice +
+ * TTS + carrier; the back-office agent is consulted only on demand (never on
+ * the per-turn path). The tool POSTs ``{ request, call_id, caller, callee }``
+ * to {@link url}; the endpoint returns JSON with a ``reply`` / ``response`` /
+ * ``text`` string (or any JSON / plain text) and the agent speaks it.
+ *
+ * Injected in **Realtime** and **Pipeline** modes only — ElevenLabs ConvAI
+ * tools live on the ElevenLabs-hosted agent, so ``consult`` does not apply
+ * there (a warning is emitted if set with that provider).
+ */
+interface ConsultConfig {
+    /**
+     * Generic webhook endpoint Patter POSTs ``{ request, call_id, caller, callee }``
+     * to. SSRF-validated at call start. Mutually exclusive with
+     * {@link openaiCompatible} — set exactly one.
+     */
+    readonly url?: string;
+    /**
+     * Native target that speaks an OpenAI-compatible ``/chat/completions``
+     * endpoint directly (e.g. an OpenClaw agent, or vLLM / Ollama / Groq) — no
+     * hand-written adapter. Mutually exclusive with {@link url}. Use
+     * {@link openclawConsult} for the OpenClaw preset.
+     */
+    readonly openaiCompatible?: OpenAICompatibleConsult;
+    /** Optional headers (e.g. an ``Authorization`` bearer). Never logged. */
+    readonly headers?: Readonly<Record<string, string>>;
+    /**
+     * Per-consult HTTP timeout in milliseconds. Higher than the generic
+     * webhook-tool default (10 000 ms) because a consult may run deeper
+     * reasoning. Default ``30000``.
+     */
+    readonly timeoutMs?: number;
+    /** Name the LLM sees for the tool. Default ``"consult_agent"``. */
+    readonly toolName?: string;
+    /** Description the LLM sees — tune to steer when the agent escalates. */
+    readonly description?: string;
+    /**
+     * Optional filler the agent speaks while the consult runs (Realtime mode
+     * only) so a multi-second back-office call is not dead air. Omitted plays no
+     * filler; the {@link openclawConsult} preset sets a sensible default.
+     */
+    readonly reassurance?: string | Readonly<{
+        message: string;
+        afterMs?: number;
+    }>;
+    /**
+     * Opt-in: allow {@link url} to point at a loopback / private / link-local
+     * host (e.g. a back-office agent on ``127.0.0.1`` or an RFC1918 LAN host).
+     *
+     * Default ``false`` (or ``undefined``) — the URL is SSRF-validated and
+     * loopback/private/link-local targets are rejected, preserving the strict
+     * default behaviour. Set ``true`` ONLY for a trusted, developer-configured
+     * local agent: the URL is your own config, not caller-derived input.
+     *
+     * Even when ``true``, non-HTTP(S) schemes (``file:``, ``javascript:`` …)
+     * are still rejected. Note: opting in also makes cloud-metadata hostnames
+     * (``metadata``, ``metadata.google.internal``, ``metadata.azure.com``) and
+     * the IMDS IP ``169.254.169.254`` reachable — an accepted tradeoff for a URL
+     * you control. This opt-in applies ONLY to the consult tool; the generic
+     * webhook-tool path stays strict.
+     */
+    readonly allowLoopback?: boolean;
+}
+/**
+ * Native {@link ConsultConfig} target that speaks an OpenAI-compatible
+ * ``/chat/completions`` endpoint directly — no hand-written adapter.
+ *
+ * Lets ``consult`` reach an OpenClaw agent (or any OpenAI-compatible gateway:
+ * vLLM, Ollama, Groq, …). The consult handler builds a standard chat-completions
+ * request (``model`` + ``messages`` + ``user``) and speaks
+ * ``choices[0].message.content``. Prefer {@link openclawConsult} for the
+ * OpenClaw preset rather than constructing this directly.
+ */
+interface OpenAICompatibleConsult {
+    /**
+     * OpenAI-compatible base URL ending in ``/v1`` (the handler POSTs to
+     * ``{baseUrl}/chat/completions``), e.g. ``http://127.0.0.1:18789/v1``.
+     */
+    readonly baseUrl: string;
+    /**
+     * Model / agent target. For OpenClaw this is the namespaced agent id, e.g.
+     * ``"openclaw/receptionist"``.
+     */
+    readonly model: string;
+    /**
+     * Bearer token. Prefer {@link apiKeyEnv} so the secret stays out of source.
+     * For OpenClaw this is an OPERATOR-grade credential — never logged.
+     */
+    readonly apiKey?: string;
+    /**
+     * Environment variable to read the bearer from when {@link apiKey} is not
+     * given (e.g. ``"OPENCLAW_API_KEY"``).
+     */
+    readonly apiKeyEnv?: string;
+    /**
+     * Optional header carrying the per-call session id (the call id), e.g.
+     * ``"x-openclaw-session-key"``. The call id is also sent as the OpenAI
+     * ``user`` field.
+     */
+    readonly sessionHeader?: string;
 }
 /** Constructor options for `new Patter({...})` in local-server mode. */
 interface LocalOptions {
@@ -2331,14 +2776,14 @@ interface LocalOptions {
      * const phone = new Patter({ carrier: new Twilio(), phoneNumber: "+1..." });
      * ```
      */
-    carrier: Carrier$2 | Carrier$1 | Carrier;
+    readonly carrier: Carrier$2 | Carrier$1 | Carrier;
     /**
      * Tunnel configuration. Accepts a tunnel instance, ``true`` (alias for
      * ``new CloudflareTunnel()``), or ``false`` / omitted (no tunnel).
      */
-    tunnel?: CloudflareTunnel | Static | boolean;
-    phoneNumber: string;
-    webhookUrl?: string;
+    readonly tunnel?: CloudflareTunnel | Static | boolean;
+    readonly phoneNumber: string;
+    readonly webhookUrl?: string;
     /**
      * On-disk persistence for the dashboard's call history. The dashboard
      * itself is in-memory, but enabling ``persist`` writes per-call records
@@ -2366,25 +2811,25 @@ interface LocalOptions {
      * Phone numbers are masked by default; control via
      * ``PATTER_LOG_REDACT_PHONE``.
      */
-    persist?: boolean | string;
+    readonly persist?: boolean | string;
     /**
      * @internal — allows ``StreamHandler`` to build the default OpenAI
      * ``LLMLoop`` when no ``onMessage`` handler is supplied. The
      * ``OpenAIRealtime`` engine instance carries its own key when one is
      * used via ``phone.agent({ engine: new OpenAIRealtime({ apiKey }) })``.
      */
-    openaiKey?: string;
+    readonly openaiKey?: string;
 }
 /** Internal shape of a guardrail (matches `Guardrail` class from `public-api.ts`). */
 interface Guardrail {
     /** Name for logging when triggered */
-    name: string;
+    readonly name: string;
     /** List of terms that trigger the guardrail (case-insensitive) */
-    blockedTerms?: string[];
+    readonly blockedTerms?: ReadonlyArray<string>;
     /** Custom check function — return true to block the response */
-    check?: (text: string) => boolean;
+    readonly check?: (text: string) => boolean;
     /** Replacement text spoken when guardrail triggers */
-    replacement?: string;
+    readonly replacement?: string;
 }
 /** Per-call context passed to every pipeline hook. */
 interface HookContext {
@@ -2493,29 +2938,41 @@ interface BackgroundAudioPlayer$1 {
  */
 /** Configuration for a local-mode voice AI agent (passed to `phone.agent({...})`). */
 interface AgentOptions {
-    systemPrompt: string;
+    readonly systemPrompt: string;
     /**
      * Voice preset. When ``engine`` is provided, its ``voice`` is used unless
      * explicitly overridden here. Format depends on the engine:
      * OpenAI Realtime accepts a name (``'alloy'``, ``'echo'``, ...);
      * ElevenLabs ConvAI accepts a voice ID.
      */
-    voice?: string;
+    readonly voice?: string;
     /**
      * LLM / Realtime model. When ``engine`` is provided, its ``model`` is used
      * unless explicitly overridden here.
      */
-    model?: string;
+    readonly model?: string;
     /**
      * BCP-47 language code (e.g. ``'en'``, ``'it'``). Forwarded to STT (in
      * pipeline mode) and to the engine adapter at call time. STTConfig has its
      * own ``language`` field for the rare case where STT must use a different
      * language than the rest of the pipeline.
      */
-    language?: string;
-    firstMessage?: string;
+    readonly language?: string;
+    readonly firstMessage?: string;
+    /**
+     * Opt-in spoken fallback for pipeline mode when the per-turn LLM stream
+     * throws (gateway-down / 120 s timeout) BEFORE any assistant text was
+     * spoken. Agent-runtime providers (Hermes / OpenClaw) run tools+memory
+     * internally so a turn can take 30-90 s; without this option a failure
+     * leaves the caller with silence and then a silent turn-end. When set to
+     * a non-empty string, the SDK synthesizes and speaks this line through
+     * the normal TTS turn
+     * lifecycle (subject to barge-in). ``undefined`` (default) preserves
+     * today's behaviour: nothing is spoken on LLM error. Pipeline mode only.
+     * Mirrors Python ``llm_error_message`` on ``Patter.agent()`` / ``Agent``.
+     */
+    readonly llmErrorMessage?: string;
     /** Tool definitions — ``Tool`` class instances from ``getpatter``. */
-    tools?: Array<Tool>;
+    readonly tools?: ReadonlyArray<Tool>;
     /**
      * Model Context Protocol (MCP) servers to plug into this agent. Each
      * server is queried at call start via ``tools/list`` and its tools
@@ -2536,14 +2993,23 @@ interface AgentOptions {
      * call start (~50-200 ms × N servers). Future iterations may cache
      * the discovered list process-wide.
      */
-    mcpServers?: ReadonlyArray<MCPServerConfig>;
+    readonly mcpServers?: ReadonlyArray<MCPServerConfig>;
+    /**
+     * Optional back-office "consult" escalation. When set, Patter auto-injects a
+     * ``consult_agent`` tool (Realtime + Pipeline modes) that the in-call agent
+     * can invoke to reach your own orchestrator over HTTP for deeper
+     * reasoning / fresh info, then speak the reply. The orchestrator stays off
+     * the per-turn path — consulted only on demand. ``undefined`` (default)
+     * disables it. See {@link ConsultConfig}.
+     */
+    readonly consult?: ConsultConfig;
     /**
      * When ``true``, ship ``systemPrompt`` to the LLM verbatim. Default
      * (``false``) prepends a phone-friendly preamble that instructs the
      * model to avoid markdown, emojis, bullet lists, and verbose replies —
      * the conventions live phone calls require.
      */
-    disablePhonePreamble?: boolean;
+    readonly disablePhonePreamble?: boolean;
     /**
      * Acoustic echo cancellation. When `true` (pipeline mode only) the SDK
      * instantiates an `NlmsEchoCanceller` that subtracts the agent's own
@@ -2555,53 +3021,53 @@ interface AgentOptions {
      * convergence period would briefly attenuate caller speech if they
      * spoke before any TTS played.
      */
-    echoCancellation?: boolean;
+    readonly echoCancellation?: boolean;
     /**
      * Realtime / ConvAI engine instance. When present, the agent runs in the
      * matching mode (``openai_realtime`` or ``elevenlabs_convai``). When absent,
      * pipeline mode is selected if ``stt`` and ``tts`` are provided.
      */
-    engine?: Realtime | Realtime2 | ConvAI;
+    readonly engine?: Realtime | Realtime2 | ConvAI;
     /**
      * Provider mode. Normally derived from ``engine`` / ``stt`` + ``tts``. Pass
      * ``'pipeline'`` explicitly when building a pipeline-mode agent without
      * an engine instance.
      */
-    provider?: 'openai_realtime' | 'elevenlabs_convai' | 'pipeline';
+    readonly provider?: 'openai_realtime' | 'elevenlabs_convai' | 'pipeline';
     /** Pre-instantiated STT adapter (e.g. ``new DeepgramSTT({ apiKey })``). */
-    stt?: STTAdapter;
+    readonly stt?: STTAdapter;
     /** Pre-instantiated TTS adapter (e.g. ``new ElevenLabsTTS({ apiKey })``). */
-    tts?: TTSAdapter;
+    readonly tts?: TTSAdapter;
     /**
      * Pipeline-mode LLM provider (e.g. ``new AnthropicLLM()``). When set, the
      * built-in LLM loop uses this provider instead of the OpenAI default.
      * Mutually exclusive with ``onMessage`` passed to ``serve()``. Ignored
      * when ``engine`` is set (realtime mode bypasses the pipeline LLM).
      */
-    llm?: LLMProvider;
+    readonly llm?: LLMProvider;
     /** Dynamic variables for ``{placeholder}`` substitution in systemPrompt at call time. */
-    variables?: Record<string, string>;
+    readonly variables?: Readonly<Record<string, string>>;
     /** Output guardrails — ``Guardrail`` class instances from ``getpatter``. */
-    guardrails?: Array<Guardrail>;
+    readonly guardrails?: ReadonlyArray<Guardrail>;
     /** Pipeline hooks — intercept and transform data at each pipeline stage (pipeline mode only). */
-    hooks?: PipelineHooks;
+    readonly hooks?: PipelineHooks;
     /** Text transforms applied to LLM output before TTS (pipeline mode only).
      *  Each function receives a string and returns the transformed string.
      *  Applied in order before the ``beforeSynthesize`` hook. */
-    textTransforms?: Array<(text: string) => string>;
+    readonly textTransforms?: ReadonlyArray<(text: string) => string>;
     /** Optional server-side VAD (e.g., Silero). Pipeline mode only. */
-    vad?: VADProvider;
+    readonly vad?: VADProvider;
     /** Optional pre-STT audio filter (noise cancellation). Pipeline mode only. */
-    audioFilter?: AudioFilter;
+    readonly audioFilter?: AudioFilter;
     /** Optional background audio mixer (hold music, thinking cues). Pipeline mode only. */
-    backgroundAudio?: BackgroundAudioPlayer$1;
+    readonly backgroundAudio?: BackgroundAudioPlayer$1;
     /**
      * Minimum sustained voice (ms) before treating caller audio as a barge-in
      * and interrupting TTS. `0` disables barge-in entirely — useful on noisy
      * links (ngrok tunnels, speakerphone) where the agent can hear itself.
      * Default: 300.
      */
-    bargeInThresholdMs?: number;
+    readonly bargeInThresholdMs?: number;
     /**
      * Opt-in barge-in confirmation strategies (pipeline mode). With the
      * default empty array the SDK falls back to the legacy
@@ -2618,14 +3084,14 @@ interface AgentOptions {
      * ``MinWordsStrategy`` for the protocol and a reference
      * implementation.
      */
-    bargeInStrategies?: readonly BargeInStrategy[];
+    readonly bargeInStrategies?: readonly BargeInStrategy[];
     /**
      * Maximum time (ms) to wait for at least one strategy to confirm a
      * pending barge-in before discarding the pending state and resuming
      * TTS. Only consulted when ``bargeInStrategies`` is non-empty.
      * Default: 1500.
      */
-    bargeInConfirmMs?: number;
+    readonly bargeInConfirmMs?: number;
     /**
      * When ``true`` (default), ``Patter.call`` warms up the STT, TTS, and
      * LLM provider connections in parallel with the carrier-side
@@ -2636,7 +3102,7 @@ interface AgentOptions {
      * of the WebSocket bridge. Best-effort: warmup failures are logged
      * at debug level and never abort the call. Default: ``true``.
      */
-    prewarm?: boolean;
+    readonly prewarm?: boolean;
     /**
      * When ``true`` (default since 0.6.2 in pipeline mode), ``Patter.call``
      * pre-renders ``firstMessage`` to TTS audio bytes during the ringing
@@ -2655,7 +3121,7 @@ interface AgentOptions {
      * ``Patter.call`` refuses to spawn the prewarm task and emits a warn
      * when ``provider !== 'pipeline'``.
      */
-    prewarmFirstMessage?: boolean;
+    readonly prewarmFirstMessage?: boolean;
     /**
      * When true, the sentence chunker emits the first clause of each response
      * on a soft punctuation boundary (",", em-dash, en-dash) once ~40 chars
@@ -2667,38 +3133,124 @@ interface AgentOptions {
      * See SentenceChunker constructor for the full guard list (decimal,
      * currency, balanced delimiter, ellipsis).
      */
-    aggressiveFirstFlush?: boolean;
+    readonly aggressiveFirstFlush?: boolean;
+    /**
+     * Input noise reduction for speakerphone / conference audio (OpenAI
+     * Realtime mode only). `undefined` (default) omits the field entirely
+     * (no reduction — today's behavior).
+     *
+     * - `"far_field"` — recommended for phone / speakerphone calls where
+     *   the mic is more than ~30 cm from the speaker.
+     * - `"near_field"` — for a handset held close to the mouth.
+     *
+     * v1 Realtime: emitted at the top level of `session.update` as
+     * `input_audio_noise_reduction: { type }`. GA Realtime (gpt-realtime-2):
+     * nested under `audio.input.input_audio_noise_reduction: { type }`.
+     *
+     * Mirrors Python `openai_realtime_noise_reduction` on `Patter.agent()` /
+     * `Agent` and `noise_reduction` on `engines.openai.Realtime`.
+     */
+    readonly openaiRealtimeNoiseReduction?: 'near_field' | 'far_field';
+    /**
+     * Turn-detection tuning for OpenAI Realtime mode. `undefined` (default)
+     * keeps the adapter's current hardcoded `server_vad` / threshold `0.5` /
+     * silence 300 ms settings.
+     *
+     * Raise {@link RealtimeTurnDetection.threshold} (`server_vad`) or switch
+     * to `semantic_vad` with `eagerness: 'low'` to stop speakerphone /
+     * conference noise from triggering false barge-ins.
+     *
+     * Mirrors Python `realtime_turn_detection` on `Patter.agent()` / `Agent`
+     * and `turn_detection` on `engines.openai.Realtime`.
+     */
+    readonly realtimeTurnDetection?: RealtimeTurnDetection;
+    /**
+     * Gate the OpenAI Realtime model's response on the Whisper input
+     * transcript (legacy behavior). OpenAI Realtime mode only.
+     *
+     * - `false` / `undefined` (default) — the speech-to-speech model responds
+     *   as soon as the user stops speaking (`speech_stopped`), independently
+     *   of the Whisper transcription. The transcript becomes a pure
+     *   observability side-channel (dashboard / history / `onTranscript`) and
+     *   never gates, triggers, or cancels the response. Reclaims ~500 ms of
+     *   latency because the model no longer waits for Whisper.
+     * - `true` — restores the prior behavior where the response is requested
+     *   only after the Whisper `transcript_input` event arrives. Production
+     *   flows should keep the default; this is for callers that depended on
+     *   the old transcript-gated ordering.
+     *
+     * Mirrors Python `realtime_gate_response_on_transcript` on `Patter.agent()`
+     * / `Agent` and `gate_response_on_transcript` on `engines.openai.Realtime`.
+     */
+    readonly openaiRealtimeGateResponseOnTranscript?: boolean;
+    /**
+     * When set, Patter prepends a native "# Preambles" guidance block to the
+     * OpenAI Realtime session `instructions` so the model speaks one short,
+     * action-describing sentence ("I'll check that order now.") before a tool
+     * call that may take a moment, in its own voice. Most effective on
+     * `gpt-realtime-2`, where preambles are first-class.
+     *
+     * - `undefined` / `false` (default) — no change to the prompt; the
+     *   instructions stay byte-identical to prior releases.
+     * - `true` — Patter prepends the built-in block.
+     * - `string` — used verbatim as the full preamble block (override).
+     *
+     * Realtime modes only; pipeline mode has its own phone preamble (see
+     * `disablePhonePreamble`). Mirrors Python `tool_call_preambles` on
+     * `Patter.agent()` / `Agent`.
+     */
+    readonly toolCallPreambles?: boolean | string;
 }
 /** Pipeline-mode message handler — given full turn context, returns the agent's reply. */
 type PipelineMessageHandler = (data: Record<string, unknown>) => Promise<string>;
 /** Options for `Patter.serve({...})`. */
 interface ServeOptions {
-    agent: AgentOptions;
-    port?: number;
+    readonly agent: AgentOptions;
+    readonly port?: number;
     /** When true, start a cloudflared tunnel automatically (requires `cloudflared` npm package). */
-    tunnel?: boolean;
-    onCallStart?: (data: Record<string, unknown>) => Promise<void>;
-    onCallEnd?: (data: Record<string, unknown>) => Promise<void>;
-    onTranscript?: (data: Record<string, unknown>) => Promise<void>;
+    readonly tunnel?: boolean;
+    readonly onCallStart?: (data: Record<string, unknown>) => Promise<void>;
+    readonly onCallEnd?: (data: Record<string, unknown>) => Promise<void>;
+    readonly onTranscript?: (data: Record<string, unknown>) => Promise<void>;
     /** Pipeline mode only — called with the user's transcript; return value is spoken.
      *  Can also be a URL string for remote webhook/WebSocket integration. */
-    onMessage?: PipelineMessageHandler | string;
+    readonly onMessage?: PipelineMessageHandler | string;
     /** Called after each turn with per-turn metrics. */
-    onMetrics?: (data: Record<string, unknown>) => Promise<void>;
+    readonly onMetrics?: (data: Record<string, unknown>) => Promise<void>;
     /** When true, record calls via the Twilio Recordings API. */
-    recording?: boolean;
+    readonly recording?: boolean;
     /** If set, spoken as a voicemail message when AMD detects a machine. */
-    voicemailMessage?: string;
+    readonly voicemailMessage?: string;
     /** Custom pricing overrides for cost calculation. */
-    pricing?: Record<string, Record<string, unknown>>;
+    readonly pricing?: Readonly<Record<string, Record<string, unknown>>>;
     /** When true (default), serve a dashboard UI at /dashboard. */
-    dashboard?: boolean;
+    readonly dashboard?: boolean;
     /** Bearer token for dashboard/API authentication. */
-    dashboardToken?: string;
+    readonly dashboardToken?: string;
+    /**
+     * When true, serve the dashboard (and the call-data `/api/*` routes)
+     * fully OPEN — WITHOUT authentication — even when the server is
+     * reachable beyond loopback (e.g. behind a tunnel or a public webhook
+     * URL). **NOT RECOMMENDED on a public network** — the dashboard exposes
+     * call transcripts and metadata (PII) to anyone who can reach the URL.
+     *
+     * Defaults to `false` (security). With the default, when the dashboard
+     * is enabled, `dashboardToken` is empty, AND the server is exposed
+     * beyond `127.0.0.1`, the SDK auto-generates a per-process token and mounts
+     * the dashboard behind it (the startup banner prints the ready-to-use
+     * URL with `?token=...`). The dashboard is always available — it just
+     * requires the printed or configured token. Loopback-only local dev is
+     * unchanged: served open with no token.
+     *
+     * For a stable token instead of the per-process auto-generated one, set
+     * `dashboardToken`. Set this flag only as the deliberate escape hatch
+     * for the rare case where unauthenticated public exposure is intentional.
+     */
+    readonly allowInsecureDashboard?: boolean;
     /** Path to SQLite database for dashboard persistence (not used in TS yet). */
-    dashboardDb?: string;
+    readonly dashboardDb?: string;
     /** When true (default), persist dashboard data. */
-    dashboardPersist?: boolean;
+    readonly dashboardPersist?: boolean;
     /**
      * When true (default), `serve()` calls the carrier's API on startup to
      * point the configured phone number's webhook URL at this server. Set
@@ -2718,7 +3270,7 @@ interface ServeOptions {
      * hostname is dynamic and only known at runtime — the carrier MUST be
      * reconfigured for inbound calls to land.
      */
-    manageWebhook?: boolean;
+    readonly manageWebhook?: boolean;
 }
 /**
  * Normalised AMD (answering-machine detection) result emitted to
@@ -2744,8 +3296,8 @@ interface MachineDetectionResult {
 }
 /** Options for `Patter.call({...})` to place an outbound call. */
 interface LocalCallOptions {
-    to: string;
-    agent: AgentOptions;
+    readonly to: string;
+    readonly agent: AgentOptions;
     /**
      * Enable answering-machine detection. **Defaults to ``true``** — the SDK
      * asks Twilio (``MachineDetection=DetectMessageEnd`` + Async AMD) or
@@ -2756,7 +3308,7 @@ interface LocalCallOptions {
      * disable when you want to skip per-call AMD billing or you already
      * know the destination is a human.
      */
-    machineDetection?: boolean;
+    readonly machineDetection?: boolean;
     /**
      * Called once when the carrier finishes the AMD check. Fires for both
      * ``human`` and ``machine`` outcomes. Combine with ``voicemailMessage``
@@ -2764,11 +3316,11 @@ interface LocalCallOptions {
      * fires the callback after the drop is queued). Acceptance tests use
      * this to mark a run INVALID when ``classification !== 'human'``.
      */
-    onMachineDetection?: (result: MachineDetectionResult) => void | Promise<void>;
+    readonly onMachineDetection?: (result: MachineDetectionResult) => void | Promise<void>;
     /** If set, spoken as a voicemail message when AMD detects a machine. Implicitly enables ``machineDetection``. */
-    voicemailMessage?: string;
+    readonly voicemailMessage?: string;
     /** Dynamic variables merged into agent.variables before call. Override agent-level variables. */
-    variables?: Record<string, string>;
+    readonly variables?: Readonly<Record<string, string>>;
     /**
      * Ring timeout in seconds. Forwarded to Twilio as `Timeout` and to Telnyx
      * as `timeout_secs`. Defaults to **25 s** — the production-recommended
@@ -2776,7 +3328,7 @@ interface LocalCallOptions {
      * parity, or `null` to omit the parameter entirely (carrier picks its
      * own default).
      */
-    ringTimeout?: number | null;
+    readonly ringTimeout?: number | null;
     /**
      * When `true`, block until the call reaches a terminal state and resolve
      * to a {@link CallResult} (`outcome` ∈ answered / voicemail / no_answer /
@@ -2790,7 +3342,7 @@ interface LocalCallOptions {
      *
      * Mirrors Python's `Patter.call(..., wait=True)`.
      */
-    wait?: boolean;
+    readonly wait?: boolean;
 }
 /**
  * Carrier-agnostic terminal outcomes for an outbound call. `answered` means a
@@ -3136,7 +3688,7 @@ interface ElevenLabsParkedWS {
 /** WebSocket-based ElevenLabs TTS adapter — opt-in low-latency variant. */
 declare class ElevenLabsWebSocketTTS implements TTSAdapter {
     static readonly providerKey = "elevenlabs_ws";
-    readonly apiKey: string;
+    private readonly apiKey;
     readonly voiceId: string;
     readonly modelId: string;
     readonly voiceSettings?: Record<string, unknown>;
@@ -3692,6 +4244,86 @@ interface DefineToolInput {
  */
 declare function defineTool(input: DefineToolInput): ToolDefinition;
+/**
+ * Built-in ``consult`` tool — lets the in-call agent escalate to the caller's
+ * own back-office agent for deeper reasoning or fresh information, then speak
+ * the answer.
+ *
+ * This is the *dispatch + consult* pattern: Patter conducts the call (STT +
+ * LLM/voice + TTS + carrier); when the in-call agent hits something it cannot
+ * answer directly, it invokes this tool, which reaches the configured
+ * back-office agent and returns the reply for the agent to speak. The
+ * back-office agent stays off the per-turn path — consulted only on demand, so
+ * ordinary turns keep their low latency.
+ *
+ * Two targets are supported (see {@link ConsultConfig}):
+ *
+ * - ``url`` — the generic webhook path: POSTs ``{ request, call_id, caller,
+ *   callee }`` to your endpoint and reads a ``reply`` field back.
+ * - ``openaiCompatible`` — speaks an OpenAI-compatible ``/chat/completions``
+ *   endpoint directly (e.g. an OpenClaw agent, or vLLM / Ollama / Groq) with no
+ *   hand-written adapter: POSTs ``{ model, messages, user }`` and speaks
+ *   ``choices[0].message.content``. Use {@link openclawConsult}.
+ *
+ * The handler does the HTTP call itself so the per-consult timeout and auth from
+ * {@link ConsultConfig} are honoured. ``config.reassurance``, when set, is
+ * attached so the agent speaks a filler while the consult runs (Realtime mode
+ * only).
+ */
+/**
+ * Build a {@link ConsultConfig} that consults a specific OpenClaw agent directly
+ * (no hand-written adapter) — the TypeScript equivalent of Python's
+ * ``ConsultConfig.openclaw(...)``.
+ *
+ * ``agent`` is the OpenClaw agent id (e.g. ``"receptionist"``) → targets
+ * ``model="openclaw/<agent>"``. An already-namespaced target (``"openclaw/x"``,
+ * ``"openclaw:x"``, ``"agent:x"``) is passed through. ``allowLoopback`` defaults
+ * to ``true`` when ``baseUrl`` is loopback/private (the intended co-located
+ * deployment). The gateway bearer is read from ``apiKey`` or the
+ * ``OPENCLAW_API_KEY`` env var (operator-grade — never logged). ``timeoutMs``
+ * defaults to the phone-safe 30 s; raise it only for batch-style agents, never
+ * above 30 s on a live call.
+ */
+declare function openclawConsult(agent: string, opts?: {
+    readonly baseUrl?: string;
+    readonly apiKey?: string;
+    readonly timeoutMs?: number;
+    readonly toolName?: string;
+    readonly description?: string;
+    readonly reassurance?: string | Readonly<{
+        message: string;
+        afterMs?: number;
+    }>;
+    readonly headers?: Readonly<Record<string, string>>;
+    readonly allowLoopback?: boolean;
+}): ConsultConfig;
+/**
+ * Return an ``onCallEnd`` callback that posts the finished call's record to a
+ * specific OpenClaw agent, so the brain has the record and can follow up — the
+ * TypeScript equivalent of Python's ``openclaw_post_call_notifier``.
+ *
+ * Wire it on ``serve``:
+ *
+ *     await phone.serve({ agent, onCallEnd: openclawPostCallNotifier('receptionist') });
+ *
+ * The record is POSTed to the same OpenClaw agent over its OpenAI-compatible
+ * ``/chat/completions`` gateway, keyed to the call id (the ``user`` field +
+ * ``x-openclaw-session-key`` header) so it lands in the SAME OpenClaw session as
+ * the in-call ``consult`` turns. Fire-and-forget: any error is logged by type
+ * only (never the URL / headers / key) and never thrown into teardown. Args
+ * mirror {@link openclawConsult}; the bearer is read from ``apiKey`` or
+ * ``OPENCLAW_API_KEY`` (operator-grade — never logged).
+ */
+declare function openclawPostCallNotifier(agent: string, opts?: {
+    readonly baseUrl?: string;
+    readonly apiKey?: string;
+    readonly timeoutMs?: number;
+    readonly allowLoopback?: boolean;
+    readonly includeTranscript?: boolean;
+    readonly instruction?: string;
+}): (data: Record<string, unknown>) => Promise<void>;
 /**
  * Process-wide logger used by the SDK.
  *
@@ -3907,6 +4539,16 @@ declare class PatterError extends Error {
         code?: ErrorCode;
     });
 }
+/**
+ * Invalid constructor arguments, a missing required environment variable, or a
+ * frozen-config constraint violation. Parity with Python's
+ * ``PatterConfigError`` in ``libraries/python/getpatter/exceptions.py``.
+ */
+declare class PatterConfigError extends PatterError {
+    constructor(message: string, options?: {
+        code?: ErrorCode;
+    });
+}
 /** Network / WebSocket / HTTP-level connectivity failure when talking to a provider. */
 declare class PatterConnectionError extends PatterError {
     constructor(message: string, options?: {
@@ -4154,9 +4796,9 @@ declare class FallbackLLMProvider implements LLMProvider {
      * markers are filtered out so callers can concatenate the yielded strings
      * directly.
      */
-    completeStream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<string, void, unknown>;
+    completeStream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<string, void, unknown>;
     /** Streaming entry point — yields chunks from the first provider that succeeds. */
-    stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
+    stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<LLMChunk, void, unknown>;
     private tryProviders;
     private markUnavailable;
     private startRecovery;
@@ -4269,49 +4911,49 @@ interface PatterToolOptions {
      * Patter instance to dial through. Must be in local mode (have a `carrier`).
      * The tool boots `phone.serve()` on `start()`; do not call `serve()` yourself.
      */
-    phone: Patter;
+    readonly phone: Patter;
     /**
      * Default agent config used for outbound calls. Per-call overrides come from
      * `execute({ goal, first_message })`.
      */
-    agent?: AgentOptions;
+    readonly agent?: AgentOptions;
     /** Tool name shown to the LLM. Default `'make_phone_call'`. */
-    name?: string;
+    readonly name?: string;
     /** Tool description for the LLM. Default tuned for English assistants. */
-    description?: string;
+    readonly description?: string;
     /** Default per-call timeout in seconds. Default 180. */
-    maxDurationSec?: number;
+    readonly maxDurationSec?: number;
     /**
      * Optional pass-through for `phone.serve()`'s `recording` flag — record all
      * outbound calls placed via this tool.
      */
-    recording?: boolean;
+    readonly recording?: boolean;
 }
 /** Args accepted by `PatterTool.execute()` (and the OpenAI/Anthropic/Hermes tool schemas). */
 interface PatterToolExecuteArgs {
-    to: string;
-    goal?: string;
-    first_message?: string;
-    max_duration_sec?: number;
+    readonly to: string;
+    readonly goal?: string;
+    readonly first_message?: string;
+    readonly max_duration_sec?: number;
 }
 /** Result envelope returned by `PatterTool.execute()` once the underlying call ends. */
 interface PatterToolResult {
-    call_id: string;
-    status: string;
-    duration_seconds: number;
+    readonly call_id: string;
+    readonly status: string;
+    readonly duration_seconds: number;
     /**
      * Carrier-agnostic outcome (answered / voicemail / no_answer / busy /
      * failed) lifted from the SDK {@link CallResult}. Optional for backward
      * compatibility with any code constructing this envelope without it.
      */
-    outcome?: string;
-    cost_usd?: number;
-    transcript: Array<{
+    readonly outcome?: string;
+    readonly cost_usd?: number;
+    readonly transcript: ReadonlyArray<Readonly<{
         role: string;
         text: string;
         timestamp?: number;
-    }>;
-    metrics?: Record<string, unknown> | null;
+    }>>;
+    readonly metrics?: Readonly<Record<string, unknown>> | null;
 }
 /** Wraps a live `Patter` instance as a tool callable from external agent frameworks. */
 declare class PatterTool {
@@ -4322,6 +4964,11 @@ declare class PatterTool {
     private readonly maxDurationSec;
     private readonly recording;
     private started;
+    /** Cached in-progress (or completed) start promise so concurrent execute()
+     *  callers all await the same boot sequence instead of each racing into
+     *  phone.serve(). Reset to null on failure so callers can retry after a
+     *  transient error. */
+    private startPromise;
     constructor(opts: PatterToolOptions);
     /** OpenAI Chat Completions / Assistants tool spec. */
     openaiSchema(): {
@@ -4355,8 +5002,12 @@ declare class PatterTool {
      * `serve()` provides here. No `onCallEnd` callback is wired: the SDK's own
      * per-callId completion registry resolves the result, so the user's
      * `onCallEnd` slot is left free.
+     *
+     * Idempotent and concurrency-safe: concurrent callers all await the same
+     * in-progress boot instead of each racing into `phone.serve()`.
      */
     start(): Promise<void>;
+    private _doStart;
     /** Best-effort shutdown — tear the Patter server down via `disconnect()`. */
     stop(): Promise<void>;
     /**
@@ -4608,23 +5259,23 @@ interface Transcript$6 {
 type TranscriptCallback$6 = (transcript: Transcript$6) => void;
 /** Constructor options for {@link SonioxSTT}. */
 interface SonioxSTTOptions$1 {
-    model?: SonioxModel | string;
-    languageHints?: string[];
-    languageHintsStrict?: boolean;
-    sampleRate?: SonioxSampleRate | number;
-    numChannels?: number;
-    enableSpeakerDiarization?: boolean;
-    enableLanguageIdentification?: boolean;
-    maxEndpointDelayMs?: number;
-    clientReferenceId?: string;
-    baseUrl?: string;
+    readonly model?: SonioxModel | string;
+    readonly languageHints?: readonly string[];
+    readonly languageHintsStrict?: boolean;
+    readonly sampleRate?: SonioxSampleRate | number;
+    readonly numChannels?: number;
+    readonly enableSpeakerDiarization?: boolean;
+    readonly enableLanguageIdentification?: boolean;
+    readonly maxEndpointDelayMs?: number;
+    readonly clientReferenceId?: string;
+    readonly baseUrl?: string;
 }
 /** Streaming STT adapter for Soniox's real-time WebSocket API. */
 declare class SonioxSTT {
     /** Stable pricing/dashboard key — read by stream-handler/metrics. */
     static readonly providerKey = "soniox";
     private ws;
-    private callbacks;
+    private readonly callbacks;
     private final;
     private keepaliveTimer;
     private readonly apiKey;
@@ -4649,8 +5300,10 @@ declare class SonioxSTT {
     private emit;
     /** Send a binary PCM16-LE audio chunk to Soniox for transcription. */
     sendAudio(audio: Buffer): void;
-    /** Register a transcript listener (max 10 concurrent listeners). */
+    /** Register a transcript listener. */
     onTranscript(callback: TranscriptCallback$6): void;
+    /** Unregister a previously registered transcript listener. */
+    offTranscript(callback: TranscriptCallback$6): void;
     /** Send the empty-frame stream terminator and close the WebSocket. */
     close(): void;
 }
@@ -6022,8 +6675,8 @@ interface OpenAITTSOptions {
     speed?: number;
     /**
      * Enable anti-aliasing LPF ahead of the 3:2 decimation. Defaults to
-     * ``false`` for backwards-compatibility; set to ``true`` for cleaner
-     * audio on sibilants / fricatives.
+     * ``true`` (matches the provider default); set to ``false`` to opt out
+     * for bit-exact downsample-only output.
      */
     antiAlias?: boolean;
 }
@@ -6344,7 +6997,7 @@ interface OpenAILLMOptions {
  * const llm = new openai.LLM({ apiKey: "sk-...", model: "gpt-4o-mini", temperature: 0.4 });
  * ```
  */
-declare class LLM$4 extends OpenAILLMProvider {
+declare class LLM$7 extends OpenAILLMProvider {
     static readonly providerKey = "openai";
     constructor(opts?: OpenAILLMOptions);
 }
@@ -6455,7 +7108,7 @@ interface AnthropicLLMOptions {
  * const llm = new anthropic.LLM({ promptCaching: false });           // opt out of caching
  * ```
  */
-declare class LLM$3 extends AnthropicLLMProvider {
+declare class LLM$6 extends AnthropicLLMProvider {
     static readonly providerKey = "anthropic";
     constructor(opts?: AnthropicLLMOptions);
 }
@@ -6563,7 +7216,7 @@ interface GroqLLMOptions {
  * const llm = new groq.LLM({ apiKey: "gsk_...", model: "llama-3.3-70b-versatile" });
  * ```
  */
-declare class LLM$2 extends GroqLLMProvider {
+declare class LLM$5 extends GroqLLMProvider {
     static readonly providerKey = "groq";
     constructor(opts?: GroqLLMOptions);
 }
@@ -6708,7 +7361,7 @@ interface CerebrasLLMOptions {
  * const llm = new cerebras.LLM({ apiKey: "csk-...", model: "llama3.1-8b" });
  * ```
  */
-declare class LLM$1 extends CerebrasLLMProvider {
+declare class LLM$4 extends CerebrasLLMProvider {
     static readonly providerKey = "cerebras";
     constructor(opts?: CerebrasLLMOptions);
 }
@@ -6790,11 +7443,365 @@ interface GoogleLLMOptions {
  * const llm = new google.LLM({ apiKey: "AIza...", model: "gemini-2.5-flash" });
  * ```
  */
-declare class LLM extends GoogleLLMProvider {
+declare class LLM$3 extends GoogleLLMProvider {
     static readonly providerKey = "google";
     constructor(opts?: GoogleLLMOptions);
 }
+/**
+ * Generic OpenAI-compatible LLM provider for Patter's pipeline mode.
+ *
+ * Drives *any* OpenAI-compatible ``/chat/completions`` endpoint — an agent
+ * runtime (Hermes, OpenClaw) or a local inference gateway (Ollama, vLLM,
+ * LM Studio). Patter owns the carrier + STT + turn-taking + TTS; this
+ * provider turns each conversation turn into a single
+ * ``POST {baseUrl}/chat/completions`` request and speaks the response.
+ *
+ * PARITY NOTE (internal divergence, allowed by ``sdk-parity.md``): on the
+ * Python side this provider subclasses ``OpenAILLMProvider`` and merely swaps
+ * the ``AsyncOpenAI`` client (passing ``timeout=`` / ``base_url=``). The TS
+ * base ``OpenAILLMProvider`` is a raw-``fetch`` class with a HARDCODED 30 s
+ * timeout and ``baseUrl`` exposed as a ``protected get`` rather than a
+ * constructor field, so the "swap the client" trick is impossible here.
+ * Instead this is a STANDALONE ``implements LLMProvider`` class (same shape as
+ * {@link GroqLLMProvider} / {@link CerebrasLLMProvider}) that owns its own
+ * configurable timeout and reuses {@link parseOpenAISseStream}. Observably
+ * identical to Python (same 60 s / 120 s ceilings, same ``user`` field, same
+ * headers); only the timeout *mechanism* differs.
+ *
+ * Two additions over the base OpenAI provider:
+ *
+ * - **Long timeout.** Agent runtimes execute tools / memory / skills before
+ *   replying, so a turn can take 30-90 s. The default is 60 s here (the
+ *   presets raise it to 120 s), REPLACING the base provider's hardcoded 30 s.
+ * - **Session continuity.** Three independent, opt-in signals — each gated on
+ *   its own config, none coupled to another:
+ *     - ``sessionUserPrefix`` → emits the OpenAI ``user`` field as
+ *       ``` `${sessionUserPrefix}${callId}` ```. Used by runtimes that derive
+ *       a session from ``user`` (e.g. OpenClaw's gateway).
+ *     - ``sessionIdHeader`` (+ optional ``sessionIdPrefix``) → emits a per-call
+ *       header carrying ``` `${sessionIdPrefix}${callId}` ``` for per-call
+ *       session / transcript continuity on stateless runtimes that key off
+ *       headers (e.g. Hermes' ``X-Hermes-Session-Id``).
+ *     - ``sessionKeyHeader`` (+ ``sessionKey``) → emits a STATIC header for
+ *       long-term memory scoping (e.g. Hermes' ``X-Hermes-Session-Key``); the
+ *       value is the raw ``sessionKey``, never interpolated with the call id.
+ *   All three are OFF by default — fully backward compatible. ``sessionKey`` is
+ *   a credential-grade memory scope and is NEVER logged.
+ *
+ * Keyless gateways (Ollama / vLLM / LM Studio accept no key) are supported:
+ * the ``Authorization`` header is simply omitted from the request (sending a
+ * ``Bearer EMPTY`` placeholder breaks some gateways).
+ */
+/** Constructor options for {@link OpenAICompatibleLLMProvider}. */
+interface OpenAICompatibleLLMOptions {
+    /**
+     * Bearer token. If omitted and ``apiKeyEnv`` is given, read from that
+     * environment variable. May resolve to undefined for keyless local
+     * gateways — the ``Authorization`` header is then omitted entirely.
+     */
+    apiKey?: string;
+    /**
+     * Environment variable to read the bearer from when ``apiKey`` is not given
+     * (e.g. ``"OPENCLAW_API_KEY"``).
+     */
+    apiKeyEnv?: string;
+    /**
+     * OpenAI-compatible base URL ending in ``/v1`` — the whole point of this
+     * provider, so it is **required**. Operator-controlled config, never derived
+     * from caller / transcript input.
+     */
+    baseUrl: string;
+    /** Model / agent target — **required**. */
+    model: string;
+    /**
+     * Per-request timeout in **seconds**. Default ``60`` (the base OpenAI
+     * provider hardcodes 30 s — raised here because agent runtimes run tools
+     * before replying). Converted to ``AbortSignal.timeout(timeout * 1000)``.
+     */
+    timeout?: number;
+    /**
+     * Extra headers merged into the request *after* the ``User-Agent`` so the
+     * SDK attribution is not silently clobbered (a caller can still override
+     * ``User-Agent`` explicitly).
+     */
+    extraHeaders?: Record<string, string>;
+    /**
+     * When set, emits the OpenAI ``user`` field as
+     * ``` `${sessionUserPrefix}${callId}` ``` for per-call session continuity.
+     * ``undefined`` (default) means no ``user`` field is sent. Independent of the
+     * session headers below.
+     */
+    sessionUserPrefix?: string;
+    /**
+     * Optional header NAME carrying a per-call session id, e.g.
+     * ``"X-Hermes-Session-Id"`` or ``"x-openclaw-session-key"``. When set AND a
+     * ``callId`` is available, the header VALUE is
+     * ``` `${sessionIdPrefix}${callId}` ```. ``undefined`` (default) means off.
+     */
+    sessionIdHeader?: string;
+    /**
+     * Prefix for the session-id header VALUE. Defaults to ``""`` (raw call id).
+     * Only meaningful when ``sessionIdHeader`` is set.
+     */
+    sessionIdPrefix?: string;
+    /**
+     * Optional STATIC header NAME for long-term memory scoping, e.g.
+     * ``"X-Hermes-Session-Key"``. Emitted with the raw ``sessionKey`` value (no
+     * call-id interpolation) only when BOTH ``sessionKeyHeader`` and
+     * ``sessionKey`` are set. ``undefined`` (default) means off.
+     */
+    sessionKeyHeader?: string;
+    /**
+     * Static value emitted in ``sessionKeyHeader``. Credential-grade memory
+     * scope — NEVER logged. ``undefined`` (default) means the header is omitted.
+     */
+    sessionKey?: string;
+    /** Sampling temperature [0, 2]. */
+    temperature?: number;
+    /** Max tokens in the assistant response (sent as ``max_completion_tokens``). */
+    maxTokens?: number;
+    /** OpenAI-style ``response_format`` for JSON mode / structured outputs. */
+    responseFormat?: Record<string, unknown>;
+    /** Whether to allow parallel tool calls. */
+    parallelToolCalls?: boolean;
+    /** ``"auto" | "none" | "required"`` or a specific tool object. */
+    toolChoice?: string | Record<string, unknown>;
+    /** Sampling seed for reproducible outputs. */
+    seed?: number;
+    /** Nucleus sampling cutoff in [0, 1]. */
+    topP?: number;
+    /** Penalty in [-2, 2] applied to repeated tokens. */
+    frequencyPenalty?: number;
+    /** Penalty in [-2, 2] applied to seen tokens. */
+    presencePenalty?: number;
+    /** Stop sequence(s). */
+    stop?: string | string[];
+}
+/**
+ * LLM provider for any OpenAI-compatible ``/chat/completions`` endpoint.
+ *
+ * Streams in the same ``{ type: "text" | "tool_call" | "usage" }`` chunk
+ * format as the base OpenAI provider via the shared {@link parseOpenAISseStream}.
+ */
+declare class OpenAICompatibleLLMProvider implements LLMProvider {
+    /**
+     * Stable pricing/dashboard key — read by stream-handler/metrics. Typed as
+     * ``string`` (not the narrowed literal) so the Hermes / OpenClaw presets can
+     * override it with their own key while still extending this class.
+     */
+    static readonly providerKey: string;
+    /** Resolved bearer; undefined for keyless gateways. */
+    private readonly apiKey?;
+    readonly model: string;
+    private readonly baseUrl;
+    private readonly timeoutMs;
+    private readonly extraHeaders?;
+    private readonly sessionUserPrefix?;
+    private readonly sessionIdHeader?;
+    private readonly sessionIdPrefix?;
+    private readonly sessionKeyHeader?;
+    private readonly sessionKey?;
+    private readonly temperature?;
+    private readonly maxTokens?;
+    private readonly responseFormat?;
+    private readonly parallelToolCalls?;
+    private readonly toolChoice?;
+    private readonly seed?;
+    private readonly topP?;
+    private readonly frequencyPenalty?;
+    private readonly presencePenalty?;
+    private readonly stop?;
+    constructor(options: OpenAICompatibleLLMOptions);
+    /**
+     * Assemble the request headers. ``User-Agent`` is set first so any
+     * ``extraHeaders`` (and the per-call session headers) layer on top without
+     * silently dropping the SDK attribution, and the ``Authorization`` header is
+     * only added when a key is present (keyless gateways omit it).
+     *
+     * The two session headers are emitted INDEPENDENTLY, each gated on its own
+     * config (decoupled from ``sessionUserPrefix`` and from each other):
+     *  - ``sessionIdHeader`` (+ ``callId``) → ``` `${sessionIdPrefix}${callId}` ```
+     *  - ``sessionKeyHeader`` (+ ``sessionKey``) → the static ``sessionKey`` value.
+     * ``sessionKey`` is a credential-grade memory scope and is never logged.
+     */
+    private buildHeaders;
+    /**
+     * Pre-call DNS / TLS warmup for the configured endpoint. Best-effort:
+     * 5 s timeout, all exceptions swallowed at debug level. The ``Authorization``
+     * header is only sent when a key is present so the operator-grade bearer is
+     * never echoed for keyless gateways (and the key is never logged).
+     */
+    warmup(): Promise<void>;
+    /**
+     * Build the request body. Mirrors the base OpenAI provider's sampling-kwarg
+     * assembly and additionally sets ``user`` for session continuity when
+     * ``sessionUserPrefix`` is set AND a ``callId`` is available — so the default
+     * (prefix unset) behaviour is byte-identical to the base provider.
+     */
+    private buildBody;
+    /** Stream Patter-format LLM chunks from the configured chat completions API. */
+    stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<LLMChunk, void, unknown>;
+}
+/**
+ * Public alias of {@link OpenAICompatibleLLMProvider} for the
+ * ``getpatter/llm/openai-compatible`` namespace.
+ *
+ * @example
+ * ```ts
+ * import * as openaiCompatible from "getpatter/llm/openai-compatible";
+ * // Ollama / vLLM / LM Studio (keyless local gateway):
+ * const llm = new openaiCompatible.LLM({
+ *   baseUrl: "http://127.0.0.1:11434/v1",
+ *   model: "llama3.1",
+ * });
+ * ```
+ */
+declare class LLM$2 extends OpenAICompatibleLLMProvider {
+    static readonly providerKey = "openai_compatible";
+}
+/**
+ * Hermes agent-runtime LLM preset for Patter's pipeline mode.
+ *
+ * Thin preset over {@link OpenAICompatibleLLMProvider}: defaults the base URL,
+ * model, env-key name, timeout, and session-continuity prefix for the Hermes
+ * agent runtime so a user just writes ``phone.agent({ llm: new hermes.LLM() })``.
+ *
+ * Hermes runs tools / memory / skills internally before replying, so a single
+ * conversation turn can take 30-90 s — hence the 120 s default timeout. Hermes
+ * is stateless and keys continuity off HEADERS, not the OpenAI ``user`` field:
+ * the preset sends ``X-Hermes-Session-Id: patter-call-<callId>`` on every turn
+ * for per-call session / transcript continuity (on by default), and optionally
+ * ``X-Hermes-Session-Key: <sessionKey>`` for long-term memory scoping when you
+ * pass ``sessionKey``. (It also still emits ``user=patter-call-<callId>`` for
+ * upstream-log correlation, but that is not what drives the session.)
+ */
+/** Constructor options for the Hermes ``LLM`` preset. */
+interface HermesLLMOptions {
+    /** Bearer token. Falls back to ``API_SERVER_KEY`` env var when omitted. */
+    apiKey?: string;
+    /** Override the Hermes base URL (rarely needed). */
+    baseUrl?: string;
+    /** Model id. Falls back to ``API_SERVER_MODEL_NAME`` env, then ``"hermes-agent"``. */
+    model?: string;
+    /** Per-request timeout in seconds. Default ``120``. */
+    timeout?: number;
+    /**
+     * Long-term memory scope. When set, emits ``X-Hermes-Session-Key`` so Hermes
+     * scopes durable memory to this value across calls. ``undefined`` (default)
+     * means the header is not sent. Credential-grade — never logged.
+     */
+    sessionKey?: string;
+    /** Extra headers merged after the SDK ``User-Agent``. */
+    extraHeaders?: Record<string, string>;
+    /** Sampling temperature [0, 2]. */
+    temperature?: number;
+    /** Max tokens in the assistant response (sent as ``max_completion_tokens``). */
+    maxTokens?: number;
+    /** OpenAI-style ``response_format`` for JSON mode / structured outputs. */
+    responseFormat?: Record<string, unknown>;
+    /** Whether to allow parallel tool calls. */
+    parallelToolCalls?: boolean;
+    /** ``"auto" | "none" | "required"`` or a specific tool object. */
+    toolChoice?: string | Record<string, unknown>;
+    /** Sampling seed for reproducible outputs. */
+    seed?: number;
+    /** Nucleus sampling cutoff in [0, 1]. */
+    topP?: number;
+    /** Penalty in [-2, 2] applied to repeated tokens. */
+    frequencyPenalty?: number;
+    /** Penalty in [-2, 2] applied to seen tokens. */
+    presencePenalty?: number;
+    /** Stop sequence(s). */
+    stop?: string | string[];
+}
+/**
+ * Hermes agent-runtime LLM provider (OpenAI-compatible, streaming).
+ *
+ * @example
+ * ```ts
+ * import * as hermes from "getpatter/llm/hermes";
+ * const llm = new hermes.LLM();                       // env-defaulted, keyless OK
+ * const llm = new hermes.LLM({ apiKey: "...", model: "hermes-7b" });
+ * ```
+ */
+declare class LLM$1 extends OpenAICompatibleLLMProvider {
+    static readonly providerKey = "hermes";
+    constructor(opts?: HermesLLMOptions);
+}
+/**
+ * OpenClaw agent-runtime LLM preset for Patter's pipeline mode.
+ *
+ * Thin preset over {@link OpenAICompatibleLLMProvider}, aligned with the
+ * shipped ``openclawConsult`` builder in ``src/consult.ts``: same loopback
+ * base URL (``:18789/v1``), same ``OPENCLAW_API_KEY`` env var, same
+ * ``model="openclaw/<agent>"`` pass-through convention, same agent-id charset
+ * rule, and the same ``x-openclaw-session-key`` session header. Takes an
+ * ``agent`` id (not a raw model string), exactly like ``openclawConsult``.
+ *
+ * OpenClaw runs tools / memory / skills internally before replying, so a turn
+ * can take 30-90 s — hence the 120 s default timeout (unlike the consult
+ * preset's phone-safe 30 s filler default; here the runtime IS the per-turn
+ * brain, not an on-demand escalation). It keys sessions off BOTH the OpenAI
+ * ``user`` field and the ``x-openclaw-session-key`` header, so the preset
+ * enables both for one runtime session per phone call.
+ */
+/** Constructor options for the OpenClaw ``LLM`` preset. */
+interface OpenClawLLMOptions {
+    /**
+     * OpenClaw agent id (e.g. ``"receptionist"``). Mapped to
+     * ``model="openclaw/<agent>"``; an already-namespaced id (``"openclaw/x"``,
+     * ``"agent:x"``) is passed through unchanged. **Required.**
+     */
+    agent: string;
+    /** Override the OpenClaw base URL (rarely needed). */
+    baseUrl?: string;
+    /** Bearer token. Falls back to ``OPENCLAW_API_KEY`` env var when omitted. */
+    apiKey?: string;
+    /** Per-request timeout in seconds. Default ``120``. */
+    timeout?: number;
+    /** Extra headers merged after the SDK ``User-Agent``. */
+    extraHeaders?: Record<string, string>;
+    /** Sampling temperature [0, 2]. */
+    temperature?: number;
+    /** Max tokens in the assistant response (sent as ``max_completion_tokens``). */
+    maxTokens?: number;
+    /** OpenAI-style ``response_format`` for JSON mode / structured outputs. */
+    responseFormat?: Record<string, unknown>;
+    /** Whether to allow parallel tool calls. */
+    parallelToolCalls?: boolean;
+    /** ``"auto" | "none" | "required"`` or a specific tool object. */
+    toolChoice?: string | Record<string, unknown>;
+    /** Sampling seed for reproducible outputs. */
+    seed?: number;
+    /** Nucleus sampling cutoff in [0, 1]. */
+    topP?: number;
+    /** Penalty in [-2, 2] applied to repeated tokens. */
+    frequencyPenalty?: number;
+    /** Penalty in [-2, 2] applied to seen tokens. */
+    presencePenalty?: number;
+    /** Stop sequence(s). */
+    stop?: string | string[];
+}
+/**
+ * OpenClaw agent-runtime LLM provider (OpenAI-compatible, streaming).
+ *
+ * @example
+ * ```ts
+ * import * as openclaw from "getpatter/llm/openclaw";
+ * const llm = new openclaw.LLM({ agent: "receptionist" }); // reads OPENCLAW_API_KEY
+ * ```
+ */
+declare class LLM extends OpenAICompatibleLLMProvider {
+    static readonly providerKey = "openclaw";
+    constructor(opts: OpenClawLLMOptions);
+}
 /**
  * Silero VAD provider.
  *
@@ -6815,14 +7822,14 @@ declare const SUPPORTED_SAMPLE_RATES: readonly [8000, 16000];
 type SileroSampleRate = (typeof SUPPORTED_SAMPLE_RATES)[number];
 /** Options accepted by {@link SileroVAD.load}. */
 interface SileroVADOptions {
-    minSpeechDuration?: number;
-    minSilenceDuration?: number;
-    prefixPaddingDuration?: number;
-    activationThreshold?: number;
-    deactivationThreshold?: number;
-    sampleRate?: SileroSampleRate;
-    forceCpu?: boolean;
-    onnxFilePath?: string;
+    readonly minSpeechDuration?: number;
+    readonly minSilenceDuration?: number;
+    readonly prefixPaddingDuration?: number;
+    readonly activationThreshold?: number;
+    readonly deactivationThreshold?: number;
+    readonly sampleRate?: SileroSampleRate;
+    readonly forceCpu?: boolean;
+    readonly onnxFilePath?: string;
 }
 /**
  * Minimal structural type for the subset of `onnxruntime-node` we depend on.
@@ -6861,6 +7868,8 @@ declare class SileroVAD implements VADProvider {
     private speechThresholdDuration;
     private silenceThresholdDuration;
     private closed;
+    /** Transitions produced in the current processFrame call but not yet returned. */
+    private eventQueue;
     private constructor();
     /**
      * Load the Silero VAD model.
@@ -6945,9 +7954,9 @@ declare class SileroVAD implements VADProvider {
 interface DeepFilterNetOptions {
     /** Absolute path to a DeepFilterNet ONNX model.  If omitted, the filter
      *  logs a warning and becomes a pass-through. */
-    modelPath?: string;
+    readonly modelPath?: string;
     /** When true, disable the pass-through warning (used by tests). */
-    silenceWarnings?: boolean;
+    readonly silenceWarnings?: boolean;
 }
 /** OSS noise-suppression filter backed by a DeepFilterNet ONNX model. */
 declare class DeepFilterNetFilter implements AudioFilter {
@@ -7208,6 +8217,36 @@ declare class OpenAIRealtime2Adapter extends OpenAIRealtimeAdapter {
      * artefact and well below the GA VAD's 300 ms prefix-padding window.
      */
     private transcodeInboundMulaw8ToPcm24;
+    /**
+     * Log-only safety net for issue #154. The GA server echoes the *effective*
+     * session config in `session.updated`; we request `audio/pcm` @ 24 kHz and
+     * transcode PCM24→mulaw8 ourselves (see
+     * `transcodeOutboundPcm24ToMulaw8Buffer`). If a future GA schema change ever
+     * made the server return a different output format, that transcode — which
+     * assumes PCM16-LE @ 24 kHz — would silently corrupt audio, exactly the
+     * v1-beta failure mode #154 fixed. Warn so the drift surfaces in logs instead
+     * of as static. Never gates audio.
+     */
+    private warnIfOutputFormatUnexpected;
+    /**
+     * Shared audio-delta translation helper. Transcodes a GA
+     * `response.output_audio.delta` payload (base64 PCM-16-LE 24 kHz)
+     * into mulaw 8 kHz and splits the result into 160-byte (20 ms) frames,
+     * dispatching one synthetic `response.audio.delta` event per frame.
+     *
+     * Called from BOTH the `connect()` shim and the `adoptWebSocket()` shim
+     * so that warm-path (prewarm/adopted) calls receive identical transcoding
+     * to cold-path calls. Without this, adopted sockets forwarded raw PCM-24
+     * to Twilio/Telnyx, producing garbled or silent audio on every warm call.
+     *
+     * @param parsed  - The parsed GA event object (type already checked to be
+     *                  `response.output_audio.delta` with a string `delta`).
+     * @param handler - The downstream message listener to dispatch each frame to.
+     * @param rest    - Extra arguments forwarded from the original `message` event.
+     * @returns `true` if frames were dispatched (caller should return early),
+     *          `false` if the resampler is still warming up (zero output bytes).
+     */
+    private translateGaAudioDelta;
     /**
      * Base64 PCM-16-LE 24 kHz → Base64 mulaw 8 kHz. Used by the WS
      * translation shim on each `response.output_audio.delta`. The stateful
@@ -7217,6 +8256,23 @@ declare class OpenAIRealtime2Adapter extends OpenAIRealtimeAdapter {
      */
     private transcodeOutboundPcm24ToMulaw8Buffer;
     sendFirstMessage(text: string): Promise<void>;
+    /**
+     * Speak a short reassurance filler WITHOUT injecting a `role:user` turn.
+     *
+     * GA-shape sibling of {@link sendFirstMessage} (and override of the base v1
+     * {@link OpenAIRealtimeAdapter.sendReassurance}): a bare `response.create`
+     * carrying explicit `instructions` so the filler is the assistant's own
+     * in-band audio. No `conversation.item.create` with `role:"user"` is
+     * emitted, so the transcript shows no phantom caller line. The GA endpoint
+     * rejects `response.modalities` and does not inherit `audio.output.voice`
+     * for an explicit `response.create`, so — exactly as in
+     * {@link sendFirstMessage} — we send `output_modalities` and re-inject the
+     * voice. Fillers must not imply success or failure.
+     *
+     * Mirrors Python `OpenAIRealtime2Adapter.send_reassurance` in
+     * `providers/openai_realtime_2.py`.
+     */
+    sendReassurance(text: string): Promise<void>;
 }
 /**
@@ -7541,7 +8597,7 @@ declare class ChatContext {
  */
 /** Valid DTMF tone values (keypad characters). */
-declare const DTMF_EVENTS: readonly ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "*", "#", "A", "B", "C", "D"];
+declare const DTMF_EVENTS: readonly ["1", "2", "3", "4", "5", "6", "7", "8", "9", "0", "*", "#", "A", "B", "C", "D"];
 /** Single DTMF tone value (a member of `DTMF_EVENTS`). */
 type DtmfEvent = (typeof DTMF_EVENTS)[number];
 /** Join DTMF events into a space-separated debug string. */
@@ -8030,8 +9086,10 @@ declare class TelnyxSTT {
     connect(): Promise<void>;
     /** Send a binary PCM16 audio chunk; emits the WAV header on the first call. */
     sendAudio(audio: Buffer): void;
-    /** Register a transcript listener (max 10 concurrent listeners). */
+    /** Register a transcript listener. */
     onTranscript(callback: TranscriptCallback): void;
+    /** Unregister a previously-registered transcript listener. */
+    offTranscript(callback: TranscriptCallback): void;
     /** Close the streaming WebSocket. */
     close(): void;
 }
@@ -8149,4 +9207,4 @@ interface CallEvent {
     readonly direction?: string;
 }
-export { type AgentOptions, type AgentState, AllProvidersFailedError, type AnthropicConversion, LLM$3 as AnthropicLLM, type AnthropicLLMOptions, type AnthropicMessage, AssemblyAIEncoding, AssemblyAIModel, STT$1 as AssemblyAISTT, type AssemblyAISTTOptions, type AudioConfig, type AudioSource, AuthenticationError, type BackgroundAudioOptions, BackgroundAudioPlayer, type EvaluateContext as BargeInEvaluateContext, type BargeInStrategy, BuiltinAudioClip, type BuiltinAudioClipName, type BuiltinPcmSource, type CallControl, type CallEvent, type CallEventHandler, type CallMetrics, CallMetricsAccumulator, type CallOutcome, type CallRecord, type CallResult, type CarrierKind, type CartesiaEncoding, STT$3 as CartesiaSTT, type CartesiaSTTOptions, TTS$3 as CartesiaTTS, CartesiaTTSModel, type CartesiaTTSOptions, CartesiaTTSVoiceMode, LLM$1 as CerebrasLLM, type CerebrasLLMOptions, ChatContext, type ChatMessage, type ChatRole, CloudflareTunnel, type ConversationStateSnapshot, type CostBreakdown, DEFAULT_MIN_SENTENCE_LEN, DEFAULT_PRICING, DTMF_EVENTS, DeepFilterNetFilter, type DeepFilterNetOptions, DeepgramModel, STT$6 as DeepgramSTT, type DeepgramSTTOptions, DefaultToolExecutor, type DefaultToolExecutorOptions, type DefineToolInput, type DtmfEvent, ConvAI as ElevenLabsConvAI, ElevenLabsConvAIAdapter, type ConvAIOptions as ElevenLabsConvAIOptions, ElevenLabsModel, ElevenLabsOutputFormat, ElevenLabsTTS as ElevenLabsRestTTS, TTS$6 as ElevenLabsTTS, type ElevenLabsTTSOptions, type ElevenLabsWebSocketOptions, TTS$5 as ElevenLabsWebSocketTTS, type EouTrigger, ErrorCode, EventBus, FallbackLLMProvider, type FallbackLLMProviderOptions, type FilePcmSource, GEMINI_DEFAULT_INPUT_SR, GEMINI_DEFAULT_OUTPUT_SR, GeminiLiveAdapter, type GeminiLiveEventHandler, LLM as GoogleLLM, type GoogleLLMOptions, LLM$2 as GroqLLM, type GroqLLMOptions, Guardrail$1 as Guardrail, type GuardrailOptions, type HookContext, IVRActivity, type IVRActivityOptions, type IVRToolDefinition, type IncomingMessage, type 
InitTracingOptions, TTS as InworldTTS, type InworldTTSOptions, type JobCallback, KrispFrameDuration, KrispSampleRate, KrispVivaFilter, type KrispVivaFilterOptions, type LLMChunk, LLMLoop, type LLMProvider, LMNTAudioFormat, LMNTModel, LMNTSampleRate, TTS$1 as LMNTTTS, type LMNTTTSOptions, type LatencyBreakdown, type LocalCallOptions, type LocalConfig, type LocalOptions, type Logger, type LoopCallback, type MessageHandler, MetricsStore, MinWordsStrategy, type MinWordsStrategyOptions, type ModelPricing, Ngrok, LLM$4 as OpenAILLM, type OpenAILLMOptions, OpenAILLMProvider, type OpenAIMessage, Realtime as OpenAIRealtime, Realtime2 as OpenAIRealtime2, OpenAIRealtime2Adapter, type Realtime2Options as OpenAIRealtime2Options, OpenAIRealtimeAdapter, OpenAIRealtimeAudioFormat, OpenAIRealtimeModel, type RealtimeOptions as OpenAIRealtimeOptions, OpenAIRealtimeVADType, TTS$4 as OpenAITTS, type OpenAITTSOptions, STT$4 as OpenAITranscribeSTT, type OpenAITranscribeSTTOptions, OpenAITranscriptionModel, OpenAIVoice, PRICING_LAST_UPDATED, PRICING_VERSION, type ParamSpec, PartialStreamError, Patter, PatterConnectionError, PatterError, type PatterEventType, PatterTool, type PatterToolExecuteArgs, type PatterToolOptions, type PatterToolResult, PcmCarry, PipelineHookExecutor, type PipelineHooks, type PipelineMessageHandler, Carrier as Plivo, PlivoAdapter, type PlivoCarrierOptions, type InitiateCallOptions as PlivoInitiateCallOptions, type InitiateCallResult as PlivoInitiateCallResult, PricingUnit, type PricingUnitValue, type ProviderPricing, ProvisionError, RateLimitError, type RawPcmSource, type RealtimeConfig, RemoteMessageHandler, RimeAudioFormat, RimeModel, TTS$2 as RimeTTS, type RimeTTSOptions, SPAN_BARGEIN, SPAN_CALL, SPAN_ENDPOINT, SPAN_LLM, SPAN_STT, SPAN_TOOL, SPAN_TTS, type SSEEvent, type STTConfig, type ScheduleHandle, SentenceChunker, type ServeOptions, type SilenceCallback, type SileroSampleRate, SileroVAD, type SileroVADOptions, STT$2 as SonioxSTT, type SonioxSTTOptions$1 as 
SonioxSTTOptions, type Span, type SpeechEventCallback, SpeechEvents, SpeechmaticsAudioEncoding, SpeechmaticsOperatingPoint, STT as SpeechmaticsSTT, type SpeechmaticsSTTOptions, SpeechmaticsSampleRate, SpeechmaticsServerMessage, TurnDetectionMode as SpeechmaticsTurnDetectionMode, StatefulResampler, type StatefulResamplerOptions, Static as StaticTunnel, type TTSConfig, Carrier$1 as Telnyx, TelnyxAdapter, type TelnyxCarrierOptions, type ConfigureNumberOptions as TelnyxConfigureNumberOptions, type EndCallOptions as TelnyxEndCallOptions, type InitiateCallOptions$1 as TelnyxInitiateCallOptions, type InitiateCallResult$1 as TelnyxInitiateCallResult, type ProvisionNumberOptions as TelnyxProvisionNumberOptions, type ProvisionNumberResult as TelnyxProvisionNumberResult, TelnyxSTT, TelnyxSTTInputFormat, TelnyxSTTSampleRate, type Transcript as TelnyxSTTTranscript, TelnyxTTS, TelnyxTTSSampleRate, TelnyxTTSVoice, type TelnyxTranscriptionEngine, TestSession, TfidfLoopDetector, type TfidfLoopDetectorOptions, Tool, type ToolDefinition, type ToolExecutor, type ToolHandler, type ToolOptions, type TunnelHandle, type TurnMetrics, Carrier$2 as Twilio, TwilioAdapter, type TwilioAdapterOptions, type TwilioCarrierOptions, type ConfigureNumberOptions$1 as TwilioConfigureNumberOptions, type InitiateCallOptions$2 as TwilioInitiateCallOptions, type InitiateCallResult$2 as TwilioInitiateCallResult, type ProvisionNumberOptions$1 as TwilioProvisionNumberOptions, type ProvisionNumberResult$1 as TwilioProvisionNumberResult, ULTRAVOX_DEFAULT_API_BASE, ULTRAVOX_DEFAULT_SR, type UltravoxEventHandler, UltravoxRealtimeAdapter, type UserState, STT$5 as WhisperSTT, type WhisperSTTOptions, assemblyai, builtinClipPath, calculateRealtimeCost, calculateSttCost, calculateTelephonyCost, calculateTtsCost, callsToCsv, callsToJson, cartesia, createResampler16kTo8k, createResampler24kTo16k, createResampler24kTo8k, createResampler8kTo16k, deepgram, defineTool, elevenlabs, evaluateStrategies as 
evaluateBargeInStrategies, filterEmoji, filterForTTS, filterMarkdown, formatDtmf, geminiLive, getLogger, guardrail, initTracing, isRemoteUrl, isTracingEnabled, isWebSocketUrl, lmnt, makeAuthMiddleware, mergePricing, mixPcm, mountApi, mountDashboard, mulawToPcm16, notifyDashboard, openaiTts, pcm16ToMulaw, resample16kTo8k, resample24kTo16k, resample8kTo16k, resamplePcm, resetStrategies as resetBargeInStrategies, rime, scheduleCron, scheduleInterval, scheduleOnce, selectSoundFromList, setLogger, soniox, speechmatics, startSpan, startTunnel, tool, ultravox, whisper };
+export { type AgentOptions, type AgentState, AllProvidersFailedError, type AnthropicConversion, LLM$6 as AnthropicLLM, type AnthropicLLMOptions, type AnthropicMessage, AssemblyAIEncoding, AssemblyAIModel, STT$1 as AssemblyAISTT, type AssemblyAISTTOptions, type AudioConfig, type AudioSource, AuthenticationError, type BackgroundAudioOptions, BackgroundAudioPlayer, type EvaluateContext as BargeInEvaluateContext, type BargeInStrategy, BuiltinAudioClip, type BuiltinAudioClipName, type BuiltinPcmSource, type CallControl, type CallEvent, type CallEventHandler, type CallMetrics, CallMetricsAccumulator, type CallOutcome, type CallRecord, type CallResult, type CarrierKind, type CartesiaEncoding, STT$3 as CartesiaSTT, type CartesiaSTTOptions, TTS$3 as CartesiaTTS, CartesiaTTSModel, type CartesiaTTSOptions, CartesiaTTSVoiceMode, LLM$4 as CerebrasLLM, type CerebrasLLMOptions, ChatContext, type ChatMessage, type ChatRole, CloudflareTunnel, type ConsultConfig, type ConversationStateSnapshot, type CostBreakdown, DEFAULT_MIN_SENTENCE_LEN, DEFAULT_PRICING, DTMF_EVENTS, DeepFilterNetFilter, type DeepFilterNetOptions, DeepgramModel, STT$6 as DeepgramSTT, type DeepgramSTTOptions, DefaultToolExecutor, type DefaultToolExecutorOptions, type DefineToolInput, type DtmfEvent, ConvAI as ElevenLabsConvAI, ElevenLabsConvAIAdapter, type ConvAIOptions as ElevenLabsConvAIOptions, ElevenLabsModel, ElevenLabsOutputFormat, ElevenLabsTTS as ElevenLabsRestTTS, TTS$6 as ElevenLabsTTS, type ElevenLabsTTSOptions, type ElevenLabsWebSocketOptions, TTS$5 as ElevenLabsWebSocketTTS, type EouTrigger, ErrorCode, EventBus, FallbackLLMProvider, type FallbackLLMProviderOptions, type FilePcmSource, GEMINI_DEFAULT_INPUT_SR, GEMINI_DEFAULT_OUTPUT_SR, GeminiLiveAdapter, type GeminiLiveEventHandler, LLM$3 as GoogleLLM, type GoogleLLMOptions, LLM$5 as GroqLLM, type GroqLLMOptions, Guardrail$1 as Guardrail, type GuardrailOptions, LLM$1 as HermesLLM, type HermesLLMOptions, type HookContext, IVRActivity, type 
IVRActivityOptions, type IVRToolDefinition, type IncomingMessage, type InitTracingOptions, TTS as InworldTTS, type InworldTTSOptions, type JobCallback, KrispFrameDuration, KrispSampleRate, KrispVivaFilter, type KrispVivaFilterOptions, type LLMChunk, LLMLoop, type LLMProvider, LMNTAudioFormat, LMNTModel, LMNTSampleRate, TTS$1 as LMNTTTS, type LMNTTTSOptions, type LatencyBreakdown, type LocalCallOptions, type LocalConfig, type LocalOptions, type Logger, type LoopCallback, type MessageHandler, MetricsStore, MinWordsStrategy, type MinWordsStrategyOptions, type ModelPricing, Ngrok, type OpenAICompatibleConsult, LLM$2 as OpenAICompatibleLLM, type OpenAICompatibleLLMOptions, OpenAICompatibleLLMProvider, LLM$7 as OpenAILLM, type OpenAILLMOptions, OpenAILLMProvider, type OpenAIMessage, Realtime as OpenAIRealtime, Realtime2 as OpenAIRealtime2, OpenAIRealtime2Adapter, type Realtime2Options as OpenAIRealtime2Options, OpenAIRealtimeAdapter, OpenAIRealtimeAudioFormat, OpenAIRealtimeModel, type RealtimeOptions as OpenAIRealtimeOptions, OpenAIRealtimeVADType, TTS$4 as OpenAITTS, type OpenAITTSOptions, STT$4 as OpenAITranscribeSTT, type OpenAITranscribeSTTOptions, OpenAITranscriptionModel, OpenAIVoice, LLM as OpenClawLLM, type OpenClawLLMOptions, PRICING_LAST_UPDATED, PRICING_VERSION, type ParamSpec, PartialStreamError, Patter, PatterConfigError, PatterConnectionError, PatterError, type PatterEventType, PatterTool, type PatterToolExecuteArgs, type PatterToolOptions, type PatterToolResult, PcmCarry, PipelineHookExecutor, type PipelineHooks, type PipelineMessageHandler, Carrier as Plivo, PlivoAdapter, type PlivoCarrierOptions, type InitiateCallOptions as PlivoInitiateCallOptions, type InitiateCallResult as PlivoInitiateCallResult, PricingUnit, type PricingUnitValue, type ProviderPricing, ProvisionError, RateLimitError, type RawPcmSource, type RealtimeConfig, type RealtimeTurnDetection, RemoteMessageHandler, RimeAudioFormat, RimeModel, TTS$2 as RimeTTS, type RimeTTSOptions, 
SPAN_BARGEIN, SPAN_CALL, SPAN_ENDPOINT, SPAN_LLM, SPAN_STT, SPAN_TOOL, SPAN_TTS, type SSEEvent, type STTConfig, type ScheduleHandle, SentenceChunker, type ServeOptions, type SilenceCallback, type SileroSampleRate, SileroVAD, type SileroVADOptions, STT$2 as SonioxSTT, type SonioxSTTOptions$1 as SonioxSTTOptions, type Span, type SpeechEventCallback, SpeechEvents, SpeechmaticsAudioEncoding, SpeechmaticsOperatingPoint, STT as SpeechmaticsSTT, type SpeechmaticsSTTOptions, SpeechmaticsSampleRate, SpeechmaticsServerMessage, TurnDetectionMode as SpeechmaticsTurnDetectionMode, StatefulResampler, type StatefulResamplerOptions, Static as StaticTunnel, type TTSConfig, Carrier$1 as Telnyx, TelnyxAdapter, type TelnyxCarrierOptions, type ConfigureNumberOptions as TelnyxConfigureNumberOptions, type EndCallOptions as TelnyxEndCallOptions, type InitiateCallOptions$1 as TelnyxInitiateCallOptions, type InitiateCallResult$1 as TelnyxInitiateCallResult, type ProvisionNumberOptions as TelnyxProvisionNumberOptions, type ProvisionNumberResult as TelnyxProvisionNumberResult, TelnyxSTT, TelnyxSTTInputFormat, TelnyxSTTSampleRate, type Transcript as TelnyxSTTTranscript, TelnyxTTS, TelnyxTTSSampleRate, TelnyxTTSVoice, type TelnyxTranscriptionEngine, TestSession, TfidfLoopDetector, type TfidfLoopDetectorOptions, Tool, type ToolDefinition, type ToolExecutor, type ToolHandler, type ToolOptions, type TunnelHandle, type TurnMetrics, Carrier$2 as Twilio, TwilioAdapter, type TwilioAdapterOptions, type TwilioCarrierOptions, type ConfigureNumberOptions$1 as TwilioConfigureNumberOptions, type InitiateCallOptions$2 as TwilioInitiateCallOptions, type InitiateCallResult$2 as TwilioInitiateCallResult, type ProvisionNumberOptions$1 as TwilioProvisionNumberOptions, type ProvisionNumberResult$1 as TwilioProvisionNumberResult, ULTRAVOX_DEFAULT_API_BASE, ULTRAVOX_DEFAULT_SR, type UltravoxEventHandler, UltravoxRealtimeAdapter, type UserState, STT$5 as WhisperSTT, type WhisperSTTOptions, assemblyai, 
builtinClipPath, calculateRealtimeCost, calculateSttCost, calculateTelephonyCost, calculateTtsCost, callsToCsv, callsToJson, cartesia, createResampler16kTo8k, createResampler24kTo16k, createResampler24kTo8k, createResampler8kTo16k, deepgram, defineTool, elevenlabs, evaluateStrategies as evaluateBargeInStrategies, filterEmoji, filterForTTS, filterMarkdown, formatDtmf, geminiLive, getLogger, guardrail, initTracing, isRemoteUrl, isTracingEnabled, isWebSocketUrl, lmnt, makeAuthMiddleware, mergePricing, mixPcm, mountApi, mountDashboard, mulawToPcm16, notifyDashboard, openaiTts, openclawConsult, openclawPostCallNotifier, pcm16ToMulaw, resample16kTo8k, resample24kTo16k, resample8kTo16k, resamplePcm, resetStrategies as resetBargeInStrategies, rime, scheduleCron, scheduleInterval, scheduleOnce, selectSoundFromList, setLogger, soniox, speechmatics, startSpan, startTunnel, tool, ultravox, whisper };