getpatter 0.6.3 → 0.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -189,8 +189,17 @@ declare class SpeechEvents {
189
189
  *
190
190
  * Wraps `wss://api.openai.com/v1/realtime` and exposes the unified
191
191
  * Patter realtime contract (`connect / sendAudio / onEvent / close`) on
192
- * {@link OpenAIRealtimeAdapter}. Audio negotiation defaults to
193
- * `g711_ulaw` so traffic flows through Twilio/Telnyx without transcoding.
192
+ * {@link OpenAIRealtimeAdapter}.
193
+ *
194
+ * NOTE (issue #154): this class is no longer instantiated directly for the
195
+ * telephony bridge. OpenAI deprecated the Beta Realtime API, so its flat
196
+ * `output_audio_format: g711_ulaw` session shape is ignored by GA models —
197
+ * the server falls back to PCM16 @ 24 kHz, which this adapter would forward to
198
+ * Twilio framed as 8 kHz mulaw (static + broken STT). `buildAIAdapter` in
199
+ * `server.ts` now routes BOTH the `OpenAIRealtime` and `OpenAIRealtime2`
200
+ * engines through {@link OpenAIRealtime2Adapter} (GA session shape + internal
201
+ * PCM24→mulaw8 transcode). This class is retained as the shared base class
202
+ * that `OpenAIRealtime2Adapter` extends.
194
203
  */
195
204
 
196
205
  /**
@@ -292,6 +301,46 @@ interface OpenAIRealtimeOptions {
292
301
  * Has no effect on models that don't support the `reasoning` field.
293
302
  */
294
303
  reasoningEffort?: 'minimal' | 'low' | 'medium' | 'high';
304
+ /**
305
+ * Input noise reduction for speakerphone / conference audio. `undefined`
306
+ * (default) omits the field entirely (no reduction — today's behavior).
307
+ * `"far_field"` is recommended for phone / speakerphone calls;
308
+ * `"near_field"` for a handset close to the mouth.
309
+ *
310
+ * v1 wire shape: emitted at the top level of `session.update` as
311
+ * `input_audio_noise_reduction: { type }`. The GA adapter
312
+ * (`OpenAIRealtime2Adapter`) nests it under `audio.input` instead.
313
+ *
314
+ * Mirrors Python `noise_reduction` on `OpenAIRealtimeAdapter`.
315
+ */
316
+ noiseReduction?: 'near_field' | 'far_field';
317
+ /**
318
+ * Turn-detection tuning. `undefined` (default) keeps the adapter's current
319
+ * hardcoded `server_vad` / threshold `0.5` / silence 300 ms settings.
320
+ * Raise `threshold` or switch to `semantic_vad` with `eagerness: 'low'` to
321
+ * stop speakerphone / conference noise from triggering false barge-ins.
322
+ *
323
+ * Mirrors Python `turn_detection` on `OpenAIRealtimeAdapter` and
324
+ * `turn_detection` on the engine marker `engines.openai.Realtime`.
325
+ */
326
+ turnDetection?: RealtimeTurnDetection;
327
+ /**
328
+ * Gate the model's response on the Whisper transcript (legacy behavior).
329
+ *
330
+ * `false` (default) — the stream handler requests the response on
331
+ * `speech_stopped`, independently of the Whisper `transcript_input` event.
332
+ * The transcript is display-only (dashboard / history / `onTranscript`).
333
+ * `true` — the stream handler requests the response only after the
334
+ * `transcript_input` event passes the hallucination filter (prior
335
+ * behavior).
336
+ *
337
+ * The adapter itself does not act on this flag — it is read by the stream
338
+ * handler via {@link OpenAIRealtimeAdapter.getGateResponseOnTranscript} to
339
+ * decide WHEN to call {@link OpenAIRealtimeAdapter.requestResponse}.
340
+ *
341
+ * Mirrors Python `gate_response_on_transcript` on `OpenAIRealtimeAdapter`.
342
+ */
343
+ gateResponseOnTranscript?: boolean;
295
344
  }
296
345
  /** Realtime WebSocket adapter for OpenAI's `gpt-realtime` family. */
297
346
  declare class OpenAIRealtimeAdapter {
@@ -314,12 +363,22 @@ declare class OpenAIRealtimeAdapter {
314
363
  private currentResponseAudioMs;
315
364
  private currentResponseFirstAudioAt;
316
365
  protected readonly options: OpenAIRealtimeOptions;
366
+ private readonly gateResponseOnTranscript;
317
367
  constructor(apiKey: string, model?: string, voice?: string, instructions?: string, tools?: Array<{
318
368
  name: string;
319
369
  description: string;
320
370
  parameters: Record<string, unknown>;
321
371
  strict?: boolean;
322
372
  }> | undefined, audioFormat?: OpenAIRealtimeAudioFormat, options?: OpenAIRealtimeOptions);
373
+ /**
374
+ * Whether the stream handler should gate the model response on the Whisper
375
+ * transcript (legacy) or fire it on `speech_stopped` (default, decoupled).
376
+ *
377
+ * `false` (default) — the response is requested on `speech_stopped`,
378
+ * independently of Whisper. `true` — the response is requested only after
379
+ * `transcript_input` passes the hallucination filter.
380
+ */
381
+ getGateResponseOnTranscript(): boolean;
323
382
  /**
324
383
  * Build the production session.update body. Mirrors the body sent
325
384
  * inside `connect()` so warmup can apply identical configuration to
@@ -399,18 +458,45 @@ declare class OpenAIRealtimeAdapter {
399
458
  /** Remove a previously registered {@link onEvent} callback. */
400
459
  offEvent(callback: RealtimeEventCallback): void;
401
460
  protected ensureMessageListener(): void;
402
- /** Truncate the in-flight assistant turn and cancel the active response.
461
+ /** Truncate the in-flight assistant turn's playback offset on the server.
462
+ *
463
+ * Sends ONLY ``conversation.item.truncate`` — no ``response.cancel``. This
464
+ * is the half of barge-in handling that a WebSocket transport MUST always
465
+ * perform: per OpenAI's docs, the GA server auto-truncates on barge-in only
466
+ * over WebRTC / SIP; on the WebSocket transport the client is responsible
467
+ * for telling the server how much of the assistant turn was actually heard.
468
+ * In server-managed mode (``interrupt_response: true``) the server already
469
+ * cancels the response itself, so issuing ``response.cancel`` here would be
470
+ * redundant / rejected — call this method, not {@link cancelResponse}.
403
471
  *
404
472
  * ``audio_end_ms`` MUST reflect what the caller actually heard, not what
405
473
  * the server generated. OpenAI streams audio at 5-10x real-time, so the
406
474
  * byte-derived counter overstates playback whenever the consumer cleared
407
- * its playout buffer (e.g. ``send_clear``) before the audio reached the
475
+ * its playout buffer (e.g. ``sendClear``) before the audio reached the
408
476
  * speaker. We bound the truncate point by wall-clock time since the first
409
477
  * chunk of this response — that's the physical maximum a 1x real-time
410
478
  * playback could have produced. Without this cap, OpenAI keeps the full
411
479
  * generated assistant text on the transcript, and the model replays /
412
480
  * resumes from it on the next turn — manifesting as re-greetings and
413
481
  * mid-sentence fragments after a barge-in storm.
482
+ *
483
+ * No-op when no response is in flight, keeping it idempotent across stale
484
+ * callers. Resets per-response tracking so post-truncate late frames and
485
+ * the next response start clean.
486
+ */
487
+ truncate(): void;
488
+ /** Truncate the in-flight assistant turn AND cancel the active response.
489
+ *
490
+ * Sends BOTH ``conversation.item.truncate`` (the played-offset bookkeeping)
491
+ * AND ``response.cancel``. Use this on the LEGACY client-managed barge-in
492
+ * path (``gateResponseOnTranscript`` true → ``interrupt_response: false``,
493
+ * so the server does NOT cancel for us) and for explicit cancels driven by
494
+ * Patter (e.g. on transfer / hangup). In server-managed mode call
495
+ * {@link truncate} instead — the server already cancels the response, and an
496
+ * extra ``response.cancel`` would be redundant / rejected.
497
+ *
498
+ * Truncation bounding semantics are identical to {@link truncate}; see its
499
+ * doc comment for the ``audio_end_ms`` wall-clock cap rationale.
414
500
  */
415
501
  cancelResponse(): void;
416
502
  /** Inject a user text turn and request a new response. */
@@ -441,6 +527,24 @@ declare class OpenAIRealtimeAdapter {
441
527
  * customer cue).
442
528
  */
443
529
  sendFirstMessage(text: string): Promise<void>;
530
+ /**
531
+ * Speak a short reassurance filler WITHOUT injecting a `role:user` turn.
532
+ *
533
+ * Same no-fake-turn shape as {@link sendFirstMessage}: a bare
534
+ * `response.create` carrying explicit `instructions`, so the filler is the
535
+ * assistant's own in-band audio. The reassurance scheduler in the
536
+ * stream-handler routes here instead of {@link sendText} — which would emit
537
+ * a `conversation.item.create` with `role:'user'` and falsely show the
538
+ * caller saying "One moment." in the transcript. Fillers must not imply
539
+ * success or failure.
540
+ *
541
+ * Uses `modalities: ['audio', 'text']` (v1-beta shape). The GA subclass
542
+ * {@link OpenAIRealtime2Adapter} overrides this with `output_modalities`
543
+ * and re-injects `audio.output.voice` so the GA endpoint does not reject
544
+ * the request. Mirrors Python `OpenAIRealtimeAdapter.send_reassurance` in
545
+ * `providers/openai_realtime.py`.
546
+ */
547
+ sendReassurance(text: string): Promise<void>;
444
548
  /** Submit a tool/function-call result and request the next response. */
445
549
  sendFunctionResult(callId: string, result: string): Promise<void>;
446
550
  /** Stop the heartbeat, drop listeners, and close the Realtime WebSocket. */
@@ -703,48 +807,48 @@ interface LatencyBreakdown {
703
807
  * number as "STT latency". Falls back to turn_start when the endpoint
704
808
  * signal is unavailable (degraded provider, batch STT, etc.).
705
809
  */
706
- stt_ms: number;
810
+ readonly stt_ms: number;
707
811
  /**
708
812
  * Duration of the user's utterance (turn_start → end-of-speech). Useful
709
813
  * to distinguish "user spoke for 4s" from "STT took 4s to finalize" —
710
814
  * they used to be conflated in stt_ms before 0.6.1. Optional — undefined
711
815
  * when the endpoint signal is unavailable.
712
816
  */
713
- user_speech_duration_ms?: number;
817
+ readonly user_speech_duration_ms?: number;
714
818
  /**
715
819
  * Backwards-compatible LLM bucket. With the split below, this now reflects
716
820
  * the user-perceived first-token latency (TTFT) when streaming is available
717
821
  * and the full generation time otherwise. Prefer ``llm_ttft_ms`` /
718
822
  * ``llm_total_ms`` in new code.
719
823
  */
720
- llm_ms: number;
824
+ readonly llm_ms: number;
721
825
  /** Time-to-first-token (UX-facing latency): stt_complete → first LLM token. */
722
- llm_ttft_ms?: number;
826
+ readonly llm_ttft_ms?: number;
723
827
  /**
724
828
  * Total LLM generation time: stt_complete → last LLM token. Distinct from
725
829
  * ``llm_ms`` so cost/throughput analysis and TTFT can be tracked separately.
726
830
  */
727
- llm_total_ms?: number;
728
- tts_ms: number;
729
- total_ms: number;
831
+ readonly llm_total_ms?: number;
832
+ readonly tts_ms: number;
833
+ readonly total_ms: number;
730
834
  /**
731
835
  * Endpoint latency: time from end-of-user-speech (VAD stop or STT
732
836
  * ``speech_final``) to LLM dispatch. Captures the silence-detection +
733
837
  * transcript-finalization gap. Optional — undefined when the source signal
734
838
  * is missing.
735
839
  */
736
- endpoint_ms?: number;
840
+ readonly endpoint_ms?: number;
737
841
  /**
738
842
  * Barge-in latency: time from user-interrupt detection to TTS playback
739
843
  * actually halting (i.e. after ``sendClear`` returned). Optional — only
740
844
  * populated on interrupted turns.
741
845
  */
742
- bargein_ms?: number;
846
+ readonly bargein_ms?: number;
743
847
  /**
744
848
  * Total TTS time: LLM-first-token (or first-sentence boundary) to last
745
849
  * TTS audio byte sent. Optional — undefined when TTS never completed.
746
850
  */
747
- tts_total_ms?: number;
851
+ readonly tts_total_ms?: number;
748
852
  /**
749
853
  * **User-perceived agent response latency**: time from end-of-user-speech
750
854
  * (VAD stop or STT ``speech_final``) to the first audio byte the agent
@@ -757,54 +861,54 @@ interface LatencyBreakdown {
757
861
  * the system-controlled latency: silence detection + LLM TTFT + TTS
758
862
  * first byte.
759
863
  */
760
- agent_response_ms?: number;
864
+ readonly agent_response_ms?: number;
761
865
  }
762
866
  /** Per-call cost breakdown by component (STT/TTS/LLM/telephony) plus the total. */
763
867
  interface CostBreakdown {
764
- stt: number;
765
- tts: number;
766
- llm: number;
767
- telephony: number;
768
- total: number;
868
+ readonly stt: number;
869
+ readonly tts: number;
870
+ readonly llm: number;
871
+ readonly telephony: number;
872
+ readonly total: number;
769
873
  /**
770
874
  * Amount saved on LLM cost thanks to OpenAI Realtime prompt caching.
771
875
  * ``llm`` above is the net cost AFTER this discount. Dashboards can
772
876
  * render ``saved $X (pct%)`` next to the LLM line when > 0.
773
877
  */
774
- llm_cached_savings?: number;
878
+ readonly llm_cached_savings: number;
775
879
  }
776
880
  /** Metrics captured for a single conversation turn. */
777
881
  interface TurnMetrics {
778
- turn_index: number;
779
- user_text: string;
780
- agent_text: string;
781
- latency: LatencyBreakdown;
782
- stt_audio_seconds: number;
783
- tts_characters: number;
784
- timestamp: number;
882
+ readonly turn_index: number;
883
+ readonly user_text: string;
884
+ readonly agent_text: string;
885
+ readonly latency: LatencyBreakdown;
886
+ readonly stt_audio_seconds: number;
887
+ readonly tts_characters: number;
888
+ readonly timestamp: number;
785
889
  }
786
890
  /** Aggregated metrics for an entire call (turns, costs, latency percentiles). */
787
891
  interface CallMetrics {
788
- call_id: string;
789
- duration_seconds: number;
790
- turns: TurnMetrics[];
791
- cost: CostBreakdown;
792
- latency_avg: LatencyBreakdown;
793
- latency_p95: LatencyBreakdown;
794
- latency_p50?: LatencyBreakdown;
795
- latency_p90?: LatencyBreakdown;
796
- latency_p99?: LatencyBreakdown;
797
- provider_mode: string;
798
- stt_provider: string;
799
- tts_provider: string;
800
- llm_provider: string;
801
- telephony_provider: string;
892
+ readonly call_id: string;
893
+ readonly duration_seconds: number;
894
+ readonly turns: readonly TurnMetrics[];
895
+ readonly cost: CostBreakdown;
896
+ readonly latency_avg: LatencyBreakdown;
897
+ readonly latency_p95: LatencyBreakdown;
898
+ readonly latency_p50: LatencyBreakdown;
899
+ readonly latency_p90: LatencyBreakdown;
900
+ readonly latency_p99: LatencyBreakdown;
901
+ readonly provider_mode: string;
902
+ readonly stt_provider: string;
903
+ readonly tts_provider: string;
904
+ readonly llm_provider: string;
905
+ readonly telephony_provider: string;
802
906
  /** Model identifiers per provider (e.g. "ink-whisper", "eleven_flash_v2_5",
803
907
  * "gpt-oss-120b"). Surface on the dashboard cost breakdown so operators
804
908
  * can attribute per-call spend to a specific model. */
805
- stt_model?: string;
806
- tts_model?: string;
807
- llm_model?: string;
909
+ readonly stt_model?: string;
910
+ readonly tts_model?: string;
911
+ readonly llm_model?: string;
808
912
  }
809
913
  /** Programmatic control surface for a live call (transfer, hangup, DTMF). */
810
914
  interface CallControl {
@@ -830,7 +934,7 @@ interface CallControl {
830
934
  }
831
935
  /** Mutable per-call accumulator that stamps timestamps and emits final `CallMetrics`. */
832
936
  declare class CallMetricsAccumulator {
833
- callId: string;
937
+ readonly callId: string;
834
938
  readonly providerMode: string;
835
939
  readonly telephonyProvider: string;
836
940
  readonly sttProvider: string;
@@ -922,6 +1026,16 @@ declare class CallMetricsAccumulator {
922
1026
  * (the common cause of missing endpoint signals).
923
1027
  */
924
1028
  private _endpointSignalMissingCount;
1029
+ /**
1030
+ * Monotonic per-call turn counter. Reserved at turn OPEN
1031
+ * (``onAdapterSpeechStopped`` / ``speech_stopped``) via
1032
+ * ``reserveTurnIndex()`` and threaded through the buffering pipeline into
1033
+ * ``recordTurnComplete`` / ``recordTurnInterrupted`` as ``preReservedIndex``.
1034
+ * This makes ``turn_index`` stable under drops / interrupts (previously it
1035
+ * was assigned at completion as ``this._turns.length``, which shifted when a
1036
+ * turn was dropped). Parity with Python ``_next_turn_index``.
1037
+ */
1038
+ private _nextTurnIndex;
925
1039
  constructor(opts: {
926
1040
  callId: string;
927
1041
  providerMode: string;
@@ -951,6 +1065,18 @@ declare class CallMetricsAccumulator {
951
1065
  get turnActive(): boolean;
952
1066
  /** Begin a new turn — stamps the turn start timestamp and resets per-turn state. */
953
1067
  startTurn(): void;
1068
+ /**
1069
+ * Reserve and return the next monotonic turn index.
1070
+ *
1071
+ * Called once per turn at the moment the turn OPENS (Realtime:
1072
+ * ``onAdapterSpeechStopped``). The returned index is threaded through the
1073
+ * buffering pipeline and handed back to ``recordTurnComplete`` /
1074
+ * ``recordTurnInterrupted`` as ``preReservedIndex`` so the emitted
1075
+ * ``turn_index`` matches the live per-line transcript ordering even when a
1076
+ * turn is dropped or interrupted between open and close. Parity with Python
1077
+ * ``reserve_turn_index``.
1078
+ */
1079
+ reserveTurnIndex(): number;
954
1080
  /**
955
1081
  * Start a new turn only if no turn is currently open.
956
1082
  * Use this at inbound-audio ingestion points so the turn timer begins
@@ -1027,7 +1153,7 @@ declare class CallMetricsAccumulator {
1027
1153
  * ``user_text=''``. The caller treats ``null`` as "nothing to emit";
1028
1154
  * ``emitTurnMetrics`` is already null-safe.
1029
1155
  */
1030
- recordTurnComplete(agentText: string): TurnMetrics | null;
1156
+ recordTurnComplete(agentText: string, preReservedIndex?: number): TurnMetrics | null;
1031
1157
  /**
1032
1158
  * Close the current turn as interrupted (barge-in) and return the
1033
1159
  * recorded metrics. Returns ``null`` when no turn is open, OR when
@@ -1037,7 +1163,7 @@ declare class CallMetricsAccumulator {
1037
1163
  * a future refactor that reorders the bargein + LLM-unwind paths)
1038
1164
  * from overwriting a turn that the complete path already emitted.
1039
1165
  */
1040
- recordTurnInterrupted(): TurnMetrics | null;
1166
+ recordTurnInterrupted(preReservedIndex?: number): TurnMetrics | null;
1041
1167
  /**
1042
1168
  * Record the moment VAD emitted speech_end for the current utterance.
1043
1169
  * @param ts Optional override timestamp in hrTimeMs units (defaults to now).
@@ -1058,8 +1184,10 @@ declare class CallMetricsAccumulator {
1058
1184
  recordTurnCommitted(ts?: number): void;
1059
1185
  /**
1060
1186
  * Record the delta (ms) between turn-committed and when on_user_turn_completed
1061
- * pipeline hook finished. Stored for inclusion in the next ``emitEouMetrics``
1062
- * call (or an explicit re-emit if desired).
1187
+ * pipeline hook finished. Does NOT re-emit: like Python's
1188
+ * ``record_on_user_turn_completed_delay``, this only stores the value; the
1189
+ * single EOU emission happens on ``recordTurnCommitted`` (3-timestamp guard,
1190
+ * delay defaults to 0 if not yet recorded).
1063
1191
  */
1064
1192
  recordOnUserTurnCompletedDelay(delayMs: number): void;
1065
1193
  /**
@@ -1070,7 +1198,7 @@ declare class CallMetricsAccumulator {
1070
1198
  * ``transcriptionDelay`` = turnCommitted − vadStopped (ms)
1071
1199
  * ``onUserTurnCompletedDelay`` = caller-supplied delta (ms) or 0
1072
1200
  */
1073
- /** Emit `EOUMetrics` once VAD-stop, STT-final, and turn-committed timestamps are all known. */
1201
+ /** Emit `EOUMetrics` once the VAD-stop, STT-final, and turn-committed timestamps are all known; the on_user_turn_completed delay is included when recorded and defaults to 0 otherwise. */
1074
1202
  emitEouMetrics(): void;
1075
1203
  /**
1076
1204
  * Record that a caller utterance started overlapping with agent speech.
@@ -1221,31 +1349,32 @@ declare function isWebSocketUrl(url: string): boolean;
1221
1349
 
1222
1350
  /** Snapshot of a call as held by the dashboard store. */
1223
1351
  interface CallRecord {
1224
- call_id: string;
1225
- caller: string;
1226
- callee: string;
1227
- direction: string;
1228
- started_at: number;
1229
- ended_at?: number;
1352
+ readonly call_id: string;
1353
+ readonly caller: string;
1354
+ readonly callee: string;
1355
+ readonly direction: string;
1356
+ readonly started_at: number;
1357
+ readonly ended_at?: number;
1230
1358
  /**
1231
1359
  * Current lifecycle state: ``initiated`` (pre-registered), ``ringing``,
1232
1360
  * ``in-progress``, ``completed``, ``no-answer``, ``busy``, ``failed``,
1233
1361
  * ``canceled``, or ``webhook_error``.
1234
1362
  */
1235
- status?: string;
1236
- transcript?: Array<{
1237
- role: string;
1238
- text: string;
1239
- timestamp: number;
1363
+ readonly status?: string;
1364
+ readonly transcript?: ReadonlyArray<{
1365
+ readonly role: string;
1366
+ readonly text: string;
1367
+ readonly timestamp: number;
1368
+ readonly turnIndex?: number;
1240
1369
  }>;
1241
- turns?: unknown[];
1242
- metrics?: Record<string, unknown> | null;
1243
- [key: string]: unknown;
1370
+ readonly turns?: readonly unknown[];
1371
+ readonly metrics?: Record<string, unknown> | null;
1372
+ readonly [key: string]: unknown;
1244
1373
  }
1245
1374
  /** Server-Sent-Event payload broadcast by `MetricsStore` for live UI updates. */
1246
1375
  interface SSEEvent {
1247
- type: string;
1248
- data: Record<string, unknown>;
1376
+ readonly type: string;
1377
+ readonly data: Readonly<Record<string, unknown>>;
1249
1378
  }
1250
1379
  /** In-memory bounded ring buffer of recent calls plus active-call tracking. */
1251
1380
  declare class MetricsStore extends EventEmitter {
@@ -1289,6 +1418,27 @@ declare class MetricsStore extends EventEmitter {
1289
1418
  * row from active to completed so the UI freezes the live duration timer.
1290
1419
  */
1291
1420
  updateCallStatus(callId: string, status: string, extra?: Record<string, unknown>): void;
1421
+ /**
1422
+ * Record a single transcript line (user/assistant) as it becomes known.
1423
+ *
1424
+ * FIX-5 (issue #154): the live forward path for the dashboard transcript.
1425
+ * The Realtime stream handler calls this the moment each line is known — the
1426
+ * user line right after the hallucination filter accepts it, the assistant
1427
+ * line when its turn flushes — keyed by the monotonic ``turnIndex`` reserved
1428
+ * at turn-open (``reserveTurnIndex``). Each line is appended to the active
1429
+ * call's ``transcript`` array and broadcast over SSE as a ``transcript_line``
1430
+ * event so the dashboard can render lines as they arrive and re-sort by
1431
+ * ``(turnIndex, user<assistant)`` — making a late-arriving user line land
1432
+ * ABOVE its agent line. ``recordTurn`` de-dups against the lines pushed here
1433
+ * by ``(turnIndex, role)`` so the metrics path never double-pushes the same
1434
+ * text. Parity with Python ``record_transcript_line``.
1435
+ */
1436
+ recordTranscriptLine(data: {
1437
+ call_id: string;
1438
+ turnIndex: number;
1439
+ role: 'user' | 'assistant';
1440
+ text: string;
1441
+ }): void;
1292
1442
  /** Append a single conversation turn to an active call and broadcast it via SSE. */
1293
1443
  recordTurn(data: Record<string, unknown>): void;
1294
1444
  /** Move a call from active to completed and persist its final metrics. */
@@ -1334,7 +1484,7 @@ declare class MetricsStore extends EventEmitter {
1334
1484
  isDeleted(callId: string): boolean;
1335
1485
  /** Snapshot of soft-deleted call_ids (sorted). */
1336
1486
  getDeletedCallIds(): string[];
1337
- /** Atomically persist the deleted-ids set to disk. Best-effort. */
1487
+ /** Atomically persist the deleted-ids set to disk. Best-effort async. */
1338
1488
  private persistDeletedIds;
1339
1489
  /** Look up an active call by id (returns undefined if not active or unknown). */
1340
1490
  getActive(callId: string): CallRecord | undefined;
@@ -1452,6 +1602,7 @@ declare class Carrier {
1452
1602
  }
1453
1603
 
1454
1604
  /** OpenAI Realtime engine — marker class for Patter client dispatch. */
1605
+
1455
1606
  /** Constructor options for the OpenAI `Realtime` engine marker. */
1456
1607
  interface RealtimeOptions {
1457
1608
  /** API key. Falls back to OPENAI_API_KEY env var when omitted. */
@@ -1479,6 +1630,42 @@ interface RealtimeOptions {
1479
1630
  * `"gpt-4o-transcribe"` for higher accuracy.
1480
1631
  */
1481
1632
  inputAudioTranscriptionModel?: string;
1633
+ /**
1634
+ * Input noise reduction for speakerphone / conference audio. `undefined`
1635
+ * (default) omits the field (no reduction). `"far_field"` recommended for
1636
+ * phone / speakerphone calls; `"near_field"` for a handset close to the
1637
+ * mouth. Mirrors `openai_realtime_noise_reduction` on `Patter.agent()`.
1638
+ */
1639
+ noiseReduction?: 'near_field' | 'far_field';
1640
+ /**
1641
+ * Turn-detection tuning. `undefined` (default) keeps the adapter's
1642
+ * current hardcoded `server_vad` / threshold `0.5` / silence 300 ms.
1643
+ * Raise threshold or switch to `semantic_vad` eagerness `'low'` to stop
1644
+ * speakerphone noise from triggering false barge-ins.
1645
+ *
1646
+ * Maps to `turn_detection` on the Python `engines.openai.Realtime` marker;
1647
+ * propagates to `realtimeTurnDetection` on `AgentOptions`.
1648
+ */
1649
+ turnDetection?: RealtimeTurnDetection;
1650
+ /**
1651
+ * Gate the model's response on the Whisper transcript (legacy behavior).
1652
+ *
1653
+ * `false` (default) — the speech-to-speech model responds as soon as the
1654
+ * user stops speaking (on `speech_stopped`), independently of the Whisper
1655
+ * input transcription. The transcript becomes a pure observability
1656
+ * side-channel (dashboard / history / `onTranscript`) and never gates,
1657
+ * triggers, or cancels the response. This reclaims ~500 ms of latency
1658
+ * because the model no longer waits for Whisper.
1659
+ *
1660
+ * `true` — restores the prior behavior where the response is requested
1661
+ * only after the Whisper `transcript_input` event arrives and passes the
1662
+ * hallucination filter.
1663
+ *
1664
+ * Maps to `gate_response_on_transcript` on the Python
1665
+ * `engines.openai.Realtime` marker; propagates to
1666
+ * `openaiRealtimeGateResponseOnTranscript` on `AgentOptions`.
1667
+ */
1668
+ gateResponseOnTranscript?: boolean;
1482
1669
  }
1483
1670
  /**
1484
1671
  * OpenAI Realtime engine marker.
@@ -1502,6 +1689,9 @@ declare class Realtime {
1502
1689
  readonly voice: string;
1503
1690
  readonly reasoningEffort?: 'minimal' | 'low' | 'medium' | 'high';
1504
1691
  readonly inputAudioTranscriptionModel?: string;
1692
+ readonly noiseReduction?: 'near_field' | 'far_field';
1693
+ readonly turnDetection?: RealtimeTurnDetection;
1694
+ readonly gateResponseOnTranscript?: boolean;
1505
1695
  constructor(opts?: RealtimeOptions);
1506
1696
  }
1507
1697
 
@@ -1513,6 +1703,7 @@ declare class Realtime {
1513
1703
  * different `session.update` wire shape; the client dispatches to
1514
1704
  * `OpenAIRealtime2Adapter` when this marker is passed.
1515
1705
  */
1706
+
1516
1707
  /** Constructor options for the OpenAI `Realtime2` engine marker. */
1517
1708
  interface Realtime2Options {
1518
1709
  /** API key. Falls back to OPENAI_API_KEY env var when omitted. */
@@ -1533,6 +1724,44 @@ interface Realtime2Options {
1533
1724
  * low-latency transcript partials.
1534
1725
  */
1535
1726
  inputAudioTranscriptionModel?: string;
1727
+ /**
1728
+ * Input noise reduction for speakerphone / conference audio. `undefined`
1729
+ * (default) omits the field (no reduction). `"far_field"` recommended for
1730
+ * phone / speakerphone calls; `"near_field"` for a handset close to the
1731
+ * mouth. On the GA endpoint this is nested under
1732
+ * `audio.input.input_audio_noise_reduction: { type }`.
1733
+ * Mirrors `openai_realtime_noise_reduction` on `Patter.agent()`.
1734
+ */
1735
+ noiseReduction?: 'near_field' | 'far_field';
1736
+ /**
1737
+ * Turn-detection tuning. `undefined` (default) keeps the adapter's
1738
+ * current hardcoded `server_vad` / threshold `0.5` / silence 300 ms.
1739
+ * Raise threshold or switch to `semantic_vad` eagerness `'low'` to stop
1740
+ * speakerphone noise from triggering false barge-ins.
1741
+ *
1742
+ * Maps to `turn_detection` on the Python `engines.openai_realtime_2.Realtime2`
1743
+ * marker; propagates to `realtimeTurnDetection` on `AgentOptions`.
1744
+ */
1745
+ turnDetection?: RealtimeTurnDetection;
1746
+ /**
1747
+ * Gate the model's response on the Whisper transcript (legacy behavior).
1748
+ *
1749
+ * `false` (default) — the speech-to-speech model responds as soon as the
1750
+ * user stops speaking (on `speech_stopped`), independently of the Whisper
1751
+ * input transcription. The transcript becomes a pure observability
1752
+ * side-channel (dashboard / history / `onTranscript`) and never gates,
1753
+ * triggers, or cancels the response. This reclaims ~500 ms of latency
1754
+ * because the model no longer waits for Whisper.
1755
+ *
1756
+ * `true` — restores the prior behavior where the response is requested
1757
+ * only after the Whisper `transcript_input` event arrives and passes the
1758
+ * hallucination filter.
1759
+ *
1760
+ * Maps to `gate_response_on_transcript` on the Python
1761
+ * `engines.openai_realtime_2.Realtime2` marker; propagates to
1762
+ * `openaiRealtimeGateResponseOnTranscript` on `AgentOptions`.
1763
+ */
1764
+ gateResponseOnTranscript?: boolean;
1536
1765
  }
1537
1766
  /**
1538
1767
  * OpenAI Realtime 2 engine marker — selects `gpt-realtime-2` on the GA
@@ -1557,6 +1786,9 @@ declare class Realtime2 {
1557
1786
  readonly voice: string;
1558
1787
  readonly reasoningEffort?: 'minimal' | 'low' | 'medium' | 'high';
1559
1788
  readonly inputAudioTranscriptionModel?: string;
1789
+ readonly noiseReduction?: 'near_field' | 'far_field';
1790
+ readonly turnDetection?: RealtimeTurnDetection;
1791
+ readonly gateResponseOnTranscript?: boolean;
1560
1792
  constructor(opts?: Realtime2Options);
1561
1793
  }
1562
1794
 
@@ -1701,6 +1933,33 @@ interface ToolOptions {
1701
1933
  handler?: ToolHandler;
1702
1934
  /** URL to POST to when the LLM invokes the tool. */
1703
1935
  webhookUrl?: string;
1936
+ /**
1937
+ * Optional reassurance filler the agent speaks while a slow tool call runs.
1938
+ * Two forms:
1939
+ * - `string`: shorthand for `{ message: <string>, afterMs: 1500 }`.
1940
+ * - object: explicit `{ message, afterMs? }`.
1941
+ * Currently honoured only in Realtime mode. Off by default.
1942
+ *
1943
+ * Mirrors Python `reassurance` on `Tool` / `tool()`.
1944
+ */
1945
+ reassurance?: string | {
1946
+ message: string;
1947
+ afterMs?: number;
1948
+ };
1949
+ /**
1950
+ * Per-tool execution timeout in milliseconds, applied to BOTH the handler
1951
+ * and webhook paths. `undefined` (default) uses the executor default
1952
+ * (10 000 ms). Raise for long browser-automation / external-API tools
1953
+ * (e.g. `60_000`). Clamped to a 300 000 ms ceiling by the executor.
1954
+ *
1955
+ * Mirrors Python `timeout_s` on `Tool` / `tool()`.
1956
+ */
1957
+ timeoutMs?: number;
1958
+ /**
1959
+ * Enable OpenAI strict mode for this tool's function schema. Mirrors
1960
+ * Python `strict` on `Tool`. Off by default.
1961
+ */
1962
+ strict?: boolean;
1704
1963
  }
1705
1964
  /**
1706
1965
  * Tool definition. Structurally matches `ToolDefinition` so it drops
@@ -1724,6 +1983,20 @@ declare class Tool implements ToolDefinition {
1724
1983
  readonly parameters: Record<string, unknown>;
1725
1984
  readonly handler?: ToolHandler;
1726
1985
  readonly webhookUrl?: string;
1986
+ readonly reassurance?: string | Readonly<{
1987
+ message: string;
1988
+ afterMs?: number;
1989
+ }>;
1990
+ /**
1991
+ * Per-tool execution timeout in milliseconds. `undefined` uses the
1992
+ * executor default (10 000 ms). Mirrors Python `timeout_s`.
1993
+ */
1994
+ readonly timeoutMs?: number;
1995
+ /**
1996
+ * Enable OpenAI strict mode for this tool's function schema. Off by
1997
+ * default. Mirrors Python `strict` on `Tool`.
1998
+ */
1999
+ readonly strict?: boolean;
1727
2000
  constructor(opts: ToolOptions);
1728
2001
  }
1729
2002
  /** Factory helper mirroring Python's `tool(...)` function. */
@@ -1850,6 +2123,8 @@ interface PerToolState {
1850
2123
  state: CircuitBreakerState;
1851
2124
  consecutiveFailures: number;
1852
2125
  openedAt: number;
2126
+ /** True while a HALF_OPEN probe call is already in-flight. */
2127
+ probeInFlight: boolean;
1853
2128
  }
1854
2129
  /** Per-name registry tracking circuit state for a fleet of tools. */
1855
2130
  declare class CircuitBreakerRegistry {
@@ -1888,7 +2163,7 @@ declare class CircuitBreakerRegistry {
1888
2163
  * Avoids a circular import from metrics.ts.
1889
2164
  */
1890
2165
  interface LlmUsageRecorder {
1891
- recordLlmUsage(provider: string, model: string, inputTokens: number, outputTokens: number, cacheReadTokens?: number, cacheCreationTokens?: number): void;
2166
+ recordLlmUsage(provider: string, model: string, inputTokens: number, outputTokens: number, cacheReadTokens?: number, cacheWriteTokens?: number): void;
1892
2167
  }
1893
2168
  /**
1894
2169
  * Pluggable tool executor — mirrors the Python ``ToolExecutor`` in
@@ -1956,7 +2231,7 @@ interface LLMChunk {
1956
2231
  inputTokens?: number;
1957
2232
  outputTokens?: number;
1958
2233
  cacheReadInputTokens?: number;
1959
- cacheCreationInputTokens?: number;
2234
+ cacheWriteInputTokens?: number;
1960
2235
  }
1961
2236
  /**
1962
2237
  * Interface that any LLM provider must satisfy.
@@ -1979,6 +2254,18 @@ interface LLMChunk {
1979
2254
  */
1980
2255
  interface LLMStreamOptions {
1981
2256
  signal?: AbortSignal;
2257
+ /**
2258
+ * Stable per-call id (the same value the stream handler builds into
2259
+ * ``callCtx.call_id``). Threaded through purely so session-aware providers
2260
+ * — currently {@link OpenAICompatibleLLMProvider} and its Hermes / OpenClaw
2261
+ * presets — can emit the OpenAI ``user`` field as ``patter-call-<callId>``,
2262
+ * giving the upstream agent runtime one durable session per phone call.
2263
+ *
2264
+ * Additive and optional: every existing provider reads only ``signal`` and
2265
+ * is unaffected. When unset (or when a provider has no session-continuity
2266
+ * config) no ``user`` field is sent — fully backward compatible.
2267
+ */
2268
+ callId?: string;
1982
2269
  }
1983
2270
  interface LLMProvider {
1984
2271
  stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<LLMChunk, void, unknown>;
@@ -2250,13 +2537,55 @@ type MCPServerConfig = string | {
2250
2537
  /** Optional logical name for telemetry / log lines. */
2251
2538
  readonly name?: string;
2252
2539
  };
2540
+ /**
2541
+ * OpenAI Realtime turn-detection tuning.
2542
+ *
2543
+ * Raise the VAD {@link threshold} (`server_vad`) or switch to
2544
+ * `semantic_vad` with {@link eagerness} `'low'` to stop speakerphone /
2545
+ * conference-room noise (mouse clicks, phone shifts, background chatter)
2546
+ * from being mistaken for the caller speaking and cutting the agent off.
2547
+ *
2548
+ * Each unset field falls back to the adapter's current default
2549
+ * (`server_vad`, threshold `0.5`, `prefixPaddingMs` `300`,
2550
+ * `silenceDurationMs` `300`). `type === 'semantic_vad'` emits
2551
+ * `{ type, eagerness }` only — OpenAI rejects `threshold` /
2552
+ * `prefixPaddingMs` / `silenceDurationMs` on the semantic detector.
2553
+ * `createResponse` / `interruptResponse` are NOT exposed (Patter keeps
2554
+ * its client-gated barge-in safety values).
2555
+ *
2556
+ * Mirrors Python `RealtimeTurnDetection` dataclass in `models.py`.
2557
+ */
2558
+ interface RealtimeTurnDetection {
2559
+ /** `"server_vad"` (default) or `"semantic_vad"`. */
2560
+ readonly type?: 'server_vad' | 'semantic_vad';
2561
+ /**
2562
+ * `server_vad` only — 0..1, higher rejects more background noise.
2563
+ * `undefined` keeps the adapter default (`0.5`).
2564
+ */
2565
+ readonly threshold?: number;
2566
+ /**
2567
+ * `server_vad` only — milliseconds of audio retained from before the
2568
+ * detected start of speech. `undefined` keeps the adapter default (`300`).
2569
+ */
2570
+ readonly prefixPaddingMs?: number;
2571
+ /**
2572
+ * `server_vad` only — trailing silence (ms) before the turn ends.
2573
+ * `undefined` keeps the adapter default (`300`).
2574
+ */
2575
+ readonly silenceDurationMs?: number;
2576
+ /**
2577
+ * `semantic_vad` only — `"low"` waits longest for the caller to finish
2578
+ * (least likely to interrupt); `"high"` responds soonest; `"auto"` is OpenAI's default (equivalent to `"medium"`).
2579
+ */
2580
+ readonly eagerness?: 'low' | 'medium' | 'high' | 'auto';
2581
+ }
2253
2582
  /** Internal shape of a tool definition (matches `Tool` from `public-api.ts`). */
2254
2583
  interface ToolDefinition {
2255
- name: string;
2256
- description: string;
2257
- parameters: Record<string, unknown>;
2584
+ readonly name: string;
2585
+ readonly description: string;
2586
+ readonly parameters: Readonly<Record<string, unknown>>;
2258
2587
  /** Webhook URL — called when the LLM invokes this tool. Mutually exclusive with handler. */
2259
- webhookUrl?: string;
2588
+ readonly webhookUrl?: string;
2260
2589
  /**
2261
2590
  * Local handler — called instead of ``webhookUrl`` when present.
2262
2591
  *
@@ -2274,7 +2603,7 @@ interface ToolDefinition {
2274
2603
  * ignores the progress yields — the final value is still used as
2275
2604
  * the tool result.
2276
2605
  */
2277
- handler?: ((args: Record<string, unknown>, context: Record<string, unknown>) => Promise<string>) | ((args: Record<string, unknown>, context: Record<string, unknown>) => AsyncGenerator<{
2606
+ readonly handler?: ((args: Record<string, unknown>, context: Record<string, unknown>) => Promise<string>) | ((args: Record<string, unknown>, context: Record<string, unknown>) => AsyncGenerator<{
2278
2607
  progress?: string;
2279
2608
  result?: string;
2280
2609
  }, string | void, unknown>);
@@ -2294,10 +2623,10 @@ interface ToolDefinition {
2294
2623
  * synthesises it inline. Pipeline mode has no clean injection point
2295
2624
  * mid-turn yet; the option is silently ignored there. Off by default.
2296
2625
  */
2297
- reassurance?: string | {
2626
+ readonly reassurance?: string | Readonly<{
2298
2627
  message: string;
2299
2628
  afterMs?: number;
2300
- };
2629
+ }>;
2301
2630
  /**
2302
2631
  * Enable OpenAI strict mode for this tool's function schema. When ``true``
2303
2632
  * the model is constrained to emit arguments that exactly match the
@@ -2318,7 +2647,123 @@ interface ToolDefinition {
2318
2647
  * Recommended for any tool whose handler/webhook can't safely tolerate
2319
2648
  * malformed arguments (DB writes, payment, transfers).
2320
2649
  */
2321
- strict?: boolean;
2650
+ readonly strict?: boolean;
2651
+ /**
2652
+ * Per-tool execution timeout in milliseconds, applied to BOTH the handler
2653
+ * and webhook paths. `undefined` (default) uses the executor default
2654
+ * (10 000 ms). Raise for long browser-automation / external-API tools
2655
+ * (e.g. `60_000`). Clamped to a 300 000 ms ceiling by the executor.
2656
+ *
2657
+ * Mirrors Python's `timeout_s` on `Tool` / `tool()`.
2658
+ */
2659
+ readonly timeoutMs?: number;
2660
+ }
2661
+ /**
2662
+ * Configuration for the built-in ``consult`` escalation tool.
2663
+ *
2664
+ * When set on an agent, Patter auto-injects a tool (default name
2665
+ * ``consult_agent``) that the in-call agent can invoke mid-call to reach the
2666
+ * developer's own back-office agent (not the phone caller's) over HTTP for deeper reasoning, fresh
2667
+ * information, or an action beyond the call. Patter keeps STT + LLM/voice +
2668
+ * TTS + carrier; the back-office agent is consulted only on demand (never on
2669
+ * the per-turn path). The tool POSTs ``{ request, call_id, caller, callee }``
2670
+ * to {@link url}; the endpoint returns JSON with a ``reply`` / ``response`` /
2671
+ * ``text`` string (or any JSON / plain text) and the agent speaks it.
2672
+ *
2673
+ * Injected in **Realtime** and **Pipeline** modes only — ElevenLabs ConvAI
2674
+ * tools live on the ElevenLabs-hosted agent, so ``consult`` does not apply
2675
+ * there (a warning is emitted if set with that provider).
2676
+ */
2677
+ interface ConsultConfig {
2678
+ /**
2679
+ * Generic webhook endpoint Patter POSTs ``{ request, call_id, caller, callee }``
2680
+ * to. SSRF-validated at call start. Mutually exclusive with
2681
+ * {@link openaiCompatible} — set exactly one.
2682
+ */
2683
+ readonly url?: string;
2684
+ /**
2685
+ * Native target that speaks an OpenAI-compatible ``/chat/completions``
2686
+ * endpoint directly (e.g. an OpenClaw agent, or vLLM / Ollama / Groq) — no
2687
+ * hand-written adapter. Mutually exclusive with {@link url}. Use
2688
+ * {@link openclawConsult} for the OpenClaw preset.
2689
+ */
2690
+ readonly openaiCompatible?: OpenAICompatibleConsult;
2691
+ /** Optional headers (e.g. an ``Authorization`` bearer). Never logged. */
2692
+ readonly headers?: Readonly<Record<string, string>>;
2693
+ /**
2694
+ * Per-consult HTTP timeout in milliseconds. Higher than the generic
2695
+ * webhook-tool default (10 000 ms) because a consult may run deeper
2696
+ * reasoning. Default ``30000``.
2697
+ */
2698
+ readonly timeoutMs?: number;
2699
+ /** Name the LLM sees for the tool. Default ``"consult_agent"``. */
2700
+ readonly toolName?: string;
2701
+ /** Description the LLM sees — tune to steer when the agent escalates. */
2702
+ readonly description?: string;
2703
+ /**
2704
+ * Optional filler the agent speaks while the consult runs (Realtime mode
2705
+ * only) so a multi-second back-office call is not dead air. Omitted plays no
2706
+ * filler; the {@link openclawConsult} preset sets a sensible default.
2707
+ */
2708
+ readonly reassurance?: string | Readonly<{
2709
+ message: string;
2710
+ afterMs?: number;
2711
+ }>;
2712
+ /**
2713
+ * Opt-in: allow {@link url} to point at a loopback / private / link-local
2714
+ * host (e.g. a back-office agent on ``127.0.0.1`` or an RFC1918 LAN host).
2715
+ *
2716
+ * Default ``false`` (or ``undefined``) — the URL is SSRF-validated and
2717
+ * loopback/private/link-local targets are rejected, preserving the strict
2718
+ * default behaviour. Set ``true`` ONLY for a trusted, developer-configured
2719
+ * local agent: the URL is your own config, not caller-derived input.
2720
+ *
2721
+ * Even when ``true``, non-HTTP(S) schemes (``file:``, ``javascript:`` …)
2722
+ * are still rejected. Note: opting in also makes cloud-metadata hostnames
2723
+ * (``metadata``, ``metadata.google.internal``, ``metadata.azure.com``) and
2724
+ * the IMDS IP ``169.254.169.254`` reachable — an accepted tradeoff for a URL
2725
+ * you control. This opt-in applies ONLY to
2725
+ * the consult tool; the generic webhook-tool path stays strict.
2727
+ */
2728
+ readonly allowLoopback?: boolean;
2729
+ }
2730
+ /**
2731
+ * Native {@link ConsultConfig} target that speaks an OpenAI-compatible
2732
+ * ``/chat/completions`` endpoint directly — no hand-written adapter.
2733
+ *
2734
+ * Lets ``consult`` reach an OpenClaw agent (or any OpenAI-compatible gateway:
2735
+ * vLLM, Ollama, Groq, …). The consult handler builds a standard chat-completions
2736
+ * request (``model`` + ``messages`` + ``user``) and speaks
2737
+ * ``choices[0].message.content``. Prefer {@link openclawConsult} for the
2738
+ * OpenClaw preset rather than constructing this directly.
2739
+ */
2740
+ interface OpenAICompatibleConsult {
2741
+ /**
2742
+ * OpenAI-compatible base URL ending in ``/v1`` (the handler POSTs to
2743
+ * ``{baseUrl}/chat/completions``), e.g. ``http://127.0.0.1:18789/v1``.
2744
+ */
2745
+ readonly baseUrl: string;
2746
+ /**
2747
+ * Model / agent target. For OpenClaw this is the namespaced agent id, e.g.
2748
+ * ``"openclaw/receptionist"``.
2749
+ */
2750
+ readonly model: string;
2751
+ /**
2752
+ * Bearer token. Prefer {@link apiKeyEnv} so the secret stays out of source.
2753
+ * For OpenClaw this is an OPERATOR-grade credential — never logged.
2754
+ */
2755
+ readonly apiKey?: string;
2756
+ /**
2757
+ * Environment variable to read the bearer from when {@link apiKey} is not
2758
+ * given (e.g. ``"OPENCLAW_API_KEY"``).
2759
+ */
2760
+ readonly apiKeyEnv?: string;
2761
+ /**
2762
+ * Optional header carrying the per-call session id (the call id), e.g.
2763
+ * ``"x-openclaw-session-key"``. The call id is also sent as the OpenAI
2764
+ * ``user`` field.
2765
+ */
2766
+ readonly sessionHeader?: string;
2322
2767
  }
2323
2768
  /** Constructor options for `new Patter({...})` in local-server mode. */
2324
2769
  interface LocalOptions {
@@ -2331,14 +2776,14 @@ interface LocalOptions {
2331
2776
  * const phone = new Patter({ carrier: new Twilio(), phoneNumber: "+1..." });
2332
2777
  * ```
2333
2778
  */
2334
- carrier: Carrier$2 | Carrier$1 | Carrier;
2779
+ readonly carrier: Carrier$2 | Carrier$1 | Carrier;
2335
2780
  /**
2336
2781
  * Tunnel configuration. Accepts a tunnel instance, ``true`` (alias for
2337
2782
  * ``new CloudflareTunnel()``), or ``false`` / omitted (no tunnel).
2338
2783
  */
2339
- tunnel?: CloudflareTunnel | Static | boolean;
2340
- phoneNumber: string;
2341
- webhookUrl?: string;
2784
+ readonly tunnel?: CloudflareTunnel | Static | boolean;
2785
+ readonly phoneNumber: string;
2786
+ readonly webhookUrl?: string;
2342
2787
  /**
2343
2788
  * On-disk persistence for the dashboard's call history. The dashboard
2344
2789
  * itself is in-memory, but enabling ``persist`` writes per-call records
@@ -2366,25 +2811,25 @@ interface LocalOptions {
2366
2811
  * Phone numbers are masked by default; control via
2367
2812
  * ``PATTER_LOG_REDACT_PHONE``.
2368
2813
  */
2369
- persist?: boolean | string;
2814
+ readonly persist?: boolean | string;
2370
2815
  /**
2371
2816
  * @internal — allows ``StreamHandler`` to build the default OpenAI
2372
2817
  * ``LLMLoop`` when no ``onMessage`` handler is supplied. The
2373
2818
  * ``OpenAIRealtime`` engine instance carries its own key when one is
2374
2819
  * used via ``phone.agent({ engine: new OpenAIRealtime({ apiKey }) })``.
2375
2820
  */
2376
- openaiKey?: string;
2821
+ readonly openaiKey?: string;
2377
2822
  }
2378
2823
  /** Internal shape of a guardrail (matches `Guardrail` class from `public-api.ts`). */
2379
2824
  interface Guardrail {
2380
2825
  /** Name for logging when triggered */
2381
- name: string;
2826
+ readonly name: string;
2382
2827
  /** List of terms that trigger the guardrail (case-insensitive) */
2383
- blockedTerms?: string[];
2828
+ readonly blockedTerms?: ReadonlyArray<string>;
2384
2829
  /** Custom check function — return true to block the response */
2385
- check?: (text: string) => boolean;
2830
+ readonly check?: (text: string) => boolean;
2386
2831
  /** Replacement text spoken when guardrail triggers */
2387
- replacement?: string;
2832
+ readonly replacement?: string;
2388
2833
  }
2389
2834
  /** Per-call context passed to every pipeline hook. */
2390
2835
  interface HookContext {
@@ -2493,29 +2938,41 @@ interface BackgroundAudioPlayer$1 {
2493
2938
  */
2494
2939
  /** Configuration for a local-mode voice AI agent (passed to `phone.agent({...})`). */
2495
2940
  interface AgentOptions {
2496
- systemPrompt: string;
2941
+ readonly systemPrompt: string;
2497
2942
  /**
2498
2943
  * Voice preset. When ``engine`` is provided, its ``voice`` is used unless
2499
2944
  * explicitly overridden here. Format depends on the engine:
2500
2945
  * OpenAI Realtime accepts a name (``'alloy'``, ``'echo'``, ...);
2501
2946
  * ElevenLabs ConvAI accepts a voice ID.
2502
2947
  */
2503
- voice?: string;
2948
+ readonly voice?: string;
2504
2949
  /**
2505
2950
  * LLM / Realtime model. When ``engine`` is provided, its ``model`` is used
2506
2951
  * unless explicitly overridden here.
2507
2952
  */
2508
- model?: string;
2953
+ readonly model?: string;
2509
2954
  /**
2510
2955
  * BCP-47 language code (e.g. ``'en'``, ``'it'``). Forwarded to STT (in
2511
2956
  * pipeline mode) and to the engine adapter at call time. STTConfig has its
2512
2957
  * own ``language`` field for the rare case where STT must use a different
2513
2958
  * language than the rest of the pipeline.
2514
2959
  */
2515
- language?: string;
2516
- firstMessage?: string;
2960
+ readonly language?: string;
2961
+ readonly firstMessage?: string;
2962
+ /**
2963
+ * Opt-in spoken fallback for pipeline mode when the per-turn LLM stream
2964
+ * throws (gateway-down / 120 s timeout) BEFORE any assistant text was
2965
+ * spoken. Agent-runtime providers (Hermes / OpenClaw) run tools+memory
2966
+ * internally so a turn can take 30-90 s; on failure the caller currently
2967
+ * hears SILENCE then a silent turn-end. When set to a non-empty string,
2968
+ * the SDK synthesizes and speaks this line through the normal TTS turn
2969
+ * lifecycle (subject to barge-in). ``undefined`` (default) preserves
2970
+ * today's behaviour: nothing is spoken on LLM error. Pipeline mode only.
2971
+ * Mirrors Python ``llm_error_message`` on ``Patter.agent()`` / ``Agent``.
2972
+ */
2973
+ readonly llmErrorMessage?: string;
2517
2974
  /** Tool definitions — ``Tool`` class instances from ``getpatter``. */
2518
- tools?: Array<Tool>;
2975
+ readonly tools?: ReadonlyArray<Tool>;
2519
2976
  /**
2520
2977
  * Model Context Protocol (MCP) servers to plug into this agent. Each
2521
2978
  * server is queried at call start via ``tools/list`` and its tools
@@ -2536,14 +2993,23 @@ interface AgentOptions {
2536
2993
  * call start (~50-200 ms × N servers). Future iterations may cache
2537
2994
  * the discovered list process-wide.
2538
2995
  */
2539
- mcpServers?: ReadonlyArray<MCPServerConfig>;
2996
+ readonly mcpServers?: ReadonlyArray<MCPServerConfig>;
2997
+ /**
2998
+ * Optional back-office "consult" escalation. When set, Patter auto-injects a
2999
+ * ``consult_agent`` tool (Realtime + Pipeline modes) that the in-call agent
3000
+ * can invoke to reach the developer's own orchestrator (not the phone caller's) over HTTP for deeper
3001
+ * reasoning / fresh info, then speak the reply. The orchestrator stays off
3002
+ * the per-turn path — consulted only on demand. ``undefined`` (default)
3003
+ * disables it. See {@link ConsultConfig}.
3004
+ */
3005
+ readonly consult?: ConsultConfig;
2540
3006
  /**
2541
3007
  * When ``true``, ship ``systemPrompt`` to the LLM verbatim. Default
2542
3008
  * (``false``) prepends a phone-friendly preamble that instructs the
2543
3009
  * model to avoid markdown, emojis, bullet lists, and verbose replies —
2544
3010
  * the conventions live phone calls require.
2545
3011
  */
2546
- disablePhonePreamble?: boolean;
3012
+ readonly disablePhonePreamble?: boolean;
2547
3013
  /**
2548
3014
  * Acoustic echo cancellation. When `true` (pipeline mode only) the SDK
2549
3015
  * instantiates an `NlmsEchoCanceller` that subtracts the agent's own
@@ -2555,53 +3021,53 @@ interface AgentOptions {
2555
3021
  * convergence period would briefly attenuate caller speech if they
2556
3022
  * spoke before any TTS played.
2557
3023
  */
2558
- echoCancellation?: boolean;
3024
+ readonly echoCancellation?: boolean;
2559
3025
  /**
2560
3026
  * Realtime / ConvAI engine instance. When present, the agent runs in the
2561
3027
  * matching mode (``openai_realtime`` or ``elevenlabs_convai``). When absent,
2562
3028
  * pipeline mode is selected if ``stt`` and ``tts`` are provided.
2563
3029
  */
2564
- engine?: Realtime | Realtime2 | ConvAI;
3030
+ readonly engine?: Realtime | Realtime2 | ConvAI;
2565
3031
  /**
2566
3032
  * Provider mode. Normally derived from ``engine`` / ``stt`` + ``tts``. Pass
2567
3033
  * ``'pipeline'`` explicitly when building a pipeline-mode agent without
2568
3034
  * an engine instance.
2569
3035
  */
2570
- provider?: 'openai_realtime' | 'elevenlabs_convai' | 'pipeline';
3036
+ readonly provider?: 'openai_realtime' | 'elevenlabs_convai' | 'pipeline';
2571
3037
  /** Pre-instantiated STT adapter (e.g. ``new DeepgramSTT({ apiKey })``). */
2572
- stt?: STTAdapter;
3038
+ readonly stt?: STTAdapter;
2573
3039
  /** Pre-instantiated TTS adapter (e.g. ``new ElevenLabsTTS({ apiKey })``). */
2574
- tts?: TTSAdapter;
3040
+ readonly tts?: TTSAdapter;
2575
3041
  /**
2576
3042
  * Pipeline-mode LLM provider (e.g. ``new AnthropicLLM()``). When set, the
2577
3043
  * built-in LLM loop uses this provider instead of the OpenAI default.
2578
3044
  * Mutually exclusive with ``onMessage`` passed to ``serve()``. Ignored
2579
3045
  * when ``engine`` is set (realtime mode bypasses the pipeline LLM).
2580
3046
  */
2581
- llm?: LLMProvider;
3047
+ readonly llm?: LLMProvider;
2582
3048
  /** Dynamic variables for ``{placeholder}`` substitution in systemPrompt at call time. */
2583
- variables?: Record<string, string>;
3049
+ readonly variables?: Readonly<Record<string, string>>;
2584
3050
  /** Output guardrails — ``Guardrail`` class instances from ``getpatter``. */
2585
- guardrails?: Array<Guardrail>;
3051
+ readonly guardrails?: ReadonlyArray<Guardrail>;
2586
3052
  /** Pipeline hooks — intercept and transform data at each pipeline stage (pipeline mode only). */
2587
- hooks?: PipelineHooks;
3053
+ readonly hooks?: PipelineHooks;
2588
3054
  /** Text transforms applied to LLM output before TTS (pipeline mode only).
2589
3055
  * Each function receives a string and returns the transformed string.
2590
3056
  * Applied in order before the ``beforeSynthesize`` hook. */
2591
- textTransforms?: Array<(text: string) => string>;
3057
+ readonly textTransforms?: ReadonlyArray<(text: string) => string>;
2592
3058
  /** Optional server-side VAD (e.g., Silero). Pipeline mode only. */
2593
- vad?: VADProvider;
3059
+ readonly vad?: VADProvider;
2594
3060
  /** Optional pre-STT audio filter (noise cancellation). Pipeline mode only. */
2595
- audioFilter?: AudioFilter;
3061
+ readonly audioFilter?: AudioFilter;
2596
3062
  /** Optional background audio mixer (hold music, thinking cues). Pipeline mode only. */
2597
- backgroundAudio?: BackgroundAudioPlayer$1;
3063
+ readonly backgroundAudio?: BackgroundAudioPlayer$1;
2598
3064
  /**
2599
3065
  * Minimum sustained voice (ms) before treating caller audio as a barge-in
2600
3066
  * and interrupting TTS. `0` disables barge-in entirely — useful on noisy
2601
3067
  * links (ngrok tunnels, speakerphone) where the agent can hear itself.
2602
3068
  * Default: 300.
2603
3069
  */
2604
- bargeInThresholdMs?: number;
3070
+ readonly bargeInThresholdMs?: number;
2605
3071
  /**
2606
3072
  * Opt-in barge-in confirmation strategies (pipeline mode). With the
2607
3073
  * default empty array the SDK falls back to the legacy
@@ -2618,14 +3084,14 @@ interface AgentOptions {
2618
3084
  * ``MinWordsStrategy`` for the protocol and a reference
2619
3085
  * implementation.
2620
3086
  */
2621
- bargeInStrategies?: readonly BargeInStrategy[];
3087
+ readonly bargeInStrategies?: readonly BargeInStrategy[];
2622
3088
  /**
2623
3089
  * Maximum time (ms) to wait for at least one strategy to confirm a
2624
3090
  * pending barge-in before discarding the pending state and resuming
2625
3091
  * TTS. Only consulted when ``bargeInStrategies`` is non-empty.
2626
3092
  * Default: 1500.
2627
3093
  */
2628
- bargeInConfirmMs?: number;
3094
+ readonly bargeInConfirmMs?: number;
2629
3095
  /**
2630
3096
  * When ``true`` (default), ``Patter.call`` warms up the STT, TTS, and
2631
3097
  * LLM provider connections in parallel with the carrier-side
@@ -2636,7 +3102,7 @@ interface AgentOptions {
2636
3102
  * of the WebSocket bridge. Best-effort: warmup failures are logged
2637
3103
  * at debug level and never abort the call. Default: ``true``.
2638
3104
  */
2639
- prewarm?: boolean;
3105
+ readonly prewarm?: boolean;
2640
3106
  /**
2641
3107
  * When ``true`` (default since 0.6.2 in pipeline mode), ``Patter.call``
2642
3108
  * pre-renders ``firstMessage`` to TTS audio bytes during the ringing
@@ -2655,7 +3121,7 @@ interface AgentOptions {
2655
3121
  * ``Patter.call`` refuses to spawn the prewarm task and emits a warn
2656
3122
  * when ``provider !== 'pipeline'``.
2657
3123
  */
2658
- prewarmFirstMessage?: boolean;
3124
+ readonly prewarmFirstMessage?: boolean;
2659
3125
  /**
2660
3126
  * When true, the sentence chunker emits the first clause of each response
2661
3127
  * on a soft punctuation boundary (",", em-dash, en-dash) once ~40 chars
@@ -2667,38 +3133,124 @@ interface AgentOptions {
2667
3133
  * See SentenceChunker constructor for the full guard list (decimal,
2668
3134
  * currency, balanced delimiter, ellipsis).
2669
3135
  */
2670
- aggressiveFirstFlush?: boolean;
3136
+ readonly aggressiveFirstFlush?: boolean;
3137
+ /**
3138
+ * Input noise reduction for speakerphone / conference audio (OpenAI
3139
+ * Realtime mode only). `undefined` (default) omits the field entirely
3140
+ * (no reduction — today's behavior).
3141
+ *
3142
+ * - `"far_field"` — recommended for phone / speakerphone calls where
3143
+ * the mic is more than ~30 cm from the speaker.
3144
+ * - `"near_field"` — for a handset held close to the mouth.
3145
+ *
3146
+ * v1 Realtime: emitted at the top level of `session.update` as
3147
+ * `input_audio_noise_reduction: { type }`. GA Realtime (gpt-realtime-2):
3148
+ * nested under `audio.input.input_audio_noise_reduction: { type }`.
3149
+ *
3150
+ * Mirrors Python `openai_realtime_noise_reduction` on `Patter.agent()` /
3151
+ * `Agent` and `noise_reduction` on `engines.openai.Realtime`.
3152
+ */
3153
+ readonly openaiRealtimeNoiseReduction?: 'near_field' | 'far_field';
3154
+ /**
3155
+ * Turn-detection tuning for OpenAI Realtime mode. `undefined` (default)
3156
+ * keeps the adapter's current hardcoded `server_vad` / threshold `0.5` /
3157
+ * silence 300 ms settings.
3158
+ *
3159
+ * Raise {@link RealtimeTurnDetection.threshold} (`server_vad`) or switch
3160
+ * to `semantic_vad` with `eagerness: 'low'` to stop speakerphone /
3161
+ * conference noise from triggering false barge-ins.
3162
+ *
3163
+ * Mirrors Python `realtime_turn_detection` on `Patter.agent()` / `Agent`
3164
+ * and `turn_detection` on `engines.openai.Realtime`.
3165
+ */
3166
+ readonly realtimeTurnDetection?: RealtimeTurnDetection;
3167
+ /**
3168
+ * Gate the OpenAI Realtime model's response on the Whisper input
3169
+ * transcript (legacy behavior). OpenAI Realtime mode only.
3170
+ *
3171
+ * - `false` / `undefined` (default) — the speech-to-speech model responds
3172
+ * as soon as the user stops speaking (`speech_stopped`), independently
3173
+ * of the Whisper transcription. The transcript becomes a pure
3174
+ * observability side-channel (dashboard / history / `onTranscript`) and
3175
+ * never gates, triggers, or cancels the response. Reclaims ~500 ms of
3176
+ * latency because the model no longer waits for Whisper.
3177
+ * - `true` — restores the prior behavior where the response is requested
3178
+ * only after the Whisper `transcript_input` event arrives. Production
3179
+ * flows should keep the default; this is for callers that depended on
3180
+ * the old transcript-gated ordering.
3181
+ *
3182
+ * Mirrors Python `realtime_gate_response_on_transcript` on `Patter.agent()`
3183
+ * / `Agent` and `gate_response_on_transcript` on `engines.openai.Realtime`.
3184
+ */
3185
+ readonly openaiRealtimeGateResponseOnTranscript?: boolean;
3186
+ /**
3187
+ * When set, Patter prepends a native "# Preambles" guidance block to the
3188
+ * OpenAI Realtime session `instructions` so the model speaks one short,
3189
+ * action-describing sentence ("I'll check that order now.") before a tool
3190
+ * call that may take a moment, in its own voice. Most effective on
3191
+ * `gpt-realtime-2`, where preambles are first-class.
3192
+ *
3193
+ * - `undefined` / `false` (default) — no change to the prompt; the
3194
+ * instructions stay byte-identical to prior releases.
3195
+ * - `true` — Patter prepends the built-in block.
3196
+ * - `string` — used verbatim as the full preamble block (override).
3197
+ *
3198
+ * Realtime modes only; pipeline mode has its own phone preamble (see
3199
+ * `disablePhonePreamble`). Mirrors Python `tool_call_preambles` on
3200
+ * `Patter.agent()` / `Agent`.
3201
+ */
3202
+ readonly toolCallPreambles?: boolean | string;
2671
3203
  }
2672
3204
  /** Pipeline-mode message handler — given full turn context, returns the agent's reply. */
2673
3205
  type PipelineMessageHandler = (data: Record<string, unknown>) => Promise<string>;
2674
3206
  /** Options for `Patter.serve({...})`. */
2675
3207
  interface ServeOptions {
2676
- agent: AgentOptions;
2677
- port?: number;
3208
+ readonly agent: AgentOptions;
3209
+ readonly port?: number;
2678
3210
  /** When true, start a cloudflared tunnel automatically (requires `cloudflared` npm package). */
2679
- tunnel?: boolean;
2680
- onCallStart?: (data: Record<string, unknown>) => Promise<void>;
2681
- onCallEnd?: (data: Record<string, unknown>) => Promise<void>;
2682
- onTranscript?: (data: Record<string, unknown>) => Promise<void>;
3211
+ readonly tunnel?: boolean;
3212
+ readonly onCallStart?: (data: Record<string, unknown>) => Promise<void>;
3213
+ readonly onCallEnd?: (data: Record<string, unknown>) => Promise<void>;
3214
+ readonly onTranscript?: (data: Record<string, unknown>) => Promise<void>;
2683
3215
  /** Pipeline mode only — called with the user's transcript; return value is spoken.
2684
3216
  * Can also be a URL string for remote webhook/WebSocket integration. */
2685
- onMessage?: PipelineMessageHandler | string;
3217
+ readonly onMessage?: PipelineMessageHandler | string;
2686
3218
  /** Called after each turn with per-turn metrics. */
2687
- onMetrics?: (data: Record<string, unknown>) => Promise<void>;
3219
+ readonly onMetrics?: (data: Record<string, unknown>) => Promise<void>;
2688
3220
  /** When true, record calls via the Twilio Recordings API. */
2689
- recording?: boolean;
3221
+ readonly recording?: boolean;
2690
3222
  /** If set, spoken as a voicemail message when AMD detects a machine. */
2691
- voicemailMessage?: string;
3223
+ readonly voicemailMessage?: string;
2692
3224
  /** Custom pricing overrides for cost calculation. */
2693
- pricing?: Record<string, Record<string, unknown>>;
3225
+ readonly pricing?: Readonly<Record<string, Record<string, unknown>>>;
2694
3226
  /** When true (default), serve a dashboard UI at /dashboard. */
2695
- dashboard?: boolean;
3227
+ readonly dashboard?: boolean;
2696
3228
  /** Bearer token for dashboard/API authentication. */
2697
- dashboardToken?: string;
3229
+ readonly dashboardToken?: string;
3230
+ /**
3231
+ * When true, serve the dashboard (and the call-data `/api/*` routes)
3232
+ * fully OPEN — WITHOUT authentication — even when the server is
3233
+ * reachable beyond loopback (e.g. behind a tunnel or a public webhook
3234
+ * URL). **NOT RECOMMENDED on a public network** — the dashboard exposes
3235
+ * call transcripts and metadata (PII) to anyone who can reach the URL.
3236
+ *
3237
+ * Defaults to `false` (security). With the default, when the dashboard
3238
+ * is enabled, `dashboardToken` is empty, AND the server is exposed
3239
+ * beyond `127.0.0.1`, the SDK auto-generates a one-time token and mounts
3240
+ * the dashboard behind it (the startup banner prints the ready-to-use
3241
+ * URL with `?token=...`). The dashboard is always available — it just
3242
+ * requires the printed or configured token. Loopback-only local dev is
3243
+ * unchanged: served open with no token.
3244
+ *
3245
+ * For a stable token instead of the per-process auto-generated one, set
3246
+ * `dashboardToken`. Set this flag only as the deliberate escape hatch
3247
+ * for the rare case where unauthenticated public exposure is intentional.
3248
+ */
3249
+ readonly allowInsecureDashboard?: boolean;
2698
3250
  /** Path to SQLite database for dashboard persistence (not used in TS yet). */
2699
- dashboardDb?: string;
3251
+ readonly dashboardDb?: string;
2700
3252
  /** When true (default), persist dashboard data. */
2701
- dashboardPersist?: boolean;
3253
+ readonly dashboardPersist?: boolean;
2702
3254
  /**
2703
3255
  * When true (default), `serve()` calls the carrier's API on startup to
2704
3256
  * point the configured phone number's webhook URL at this server. Set
@@ -2718,7 +3270,7 @@ interface ServeOptions {
2718
3270
  * hostname is dynamic and only known at runtime — the carrier MUST be
2719
3271
  * reconfigured for inbound calls to land.
2720
3272
  */
2721
- manageWebhook?: boolean;
3273
+ readonly manageWebhook?: boolean;
2722
3274
  }
2723
3275
  /**
2724
3276
  * Normalised AMD (answering-machine detection) result emitted to
@@ -2744,8 +3296,8 @@ interface MachineDetectionResult {
2744
3296
  }
2745
3297
  /** Options for `Patter.call({...})` to place an outbound call. */
2746
3298
  interface LocalCallOptions {
2747
- to: string;
2748
- agent: AgentOptions;
3299
+ readonly to: string;
3300
+ readonly agent: AgentOptions;
2749
3301
  /**
2750
3302
  * Enable answering-machine detection. **Defaults to ``true``** — the SDK
2751
3303
  * asks Twilio (``MachineDetection=DetectMessageEnd`` + Async AMD) or
@@ -2756,7 +3308,7 @@ interface LocalCallOptions {
2756
3308
  * disable when you want to skip per-call AMD billing or you already
2757
3309
  * know the destination is a human.
2758
3310
  */
2759
- machineDetection?: boolean;
3311
+ readonly machineDetection?: boolean;
2760
3312
  /**
2761
3313
  * Called once when the carrier finishes the AMD check. Fires for both
2762
3314
  * ``human`` and ``machine`` outcomes. Combine with ``voicemailMessage``
@@ -2764,11 +3316,11 @@ interface LocalCallOptions {
2764
3316
  * fires the callback after the drop is queued). Acceptance tests use
2765
3317
  * this to mark a run INVALID when ``classification !== 'human'``.
2766
3318
  */
2767
- onMachineDetection?: (result: MachineDetectionResult) => void | Promise<void>;
3319
+ readonly onMachineDetection?: (result: MachineDetectionResult) => void | Promise<void>;
2768
3320
  /** If set, spoken as a voicemail message when AMD detects a machine. Implicitly enables ``machineDetection``. */
2769
- voicemailMessage?: string;
3321
+ readonly voicemailMessage?: string;
2770
3322
  /** Dynamic variables merged into agent.variables before call. Override agent-level variables. */
2771
- variables?: Record<string, string>;
3323
+ readonly variables?: Readonly<Record<string, string>>;
2772
3324
  /**
2773
3325
  * Ring timeout in seconds. Forwarded to Twilio as `Timeout` and to Telnyx
2774
3326
  * as `timeout_secs`. Defaults to **25 s** — the production-recommended
@@ -2776,7 +3328,7 @@ interface LocalCallOptions {
2776
3328
  * parity, or `null` to omit the parameter entirely (carrier picks its
2777
3329
  * own default).
2778
3330
  */
2779
- ringTimeout?: number | null;
3331
+ readonly ringTimeout?: number | null;
2780
3332
  /**
2781
3333
  * When `true`, block until the call reaches a terminal state and resolve
2782
3334
  * to a {@link CallResult} (`outcome` ∈ answered / voicemail / no_answer /
@@ -2790,7 +3342,7 @@ interface LocalCallOptions {
2790
3342
  *
2791
3343
  * Mirrors Python's `Patter.call(..., wait=True)`.
2792
3344
  */
2793
- wait?: boolean;
3345
+ readonly wait?: boolean;
2794
3346
  }
2795
3347
  /**
2796
3348
  * Carrier-agnostic terminal outcomes for an outbound call. `answered` means a
@@ -3136,7 +3688,7 @@ interface ElevenLabsParkedWS {
3136
3688
  /** WebSocket-based ElevenLabs TTS adapter — opt-in low-latency variant. */
3137
3689
  declare class ElevenLabsWebSocketTTS implements TTSAdapter {
3138
3690
  static readonly providerKey = "elevenlabs_ws";
3139
- readonly apiKey: string;
3691
+ private readonly apiKey;
3140
3692
  readonly voiceId: string;
3141
3693
  readonly modelId: string;
3142
3694
  readonly voiceSettings?: Record<string, unknown>;
@@ -3692,6 +4244,86 @@ interface DefineToolInput {
3692
4244
  */
3693
4245
  declare function defineTool(input: DefineToolInput): ToolDefinition;
3694
4246
 
4247
+ /**
4248
+ * Built-in ``consult`` tool — lets the in-call agent escalate to the caller's
4249
+ * own back-office agent for deeper reasoning or fresh information, then speak
4250
+ * the answer.
4251
+ *
4252
+ * This is the *dispatch + consult* pattern: Patter conducts the call (STT +
4253
+ * LLM/voice + TTS + carrier); when the in-call agent hits something it cannot
4254
+ * answer directly, it invokes this tool, which reaches the configured
4255
+ * back-office agent and returns the reply for the agent to speak. The
4256
+ * back-office agent stays off the per-turn path — consulted only on demand, so
4257
+ * ordinary turns keep their low latency.
4258
+ *
4259
+ * Two targets are supported (see {@link ConsultConfig}):
4260
+ *
4261
+ * - ``url`` — the generic webhook path: POSTs ``{ request, call_id, caller,
4262
+ * callee }`` to your endpoint and reads a ``reply`` field back.
4263
+ * - ``openaiCompatible`` — speaks an OpenAI-compatible ``/chat/completions``
4264
+ * endpoint directly (e.g. an OpenClaw agent, or vLLM / Ollama / Groq) with no
4265
+ * hand-written adapter: POSTs ``{ model, messages, user }`` and speaks
4266
+ * ``choices[0].message.content``. Use {@link openclawConsult}.
4267
+ *
4268
+ * The handler does the HTTP call itself so the per-consult timeout and auth from
4269
+ * {@link ConsultConfig} are honoured. ``config.reassurance``, when set, is
4270
+ * attached so the agent speaks a filler while the consult runs (Realtime mode
4271
+ * only).
4272
+ */
4273
+
4274
+ /**
4275
+ * Build a {@link ConsultConfig} that consults a specific OpenClaw agent directly
4276
+ * (no hand-written adapter) — the TypeScript equivalent of Python's
4277
+ * ``ConsultConfig.openclaw(...)``.
4278
+ *
4279
+ * ``agent`` is the OpenClaw agent id (e.g. ``"receptionist"``) → targets
4280
+ * ``model="openclaw/<agent>"``. An already-namespaced target (``"openclaw/x"``,
4281
+ * ``"openclaw:x"``, ``"agent:x"``) is passed through. ``allowLoopback`` defaults
4282
+ * to ``true`` when ``baseUrl`` is loopback/private (the intended co-located
4283
+ * deployment). The gateway bearer is read from ``apiKey`` or the
4284
+ * ``OPENCLAW_API_KEY`` env var (operator-grade — never logged). Sized at the
4285
+ * phone-safe 30 s default; raise only for batch-style agents, never above 30 s
4286
+ * on a live call.
4287
+ */
4288
+ declare function openclawConsult(agent: string, opts?: {
4289
+ readonly baseUrl?: string;
4290
+ readonly apiKey?: string;
4291
+ readonly timeoutMs?: number;
4292
+ readonly toolName?: string;
4293
+ readonly description?: string;
4294
+ readonly reassurance?: string | Readonly<{
4295
+ message: string;
4296
+ afterMs?: number;
4297
+ }>;
4298
+ readonly headers?: Readonly<Record<string, string>>;
4299
+ readonly allowLoopback?: boolean;
4300
+ }): ConsultConfig;
4301
+ /**
4302
+ * Return an ``onCallEnd`` callback that posts the finished call's record to a
4303
+ * specific OpenClaw agent, so the brain has the record and can follow up — the
4304
+ * TypeScript equivalent of Python's ``openclaw_post_call_notifier``.
4305
+ *
4306
+ * Wire it on ``serve``:
4307
+ *
4308
+ * await phone.serve({ agent, onCallEnd: openclawPostCallNotifier('receptionist') });
4309
+ *
4310
+ * The record is POSTed to the same OpenClaw agent over its OpenAI-compatible
4311
+ * ``/chat/completions`` gateway, keyed to the call id (the ``user`` field +
4312
+ * ``x-openclaw-session-key`` header) so it lands in the SAME OpenClaw session as
4313
+ * the in-call ``consult`` turns. Fire-and-forget: any error is logged by type
4314
+ * only (never the URL / headers / key) and never thrown into teardown. Args
4315
+ * mirror {@link openclawConsult}; the bearer is read from ``apiKey`` or
4316
+ * ``OPENCLAW_API_KEY`` (operator-grade — never logged).
4317
+ */
4318
+ declare function openclawPostCallNotifier(agent: string, opts?: {
4319
+ readonly baseUrl?: string;
4320
+ readonly apiKey?: string;
4321
+ readonly timeoutMs?: number;
4322
+ readonly allowLoopback?: boolean;
4323
+ readonly includeTranscript?: boolean;
4324
+ readonly instruction?: string;
4325
+ }): (data: Record<string, unknown>) => Promise<void>;
4326
+
3695
4327
  /**
3696
4328
  * Process-wide logger used by the SDK.
3697
4329
  *
@@ -3907,6 +4539,16 @@ declare class PatterError extends Error {
3907
4539
  code?: ErrorCode;
3908
4540
  });
3909
4541
  }
4542
+ /**
4543
+ * Invalid constructor arguments, a missing required environment variable, or a
4544
+ * frozen-config constraint violation. Parity with Python's
4545
+ * ``PatterConfigError`` in ``libraries/python/getpatter/exceptions.py``.
4546
+ */
4547
+ declare class PatterConfigError extends PatterError {
4548
+ constructor(message: string, options?: {
4549
+ code?: ErrorCode;
4550
+ });
4551
+ }
3910
4552
  /** Network / WebSocket / HTTP-level connectivity failure when talking to a provider. */
3911
4553
  declare class PatterConnectionError extends PatterError {
3912
4554
  constructor(message: string, options?: {
@@ -4154,9 +4796,9 @@ declare class FallbackLLMProvider implements LLMProvider {
4154
4796
  * markers are filtered out so callers can concatenate the yielded strings
4155
4797
  * directly.
4156
4798
  */
4157
- completeStream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<string, void, unknown>;
4799
+ completeStream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<string, void, unknown>;
4158
4800
  /** Streaming entry point — yields chunks from the first provider that succeeds. */
4159
- stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
4801
+ stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<LLMChunk, void, unknown>;
4160
4802
  private tryProviders;
4161
4803
  private markUnavailable;
4162
4804
  private startRecovery;
@@ -4269,49 +4911,49 @@ interface PatterToolOptions {
4269
4911
  * Patter instance to dial through. Must be in local mode (have a `carrier`).
4270
4912
  * The tool boots `phone.serve()` on `start()`; do not call `serve()` yourself.
4271
4913
  */
4272
- phone: Patter;
4914
+ readonly phone: Patter;
4273
4915
  /**
4274
4916
  * Default agent config used for outbound calls. Per-call overrides come from
4275
4917
  * `execute({ goal, first_message })`.
4276
4918
  */
4277
- agent?: AgentOptions;
4919
+ readonly agent?: AgentOptions;
4278
4920
  /** Tool name shown to the LLM. Default `'make_phone_call'`. */
4279
- name?: string;
4921
+ readonly name?: string;
4280
4922
  /** Tool description for the LLM. Default tuned for English assistants. */
4281
- description?: string;
4923
+ readonly description?: string;
4282
4924
  /** Default per-call timeout in seconds. Default 180. */
4283
- maxDurationSec?: number;
4925
+ readonly maxDurationSec?: number;
4284
4926
  /**
4285
4927
  * Optional pass-through for `phone.serve()`'s `recording` flag — record all
4286
4928
  * outbound calls placed via this tool.
4287
4929
  */
4288
- recording?: boolean;
4930
+ readonly recording?: boolean;
4289
4931
  }
4290
4932
  /** Args accepted by `PatterTool.execute()` (and the OpenAI/Anthropic/Hermes tool schemas). */
4291
4933
  interface PatterToolExecuteArgs {
4292
- to: string;
4293
- goal?: string;
4294
- first_message?: string;
4295
- max_duration_sec?: number;
4934
+ readonly to: string;
4935
+ readonly goal?: string;
4936
+ readonly first_message?: string;
4937
+ readonly max_duration_sec?: number;
4296
4938
  }
4297
4939
  /** Result envelope returned by `PatterTool.execute()` once the underlying call ends. */
4298
4940
  interface PatterToolResult {
4299
- call_id: string;
4300
- status: string;
4301
- duration_seconds: number;
4941
+ readonly call_id: string;
4942
+ readonly status: string;
4943
+ readonly duration_seconds: number;
4302
4944
  /**
4303
4945
  * Carrier-agnostic outcome (answered / voicemail / no_answer / busy /
4304
4946
  * failed) lifted from the SDK {@link CallResult}. Optional for backward
4305
4947
  * compatibility with any code constructing this envelope without it.
4306
4948
  */
4307
- outcome?: string;
4308
- cost_usd?: number;
4309
- transcript: Array<{
4949
+ readonly outcome?: string;
4950
+ readonly cost_usd?: number;
4951
+ readonly transcript: ReadonlyArray<Readonly<{
4310
4952
  role: string;
4311
4953
  text: string;
4312
4954
  timestamp?: number;
4313
- }>;
4314
- metrics?: Record<string, unknown> | null;
4955
+ }>>;
4956
+ readonly metrics?: Readonly<Record<string, unknown>> | null;
4315
4957
  }
4316
4958
  /** Wraps a live `Patter` instance as a tool callable from external agent frameworks. */
4317
4959
  declare class PatterTool {
@@ -4322,6 +4964,11 @@ declare class PatterTool {
4322
4964
  private readonly maxDurationSec;
4323
4965
  private readonly recording;
4324
4966
  private started;
4967
+ /** Cached in-progress (or completed) start promise so concurrent execute()
4968
+ * callers all await the same boot sequence instead of each racing into
4969
+ * phone.serve(). Reset to null on failure so callers can retry after a
4970
+ * transient error. */
4971
+ private startPromise;
4325
4972
  constructor(opts: PatterToolOptions);
4326
4973
  /** OpenAI Chat Completions / Assistants tool spec. */
4327
4974
  openaiSchema(): {
@@ -4355,8 +5002,12 @@ declare class PatterTool {
4355
5002
  * `serve()` provides here. No `onCallEnd` callback is wired: the SDK's own
4356
5003
  * per-callId completion registry resolves the result, so the user's
4357
5004
  * `onCallEnd` slot is left free.
5005
+ *
5006
+ * Idempotent and concurrency-safe: concurrent callers all await the same
5007
+ * in-progress boot instead of each racing into `phone.serve()`.
4358
5008
  */
4359
5009
  start(): Promise<void>;
5010
+ private _doStart;
4360
5011
  /** Best-effort shutdown — tear the Patter server down via `disconnect()`. */
4361
5012
  stop(): Promise<void>;
4362
5013
  /**
@@ -4608,23 +5259,23 @@ interface Transcript$6 {
4608
5259
  type TranscriptCallback$6 = (transcript: Transcript$6) => void;
4609
5260
  /** Constructor options for {@link SonioxSTT}. */
4610
5261
  interface SonioxSTTOptions$1 {
4611
- model?: SonioxModel | string;
4612
- languageHints?: string[];
4613
- languageHintsStrict?: boolean;
4614
- sampleRate?: SonioxSampleRate | number;
4615
- numChannels?: number;
4616
- enableSpeakerDiarization?: boolean;
4617
- enableLanguageIdentification?: boolean;
4618
- maxEndpointDelayMs?: number;
4619
- clientReferenceId?: string;
4620
- baseUrl?: string;
5262
+ readonly model?: SonioxModel | string;
5263
+ readonly languageHints?: readonly string[];
5264
+ readonly languageHintsStrict?: boolean;
5265
+ readonly sampleRate?: SonioxSampleRate | number;
5266
+ readonly numChannels?: number;
5267
+ readonly enableSpeakerDiarization?: boolean;
5268
+ readonly enableLanguageIdentification?: boolean;
5269
+ readonly maxEndpointDelayMs?: number;
5270
+ readonly clientReferenceId?: string;
5271
+ readonly baseUrl?: string;
4621
5272
  }
4622
5273
  /** Streaming STT adapter for Soniox's real-time WebSocket API. */
4623
5274
  declare class SonioxSTT {
4624
5275
  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
4625
5276
  static readonly providerKey = "soniox";
4626
5277
  private ws;
4627
- private callbacks;
5278
+ private readonly callbacks;
4628
5279
  private final;
4629
5280
  private keepaliveTimer;
4630
5281
  private readonly apiKey;
@@ -4649,8 +5300,10 @@ declare class SonioxSTT {
4649
5300
  private emit;
4650
5301
  /** Send a binary PCM16-LE audio chunk to Soniox for transcription. */
4651
5302
  sendAudio(audio: Buffer): void;
4652
- /** Register a transcript listener (max 10 concurrent listeners). */
5303
+ /** Register a transcript listener. */
4653
5304
  onTranscript(callback: TranscriptCallback$6): void;
5305
+ /** Unregister a previously registered transcript listener. */
5306
+ offTranscript(callback: TranscriptCallback$6): void;
4654
5307
  /** Send the empty-frame stream terminator and close the WebSocket. */
4655
5308
  close(): void;
4656
5309
  }
@@ -6022,8 +6675,8 @@ interface OpenAITTSOptions {
6022
6675
  speed?: number;
6023
6676
  /**
6024
6677
  * Enable anti-aliasing LPF ahead of the 3:2 decimation. Defaults to
6025
- * ``false`` for backwards-compatibility; set to ``true`` for cleaner
6026
- * audio on sibilants / fricatives.
6678
+ * ``true`` (matches the provider default); set to ``false`` to opt out
6679
+ * for bit-exact downsample-only output.
6027
6680
  */
6028
6681
  antiAlias?: boolean;
6029
6682
  }
@@ -6344,7 +6997,7 @@ interface OpenAILLMOptions {
6344
6997
  * const llm = new openai.LLM({ apiKey: "sk-...", model: "gpt-4o-mini", temperature: 0.4 });
6345
6998
  * ```
6346
6999
  */
6347
- declare class LLM$4 extends OpenAILLMProvider {
7000
+ declare class LLM$7 extends OpenAILLMProvider {
6348
7001
  static readonly providerKey = "openai";
6349
7002
  constructor(opts?: OpenAILLMOptions);
6350
7003
  }
@@ -6455,7 +7108,7 @@ interface AnthropicLLMOptions {
6455
7108
  * const llm = new anthropic.LLM({ promptCaching: false }); // opt out of caching
6456
7109
  * ```
6457
7110
  */
6458
- declare class LLM$3 extends AnthropicLLMProvider {
7111
+ declare class LLM$6 extends AnthropicLLMProvider {
6459
7112
  static readonly providerKey = "anthropic";
6460
7113
  constructor(opts?: AnthropicLLMOptions);
6461
7114
  }
@@ -6563,7 +7216,7 @@ interface GroqLLMOptions {
6563
7216
  * const llm = new groq.LLM({ apiKey: "gsk_...", model: "llama-3.3-70b-versatile" });
6564
7217
  * ```
6565
7218
  */
6566
- declare class LLM$2 extends GroqLLMProvider {
7219
+ declare class LLM$5 extends GroqLLMProvider {
6567
7220
  static readonly providerKey = "groq";
6568
7221
  constructor(opts?: GroqLLMOptions);
6569
7222
  }
@@ -6708,7 +7361,7 @@ interface CerebrasLLMOptions {
6708
7361
  * const llm = new cerebras.LLM({ apiKey: "csk-...", model: "llama3.1-8b" });
6709
7362
  * ```
6710
7363
  */
6711
- declare class LLM$1 extends CerebrasLLMProvider {
7364
+ declare class LLM$4 extends CerebrasLLMProvider {
6712
7365
  static readonly providerKey = "cerebras";
6713
7366
  constructor(opts?: CerebrasLLMOptions);
6714
7367
  }
@@ -6790,11 +7443,365 @@ interface GoogleLLMOptions {
6790
7443
  * const llm = new google.LLM({ apiKey: "AIza...", model: "gemini-2.5-flash" });
6791
7444
  * ```
6792
7445
  */
6793
- declare class LLM extends GoogleLLMProvider {
7446
+ declare class LLM$3 extends GoogleLLMProvider {
6794
7447
  static readonly providerKey = "google";
6795
7448
  constructor(opts?: GoogleLLMOptions);
6796
7449
  }
6797
7450
 
7451
+ /**
7452
+ * Generic OpenAI-compatible LLM provider for Patter's pipeline mode.
7453
+ *
7454
+ * Drives *any* OpenAI-compatible ``/chat/completions`` endpoint — an agent
7455
+ * runtime (Hermes, OpenClaw) or a local inference gateway (Ollama, vLLM,
7456
+ * LM Studio). Patter owns the carrier + STT + turn-taking + TTS; this
7457
+ * provider turns each conversation turn into a single
7458
+ * ``POST {baseUrl}/chat/completions`` request and speaks the response.
7459
+ *
7460
+ * PARITY NOTE (internal divergence, allowed by ``sdk-parity.md``): on the
7461
+ * Python side this provider subclasses ``OpenAILLMProvider`` and merely swaps
7462
+ * the ``AsyncOpenAI`` client (passing ``timeout=`` / ``base_url=``). The TS
7463
+ * base ``OpenAILLMProvider`` is a raw-``fetch`` class with a HARDCODED 30 s
7464
+ * timeout and ``baseUrl`` exposed as a ``protected get`` rather than a
7465
+ * constructor field, so the "swap the client" trick is impossible here.
7466
+ * Instead this is a STANDALONE ``implements LLMProvider`` class (same shape as
7467
+ * {@link GroqLLMProvider} / {@link CerebrasLLMProvider}) that owns its own
7468
+ * configurable timeout and reuses {@link parseOpenAISseStream}. Observably
7469
+ * identical to Python (same 60 s / 120 s ceilings, same ``user`` field, same
7470
+ * headers); only the timeout *mechanism* differs.
7471
+ *
7472
+ * Two additions over the base OpenAI provider:
7473
+ *
7474
+ * - **Long timeout.** Agent runtimes execute tools / memory / skills before
7475
+ * replying, so a turn can take 30-90 s. The default is 60 s here (the
7476
+ * presets raise it to 120 s), REPLACING the base provider's hardcoded 30 s.
7477
+ * - **Session continuity.** Three independent, opt-in signals — each gated on
7478
+ * its own config, none coupled to another:
7479
+ * - ``sessionUserPrefix`` → emits the OpenAI ``user`` field as
7480
+ * ``` `${sessionUserPrefix}${callId}` ```. Used by runtimes that derive
7481
+ * a session from ``user`` (e.g. OpenClaw's gateway).
7482
+ * - ``sessionIdHeader`` (+ optional ``sessionIdPrefix``) → emits a per-call
7483
+ * header carrying ``` `${sessionIdPrefix}${callId}` ``` for per-call
7484
+ * session / transcript continuity on stateless runtimes that key off
7485
+ * headers (e.g. Hermes' ``X-Hermes-Session-Id``).
7486
+ * - ``sessionKeyHeader`` (+ ``sessionKey``) → emits a STATIC header for
7487
+ * long-term memory scoping (e.g. Hermes' ``X-Hermes-Session-Key``); the
7488
+ * value is the raw ``sessionKey``, never interpolated with the call id.
7489
+ * All three are OFF by default — fully backward compatible. ``sessionKey`` is
7490
+ * a credential-grade memory scope and is NEVER logged.
7491
+ *
7492
+ * Keyless gateways (Ollama / vLLM / LM Studio accept no key) are supported:
7493
+ * the ``Authorization`` header is simply omitted from the request (sending a
7494
+ * ``Bearer EMPTY`` placeholder breaks some gateways).
7495
+ */
7496
+
7497
+ /** Constructor options for {@link OpenAICompatibleLLMProvider}. */
7498
+ interface OpenAICompatibleLLMOptions {
7499
+ /**
7500
+ * Bearer token. If omitted and ``apiKeyEnv`` is given, read from that
7501
+ * environment variable. May resolve to undefined for keyless local
7502
+ * gateways — the ``Authorization`` header is then omitted entirely.
7503
+ */
7504
+ apiKey?: string;
7505
+ /**
7506
+ * Environment variable to read the bearer from when ``apiKey`` is not given
7507
+ * (e.g. ``"OPENCLAW_API_KEY"``).
7508
+ */
7509
+ apiKeyEnv?: string;
7510
+ /**
7511
+ * OpenAI-compatible base URL ending in ``/v1`` — the whole point of this
7512
+ * provider, so it is **required**. Operator-controlled config, never derived
7513
+ * from caller / transcript input.
7514
+ */
7515
+ baseUrl: string;
7516
+ /** Model / agent target — **required**. */
7517
+ model: string;
7518
+ /**
7519
+ * Per-request timeout in **seconds**. Default ``60`` (the base OpenAI
7520
+ * provider hardcodes 30 s — raised here because agent runtimes run tools
7521
+ * before replying). Converted to ``AbortSignal.timeout(timeout * 1000)``.
7522
+ */
7523
+ timeout?: number;
7524
+ /**
7525
+ * Extra headers merged into the request *after* the ``User-Agent`` so the
7526
+ * SDK attribution is not silently clobbered (a caller can still override
7527
+ * ``User-Agent`` explicitly).
7528
+ */
7529
+ extraHeaders?: Record<string, string>;
7530
+ /**
7531
+ * When set, emits the OpenAI ``user`` field as
7532
+ * ``` `${sessionUserPrefix}${callId}` ``` for per-call session continuity.
7533
+ * ``undefined`` (default) means no ``user`` field is sent. Independent of the
7534
+ * session headers below.
7535
+ */
7536
+ sessionUserPrefix?: string;
7537
+ /**
7538
+ * Optional header NAME carrying a per-call session id, e.g.
7539
+ * ``"X-Hermes-Session-Id"`` or ``"x-openclaw-session-key"``. When set AND a
7540
+ * ``callId`` is available, the header VALUE is
7541
+ * ``` `${sessionIdPrefix}${callId}` ```. ``undefined`` (default) means off.
7542
+ */
7543
+ sessionIdHeader?: string;
7544
+ /**
7545
+ * Prefix for the session-id header VALUE. Defaults to ``""`` (raw call id).
7546
+ * Only meaningful when ``sessionIdHeader`` is set.
7547
+ */
7548
+ sessionIdPrefix?: string;
7549
+ /**
7550
+ * Optional STATIC header NAME for long-term memory scoping, e.g.
7551
+ * ``"X-Hermes-Session-Key"``. Emitted with the raw ``sessionKey`` value (no
7552
+ * call-id interpolation) only when BOTH ``sessionKeyHeader`` and
7553
+ * ``sessionKey`` are set. ``undefined`` (default) means off.
7554
+ */
7555
+ sessionKeyHeader?: string;
7556
+ /**
7557
+ * Static value emitted in ``sessionKeyHeader``. Credential-grade memory
7558
+ * scope — NEVER logged. ``undefined`` (default) means the header is omitted.
7559
+ */
7560
+ sessionKey?: string;
7561
+ /** Sampling temperature [0, 2]. */
7562
+ temperature?: number;
7563
+ /** Max tokens in the assistant response (sent as ``max_completion_tokens``). */
7564
+ maxTokens?: number;
7565
+ /** OpenAI-style ``response_format`` for JSON mode / structured outputs. */
7566
+ responseFormat?: Record<string, unknown>;
7567
+ /** Whether to allow parallel tool calls. */
7568
+ parallelToolCalls?: boolean;
7569
+ /** ``"auto" | "none" | "required"`` or a specific tool object. */
7570
+ toolChoice?: string | Record<string, unknown>;
7571
+ /** Sampling seed for reproducible outputs. */
7572
+ seed?: number;
7573
+ /** Nucleus sampling cutoff in [0, 1]. */
7574
+ topP?: number;
7575
+ /** Penalty in [-2, 2] applied to repeated tokens. */
7576
+ frequencyPenalty?: number;
7577
+ /** Penalty in [-2, 2] applied to seen tokens. */
7578
+ presencePenalty?: number;
7579
+ /** Stop sequence(s). */
7580
+ stop?: string | string[];
7581
+ }
7582
+ /**
7583
+ * LLM provider for any OpenAI-compatible ``/chat/completions`` endpoint.
7584
+ *
7585
+ * Streams in the same ``{ type: "text" | "tool_call" | "usage" }`` chunk
7586
+ * format as the base OpenAI provider via the shared {@link parseOpenAISseStream}.
7587
+ */
7588
+ declare class OpenAICompatibleLLMProvider implements LLMProvider {
7589
+ /**
7590
+ * Stable pricing/dashboard key — read by stream-handler/metrics. Typed as
7591
+ * ``string`` (not the narrowed literal) so the Hermes / OpenClaw presets can
7592
+ * override it with their own key while still extending this class.
7593
+ */
7594
+ static readonly providerKey: string;
7595
+ /** Resolved bearer; undefined for keyless gateways. */
7596
+ private readonly apiKey?;
7597
+ readonly model: string;
7598
+ private readonly baseUrl;
7599
+ private readonly timeoutMs;
7600
+ private readonly extraHeaders?;
7601
+ private readonly sessionUserPrefix?;
7602
+ private readonly sessionIdHeader?;
7603
+ private readonly sessionIdPrefix?;
7604
+ private readonly sessionKeyHeader?;
7605
+ private readonly sessionKey?;
7606
+ private readonly temperature?;
7607
+ private readonly maxTokens?;
7608
+ private readonly responseFormat?;
7609
+ private readonly parallelToolCalls?;
7610
+ private readonly toolChoice?;
7611
+ private readonly seed?;
7612
+ private readonly topP?;
7613
+ private readonly frequencyPenalty?;
7614
+ private readonly presencePenalty?;
7615
+ private readonly stop?;
7616
+ constructor(options: OpenAICompatibleLLMOptions);
7617
+ /**
7618
+ * Assemble the request headers. ``User-Agent`` is set first so any
7619
+ * ``extraHeaders`` (and the per-call session headers) layer on top without
7620
+ * silently dropping the SDK attribution, and the ``Authorization`` header is
7621
+ * only added when a key is present (keyless gateways omit it).
7622
+ *
7623
+ * The two session headers are emitted INDEPENDENTLY, each gated on its own
7624
+ * config (decoupled from ``sessionUserPrefix`` and from each other):
7625
+ * - ``sessionIdHeader`` (+ ``callId``) → ``` `${sessionIdPrefix}${callId}` ```
7626
+ * - ``sessionKeyHeader`` (+ ``sessionKey``) → the static ``sessionKey`` value.
7627
+ * ``sessionKey`` is a credential-grade memory scope and is never logged.
7628
+ */
7629
+ private buildHeaders;
7630
+ /**
7631
+ * Pre-call DNS / TLS warmup for the configured endpoint. Best-effort:
7632
+ * 5 s timeout, all exceptions swallowed at debug level. The ``Authorization``
7633
+ * header is only sent when a key is present so the operator-grade bearer is
7634
+ * never echoed for keyless gateways (and the key is never logged).
7635
+ */
7636
+ warmup(): Promise<void>;
7637
+ /**
7638
+ * Build the request body. Mirrors the base OpenAI provider's sampling-kwarg
7639
+ * assembly and additionally sets ``user`` for session continuity when
7640
+ * ``sessionUserPrefix`` is set AND a ``callId`` is available — so the default
7641
+ * (prefix unset) behaviour is byte-identical to the base provider.
7642
+ */
7643
+ private buildBody;
7644
+ /** Stream Patter-format LLM chunks from the configured chat completions API. */
7645
+ stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<LLMChunk, void, unknown>;
7646
+ }
7647
+ /**
7648
+ * Public alias of {@link OpenAICompatibleLLMProvider} for the
7649
+ * ``getpatter/llm/openai-compatible`` namespace.
7650
+ *
7651
+ * @example
7652
+ * ```ts
7653
+ * import * as openaiCompatible from "getpatter/llm/openai-compatible";
7654
+ * // Ollama / vLLM / LM Studio (keyless local gateway):
7655
+ * const llm = new openaiCompatible.LLM({
7656
+ * baseUrl: "http://127.0.0.1:11434/v1",
7657
+ * model: "llama3.1",
7658
+ * });
7659
+ * ```
7660
+ */
7661
+ declare class LLM$2 extends OpenAICompatibleLLMProvider {
7662
+ static readonly providerKey = "openai_compatible";
7663
+ }
7664
+
7665
+ /**
7666
+ * Hermes agent-runtime LLM preset for Patter's pipeline mode.
7667
+ *
7668
+ * Thin preset over {@link OpenAICompatibleLLMProvider}: defaults the base URL,
7669
+ * model, env-key name, timeout, and session-continuity prefix for the Hermes
7670
+ * agent runtime so a user just writes ``phone.agent({ llm: new hermes.LLM() })``.
7671
+ *
7672
+ * Hermes runs tools / memory / skills internally before replying, so a single
7673
+ * conversation turn can take 30-90 s — hence the 120 s default timeout. Hermes
7674
+ * is stateless and keys continuity off HEADERS, not the OpenAI ``user`` field:
7675
+ * the preset sends ``X-Hermes-Session-Id: patter-call-<callId>`` on every turn
7676
+ * for per-call session / transcript continuity (on by default), and optionally
7677
+ * ``X-Hermes-Session-Key: <sessionKey>`` for long-term memory scoping when you
7678
+ * pass ``sessionKey``. (It also still emits ``user=patter-call-<callId>`` for
7679
+ * upstream-log correlation, but that is not what drives the session.)
7680
+ */
7681
+
7682
+ /** Constructor options for the Hermes ``LLM`` preset. */
7683
+ interface HermesLLMOptions {
7684
+ /** Bearer token. Falls back to ``API_SERVER_KEY`` env var when omitted. */
7685
+ apiKey?: string;
7686
+ /** Override the Hermes base URL (rarely needed). */
7687
+ baseUrl?: string;
7688
+ /** Model id. Falls back to ``API_SERVER_MODEL_NAME`` env, then ``"hermes-agent"``. */
7689
+ model?: string;
7690
+ /** Per-request timeout in seconds. Default ``120``. */
7691
+ timeout?: number;
7692
+ /**
7693
+ * Long-term memory scope. When set, emits ``X-Hermes-Session-Key`` so Hermes
7694
+ * scopes durable memory to this value across calls. ``undefined`` (default)
7695
+ * means the header is not sent. Credential-grade — never logged.
7696
+ */
7697
+ sessionKey?: string;
7698
+ /** Extra headers merged after the SDK ``User-Agent``. */
7699
+ extraHeaders?: Record<string, string>;
7700
+ /** Sampling temperature [0, 2]. */
7701
+ temperature?: number;
7702
+ /** Max tokens in the assistant response (sent as ``max_completion_tokens``). */
7703
+ maxTokens?: number;
7704
+ /** OpenAI-style ``response_format`` for JSON mode / structured outputs. */
7705
+ responseFormat?: Record<string, unknown>;
7706
+ /** Whether to allow parallel tool calls. */
7707
+ parallelToolCalls?: boolean;
7708
+ /** ``"auto" | "none" | "required"`` or a specific tool object. */
7709
+ toolChoice?: string | Record<string, unknown>;
7710
+ /** Sampling seed for reproducible outputs. */
7711
+ seed?: number;
7712
+ /** Nucleus sampling cutoff in [0, 1]. */
7713
+ topP?: number;
7714
+ /** Penalty in [-2, 2] applied to repeated tokens. */
7715
+ frequencyPenalty?: number;
7716
+ /** Penalty in [-2, 2] applied to seen tokens. */
7717
+ presencePenalty?: number;
7718
+ /** Stop sequence(s). */
7719
+ stop?: string | string[];
7720
+ }
7721
+ /**
7722
+ * Hermes agent-runtime LLM provider (OpenAI-compatible, streaming).
7723
+ *
7724
+ * @example
7725
+ * ```ts
7726
+ * import * as hermes from "getpatter/llm/hermes";
7727
+ * const llm = new hermes.LLM(); // env-defaulted, keyless OK
7728
+ * const explicitLlm = new hermes.LLM({ apiKey: "...", model: "hermes-7b" }); // explicit key + model
7729
+ * ```
7730
+ */
7731
+ declare class LLM$1 extends OpenAICompatibleLLMProvider {
7732
+ static readonly providerKey = "hermes";
7733
+ constructor(opts?: HermesLLMOptions);
7734
+ }
7735
+
7736
+ /**
7737
+ * OpenClaw agent-runtime LLM preset for Patter's pipeline mode.
7738
+ *
7739
+ * Thin preset over {@link OpenAICompatibleLLMProvider}, aligned with the
7740
+ * shipped ``openclawConsult`` builder in ``src/consult.ts``: same loopback
7741
+ * base URL (``:18789/v1``), same ``OPENCLAW_API_KEY`` env var, same
7742
+ * ``model="openclaw/<agent>"`` pass-through convention, same agent-id charset
7743
+ * rule, and the same ``x-openclaw-session-key`` session header. Takes an
7744
+ * ``agent`` id (not a raw model string), exactly like ``openclawConsult``.
7745
+ *
7746
+ * OpenClaw runs tools / memory / skills internally before replying, so a turn
7747
+ * can take 30-90 s — hence the 120 s default timeout (unlike the consult
7748
+ * preset's phone-safe 30 s filler default; here the runtime IS the per-turn
7749
+ * brain, not an on-demand escalation). It keys sessions off BOTH the OpenAI
7750
+ * ``user`` field and the ``x-openclaw-session-key`` header, so the preset
7751
+ * enables both for one runtime session per phone call.
7752
+ */
7753
+
7754
+ /** Constructor options for the OpenClaw ``LLM`` preset. */
7755
+ interface OpenClawLLMOptions {
7756
+ /**
7757
+ * OpenClaw agent id (e.g. ``"receptionist"``). Mapped to
7758
+ * ``model="openclaw/<agent>"``; an already-namespaced id (``"openclaw/x"``,
7759
+ * ``"agent:x"``) is passed through unchanged. **Required.**
7760
+ */
7761
+ agent: string;
7762
+ /** Override the OpenClaw base URL (rarely needed). */
7763
+ baseUrl?: string;
7764
+ /** Bearer token. Falls back to ``OPENCLAW_API_KEY`` env var when omitted. */
7765
+ apiKey?: string;
7766
+ /** Per-request timeout in seconds. Default ``120``. */
7767
+ timeout?: number;
7768
+ /** Extra headers merged after the SDK ``User-Agent``. */
7769
+ extraHeaders?: Record<string, string>;
7770
+ /** Sampling temperature [0, 2]. */
7771
+ temperature?: number;
7772
+ /** Max tokens in the assistant response (sent as ``max_completion_tokens``). */
7773
+ maxTokens?: number;
7774
+ /** OpenAI-style ``response_format`` for JSON mode / structured outputs. */
7775
+ responseFormat?: Record<string, unknown>;
7776
+ /** Whether to allow parallel tool calls. */
7777
+ parallelToolCalls?: boolean;
7778
+ /** ``"auto" | "none" | "required"`` or a specific tool object. */
7779
+ toolChoice?: string | Record<string, unknown>;
7780
+ /** Sampling seed for reproducible outputs. */
7781
+ seed?: number;
7782
+ /** Nucleus sampling cutoff in [0, 1]. */
7783
+ topP?: number;
7784
+ /** Penalty in [-2, 2] applied to repeated tokens. */
7785
+ frequencyPenalty?: number;
7786
+ /** Penalty in [-2, 2] applied to seen tokens. */
7787
+ presencePenalty?: number;
7788
+ /** Stop sequence(s). */
7789
+ stop?: string | string[];
7790
+ }
7791
+ /**
7792
+ * OpenClaw agent-runtime LLM provider (OpenAI-compatible, streaming).
7793
+ *
7794
+ * @example
7795
+ * ```ts
7796
+ * import * as openclaw from "getpatter/llm/openclaw";
7797
+ * const llm = new openclaw.LLM({ agent: "receptionist" }); // reads OPENCLAW_API_KEY
7798
+ * ```
7799
+ */
7800
+ declare class LLM extends OpenAICompatibleLLMProvider {
7801
+ static readonly providerKey = "openclaw";
7802
+ constructor(opts: OpenClawLLMOptions);
7803
+ }
7804
+
6798
7805
  /**
6799
7806
  * Silero VAD provider.
6800
7807
  *
@@ -6815,14 +7822,14 @@ declare const SUPPORTED_SAMPLE_RATES: readonly [8000, 16000];
6815
7822
  type SileroSampleRate = (typeof SUPPORTED_SAMPLE_RATES)[number];
6816
7823
  /** Options accepted by {@link SileroVAD.load}. */
6817
7824
  interface SileroVADOptions {
6818
- minSpeechDuration?: number;
6819
- minSilenceDuration?: number;
6820
- prefixPaddingDuration?: number;
6821
- activationThreshold?: number;
6822
- deactivationThreshold?: number;
6823
- sampleRate?: SileroSampleRate;
6824
- forceCpu?: boolean;
6825
- onnxFilePath?: string;
7825
+ readonly minSpeechDuration?: number;
7826
+ readonly minSilenceDuration?: number;
7827
+ readonly prefixPaddingDuration?: number;
7828
+ readonly activationThreshold?: number;
7829
+ readonly deactivationThreshold?: number;
7830
+ readonly sampleRate?: SileroSampleRate;
7831
+ readonly forceCpu?: boolean;
7832
+ readonly onnxFilePath?: string;
6826
7833
  }
6827
7834
  /**
6828
7835
  * Minimal structural type for the subset of `onnxruntime-node` we depend on.
@@ -6861,6 +7868,8 @@ declare class SileroVAD implements VADProvider {
6861
7868
  private speechThresholdDuration;
6862
7869
  private silenceThresholdDuration;
6863
7870
  private closed;
7871
+ /** Transitions produced in the current processFrame call but not yet returned. */
7872
+ private eventQueue;
6864
7873
  private constructor();
6865
7874
  /**
6866
7875
  * Load the Silero VAD model.
@@ -6945,9 +7954,9 @@ declare class SileroVAD implements VADProvider {
6945
7954
  interface DeepFilterNetOptions {
6946
7955
  /** Absolute path to a DeepFilterNet ONNX model. If omitted, the filter
6947
7956
  * logs a warning and becomes a pass-through. */
6948
- modelPath?: string;
7957
+ readonly modelPath?: string;
6949
7958
  /** When true, disable the pass-through warning (used by tests). */
6950
- silenceWarnings?: boolean;
7959
+ readonly silenceWarnings?: boolean;
6951
7960
  }
6952
7961
  /** OSS noise-suppression filter backed by a DeepFilterNet ONNX model. */
6953
7962
  declare class DeepFilterNetFilter implements AudioFilter {
@@ -7208,6 +8217,36 @@ declare class OpenAIRealtime2Adapter extends OpenAIRealtimeAdapter {
7208
8217
  * artefact and well below the GA VAD's 300 ms prefix-padding window.
7209
8218
  */
7210
8219
  private transcodeInboundMulaw8ToPcm24;
8220
+ /**
8221
+ * Log-only safety net for issue #154. The GA server echoes the *effective*
8222
+ * session config in `session.updated`; we request `audio/pcm` @ 24 kHz and
8223
+ * transcode PCM24→mulaw8 ourselves (see
8224
+ * `transcodeOutboundPcm24ToMulaw8Buffer`). If a future GA schema change ever
8225
+ * made the server return a different output format, that transcode — which
8226
+ * assumes PCM16-LE @ 24 kHz — would silently corrupt audio, exactly the
8227
+ * v1-beta failure mode #154 fixed. Warn so the drift surfaces in logs instead
8228
+ * of as static. Never gates audio.
8229
+ */
8230
+ private warnIfOutputFormatUnexpected;
8231
+ /**
8232
+ * Shared audio-delta translation helper. Transcodes a GA
8233
+ * `response.output_audio.delta` payload (base64 PCM-16-LE 24 kHz)
8234
+ * into mulaw 8 kHz and splits the result into 160-byte (20 ms) frames,
8235
+ * dispatching one synthetic `response.audio.delta` event per frame.
8236
+ *
8237
+ * Called from BOTH the `connect()` shim and the `adoptWebSocket()` shim
8238
+ * so that warm-path (prewarm/adopted) calls receive identical transcoding
8239
+ * to cold-path calls. Without this, adopted sockets forwarded raw PCM-24
8240
+ * to Twilio/Telnyx, producing garbled or silent audio on every warm call.
8241
+ *
8242
+ * @param parsed - The parsed GA event object (type already checked to be
8243
+ * `response.output_audio.delta` with a string `delta`).
8244
+ * @param handler - The downstream message listener to dispatch each frame to.
8245
+ * @param rest - Extra arguments forwarded from the original `message` event.
8246
+ * @returns `true` if frames were dispatched (caller should return early),
8247
+ * `false` if the resampler is still warming up (zero output bytes).
8248
+ */
8249
+ private translateGaAudioDelta;
7211
8250
  /**
7212
8251
  * Base64 PCM-16-LE 24 kHz → Base64 mulaw 8 kHz. Used by the WS
7213
8252
  * translation shim on each `response.output_audio.delta`. The stateful
@@ -7217,6 +8256,23 @@ declare class OpenAIRealtime2Adapter extends OpenAIRealtimeAdapter {
7217
8256
  */
7218
8257
  private transcodeOutboundPcm24ToMulaw8Buffer;
7219
8258
  sendFirstMessage(text: string): Promise<void>;
8259
+ /**
8260
+ * Speak a short reassurance filler WITHOUT injecting a `role:user` turn.
8261
+ *
8262
+ * GA-shape sibling of {@link sendFirstMessage} (and override of the base v1
8263
+ * {@link OpenAIRealtimeAdapter.sendReassurance}): a bare `response.create`
8264
+ * carrying explicit `instructions` so the filler is the assistant's own
8265
+ * in-band audio. No `conversation.item.create` with `role:"user"` is
8266
+ * emitted, so the transcript shows no phantom caller line. The GA endpoint
8267
+ * rejects `response.modalities` and does not inherit `audio.output.voice`
8268
+ * for an explicit `response.create`, so — exactly as in
8269
+ * {@link sendFirstMessage} — we send `output_modalities` and re-inject the
8270
+ * voice. Fillers must not imply success or failure.
8271
+ *
8272
+ * Mirrors Python `OpenAIRealtime2Adapter.send_reassurance` in
8273
+ * `providers/openai_realtime_2.py`.
8274
+ */
8275
+ sendReassurance(text: string): Promise<void>;
7220
8276
  }
7221
8277
 
7222
8278
  /**
@@ -7541,7 +8597,7 @@ declare class ChatContext {
7541
8597
  */
7542
8598
 
7543
8599
  /** Valid DTMF tone values (keypad characters). */
7544
- declare const DTMF_EVENTS: readonly ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "*", "#", "A", "B", "C", "D"];
8600
+ declare const DTMF_EVENTS: readonly ["1", "2", "3", "4", "5", "6", "7", "8", "9", "0", "*", "#", "A", "B", "C", "D"];
7545
8601
  /** Single DTMF tone value (a member of `DTMF_EVENTS`). */
7546
8602
  type DtmfEvent = (typeof DTMF_EVENTS)[number];
7547
8603
  /** Join DTMF events into a space-separated debug string. */
@@ -8030,8 +9086,10 @@ declare class TelnyxSTT {
8030
9086
  connect(): Promise<void>;
8031
9087
  /** Send a binary PCM16 audio chunk; emits the WAV header on the first call. */
8032
9088
  sendAudio(audio: Buffer): void;
8033
- /** Register a transcript listener (max 10 concurrent listeners). */
9089
+ /** Register a transcript listener. */
8034
9090
  onTranscript(callback: TranscriptCallback): void;
9091
+ /** Unregister a previously-registered transcript listener. */
9092
+ offTranscript(callback: TranscriptCallback): void;
8035
9093
  /** Close the streaming WebSocket. */
8036
9094
  close(): void;
8037
9095
  }
@@ -8149,4 +9207,4 @@ interface CallEvent {
8149
9207
  readonly direction?: string;
8150
9208
  }
8151
9209
 
8152
- export { type AgentOptions, type AgentState, AllProvidersFailedError, type AnthropicConversion, LLM$3 as AnthropicLLM, type AnthropicLLMOptions, type AnthropicMessage, AssemblyAIEncoding, AssemblyAIModel, STT$1 as AssemblyAISTT, type AssemblyAISTTOptions, type AudioConfig, type AudioSource, AuthenticationError, type BackgroundAudioOptions, BackgroundAudioPlayer, type EvaluateContext as BargeInEvaluateContext, type BargeInStrategy, BuiltinAudioClip, type BuiltinAudioClipName, type BuiltinPcmSource, type CallControl, type CallEvent, type CallEventHandler, type CallMetrics, CallMetricsAccumulator, type CallOutcome, type CallRecord, type CallResult, type CarrierKind, type CartesiaEncoding, STT$3 as CartesiaSTT, type CartesiaSTTOptions, TTS$3 as CartesiaTTS, CartesiaTTSModel, type CartesiaTTSOptions, CartesiaTTSVoiceMode, LLM$1 as CerebrasLLM, type CerebrasLLMOptions, ChatContext, type ChatMessage, type ChatRole, CloudflareTunnel, type ConversationStateSnapshot, type CostBreakdown, DEFAULT_MIN_SENTENCE_LEN, DEFAULT_PRICING, DTMF_EVENTS, DeepFilterNetFilter, type DeepFilterNetOptions, DeepgramModel, STT$6 as DeepgramSTT, type DeepgramSTTOptions, DefaultToolExecutor, type DefaultToolExecutorOptions, type DefineToolInput, type DtmfEvent, ConvAI as ElevenLabsConvAI, ElevenLabsConvAIAdapter, type ConvAIOptions as ElevenLabsConvAIOptions, ElevenLabsModel, ElevenLabsOutputFormat, ElevenLabsTTS as ElevenLabsRestTTS, TTS$6 as ElevenLabsTTS, type ElevenLabsTTSOptions, type ElevenLabsWebSocketOptions, TTS$5 as ElevenLabsWebSocketTTS, type EouTrigger, ErrorCode, EventBus, FallbackLLMProvider, type FallbackLLMProviderOptions, type FilePcmSource, GEMINI_DEFAULT_INPUT_SR, GEMINI_DEFAULT_OUTPUT_SR, GeminiLiveAdapter, type GeminiLiveEventHandler, LLM as GoogleLLM, type GoogleLLMOptions, LLM$2 as GroqLLM, type GroqLLMOptions, Guardrail$1 as Guardrail, type GuardrailOptions, type HookContext, IVRActivity, type IVRActivityOptions, type IVRToolDefinition, type IncomingMessage, type 
InitTracingOptions, TTS as InworldTTS, type InworldTTSOptions, type JobCallback, KrispFrameDuration, KrispSampleRate, KrispVivaFilter, type KrispVivaFilterOptions, type LLMChunk, LLMLoop, type LLMProvider, LMNTAudioFormat, LMNTModel, LMNTSampleRate, TTS$1 as LMNTTTS, type LMNTTTSOptions, type LatencyBreakdown, type LocalCallOptions, type LocalConfig, type LocalOptions, type Logger, type LoopCallback, type MessageHandler, MetricsStore, MinWordsStrategy, type MinWordsStrategyOptions, type ModelPricing, Ngrok, LLM$4 as OpenAILLM, type OpenAILLMOptions, OpenAILLMProvider, type OpenAIMessage, Realtime as OpenAIRealtime, Realtime2 as OpenAIRealtime2, OpenAIRealtime2Adapter, type Realtime2Options as OpenAIRealtime2Options, OpenAIRealtimeAdapter, OpenAIRealtimeAudioFormat, OpenAIRealtimeModel, type RealtimeOptions as OpenAIRealtimeOptions, OpenAIRealtimeVADType, TTS$4 as OpenAITTS, type OpenAITTSOptions, STT$4 as OpenAITranscribeSTT, type OpenAITranscribeSTTOptions, OpenAITranscriptionModel, OpenAIVoice, PRICING_LAST_UPDATED, PRICING_VERSION, type ParamSpec, PartialStreamError, Patter, PatterConnectionError, PatterError, type PatterEventType, PatterTool, type PatterToolExecuteArgs, type PatterToolOptions, type PatterToolResult, PcmCarry, PipelineHookExecutor, type PipelineHooks, type PipelineMessageHandler, Carrier as Plivo, PlivoAdapter, type PlivoCarrierOptions, type InitiateCallOptions as PlivoInitiateCallOptions, type InitiateCallResult as PlivoInitiateCallResult, PricingUnit, type PricingUnitValue, type ProviderPricing, ProvisionError, RateLimitError, type RawPcmSource, type RealtimeConfig, RemoteMessageHandler, RimeAudioFormat, RimeModel, TTS$2 as RimeTTS, type RimeTTSOptions, SPAN_BARGEIN, SPAN_CALL, SPAN_ENDPOINT, SPAN_LLM, SPAN_STT, SPAN_TOOL, SPAN_TTS, type SSEEvent, type STTConfig, type ScheduleHandle, SentenceChunker, type ServeOptions, type SilenceCallback, type SileroSampleRate, SileroVAD, type SileroVADOptions, STT$2 as SonioxSTT, type SonioxSTTOptions$1 as 
SonioxSTTOptions, type Span, type SpeechEventCallback, SpeechEvents, SpeechmaticsAudioEncoding, SpeechmaticsOperatingPoint, STT as SpeechmaticsSTT, type SpeechmaticsSTTOptions, SpeechmaticsSampleRate, SpeechmaticsServerMessage, TurnDetectionMode as SpeechmaticsTurnDetectionMode, StatefulResampler, type StatefulResamplerOptions, Static as StaticTunnel, type TTSConfig, Carrier$1 as Telnyx, TelnyxAdapter, type TelnyxCarrierOptions, type ConfigureNumberOptions as TelnyxConfigureNumberOptions, type EndCallOptions as TelnyxEndCallOptions, type InitiateCallOptions$1 as TelnyxInitiateCallOptions, type InitiateCallResult$1 as TelnyxInitiateCallResult, type ProvisionNumberOptions as TelnyxProvisionNumberOptions, type ProvisionNumberResult as TelnyxProvisionNumberResult, TelnyxSTT, TelnyxSTTInputFormat, TelnyxSTTSampleRate, type Transcript as TelnyxSTTTranscript, TelnyxTTS, TelnyxTTSSampleRate, TelnyxTTSVoice, type TelnyxTranscriptionEngine, TestSession, TfidfLoopDetector, type TfidfLoopDetectorOptions, Tool, type ToolDefinition, type ToolExecutor, type ToolHandler, type ToolOptions, type TunnelHandle, type TurnMetrics, Carrier$2 as Twilio, TwilioAdapter, type TwilioAdapterOptions, type TwilioCarrierOptions, type ConfigureNumberOptions$1 as TwilioConfigureNumberOptions, type InitiateCallOptions$2 as TwilioInitiateCallOptions, type InitiateCallResult$2 as TwilioInitiateCallResult, type ProvisionNumberOptions$1 as TwilioProvisionNumberOptions, type ProvisionNumberResult$1 as TwilioProvisionNumberResult, ULTRAVOX_DEFAULT_API_BASE, ULTRAVOX_DEFAULT_SR, type UltravoxEventHandler, UltravoxRealtimeAdapter, type UserState, STT$5 as WhisperSTT, type WhisperSTTOptions, assemblyai, builtinClipPath, calculateRealtimeCost, calculateSttCost, calculateTelephonyCost, calculateTtsCost, callsToCsv, callsToJson, cartesia, createResampler16kTo8k, createResampler24kTo16k, createResampler24kTo8k, createResampler8kTo16k, deepgram, defineTool, elevenlabs, evaluateStrategies as 
evaluateBargeInStrategies, filterEmoji, filterForTTS, filterMarkdown, formatDtmf, geminiLive, getLogger, guardrail, initTracing, isRemoteUrl, isTracingEnabled, isWebSocketUrl, lmnt, makeAuthMiddleware, mergePricing, mixPcm, mountApi, mountDashboard, mulawToPcm16, notifyDashboard, openaiTts, pcm16ToMulaw, resample16kTo8k, resample24kTo16k, resample8kTo16k, resamplePcm, resetStrategies as resetBargeInStrategies, rime, scheduleCron, scheduleInterval, scheduleOnce, selectSoundFromList, setLogger, soniox, speechmatics, startSpan, startTunnel, tool, ultravox, whisper };
9210
+ export { type AgentOptions, type AgentState, AllProvidersFailedError, type AnthropicConversion, LLM$6 as AnthropicLLM, type AnthropicLLMOptions, type AnthropicMessage, AssemblyAIEncoding, AssemblyAIModel, STT$1 as AssemblyAISTT, type AssemblyAISTTOptions, type AudioConfig, type AudioSource, AuthenticationError, type BackgroundAudioOptions, BackgroundAudioPlayer, type EvaluateContext as BargeInEvaluateContext, type BargeInStrategy, BuiltinAudioClip, type BuiltinAudioClipName, type BuiltinPcmSource, type CallControl, type CallEvent, type CallEventHandler, type CallMetrics, CallMetricsAccumulator, type CallOutcome, type CallRecord, type CallResult, type CarrierKind, type CartesiaEncoding, STT$3 as CartesiaSTT, type CartesiaSTTOptions, TTS$3 as CartesiaTTS, CartesiaTTSModel, type CartesiaTTSOptions, CartesiaTTSVoiceMode, LLM$4 as CerebrasLLM, type CerebrasLLMOptions, ChatContext, type ChatMessage, type ChatRole, CloudflareTunnel, type ConsultConfig, type ConversationStateSnapshot, type CostBreakdown, DEFAULT_MIN_SENTENCE_LEN, DEFAULT_PRICING, DTMF_EVENTS, DeepFilterNetFilter, type DeepFilterNetOptions, DeepgramModel, STT$6 as DeepgramSTT, type DeepgramSTTOptions, DefaultToolExecutor, type DefaultToolExecutorOptions, type DefineToolInput, type DtmfEvent, ConvAI as ElevenLabsConvAI, ElevenLabsConvAIAdapter, type ConvAIOptions as ElevenLabsConvAIOptions, ElevenLabsModel, ElevenLabsOutputFormat, ElevenLabsTTS as ElevenLabsRestTTS, TTS$6 as ElevenLabsTTS, type ElevenLabsTTSOptions, type ElevenLabsWebSocketOptions, TTS$5 as ElevenLabsWebSocketTTS, type EouTrigger, ErrorCode, EventBus, FallbackLLMProvider, type FallbackLLMProviderOptions, type FilePcmSource, GEMINI_DEFAULT_INPUT_SR, GEMINI_DEFAULT_OUTPUT_SR, GeminiLiveAdapter, type GeminiLiveEventHandler, LLM$3 as GoogleLLM, type GoogleLLMOptions, LLM$5 as GroqLLM, type GroqLLMOptions, Guardrail$1 as Guardrail, type GuardrailOptions, LLM$1 as HermesLLM, type HermesLLMOptions, type HookContext, IVRActivity, type 
IVRActivityOptions, type IVRToolDefinition, type IncomingMessage, type InitTracingOptions, TTS as InworldTTS, type InworldTTSOptions, type JobCallback, KrispFrameDuration, KrispSampleRate, KrispVivaFilter, type KrispVivaFilterOptions, type LLMChunk, LLMLoop, type LLMProvider, LMNTAudioFormat, LMNTModel, LMNTSampleRate, TTS$1 as LMNTTTS, type LMNTTTSOptions, type LatencyBreakdown, type LocalCallOptions, type LocalConfig, type LocalOptions, type Logger, type LoopCallback, type MessageHandler, MetricsStore, MinWordsStrategy, type MinWordsStrategyOptions, type ModelPricing, Ngrok, type OpenAICompatibleConsult, LLM$2 as OpenAICompatibleLLM, type OpenAICompatibleLLMOptions, OpenAICompatibleLLMProvider, LLM$7 as OpenAILLM, type OpenAILLMOptions, OpenAILLMProvider, type OpenAIMessage, Realtime as OpenAIRealtime, Realtime2 as OpenAIRealtime2, OpenAIRealtime2Adapter, type Realtime2Options as OpenAIRealtime2Options, OpenAIRealtimeAdapter, OpenAIRealtimeAudioFormat, OpenAIRealtimeModel, type RealtimeOptions as OpenAIRealtimeOptions, OpenAIRealtimeVADType, TTS$4 as OpenAITTS, type OpenAITTSOptions, STT$4 as OpenAITranscribeSTT, type OpenAITranscribeSTTOptions, OpenAITranscriptionModel, OpenAIVoice, LLM as OpenClawLLM, type OpenClawLLMOptions, PRICING_LAST_UPDATED, PRICING_VERSION, type ParamSpec, PartialStreamError, Patter, PatterConfigError, PatterConnectionError, PatterError, type PatterEventType, PatterTool, type PatterToolExecuteArgs, type PatterToolOptions, type PatterToolResult, PcmCarry, PipelineHookExecutor, type PipelineHooks, type PipelineMessageHandler, Carrier as Plivo, PlivoAdapter, type PlivoCarrierOptions, type InitiateCallOptions as PlivoInitiateCallOptions, type InitiateCallResult as PlivoInitiateCallResult, PricingUnit, type PricingUnitValue, type ProviderPricing, ProvisionError, RateLimitError, type RawPcmSource, type RealtimeConfig, type RealtimeTurnDetection, RemoteMessageHandler, RimeAudioFormat, RimeModel, TTS$2 as RimeTTS, type RimeTTSOptions, 
SPAN_BARGEIN, SPAN_CALL, SPAN_ENDPOINT, SPAN_LLM, SPAN_STT, SPAN_TOOL, SPAN_TTS, type SSEEvent, type STTConfig, type ScheduleHandle, SentenceChunker, type ServeOptions, type SilenceCallback, type SileroSampleRate, SileroVAD, type SileroVADOptions, STT$2 as SonioxSTT, type SonioxSTTOptions$1 as SonioxSTTOptions, type Span, type SpeechEventCallback, SpeechEvents, SpeechmaticsAudioEncoding, SpeechmaticsOperatingPoint, STT as SpeechmaticsSTT, type SpeechmaticsSTTOptions, SpeechmaticsSampleRate, SpeechmaticsServerMessage, TurnDetectionMode as SpeechmaticsTurnDetectionMode, StatefulResampler, type StatefulResamplerOptions, Static as StaticTunnel, type TTSConfig, Carrier$1 as Telnyx, TelnyxAdapter, type TelnyxCarrierOptions, type ConfigureNumberOptions as TelnyxConfigureNumberOptions, type EndCallOptions as TelnyxEndCallOptions, type InitiateCallOptions$1 as TelnyxInitiateCallOptions, type InitiateCallResult$1 as TelnyxInitiateCallResult, type ProvisionNumberOptions as TelnyxProvisionNumberOptions, type ProvisionNumberResult as TelnyxProvisionNumberResult, TelnyxSTT, TelnyxSTTInputFormat, TelnyxSTTSampleRate, type Transcript as TelnyxSTTTranscript, TelnyxTTS, TelnyxTTSSampleRate, TelnyxTTSVoice, type TelnyxTranscriptionEngine, TestSession, TfidfLoopDetector, type TfidfLoopDetectorOptions, Tool, type ToolDefinition, type ToolExecutor, type ToolHandler, type ToolOptions, type TunnelHandle, type TurnMetrics, Carrier$2 as Twilio, TwilioAdapter, type TwilioAdapterOptions, type TwilioCarrierOptions, type ConfigureNumberOptions$1 as TwilioConfigureNumberOptions, type InitiateCallOptions$2 as TwilioInitiateCallOptions, type InitiateCallResult$2 as TwilioInitiateCallResult, type ProvisionNumberOptions$1 as TwilioProvisionNumberOptions, type ProvisionNumberResult$1 as TwilioProvisionNumberResult, ULTRAVOX_DEFAULT_API_BASE, ULTRAVOX_DEFAULT_SR, type UltravoxEventHandler, UltravoxRealtimeAdapter, type UserState, STT$5 as WhisperSTT, type WhisperSTTOptions, assemblyai, 
builtinClipPath, calculateRealtimeCost, calculateSttCost, calculateTelephonyCost, calculateTtsCost, callsToCsv, callsToJson, cartesia, createResampler16kTo8k, createResampler24kTo16k, createResampler24kTo8k, createResampler8kTo16k, deepgram, defineTool, elevenlabs, evaluateStrategies as evaluateBargeInStrategies, filterEmoji, filterForTTS, filterMarkdown, formatDtmf, geminiLive, getLogger, guardrail, initTracing, isRemoteUrl, isTracingEnabled, isWebSocketUrl, lmnt, makeAuthMiddleware, mergePricing, mixPcm, mountApi, mountDashboard, mulawToPcm16, notifyDashboard, openaiTts, openclawConsult, openclawPostCallNotifier, pcm16ToMulaw, resample16kTo8k, resample24kTo16k, resample8kTo16k, resamplePcm, resetStrategies as resetBargeInStrategies, rime, scheduleCron, scheduleInterval, scheduleOnce, selectSoundFromList, setLogger, soniox, speechmatics, startSpan, startTunnel, tool, ultravox, whisper };