getpatter 0.6.3 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -189,8 +189,17 @@ declare class SpeechEvents {
189
189
  *
190
190
  * Wraps `wss://api.openai.com/v1/realtime` and exposes the unified
191
191
  * Patter realtime contract (`connect / sendAudio / onEvent / close`) on
192
- * {@link OpenAIRealtimeAdapter}. Audio negotiation defaults to
193
- * `g711_ulaw` so traffic flows through Twilio/Telnyx without transcoding.
192
+ * {@link OpenAIRealtimeAdapter}.
193
+ *
194
+ * NOTE (issue #154): this class is no longer instantiated directly for the
195
+ * telephony bridge. OpenAI deprecated the Beta Realtime API, so its flat
196
+ * `output_audio_format: g711_ulaw` session shape is ignored by GA models —
197
+ * the server falls back to PCM16 @ 24 kHz, which this adapter would forward to
198
+ * Twilio framed as 8 kHz mulaw (static + broken STT). `buildAIAdapter` in
199
+ * `server.ts` now routes BOTH the `OpenAIRealtime` and `OpenAIRealtime2`
200
+ * engines through {@link OpenAIRealtime2Adapter} (GA session shape + internal
201
+ * PCM24→mulaw8 transcode). This class is retained as the shared base class
202
+ * that `OpenAIRealtime2Adapter` extends.
194
203
  */
195
204
 
196
205
  /**
@@ -292,6 +301,46 @@ interface OpenAIRealtimeOptions {
292
301
  * Has no effect on models that don't support the `reasoning` field.
293
302
  */
294
303
  reasoningEffort?: 'minimal' | 'low' | 'medium' | 'high';
304
+ /**
305
+ * Input noise reduction for speakerphone / conference audio. `undefined`
306
+ * (default) omits the field entirely (no reduction — the behavior before this option existed).
307
+ * `"far_field"` is recommended for phone / speakerphone calls;
308
+ * `"near_field"` for a handset close to the mouth.
309
+ *
310
+ * v1 wire shape: emitted at the top level of `session.update` as
311
+ * `input_audio_noise_reduction: { type }`. The GA adapter
312
+ * (`OpenAIRealtime2Adapter`) nests it under `audio.input` instead.
313
+ *
314
+ * Mirrors Python `noise_reduction` on `OpenAIRealtimeAdapter`.
315
+ */
316
+ noiseReduction?: 'near_field' | 'far_field';
317
+ /**
318
+ * Turn-detection tuning. `undefined` (default) keeps the adapter's current
319
+ * hardcoded `server_vad` / threshold `0.5` / silence 300 ms settings.
320
+ * Raise `threshold` or switch to `semantic_vad` with `eagerness: 'low'` to
321
+ * stop speakerphone / conference noise from triggering false barge-ins.
322
+ *
323
+ * Mirrors Python `turn_detection` on `OpenAIRealtimeAdapter` and
324
+ * `turn_detection` on the engine marker `engines.openai.Realtime`.
325
+ */
326
+ turnDetection?: RealtimeTurnDetection;
327
+ /**
328
+ * Gate the model's response on the Whisper transcript (legacy behavior).
329
+ *
330
+ * `false` (default) — the stream handler requests the response on
331
+ * `speech_stopped`, independently of the Whisper `transcript_input` event.
332
+ * The transcript is display-only (dashboard / history / `onTranscript`).
333
+ * `true` — the stream handler requests the response only after the
334
+ * `transcript_input` event passes the hallucination filter (prior
335
+ * behavior).
336
+ *
337
+ * The adapter itself does not act on this flag — it is read by the stream
338
+ * handler via {@link OpenAIRealtimeAdapter.getGateResponseOnTranscript} to
339
+ * decide WHEN to call {@link OpenAIRealtimeAdapter.requestResponse}.
340
+ *
341
+ * Mirrors Python `gate_response_on_transcript` on `OpenAIRealtimeAdapter`.
342
+ */
343
+ gateResponseOnTranscript?: boolean;
295
344
  }
296
345
  /** Realtime WebSocket adapter for OpenAI's `gpt-realtime` family. */
297
346
  declare class OpenAIRealtimeAdapter {
@@ -314,12 +363,22 @@ declare class OpenAIRealtimeAdapter {
314
363
  private currentResponseAudioMs;
315
364
  private currentResponseFirstAudioAt;
316
365
  protected readonly options: OpenAIRealtimeOptions;
366
+ private readonly gateResponseOnTranscript;
317
367
  constructor(apiKey: string, model?: string, voice?: string, instructions?: string, tools?: Array<{
318
368
  name: string;
319
369
  description: string;
320
370
  parameters: Record<string, unknown>;
321
371
  strict?: boolean;
322
372
  }> | undefined, audioFormat?: OpenAIRealtimeAudioFormat, options?: OpenAIRealtimeOptions);
373
+ /**
374
+ * Whether the stream handler should gate the model response on the Whisper
375
+ * transcript (legacy) or fire it on `speech_stopped` (default, decoupled).
376
+ *
377
+ * `false` (default) — the response is requested on `speech_stopped`,
378
+ * independently of Whisper. `true` — the response is requested only after
379
+ * `transcript_input` passes the hallucination filter.
380
+ */
381
+ getGateResponseOnTranscript(): boolean;
323
382
  /**
324
383
  * Build the production session.update body. Mirrors the body sent
325
384
  * inside `connect()` so warmup can apply identical configuration to
@@ -399,18 +458,45 @@ declare class OpenAIRealtimeAdapter {
399
458
  /** Remove a previously registered {@link onEvent} callback. */
400
459
  offEvent(callback: RealtimeEventCallback): void;
401
460
  protected ensureMessageListener(): void;
402
- /** Truncate the in-flight assistant turn and cancel the active response.
461
+ /** Truncate the in-flight assistant turn's playback offset on the server.
462
+ *
463
+ * Sends ONLY ``conversation.item.truncate`` — no ``response.cancel``. This
464
+ * is the half of barge-in handling that a WebSocket transport MUST always
465
+ * perform: per OpenAI's docs, the GA server auto-truncates on barge-in only
466
+ * over WebRTC / SIP; on the WebSocket transport the client is responsible
467
+ * for telling the server how much of the assistant turn was actually heard.
468
+ * In server-managed mode (``interrupt_response: true``) the server already
469
+ * cancels the response itself, so issuing ``response.cancel`` here would be
470
+ * redundant / rejected — call this method, not {@link cancelResponse}.
403
471
  *
404
472
  * ``audio_end_ms`` MUST reflect what the caller actually heard, not what
405
473
  * the server generated. OpenAI streams audio at 5-10x real-time, so the
406
474
  * byte-derived counter overstates playback whenever the consumer cleared
407
- * its playout buffer (e.g. ``send_clear``) before the audio reached the
475
+ * its playout buffer (e.g. ``sendClear``) before the audio reached the
408
476
  * speaker. We bound the truncate point by wall-clock time since the first
409
477
  * chunk of this response — that's the physical maximum a 1x real-time
410
478
  * playback could have produced. Without this cap, OpenAI keeps the full
411
479
  * generated assistant text on the transcript, and the model replays /
412
480
  * resumes from it on the next turn — manifesting as re-greetings and
413
481
  * mid-sentence fragments after a barge-in storm.
482
+ *
483
+ * No-op when no response is in flight, keeping it idempotent across stale
484
+ * callers. Resets per-response tracking so post-truncate late frames and
485
+ * the next response start clean.
486
+ */
487
+ truncate(): void;
488
+ /** Truncate the in-flight assistant turn AND cancel the active response.
489
+ *
490
+ * Sends BOTH ``conversation.item.truncate`` (the played-offset bookkeeping)
491
+ * AND ``response.cancel``. Use this on the LEGACY client-managed barge-in
492
+ * path (``gateResponseOnTranscript`` true → ``interrupt_response: false``,
493
+ * so the server does NOT cancel for us) and for explicit cancels driven by
494
+ * Patter (e.g. on transfer / hangup). In server-managed mode call
495
+ * {@link truncate} instead — the server already cancels the response, and an
496
+ * extra ``response.cancel`` would be redundant / rejected.
497
+ *
498
+ * Truncation bounding semantics are identical to {@link truncate}; see its
499
+ * doc comment for the ``audio_end_ms`` wall-clock cap rationale.
414
500
  */
415
501
  cancelResponse(): void;
416
502
  /** Inject a user text turn and request a new response. */
@@ -441,6 +527,24 @@ declare class OpenAIRealtimeAdapter {
441
527
  * customer cue).
442
528
  */
443
529
  sendFirstMessage(text: string): Promise<void>;
530
+ /**
531
+ * Speak a short reassurance filler WITHOUT injecting a `role:user` turn.
532
+ *
533
+ * Same no-fake-turn shape as {@link sendFirstMessage}: a bare
534
+ * `response.create` carrying explicit `instructions`, so the filler is the
535
+ * assistant's own in-band audio. The reassurance scheduler in the
536
+ * stream-handler routes here instead of {@link sendText} — which would emit
537
+ * a `conversation.item.create` with `role:'user'` and falsely show the
538
+ * caller saying "One moment." in the transcript. Fillers must not imply
539
+ * success or failure.
540
+ *
541
+ * Uses `modalities: ['audio', 'text']` (v1-beta shape). The GA subclass
542
+ * {@link OpenAIRealtime2Adapter} overrides this with `output_modalities`
543
+ * and re-injects `audio.output.voice` so the GA endpoint does not reject
544
+ * the request. Mirrors Python `OpenAIRealtimeAdapter.send_reassurance` in
545
+ * `providers/openai_realtime.py`.
546
+ */
547
+ sendReassurance(text: string): Promise<void>;
444
548
  /** Submit a tool/function-call result and request the next response. */
445
549
  sendFunctionResult(callId: string, result: string): Promise<void>;
446
550
  /** Stop the heartbeat, drop listeners, and close the Realtime WebSocket. */
@@ -703,48 +807,48 @@ interface LatencyBreakdown {
703
807
  * number as "STT latency". Falls back to turn_start when the endpoint
704
808
  * signal is unavailable (degraded provider, batch STT, etc.).
705
809
  */
706
- stt_ms: number;
810
+ readonly stt_ms: number;
707
811
  /**
708
812
  * Duration of the user's utterance (turn_start → end-of-speech). Useful
709
813
  * to distinguish "user spoke for 4s" from "STT took 4s to finalize" —
710
814
  * they used to be conflated in stt_ms before 0.6.1. Optional — undefined
711
815
  * when the endpoint signal is unavailable.
712
816
  */
713
- user_speech_duration_ms?: number;
817
+ readonly user_speech_duration_ms?: number;
714
818
  /**
715
819
  * Backwards-compatible LLM bucket. With the split below, this now reflects
716
820
  * the user-perceived first-token latency (TTFT) when streaming is available
717
821
  * and the full generation time otherwise. Prefer ``llm_ttft_ms`` /
718
822
  * ``llm_total_ms`` in new code.
719
823
  */
720
- llm_ms: number;
824
+ readonly llm_ms: number;
721
825
  /** Time-to-first-token (UX-facing latency): stt_complete → first LLM token. */
722
- llm_ttft_ms?: number;
826
+ readonly llm_ttft_ms?: number;
723
827
  /**
724
828
  * Total LLM generation time: stt_complete → last LLM token. Distinct from
725
829
  * ``llm_ms`` so cost/throughput analysis and TTFT can be tracked separately.
726
830
  */
727
- llm_total_ms?: number;
728
- tts_ms: number;
729
- total_ms: number;
831
+ readonly llm_total_ms?: number;
832
+ readonly tts_ms: number;
833
+ readonly total_ms: number;
730
834
  /**
731
835
  * Endpoint latency: time from end-of-user-speech (VAD stop or STT
732
836
  * ``speech_final``) to LLM dispatch. Captures the silence-detection +
733
837
  * transcript-finalization gap. Optional — undefined when the source signal
734
838
  * is missing.
735
839
  */
736
- endpoint_ms?: number;
840
+ readonly endpoint_ms?: number;
737
841
  /**
738
842
  * Barge-in latency: time from user-interrupt detection to TTS playback
739
843
  * actually halting (i.e. after ``sendClear`` returned). Optional — only
740
844
  * populated on interrupted turns.
741
845
  */
742
- bargein_ms?: number;
846
+ readonly bargein_ms?: number;
743
847
  /**
744
848
  * Total TTS time: LLM-first-token (or first-sentence boundary) to last
745
849
  * TTS audio byte sent. Optional — undefined when TTS never completed.
746
850
  */
747
- tts_total_ms?: number;
851
+ readonly tts_total_ms?: number;
748
852
  /**
749
853
  * **User-perceived agent response latency**: time from end-of-user-speech
750
854
  * (VAD stop or STT ``speech_final``) to the first audio byte the agent
@@ -757,54 +861,54 @@ interface LatencyBreakdown {
757
861
  * the system-controlled latency: silence detection + LLM TTFT + TTS
758
862
  * first byte.
759
863
  */
760
- agent_response_ms?: number;
864
+ readonly agent_response_ms?: number;
761
865
  }
762
866
  /** Per-call cost breakdown by component (STT/TTS/LLM/telephony) plus the total. */
763
867
  interface CostBreakdown {
764
- stt: number;
765
- tts: number;
766
- llm: number;
767
- telephony: number;
768
- total: number;
868
+ readonly stt: number;
869
+ readonly tts: number;
870
+ readonly llm: number;
871
+ readonly telephony: number;
872
+ readonly total: number;
769
873
  /**
770
874
  * Amount saved on LLM cost thanks to OpenAI Realtime prompt caching.
771
875
  * ``llm`` above is the net cost AFTER this discount. Dashboards can
772
876
  * render ``saved $X (pct%)`` next to the LLM line when > 0.
773
877
  */
774
- llm_cached_savings?: number;
878
+ readonly llm_cached_savings: number;
775
879
  }
776
880
  /** Metrics captured for a single conversation turn. */
777
881
  interface TurnMetrics {
778
- turn_index: number;
779
- user_text: string;
780
- agent_text: string;
781
- latency: LatencyBreakdown;
782
- stt_audio_seconds: number;
783
- tts_characters: number;
784
- timestamp: number;
882
+ readonly turn_index: number;
883
+ readonly user_text: string;
884
+ readonly agent_text: string;
885
+ readonly latency: LatencyBreakdown;
886
+ readonly stt_audio_seconds: number;
887
+ readonly tts_characters: number;
888
+ readonly timestamp: number;
785
889
  }
786
890
  /** Aggregated metrics for an entire call (turns, costs, latency percentiles). */
787
891
  interface CallMetrics {
788
- call_id: string;
789
- duration_seconds: number;
790
- turns: TurnMetrics[];
791
- cost: CostBreakdown;
792
- latency_avg: LatencyBreakdown;
793
- latency_p95: LatencyBreakdown;
794
- latency_p50?: LatencyBreakdown;
795
- latency_p90?: LatencyBreakdown;
796
- latency_p99?: LatencyBreakdown;
797
- provider_mode: string;
798
- stt_provider: string;
799
- tts_provider: string;
800
- llm_provider: string;
801
- telephony_provider: string;
892
+ readonly call_id: string;
893
+ readonly duration_seconds: number;
894
+ readonly turns: readonly TurnMetrics[];
895
+ readonly cost: CostBreakdown;
896
+ readonly latency_avg: LatencyBreakdown;
897
+ readonly latency_p95: LatencyBreakdown;
898
+ readonly latency_p50: LatencyBreakdown;
899
+ readonly latency_p90: LatencyBreakdown;
900
+ readonly latency_p99: LatencyBreakdown;
901
+ readonly provider_mode: string;
902
+ readonly stt_provider: string;
903
+ readonly tts_provider: string;
904
+ readonly llm_provider: string;
905
+ readonly telephony_provider: string;
802
906
  /** Model identifiers per provider (e.g. "ink-whisper", "eleven_flash_v2_5",
803
907
  * "gpt-oss-120b"). Surface on the dashboard cost breakdown so operators
804
908
  * can attribute per-call spend to a specific model. */
805
- stt_model?: string;
806
- tts_model?: string;
807
- llm_model?: string;
909
+ readonly stt_model?: string;
910
+ readonly tts_model?: string;
911
+ readonly llm_model?: string;
808
912
  }
809
913
  /** Programmatic control surface for a live call (transfer, hangup, DTMF). */
810
914
  interface CallControl {
@@ -830,7 +934,7 @@ interface CallControl {
830
934
  }
831
935
  /** Mutable per-call accumulator that stamps timestamps and emits final `CallMetrics`. */
832
936
  declare class CallMetricsAccumulator {
833
- callId: string;
937
+ readonly callId: string;
834
938
  readonly providerMode: string;
835
939
  readonly telephonyProvider: string;
836
940
  readonly sttProvider: string;
@@ -922,6 +1026,16 @@ declare class CallMetricsAccumulator {
922
1026
  * (the common cause of missing endpoint signals).
923
1027
  */
924
1028
  private _endpointSignalMissingCount;
1029
+ /**
1030
+ * Monotonic per-call turn counter. Reserved at turn OPEN
1031
+ * (``onAdapterSpeechStopped`` / ``speech_stopped``) via
1032
+ * ``reserveTurnIndex()`` and threaded through the buffering pipeline into
1033
+ * ``recordTurnComplete`` / ``recordTurnInterrupted`` as ``preReservedIndex``.
1034
+ * This makes ``turn_index`` stable under drops / interrupts (previously it
1035
+ * was assigned at completion as ``this._turns.length``, which shifted when a
1036
+ * turn was dropped). Parity with Python ``_next_turn_index``.
1037
+ */
1038
+ private _nextTurnIndex;
925
1039
  constructor(opts: {
926
1040
  callId: string;
927
1041
  providerMode: string;
@@ -951,6 +1065,18 @@ declare class CallMetricsAccumulator {
951
1065
  get turnActive(): boolean;
952
1066
  /** Begin a new turn — stamps the turn start timestamp and resets per-turn state. */
953
1067
  startTurn(): void;
1068
+ /**
1069
+ * Reserve and return the next monotonic turn index.
1070
+ *
1071
+ * Called once per turn at the moment the turn OPENS (Realtime:
1072
+ * ``onAdapterSpeechStopped``). The returned index is threaded through the
1073
+ * buffering pipeline and handed back to ``recordTurnComplete`` /
1074
+ * ``recordTurnInterrupted`` as ``preReservedIndex`` so the emitted
1075
+ * ``turn_index`` matches the live per-line transcript ordering even when a
1076
+ * turn is dropped or interrupted between open and close. Parity with Python
1077
+ * ``reserve_turn_index``.
1078
+ */
1079
+ reserveTurnIndex(): number;
954
1080
  /**
955
1081
  * Start a new turn only if no turn is currently open.
956
1082
  * Use this at inbound-audio ingestion points so the turn timer begins
@@ -1027,7 +1153,7 @@ declare class CallMetricsAccumulator {
1027
1153
  * ``user_text=''``. The caller treats ``null`` as "nothing to emit";
1028
1154
  * ``emitTurnMetrics`` is already null-safe.
1029
1155
  */
1030
- recordTurnComplete(agentText: string): TurnMetrics | null;
1156
+ recordTurnComplete(agentText: string, preReservedIndex?: number): TurnMetrics | null;
1031
1157
  /**
1032
1158
  * Close the current turn as interrupted (barge-in) and return the
1033
1159
  * recorded metrics. Returns ``null`` when no turn is open, OR when
@@ -1037,7 +1163,7 @@ declare class CallMetricsAccumulator {
1037
1163
  * a future refactor that reorders the bargein + LLM-unwind paths)
1038
1164
  * from overwriting a turn that the complete path already emitted.
1039
1165
  */
1040
- recordTurnInterrupted(): TurnMetrics | null;
1166
+ recordTurnInterrupted(preReservedIndex?: number): TurnMetrics | null;
1041
1167
  /**
1042
1168
  * Record the moment VAD emitted speech_end for the current utterance.
1043
1169
  * @param ts Optional override timestamp in hrTimeMs units (defaults to now).
@@ -1058,8 +1184,10 @@ declare class CallMetricsAccumulator {
1058
1184
  recordTurnCommitted(ts?: number): void;
1059
1185
  /**
1060
1186
  * Record the delta (ms) between turn-committed and when on_user_turn_completed
1061
- * pipeline hook finished. Stored for inclusion in the next ``emitEouMetrics``
1062
- * call (or an explicit re-emit if desired).
1187
+ * pipeline hook finished. Does NOT re-emit: like Python's
1188
+ * ``record_on_user_turn_completed_delay``, this only stores the value; the
1189
+ * single EOU emission happens on ``recordTurnCommitted`` (3-timestamp guard,
1190
+ * delay defaults to 0 if not yet recorded).
1063
1191
  */
1064
1192
  recordOnUserTurnCompletedDelay(delayMs: number): void;
1065
1193
  /**
@@ -1070,7 +1198,7 @@ declare class CallMetricsAccumulator {
1070
1198
  * ``transcriptionDelay`` = turnCommitted − vadStopped (ms)
1071
1199
  * ``onUserTurnCompletedDelay`` = caller-supplied delta (ms) or 0
1072
1200
  */
1073
- /** Emit `EOUMetrics` once VAD-stop, STT-final, and turn-committed timestamps are all known. */
1201
+ /** Emit `EOUMetrics` once VAD-stop, STT-final, and turn-committed timestamps are all known; the on_user_turn_completed delay defaults to 0 when not yet recorded. */
1074
1202
  emitEouMetrics(): void;
1075
1203
  /**
1076
1204
  * Record that a caller utterance started overlapping with agent speech.
@@ -1221,31 +1349,32 @@ declare function isWebSocketUrl(url: string): boolean;
1221
1349
 
1222
1350
  /** Snapshot of a call as held by the dashboard store. */
1223
1351
  interface CallRecord {
1224
- call_id: string;
1225
- caller: string;
1226
- callee: string;
1227
- direction: string;
1228
- started_at: number;
1229
- ended_at?: number;
1352
+ readonly call_id: string;
1353
+ readonly caller: string;
1354
+ readonly callee: string;
1355
+ readonly direction: string;
1356
+ readonly started_at: number;
1357
+ readonly ended_at?: number;
1230
1358
  /**
1231
1359
  * Current lifecycle state: ``initiated`` (pre-registered), ``ringing``,
1232
1360
  * ``in-progress``, ``completed``, ``no-answer``, ``busy``, ``failed``,
1233
1361
  * ``canceled``, or ``webhook_error``.
1234
1362
  */
1235
- status?: string;
1236
- transcript?: Array<{
1237
- role: string;
1238
- text: string;
1239
- timestamp: number;
1363
+ readonly status?: string;
1364
+ readonly transcript?: ReadonlyArray<{
1365
+ readonly role: string;
1366
+ readonly text: string;
1367
+ readonly timestamp: number;
1368
+ readonly turnIndex?: number;
1240
1369
  }>;
1241
- turns?: unknown[];
1242
- metrics?: Record<string, unknown> | null;
1243
- [key: string]: unknown;
1370
+ readonly turns?: readonly unknown[];
1371
+ readonly metrics?: Record<string, unknown> | null;
1372
+ readonly [key: string]: unknown;
1244
1373
  }
1245
1374
  /** Server-Sent-Event payload broadcast by `MetricsStore` for live UI updates. */
1246
1375
  interface SSEEvent {
1247
- type: string;
1248
- data: Record<string, unknown>;
1376
+ readonly type: string;
1377
+ readonly data: Readonly<Record<string, unknown>>;
1249
1378
  }
1250
1379
  /** In-memory bounded ring buffer of recent calls plus active-call tracking. */
1251
1380
  declare class MetricsStore extends EventEmitter {
@@ -1289,6 +1418,27 @@ declare class MetricsStore extends EventEmitter {
1289
1418
  * row from active to completed so the UI freezes the live duration timer.
1290
1419
  */
1291
1420
  updateCallStatus(callId: string, status: string, extra?: Record<string, unknown>): void;
1421
+ /**
1422
+ * Record a single transcript line (user/assistant) as it becomes known.
1423
+ *
1424
+ * FIX-5 (issue #154): the live forward path for the dashboard transcript.
1425
+ * The Realtime stream handler calls this the moment each line is known — the
1426
+ * user line right after the hallucination filter accepts it, the assistant
1427
+ * line when its turn flushes — keyed by the monotonic ``turnIndex`` reserved
1428
+ * at turn-open (``reserveTurnIndex``). Each line is appended to the active
1429
+ * call's ``transcript`` array and broadcast over SSE as a ``transcript_line``
1430
+ * event so the dashboard can render lines as they arrive and re-sort by
1431
+ * ``(turnIndex, user<assistant)`` — making a late-arriving user line land
1432
+ * ABOVE its agent line. ``recordTurn`` de-dups against the lines pushed here
1433
+ * by ``(turnIndex, role)`` so the metrics path never double-pushes the same
1434
+ * text. Parity with Python ``record_transcript_line``.
1435
+ */
1436
+ recordTranscriptLine(data: {
1437
+ call_id: string;
1438
+ turnIndex: number;
1439
+ role: 'user' | 'assistant';
1440
+ text: string;
1441
+ }): void;
1292
1442
  /** Append a single conversation turn to an active call and broadcast it via SSE. */
1293
1443
  recordTurn(data: Record<string, unknown>): void;
1294
1444
  /** Move a call from active to completed and persist its final metrics. */
@@ -1334,7 +1484,7 @@ declare class MetricsStore extends EventEmitter {
1334
1484
  isDeleted(callId: string): boolean;
1335
1485
  /** Snapshot of soft-deleted call_ids (sorted). */
1336
1486
  getDeletedCallIds(): string[];
1337
- /** Atomically persist the deleted-ids set to disk. Best-effort. */
1487
+ /** Atomically persist the deleted-ids set to disk. Best-effort async. */
1338
1488
  private persistDeletedIds;
1339
1489
  /** Look up an active call by id (returns undefined if not active or unknown). */
1340
1490
  getActive(callId: string): CallRecord | undefined;
@@ -1452,6 +1602,7 @@ declare class Carrier {
1452
1602
  }
1453
1603
 
1454
1604
  /** OpenAI Realtime engine — marker class for Patter client dispatch. */
1605
+
1455
1606
  /** Constructor options for the OpenAI `Realtime` engine marker. */
1456
1607
  interface RealtimeOptions {
1457
1608
  /** API key. Falls back to OPENAI_API_KEY env var when omitted. */
@@ -1479,6 +1630,42 @@ interface RealtimeOptions {
1479
1630
  * `"gpt-4o-transcribe"` for higher accuracy.
1480
1631
  */
1481
1632
  inputAudioTranscriptionModel?: string;
1633
+ /**
1634
+ * Input noise reduction for speakerphone / conference audio. `undefined`
1635
+ * (default) omits the field (no reduction). `"far_field"` recommended for
1636
+ * phone / speakerphone calls; `"near_field"` for a handset close to the
1637
+ * mouth. Mirrors `openai_realtime_noise_reduction` on `Patter.agent()`.
1638
+ */
1639
+ noiseReduction?: 'near_field' | 'far_field';
1640
+ /**
1641
+ * Turn-detection tuning. `undefined` (default) keeps the adapter's
1642
+ * current hardcoded `server_vad` / threshold `0.5` / silence 300 ms.
1643
+ * Raise threshold or switch to `semantic_vad` eagerness `'low'` to stop
1644
+ * speakerphone noise from triggering false barge-ins.
1645
+ *
1646
+ * Maps to `turn_detection` on the Python `engines.openai.Realtime` marker;
1647
+ * propagates to `realtimeTurnDetection` on `AgentOptions`.
1648
+ */
1649
+ turnDetection?: RealtimeTurnDetection;
1650
+ /**
1651
+ * Gate the model's response on the Whisper transcript (legacy behavior).
1652
+ *
1653
+ * `false` (default) — the speech-to-speech model responds as soon as the
1654
+ * user stops speaking (on `speech_stopped`), independently of the Whisper
1655
+ * input transcription. The transcript becomes a pure observability
1656
+ * side-channel (dashboard / history / `onTranscript`) and never gates,
1657
+ * triggers, or cancels the response. This reclaims ~500 ms of latency
1658
+ * because the model no longer waits for Whisper.
1659
+ *
1660
+ * `true` — restores the prior behavior where the response is requested
1661
+ * only after the Whisper `transcript_input` event arrives and passes the
1662
+ * hallucination filter.
1663
+ *
1664
+ * Maps to `gate_response_on_transcript` on the Python
1665
+ * `engines.openai.Realtime` marker; propagates to
1666
+ * `openaiRealtimeGateResponseOnTranscript` on `AgentOptions`.
1667
+ */
1668
+ gateResponseOnTranscript?: boolean;
1482
1669
  }
1483
1670
  /**
1484
1671
  * OpenAI Realtime engine marker.
@@ -1502,6 +1689,9 @@ declare class Realtime {
1502
1689
  readonly voice: string;
1503
1690
  readonly reasoningEffort?: 'minimal' | 'low' | 'medium' | 'high';
1504
1691
  readonly inputAudioTranscriptionModel?: string;
1692
+ readonly noiseReduction?: 'near_field' | 'far_field';
1693
+ readonly turnDetection?: RealtimeTurnDetection;
1694
+ readonly gateResponseOnTranscript?: boolean;
1505
1695
  constructor(opts?: RealtimeOptions);
1506
1696
  }
1507
1697
 
@@ -1513,6 +1703,7 @@ declare class Realtime {
1513
1703
  * different `session.update` wire shape; the client dispatches to
1514
1704
  * `OpenAIRealtime2Adapter` when this marker is passed.
1515
1705
  */
1706
+
1516
1707
  /** Constructor options for the OpenAI `Realtime2` engine marker. */
1517
1708
  interface Realtime2Options {
1518
1709
  /** API key. Falls back to OPENAI_API_KEY env var when omitted. */
@@ -1533,6 +1724,44 @@ interface Realtime2Options {
1533
1724
  * low-latency transcript partials.
1534
1725
  */
1535
1726
  inputAudioTranscriptionModel?: string;
1727
+ /**
1728
+ * Input noise reduction for speakerphone / conference audio. `undefined`
1729
+ * (default) omits the field (no reduction). `"far_field"` recommended for
1730
+ * phone / speakerphone calls; `"near_field"` for a handset close to the
1731
+ * mouth. On the GA endpoint this is nested under
1732
+ * `audio.input.input_audio_noise_reduction: { type }`.
1733
+ * Mirrors `openai_realtime_noise_reduction` on `Patter.agent()`.
1734
+ */
1735
+ noiseReduction?: 'near_field' | 'far_field';
1736
+ /**
1737
+ * Turn-detection tuning. `undefined` (default) keeps the adapter's
1738
+ * current hardcoded `server_vad` / threshold `0.5` / silence 300 ms.
1739
+ * Raise threshold or switch to `semantic_vad` eagerness `'low'` to stop
1740
+ * speakerphone noise from triggering false barge-ins.
1741
+ *
1742
+ * Maps to `turn_detection` on the Python `engines.openai_realtime_2.Realtime2`
1743
+ * marker; propagates to `realtimeTurnDetection` on `AgentOptions`.
1744
+ */
1745
+ turnDetection?: RealtimeTurnDetection;
1746
+ /**
1747
+ * Gate the model's response on the Whisper transcript (legacy behavior).
1748
+ *
1749
+ * `false` (default) — the speech-to-speech model responds as soon as the
1750
+ * user stops speaking (on `speech_stopped`), independently of the Whisper
1751
+ * input transcription. The transcript becomes a pure observability
1752
+ * side-channel (dashboard / history / `onTranscript`) and never gates,
1753
+ * triggers, or cancels the response. This reclaims ~500 ms of latency
1754
+ * because the model no longer waits for Whisper.
1755
+ *
1756
+ * `true` — restores the prior behavior where the response is requested
1757
+ * only after the Whisper `transcript_input` event arrives and passes the
1758
+ * hallucination filter.
1759
+ *
1760
+ * Maps to `gate_response_on_transcript` on the Python
1761
+ * `engines.openai_realtime_2.Realtime2` marker; propagates to
1762
+ * `openaiRealtimeGateResponseOnTranscript` on `AgentOptions`.
1763
+ */
1764
+ gateResponseOnTranscript?: boolean;
1536
1765
  }
1537
1766
  /**
1538
1767
  * OpenAI Realtime 2 engine marker — selects `gpt-realtime-2` on the GA
@@ -1557,6 +1786,9 @@ declare class Realtime2 {
1557
1786
  readonly voice: string;
1558
1787
  readonly reasoningEffort?: 'minimal' | 'low' | 'medium' | 'high';
1559
1788
  readonly inputAudioTranscriptionModel?: string;
1789
+ readonly noiseReduction?: 'near_field' | 'far_field';
1790
+ readonly turnDetection?: RealtimeTurnDetection;
1791
+ readonly gateResponseOnTranscript?: boolean;
1560
1792
  constructor(opts?: Realtime2Options);
1561
1793
  }
1562
1794
 
@@ -1701,6 +1933,33 @@ interface ToolOptions {
1701
1933
  handler?: ToolHandler;
1702
1934
  /** URL to POST to when the LLM invokes the tool. */
1703
1935
  webhookUrl?: string;
1936
+ /**
1937
+ * Optional reassurance filler the agent speaks while a slow tool call runs.
1938
+ * Two forms:
1939
+ * - `string`: shorthand for `{ message: <string>, afterMs: 1500 }`.
1940
+ * - object: explicit `{ message, afterMs? }`.
1941
+ * Currently honoured only in Realtime mode. Off by default.
1942
+ *
1943
+ * Mirrors Python `reassurance` on `Tool` / `tool()`.
1944
+ */
1945
+ reassurance?: string | {
1946
+ message: string;
1947
+ afterMs?: number;
1948
+ };
1949
+ /**
1950
+ * Per-tool execution timeout in milliseconds, applied to BOTH the handler
1951
+ * and webhook paths. `undefined` (default) uses the executor default
1952
+ * (10 000 ms). Raise for long browser-automation / external-API tools
1953
+ * (e.g. `60_000`). Clamped to a 300 000 ms ceiling by the executor.
1954
+ *
1955
+ * Mirrors Python `timeout_s` on `Tool` / `tool()`.
1956
+ */
1957
+ timeoutMs?: number;
1958
+ /**
1959
+ * Enable OpenAI strict mode for this tool's function schema. Mirrors
1960
+ * Python `strict` on `Tool`. Off by default.
1961
+ */
1962
+ strict?: boolean;
1704
1963
  }
1705
1964
  /**
1706
1965
  * Tool definition. Structurally matches `ToolDefinition` so it drops
@@ -1724,6 +1983,20 @@ declare class Tool implements ToolDefinition {
1724
1983
  readonly parameters: Record<string, unknown>;
1725
1984
  readonly handler?: ToolHandler;
1726
1985
  readonly webhookUrl?: string;
1986
+ readonly reassurance?: string | Readonly<{
1987
+ message: string;
1988
+ afterMs?: number;
1989
+ }>;
1990
+ /**
1991
+ * Per-tool execution timeout in milliseconds. `undefined` uses the
1992
+ * executor default (10 000 ms). Mirrors Python `timeout_s`.
1993
+ */
1994
+ readonly timeoutMs?: number;
1995
+ /**
1996
+ * Enable OpenAI strict mode for this tool's function schema. Off by
1997
+ * default. Mirrors Python `strict` on `Tool`.
1998
+ */
1999
+ readonly strict?: boolean;
1727
2000
  constructor(opts: ToolOptions);
1728
2001
  }
1729
2002
  /** Factory helper mirroring Python's `tool(...)` function. */
@@ -1850,6 +2123,8 @@ interface PerToolState {
1850
2123
  state: CircuitBreakerState;
1851
2124
  consecutiveFailures: number;
1852
2125
  openedAt: number;
2126
+ /** True while a HALF_OPEN probe call is already in-flight. */
2127
+ probeInFlight: boolean;
1853
2128
  }
1854
2129
  /** Per-name registry tracking circuit state for a fleet of tools. */
1855
2130
  declare class CircuitBreakerRegistry {
@@ -1888,7 +2163,7 @@ declare class CircuitBreakerRegistry {
1888
2163
  * Avoids a circular import from metrics.ts.
1889
2164
  */
1890
2165
  interface LlmUsageRecorder {
1891
- recordLlmUsage(provider: string, model: string, inputTokens: number, outputTokens: number, cacheReadTokens?: number, cacheCreationTokens?: number): void;
2166
+ recordLlmUsage(provider: string, model: string, inputTokens: number, outputTokens: number, cacheReadTokens?: number, cacheWriteTokens?: number): void;
1892
2167
  }
1893
2168
  /**
1894
2169
  * Pluggable tool executor — mirrors the Python ``ToolExecutor`` in
@@ -1956,7 +2231,7 @@ interface LLMChunk {
1956
2231
  inputTokens?: number;
1957
2232
  outputTokens?: number;
1958
2233
  cacheReadInputTokens?: number;
1959
- cacheCreationInputTokens?: number;
2234
+ cacheWriteInputTokens?: number;
1960
2235
  }
1961
2236
  /**
1962
2237
  * Interface that any LLM provider must satisfy.
@@ -2250,13 +2525,55 @@ type MCPServerConfig = string | {
2250
2525
  /** Optional logical name for telemetry / log lines. */
2251
2526
  readonly name?: string;
2252
2527
  };
2528
+ /**
2529
+ * OpenAI Realtime turn-detection tuning.
2530
+ *
2531
+ * Raise the VAD {@link threshold} (`server_vad`) or switch to
2532
+ * `semantic_vad` with {@link eagerness} `'low'` to stop speakerphone /
2533
+ * conference-room noise (mouse clicks, phone shifts, background chatter)
2534
+ * from being mistaken for the caller speaking and cutting the agent off.
2535
+ *
2536
+ * Each unset field falls back to the adapter's current default
2537
+ * (`server_vad`, threshold `0.5`, `prefixPaddingMs` `300`,
2538
+ * `silenceDurationMs` `300`). `type === 'semantic_vad'` emits
2539
+ * `{ type, eagerness }` only — OpenAI rejects `threshold` /
2540
+ * `prefixPaddingMs` / `silenceDurationMs` on the semantic detector.
2541
+ * `createResponse` / `interruptResponse` are NOT exposed (Patter keeps
2542
+ * its client-gated barge-in safety values).
2543
+ *
2544
+ * Mirrors Python `RealtimeTurnDetection` dataclass in `models.py`.
2545
+ */
2546
+ interface RealtimeTurnDetection {
2547
+ /** `"server_vad"` (default) or `"semantic_vad"`. */
2548
+ readonly type?: 'server_vad' | 'semantic_vad';
2549
+ /**
2550
+ * `server_vad` only — 0..1, higher rejects more background noise.
2551
+ * `undefined` keeps the adapter default (`0.5`).
2552
+ */
2553
+ readonly threshold?: number;
2554
+ /**
2555
+ * `server_vad` only — milliseconds of audio to include before the
2556
+ * VAD-detected start of speech. `undefined` keeps the adapter default (`300`).
2557
+ */
2558
+ readonly prefixPaddingMs?: number;
2559
+ /**
2560
+ * `server_vad` only — trailing silence (ms) before the turn ends.
2561
+ * `undefined` keeps the adapter default (`300`).
2562
+ */
2563
+ readonly silenceDurationMs?: number;
2564
+ /**
2565
+ * `semantic_vad` only — `"low"` waits longest for the caller to finish
2566
+ * (least likely to interrupt), `"high"` responds soonest, and `"auto"` is OpenAI's default (equivalent to `"medium"`).
2567
+ */
2568
+ readonly eagerness?: 'low' | 'medium' | 'high' | 'auto';
2569
+ }
2253
2570
  /** Internal shape of a tool definition (matches `Tool` from `public-api.ts`). */
2254
2571
  interface ToolDefinition {
2255
- name: string;
2256
- description: string;
2257
- parameters: Record<string, unknown>;
2572
+ readonly name: string;
2573
+ readonly description: string;
2574
+ readonly parameters: Readonly<Record<string, unknown>>;
2258
2575
  /** Webhook URL — called when the LLM invokes this tool. Mutually exclusive with handler. */
2259
- webhookUrl?: string;
2576
+ readonly webhookUrl?: string;
2260
2577
  /**
2261
2578
  * Local handler — called instead of ``webhookUrl`` when present.
2262
2579
  *
@@ -2274,7 +2591,7 @@ interface ToolDefinition {
2274
2591
  * ignores the progress yields — the final value is still used as
2275
2592
  * the tool result.
2276
2593
  */
2277
- handler?: ((args: Record<string, unknown>, context: Record<string, unknown>) => Promise<string>) | ((args: Record<string, unknown>, context: Record<string, unknown>) => AsyncGenerator<{
2594
+ readonly handler?: ((args: Record<string, unknown>, context: Record<string, unknown>) => Promise<string>) | ((args: Record<string, unknown>, context: Record<string, unknown>) => AsyncGenerator<{
2278
2595
  progress?: string;
2279
2596
  result?: string;
2280
2597
  }, string | void, unknown>);
@@ -2294,10 +2611,10 @@ interface ToolDefinition {
2294
2611
  * synthesises it inline. Pipeline mode has no clean injection point
2295
2612
  * mid-turn yet; the option is silently ignored there. Off by default.
2296
2613
  */
2297
- reassurance?: string | {
2614
+ readonly reassurance?: string | Readonly<{
2298
2615
  message: string;
2299
2616
  afterMs?: number;
2300
- };
2617
+ }>;
2301
2618
  /**
2302
2619
  * Enable OpenAI strict mode for this tool's function schema. When ``true``
2303
2620
  * the model is constrained to emit arguments that exactly match the
@@ -2318,7 +2635,123 @@ interface ToolDefinition {
2318
2635
  * Recommended for any tool whose handler/webhook can't safely tolerate
2319
2636
  * malformed arguments (DB writes, payment, transfers).
2320
2637
  */
2321
- strict?: boolean;
2638
+ readonly strict?: boolean;
2639
+ /**
2640
+ * Per-tool execution timeout in milliseconds, applied to BOTH the handler
2641
+ * and webhook paths. `undefined` (default) uses the executor default
2642
+ * (10 000 ms). Raise for long browser-automation / external-API tools
2643
+ * (e.g. `60_000`). Clamped to a 300 000 ms ceiling by the executor.
2644
+ *
2645
+ * Mirrors Python's `timeout_s` on `Tool` / `tool()`.
2646
+ */
2647
+ readonly timeoutMs?: number;
2648
+ }
2649
+ /**
2650
+ * Configuration for the built-in ``consult`` escalation tool.
2651
+ *
2652
+ * When set on an agent, Patter auto-injects a tool (default name
2653
+ * ``consult_agent``) that the in-call agent can invoke mid-call to reach the
2654
+ * developer's own back-office agent over HTTP for deeper reasoning, fresh
2655
+ * information, or an action beyond the call. Patter keeps STT + LLM/voice +
2656
+ * TTS + carrier; the back-office agent is consulted only on demand (never on
2657
+ * the per-turn path). The tool POSTs ``{ request, call_id, caller, callee }``
2658
+ * to {@link url}; the endpoint returns JSON with a ``reply`` / ``response`` /
2659
+ * ``text`` string (or any JSON / plain text) and the agent speaks it.
2660
+ *
2661
+ * Injected in **Realtime** and **Pipeline** modes only — ElevenLabs ConvAI
2662
+ * tools live on the ElevenLabs-hosted agent, so ``consult`` does not apply
2663
+ * there (a warning is emitted if set with that provider).
2664
+ */
2665
+ interface ConsultConfig {
2666
+ /**
2667
+ * Generic webhook endpoint Patter POSTs ``{ request, call_id, caller, callee }``
2668
+ * to. SSRF-validated at call start. Mutually exclusive with
2669
+ * {@link openaiCompatible} — set exactly one.
2670
+ */
2671
+ readonly url?: string;
2672
+ /**
2673
+ * Native target that speaks an OpenAI-compatible ``/chat/completions``
2674
+ * endpoint directly (e.g. an OpenClaw agent, or vLLM / Ollama / Groq) — no
2675
+ * hand-written adapter. Mutually exclusive with {@link url}. Use
2676
+ * {@link openclawConsult} for the OpenClaw preset.
2677
+ */
2678
+ readonly openaiCompatible?: OpenAICompatibleConsult;
2679
+ /** Optional headers (e.g. an ``Authorization`` bearer). Never logged. */
2680
+ readonly headers?: Readonly<Record<string, string>>;
2681
+ /**
2682
+ * Per-consult HTTP timeout in milliseconds. Higher than the generic
2683
+ * webhook-tool default (10 000 ms) because a consult may run deeper
2684
+ * reasoning. Default ``30000``.
2685
+ */
2686
+ readonly timeoutMs?: number;
2687
+ /** Name the LLM sees for the tool. Default ``"consult_agent"``. */
2688
+ readonly toolName?: string;
2689
+ /** Description the LLM sees — tune to steer when the agent escalates. */
2690
+ readonly description?: string;
2691
+ /**
2692
+ * Optional filler the agent speaks while the consult runs (Realtime mode
2693
+ * only) so a multi-second back-office call is not dead air. Omitted plays no
2694
+ * filler; the {@link openclawConsult} preset sets a sensible default.
2695
+ */
2696
+ readonly reassurance?: string | Readonly<{
2697
+ message: string;
2698
+ afterMs?: number;
2699
+ }>;
2700
+ /**
2701
+ * Opt-in: allow {@link url} to point at a loopback / private / link-local
2702
+ * host (e.g. a back-office agent on ``127.0.0.1`` or an RFC1918 LAN host).
2703
+ *
2704
+ * Default ``false`` (or ``undefined``) — the URL is SSRF-validated and
2705
+ * loopback/private/link-local targets are rejected, preserving the strict
2706
+ * default behaviour. Set ``true`` ONLY for a trusted, developer-configured
2707
+ * local agent: the URL is your own config, not caller-derived input.
2708
+ *
2709
+ * Even when ``true``, non-HTTP(S) schemes (``file:``, ``javascript:`` …)
2710
+ * are still rejected. Note: opting in also makes cloud-metadata hostnames
2711
+ * (``metadata``, ``metadata.google.internal``, ``metadata.azure.com``) and
2712
+ * the IMDS IP ``169.254.169.254`` reachable — an accepted tradeoff for a URL
2713
+ * you control. Applies ONLY to
2714
+ * the consult tool; the generic webhook-tool path stays strict.
2715
+ */
2716
+ readonly allowLoopback?: boolean;
2717
+ }
2718
+ /**
2719
+ * Native {@link ConsultConfig} target that speaks an OpenAI-compatible
2720
+ * ``/chat/completions`` endpoint directly — no hand-written adapter.
2721
+ *
2722
+ * Lets ``consult`` reach an OpenClaw agent (or any OpenAI-compatible gateway:
2723
+ * vLLM, Ollama, Groq, …). The consult handler builds a standard chat-completions
2724
+ * request (``model`` + ``messages`` + ``user``) and speaks
2725
+ * ``choices[0].message.content``. Prefer {@link openclawConsult} for the
2726
+ * OpenClaw preset rather than constructing this directly.
2727
+ */
2728
+ interface OpenAICompatibleConsult {
2729
+ /**
2730
+ * OpenAI-compatible base URL ending in ``/v1`` (the handler POSTs to
2731
+ * ``{baseUrl}/chat/completions``), e.g. ``http://127.0.0.1:18789/v1``.
2732
+ */
2733
+ readonly baseUrl: string;
2734
+ /**
2735
+ * Model / agent target. For OpenClaw this is the namespaced agent id, e.g.
2736
+ * ``"openclaw/receptionist"``.
2737
+ */
2738
+ readonly model: string;
2739
+ /**
2740
+ * Bearer token. Prefer {@link apiKeyEnv} so the secret stays out of source.
2741
+ * For OpenClaw this is an OPERATOR-grade credential — never logged.
2742
+ */
2743
+ readonly apiKey?: string;
2744
+ /**
2745
+ * Environment variable to read the bearer from when {@link apiKey} is not
2746
+ * given (e.g. ``"OPENCLAW_API_KEY"``).
2747
+ */
2748
+ readonly apiKeyEnv?: string;
2749
+ /**
2750
+ * Optional header carrying the per-call session id (the call id), e.g.
2751
+ * ``"x-openclaw-session-key"``. The call id is also sent as the OpenAI
2752
+ * ``user`` field.
2753
+ */
2754
+ readonly sessionHeader?: string;
2322
2755
  }
2323
2756
  /** Constructor options for `new Patter({...})` in local-server mode. */
2324
2757
  interface LocalOptions {
@@ -2331,14 +2764,14 @@ interface LocalOptions {
2331
2764
  * const phone = new Patter({ carrier: new Twilio(), phoneNumber: "+1..." });
2332
2765
  * ```
2333
2766
  */
2334
- carrier: Carrier$2 | Carrier$1 | Carrier;
2767
+ readonly carrier: Carrier$2 | Carrier$1 | Carrier;
2335
2768
  /**
2336
2769
  * Tunnel configuration. Accepts a tunnel instance, ``true`` (alias for
2337
2770
  * ``new CloudflareTunnel()``), or ``false`` / omitted (no tunnel).
2338
2771
  */
2339
- tunnel?: CloudflareTunnel | Static | boolean;
2340
- phoneNumber: string;
2341
- webhookUrl?: string;
2772
+ readonly tunnel?: CloudflareTunnel | Static | boolean;
2773
+ readonly phoneNumber: string;
2774
+ readonly webhookUrl?: string;
2342
2775
  /**
2343
2776
  * On-disk persistence for the dashboard's call history. The dashboard
2344
2777
  * itself is in-memory, but enabling ``persist`` writes per-call records
@@ -2366,25 +2799,25 @@ interface LocalOptions {
2366
2799
  * Phone numbers are masked by default; control via
2367
2800
  * ``PATTER_LOG_REDACT_PHONE``.
2368
2801
  */
2369
- persist?: boolean | string;
2802
+ readonly persist?: boolean | string;
2370
2803
  /**
2371
2804
  * @internal — allows ``StreamHandler`` to build the default OpenAI
2372
2805
  * ``LLMLoop`` when no ``onMessage`` handler is supplied. The
2373
2806
  * ``OpenAIRealtime`` engine instance carries its own key when one is
2374
2807
  * used via ``phone.agent({ engine: new OpenAIRealtime({ apiKey }) })``.
2375
2808
  */
2376
- openaiKey?: string;
2809
+ readonly openaiKey?: string;
2377
2810
  }
2378
2811
  /** Internal shape of a guardrail (matches `Guardrail` class from `public-api.ts`). */
2379
2812
  interface Guardrail {
2380
2813
  /** Name for logging when triggered */
2381
- name: string;
2814
+ readonly name: string;
2382
2815
  /** List of terms that trigger the guardrail (case-insensitive) */
2383
- blockedTerms?: string[];
2816
+ readonly blockedTerms?: ReadonlyArray<string>;
2384
2817
  /** Custom check function — return true to block the response */
2385
- check?: (text: string) => boolean;
2818
+ readonly check?: (text: string) => boolean;
2386
2819
  /** Replacement text spoken when guardrail triggers */
2387
- replacement?: string;
2820
+ readonly replacement?: string;
2388
2821
  }
2389
2822
  /** Per-call context passed to every pipeline hook. */
2390
2823
  interface HookContext {
@@ -2493,29 +2926,29 @@ interface BackgroundAudioPlayer$1 {
2493
2926
  */
2494
2927
  /** Configuration for a local-mode voice AI agent (passed to `phone.agent({...})`). */
2495
2928
  interface AgentOptions {
2496
- systemPrompt: string;
2929
+ readonly systemPrompt: string;
2497
2930
  /**
2498
2931
  * Voice preset. When ``engine`` is provided, its ``voice`` is used unless
2499
2932
  * explicitly overridden here. Format depends on the engine:
2500
2933
  * OpenAI Realtime accepts a name (``'alloy'``, ``'echo'``, ...);
2501
2934
  * ElevenLabs ConvAI accepts a voice ID.
2502
2935
  */
2503
- voice?: string;
2936
+ readonly voice?: string;
2504
2937
  /**
2505
2938
  * LLM / Realtime model. When ``engine`` is provided, its ``model`` is used
2506
2939
  * unless explicitly overridden here.
2507
2940
  */
2508
- model?: string;
2941
+ readonly model?: string;
2509
2942
  /**
2510
2943
  * BCP-47 language code (e.g. ``'en'``, ``'it'``). Forwarded to STT (in
2511
2944
  * pipeline mode) and to the engine adapter at call time. STTConfig has its
2512
2945
  * own ``language`` field for the rare case where STT must use a different
2513
2946
  * language than the rest of the pipeline.
2514
2947
  */
2515
- language?: string;
2516
- firstMessage?: string;
2948
+ readonly language?: string;
2949
+ readonly firstMessage?: string;
2517
2950
  /** Tool definitions — ``Tool`` class instances from ``getpatter``. */
2518
- tools?: Array<Tool>;
2951
+ readonly tools?: ReadonlyArray<Tool>;
2519
2952
  /**
2520
2953
  * Model Context Protocol (MCP) servers to plug into this agent. Each
2521
2954
  * server is queried at call start via ``tools/list`` and its tools
@@ -2536,14 +2969,23 @@ interface AgentOptions {
2536
2969
  * call start (~50-200 ms × N servers). Future iterations may cache
2537
2970
  * the discovered list process-wide.
2538
2971
  */
2539
- mcpServers?: ReadonlyArray<MCPServerConfig>;
2972
+ readonly mcpServers?: ReadonlyArray<MCPServerConfig>;
2973
+ /**
2974
+ * Optional back-office "consult" escalation. When set, Patter auto-injects a
2975
+ * ``consult_agent`` tool (Realtime + Pipeline modes) that the in-call agent
2976
+ * can invoke to reach the developer's own orchestrator over HTTP for deeper
2977
+ * reasoning / fresh info, then speak the reply. The orchestrator stays off
2978
+ * the per-turn path — consulted only on demand. ``undefined`` (default)
2979
+ * disables it. See {@link ConsultConfig}.
2980
+ */
2981
+ readonly consult?: ConsultConfig;
2540
2982
  /**
2541
2983
  * When ``true``, ship ``systemPrompt`` to the LLM verbatim. Default
2542
2984
  * (``false``) prepends a phone-friendly preamble that instructs the
2543
2985
  * model to avoid markdown, emojis, bullet lists, and verbose replies —
2544
2986
  * the conventions live phone calls require.
2545
2987
  */
2546
- disablePhonePreamble?: boolean;
2988
+ readonly disablePhonePreamble?: boolean;
2547
2989
  /**
2548
2990
  * Acoustic echo cancellation. When `true` (pipeline mode only) the SDK
2549
2991
  * instantiates an `NlmsEchoCanceller` that subtracts the agent's own
@@ -2555,53 +2997,53 @@ interface AgentOptions {
2555
2997
  * convergence period would briefly attenuate caller speech if they
2556
2998
  * spoke before any TTS played.
2557
2999
  */
2558
- echoCancellation?: boolean;
3000
+ readonly echoCancellation?: boolean;
2559
3001
  /**
2560
3002
  * Realtime / ConvAI engine instance. When present, the agent runs in the
2561
3003
  * matching mode (``openai_realtime`` or ``elevenlabs_convai``). When absent,
2562
3004
  * pipeline mode is selected if ``stt`` and ``tts`` are provided.
2563
3005
  */
2564
- engine?: Realtime | Realtime2 | ConvAI;
3006
+ readonly engine?: Realtime | Realtime2 | ConvAI;
2565
3007
  /**
2566
3008
  * Provider mode. Normally derived from ``engine`` / ``stt`` + ``tts``. Pass
2567
3009
  * ``'pipeline'`` explicitly when building a pipeline-mode agent without
2568
3010
  * an engine instance.
2569
3011
  */
2570
- provider?: 'openai_realtime' | 'elevenlabs_convai' | 'pipeline';
3012
+ readonly provider?: 'openai_realtime' | 'elevenlabs_convai' | 'pipeline';
2571
3013
  /** Pre-instantiated STT adapter (e.g. ``new DeepgramSTT({ apiKey })``). */
2572
- stt?: STTAdapter;
3014
+ readonly stt?: STTAdapter;
2573
3015
  /** Pre-instantiated TTS adapter (e.g. ``new ElevenLabsTTS({ apiKey })``). */
2574
- tts?: TTSAdapter;
3016
+ readonly tts?: TTSAdapter;
2575
3017
  /**
2576
3018
  * Pipeline-mode LLM provider (e.g. ``new AnthropicLLM()``). When set, the
2577
3019
  * built-in LLM loop uses this provider instead of the OpenAI default.
2578
3020
  * Mutually exclusive with ``onMessage`` passed to ``serve()``. Ignored
2579
3021
  * when ``engine`` is set (realtime mode bypasses the pipeline LLM).
2580
3022
  */
2581
- llm?: LLMProvider;
3023
+ readonly llm?: LLMProvider;
2582
3024
  /** Dynamic variables for ``{placeholder}`` substitution in systemPrompt at call time. */
2583
- variables?: Record<string, string>;
3025
+ readonly variables?: Readonly<Record<string, string>>;
2584
3026
  /** Output guardrails — ``Guardrail`` class instances from ``getpatter``. */
2585
- guardrails?: Array<Guardrail>;
3027
+ readonly guardrails?: ReadonlyArray<Guardrail>;
2586
3028
  /** Pipeline hooks — intercept and transform data at each pipeline stage (pipeline mode only). */
2587
- hooks?: PipelineHooks;
3029
+ readonly hooks?: PipelineHooks;
2588
3030
  /** Text transforms applied to LLM output before TTS (pipeline mode only).
2589
3031
  * Each function receives a string and returns the transformed string.
2590
3032
  * Applied in order before the ``beforeSynthesize`` hook. */
2591
- textTransforms?: Array<(text: string) => string>;
3033
+ readonly textTransforms?: ReadonlyArray<(text: string) => string>;
2592
3034
  /** Optional server-side VAD (e.g., Silero). Pipeline mode only. */
2593
- vad?: VADProvider;
3035
+ readonly vad?: VADProvider;
2594
3036
  /** Optional pre-STT audio filter (noise cancellation). Pipeline mode only. */
2595
- audioFilter?: AudioFilter;
3037
+ readonly audioFilter?: AudioFilter;
2596
3038
  /** Optional background audio mixer (hold music, thinking cues). Pipeline mode only. */
2597
- backgroundAudio?: BackgroundAudioPlayer$1;
3039
+ readonly backgroundAudio?: BackgroundAudioPlayer$1;
2598
3040
  /**
2599
3041
  * Minimum sustained voice (ms) before treating caller audio as a barge-in
2600
3042
  * and interrupting TTS. `0` disables barge-in entirely — useful on noisy
2601
3043
  * links (ngrok tunnels, speakerphone) where the agent can hear itself.
2602
3044
  * Default: 300.
2603
3045
  */
2604
- bargeInThresholdMs?: number;
3046
+ readonly bargeInThresholdMs?: number;
2605
3047
  /**
2606
3048
  * Opt-in barge-in confirmation strategies (pipeline mode). With the
2607
3049
  * default empty array the SDK falls back to the legacy
@@ -2618,14 +3060,14 @@ interface AgentOptions {
2618
3060
  * ``MinWordsStrategy`` for the protocol and a reference
2619
3061
  * implementation.
2620
3062
  */
2621
- bargeInStrategies?: readonly BargeInStrategy[];
3063
+ readonly bargeInStrategies?: readonly BargeInStrategy[];
2622
3064
  /**
2623
3065
  * Maximum time (ms) to wait for at least one strategy to confirm a
2624
3066
  * pending barge-in before discarding the pending state and resuming
2625
3067
  * TTS. Only consulted when ``bargeInStrategies`` is non-empty.
2626
3068
  * Default: 1500.
2627
3069
  */
2628
- bargeInConfirmMs?: number;
3070
+ readonly bargeInConfirmMs?: number;
2629
3071
  /**
2630
3072
  * When ``true`` (default), ``Patter.call`` warms up the STT, TTS, and
2631
3073
  * LLM provider connections in parallel with the carrier-side
@@ -2636,7 +3078,7 @@ interface AgentOptions {
2636
3078
  * of the WebSocket bridge. Best-effort: warmup failures are logged
2637
3079
  * at debug level and never abort the call. Default: ``true``.
2638
3080
  */
2639
- prewarm?: boolean;
3081
+ readonly prewarm?: boolean;
2640
3082
  /**
2641
3083
  * When ``true`` (default since 0.6.2 in pipeline mode), ``Patter.call``
2642
3084
  * pre-renders ``firstMessage`` to TTS audio bytes during the ringing
@@ -2655,7 +3097,7 @@ interface AgentOptions {
2655
3097
  * ``Patter.call`` refuses to spawn the prewarm task and emits a warn
2656
3098
  * when ``provider !== 'pipeline'``.
2657
3099
  */
2658
- prewarmFirstMessage?: boolean;
3100
+ readonly prewarmFirstMessage?: boolean;
2659
3101
  /**
2660
3102
  * When true, the sentence chunker emits the first clause of each response
2661
3103
  * on a soft punctuation boundary (",", em-dash, en-dash) once ~40 chars
@@ -2667,38 +3109,124 @@ interface AgentOptions {
2667
3109
  * See SentenceChunker constructor for the full guard list (decimal,
2668
3110
  * currency, balanced delimiter, ellipsis).
2669
3111
  */
2670
- aggressiveFirstFlush?: boolean;
3112
+ readonly aggressiveFirstFlush?: boolean;
3113
+ /**
3114
+ * Input noise reduction for speakerphone / conference audio (OpenAI
3115
+ * Realtime mode only). `undefined` (default) omits the field entirely
3116
+ * (no reduction — today's behavior).
3117
+ *
3118
+ * - `"far_field"` — recommended for phone / speakerphone calls where
3119
+ * the mic is more than ~30 cm from the speaker.
3120
+ * - `"near_field"` — for a handset held close to the mouth.
3121
+ *
3122
+ * v1 Realtime: emitted at the top level of `session.update` as
3123
+ * `input_audio_noise_reduction: { type }`. GA Realtime (gpt-realtime-2):
3124
+ * nested under `audio.input.input_audio_noise_reduction: { type }`.
3125
+ *
3126
+ * Mirrors Python `openai_realtime_noise_reduction` on `Patter.agent()` /
3127
+ * `Agent` and `noise_reduction` on `engines.openai.Realtime`.
3128
+ */
3129
+ readonly openaiRealtimeNoiseReduction?: 'near_field' | 'far_field';
3130
+ /**
3131
+ * Turn-detection tuning for OpenAI Realtime mode. `undefined` (default)
3132
+ * keeps the adapter's current hardcoded `server_vad` / threshold `0.5` /
3133
+ * silence 300 ms settings.
3134
+ *
3135
+ * Raise {@link RealtimeTurnDetection.threshold} (`server_vad`) or switch
3136
+ * to `semantic_vad` with `eagerness: 'low'` to stop speakerphone /
3137
+ * conference noise from triggering false barge-ins.
3138
+ *
3139
+ * Mirrors Python `realtime_turn_detection` on `Patter.agent()` / `Agent`
3140
+ * and `turn_detection` on `engines.openai.Realtime`.
3141
+ */
3142
+ readonly realtimeTurnDetection?: RealtimeTurnDetection;
3143
+ /**
3144
+ * Gate the OpenAI Realtime model's response on the Whisper input
3145
+ * transcript (legacy behavior). OpenAI Realtime mode only.
3146
+ *
3147
+ * - `false` / `undefined` (default) — the speech-to-speech model responds
3148
+ * as soon as the user stops speaking (`speech_stopped`), independently
3149
+ * of the Whisper transcription. The transcript becomes a pure
3150
+ * observability side-channel (dashboard / history / `onTranscript`) and
3151
+ * never gates, triggers, or cancels the response. Reclaims ~500 ms of
3152
+ * latency because the model no longer waits for Whisper.
3153
+ * - `true` — restores the prior behavior where the response is requested
3154
+ * only after the Whisper `transcript_input` event arrives. Production
3155
+ * flows should keep the default; this is for callers that depended on
3156
+ * the old transcript-gated ordering.
3157
+ *
3158
+ * Mirrors Python `realtime_gate_response_on_transcript` on `Patter.agent()`
3159
+ * / `Agent` and `gate_response_on_transcript` on `engines.openai.Realtime`.
3160
+ */
3161
+ readonly openaiRealtimeGateResponseOnTranscript?: boolean;
3162
+ /**
3163
+ * When set, Patter prepends a native "# Preambles" guidance block to the
3164
+ * OpenAI Realtime session `instructions` so the model speaks one short,
3165
+ * action-describing sentence ("I'll check that order now.") before a tool
3166
+ * call that may take a moment, in its own voice. Most effective on
3167
+ * `gpt-realtime-2`, where preambles are first-class.
3168
+ *
3169
+ * - `undefined` / `false` (default) — no change to the prompt; the
3170
+ * instructions stay byte-identical to prior releases.
3171
+ * - `true` — Patter prepends the built-in block.
3172
+ * - `string` — used verbatim as the full preamble block (override).
3173
+ *
3174
+ * Realtime modes only; pipeline mode has its own phone preamble (see
3175
+ * `disablePhonePreamble`). Mirrors Python `tool_call_preambles` on
3176
+ * `Patter.agent()` / `Agent`.
3177
+ */
3178
+ readonly toolCallPreambles?: boolean | string;
2671
3179
  }
2672
3180
  /** Pipeline-mode message handler — given full turn context, returns the agent's reply. */
2673
3181
  type PipelineMessageHandler = (data: Record<string, unknown>) => Promise<string>;
2674
3182
  /** Options for `Patter.serve({...})`. */
2675
3183
  interface ServeOptions {
2676
- agent: AgentOptions;
2677
- port?: number;
3184
+ readonly agent: AgentOptions;
3185
+ readonly port?: number;
2678
3186
  /** When true, start a cloudflared tunnel automatically (requires `cloudflared` npm package). */
2679
- tunnel?: boolean;
2680
- onCallStart?: (data: Record<string, unknown>) => Promise<void>;
2681
- onCallEnd?: (data: Record<string, unknown>) => Promise<void>;
2682
- onTranscript?: (data: Record<string, unknown>) => Promise<void>;
3187
+ readonly tunnel?: boolean;
3188
+ readonly onCallStart?: (data: Record<string, unknown>) => Promise<void>;
3189
+ readonly onCallEnd?: (data: Record<string, unknown>) => Promise<void>;
3190
+ readonly onTranscript?: (data: Record<string, unknown>) => Promise<void>;
2683
3191
  /** Pipeline mode only — called with the user's transcript; return value is spoken.
2684
3192
  * Can also be a URL string for remote webhook/WebSocket integration. */
2685
- onMessage?: PipelineMessageHandler | string;
3193
+ readonly onMessage?: PipelineMessageHandler | string;
2686
3194
  /** Called after each turn with per-turn metrics. */
2687
- onMetrics?: (data: Record<string, unknown>) => Promise<void>;
3195
+ readonly onMetrics?: (data: Record<string, unknown>) => Promise<void>;
2688
3196
  /** When true, record calls via the Twilio Recordings API. */
2689
- recording?: boolean;
3197
+ readonly recording?: boolean;
2690
3198
  /** If set, spoken as a voicemail message when AMD detects a machine. */
2691
- voicemailMessage?: string;
3199
+ readonly voicemailMessage?: string;
2692
3200
  /** Custom pricing overrides for cost calculation. */
2693
- pricing?: Record<string, Record<string, unknown>>;
3201
+ readonly pricing?: Readonly<Record<string, Record<string, unknown>>>;
2694
3202
  /** When true (default), serve a dashboard UI at /dashboard. */
2695
- dashboard?: boolean;
3203
+ readonly dashboard?: boolean;
2696
3204
  /** Bearer token for dashboard/API authentication. */
2697
- dashboardToken?: string;
3205
+ readonly dashboardToken?: string;
3206
+ /**
3207
+ * When true, serve the dashboard (and the call-data `/api/*` routes)
3208
+ * fully OPEN — WITHOUT authentication — even when the server is
3209
+ * reachable beyond loopback (e.g. behind a tunnel or a public webhook
3210
+ * URL). **NOT RECOMMENDED on a public network** — the dashboard exposes
3211
+ * call transcripts and metadata (PII) to anyone who can reach the URL.
3212
+ *
3213
+ * Defaults to `false` (security). With the default, when the dashboard
3214
+ * is enabled, `dashboardToken` is empty, AND the server is exposed
3215
+ * beyond `127.0.0.1`, the SDK auto-generates a one-time token and mounts
3216
+ * the dashboard behind it (the startup banner prints the ready-to-use
3217
+ * URL with `?token=...`). The dashboard is always available — it just
3218
+ * requires the printed or configured token. Loopback-only local dev is
3219
+ * unchanged: served open with no token.
3220
+ *
3221
+ * For a stable token instead of the per-process auto-generated one, set
3222
+ * `dashboardToken`. Set this flag only as the deliberate escape hatch
3223
+ * for the rare case where unauthenticated public exposure is intentional.
3224
+ */
3225
+ readonly allowInsecureDashboard?: boolean;
2698
3226
  /** Path to SQLite database for dashboard persistence (not used in TS yet). */
2699
- dashboardDb?: string;
3227
+ readonly dashboardDb?: string;
2700
3228
  /** When true (default), persist dashboard data. */
2701
- dashboardPersist?: boolean;
3229
+ readonly dashboardPersist?: boolean;
2702
3230
  /**
2703
3231
  * When true (default), `serve()` calls the carrier's API on startup to
2704
3232
  * point the configured phone number's webhook URL at this server. Set
@@ -2718,7 +3246,7 @@ interface ServeOptions {
2718
3246
  * hostname is dynamic and only known at runtime — the carrier MUST be
2719
3247
  * reconfigured for inbound calls to land.
2720
3248
  */
2721
- manageWebhook?: boolean;
3249
+ readonly manageWebhook?: boolean;
2722
3250
  }
2723
3251
  /**
2724
3252
  * Normalised AMD (answering-machine detection) result emitted to
@@ -2744,8 +3272,8 @@ interface MachineDetectionResult {
2744
3272
  }
2745
3273
  /** Options for `Patter.call({...})` to place an outbound call. */
2746
3274
  interface LocalCallOptions {
2747
- to: string;
2748
- agent: AgentOptions;
3275
+ readonly to: string;
3276
+ readonly agent: AgentOptions;
2749
3277
  /**
2750
3278
  * Enable answering-machine detection. **Defaults to ``true``** — the SDK
2751
3279
  * asks Twilio (``MachineDetection=DetectMessageEnd`` + Async AMD) or
@@ -2756,7 +3284,7 @@ interface LocalCallOptions {
2756
3284
  * disable when you want to skip per-call AMD billing or you already
2757
3285
  * know the destination is a human.
2758
3286
  */
2759
- machineDetection?: boolean;
3287
+ readonly machineDetection?: boolean;
2760
3288
  /**
2761
3289
  * Called once when the carrier finishes the AMD check. Fires for both
2762
3290
  * ``human`` and ``machine`` outcomes. Combine with ``voicemailMessage``
@@ -2764,11 +3292,11 @@ interface LocalCallOptions {
2764
3292
  * fires the callback after the drop is queued). Acceptance tests use
2765
3293
  * this to mark a run INVALID when ``classification !== 'human'``.
2766
3294
  */
2767
- onMachineDetection?: (result: MachineDetectionResult) => void | Promise<void>;
3295
+ readonly onMachineDetection?: (result: MachineDetectionResult) => void | Promise<void>;
2768
3296
  /** If set, spoken as a voicemail message when AMD detects a machine. Implicitly enables ``machineDetection``. */
2769
- voicemailMessage?: string;
3297
+ readonly voicemailMessage?: string;
2770
3298
  /** Dynamic variables merged into agent.variables before call. Override agent-level variables. */
2771
- variables?: Record<string, string>;
3299
+ readonly variables?: Readonly<Record<string, string>>;
2772
3300
  /**
2773
3301
  * Ring timeout in seconds. Forwarded to Twilio as `Timeout` and to Telnyx
2774
3302
  * as `timeout_secs`. Defaults to **25 s** — the production-recommended
@@ -2776,7 +3304,7 @@ interface LocalCallOptions {
2776
3304
  * parity, or `null` to omit the parameter entirely (carrier picks its
2777
3305
  * own default).
2778
3306
  */
2779
- ringTimeout?: number | null;
3307
+ readonly ringTimeout?: number | null;
2780
3308
  /**
2781
3309
  * When `true`, block until the call reaches a terminal state and resolve
2782
3310
  * to a {@link CallResult} (`outcome` ∈ answered / voicemail / no_answer /
@@ -2790,7 +3318,7 @@ interface LocalCallOptions {
2790
3318
  *
2791
3319
  * Mirrors Python's `Patter.call(..., wait=True)`.
2792
3320
  */
2793
- wait?: boolean;
3321
+ readonly wait?: boolean;
2794
3322
  }
2795
3323
  /**
2796
3324
  * Carrier-agnostic terminal outcomes for an outbound call. `answered` means a
@@ -3136,7 +3664,7 @@ interface ElevenLabsParkedWS {
3136
3664
  /** WebSocket-based ElevenLabs TTS adapter — opt-in low-latency variant. */
3137
3665
  declare class ElevenLabsWebSocketTTS implements TTSAdapter {
3138
3666
  static readonly providerKey = "elevenlabs_ws";
3139
- readonly apiKey: string;
3667
+ private readonly apiKey;
3140
3668
  readonly voiceId: string;
3141
3669
  readonly modelId: string;
3142
3670
  readonly voiceSettings?: Record<string, unknown>;
@@ -3692,6 +4220,86 @@ interface DefineToolInput {
3692
4220
  */
3693
4221
  declare function defineTool(input: DefineToolInput): ToolDefinition;
3694
4222
 
4223
+ /**
4224
+ * Built-in ``consult`` tool — lets the in-call agent escalate to the caller's
4225
+ * own back-office agent for deeper reasoning or fresh information, then speak
4226
+ * the answer.
4227
+ *
4228
+ * This is the *dispatch + consult* pattern: Patter conducts the call (STT +
4229
+ * LLM/voice + TTS + carrier); when the in-call agent hits something it cannot
4230
+ * answer directly, it invokes this tool, which reaches the configured
4231
+ * back-office agent and returns the reply for the agent to speak. The
4232
+ * back-office agent stays off the per-turn path — consulted only on demand, so
4233
+ * ordinary turns keep their low latency.
4234
+ *
4235
+ * Two targets are supported (see {@link ConsultConfig}):
4236
+ *
4237
+ * - ``url`` — the generic webhook path: POSTs ``{ request, call_id, caller,
4238
+ * callee }`` to your endpoint and reads a ``reply`` field back.
4239
+ * - ``openaiCompatible`` — speaks an OpenAI-compatible ``/chat/completions``
4240
+ * endpoint directly (e.g. an OpenClaw agent, or vLLM / Ollama / Groq) with no
4241
+ * hand-written adapter: POSTs ``{ model, messages, user }`` and speaks
4242
+ * ``choices[0].message.content``. Use {@link openclawConsult}.
4243
+ *
4244
+ * The handler does the HTTP call itself so the per-consult timeout and auth from
4245
+ * {@link ConsultConfig} are honoured. ``config.reassurance``, when set, is
4246
+ * attached so the agent speaks a filler while the consult runs (Realtime mode
4247
+ * only).
4248
+ */
4249
+
4250
+ /**
4251
+ * Build a {@link ConsultConfig} that consults a specific OpenClaw agent directly
4252
+ * (no hand-written adapter) — the TypeScript equivalent of Python's
4253
+ * ``ConsultConfig.openclaw(...)``.
4254
+ *
4255
+ * ``agent`` is the OpenClaw agent id (e.g. ``"receptionist"``) → targets
4256
+ * ``model="openclaw/<agent>"``. An already-namespaced target (``"openclaw/x"``,
4257
+ * ``"openclaw:x"``, ``"agent:x"``) is passed through. ``allowLoopback`` defaults
4258
+ * to ``true`` when ``baseUrl`` is loopback/private (the intended co-located
4259
+ * deployment). The gateway bearer is read from ``apiKey`` or the
4260
+ * ``OPENCLAW_API_KEY`` env var (operator-grade — never logged). The timeout is
4261
+ * sized at the phone-safe 30 s default; raise it only for batch-style agents,
4262
+ * and never above 30 s on a live call.
4263
+ */
4264
+ declare function openclawConsult(agent: string, opts?: {
4265
+ readonly baseUrl?: string;
4266
+ readonly apiKey?: string;
4267
+ readonly timeoutMs?: number;
4268
+ readonly toolName?: string;
4269
+ readonly description?: string;
4270
+ readonly reassurance?: string | Readonly<{
4271
+ message: string;
4272
+ afterMs?: number;
4273
+ }>;
4274
+ readonly headers?: Readonly<Record<string, string>>;
4275
+ readonly allowLoopback?: boolean;
4276
+ }): ConsultConfig;
4277
+ /**
4278
+ * Return an ``onCallEnd`` callback that posts the finished call's record to a
4279
+ * specific OpenClaw agent, so the brain has the record and can follow up — the
4280
+ * TypeScript equivalent of Python's ``openclaw_post_call_notifier``.
4281
+ *
4282
+ * Wire it on ``serve``:
4283
+ *
4284
+ * await phone.serve({ agent, onCallEnd: openclawPostCallNotifier('receptionist') });
4285
+ *
4286
+ * The record is POSTed to the same OpenClaw agent over its OpenAI-compatible
4287
+ * ``/chat/completions`` gateway, keyed to the call id (the ``user`` field +
4288
+ * ``x-openclaw-session-key`` header) so it lands in the SAME OpenClaw session as
4289
+ * the in-call ``consult`` turns. Fire-and-forget: any error is logged by type
4290
+ * only (never the URL / headers / key) and never thrown into teardown. Args
4291
+ * mirror {@link openclawConsult}; the bearer is read from ``apiKey`` or
4292
+ * ``OPENCLAW_API_KEY`` (operator-grade — never logged).
4293
+ */
4294
+ declare function openclawPostCallNotifier(agent: string, opts?: {
4295
+ readonly baseUrl?: string;
4296
+ readonly apiKey?: string;
4297
+ readonly timeoutMs?: number;
4298
+ readonly allowLoopback?: boolean;
4299
+ readonly includeTranscript?: boolean;
4300
+ readonly instruction?: string;
4301
+ }): (data: Record<string, unknown>) => Promise<void>;
4302
+
3695
4303
  /**
3696
4304
  * Process-wide logger used by the SDK.
3697
4305
  *
@@ -3907,6 +4515,16 @@ declare class PatterError extends Error {
3907
4515
  code?: ErrorCode;
3908
4516
  });
3909
4517
  }
4518
+ /**
4519
+ * Invalid constructor arguments, a missing required environment variable, or a
4520
+ * frozen-config constraint violation. Parity with Python's
4521
+ * ``PatterConfigError`` in ``libraries/python/getpatter/exceptions.py``.
4522
+ */
4523
+ declare class PatterConfigError extends PatterError {
4524
+ constructor(message: string, options?: {
4525
+ code?: ErrorCode;
4526
+ });
4527
+ }
3910
4528
  /** Network / WebSocket / HTTP-level connectivity failure when talking to a provider. */
3911
4529
  declare class PatterConnectionError extends PatterError {
3912
4530
  constructor(message: string, options?: {
@@ -4154,9 +4772,9 @@ declare class FallbackLLMProvider implements LLMProvider {
4154
4772
  * markers are filtered out so callers can concatenate the yielded strings
4155
4773
  * directly.
4156
4774
  */
4157
- completeStream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<string, void, unknown>;
4775
+ completeStream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<string, void, unknown>;
4158
4776
  /** Streaming entry point — yields chunks from the first provider that succeeds. */
4159
- stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
4777
+ stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<LLMChunk, void, unknown>;
4160
4778
  private tryProviders;
4161
4779
  private markUnavailable;
4162
4780
  private startRecovery;
@@ -4269,49 +4887,49 @@ interface PatterToolOptions {
4269
4887
  * Patter instance to dial through. Must be in local mode (have a `carrier`).
4270
4888
  * The tool boots `phone.serve()` on `start()`; do not call `serve()` yourself.
4271
4889
  */
4272
- phone: Patter;
4890
+ readonly phone: Patter;
4273
4891
  /**
4274
4892
  * Default agent config used for outbound calls. Per-call overrides come from
4275
4893
  * `execute({ goal, first_message })`.
4276
4894
  */
4277
- agent?: AgentOptions;
4895
+ readonly agent?: AgentOptions;
4278
4896
  /** Tool name shown to the LLM. Default `'make_phone_call'`. */
4279
- name?: string;
4897
+ readonly name?: string;
4280
4898
  /** Tool description for the LLM. Default tuned for English assistants. */
4281
- description?: string;
4899
+ readonly description?: string;
4282
4900
  /** Default per-call timeout in seconds. Default 180. */
4283
- maxDurationSec?: number;
4901
+ readonly maxDurationSec?: number;
4284
4902
  /**
4285
4903
  * Optional pass-through for `phone.serve()`'s `recording` flag — record all
4286
4904
  * outbound calls placed via this tool.
4287
4905
  */
4288
- recording?: boolean;
4906
+ readonly recording?: boolean;
4289
4907
  }
4290
4908
  /** Args accepted by `PatterTool.execute()` (and the OpenAI/Anthropic/Hermes tool schemas). */
4291
4909
  interface PatterToolExecuteArgs {
4292
- to: string;
4293
- goal?: string;
4294
- first_message?: string;
4295
- max_duration_sec?: number;
4910
+ readonly to: string;
4911
+ readonly goal?: string;
4912
+ readonly first_message?: string;
4913
+ readonly max_duration_sec?: number;
4296
4914
  }
4297
4915
  /** Result envelope returned by `PatterTool.execute()` once the underlying call ends. */
4298
4916
  interface PatterToolResult {
4299
- call_id: string;
4300
- status: string;
4301
- duration_seconds: number;
4917
+ readonly call_id: string;
4918
+ readonly status: string;
4919
+ readonly duration_seconds: number;
4302
4920
  /**
4303
4921
  * Carrier-agnostic outcome (answered / voicemail / no_answer / busy /
4304
4922
  * failed) lifted from the SDK {@link CallResult}. Optional for backward
4305
4923
  * compatibility with any code constructing this envelope without it.
4306
4924
  */
4307
- outcome?: string;
4308
- cost_usd?: number;
4309
- transcript: Array<{
4925
+ readonly outcome?: string;
4926
+ readonly cost_usd?: number;
4927
+ readonly transcript: ReadonlyArray<Readonly<{
4310
4928
  role: string;
4311
4929
  text: string;
4312
4930
  timestamp?: number;
4313
- }>;
4314
- metrics?: Record<string, unknown> | null;
4931
+ }>>;
4932
+ readonly metrics?: Readonly<Record<string, unknown>> | null;
4315
4933
  }
4316
4934
  /** Wraps a live `Patter` instance as a tool callable from external agent frameworks. */
4317
4935
  declare class PatterTool {
@@ -4322,6 +4940,11 @@ declare class PatterTool {
4322
4940
  private readonly maxDurationSec;
4323
4941
  private readonly recording;
4324
4942
  private started;
4943
+ /** Cached in-progress (or completed) start promise so concurrent execute()
4944
+ * callers all await the same boot sequence instead of each racing into
4945
+ * phone.serve(). Reset to null on failure so callers can retry after a
4946
+ * transient error. */
4947
+ private startPromise;
4325
4948
  constructor(opts: PatterToolOptions);
4326
4949
  /** OpenAI Chat Completions / Assistants tool spec. */
4327
4950
  openaiSchema(): {
@@ -4355,8 +4978,12 @@ declare class PatterTool {
4355
4978
  * `serve()` provides here. No `onCallEnd` callback is wired: the SDK's own
4356
4979
  * per-callId completion registry resolves the result, so the user's
4357
4980
  * `onCallEnd` slot is left free.
4981
+ *
4982
+ * Idempotent and concurrency-safe: concurrent callers all await the same
4983
+ * in-progress boot instead of each racing into `phone.serve()`.
4358
4984
  */
4359
4985
  start(): Promise<void>;
4986
+ private _doStart;
4360
4987
  /** Best-effort shutdown — tear the Patter server down via `disconnect()`. */
4361
4988
  stop(): Promise<void>;
4362
4989
  /**
@@ -4608,23 +5235,23 @@ interface Transcript$6 {
4608
5235
  type TranscriptCallback$6 = (transcript: Transcript$6) => void;
4609
5236
  /** Constructor options for {@link SonioxSTT}. */
4610
5237
  interface SonioxSTTOptions$1 {
4611
- model?: SonioxModel | string;
4612
- languageHints?: string[];
4613
- languageHintsStrict?: boolean;
4614
- sampleRate?: SonioxSampleRate | number;
4615
- numChannels?: number;
4616
- enableSpeakerDiarization?: boolean;
4617
- enableLanguageIdentification?: boolean;
4618
- maxEndpointDelayMs?: number;
4619
- clientReferenceId?: string;
4620
- baseUrl?: string;
5238
+ readonly model?: SonioxModel | string;
5239
+ readonly languageHints?: readonly string[];
5240
+ readonly languageHintsStrict?: boolean;
5241
+ readonly sampleRate?: SonioxSampleRate | number;
5242
+ readonly numChannels?: number;
5243
+ readonly enableSpeakerDiarization?: boolean;
5244
+ readonly enableLanguageIdentification?: boolean;
5245
+ readonly maxEndpointDelayMs?: number;
5246
+ readonly clientReferenceId?: string;
5247
+ readonly baseUrl?: string;
4621
5248
  }
4622
5249
  /** Streaming STT adapter for Soniox's real-time WebSocket API. */
4623
5250
  declare class SonioxSTT {
4624
5251
  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
4625
5252
  static readonly providerKey = "soniox";
4626
5253
  private ws;
4627
- private callbacks;
5254
+ private readonly callbacks;
4628
5255
  private final;
4629
5256
  private keepaliveTimer;
4630
5257
  private readonly apiKey;
@@ -4649,8 +5276,10 @@ declare class SonioxSTT {
4649
5276
  private emit;
4650
5277
  /** Send a binary PCM16-LE audio chunk to Soniox for transcription. */
4651
5278
  sendAudio(audio: Buffer): void;
4652
- /** Register a transcript listener (max 10 concurrent listeners). */
5279
+ /** Register a transcript listener. */
4653
5280
  onTranscript(callback: TranscriptCallback$6): void;
5281
+ /** Unregister a previously registered transcript listener. */
5282
+ offTranscript(callback: TranscriptCallback$6): void;
4654
5283
  /** Send the empty-frame stream terminator and close the WebSocket. */
4655
5284
  close(): void;
4656
5285
  }
@@ -6022,8 +6651,8 @@ interface OpenAITTSOptions {
6022
6651
  speed?: number;
6023
6652
  /**
6024
6653
  * Enable anti-aliasing LPF ahead of the 3:2 decimation. Defaults to
6025
- * ``false`` for backwards-compatibility; set to ``true`` for cleaner
6026
- * audio on sibilants / fricatives.
6654
+ * ``true`` (matches the provider default); set to ``false`` to opt out
6655
+ * for bit-exact downsample-only output.
6027
6656
  */
6028
6657
  antiAlias?: boolean;
6029
6658
  }
@@ -6815,14 +7444,14 @@ declare const SUPPORTED_SAMPLE_RATES: readonly [8000, 16000];
6815
7444
  type SileroSampleRate = (typeof SUPPORTED_SAMPLE_RATES)[number];
6816
7445
  /** Options accepted by {@link SileroVAD.load}. */
6817
7446
  interface SileroVADOptions {
6818
- minSpeechDuration?: number;
6819
- minSilenceDuration?: number;
6820
- prefixPaddingDuration?: number;
6821
- activationThreshold?: number;
6822
- deactivationThreshold?: number;
6823
- sampleRate?: SileroSampleRate;
6824
- forceCpu?: boolean;
6825
- onnxFilePath?: string;
7447
+ readonly minSpeechDuration?: number;
7448
+ readonly minSilenceDuration?: number;
7449
+ readonly prefixPaddingDuration?: number;
7450
+ readonly activationThreshold?: number;
7451
+ readonly deactivationThreshold?: number;
7452
+ readonly sampleRate?: SileroSampleRate;
7453
+ readonly forceCpu?: boolean;
7454
+ readonly onnxFilePath?: string;
6826
7455
  }
6827
7456
  /**
6828
7457
  * Minimal structural type for the subset of `onnxruntime-node` we depend on.
@@ -6861,6 +7490,8 @@ declare class SileroVAD implements VADProvider {
6861
7490
  private speechThresholdDuration;
6862
7491
  private silenceThresholdDuration;
6863
7492
  private closed;
7493
+ /** Transitions produced in the current processFrame call but not yet returned. */
7494
+ private eventQueue;
6864
7495
  private constructor();
6865
7496
  /**
6866
7497
  * Load the Silero VAD model.
@@ -6945,9 +7576,9 @@ declare class SileroVAD implements VADProvider {
6945
7576
  interface DeepFilterNetOptions {
6946
7577
  /** Absolute path to a DeepFilterNet ONNX model. If omitted, the filter
6947
7578
  * logs a warning and becomes a pass-through. */
6948
- modelPath?: string;
7579
+ readonly modelPath?: string;
6949
7580
  /** When true, disable the pass-through warning (used by tests). */
6950
- silenceWarnings?: boolean;
7581
+ readonly silenceWarnings?: boolean;
6951
7582
  }
6952
7583
  /** OSS noise-suppression filter backed by a DeepFilterNet ONNX model. */
6953
7584
  declare class DeepFilterNetFilter implements AudioFilter {
@@ -7208,6 +7839,36 @@ declare class OpenAIRealtime2Adapter extends OpenAIRealtimeAdapter {
7208
7839
  * artefact and well below the GA VAD's 300 ms prefix-padding window.
7209
7840
  */
7210
7841
  private transcodeInboundMulaw8ToPcm24;
7842
+ /**
7843
+ * Log-only safety net for issue #154. The GA server echoes the *effective*
7844
+ * session config in `session.updated`; we request `audio/pcm` @ 24 kHz and
7845
+ * transcode PCM24→mulaw8 ourselves (see
7846
+ * `transcodeOutboundPcm24ToMulaw8Buffer`). If a future GA schema change ever
7847
+ * made the server return a different output format, that transcode — which
7848
+ * assumes PCM16-LE @ 24 kHz — would silently corrupt audio, exactly the
7849
+ * v1-beta failure mode #154 fixed. Warn so the drift surfaces in logs instead
7850
+ * of as static. Never gates audio.
7851
+ */
7852
+ private warnIfOutputFormatUnexpected;
7853
+ /**
7854
+ * Shared audio-delta translation helper. Transcodes a GA
7855
+ * `response.output_audio.delta` payload (base64 PCM-16-LE 24 kHz)
7856
+ * into mulaw 8 kHz and splits the result into 160-byte (20 ms) frames,
7857
+ * dispatching one synthetic `response.audio.delta` event per frame.
7858
+ *
7859
+ * Called from BOTH the `connect()` shim and the `adoptWebSocket()` shim
7860
+ * so that warm-path (prewarm/adopted) calls receive identical transcoding
7861
+ * to cold-path calls. Without this, adopted sockets forwarded raw PCM-24
7862
+ * to Twilio/Telnyx, producing garbled or silent audio on every warm call.
7863
+ *
7864
+ * @param parsed - The parsed GA event object (type already checked to be
7865
+ * `response.output_audio.delta` with a string `delta`).
7866
+ * @param handler - The downstream message listener to dispatch each frame to.
7867
+ * @param rest - Extra arguments forwarded from the original `message` event.
7868
+ * @returns `true` if frames were dispatched (caller should return early),
7869
+ * `false` if the resampler is still warming up (zero output bytes).
7870
+ */
7871
+ private translateGaAudioDelta;
7211
7872
  /**
7212
7873
  * Base64 PCM-16-LE 24 kHz → Base64 mulaw 8 kHz. Used by the WS
7213
7874
  * translation shim on each `response.output_audio.delta`. The stateful
@@ -7217,6 +7878,23 @@ declare class OpenAIRealtime2Adapter extends OpenAIRealtimeAdapter {
7217
7878
  */
7218
7879
  private transcodeOutboundPcm24ToMulaw8Buffer;
7219
7880
  sendFirstMessage(text: string): Promise<void>;
7881
+ /**
7882
+ * Speak a short reassurance filler WITHOUT injecting a `role:user` turn.
7883
+ *
7884
+ * GA-shape sibling of {@link sendFirstMessage} (and override of the base v1
7885
+ * {@link OpenAIRealtimeAdapter.sendReassurance}): a bare `response.create`
7886
+ * carrying explicit `instructions` so the filler is the assistant's own
7887
+ * in-band audio. No `conversation.item.create` with `role:"user"` is
7888
+ * emitted, so the transcript shows no phantom caller line. The GA endpoint
7889
+ * rejects `response.modalities` and does not inherit `audio.output.voice`
7890
+ * for an explicit `response.create`, so — exactly as in
7891
+ * {@link sendFirstMessage} — we send `output_modalities` and re-inject the
7892
+ * voice. Fillers must not imply success or failure.
7893
+ *
7894
+ * Mirrors Python `OpenAIRealtime2Adapter.send_reassurance` in
7895
+ * `providers/openai_realtime_2.py`.
7896
+ */
7897
+ sendReassurance(text: string): Promise<void>;
7220
7898
  }
7221
7899
 
7222
7900
  /**
@@ -7541,7 +8219,7 @@ declare class ChatContext {
7541
8219
  */
7542
8220
 
7543
8221
  /** Valid DTMF tone values (keypad characters). */
7544
- declare const DTMF_EVENTS: readonly ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "*", "#", "A", "B", "C", "D"];
8222
+ declare const DTMF_EVENTS: readonly ["1", "2", "3", "4", "5", "6", "7", "8", "9", "0", "*", "#", "A", "B", "C", "D"];
7545
8223
  /** Single DTMF tone value (a member of `DTMF_EVENTS`). */
7546
8224
  type DtmfEvent = (typeof DTMF_EVENTS)[number];
7547
8225
  /** Join DTMF events into a space-separated debug string. */
@@ -8030,8 +8708,10 @@ declare class TelnyxSTT {
8030
8708
  connect(): Promise<void>;
8031
8709
  /** Send a binary PCM16 audio chunk; emits the WAV header on the first call. */
8032
8710
  sendAudio(audio: Buffer): void;
8033
- /** Register a transcript listener (max 10 concurrent listeners). */
8711
+ /** Register a transcript listener. */
8034
8712
  onTranscript(callback: TranscriptCallback): void;
8713
+ /** Unregister a previously registered transcript listener. */
8714
+ offTranscript(callback: TranscriptCallback): void;
8035
8715
  /** Close the streaming WebSocket. */
8036
8716
  close(): void;
8037
8717
  }
@@ -8149,4 +8829,4 @@ interface CallEvent {
8149
8829
  readonly direction?: string;
8150
8830
  }
8151
8831
 
8152
- export { type AgentOptions, type AgentState, AllProvidersFailedError, type AnthropicConversion, LLM$3 as AnthropicLLM, type AnthropicLLMOptions, type AnthropicMessage, AssemblyAIEncoding, AssemblyAIModel, STT$1 as AssemblyAISTT, type AssemblyAISTTOptions, type AudioConfig, type AudioSource, AuthenticationError, type BackgroundAudioOptions, BackgroundAudioPlayer, type EvaluateContext as BargeInEvaluateContext, type BargeInStrategy, BuiltinAudioClip, type BuiltinAudioClipName, type BuiltinPcmSource, type CallControl, type CallEvent, type CallEventHandler, type CallMetrics, CallMetricsAccumulator, type CallOutcome, type CallRecord, type CallResult, type CarrierKind, type CartesiaEncoding, STT$3 as CartesiaSTT, type CartesiaSTTOptions, TTS$3 as CartesiaTTS, CartesiaTTSModel, type CartesiaTTSOptions, CartesiaTTSVoiceMode, LLM$1 as CerebrasLLM, type CerebrasLLMOptions, ChatContext, type ChatMessage, type ChatRole, CloudflareTunnel, type ConversationStateSnapshot, type CostBreakdown, DEFAULT_MIN_SENTENCE_LEN, DEFAULT_PRICING, DTMF_EVENTS, DeepFilterNetFilter, type DeepFilterNetOptions, DeepgramModel, STT$6 as DeepgramSTT, type DeepgramSTTOptions, DefaultToolExecutor, type DefaultToolExecutorOptions, type DefineToolInput, type DtmfEvent, ConvAI as ElevenLabsConvAI, ElevenLabsConvAIAdapter, type ConvAIOptions as ElevenLabsConvAIOptions, ElevenLabsModel, ElevenLabsOutputFormat, ElevenLabsTTS as ElevenLabsRestTTS, TTS$6 as ElevenLabsTTS, type ElevenLabsTTSOptions, type ElevenLabsWebSocketOptions, TTS$5 as ElevenLabsWebSocketTTS, type EouTrigger, ErrorCode, EventBus, FallbackLLMProvider, type FallbackLLMProviderOptions, type FilePcmSource, GEMINI_DEFAULT_INPUT_SR, GEMINI_DEFAULT_OUTPUT_SR, GeminiLiveAdapter, type GeminiLiveEventHandler, LLM as GoogleLLM, type GoogleLLMOptions, LLM$2 as GroqLLM, type GroqLLMOptions, Guardrail$1 as Guardrail, type GuardrailOptions, type HookContext, IVRActivity, type IVRActivityOptions, type IVRToolDefinition, type IncomingMessage, type 
InitTracingOptions, TTS as InworldTTS, type InworldTTSOptions, type JobCallback, KrispFrameDuration, KrispSampleRate, KrispVivaFilter, type KrispVivaFilterOptions, type LLMChunk, LLMLoop, type LLMProvider, LMNTAudioFormat, LMNTModel, LMNTSampleRate, TTS$1 as LMNTTTS, type LMNTTTSOptions, type LatencyBreakdown, type LocalCallOptions, type LocalConfig, type LocalOptions, type Logger, type LoopCallback, type MessageHandler, MetricsStore, MinWordsStrategy, type MinWordsStrategyOptions, type ModelPricing, Ngrok, LLM$4 as OpenAILLM, type OpenAILLMOptions, OpenAILLMProvider, type OpenAIMessage, Realtime as OpenAIRealtime, Realtime2 as OpenAIRealtime2, OpenAIRealtime2Adapter, type Realtime2Options as OpenAIRealtime2Options, OpenAIRealtimeAdapter, OpenAIRealtimeAudioFormat, OpenAIRealtimeModel, type RealtimeOptions as OpenAIRealtimeOptions, OpenAIRealtimeVADType, TTS$4 as OpenAITTS, type OpenAITTSOptions, STT$4 as OpenAITranscribeSTT, type OpenAITranscribeSTTOptions, OpenAITranscriptionModel, OpenAIVoice, PRICING_LAST_UPDATED, PRICING_VERSION, type ParamSpec, PartialStreamError, Patter, PatterConnectionError, PatterError, type PatterEventType, PatterTool, type PatterToolExecuteArgs, type PatterToolOptions, type PatterToolResult, PcmCarry, PipelineHookExecutor, type PipelineHooks, type PipelineMessageHandler, Carrier as Plivo, PlivoAdapter, type PlivoCarrierOptions, type InitiateCallOptions as PlivoInitiateCallOptions, type InitiateCallResult as PlivoInitiateCallResult, PricingUnit, type PricingUnitValue, type ProviderPricing, ProvisionError, RateLimitError, type RawPcmSource, type RealtimeConfig, RemoteMessageHandler, RimeAudioFormat, RimeModel, TTS$2 as RimeTTS, type RimeTTSOptions, SPAN_BARGEIN, SPAN_CALL, SPAN_ENDPOINT, SPAN_LLM, SPAN_STT, SPAN_TOOL, SPAN_TTS, type SSEEvent, type STTConfig, type ScheduleHandle, SentenceChunker, type ServeOptions, type SilenceCallback, type SileroSampleRate, SileroVAD, type SileroVADOptions, STT$2 as SonioxSTT, type SonioxSTTOptions$1 as 
SonioxSTTOptions, type Span, type SpeechEventCallback, SpeechEvents, SpeechmaticsAudioEncoding, SpeechmaticsOperatingPoint, STT as SpeechmaticsSTT, type SpeechmaticsSTTOptions, SpeechmaticsSampleRate, SpeechmaticsServerMessage, TurnDetectionMode as SpeechmaticsTurnDetectionMode, StatefulResampler, type StatefulResamplerOptions, Static as StaticTunnel, type TTSConfig, Carrier$1 as Telnyx, TelnyxAdapter, type TelnyxCarrierOptions, type ConfigureNumberOptions as TelnyxConfigureNumberOptions, type EndCallOptions as TelnyxEndCallOptions, type InitiateCallOptions$1 as TelnyxInitiateCallOptions, type InitiateCallResult$1 as TelnyxInitiateCallResult, type ProvisionNumberOptions as TelnyxProvisionNumberOptions, type ProvisionNumberResult as TelnyxProvisionNumberResult, TelnyxSTT, TelnyxSTTInputFormat, TelnyxSTTSampleRate, type Transcript as TelnyxSTTTranscript, TelnyxTTS, TelnyxTTSSampleRate, TelnyxTTSVoice, type TelnyxTranscriptionEngine, TestSession, TfidfLoopDetector, type TfidfLoopDetectorOptions, Tool, type ToolDefinition, type ToolExecutor, type ToolHandler, type ToolOptions, type TunnelHandle, type TurnMetrics, Carrier$2 as Twilio, TwilioAdapter, type TwilioAdapterOptions, type TwilioCarrierOptions, type ConfigureNumberOptions$1 as TwilioConfigureNumberOptions, type InitiateCallOptions$2 as TwilioInitiateCallOptions, type InitiateCallResult$2 as TwilioInitiateCallResult, type ProvisionNumberOptions$1 as TwilioProvisionNumberOptions, type ProvisionNumberResult$1 as TwilioProvisionNumberResult, ULTRAVOX_DEFAULT_API_BASE, ULTRAVOX_DEFAULT_SR, type UltravoxEventHandler, UltravoxRealtimeAdapter, type UserState, STT$5 as WhisperSTT, type WhisperSTTOptions, assemblyai, builtinClipPath, calculateRealtimeCost, calculateSttCost, calculateTelephonyCost, calculateTtsCost, callsToCsv, callsToJson, cartesia, createResampler16kTo8k, createResampler24kTo16k, createResampler24kTo8k, createResampler8kTo16k, deepgram, defineTool, elevenlabs, evaluateStrategies as 
evaluateBargeInStrategies, filterEmoji, filterForTTS, filterMarkdown, formatDtmf, geminiLive, getLogger, guardrail, initTracing, isRemoteUrl, isTracingEnabled, isWebSocketUrl, lmnt, makeAuthMiddleware, mergePricing, mixPcm, mountApi, mountDashboard, mulawToPcm16, notifyDashboard, openaiTts, pcm16ToMulaw, resample16kTo8k, resample24kTo16k, resample8kTo16k, resamplePcm, resetStrategies as resetBargeInStrategies, rime, scheduleCron, scheduleInterval, scheduleOnce, selectSoundFromList, setLogger, soniox, speechmatics, startSpan, startTunnel, tool, ultravox, whisper };
8832
+ export { type AgentOptions, type AgentState, AllProvidersFailedError, type AnthropicConversion, LLM$3 as AnthropicLLM, type AnthropicLLMOptions, type AnthropicMessage, AssemblyAIEncoding, AssemblyAIModel, STT$1 as AssemblyAISTT, type AssemblyAISTTOptions, type AudioConfig, type AudioSource, AuthenticationError, type BackgroundAudioOptions, BackgroundAudioPlayer, type EvaluateContext as BargeInEvaluateContext, type BargeInStrategy, BuiltinAudioClip, type BuiltinAudioClipName, type BuiltinPcmSource, type CallControl, type CallEvent, type CallEventHandler, type CallMetrics, CallMetricsAccumulator, type CallOutcome, type CallRecord, type CallResult, type CarrierKind, type CartesiaEncoding, STT$3 as CartesiaSTT, type CartesiaSTTOptions, TTS$3 as CartesiaTTS, CartesiaTTSModel, type CartesiaTTSOptions, CartesiaTTSVoiceMode, LLM$1 as CerebrasLLM, type CerebrasLLMOptions, ChatContext, type ChatMessage, type ChatRole, CloudflareTunnel, type ConsultConfig, type ConversationStateSnapshot, type CostBreakdown, DEFAULT_MIN_SENTENCE_LEN, DEFAULT_PRICING, DTMF_EVENTS, DeepFilterNetFilter, type DeepFilterNetOptions, DeepgramModel, STT$6 as DeepgramSTT, type DeepgramSTTOptions, DefaultToolExecutor, type DefaultToolExecutorOptions, type DefineToolInput, type DtmfEvent, ConvAI as ElevenLabsConvAI, ElevenLabsConvAIAdapter, type ConvAIOptions as ElevenLabsConvAIOptions, ElevenLabsModel, ElevenLabsOutputFormat, ElevenLabsTTS as ElevenLabsRestTTS, TTS$6 as ElevenLabsTTS, type ElevenLabsTTSOptions, type ElevenLabsWebSocketOptions, TTS$5 as ElevenLabsWebSocketTTS, type EouTrigger, ErrorCode, EventBus, FallbackLLMProvider, type FallbackLLMProviderOptions, type FilePcmSource, GEMINI_DEFAULT_INPUT_SR, GEMINI_DEFAULT_OUTPUT_SR, GeminiLiveAdapter, type GeminiLiveEventHandler, LLM as GoogleLLM, type GoogleLLMOptions, LLM$2 as GroqLLM, type GroqLLMOptions, Guardrail$1 as Guardrail, type GuardrailOptions, type HookContext, IVRActivity, type IVRActivityOptions, type IVRToolDefinition, type 
IncomingMessage, type InitTracingOptions, TTS as InworldTTS, type InworldTTSOptions, type JobCallback, KrispFrameDuration, KrispSampleRate, KrispVivaFilter, type KrispVivaFilterOptions, type LLMChunk, LLMLoop, type LLMProvider, LMNTAudioFormat, LMNTModel, LMNTSampleRate, TTS$1 as LMNTTTS, type LMNTTTSOptions, type LatencyBreakdown, type LocalCallOptions, type LocalConfig, type LocalOptions, type Logger, type LoopCallback, type MessageHandler, MetricsStore, MinWordsStrategy, type MinWordsStrategyOptions, type ModelPricing, Ngrok, type OpenAICompatibleConsult, LLM$4 as OpenAILLM, type OpenAILLMOptions, OpenAILLMProvider, type OpenAIMessage, Realtime as OpenAIRealtime, Realtime2 as OpenAIRealtime2, OpenAIRealtime2Adapter, type Realtime2Options as OpenAIRealtime2Options, OpenAIRealtimeAdapter, OpenAIRealtimeAudioFormat, OpenAIRealtimeModel, type RealtimeOptions as OpenAIRealtimeOptions, OpenAIRealtimeVADType, TTS$4 as OpenAITTS, type OpenAITTSOptions, STT$4 as OpenAITranscribeSTT, type OpenAITranscribeSTTOptions, OpenAITranscriptionModel, OpenAIVoice, PRICING_LAST_UPDATED, PRICING_VERSION, type ParamSpec, PartialStreamError, Patter, PatterConfigError, PatterConnectionError, PatterError, type PatterEventType, PatterTool, type PatterToolExecuteArgs, type PatterToolOptions, type PatterToolResult, PcmCarry, PipelineHookExecutor, type PipelineHooks, type PipelineMessageHandler, Carrier as Plivo, PlivoAdapter, type PlivoCarrierOptions, type InitiateCallOptions as PlivoInitiateCallOptions, type InitiateCallResult as PlivoInitiateCallResult, PricingUnit, type PricingUnitValue, type ProviderPricing, ProvisionError, RateLimitError, type RawPcmSource, type RealtimeConfig, type RealtimeTurnDetection, RemoteMessageHandler, RimeAudioFormat, RimeModel, TTS$2 as RimeTTS, type RimeTTSOptions, SPAN_BARGEIN, SPAN_CALL, SPAN_ENDPOINT, SPAN_LLM, SPAN_STT, SPAN_TOOL, SPAN_TTS, type SSEEvent, type STTConfig, type ScheduleHandle, SentenceChunker, type ServeOptions, type SilenceCallback, type 
SileroSampleRate, SileroVAD, type SileroVADOptions, STT$2 as SonioxSTT, type SonioxSTTOptions$1 as SonioxSTTOptions, type Span, type SpeechEventCallback, SpeechEvents, SpeechmaticsAudioEncoding, SpeechmaticsOperatingPoint, STT as SpeechmaticsSTT, type SpeechmaticsSTTOptions, SpeechmaticsSampleRate, SpeechmaticsServerMessage, TurnDetectionMode as SpeechmaticsTurnDetectionMode, StatefulResampler, type StatefulResamplerOptions, Static as StaticTunnel, type TTSConfig, Carrier$1 as Telnyx, TelnyxAdapter, type TelnyxCarrierOptions, type ConfigureNumberOptions as TelnyxConfigureNumberOptions, type EndCallOptions as TelnyxEndCallOptions, type InitiateCallOptions$1 as TelnyxInitiateCallOptions, type InitiateCallResult$1 as TelnyxInitiateCallResult, type ProvisionNumberOptions as TelnyxProvisionNumberOptions, type ProvisionNumberResult as TelnyxProvisionNumberResult, TelnyxSTT, TelnyxSTTInputFormat, TelnyxSTTSampleRate, type Transcript as TelnyxSTTTranscript, TelnyxTTS, TelnyxTTSSampleRate, TelnyxTTSVoice, type TelnyxTranscriptionEngine, TestSession, TfidfLoopDetector, type TfidfLoopDetectorOptions, Tool, type ToolDefinition, type ToolExecutor, type ToolHandler, type ToolOptions, type TunnelHandle, type TurnMetrics, Carrier$2 as Twilio, TwilioAdapter, type TwilioAdapterOptions, type TwilioCarrierOptions, type ConfigureNumberOptions$1 as TwilioConfigureNumberOptions, type InitiateCallOptions$2 as TwilioInitiateCallOptions, type InitiateCallResult$2 as TwilioInitiateCallResult, type ProvisionNumberOptions$1 as TwilioProvisionNumberOptions, type ProvisionNumberResult$1 as TwilioProvisionNumberResult, ULTRAVOX_DEFAULT_API_BASE, ULTRAVOX_DEFAULT_SR, type UltravoxEventHandler, UltravoxRealtimeAdapter, type UserState, STT$5 as WhisperSTT, type WhisperSTTOptions, assemblyai, builtinClipPath, calculateRealtimeCost, calculateSttCost, calculateTelephonyCost, calculateTtsCost, callsToCsv, callsToJson, cartesia, createResampler16kTo8k, createResampler24kTo16k, createResampler24kTo8k, 
createResampler8kTo16k, deepgram, defineTool, elevenlabs, evaluateStrategies as evaluateBargeInStrategies, filterEmoji, filterForTTS, filterMarkdown, formatDtmf, geminiLive, getLogger, guardrail, initTracing, isRemoteUrl, isTracingEnabled, isWebSocketUrl, lmnt, makeAuthMiddleware, mergePricing, mixPcm, mountApi, mountDashboard, mulawToPcm16, notifyDashboard, openaiTts, openclawConsult, openclawPostCallNotifier, pcm16ToMulaw, resample16kTo8k, resample24kTo16k, resample8kTo16k, resamplePcm, resetStrategies as resetBargeInStrategies, rime, scheduleCron, scheduleInterval, scheduleOnce, selectSoundFromList, setLogger, soniox, speechmatics, startSpan, startTunnel, tool, ultravox, whisper };