getpatter 0.6.3 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -4
- package/dist/{carrier-config-3WDQXP5J.mjs → carrier-config-7YGNRBPO.mjs} +17 -11
- package/dist/{chunk-R2T4JABZ.mjs → chunk-3VVATR6A.mjs} +8 -6
- package/dist/{chunk-Z6W5XFWS.mjs → chunk-7IIV3BY4.mjs} +981 -196
- package/dist/{chunk-CL2U3YET.mjs → chunk-BO227NTF.mjs} +271 -54
- package/dist/cli.js +63 -20
- package/dist/dashboard/ui.html +10 -10
- package/dist/index.d.mts +867 -187
- package/dist/index.d.ts +867 -187
- package/dist/index.js +1785 -517
- package/dist/index.mjs +501 -250
- package/dist/{openai-realtime-2-CNFARP25.mjs → openai-realtime-2-L5EKAAUH.mjs} +1 -1
- package/dist/{silero-vad-LNDFGIY7.mjs → silero-vad-RGF5HCIR.mjs} +1 -1
- package/dist/{test-mode-MDBQ4ECE.mjs → test-mode-4QLLWYVV.mjs} +2 -2
- package/package.json +2 -1
- package/src/dashboard/ui.html +10 -10
package/dist/index.d.ts
CHANGED
|
@@ -189,8 +189,17 @@ declare class SpeechEvents {
|
|
|
189
189
|
*
|
|
190
190
|
* Wraps `wss://api.openai.com/v1/realtime` and exposes the unified
|
|
191
191
|
* Patter realtime contract (`connect / sendAudio / onEvent / close`) on
|
|
192
|
-
* {@link OpenAIRealtimeAdapter}.
|
|
193
|
-
*
|
|
192
|
+
* {@link OpenAIRealtimeAdapter}.
|
|
193
|
+
*
|
|
194
|
+
* NOTE (issue #154): this class is no longer instantiated directly for the
|
|
195
|
+
* telephony bridge. OpenAI deprecated the Beta Realtime API, so its flat
|
|
196
|
+
* `output_audio_format: g711_ulaw` session shape is ignored by GA models —
|
|
197
|
+
* the server falls back to PCM16 @ 24 kHz, which this adapter would forward to
|
|
198
|
+
* Twilio framed as 8 kHz mulaw (static + broken STT). `buildAIAdapter` in
|
|
199
|
+
* `server.ts` now routes BOTH the `OpenAIRealtime` and `OpenAIRealtime2`
|
|
200
|
+
* engines through {@link OpenAIRealtime2Adapter} (GA session shape + internal
|
|
201
|
+
* PCM24→mulaw8 transcode). This class is retained as the shared base class
|
|
202
|
+
* that `OpenAIRealtime2Adapter` extends.
|
|
194
203
|
*/
|
|
195
204
|
|
|
196
205
|
/**
|
|
@@ -292,6 +301,46 @@ interface OpenAIRealtimeOptions {
|
|
|
292
301
|
* Has no effect on models that don't support the `reasoning` field.
|
|
293
302
|
*/
|
|
294
303
|
reasoningEffort?: 'minimal' | 'low' | 'medium' | 'high';
|
|
304
|
+
/**
|
|
305
|
+
* Input noise reduction for speakerphone / conference audio. `undefined`
|
|
306
|
+
* (default) omits the field entirely (no reduction — today's behavior).
|
|
307
|
+
* `"far_field"` is recommended for phone / speakerphone calls;
|
|
308
|
+
* `"near_field"` for a handset close to the mouth.
|
|
309
|
+
*
|
|
310
|
+
* v1 wire shape: emitted at the top level of `session.update` as
|
|
311
|
+
* `input_audio_noise_reduction: { type }`. The GA adapter
|
|
312
|
+
* (`OpenAIRealtime2Adapter`) nests it under `audio.input` instead.
|
|
313
|
+
*
|
|
314
|
+
* Mirrors Python `noise_reduction` on `OpenAIRealtimeAdapter`.
|
|
315
|
+
*/
|
|
316
|
+
noiseReduction?: 'near_field' | 'far_field';
|
|
317
|
+
/**
|
|
318
|
+
* Turn-detection tuning. `undefined` (default) keeps the adapter's current
|
|
319
|
+
* hardcoded `server_vad` / threshold `0.5` / silence 300 ms settings.
|
|
320
|
+
* Raise `threshold` or switch to `semantic_vad` with `eagerness: 'low'` to
|
|
321
|
+
* stop speakerphone / conference noise from triggering false barge-ins.
|
|
322
|
+
*
|
|
323
|
+
* Mirrors Python `turn_detection` on `OpenAIRealtimeAdapter` and
|
|
324
|
+
* `turn_detection` on the engine marker `engines.openai.Realtime`.
|
|
325
|
+
*/
|
|
326
|
+
turnDetection?: RealtimeTurnDetection;
|
|
327
|
+
/**
|
|
328
|
+
* Gate the model's response on the Whisper transcript (legacy behavior).
|
|
329
|
+
*
|
|
330
|
+
* `false` (default) — the stream handler requests the response on
|
|
331
|
+
* `speech_stopped`, independently of the Whisper `transcript_input` event.
|
|
332
|
+
* The transcript is display-only (dashboard / history / `onTranscript`).
|
|
333
|
+
* `true` — the stream handler requests the response only after the
|
|
334
|
+
* `transcript_input` event passes the hallucination filter (prior
|
|
335
|
+
* behavior).
|
|
336
|
+
*
|
|
337
|
+
* The adapter itself does not act on this flag — it is read by the stream
|
|
338
|
+
* handler via {@link OpenAIRealtimeAdapter.getGateResponseOnTranscript} to
|
|
339
|
+
* decide WHEN to call {@link OpenAIRealtimeAdapter.requestResponse}.
|
|
340
|
+
*
|
|
341
|
+
* Mirrors Python `gate_response_on_transcript` on `OpenAIRealtimeAdapter`.
|
|
342
|
+
*/
|
|
343
|
+
gateResponseOnTranscript?: boolean;
|
|
295
344
|
}
|
|
296
345
|
/** Realtime WebSocket adapter for OpenAI's `gpt-realtime` family. */
|
|
297
346
|
declare class OpenAIRealtimeAdapter {
|
|
@@ -314,12 +363,22 @@ declare class OpenAIRealtimeAdapter {
|
|
|
314
363
|
private currentResponseAudioMs;
|
|
315
364
|
private currentResponseFirstAudioAt;
|
|
316
365
|
protected readonly options: OpenAIRealtimeOptions;
|
|
366
|
+
private readonly gateResponseOnTranscript;
|
|
317
367
|
constructor(apiKey: string, model?: string, voice?: string, instructions?: string, tools?: Array<{
|
|
318
368
|
name: string;
|
|
319
369
|
description: string;
|
|
320
370
|
parameters: Record<string, unknown>;
|
|
321
371
|
strict?: boolean;
|
|
322
372
|
}> | undefined, audioFormat?: OpenAIRealtimeAudioFormat, options?: OpenAIRealtimeOptions);
|
|
373
|
+
/**
|
|
374
|
+
* Whether the stream handler should gate the model response on the Whisper
|
|
375
|
+
* transcript (legacy) or fire it on `speech_stopped` (default, decoupled).
|
|
376
|
+
*
|
|
377
|
+
* `false` (default) — the response is requested on `speech_stopped`,
|
|
378
|
+
* independently of Whisper. `true` — the response is requested only after
|
|
379
|
+
* `transcript_input` passes the hallucination filter.
|
|
380
|
+
*/
|
|
381
|
+
getGateResponseOnTranscript(): boolean;
|
|
323
382
|
/**
|
|
324
383
|
* Build the production session.update body. Mirrors the body sent
|
|
325
384
|
* inside `connect()` so warmup can apply identical configuration to
|
|
@@ -399,18 +458,45 @@ declare class OpenAIRealtimeAdapter {
|
|
|
399
458
|
/** Remove a previously registered {@link onEvent} callback. */
|
|
400
459
|
offEvent(callback: RealtimeEventCallback): void;
|
|
401
460
|
protected ensureMessageListener(): void;
|
|
402
|
-
/** Truncate the in-flight assistant turn
|
|
461
|
+
/** Truncate the in-flight assistant turn's playback offset on the server.
|
|
462
|
+
*
|
|
463
|
+
* Sends ONLY ``conversation.item.truncate`` — no ``response.cancel``. This
|
|
464
|
+
* is the half of barge-in handling that a WebSocket transport MUST always
|
|
465
|
+
* perform: per OpenAI's docs, the GA server auto-truncates on barge-in only
|
|
466
|
+
* over WebRTC / SIP; on the WebSocket transport the client is responsible
|
|
467
|
+
* for telling the server how much of the assistant turn was actually heard.
|
|
468
|
+
* In server-managed mode (``interrupt_response: true``) the server already
|
|
469
|
+
* cancels the response itself, so issuing ``response.cancel`` here would be
|
|
470
|
+
* redundant / rejected — call this method, not {@link cancelResponse}.
|
|
403
471
|
*
|
|
404
472
|
* ``audio_end_ms`` MUST reflect what the caller actually heard, not what
|
|
405
473
|
* the server generated. OpenAI streams audio at 5-10x real-time, so the
|
|
406
474
|
* byte-derived counter overstates playback whenever the consumer cleared
|
|
407
|
-
* its playout buffer (e.g. ``
|
|
475
|
+
* its playout buffer (e.g. ``sendClear``) before the audio reached the
|
|
408
476
|
* speaker. We bound the truncate point by wall-clock time since the first
|
|
409
477
|
* chunk of this response — that's the physical maximum a 1x real-time
|
|
410
478
|
* playback could have produced. Without this cap, OpenAI keeps the full
|
|
411
479
|
* generated assistant text on the transcript, and the model replays /
|
|
412
480
|
* resumes from it on the next turn — manifesting as re-greetings and
|
|
413
481
|
* mid-sentence fragments after a barge-in storm.
|
|
482
|
+
*
|
|
483
|
+
* No-op when no response is in flight, keeping it idempotent across stale
|
|
484
|
+
* callers. Resets per-response tracking so post-truncate late frames and
|
|
485
|
+
* the next response start clean.
|
|
486
|
+
*/
|
|
487
|
+
truncate(): void;
|
|
488
|
+
/** Truncate the in-flight assistant turn AND cancel the active response.
|
|
489
|
+
*
|
|
490
|
+
* Sends BOTH ``conversation.item.truncate`` (the played-offset bookkeeping)
|
|
491
|
+
* AND ``response.cancel``. Use this on the LEGACY client-managed barge-in
|
|
492
|
+
* path (``gateResponseOnTranscript`` true → ``interrupt_response: false``,
|
|
493
|
+
* so the server does NOT cancel for us) and for explicit cancels driven by
|
|
494
|
+
* Patter (e.g. on transfer / hangup). In server-managed mode call
|
|
495
|
+
* {@link truncate} instead — the server already cancels the response, and an
|
|
496
|
+
* extra ``response.cancel`` would be redundant / rejected.
|
|
497
|
+
*
|
|
498
|
+
* Truncation bounding semantics are identical to {@link truncate}; see its
|
|
499
|
+
* doc comment for the ``audio_end_ms`` wall-clock cap rationale.
|
|
414
500
|
*/
|
|
415
501
|
cancelResponse(): void;
|
|
416
502
|
/** Inject a user text turn and request a new response. */
|
|
@@ -441,6 +527,24 @@ declare class OpenAIRealtimeAdapter {
|
|
|
441
527
|
* customer cue).
|
|
442
528
|
*/
|
|
443
529
|
sendFirstMessage(text: string): Promise<void>;
|
|
530
|
+
/**
|
|
531
|
+
* Speak a short reassurance filler WITHOUT injecting a `role:user` turn.
|
|
532
|
+
*
|
|
533
|
+
* Same no-fake-turn shape as {@link sendFirstMessage}: a bare
|
|
534
|
+
* `response.create` carrying explicit `instructions`, so the filler is the
|
|
535
|
+
* assistant's own in-band audio. The reassurance scheduler in the
|
|
536
|
+
* stream-handler routes here instead of {@link sendText} — which would emit
|
|
537
|
+
* a `conversation.item.create` with `role:'user'` and falsely show the
|
|
538
|
+
* caller saying "One moment." in the transcript. Fillers must not imply
|
|
539
|
+
* success or failure.
|
|
540
|
+
*
|
|
541
|
+
* Uses `modalities: ['audio', 'text']` (v1-beta shape). The GA subclass
|
|
542
|
+
* {@link OpenAIRealtime2Adapter} overrides this with `output_modalities`
|
|
543
|
+
* and re-injects `audio.output.voice` so the GA endpoint does not reject
|
|
544
|
+
* the request. Mirrors Python `OpenAIRealtimeAdapter.send_reassurance` in
|
|
545
|
+
* `providers/openai_realtime.py`.
|
|
546
|
+
*/
|
|
547
|
+
sendReassurance(text: string): Promise<void>;
|
|
444
548
|
/** Submit a tool/function-call result and request the next response. */
|
|
445
549
|
sendFunctionResult(callId: string, result: string): Promise<void>;
|
|
446
550
|
/** Stop the heartbeat, drop listeners, and close the Realtime WebSocket. */
|
|
@@ -703,48 +807,48 @@ interface LatencyBreakdown {
|
|
|
703
807
|
* number as "STT latency". Falls back to turn_start when the endpoint
|
|
704
808
|
* signal is unavailable (degraded provider, batch STT, etc.).
|
|
705
809
|
*/
|
|
706
|
-
stt_ms: number;
|
|
810
|
+
readonly stt_ms: number;
|
|
707
811
|
/**
|
|
708
812
|
* Duration of the user's utterance (turn_start → end-of-speech). Useful
|
|
709
813
|
* to distinguish "user spoke for 4s" from "STT took 4s to finalize" —
|
|
710
814
|
* they used to be conflated in stt_ms before 0.6.1. Optional — undefined
|
|
711
815
|
* when the endpoint signal is unavailable.
|
|
712
816
|
*/
|
|
713
|
-
user_speech_duration_ms?: number;
|
|
817
|
+
readonly user_speech_duration_ms?: number;
|
|
714
818
|
/**
|
|
715
819
|
* Backwards-compatible LLM bucket. With the split below, this now reflects
|
|
716
820
|
* the user-perceived first-token latency (TTFT) when streaming is available
|
|
717
821
|
* and the full generation time otherwise. Prefer ``llm_ttft_ms`` /
|
|
718
822
|
* ``llm_total_ms`` in new code.
|
|
719
823
|
*/
|
|
720
|
-
llm_ms: number;
|
|
824
|
+
readonly llm_ms: number;
|
|
721
825
|
/** Time-to-first-token (UX-facing latency): stt_complete → first LLM token. */
|
|
722
|
-
llm_ttft_ms?: number;
|
|
826
|
+
readonly llm_ttft_ms?: number;
|
|
723
827
|
/**
|
|
724
828
|
* Total LLM generation time: stt_complete → last LLM token. Distinct from
|
|
725
829
|
* ``llm_ms`` so cost/throughput analysis and TTFT can be tracked separately.
|
|
726
830
|
*/
|
|
727
|
-
llm_total_ms?: number;
|
|
728
|
-
tts_ms: number;
|
|
729
|
-
total_ms: number;
|
|
831
|
+
readonly llm_total_ms?: number;
|
|
832
|
+
readonly tts_ms: number;
|
|
833
|
+
readonly total_ms: number;
|
|
730
834
|
/**
|
|
731
835
|
* Endpoint latency: time from end-of-user-speech (VAD stop or STT
|
|
732
836
|
* ``speech_final``) to LLM dispatch. Captures the silence-detection +
|
|
733
837
|
* transcript-finalization gap. Optional — undefined when the source signal
|
|
734
838
|
* is missing.
|
|
735
839
|
*/
|
|
736
|
-
endpoint_ms?: number;
|
|
840
|
+
readonly endpoint_ms?: number;
|
|
737
841
|
/**
|
|
738
842
|
* Barge-in latency: time from user-interrupt detection to TTS playback
|
|
739
843
|
* actually halting (i.e. after ``sendClear`` returned). Optional — only
|
|
740
844
|
* populated on interrupted turns.
|
|
741
845
|
*/
|
|
742
|
-
bargein_ms?: number;
|
|
846
|
+
readonly bargein_ms?: number;
|
|
743
847
|
/**
|
|
744
848
|
* Total TTS time: LLM-first-token (or first-sentence boundary) to last
|
|
745
849
|
* TTS audio byte sent. Optional — undefined when TTS never completed.
|
|
746
850
|
*/
|
|
747
|
-
tts_total_ms?: number;
|
|
851
|
+
readonly tts_total_ms?: number;
|
|
748
852
|
/**
|
|
749
853
|
* **User-perceived agent response latency**: time from end-of-user-speech
|
|
750
854
|
* (VAD stop or STT ``speech_final``) to the first audio byte the agent
|
|
@@ -757,54 +861,54 @@ interface LatencyBreakdown {
|
|
|
757
861
|
* the system-controlled latency: silence detection + LLM TTFT + TTS
|
|
758
862
|
* first byte.
|
|
759
863
|
*/
|
|
760
|
-
agent_response_ms?: number;
|
|
864
|
+
readonly agent_response_ms?: number;
|
|
761
865
|
}
|
|
762
866
|
/** Per-call cost breakdown by component (STT/TTS/LLM/telephony) plus the total. */
|
|
763
867
|
interface CostBreakdown {
|
|
764
|
-
stt: number;
|
|
765
|
-
tts: number;
|
|
766
|
-
llm: number;
|
|
767
|
-
telephony: number;
|
|
768
|
-
total: number;
|
|
868
|
+
readonly stt: number;
|
|
869
|
+
readonly tts: number;
|
|
870
|
+
readonly llm: number;
|
|
871
|
+
readonly telephony: number;
|
|
872
|
+
readonly total: number;
|
|
769
873
|
/**
|
|
770
874
|
* Amount saved on LLM cost thanks to OpenAI Realtime prompt caching.
|
|
771
875
|
* ``llm`` above is the net cost AFTER this discount. Dashboards can
|
|
772
876
|
* render ``saved $X (pct%)`` next to the LLM line when > 0.
|
|
773
877
|
*/
|
|
774
|
-
llm_cached_savings
|
|
878
|
+
readonly llm_cached_savings: number;
|
|
775
879
|
}
|
|
776
880
|
/** Metrics captured for a single conversation turn. */
|
|
777
881
|
interface TurnMetrics {
|
|
778
|
-
turn_index: number;
|
|
779
|
-
user_text: string;
|
|
780
|
-
agent_text: string;
|
|
781
|
-
latency: LatencyBreakdown;
|
|
782
|
-
stt_audio_seconds: number;
|
|
783
|
-
tts_characters: number;
|
|
784
|
-
timestamp: number;
|
|
882
|
+
readonly turn_index: number;
|
|
883
|
+
readonly user_text: string;
|
|
884
|
+
readonly agent_text: string;
|
|
885
|
+
readonly latency: LatencyBreakdown;
|
|
886
|
+
readonly stt_audio_seconds: number;
|
|
887
|
+
readonly tts_characters: number;
|
|
888
|
+
readonly timestamp: number;
|
|
785
889
|
}
|
|
786
890
|
/** Aggregated metrics for an entire call (turns, costs, latency percentiles). */
|
|
787
891
|
interface CallMetrics {
|
|
788
|
-
call_id: string;
|
|
789
|
-
duration_seconds: number;
|
|
790
|
-
turns: TurnMetrics[];
|
|
791
|
-
cost: CostBreakdown;
|
|
792
|
-
latency_avg: LatencyBreakdown;
|
|
793
|
-
latency_p95: LatencyBreakdown;
|
|
794
|
-
latency_p50
|
|
795
|
-
latency_p90
|
|
796
|
-
latency_p99
|
|
797
|
-
provider_mode: string;
|
|
798
|
-
stt_provider: string;
|
|
799
|
-
tts_provider: string;
|
|
800
|
-
llm_provider: string;
|
|
801
|
-
telephony_provider: string;
|
|
892
|
+
readonly call_id: string;
|
|
893
|
+
readonly duration_seconds: number;
|
|
894
|
+
readonly turns: readonly TurnMetrics[];
|
|
895
|
+
readonly cost: CostBreakdown;
|
|
896
|
+
readonly latency_avg: LatencyBreakdown;
|
|
897
|
+
readonly latency_p95: LatencyBreakdown;
|
|
898
|
+
readonly latency_p50: LatencyBreakdown;
|
|
899
|
+
readonly latency_p90: LatencyBreakdown;
|
|
900
|
+
readonly latency_p99: LatencyBreakdown;
|
|
901
|
+
readonly provider_mode: string;
|
|
902
|
+
readonly stt_provider: string;
|
|
903
|
+
readonly tts_provider: string;
|
|
904
|
+
readonly llm_provider: string;
|
|
905
|
+
readonly telephony_provider: string;
|
|
802
906
|
/** Model identifiers per provider (e.g. "ink-whisper", "eleven_flash_v2_5",
|
|
803
907
|
* "gpt-oss-120b"). Surface on the dashboard cost breakdown so operators
|
|
804
908
|
* can attribute per-call spend to a specific model. */
|
|
805
|
-
stt_model?: string;
|
|
806
|
-
tts_model?: string;
|
|
807
|
-
llm_model?: string;
|
|
909
|
+
readonly stt_model?: string;
|
|
910
|
+
readonly tts_model?: string;
|
|
911
|
+
readonly llm_model?: string;
|
|
808
912
|
}
|
|
809
913
|
/** Programmatic control surface for a live call (transfer, hangup, DTMF). */
|
|
810
914
|
interface CallControl {
|
|
@@ -830,7 +934,7 @@ interface CallControl {
|
|
|
830
934
|
}
|
|
831
935
|
/** Mutable per-call accumulator that stamps timestamps and emits final `CallMetrics`. */
|
|
832
936
|
declare class CallMetricsAccumulator {
|
|
833
|
-
callId: string;
|
|
937
|
+
readonly callId: string;
|
|
834
938
|
readonly providerMode: string;
|
|
835
939
|
readonly telephonyProvider: string;
|
|
836
940
|
readonly sttProvider: string;
|
|
@@ -922,6 +1026,16 @@ declare class CallMetricsAccumulator {
|
|
|
922
1026
|
* (the common cause of missing endpoint signals).
|
|
923
1027
|
*/
|
|
924
1028
|
private _endpointSignalMissingCount;
|
|
1029
|
+
/**
|
|
1030
|
+
* Monotonic per-call turn counter. Reserved at turn OPEN
|
|
1031
|
+
* (``onAdapterSpeechStopped`` / ``speech_stopped``) via
|
|
1032
|
+
* ``reserveTurnIndex()`` and threaded through the buffering pipeline into
|
|
1033
|
+
* ``recordTurnComplete`` / ``recordTurnInterrupted`` as ``preReservedIndex``.
|
|
1034
|
+
* This makes ``turn_index`` stable under drops / interrupts (previously it
|
|
1035
|
+
* was assigned at completion as ``this._turns.length``, which shifted when a
|
|
1036
|
+
* turn was dropped). Parity with Python ``_next_turn_index``.
|
|
1037
|
+
*/
|
|
1038
|
+
private _nextTurnIndex;
|
|
925
1039
|
constructor(opts: {
|
|
926
1040
|
callId: string;
|
|
927
1041
|
providerMode: string;
|
|
@@ -951,6 +1065,18 @@ declare class CallMetricsAccumulator {
|
|
|
951
1065
|
get turnActive(): boolean;
|
|
952
1066
|
/** Begin a new turn — stamps the turn start timestamp and resets per-turn state. */
|
|
953
1067
|
startTurn(): void;
|
|
1068
|
+
/**
|
|
1069
|
+
* Reserve and return the next monotonic turn index.
|
|
1070
|
+
*
|
|
1071
|
+
* Called once per turn at the moment the turn OPENS (Realtime:
|
|
1072
|
+
* ``onAdapterSpeechStopped``). The returned index is threaded through the
|
|
1073
|
+
* buffering pipeline and handed back to ``recordTurnComplete`` /
|
|
1074
|
+
* ``recordTurnInterrupted`` as ``preReservedIndex`` so the emitted
|
|
1075
|
+
* ``turn_index`` matches the live per-line transcript ordering even when a
|
|
1076
|
+
* turn is dropped or interrupted between open and close. Parity with Python
|
|
1077
|
+
* ``reserve_turn_index``.
|
|
1078
|
+
*/
|
|
1079
|
+
reserveTurnIndex(): number;
|
|
954
1080
|
/**
|
|
955
1081
|
* Start a new turn only if no turn is currently open.
|
|
956
1082
|
* Use this at inbound-audio ingestion points so the turn timer begins
|
|
@@ -1027,7 +1153,7 @@ declare class CallMetricsAccumulator {
|
|
|
1027
1153
|
* ``user_text=''``. The caller treats ``null`` as "nothing to emit";
|
|
1028
1154
|
* ``emitTurnMetrics`` is already null-safe.
|
|
1029
1155
|
*/
|
|
1030
|
-
recordTurnComplete(agentText: string): TurnMetrics | null;
|
|
1156
|
+
recordTurnComplete(agentText: string, preReservedIndex?: number): TurnMetrics | null;
|
|
1031
1157
|
/**
|
|
1032
1158
|
* Close the current turn as interrupted (barge-in) and return the
|
|
1033
1159
|
* recorded metrics. Returns ``null`` when no turn is open, OR when
|
|
@@ -1037,7 +1163,7 @@ declare class CallMetricsAccumulator {
|
|
|
1037
1163
|
* a future refactor that reorders the bargein + LLM-unwind paths)
|
|
1038
1164
|
* from overwriting a turn that the complete path already emitted.
|
|
1039
1165
|
*/
|
|
1040
|
-
recordTurnInterrupted(): TurnMetrics | null;
|
|
1166
|
+
recordTurnInterrupted(preReservedIndex?: number): TurnMetrics | null;
|
|
1041
1167
|
/**
|
|
1042
1168
|
* Record the moment VAD emitted speech_end for the current utterance.
|
|
1043
1169
|
* @param ts Optional override timestamp in hrTimeMs units (defaults to now).
|
|
@@ -1058,8 +1184,10 @@ declare class CallMetricsAccumulator {
|
|
|
1058
1184
|
recordTurnCommitted(ts?: number): void;
|
|
1059
1185
|
/**
|
|
1060
1186
|
* Record the delta (ms) between turn-committed and when on_user_turn_completed
|
|
1061
|
-
* pipeline hook finished.
|
|
1062
|
-
*
|
|
1187
|
+
* pipeline hook finished. Does NOT re-emit: like Python's
|
|
1188
|
+
* ``record_on_user_turn_completed_delay``, this only stores the value; the
|
|
1189
|
+
* single EOU emission happens on ``recordTurnCommitted`` (3-timestamp guard,
|
|
1190
|
+
* delay defaults to 0 if not yet recorded).
|
|
1063
1191
|
*/
|
|
1064
1192
|
recordOnUserTurnCompletedDelay(delayMs: number): void;
|
|
1065
1193
|
/**
|
|
@@ -1070,7 +1198,7 @@ declare class CallMetricsAccumulator {
|
|
|
1070
1198
|
* ``transcriptionDelay`` = turnCommitted − vadStopped (ms)
|
|
1071
1199
|
* ``onUserTurnCompletedDelay`` = caller-supplied delta (ms) or 0
|
|
1072
1200
|
*/
|
|
1073
|
-
/** Emit `EOUMetrics` once VAD-stop, STT-final,
|
|
1201
|
+
/** Emit `EOUMetrics` once VAD-stop, STT-final, turn-committed, and on_user_turn_completed delay are all known. */
|
|
1074
1202
|
emitEouMetrics(): void;
|
|
1075
1203
|
/**
|
|
1076
1204
|
* Record that a caller utterance started overlapping with agent speech.
|
|
@@ -1221,31 +1349,32 @@ declare function isWebSocketUrl(url: string): boolean;
|
|
|
1221
1349
|
|
|
1222
1350
|
/** Snapshot of a call as held by the dashboard store. */
|
|
1223
1351
|
interface CallRecord {
|
|
1224
|
-
call_id: string;
|
|
1225
|
-
caller: string;
|
|
1226
|
-
callee: string;
|
|
1227
|
-
direction: string;
|
|
1228
|
-
started_at: number;
|
|
1229
|
-
ended_at?: number;
|
|
1352
|
+
readonly call_id: string;
|
|
1353
|
+
readonly caller: string;
|
|
1354
|
+
readonly callee: string;
|
|
1355
|
+
readonly direction: string;
|
|
1356
|
+
readonly started_at: number;
|
|
1357
|
+
readonly ended_at?: number;
|
|
1230
1358
|
/**
|
|
1231
1359
|
* Current lifecycle state: ``initiated`` (pre-registered), ``ringing``,
|
|
1232
1360
|
* ``in-progress``, ``completed``, ``no-answer``, ``busy``, ``failed``,
|
|
1233
1361
|
* ``canceled``, or ``webhook_error``.
|
|
1234
1362
|
*/
|
|
1235
|
-
status?: string;
|
|
1236
|
-
transcript?:
|
|
1237
|
-
role: string;
|
|
1238
|
-
text: string;
|
|
1239
|
-
timestamp: number;
|
|
1363
|
+
readonly status?: string;
|
|
1364
|
+
readonly transcript?: ReadonlyArray<{
|
|
1365
|
+
readonly role: string;
|
|
1366
|
+
readonly text: string;
|
|
1367
|
+
readonly timestamp: number;
|
|
1368
|
+
readonly turnIndex?: number;
|
|
1240
1369
|
}>;
|
|
1241
|
-
turns?: unknown[];
|
|
1242
|
-
metrics?: Record<string, unknown> | null;
|
|
1243
|
-
[key: string]: unknown;
|
|
1370
|
+
readonly turns?: readonly unknown[];
|
|
1371
|
+
readonly metrics?: Record<string, unknown> | null;
|
|
1372
|
+
readonly [key: string]: unknown;
|
|
1244
1373
|
}
|
|
1245
1374
|
/** Server-Sent-Event payload broadcast by `MetricsStore` for live UI updates. */
|
|
1246
1375
|
interface SSEEvent {
|
|
1247
|
-
type: string;
|
|
1248
|
-
data: Record<string, unknown
|
|
1376
|
+
readonly type: string;
|
|
1377
|
+
readonly data: Readonly<Record<string, unknown>>;
|
|
1249
1378
|
}
|
|
1250
1379
|
/** In-memory bounded ring buffer of recent calls plus active-call tracking. */
|
|
1251
1380
|
declare class MetricsStore extends EventEmitter {
|
|
@@ -1289,6 +1418,27 @@ declare class MetricsStore extends EventEmitter {
|
|
|
1289
1418
|
* row from active to completed so the UI freezes the live duration timer.
|
|
1290
1419
|
*/
|
|
1291
1420
|
updateCallStatus(callId: string, status: string, extra?: Record<string, unknown>): void;
|
|
1421
|
+
/**
|
|
1422
|
+
* Record a single transcript line (user/assistant) as it becomes known.
|
|
1423
|
+
*
|
|
1424
|
+
* FIX-5 (issue #154): the live forward path for the dashboard transcript.
|
|
1425
|
+
* The Realtime stream handler calls this the moment each line is known — the
|
|
1426
|
+
* user line right after the hallucination filter accepts it, the assistant
|
|
1427
|
+
* line when its turn flushes — keyed by the monotonic ``turnIndex`` reserved
|
|
1428
|
+
* at turn-open (``reserveTurnIndex``). Each line is appended to the active
|
|
1429
|
+
* call's ``transcript`` array and broadcast over SSE as a ``transcript_line``
|
|
1430
|
+
* event so the dashboard can render lines as they arrive and re-sort by
|
|
1431
|
+
* ``(turnIndex, user<assistant)`` — making a late-arriving user line land
|
|
1432
|
+
* ABOVE its agent line. ``recordTurn`` de-dups against the lines pushed here
|
|
1433
|
+
* by ``(turnIndex, role)`` so the metrics path never double-pushes the same
|
|
1434
|
+
* text. Parity with Python ``record_transcript_line``.
|
|
1435
|
+
*/
|
|
1436
|
+
recordTranscriptLine(data: {
|
|
1437
|
+
call_id: string;
|
|
1438
|
+
turnIndex: number;
|
|
1439
|
+
role: 'user' | 'assistant';
|
|
1440
|
+
text: string;
|
|
1441
|
+
}): void;
|
|
1292
1442
|
/** Append a single conversation turn to an active call and broadcast it via SSE. */
|
|
1293
1443
|
recordTurn(data: Record<string, unknown>): void;
|
|
1294
1444
|
/** Move a call from active to completed and persist its final metrics. */
|
|
@@ -1334,7 +1484,7 @@ declare class MetricsStore extends EventEmitter {
|
|
|
1334
1484
|
isDeleted(callId: string): boolean;
|
|
1335
1485
|
/** Snapshot of soft-deleted call_ids (sorted). */
|
|
1336
1486
|
getDeletedCallIds(): string[];
|
|
1337
|
-
/** Atomically persist the deleted-ids set to disk. Best-effort. */
|
|
1487
|
+
/** Atomically persist the deleted-ids set to disk. Best-effort async. */
|
|
1338
1488
|
private persistDeletedIds;
|
|
1339
1489
|
/** Look up an active call by id (returns undefined if not active or unknown). */
|
|
1340
1490
|
getActive(callId: string): CallRecord | undefined;
|
|
@@ -1452,6 +1602,7 @@ declare class Carrier {
|
|
|
1452
1602
|
}
|
|
1453
1603
|
|
|
1454
1604
|
/** OpenAI Realtime engine — marker class for Patter client dispatch. */
|
|
1605
|
+
|
|
1455
1606
|
/** Constructor options for the OpenAI `Realtime` engine marker. */
|
|
1456
1607
|
interface RealtimeOptions {
|
|
1457
1608
|
/** API key. Falls back to OPENAI_API_KEY env var when omitted. */
|
|
@@ -1479,6 +1630,42 @@ interface RealtimeOptions {
|
|
|
1479
1630
|
* `"gpt-4o-transcribe"` for higher accuracy.
|
|
1480
1631
|
*/
|
|
1481
1632
|
inputAudioTranscriptionModel?: string;
|
|
1633
|
+
/**
|
|
1634
|
+
* Input noise reduction for speakerphone / conference audio. `undefined`
|
|
1635
|
+
* (default) omits the field (no reduction). `"far_field"` recommended for
|
|
1636
|
+
* phone / speakerphone calls; `"near_field"` for a handset close to the
|
|
1637
|
+
* mouth. Mirrors `openai_realtime_noise_reduction` on `Patter.agent()`.
|
|
1638
|
+
*/
|
|
1639
|
+
noiseReduction?: 'near_field' | 'far_field';
|
|
1640
|
+
/**
|
|
1641
|
+
* Turn-detection tuning. `undefined` (default) keeps the adapter's
|
|
1642
|
+
* current hardcoded `server_vad` / threshold `0.5` / silence 300 ms.
|
|
1643
|
+
* Raise threshold or switch to `semantic_vad` eagerness `'low'` to stop
|
|
1644
|
+
* speakerphone noise from triggering false barge-ins.
|
|
1645
|
+
*
|
|
1646
|
+
* Maps to `turn_detection` on the Python `engines.openai.Realtime` marker;
|
|
1647
|
+
* propagates to `realtimeTurnDetection` on `AgentOptions`.
|
|
1648
|
+
*/
|
|
1649
|
+
turnDetection?: RealtimeTurnDetection;
|
|
1650
|
+
/**
|
|
1651
|
+
* Gate the model's response on the Whisper transcript (legacy behavior).
|
|
1652
|
+
*
|
|
1653
|
+
* `false` (default) — the speech-to-speech model responds as soon as the
|
|
1654
|
+
* user stops speaking (on `speech_stopped`), independently of the Whisper
|
|
1655
|
+
* input transcription. The transcript becomes a pure observability
|
|
1656
|
+
* side-channel (dashboard / history / `onTranscript`) and never gates,
|
|
1657
|
+
* triggers, or cancels the response. This reclaims ~500 ms of latency
|
|
1658
|
+
* because the model no longer waits for Whisper.
|
|
1659
|
+
*
|
|
1660
|
+
* `true` — restores the prior behavior where the response is requested
|
|
1661
|
+
* only after the Whisper `transcript_input` event arrives and passes the
|
|
1662
|
+
* hallucination filter.
|
|
1663
|
+
*
|
|
1664
|
+
* Maps to `gate_response_on_transcript` on the Python
|
|
1665
|
+
* `engines.openai.Realtime` marker; propagates to
|
|
1666
|
+
* `openaiRealtimeGateResponseOnTranscript` on `AgentOptions`.
|
|
1667
|
+
*/
|
|
1668
|
+
gateResponseOnTranscript?: boolean;
|
|
1482
1669
|
}
|
|
1483
1670
|
/**
|
|
1484
1671
|
* OpenAI Realtime engine marker.
|
|
@@ -1502,6 +1689,9 @@ declare class Realtime {
|
|
|
1502
1689
|
readonly voice: string;
|
|
1503
1690
|
readonly reasoningEffort?: 'minimal' | 'low' | 'medium' | 'high';
|
|
1504
1691
|
readonly inputAudioTranscriptionModel?: string;
|
|
1692
|
+
readonly noiseReduction?: 'near_field' | 'far_field';
|
|
1693
|
+
readonly turnDetection?: RealtimeTurnDetection;
|
|
1694
|
+
readonly gateResponseOnTranscript?: boolean;
|
|
1505
1695
|
constructor(opts?: RealtimeOptions);
|
|
1506
1696
|
}
|
|
1507
1697
|
|
|
@@ -1513,6 +1703,7 @@ declare class Realtime {
|
|
|
1513
1703
|
* different `session.update` wire shape; the client dispatches to
|
|
1514
1704
|
* `OpenAIRealtime2Adapter` when this marker is passed.
|
|
1515
1705
|
*/
|
|
1706
|
+
|
|
1516
1707
|
/** Constructor options for the OpenAI `Realtime2` engine marker. */
|
|
1517
1708
|
interface Realtime2Options {
|
|
1518
1709
|
/** API key. Falls back to OPENAI_API_KEY env var when omitted. */
|
|
@@ -1533,6 +1724,44 @@ interface Realtime2Options {
|
|
|
1533
1724
|
* low-latency transcript partials.
|
|
1534
1725
|
*/
|
|
1535
1726
|
inputAudioTranscriptionModel?: string;
|
|
1727
|
+
/**
|
|
1728
|
+
* Input noise reduction for speakerphone / conference audio. `undefined`
|
|
1729
|
+
* (default) omits the field (no reduction). `"far_field"` recommended for
|
|
1730
|
+
* phone / speakerphone calls; `"near_field"` for a handset close to the
|
|
1731
|
+
* mouth. On the GA endpoint this is nested under
|
|
1732
|
+
* `audio.input.input_audio_noise_reduction: { type }`.
|
|
1733
|
+
* Mirrors `openai_realtime_noise_reduction` on `Patter.agent()`.
|
|
1734
|
+
*/
|
|
1735
|
+
noiseReduction?: 'near_field' | 'far_field';
|
|
1736
|
+
/**
|
|
1737
|
+
* Turn-detection tuning. `undefined` (default) keeps the adapter's
|
|
1738
|
+
* current hardcoded `server_vad` / threshold `0.5` / silence 300 ms.
|
|
1739
|
+
* Raise threshold or switch to `semantic_vad` eagerness `'low'` to stop
|
|
1740
|
+
* speakerphone noise from triggering false barge-ins.
|
|
1741
|
+
*
|
|
1742
|
+
* Maps to `turn_detection` on the Python `engines.openai_realtime_2.Realtime2`
|
|
1743
|
+
* marker; propagates to `realtimeTurnDetection` on `AgentOptions`.
|
|
1744
|
+
*/
|
|
1745
|
+
turnDetection?: RealtimeTurnDetection;
|
|
1746
|
+
/**
|
|
1747
|
+
* Gate the model's response on the Whisper transcript (legacy behavior).
|
|
1748
|
+
*
|
|
1749
|
+
* `false` (default) — the speech-to-speech model responds as soon as the
|
|
1750
|
+
* user stops speaking (on `speech_stopped`), independently of the Whisper
|
|
1751
|
+
* input transcription. The transcript becomes a pure observability
|
|
1752
|
+
* side-channel (dashboard / history / `onTranscript`) and never gates,
|
|
1753
|
+
* triggers, or cancels the response. This reclaims ~500 ms of latency
|
|
1754
|
+
* because the model no longer waits for Whisper.
|
|
1755
|
+
*
|
|
1756
|
+
* `true` — restores the prior behavior where the response is requested
|
|
1757
|
+
* only after the Whisper `transcript_input` event arrives and passes the
|
|
1758
|
+
* hallucination filter.
|
|
1759
|
+
*
|
|
1760
|
+
* Maps to `gate_response_on_transcript` on the Python
|
|
1761
|
+
* `engines.openai_realtime_2.Realtime2` marker; propagates to
|
|
1762
|
+
* `openaiRealtimeGateResponseOnTranscript` on `AgentOptions`.
|
|
1763
|
+
*/
|
|
1764
|
+
gateResponseOnTranscript?: boolean;
|
|
1536
1765
|
}
|
|
1537
1766
|
/**
|
|
1538
1767
|
* OpenAI Realtime 2 engine marker — selects `gpt-realtime-2` on the GA
|
|
@@ -1557,6 +1786,9 @@ declare class Realtime2 {
|
|
|
1557
1786
|
readonly voice: string;
|
|
1558
1787
|
readonly reasoningEffort?: 'minimal' | 'low' | 'medium' | 'high';
|
|
1559
1788
|
readonly inputAudioTranscriptionModel?: string;
|
|
1789
|
+
readonly noiseReduction?: 'near_field' | 'far_field';
|
|
1790
|
+
readonly turnDetection?: RealtimeTurnDetection;
|
|
1791
|
+
readonly gateResponseOnTranscript?: boolean;
|
|
1560
1792
|
constructor(opts?: Realtime2Options);
|
|
1561
1793
|
}
|
|
1562
1794
|
|
|
@@ -1701,6 +1933,33 @@ interface ToolOptions {
|
|
|
1701
1933
|
handler?: ToolHandler;
|
|
1702
1934
|
/** URL to POST to when the LLM invokes the tool. */
|
|
1703
1935
|
webhookUrl?: string;
|
|
1936
|
+
/**
|
|
1937
|
+
* Optional reassurance filler the agent speaks while a slow tool call runs.
|
|
1938
|
+
* Two forms:
|
|
1939
|
+
* - `string`: shorthand for `{ message: <string>, afterMs: 1500 }`.
|
|
1940
|
+
* - object: explicit `{ message, afterMs? }`.
|
|
1941
|
+
* Currently honoured only in Realtime mode. Off by default.
|
|
1942
|
+
*
|
|
1943
|
+
* Mirrors Python `reassurance` on `Tool` / `tool()`.
|
|
1944
|
+
*/
|
|
1945
|
+
reassurance?: string | {
|
|
1946
|
+
message: string;
|
|
1947
|
+
afterMs?: number;
|
|
1948
|
+
};
|
|
1949
|
+
/**
|
|
1950
|
+
* Per-tool execution timeout in milliseconds, applied to BOTH the handler
|
|
1951
|
+
* and webhook paths. `undefined` (default) uses the executor default
|
|
1952
|
+
* (10 000 ms). Raise for long browser-automation / external-API tools
|
|
1953
|
+
* (e.g. `60_000`). Clamped to a 300 000 ms ceiling by the executor.
|
|
1954
|
+
*
|
|
1955
|
+
* Mirrors Python `timeout_s` on `Tool` / `tool()`.
|
|
1956
|
+
*/
|
|
1957
|
+
timeoutMs?: number;
|
|
1958
|
+
/**
|
|
1959
|
+
* Enable OpenAI strict mode for this tool's function schema. Mirrors
|
|
1960
|
+
* Python `strict` on `Tool`. Off by default.
|
|
1961
|
+
*/
|
|
1962
|
+
strict?: boolean;
|
|
1704
1963
|
}
|
|
1705
1964
|
/**
|
|
1706
1965
|
* Tool definition. Structurally matches `ToolDefinition` so it drops
|
|
@@ -1724,6 +1983,20 @@ declare class Tool implements ToolDefinition {
|
|
|
1724
1983
|
readonly parameters: Record<string, unknown>;
|
|
1725
1984
|
readonly handler?: ToolHandler;
|
|
1726
1985
|
readonly webhookUrl?: string;
|
|
1986
|
+
readonly reassurance?: string | Readonly<{
|
|
1987
|
+
message: string;
|
|
1988
|
+
afterMs?: number;
|
|
1989
|
+
}>;
|
|
1990
|
+
/**
|
|
1991
|
+
* Per-tool execution timeout in milliseconds. `undefined` uses the
|
|
1992
|
+
* executor default (10 000 ms). Mirrors Python `timeout_s`.
|
|
1993
|
+
*/
|
|
1994
|
+
readonly timeoutMs?: number;
|
|
1995
|
+
/**
|
|
1996
|
+
* Enable OpenAI strict mode for this tool's function schema. Off by
|
|
1997
|
+
* default. Mirrors Python `strict` on `Tool`.
|
|
1998
|
+
*/
|
|
1999
|
+
readonly strict?: boolean;
|
|
1727
2000
|
constructor(opts: ToolOptions);
|
|
1728
2001
|
}
|
|
1729
2002
|
/** Factory helper mirroring Python's `tool(...)` function. */
|
|
@@ -1850,6 +2123,8 @@ interface PerToolState {
|
|
|
1850
2123
|
state: CircuitBreakerState;
|
|
1851
2124
|
consecutiveFailures: number;
|
|
1852
2125
|
openedAt: number;
|
|
2126
|
+
/** True while a HALF_OPEN probe call is already in-flight. */
|
|
2127
|
+
probeInFlight: boolean;
|
|
1853
2128
|
}
|
|
1854
2129
|
/** Per-name registry tracking circuit state for a fleet of tools. */
|
|
1855
2130
|
declare class CircuitBreakerRegistry {
|
|
@@ -1888,7 +2163,7 @@ declare class CircuitBreakerRegistry {
|
|
|
1888
2163
|
* Avoids a circular import from metrics.ts.
|
|
1889
2164
|
*/
|
|
1890
2165
|
interface LlmUsageRecorder {
|
|
1891
|
-
recordLlmUsage(provider: string, model: string, inputTokens: number, outputTokens: number, cacheReadTokens?: number,
|
|
2166
|
+
recordLlmUsage(provider: string, model: string, inputTokens: number, outputTokens: number, cacheReadTokens?: number, cacheWriteTokens?: number): void;
|
|
1892
2167
|
}
|
|
1893
2168
|
/**
|
|
1894
2169
|
* Pluggable tool executor — mirrors the Python ``ToolExecutor`` in
|
|
@@ -1956,7 +2231,7 @@ interface LLMChunk {
|
|
|
1956
2231
|
inputTokens?: number;
|
|
1957
2232
|
outputTokens?: number;
|
|
1958
2233
|
cacheReadInputTokens?: number;
|
|
1959
|
-
|
|
2234
|
+
cacheWriteInputTokens?: number;
|
|
1960
2235
|
}
|
|
1961
2236
|
/**
|
|
1962
2237
|
* Interface that any LLM provider must satisfy.
|
|
@@ -2250,13 +2525,55 @@ type MCPServerConfig = string | {
|
|
|
2250
2525
|
/** Optional logical name for telemetry / log lines. */
|
|
2251
2526
|
readonly name?: string;
|
|
2252
2527
|
};
|
|
2528
|
+
/**
|
|
2529
|
+
* OpenAI Realtime turn-detection tuning.
|
|
2530
|
+
*
|
|
2531
|
+
* Raise the VAD {@link threshold} (`server_vad`) or switch to
|
|
2532
|
+
* `semantic_vad` with {@link eagerness} `'low'` to stop speakerphone /
|
|
2533
|
+
* conference-room noise (mouse clicks, phone shifts, background chatter)
|
|
2534
|
+
* from being mistaken for the caller speaking and cutting the agent off.
|
|
2535
|
+
*
|
|
2536
|
+
* Each unset field falls back to the adapter's current default
|
|
2537
|
+
* (`server_vad`, threshold `0.5`, `prefixPaddingMs` `300`,
|
|
2538
|
+
* `silenceDurationMs` `300`). `type === 'semantic_vad'` emits
|
|
2539
|
+
* `{ type, eagerness }` only — OpenAI rejects `threshold` /
|
|
2540
|
+
* `prefixPaddingMs` / `silenceDurationMs` on the semantic detector.
|
|
2541
|
+
* `createResponse` / `interruptResponse` are NOT exposed (Patter keeps
|
|
2542
|
+
* its client-gated barge-in safety values).
|
|
2543
|
+
*
|
|
2544
|
+
* Mirrors Python `RealtimeTurnDetection` dataclass in `models.py`.
|
|
2545
|
+
*/
|
|
2546
|
+
interface RealtimeTurnDetection {
|
|
2547
|
+
/** `"server_vad"` (default) or `"semantic_vad"`. */
|
|
2548
|
+
readonly type?: 'server_vad' | 'semantic_vad';
|
|
2549
|
+
/**
|
|
2550
|
+
* `server_vad` only — 0..1, higher rejects more background noise.
|
|
2551
|
+
* `undefined` keeps the adapter default (`0.5`).
|
|
2552
|
+
*/
|
|
2553
|
+
readonly threshold?: number;
|
|
2554
|
+
/**
|
|
2555
|
+
* `server_vad` only — milliseconds of audio retained before the
|
|
2556
|
+
* detected start of speech. `undefined` keeps the adapter default (`300`).
|
|
2557
|
+
*/
|
|
2558
|
+
readonly prefixPaddingMs?: number;
|
|
2559
|
+
/**
|
|
2560
|
+
* `server_vad` only — trailing silence (ms) before the turn ends.
|
|
2561
|
+
* `undefined` keeps the adapter default (`300`).
|
|
2562
|
+
*/
|
|
2563
|
+
readonly silenceDurationMs?: number;
|
|
2564
|
+
/**
|
|
2565
|
+
* `semantic_vad` only — `"low"` lets the caller finish (least likely
|
|
2566
|
+
* to interrupt), through `"high"` / `"auto"`.
|
|
2567
|
+
*/
|
|
2568
|
+
readonly eagerness?: 'low' | 'medium' | 'high' | 'auto';
|
|
2569
|
+
}
|
|
2253
2570
|
/** Internal shape of a tool definition (matches `Tool` from `public-api.ts`). */
|
|
2254
2571
|
interface ToolDefinition {
|
|
2255
|
-
name: string;
|
|
2256
|
-
description: string;
|
|
2257
|
-
parameters: Record<string, unknown
|
|
2572
|
+
readonly name: string;
|
|
2573
|
+
readonly description: string;
|
|
2574
|
+
readonly parameters: Readonly<Record<string, unknown>>;
|
|
2258
2575
|
/** Webhook URL — called when the LLM invokes this tool. Mutually exclusive with handler. */
|
|
2259
|
-
webhookUrl?: string;
|
|
2576
|
+
readonly webhookUrl?: string;
|
|
2260
2577
|
/**
|
|
2261
2578
|
* Local handler — called instead of ``webhookUrl`` when present.
|
|
2262
2579
|
*
|
|
@@ -2274,7 +2591,7 @@ interface ToolDefinition {
|
|
|
2274
2591
|
* ignores the progress yields — the final value is still used as
|
|
2275
2592
|
* the tool result.
|
|
2276
2593
|
*/
|
|
2277
|
-
handler?: ((args: Record<string, unknown>, context: Record<string, unknown>) => Promise<string>) | ((args: Record<string, unknown>, context: Record<string, unknown>) => AsyncGenerator<{
|
|
2594
|
+
readonly handler?: ((args: Record<string, unknown>, context: Record<string, unknown>) => Promise<string>) | ((args: Record<string, unknown>, context: Record<string, unknown>) => AsyncGenerator<{
|
|
2278
2595
|
progress?: string;
|
|
2279
2596
|
result?: string;
|
|
2280
2597
|
}, string | void, unknown>);
|
|
@@ -2294,10 +2611,10 @@ interface ToolDefinition {
|
|
|
2294
2611
|
* synthesises it inline. Pipeline mode has no clean injection point
|
|
2295
2612
|
* mid-turn yet; the option is silently ignored there. Off by default.
|
|
2296
2613
|
*/
|
|
2297
|
-
reassurance?: string | {
|
|
2614
|
+
readonly reassurance?: string | Readonly<{
|
|
2298
2615
|
message: string;
|
|
2299
2616
|
afterMs?: number;
|
|
2300
|
-
}
|
|
2617
|
+
}>;
|
|
2301
2618
|
/**
|
|
2302
2619
|
* Enable OpenAI strict mode for this tool's function schema. When ``true``
|
|
2303
2620
|
* the model is constrained to emit arguments that exactly match the
|
|
@@ -2318,7 +2635,123 @@ interface ToolDefinition {
|
|
|
2318
2635
|
* Recommended for any tool whose handler/webhook can't safely tolerate
|
|
2319
2636
|
* malformed arguments (DB writes, payment, transfers).
|
|
2320
2637
|
*/
|
|
2321
|
-
strict?: boolean;
|
|
2638
|
+
readonly strict?: boolean;
|
|
2639
|
+
/**
|
|
2640
|
+
* Per-tool execution timeout in milliseconds, applied to BOTH the handler
|
|
2641
|
+
* and webhook paths. `undefined` (default) uses the executor default
|
|
2642
|
+
* (10 000 ms). Raise for long browser-automation / external-API tools
|
|
2643
|
+
* (e.g. `60_000`). Clamped to a 300 000 ms ceiling by the executor.
|
|
2644
|
+
*
|
|
2645
|
+
* Mirrors Python's `timeout_s` on `Tool` / `tool()`.
|
|
2646
|
+
*/
|
|
2647
|
+
readonly timeoutMs?: number;
|
|
2648
|
+
}
|
|
2649
|
+
/**
|
|
2650
|
+
* Configuration for the built-in ``consult`` escalation tool.
|
|
2651
|
+
*
|
|
2652
|
+
* When set on an agent, Patter auto-injects a tool (default name
|
|
2653
|
+
* ``consult_agent``) that the in-call agent can invoke mid-call to reach the
|
|
2654
|
+
* caller's own back-office agent over HTTP for deeper reasoning, fresh
|
|
2655
|
+
* information, or an action beyond the call. Patter keeps STT + LLM/voice +
|
|
2656
|
+
* TTS + carrier; the back-office agent is consulted only on demand (never on
|
|
2657
|
+
* the per-turn path). The tool POSTs ``{ request, call_id, caller, callee }``
|
|
2658
|
+
* to {@link url}; the endpoint returns JSON with a ``reply`` / ``response`` /
|
|
2659
|
+
* ``text`` string (or any JSON / plain text) and the agent speaks it.
|
|
2660
|
+
*
|
|
2661
|
+
* Injected in **Realtime** and **Pipeline** modes only — ElevenLabs ConvAI
|
|
2662
|
+
* tools live on the ElevenLabs-hosted agent, so ``consult`` does not apply
|
|
2663
|
+
* there (a warning is emitted if set with that provider).
|
|
2664
|
+
*/
|
|
2665
|
+
interface ConsultConfig {
|
|
2666
|
+
/**
|
|
2667
|
+
* Generic webhook endpoint Patter POSTs ``{ request, call_id, caller, callee }``
|
|
2668
|
+
* to. SSRF-validated at call start. Mutually exclusive with
|
|
2669
|
+
* {@link openaiCompatible} — set exactly one.
|
|
2670
|
+
*/
|
|
2671
|
+
readonly url?: string;
|
|
2672
|
+
/**
|
|
2673
|
+
* Native target that speaks an OpenAI-compatible ``/chat/completions``
|
|
2674
|
+
* endpoint directly (e.g. an OpenClaw agent, or vLLM / Ollama / Groq) — no
|
|
2675
|
+
* hand-written adapter. Mutually exclusive with {@link url}. Use
|
|
2676
|
+
* {@link openclawConsult} for the OpenClaw preset.
|
|
2677
|
+
*/
|
|
2678
|
+
readonly openaiCompatible?: OpenAICompatibleConsult;
|
|
2679
|
+
/** Optional headers (e.g. an ``Authorization`` bearer). Never logged. */
|
|
2680
|
+
readonly headers?: Readonly<Record<string, string>>;
|
|
2681
|
+
/**
|
|
2682
|
+
* Per-consult HTTP timeout in milliseconds. Higher than the generic
|
|
2683
|
+
* webhook-tool default (10 000 ms) because a consult may run deeper
|
|
2684
|
+
* reasoning. Default ``30000``.
|
|
2685
|
+
*/
|
|
2686
|
+
readonly timeoutMs?: number;
|
|
2687
|
+
/** Name the LLM sees for the tool. Default ``"consult_agent"``. */
|
|
2688
|
+
readonly toolName?: string;
|
|
2689
|
+
/** Description the LLM sees — tune to steer when the agent escalates. */
|
|
2690
|
+
readonly description?: string;
|
|
2691
|
+
/**
|
|
2692
|
+
* Optional filler the agent speaks while the consult runs (Realtime mode
|
|
2693
|
+
* only) so a multi-second back-office call is not dead air. When omitted, no
|
|
2694
|
+
* filler plays; the {@link openclawConsult} preset sets a sensible default.
|
|
2695
|
+
*/
|
|
2696
|
+
readonly reassurance?: string | Readonly<{
|
|
2697
|
+
message: string;
|
|
2698
|
+
afterMs?: number;
|
|
2699
|
+
}>;
|
|
2700
|
+
/**
|
|
2701
|
+
* Opt-in: allow {@link url} to point at a loopback / private / link-local
|
|
2702
|
+
* host (e.g. a back-office agent on ``127.0.0.1`` or an RFC1918 LAN host).
|
|
2703
|
+
*
|
|
2704
|
+
* Default ``false`` (or ``undefined``) — the URL is SSRF-validated and
|
|
2705
|
+
* loopback/private/link-local targets are rejected, preserving the strict
|
|
2706
|
+
* default behaviour. Set ``true`` ONLY for a trusted, developer-configured
|
|
2707
|
+
* local agent: the URL is your own config, not caller-derived input.
|
|
2708
|
+
*
|
|
2709
|
+
* Even when ``true``, non-HTTP(S) schemes (``file:``, ``javascript:`` …)
|
|
2710
|
+
* are still rejected. Note: opting in also makes cloud-metadata hostnames
|
|
2711
|
+
* (``metadata``, ``metadata.google.internal``, ``metadata.azure.com``) and
|
|
2712
|
+
* the IMDS IP ``169.254.169.254`` reachable — an accepted tradeoff for a URL
|
|
2713
|
+
* you control. Scoped ONLY to
|
|
2714
|
+
* the consult tool; the generic webhook-tool path stays strict.
|
|
2715
|
+
*/
|
|
2716
|
+
readonly allowLoopback?: boolean;
|
|
2717
|
+
}
|
|
2718
|
+
/**
|
|
2719
|
+
* Native {@link ConsultConfig} target that speaks an OpenAI-compatible
|
|
2720
|
+
* ``/chat/completions`` endpoint directly — no hand-written adapter.
|
|
2721
|
+
*
|
|
2722
|
+
* Lets ``consult`` reach an OpenClaw agent (or any OpenAI-compatible gateway:
|
|
2723
|
+
* vLLM, Ollama, Groq, …). The consult handler builds a standard chat-completions
|
|
2724
|
+
* request (``model`` + ``messages`` + ``user``) and speaks
|
|
2725
|
+
* ``choices[0].message.content``. Prefer {@link openclawConsult} for the
|
|
2726
|
+
* OpenClaw preset rather than constructing this directly.
|
|
2727
|
+
*/
|
|
2728
|
+
interface OpenAICompatibleConsult {
|
|
2729
|
+
/**
|
|
2730
|
+
* OpenAI-compatible base URL ending in ``/v1`` (the handler POSTs to
|
|
2731
|
+
* ``{baseUrl}/chat/completions``), e.g. ``http://127.0.0.1:18789/v1``.
|
|
2732
|
+
*/
|
|
2733
|
+
readonly baseUrl: string;
|
|
2734
|
+
/**
|
|
2735
|
+
* Model / agent target. For OpenClaw this is the namespaced agent id, e.g.
|
|
2736
|
+
* ``"openclaw/receptionist"``.
|
|
2737
|
+
*/
|
|
2738
|
+
readonly model: string;
|
|
2739
|
+
/**
|
|
2740
|
+
* Bearer token. Prefer {@link apiKeyEnv} so the secret stays out of source.
|
|
2741
|
+
* For OpenClaw this is an OPERATOR-grade credential — never logged.
|
|
2742
|
+
*/
|
|
2743
|
+
readonly apiKey?: string;
|
|
2744
|
+
/**
|
|
2745
|
+
* Environment variable to read the bearer from when {@link apiKey} is not
|
|
2746
|
+
* given (e.g. ``"OPENCLAW_API_KEY"``).
|
|
2747
|
+
*/
|
|
2748
|
+
readonly apiKeyEnv?: string;
|
|
2749
|
+
/**
|
|
2750
|
+
* Optional header carrying the per-call session id (the call id), e.g.
|
|
2751
|
+
* ``"x-openclaw-session-key"``. The call id is also sent as the OpenAI
|
|
2752
|
+
* ``user`` field.
|
|
2753
|
+
*/
|
|
2754
|
+
readonly sessionHeader?: string;
|
|
2322
2755
|
}
|
|
2323
2756
|
/** Constructor options for `new Patter({...})` in local-server mode. */
|
|
2324
2757
|
interface LocalOptions {
|
|
@@ -2331,14 +2764,14 @@ interface LocalOptions {
|
|
|
2331
2764
|
* const phone = new Patter({ carrier: new Twilio(), phoneNumber: "+1..." });
|
|
2332
2765
|
* ```
|
|
2333
2766
|
*/
|
|
2334
|
-
carrier: Carrier$2 | Carrier$1 | Carrier;
|
|
2767
|
+
readonly carrier: Carrier$2 | Carrier$1 | Carrier;
|
|
2335
2768
|
/**
|
|
2336
2769
|
* Tunnel configuration. Accepts a tunnel instance, ``true`` (alias for
|
|
2337
2770
|
* ``new CloudflareTunnel()``), or ``false`` / omitted (no tunnel).
|
|
2338
2771
|
*/
|
|
2339
|
-
tunnel?: CloudflareTunnel | Static | boolean;
|
|
2340
|
-
phoneNumber: string;
|
|
2341
|
-
webhookUrl?: string;
|
|
2772
|
+
readonly tunnel?: CloudflareTunnel | Static | boolean;
|
|
2773
|
+
readonly phoneNumber: string;
|
|
2774
|
+
readonly webhookUrl?: string;
|
|
2342
2775
|
/**
|
|
2343
2776
|
* On-disk persistence for the dashboard's call history. The dashboard
|
|
2344
2777
|
* itself is in-memory, but enabling ``persist`` writes per-call records
|
|
@@ -2366,25 +2799,25 @@ interface LocalOptions {
|
|
|
2366
2799
|
* Phone numbers are masked by default; control via
|
|
2367
2800
|
* ``PATTER_LOG_REDACT_PHONE``.
|
|
2368
2801
|
*/
|
|
2369
|
-
persist?: boolean | string;
|
|
2802
|
+
readonly persist?: boolean | string;
|
|
2370
2803
|
/**
|
|
2371
2804
|
* @internal — allows ``StreamHandler`` to build the default OpenAI
|
|
2372
2805
|
* ``LLMLoop`` when no ``onMessage`` handler is supplied. The
|
|
2373
2806
|
* ``OpenAIRealtime`` engine instance carries its own key when one is
|
|
2374
2807
|
* used via ``phone.agent({ engine: new OpenAIRealtime({ apiKey }) })``.
|
|
2375
2808
|
*/
|
|
2376
|
-
openaiKey?: string;
|
|
2809
|
+
readonly openaiKey?: string;
|
|
2377
2810
|
}
|
|
2378
2811
|
/** Internal shape of a guardrail (matches `Guardrail` class from `public-api.ts`). */
|
|
2379
2812
|
interface Guardrail {
|
|
2380
2813
|
/** Name for logging when triggered */
|
|
2381
|
-
name: string;
|
|
2814
|
+
readonly name: string;
|
|
2382
2815
|
/** List of terms that trigger the guardrail (case-insensitive) */
|
|
2383
|
-
blockedTerms?: string
|
|
2816
|
+
readonly blockedTerms?: ReadonlyArray<string>;
|
|
2384
2817
|
/** Custom check function — return true to block the response */
|
|
2385
|
-
check?: (text: string) => boolean;
|
|
2818
|
+
readonly check?: (text: string) => boolean;
|
|
2386
2819
|
/** Replacement text spoken when guardrail triggers */
|
|
2387
|
-
replacement?: string;
|
|
2820
|
+
readonly replacement?: string;
|
|
2388
2821
|
}
|
|
2389
2822
|
/** Per-call context passed to every pipeline hook. */
|
|
2390
2823
|
interface HookContext {
|
|
@@ -2493,29 +2926,29 @@ interface BackgroundAudioPlayer$1 {
|
|
|
2493
2926
|
*/
|
|
2494
2927
|
/** Configuration for a local-mode voice AI agent (passed to `phone.agent({...})`). */
|
|
2495
2928
|
interface AgentOptions {
|
|
2496
|
-
systemPrompt: string;
|
|
2929
|
+
readonly systemPrompt: string;
|
|
2497
2930
|
/**
|
|
2498
2931
|
* Voice preset. When ``engine`` is provided, its ``voice`` is used unless
|
|
2499
2932
|
* explicitly overridden here. Format depends on the engine:
|
|
2500
2933
|
* OpenAI Realtime accepts a name (``'alloy'``, ``'echo'``, ...);
|
|
2501
2934
|
* ElevenLabs ConvAI accepts a voice ID.
|
|
2502
2935
|
*/
|
|
2503
|
-
voice?: string;
|
|
2936
|
+
readonly voice?: string;
|
|
2504
2937
|
/**
|
|
2505
2938
|
* LLM / Realtime model. When ``engine`` is provided, its ``model`` is used
|
|
2506
2939
|
* unless explicitly overridden here.
|
|
2507
2940
|
*/
|
|
2508
|
-
model?: string;
|
|
2941
|
+
readonly model?: string;
|
|
2509
2942
|
/**
|
|
2510
2943
|
* BCP-47 language code (e.g. ``'en'``, ``'it'``). Forwarded to STT (in
|
|
2511
2944
|
* pipeline mode) and to the engine adapter at call time. STTConfig has its
|
|
2512
2945
|
* own ``language`` field for the rare case where STT must use a different
|
|
2513
2946
|
* language than the rest of the pipeline.
|
|
2514
2947
|
*/
|
|
2515
|
-
language?: string;
|
|
2516
|
-
firstMessage?: string;
|
|
2948
|
+
readonly language?: string;
|
|
2949
|
+
readonly firstMessage?: string;
|
|
2517
2950
|
/** Tool definitions — ``Tool`` class instances from ``getpatter``. */
|
|
2518
|
-
tools?:
|
|
2951
|
+
readonly tools?: ReadonlyArray<Tool>;
|
|
2519
2952
|
/**
|
|
2520
2953
|
* Model Context Protocol (MCP) servers to plug into this agent. Each
|
|
2521
2954
|
* server is queried at call start via ``tools/list`` and its tools
|
|
@@ -2536,14 +2969,23 @@ interface AgentOptions {
|
|
|
2536
2969
|
* call start (~50-200 ms × N servers). Future iterations may cache
|
|
2537
2970
|
* the discovered list process-wide.
|
|
2538
2971
|
*/
|
|
2539
|
-
mcpServers?: ReadonlyArray<MCPServerConfig>;
|
|
2972
|
+
readonly mcpServers?: ReadonlyArray<MCPServerConfig>;
|
|
2973
|
+
/**
|
|
2974
|
+
* Optional back-office "consult" escalation. When set, Patter auto-injects a
|
|
2975
|
+
* ``consult_agent`` tool (Realtime + Pipeline modes) that the in-call agent
|
|
2976
|
+
* can invoke to reach the caller's own orchestrator over HTTP for deeper
|
|
2977
|
+
* reasoning / fresh info, then speak the reply. The orchestrator stays off
|
|
2978
|
+
* the per-turn path — consulted only on demand. ``undefined`` (default)
|
|
2979
|
+
* disables it. See {@link ConsultConfig}.
|
|
2980
|
+
*/
|
|
2981
|
+
readonly consult?: ConsultConfig;
|
|
2540
2982
|
/**
|
|
2541
2983
|
* When ``true``, ship ``systemPrompt`` to the LLM verbatim. Default
|
|
2542
2984
|
* (``false``) prepends a phone-friendly preamble that instructs the
|
|
2543
2985
|
* model to avoid markdown, emojis, bullet lists, and verbose replies —
|
|
2544
2986
|
* the conventions live phone calls require.
|
|
2545
2987
|
*/
|
|
2546
|
-
disablePhonePreamble?: boolean;
|
|
2988
|
+
readonly disablePhonePreamble?: boolean;
|
|
2547
2989
|
/**
|
|
2548
2990
|
* Acoustic echo cancellation. When `true` (pipeline mode only) the SDK
|
|
2549
2991
|
* instantiates an `NlmsEchoCanceller` that subtracts the agent's own
|
|
@@ -2555,53 +2997,53 @@ interface AgentOptions {
|
|
|
2555
2997
|
* convergence period would briefly attenuate caller speech if they
|
|
2556
2998
|
* spoke before any TTS played.
|
|
2557
2999
|
*/
|
|
2558
|
-
echoCancellation?: boolean;
|
|
3000
|
+
readonly echoCancellation?: boolean;
|
|
2559
3001
|
/**
|
|
2560
3002
|
* Realtime / ConvAI engine instance. When present, the agent runs in the
|
|
2561
3003
|
* matching mode (``openai_realtime`` or ``elevenlabs_convai``). When absent,
|
|
2562
3004
|
* pipeline mode is selected if ``stt`` and ``tts`` are provided.
|
|
2563
3005
|
*/
|
|
2564
|
-
engine?: Realtime | Realtime2 | ConvAI;
|
|
3006
|
+
readonly engine?: Realtime | Realtime2 | ConvAI;
|
|
2565
3007
|
/**
|
|
2566
3008
|
* Provider mode. Normally derived from ``engine`` / ``stt`` + ``tts``. Pass
|
|
2567
3009
|
* ``'pipeline'`` explicitly when building a pipeline-mode agent without
|
|
2568
3010
|
* an engine instance.
|
|
2569
3011
|
*/
|
|
2570
|
-
provider?: 'openai_realtime' | 'elevenlabs_convai' | 'pipeline';
|
|
3012
|
+
readonly provider?: 'openai_realtime' | 'elevenlabs_convai' | 'pipeline';
|
|
2571
3013
|
/** Pre-instantiated STT adapter (e.g. ``new DeepgramSTT({ apiKey })``). */
|
|
2572
|
-
stt?: STTAdapter;
|
|
3014
|
+
readonly stt?: STTAdapter;
|
|
2573
3015
|
/** Pre-instantiated TTS adapter (e.g. ``new ElevenLabsTTS({ apiKey })``). */
|
|
2574
|
-
tts?: TTSAdapter;
|
|
3016
|
+
readonly tts?: TTSAdapter;
|
|
2575
3017
|
/**
|
|
2576
3018
|
* Pipeline-mode LLM provider (e.g. ``new AnthropicLLM()``). When set, the
|
|
2577
3019
|
* built-in LLM loop uses this provider instead of the OpenAI default.
|
|
2578
3020
|
* Mutually exclusive with ``onMessage`` passed to ``serve()``. Ignored
|
|
2579
3021
|
* when ``engine`` is set (realtime mode bypasses the pipeline LLM).
|
|
2580
3022
|
*/
|
|
2581
|
-
llm?: LLMProvider;
|
|
3023
|
+
readonly llm?: LLMProvider;
|
|
2582
3024
|
/** Dynamic variables for ``{placeholder}`` substitution in systemPrompt at call time. */
|
|
2583
|
-
variables?: Record<string, string
|
|
3025
|
+
readonly variables?: Readonly<Record<string, string>>;
|
|
2584
3026
|
/** Output guardrails — ``Guardrail`` class instances from ``getpatter``. */
|
|
2585
|
-
guardrails?:
|
|
3027
|
+
readonly guardrails?: ReadonlyArray<Guardrail>;
|
|
2586
3028
|
/** Pipeline hooks — intercept and transform data at each pipeline stage (pipeline mode only). */
|
|
2587
|
-
hooks?: PipelineHooks;
|
|
3029
|
+
readonly hooks?: PipelineHooks;
|
|
2588
3030
|
/** Text transforms applied to LLM output before TTS (pipeline mode only).
|
|
2589
3031
|
* Each function receives a string and returns the transformed string.
|
|
2590
3032
|
* Applied in order before the ``beforeSynthesize`` hook. */
|
|
2591
|
-
textTransforms?:
|
|
3033
|
+
readonly textTransforms?: ReadonlyArray<(text: string) => string>;
|
|
2592
3034
|
/** Optional server-side VAD (e.g., Silero). Pipeline mode only. */
|
|
2593
|
-
vad?: VADProvider;
|
|
3035
|
+
readonly vad?: VADProvider;
|
|
2594
3036
|
/** Optional pre-STT audio filter (noise cancellation). Pipeline mode only. */
|
|
2595
|
-
audioFilter?: AudioFilter;
|
|
3037
|
+
readonly audioFilter?: AudioFilter;
|
|
2596
3038
|
/** Optional background audio mixer (hold music, thinking cues). Pipeline mode only. */
|
|
2597
|
-
backgroundAudio?: BackgroundAudioPlayer$1;
|
|
3039
|
+
readonly backgroundAudio?: BackgroundAudioPlayer$1;
|
|
2598
3040
|
/**
|
|
2599
3041
|
* Minimum sustained voice (ms) before treating caller audio as a barge-in
|
|
2600
3042
|
* and interrupting TTS. `0` disables barge-in entirely — useful on noisy
|
|
2601
3043
|
* links (ngrok tunnels, speakerphone) where the agent can hear itself.
|
|
2602
3044
|
* Default: 300.
|
|
2603
3045
|
*/
|
|
2604
|
-
bargeInThresholdMs?: number;
|
|
3046
|
+
readonly bargeInThresholdMs?: number;
|
|
2605
3047
|
/**
|
|
2606
3048
|
* Opt-in barge-in confirmation strategies (pipeline mode). With the
|
|
2607
3049
|
* default empty array the SDK falls back to the legacy
|
|
@@ -2618,14 +3060,14 @@ interface AgentOptions {
|
|
|
2618
3060
|
* ``MinWordsStrategy`` for the protocol and a reference
|
|
2619
3061
|
* implementation.
|
|
2620
3062
|
*/
|
|
2621
|
-
bargeInStrategies?: readonly BargeInStrategy[];
|
|
3063
|
+
readonly bargeInStrategies?: readonly BargeInStrategy[];
|
|
2622
3064
|
/**
|
|
2623
3065
|
* Maximum time (ms) to wait for at least one strategy to confirm a
|
|
2624
3066
|
* pending barge-in before discarding the pending state and resuming
|
|
2625
3067
|
* TTS. Only consulted when ``bargeInStrategies`` is non-empty.
|
|
2626
3068
|
* Default: 1500.
|
|
2627
3069
|
*/
|
|
2628
|
-
bargeInConfirmMs?: number;
|
|
3070
|
+
readonly bargeInConfirmMs?: number;
|
|
2629
3071
|
/**
|
|
2630
3072
|
* When ``true`` (default), ``Patter.call`` warms up the STT, TTS, and
|
|
2631
3073
|
* LLM provider connections in parallel with the carrier-side
|
|
@@ -2636,7 +3078,7 @@ interface AgentOptions {
|
|
|
2636
3078
|
* of the WebSocket bridge. Best-effort: warmup failures are logged
|
|
2637
3079
|
* at debug level and never abort the call. Default: ``true``.
|
|
2638
3080
|
*/
|
|
2639
|
-
prewarm?: boolean;
|
|
3081
|
+
readonly prewarm?: boolean;
|
|
2640
3082
|
/**
|
|
2641
3083
|
* When ``true`` (default since 0.6.2 in pipeline mode), ``Patter.call``
|
|
2642
3084
|
* pre-renders ``firstMessage`` to TTS audio bytes during the ringing
|
|
@@ -2655,7 +3097,7 @@ interface AgentOptions {
|
|
|
2655
3097
|
* ``Patter.call`` refuses to spawn the prewarm task and emits a warn
|
|
2656
3098
|
* when ``provider !== 'pipeline'``.
|
|
2657
3099
|
*/
|
|
2658
|
-
prewarmFirstMessage?: boolean;
|
|
3100
|
+
readonly prewarmFirstMessage?: boolean;
|
|
2659
3101
|
/**
|
|
2660
3102
|
* When true, the sentence chunker emits the first clause of each response
|
|
2661
3103
|
* on a soft punctuation boundary (",", em-dash, en-dash) once ~40 chars
|
|
@@ -2667,38 +3109,124 @@ interface AgentOptions {
|
|
|
2667
3109
|
* See SentenceChunker constructor for the full guard list (decimal,
|
|
2668
3110
|
* currency, balanced delimiter, ellipsis).
|
|
2669
3111
|
*/
|
|
2670
|
-
aggressiveFirstFlush?: boolean;
|
|
3112
|
+
readonly aggressiveFirstFlush?: boolean;
|
|
3113
|
+
/**
|
|
3114
|
+
* Input noise reduction for speakerphone / conference audio (OpenAI
|
|
3115
|
+
* Realtime mode only). `undefined` (default) omits the field entirely
|
|
3116
|
+
* (no reduction — today's behavior).
|
|
3117
|
+
*
|
|
3118
|
+
* - `"far_field"` — recommended for phone / speakerphone calls where
|
|
3119
|
+
* the mic is more than ~30 cm from the speaker.
|
|
3120
|
+
* - `"near_field"` — for a handset held close to the mouth.
|
|
3121
|
+
*
|
|
3122
|
+
* v1 Realtime: emitted at the top level of `session.update` as
|
|
3123
|
+
* `input_audio_noise_reduction: { type }`. GA Realtime (gpt-realtime-2):
|
|
3124
|
+
* nested under `audio.input.input_audio_noise_reduction: { type }`.
|
|
3125
|
+
*
|
|
3126
|
+
* Mirrors Python `openai_realtime_noise_reduction` on `Patter.agent()` /
|
|
3127
|
+
* `Agent` and `noise_reduction` on `engines.openai.Realtime`.
|
|
3128
|
+
*/
|
|
3129
|
+
readonly openaiRealtimeNoiseReduction?: 'near_field' | 'far_field';
|
|
3130
|
+
/**
|
|
3131
|
+
* Turn-detection tuning for OpenAI Realtime mode. `undefined` (default)
|
|
3132
|
+
* keeps the adapter's current hardcoded `server_vad` / threshold `0.5` /
|
|
3133
|
+
* silence 300 ms settings.
|
|
3134
|
+
*
|
|
3135
|
+
* Raise {@link RealtimeTurnDetection.threshold} (`server_vad`) or switch
|
|
3136
|
+
* to `semantic_vad` with `eagerness: 'low'` to stop speakerphone /
|
|
3137
|
+
* conference noise from triggering false barge-ins.
|
|
3138
|
+
*
|
|
3139
|
+
* Mirrors Python `realtime_turn_detection` on `Patter.agent()` / `Agent`
|
|
3140
|
+
* and `turn_detection` on `engines.openai.Realtime`.
|
|
3141
|
+
*/
|
|
3142
|
+
readonly realtimeTurnDetection?: RealtimeTurnDetection;
|
|
3143
|
+
/**
|
|
3144
|
+
* Gate the OpenAI Realtime model's response on the Whisper input
|
|
3145
|
+
* transcript (legacy behavior). OpenAI Realtime mode only.
|
|
3146
|
+
*
|
|
3147
|
+
* - `false` / `undefined` (default) — the speech-to-speech model responds
|
|
3148
|
+
* as soon as the user stops speaking (`speech_stopped`), independently
|
|
3149
|
+
* of the Whisper transcription. The transcript becomes a pure
|
|
3150
|
+
* observability side-channel (dashboard / history / `onTranscript`) and
|
|
3151
|
+
* never gates, triggers, or cancels the response. Reclaims ~500 ms of
|
|
3152
|
+
* latency because the model no longer waits for Whisper.
|
|
3153
|
+
* - `true` — restores the prior behavior where the response is requested
|
|
3154
|
+
* only after the Whisper `transcript_input` event arrives. Production
|
|
3155
|
+
* flows should keep the default; this is for callers that depended on
|
|
3156
|
+
* the old transcript-gated ordering.
|
|
3157
|
+
*
|
|
3158
|
+
* Mirrors Python `realtime_gate_response_on_transcript` on `Patter.agent()`
|
|
3159
|
+
* / `Agent` and `gate_response_on_transcript` on `engines.openai.Realtime`.
|
|
3160
|
+
*/
|
|
3161
|
+
readonly openaiRealtimeGateResponseOnTranscript?: boolean;
|
|
3162
|
+
/**
|
|
3163
|
+
* When set, Patter prepends a native "# Preambles" guidance block to the
|
|
3164
|
+
* OpenAI Realtime session `instructions` so the model speaks one short,
|
|
3165
|
+
* action-describing sentence ("I'll check that order now.") before a tool
|
|
3166
|
+
* call that may take a moment, in its own voice. Most effective on
|
|
3167
|
+
* `gpt-realtime-2`, where preambles are first-class.
|
|
3168
|
+
*
|
|
3169
|
+
* - `undefined` / `false` (default) — no change to the prompt; the
|
|
3170
|
+
* instructions stay byte-identical to prior releases.
|
|
3171
|
+
* - `true` — Patter prepends the built-in block.
|
|
3172
|
+
* - `string` — used verbatim as the full preamble block (override).
|
|
3173
|
+
*
|
|
3174
|
+
* Realtime modes only; pipeline mode has its own phone preamble (see
|
|
3175
|
+
* `disablePhonePreamble`). Mirrors Python `tool_call_preambles` on
|
|
3176
|
+
* `Patter.agent()` / `Agent`.
|
|
3177
|
+
*/
|
|
3178
|
+
readonly toolCallPreambles?: boolean | string;
|
|
2671
3179
|
}
|
|
2672
3180
|
/** Pipeline-mode message handler — given full turn context, returns the agent's reply. */
|
|
2673
3181
|
type PipelineMessageHandler = (data: Record<string, unknown>) => Promise<string>;
|
|
2674
3182
|
/** Options for `Patter.serve({...})`. */
|
|
2675
3183
|
interface ServeOptions {
|
|
2676
|
-
agent: AgentOptions;
|
|
2677
|
-
port?: number;
|
|
3184
|
+
readonly agent: AgentOptions;
|
|
3185
|
+
readonly port?: number;
|
|
2678
3186
|
/** When true, start a cloudflared tunnel automatically (requires `cloudflared` npm package). */
|
|
2679
|
-
tunnel?: boolean;
|
|
2680
|
-
onCallStart?: (data: Record<string, unknown>) => Promise<void>;
|
|
2681
|
-
onCallEnd?: (data: Record<string, unknown>) => Promise<void>;
|
|
2682
|
-
onTranscript?: (data: Record<string, unknown>) => Promise<void>;
|
|
3187
|
+
readonly tunnel?: boolean;
|
|
3188
|
+
readonly onCallStart?: (data: Record<string, unknown>) => Promise<void>;
|
|
3189
|
+
readonly onCallEnd?: (data: Record<string, unknown>) => Promise<void>;
|
|
3190
|
+
readonly onTranscript?: (data: Record<string, unknown>) => Promise<void>;
|
|
2683
3191
|
/** Pipeline mode only — called with the user's transcript; return value is spoken.
|
|
2684
3192
|
* Can also be a URL string for remote webhook/WebSocket integration. */
|
|
2685
|
-
onMessage?: PipelineMessageHandler | string;
|
|
3193
|
+
readonly onMessage?: PipelineMessageHandler | string;
|
|
2686
3194
|
/** Called after each turn with per-turn metrics. */
|
|
2687
|
-
onMetrics?: (data: Record<string, unknown>) => Promise<void>;
|
|
3195
|
+
readonly onMetrics?: (data: Record<string, unknown>) => Promise<void>;
|
|
2688
3196
|
/** When true, record calls via the Twilio Recordings API. */
|
|
2689
|
-
recording?: boolean;
|
|
3197
|
+
readonly recording?: boolean;
|
|
2690
3198
|
/** If set, spoken as a voicemail message when AMD detects a machine. */
|
|
2691
|
-
voicemailMessage?: string;
|
|
3199
|
+
readonly voicemailMessage?: string;
|
|
2692
3200
|
/** Custom pricing overrides for cost calculation. */
|
|
2693
|
-
pricing?: Record<string, Record<string, unknown>>;
|
|
3201
|
+
readonly pricing?: Readonly<Record<string, Record<string, unknown>>>;
|
|
2694
3202
|
/** When true (default), serve a dashboard UI at /dashboard. */
|
|
2695
|
-
dashboard?: boolean;
|
|
3203
|
+
readonly dashboard?: boolean;
|
|
2696
3204
|
/** Bearer token for dashboard/API authentication. */
|
|
2697
|
-
dashboardToken?: string;
|
|
3205
|
+
readonly dashboardToken?: string;
|
|
3206
|
+
/**
|
|
3207
|
+
* When true, serve the dashboard (and the call-data `/api/*` routes)
|
|
3208
|
+
* fully OPEN — WITHOUT authentication — even when the server is
|
|
3209
|
+
* reachable beyond loopback (e.g. behind a tunnel or a public webhook
|
|
3210
|
+
* URL). **NOT RECOMMENDED on a public network** — the dashboard exposes
|
|
3211
|
+
* call transcripts and metadata (PII) to anyone who can reach the URL.
|
|
3212
|
+
*
|
|
3213
|
+
* Defaults to `false` (security). With the default, when the dashboard
|
|
3214
|
+
* is enabled, `dashboardToken` is empty, AND the server is exposed
|
|
3215
|
+
* beyond `127.0.0.1`, the SDK auto-generates a one-time token and mounts
|
|
3216
|
+
* the dashboard behind it (the startup banner prints the ready-to-use
|
|
3217
|
+
* URL with `?token=...`). The dashboard is always available — it just
|
|
3218
|
+
* requires the printed or configured token. Loopback-only local dev is
|
|
3219
|
+
* unchanged: served open with no token.
|
|
3220
|
+
*
|
|
3221
|
+
* For a stable token instead of the per-process auto-generated one, set
|
|
3222
|
+
* `dashboardToken`. Set this flag only as the deliberate escape hatch
|
|
3223
|
+
* for the rare case where unauthenticated public exposure is intentional.
|
|
3224
|
+
*/
|
|
3225
|
+
readonly allowInsecureDashboard?: boolean;
|
|
2698
3226
|
/** Path to SQLite database for dashboard persistence (not used in TS yet). */
|
|
2699
|
-
dashboardDb?: string;
|
|
3227
|
+
readonly dashboardDb?: string;
|
|
2700
3228
|
/** When true (default), persist dashboard data. */
|
|
2701
|
-
dashboardPersist?: boolean;
|
|
3229
|
+
readonly dashboardPersist?: boolean;
|
|
2702
3230
|
/**
|
|
2703
3231
|
* When true (default), `serve()` calls the carrier's API on startup to
|
|
2704
3232
|
* point the configured phone number's webhook URL at this server. Set
|
|
@@ -2718,7 +3246,7 @@ interface ServeOptions {
|
|
|
2718
3246
|
* hostname is dynamic and only known at runtime — the carrier MUST be
|
|
2719
3247
|
* reconfigured for inbound calls to land.
|
|
2720
3248
|
*/
|
|
2721
|
-
manageWebhook?: boolean;
|
|
3249
|
+
readonly manageWebhook?: boolean;
|
|
2722
3250
|
}
|
|
2723
3251
|
/**
|
|
2724
3252
|
* Normalised AMD (answering-machine detection) result emitted to
|
|
@@ -2744,8 +3272,8 @@ interface MachineDetectionResult {
|
|
|
2744
3272
|
}
|
|
2745
3273
|
/** Options for `Patter.call({...})` to place an outbound call. */
|
|
2746
3274
|
interface LocalCallOptions {
|
|
2747
|
-
to: string;
|
|
2748
|
-
agent: AgentOptions;
|
|
3275
|
+
readonly to: string;
|
|
3276
|
+
readonly agent: AgentOptions;
|
|
2749
3277
|
/**
|
|
2750
3278
|
* Enable answering-machine detection. **Defaults to ``true``** — the SDK
|
|
2751
3279
|
* asks Twilio (``MachineDetection=DetectMessageEnd`` + Async AMD) or
|
|
@@ -2756,7 +3284,7 @@ interface LocalCallOptions {
|
|
|
2756
3284
|
* disable when you want to skip per-call AMD billing or you already
|
|
2757
3285
|
* know the destination is a human.
|
|
2758
3286
|
*/
|
|
2759
|
-
machineDetection?: boolean;
|
|
3287
|
+
readonly machineDetection?: boolean;
|
|
2760
3288
|
/**
|
|
2761
3289
|
* Called once when the carrier finishes the AMD check. Fires for both
|
|
2762
3290
|
* ``human`` and ``machine`` outcomes. Combine with ``voicemailMessage``
|
|
@@ -2764,11 +3292,11 @@ interface LocalCallOptions {
|
|
|
2764
3292
|
* fires the callback after the drop is queued). Acceptance tests use
|
|
2765
3293
|
* this to mark a run INVALID when ``classification !== 'human'``.
|
|
2766
3294
|
*/
|
|
2767
|
-
onMachineDetection?: (result: MachineDetectionResult) => void | Promise<void>;
|
|
3295
|
+
readonly onMachineDetection?: (result: MachineDetectionResult) => void | Promise<void>;
|
|
2768
3296
|
/** If set, spoken as a voicemail message when AMD detects a machine. Implicitly enables ``machineDetection``. */
|
|
2769
|
-
voicemailMessage?: string;
|
|
3297
|
+
readonly voicemailMessage?: string;
|
|
2770
3298
|
/** Dynamic variables merged into agent.variables before call. Override agent-level variables. */
|
|
2771
|
-
variables?: Record<string, string>;
|
|
3299
|
+
readonly variables?: Readonly<Record<string, string>>;
|
|
2772
3300
|
/**
|
|
2773
3301
|
* Ring timeout in seconds. Forwarded to Twilio as `Timeout` and to Telnyx
|
|
2774
3302
|
* as `timeout_secs`. Defaults to **25 s** — the production-recommended
|
|
@@ -2776,7 +3304,7 @@ interface LocalCallOptions {
|
|
|
2776
3304
|
* parity, or `null` to omit the parameter entirely (carrier picks its
|
|
2777
3305
|
* own default).
|
|
2778
3306
|
*/
|
|
2779
|
-
ringTimeout?: number | null;
|
|
3307
|
+
readonly ringTimeout?: number | null;
|
|
2780
3308
|
/**
|
|
2781
3309
|
* When `true`, block until the call reaches a terminal state and resolve
|
|
2782
3310
|
* to a {@link CallResult} (`outcome` ∈ answered / voicemail / no_answer /
|
|
@@ -2790,7 +3318,7 @@ interface LocalCallOptions {
|
|
|
2790
3318
|
*
|
|
2791
3319
|
* Mirrors Python's `Patter.call(..., wait=True)`.
|
|
2792
3320
|
*/
|
|
2793
|
-
wait?: boolean;
|
|
3321
|
+
readonly wait?: boolean;
|
|
2794
3322
|
}
|
|
2795
3323
|
/**
|
|
2796
3324
|
* Carrier-agnostic terminal outcomes for an outbound call. `answered` means a
|
|
@@ -3136,7 +3664,7 @@ interface ElevenLabsParkedWS {
|
|
|
3136
3664
|
/** WebSocket-based ElevenLabs TTS adapter — opt-in low-latency variant. */
|
|
3137
3665
|
declare class ElevenLabsWebSocketTTS implements TTSAdapter {
|
|
3138
3666
|
static readonly providerKey = "elevenlabs_ws";
|
|
3139
|
-
readonly apiKey: string;
|
|
3667
|
+
private readonly apiKey;
|
|
3140
3668
|
readonly voiceId: string;
|
|
3141
3669
|
readonly modelId: string;
|
|
3142
3670
|
readonly voiceSettings?: Record<string, unknown>;
|
|
@@ -3692,6 +4220,86 @@ interface DefineToolInput {
|
|
|
3692
4220
|
*/
|
|
3693
4221
|
declare function defineTool(input: DefineToolInput): ToolDefinition;
|
|
3694
4222
|
|
|
4223
|
+
/**
|
|
4224
|
+
* Built-in ``consult`` tool — lets the in-call agent escalate to the caller's
|
|
4225
|
+
* own back-office agent for deeper reasoning or fresh information, then speak
|
|
4226
|
+
* the answer.
|
|
4227
|
+
*
|
|
4228
|
+
* This is the *dispatch + consult* pattern: Patter conducts the call (STT +
|
|
4229
|
+
* LLM/voice + TTS + carrier); when the in-call agent hits something it cannot
|
|
4230
|
+
* answer directly, it invokes this tool, which reaches the configured
|
|
4231
|
+
* back-office agent and returns the reply for the agent to speak. The
|
|
4232
|
+
* back-office agent stays off the per-turn path — consulted only on demand, so
|
|
4233
|
+
* ordinary turns keep their low latency.
|
|
4234
|
+
*
|
|
4235
|
+
* Two targets are supported (see {@link ConsultConfig}):
|
|
4236
|
+
*
|
|
4237
|
+
* - ``url`` — the generic webhook path: POSTs ``{ request, call_id, caller,
|
|
4238
|
+
* callee }`` to your endpoint and reads a ``reply`` field back.
|
|
4239
|
+
* - ``openaiCompatible`` — speaks an OpenAI-compatible ``/chat/completions``
|
|
4240
|
+
* endpoint directly (e.g. an OpenClaw agent, or vLLM / Ollama / Groq) with no
|
|
4241
|
+
* hand-written adapter: POSTs ``{ model, messages, user }`` and speaks
|
|
4242
|
+
* ``choices[0].message.content``. Use {@link openclawConsult}.
|
|
4243
|
+
*
|
|
4244
|
+
* The handler does the HTTP call itself so the per-consult timeout and auth from
|
|
4245
|
+
* {@link ConsultConfig} are honoured. ``config.reassurance``, when set, is
|
|
4246
|
+
* attached so the agent speaks a filler while the consult runs (Realtime mode
|
|
4247
|
+
* only).
|
|
4248
|
+
*/
|
|
4249
|
+
|
|
4250
|
+
/**
|
|
4251
|
+
* Build a {@link ConsultConfig} that consults a specific OpenClaw agent directly
|
|
4252
|
+
* (no hand-written adapter) — the TypeScript equivalent of Python's
|
|
4253
|
+
* ``ConsultConfig.openclaw(...)``.
|
|
4254
|
+
*
|
|
4255
|
+
* ``agent`` is the OpenClaw agent id (e.g. ``"receptionist"``) → targets
|
|
4256
|
+
* ``model="openclaw/<agent>"``. An already-namespaced target (``"openclaw/x"``,
|
|
4257
|
+
* ``"openclaw:x"``, ``"agent:x"``) is passed through. ``allowLoopback`` defaults
|
|
4258
|
+
* to ``true`` when ``baseUrl`` is loopback/private (the intended co-located
|
|
4259
|
+
* deployment). The gateway bearer is read from ``apiKey`` or the
|
|
4260
|
+
* ``OPENCLAW_API_KEY`` env var (operator-grade — never logged). Sized at the
|
|
4261
|
+
* phone-safe 30 s default; raise only for batch-style agents, never above 30 s
|
|
4262
|
+
* on a live call.
|
|
4263
|
+
*/
|
|
4264
|
+
declare function openclawConsult(agent: string, opts?: {
|
|
4265
|
+
readonly baseUrl?: string;
|
|
4266
|
+
readonly apiKey?: string;
|
|
4267
|
+
readonly timeoutMs?: number;
|
|
4268
|
+
readonly toolName?: string;
|
|
4269
|
+
readonly description?: string;
|
|
4270
|
+
readonly reassurance?: string | Readonly<{
|
|
4271
|
+
message: string;
|
|
4272
|
+
afterMs?: number;
|
|
4273
|
+
}>;
|
|
4274
|
+
readonly headers?: Readonly<Record<string, string>>;
|
|
4275
|
+
readonly allowLoopback?: boolean;
|
|
4276
|
+
}): ConsultConfig;
|
|
4277
|
+
/**
|
|
4278
|
+
* Return an ``on_call_end`` callback that posts the finished call's record to a
|
|
4279
|
+
* specific OpenClaw agent, so the brain has the record and can follow up — the
|
|
4280
|
+
* TypeScript equivalent of Python's ``openclaw_post_call_notifier``.
|
|
4281
|
+
*
|
|
4282
|
+
* Wire it on ``serve``:
|
|
4283
|
+
*
|
|
4284
|
+
* await phone.serve({ agent, onCallEnd: openclawPostCallNotifier('receptionist') });
|
|
4285
|
+
*
|
|
4286
|
+
* The record is POSTed to the same OpenClaw agent over its OpenAI-compatible
|
|
4287
|
+
* ``/chat/completions`` gateway, keyed to the call id (the ``user`` field +
|
|
4288
|
+
* ``x-openclaw-session-key`` header) so it lands in the SAME OpenClaw session as
|
|
4289
|
+
* the in-call ``consult`` turns. Fire-and-forget: any error is logged by type
|
|
4290
|
+
* only (never the URL / headers / key) and never thrown into teardown. Args
|
|
4291
|
+
* mirror {@link openclawConsult}; the bearer is read from ``apiKey`` or
|
|
4292
|
+
* ``OPENCLAW_API_KEY`` (operator-grade — never logged).
|
|
4293
|
+
*/
|
|
4294
|
+
declare function openclawPostCallNotifier(agent: string, opts?: {
|
|
4295
|
+
readonly baseUrl?: string;
|
|
4296
|
+
readonly apiKey?: string;
|
|
4297
|
+
readonly timeoutMs?: number;
|
|
4298
|
+
readonly allowLoopback?: boolean;
|
|
4299
|
+
readonly includeTranscript?: boolean;
|
|
4300
|
+
readonly instruction?: string;
|
|
4301
|
+
}): (data: Record<string, unknown>) => Promise<void>;
|
|
4302
|
+
|
|
3695
4303
|
/**
|
|
3696
4304
|
* Process-wide logger used by the SDK.
|
|
3697
4305
|
*
|
|
@@ -3907,6 +4515,16 @@ declare class PatterError extends Error {
|
|
|
3907
4515
|
code?: ErrorCode;
|
|
3908
4516
|
});
|
|
3909
4517
|
}
|
|
4518
|
+
/**
|
|
4519
|
+
* Invalid constructor arguments, a missing required environment variable, or a
|
|
4520
|
+
* frozen-config constraint violation. Parity with Python's
|
|
4521
|
+
* ``PatterConfigError`` in ``libraries/python/getpatter/exceptions.py``.
|
|
4522
|
+
*/
|
|
4523
|
+
declare class PatterConfigError extends PatterError {
|
|
4524
|
+
constructor(message: string, options?: {
|
|
4525
|
+
code?: ErrorCode;
|
|
4526
|
+
});
|
|
4527
|
+
}
|
|
3910
4528
|
/** Network / WebSocket / HTTP-level connectivity failure when talking to a provider. */
|
|
3911
4529
|
declare class PatterConnectionError extends PatterError {
|
|
3912
4530
|
constructor(message: string, options?: {
|
|
@@ -4154,9 +4772,9 @@ declare class FallbackLLMProvider implements LLMProvider {
|
|
|
4154
4772
|
* markers are filtered out so callers can concatenate the yielded strings
|
|
4155
4773
|
* directly.
|
|
4156
4774
|
*/
|
|
4157
|
-
completeStream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<string, void, unknown>;
|
|
4775
|
+
completeStream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<string, void, unknown>;
|
|
4158
4776
|
/** Streaming entry point — yields chunks from the first provider that succeeds. */
|
|
4159
|
-
stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
|
|
4777
|
+
stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<LLMChunk, void, unknown>;
|
|
4160
4778
|
private tryProviders;
|
|
4161
4779
|
private markUnavailable;
|
|
4162
4780
|
private startRecovery;
|
|
@@ -4269,49 +4887,49 @@ interface PatterToolOptions {
|
|
|
4269
4887
|
* Patter instance to dial through. Must be in local mode (have a `carrier`).
|
|
4270
4888
|
* The tool boots `phone.serve()` on `start()`; do not call `serve()` yourself.
|
|
4271
4889
|
*/
|
|
4272
|
-
phone: Patter;
|
|
4890
|
+
readonly phone: Patter;
|
|
4273
4891
|
/**
|
|
4274
4892
|
* Default agent config used for outbound calls. Per-call overrides come from
|
|
4275
4893
|
* `execute({ goal, first_message })`.
|
|
4276
4894
|
*/
|
|
4277
|
-
agent?: AgentOptions;
|
|
4895
|
+
readonly agent?: AgentOptions;
|
|
4278
4896
|
/** Tool name shown to the LLM. Default `'make_phone_call'`. */
|
|
4279
|
-
name?: string;
|
|
4897
|
+
readonly name?: string;
|
|
4280
4898
|
/** Tool description for the LLM. Default tuned for English assistants. */
|
|
4281
|
-
description?: string;
|
|
4899
|
+
readonly description?: string;
|
|
4282
4900
|
/** Default per-call timeout in seconds. Default 180. */
|
|
4283
|
-
maxDurationSec?: number;
|
|
4901
|
+
readonly maxDurationSec?: number;
|
|
4284
4902
|
/**
|
|
4285
4903
|
* Optional pass-through for `phone.serve()`'s `recording` flag — record all
|
|
4286
4904
|
* outbound calls placed via this tool.
|
|
4287
4905
|
*/
|
|
4288
|
-
recording?: boolean;
|
|
4906
|
+
readonly recording?: boolean;
|
|
4289
4907
|
}
|
|
4290
4908
|
/** Args accepted by `PatterTool.execute()` (and the OpenAI/Anthropic/Hermes tool schemas). */
|
|
4291
4909
|
interface PatterToolExecuteArgs {
|
|
4292
|
-
to: string;
|
|
4293
|
-
goal?: string;
|
|
4294
|
-
first_message?: string;
|
|
4295
|
-
max_duration_sec?: number;
|
|
4910
|
+
readonly to: string;
|
|
4911
|
+
readonly goal?: string;
|
|
4912
|
+
readonly first_message?: string;
|
|
4913
|
+
readonly max_duration_sec?: number;
|
|
4296
4914
|
}
|
|
4297
4915
|
/** Result envelope returned by `PatterTool.execute()` once the underlying call ends. */
|
|
4298
4916
|
interface PatterToolResult {
|
|
4299
|
-
call_id: string;
|
|
4300
|
-
status: string;
|
|
4301
|
-
duration_seconds: number;
|
|
4917
|
+
readonly call_id: string;
|
|
4918
|
+
readonly status: string;
|
|
4919
|
+
readonly duration_seconds: number;
|
|
4302
4920
|
/**
|
|
4303
4921
|
* Carrier-agnostic outcome (answered / voicemail / no_answer / busy /
|
|
4304
4922
|
* failed) lifted from the SDK {@link CallResult}. Optional for backward
|
|
4305
4923
|
* compatibility with any code constructing this envelope without it.
|
|
4306
4924
|
*/
|
|
4307
|
-
outcome?: string;
|
|
4308
|
-
cost_usd?: number;
|
|
4309
|
-
transcript:
|
|
4925
|
+
readonly outcome?: string;
|
|
4926
|
+
readonly cost_usd?: number;
|
|
4927
|
+
readonly transcript: ReadonlyArray<Readonly<{
|
|
4310
4928
|
role: string;
|
|
4311
4929
|
text: string;
|
|
4312
4930
|
timestamp?: number;
|
|
4313
|
-
}
|
|
4314
|
-
metrics?: Record<string, unknown
|
|
4931
|
+
}>>;
|
|
4932
|
+
readonly metrics?: Readonly<Record<string, unknown>> | null;
|
|
4315
4933
|
}
|
|
4316
4934
|
/** Wraps a live `Patter` instance as a tool callable from external agent frameworks. */
|
|
4317
4935
|
declare class PatterTool {
|
|
@@ -4322,6 +4940,11 @@ declare class PatterTool {
|
|
|
4322
4940
|
private readonly maxDurationSec;
|
|
4323
4941
|
private readonly recording;
|
|
4324
4942
|
private started;
|
|
4943
|
+
/** Cached in-progress (or completed) start promise so concurrent execute()
|
|
4944
|
+
* callers all await the same boot sequence instead of each racing into
|
|
4945
|
+
* phone.serve(). Reset to null on failure so callers can retry after a
|
|
4946
|
+
* transient error. */
|
|
4947
|
+
private startPromise;
|
|
4325
4948
|
constructor(opts: PatterToolOptions);
|
|
4326
4949
|
/** OpenAI Chat Completions / Assistants tool spec. */
|
|
4327
4950
|
openaiSchema(): {
|
|
@@ -4355,8 +4978,12 @@ declare class PatterTool {
|
|
|
4355
4978
|
* `serve()` provides here. No `onCallEnd` callback is wired: the SDK's own
|
|
4356
4979
|
* per-callId completion registry resolves the result, so the user's
|
|
4357
4980
|
* `onCallEnd` slot is left free.
|
|
4981
|
+
*
|
|
4982
|
+
* Idempotent and concurrency-safe: concurrent callers all await the same
|
|
4983
|
+
* in-progress boot instead of each racing into `phone.serve()`.
|
|
4358
4984
|
*/
|
|
4359
4985
|
start(): Promise<void>;
|
|
4986
|
+
private _doStart;
|
|
4360
4987
|
/** Best-effort shutdown — tear the Patter server down via `disconnect()`. */
|
|
4361
4988
|
stop(): Promise<void>;
|
|
4362
4989
|
/**
|
|
@@ -4608,23 +5235,23 @@ interface Transcript$6 {
|
|
|
4608
5235
|
type TranscriptCallback$6 = (transcript: Transcript$6) => void;
|
|
4609
5236
|
/** Constructor options for {@link SonioxSTT}. */
|
|
4610
5237
|
interface SonioxSTTOptions$1 {
|
|
4611
|
-
model?: SonioxModel | string;
|
|
4612
|
-
languageHints?: string[];
|
|
4613
|
-
languageHintsStrict?: boolean;
|
|
4614
|
-
sampleRate?: SonioxSampleRate | number;
|
|
4615
|
-
numChannels?: number;
|
|
4616
|
-
enableSpeakerDiarization?: boolean;
|
|
4617
|
-
enableLanguageIdentification?: boolean;
|
|
4618
|
-
maxEndpointDelayMs?: number;
|
|
4619
|
-
clientReferenceId?: string;
|
|
4620
|
-
baseUrl?: string;
|
|
5238
|
+
readonly model?: SonioxModel | string;
|
|
5239
|
+
readonly languageHints?: readonly string[];
|
|
5240
|
+
readonly languageHintsStrict?: boolean;
|
|
5241
|
+
readonly sampleRate?: SonioxSampleRate | number;
|
|
5242
|
+
readonly numChannels?: number;
|
|
5243
|
+
readonly enableSpeakerDiarization?: boolean;
|
|
5244
|
+
readonly enableLanguageIdentification?: boolean;
|
|
5245
|
+
readonly maxEndpointDelayMs?: number;
|
|
5246
|
+
readonly clientReferenceId?: string;
|
|
5247
|
+
readonly baseUrl?: string;
|
|
4621
5248
|
}
|
|
4622
5249
|
/** Streaming STT adapter for Soniox's real-time WebSocket API. */
|
|
4623
5250
|
declare class SonioxSTT {
|
|
4624
5251
|
/** Stable pricing/dashboard key — read by stream-handler/metrics. */
|
|
4625
5252
|
static readonly providerKey = "soniox";
|
|
4626
5253
|
private ws;
|
|
4627
|
-
private callbacks;
|
|
5254
|
+
private readonly callbacks;
|
|
4628
5255
|
private final;
|
|
4629
5256
|
private keepaliveTimer;
|
|
4630
5257
|
private readonly apiKey;
|
|
@@ -4649,8 +5276,10 @@ declare class SonioxSTT {
|
|
|
4649
5276
|
private emit;
|
|
4650
5277
|
/** Send a binary PCM16-LE audio chunk to Soniox for transcription. */
|
|
4651
5278
|
sendAudio(audio: Buffer): void;
|
|
4652
|
-
/** Register a transcript listener
|
|
5279
|
+
/** Register a transcript listener. */
|
|
4653
5280
|
onTranscript(callback: TranscriptCallback$6): void;
|
|
5281
|
+
/** Unregister a previously registered transcript listener. */
|
|
5282
|
+
offTranscript(callback: TranscriptCallback$6): void;
|
|
4654
5283
|
/** Send the empty-frame stream terminator and close the WebSocket. */
|
|
4655
5284
|
close(): void;
|
|
4656
5285
|
}
|
|
@@ -6022,8 +6651,8 @@ interface OpenAITTSOptions {
|
|
|
6022
6651
|
speed?: number;
|
|
6023
6652
|
/**
|
|
6024
6653
|
* Enable anti-aliasing LPF ahead of the 3:2 decimation. Defaults to
|
|
6025
|
-
* ``
|
|
6026
|
-
*
|
|
6654
|
+
* ``true`` (matches the provider default); set to ``false`` to opt out
|
|
6655
|
+
* for bit-exact downsample-only output.
|
|
6027
6656
|
*/
|
|
6028
6657
|
antiAlias?: boolean;
|
|
6029
6658
|
}
|
|
@@ -6815,14 +7444,14 @@ declare const SUPPORTED_SAMPLE_RATES: readonly [8000, 16000];
|
|
|
6815
7444
|
type SileroSampleRate = (typeof SUPPORTED_SAMPLE_RATES)[number];
|
|
6816
7445
|
/** Options accepted by {@link SileroVAD.load}. */
|
|
6817
7446
|
interface SileroVADOptions {
|
|
6818
|
-
minSpeechDuration?: number;
|
|
6819
|
-
minSilenceDuration?: number;
|
|
6820
|
-
prefixPaddingDuration?: number;
|
|
6821
|
-
activationThreshold?: number;
|
|
6822
|
-
deactivationThreshold?: number;
|
|
6823
|
-
sampleRate?: SileroSampleRate;
|
|
6824
|
-
forceCpu?: boolean;
|
|
6825
|
-
onnxFilePath?: string;
|
|
7447
|
+
readonly minSpeechDuration?: number;
|
|
7448
|
+
readonly minSilenceDuration?: number;
|
|
7449
|
+
readonly prefixPaddingDuration?: number;
|
|
7450
|
+
readonly activationThreshold?: number;
|
|
7451
|
+
readonly deactivationThreshold?: number;
|
|
7452
|
+
readonly sampleRate?: SileroSampleRate;
|
|
7453
|
+
readonly forceCpu?: boolean;
|
|
7454
|
+
readonly onnxFilePath?: string;
|
|
6826
7455
|
}
|
|
6827
7456
|
/**
|
|
6828
7457
|
* Minimal structural type for the subset of `onnxruntime-node` we depend on.
|
|
@@ -6861,6 +7490,8 @@ declare class SileroVAD implements VADProvider {
|
|
|
6861
7490
|
private speechThresholdDuration;
|
|
6862
7491
|
private silenceThresholdDuration;
|
|
6863
7492
|
private closed;
|
|
7493
|
+
/** Transitions produced in the current processFrame call but not yet returned. */
|
|
7494
|
+
private eventQueue;
|
|
6864
7495
|
private constructor();
|
|
6865
7496
|
/**
|
|
6866
7497
|
* Load the Silero VAD model.
|
|
@@ -6945,9 +7576,9 @@ declare class SileroVAD implements VADProvider {
|
|
|
6945
7576
|
interface DeepFilterNetOptions {
|
|
6946
7577
|
/** Absolute path to a DeepFilterNet ONNX model. If omitted, the filter
|
|
6947
7578
|
* logs a warning and becomes a pass-through. */
|
|
6948
|
-
modelPath?: string;
|
|
7579
|
+
readonly modelPath?: string;
|
|
6949
7580
|
/** When true, disable the pass-through warning (used by tests). */
|
|
6950
|
-
silenceWarnings?: boolean;
|
|
7581
|
+
readonly silenceWarnings?: boolean;
|
|
6951
7582
|
}
|
|
6952
7583
|
/** OSS noise-suppression filter backed by a DeepFilterNet ONNX model. */
|
|
6953
7584
|
declare class DeepFilterNetFilter implements AudioFilter {
|
|
@@ -7208,6 +7839,36 @@ declare class OpenAIRealtime2Adapter extends OpenAIRealtimeAdapter {
|
|
|
7208
7839
|
* artefact and well below the GA VAD's 300 ms prefix-padding window.
|
|
7209
7840
|
*/
|
|
7210
7841
|
private transcodeInboundMulaw8ToPcm24;
|
|
7842
|
+
/**
|
|
7843
|
+
* Log-only safety net for issue #154. The GA server echoes the *effective*
|
|
7844
|
+
* session config in `session.updated`; we request `audio/pcm` @ 24 kHz and
|
|
7845
|
+
* transcode PCM24→mulaw8 ourselves (see
|
|
7846
|
+
* `transcodeOutboundPcm24ToMulaw8Buffer`). If a future GA schema change ever
|
|
7847
|
+
* made the server return a different output format, that transcode — which
|
|
7848
|
+
* assumes PCM16-LE @ 24 kHz — would silently corrupt audio, exactly the
|
|
7849
|
+
* v1-beta failure mode #154 fixed. Warn so the drift surfaces in logs instead
|
|
7850
|
+
* of as static. Never gates audio.
|
|
7851
|
+
*/
|
|
7852
|
+
private warnIfOutputFormatUnexpected;
|
|
7853
|
+
/**
|
|
7854
|
+
* Shared audio-delta translation helper. Transcodes a GA
|
|
7855
|
+
* `response.output_audio.delta` payload (base64 PCM-16-LE 24 kHz)
|
|
7856
|
+
* into mulaw 8 kHz and splits the result into 160-byte (20 ms) frames,
|
|
7857
|
+
* dispatching one synthetic `response.audio.delta` event per frame.
|
|
7858
|
+
*
|
|
7859
|
+
* Called from BOTH the `connect()` shim and the `adoptWebSocket()` shim
|
|
7860
|
+
* so that warm-path (prewarm/adopted) calls receive identical transcoding
|
|
7861
|
+
* to cold-path calls. Without this, adopted sockets forwarded raw PCM-24
|
|
7862
|
+
* to Twilio/Telnyx, producing garbled or silent audio on every warm call.
|
|
7863
|
+
*
|
|
7864
|
+
* @param parsed - The parsed GA event object (type already checked to be
|
|
7865
|
+
* `response.output_audio.delta` with a string `delta`).
|
|
7866
|
+
* @param handler - The downstream message listener to dispatch each frame to.
|
|
7867
|
+
* @param rest - Extra arguments forwarded from the original `message` event.
|
|
7868
|
+
* @returns `true` if frames were dispatched (caller should return early),
|
|
7869
|
+
* `false` if the resampler is still warming up (zero output bytes).
|
|
7870
|
+
*/
|
|
7871
|
+
private translateGaAudioDelta;
|
|
7211
7872
|
/**
|
|
7212
7873
|
* Base64 PCM-16-LE 24 kHz → Base64 mulaw 8 kHz. Used by the WS
|
|
7213
7874
|
* translation shim on each `response.output_audio.delta`. The stateful
|
|
@@ -7217,6 +7878,23 @@ declare class OpenAIRealtime2Adapter extends OpenAIRealtimeAdapter {
|
|
|
7217
7878
|
*/
|
|
7218
7879
|
private transcodeOutboundPcm24ToMulaw8Buffer;
|
|
7219
7880
|
sendFirstMessage(text: string): Promise<void>;
|
|
7881
|
+
/**
|
|
7882
|
+
* Speak a short reassurance filler WITHOUT injecting a `role:user` turn.
|
|
7883
|
+
*
|
|
7884
|
+
* GA-shape sibling of {@link sendFirstMessage} (and override of the base v1
|
|
7885
|
+
* {@link OpenAIRealtimeAdapter.sendReassurance}): a bare `response.create`
|
|
7886
|
+
* carrying explicit `instructions` so the filler is the assistant's own
|
|
7887
|
+
* in-band audio. No `conversation.item.create` with `role:"user"` is
|
|
7888
|
+
* emitted, so the transcript shows no phantom caller line. The GA endpoint
|
|
7889
|
+
* rejects `response.modalities` and does not inherit `audio.output.voice`
|
|
7890
|
+
* for an explicit `response.create`, so — exactly as in
|
|
7891
|
+
* {@link sendFirstMessage} — we send `output_modalities` and re-inject the
|
|
7892
|
+
* voice. Fillers must not imply success or failure.
|
|
7893
|
+
*
|
|
7894
|
+
* Mirrors Python `OpenAIRealtime2Adapter.send_reassurance` in
|
|
7895
|
+
* `providers/openai_realtime_2.py`.
|
|
7896
|
+
*/
|
|
7897
|
+
sendReassurance(text: string): Promise<void>;
|
|
7220
7898
|
}
|
|
7221
7899
|
|
|
7222
7900
|
/**
|
|
@@ -7541,7 +8219,7 @@ declare class ChatContext {
|
|
|
7541
8219
|
*/
|
|
7542
8220
|
|
|
7543
8221
|
/** Valid DTMF tone values (keypad characters). */
|
|
7544
|
-
declare const DTMF_EVENTS: readonly ["
|
|
8222
|
+
declare const DTMF_EVENTS: readonly ["1", "2", "3", "4", "5", "6", "7", "8", "9", "0", "*", "#", "A", "B", "C", "D"];
|
|
7545
8223
|
/** Single DTMF tone value (a member of `DTMF_EVENTS`). */
|
|
7546
8224
|
type DtmfEvent = (typeof DTMF_EVENTS)[number];
|
|
7547
8225
|
/** Join DTMF events into a space-separated debug string. */
|
|
@@ -8030,8 +8708,10 @@ declare class TelnyxSTT {
|
|
|
8030
8708
|
connect(): Promise<void>;
|
|
8031
8709
|
/** Send a binary PCM16 audio chunk; emits the WAV header on the first call. */
|
|
8032
8710
|
sendAudio(audio: Buffer): void;
|
|
8033
|
-
/** Register a transcript listener
|
|
8711
|
+
/** Register a transcript listener. */
|
|
8034
8712
|
onTranscript(callback: TranscriptCallback): void;
|
|
8713
|
+
/** Unregister a previously-registered transcript listener. */
|
|
8714
|
+
offTranscript(callback: TranscriptCallback): void;
|
|
8035
8715
|
/** Close the streaming WebSocket. */
|
|
8036
8716
|
close(): void;
|
|
8037
8717
|
}
|
|
@@ -8149,4 +8829,4 @@ interface CallEvent {
|
|
|
8149
8829
|
readonly direction?: string;
|
|
8150
8830
|
}
|
|
8151
8831
|
|
|
8152
|
-
export { type AgentOptions, type AgentState, AllProvidersFailedError, type AnthropicConversion, LLM$3 as AnthropicLLM, type AnthropicLLMOptions, type AnthropicMessage, AssemblyAIEncoding, AssemblyAIModel, STT$1 as AssemblyAISTT, type AssemblyAISTTOptions, type AudioConfig, type AudioSource, AuthenticationError, type BackgroundAudioOptions, BackgroundAudioPlayer, type EvaluateContext as BargeInEvaluateContext, type BargeInStrategy, BuiltinAudioClip, type BuiltinAudioClipName, type BuiltinPcmSource, type CallControl, type CallEvent, type CallEventHandler, type CallMetrics, CallMetricsAccumulator, type CallOutcome, type CallRecord, type CallResult, type CarrierKind, type CartesiaEncoding, STT$3 as CartesiaSTT, type CartesiaSTTOptions, TTS$3 as CartesiaTTS, CartesiaTTSModel, type CartesiaTTSOptions, CartesiaTTSVoiceMode, LLM$1 as CerebrasLLM, type CerebrasLLMOptions, ChatContext, type ChatMessage, type ChatRole, CloudflareTunnel, type ConversationStateSnapshot, type CostBreakdown, DEFAULT_MIN_SENTENCE_LEN, DEFAULT_PRICING, DTMF_EVENTS, DeepFilterNetFilter, type DeepFilterNetOptions, DeepgramModel, STT$6 as DeepgramSTT, type DeepgramSTTOptions, DefaultToolExecutor, type DefaultToolExecutorOptions, type DefineToolInput, type DtmfEvent, ConvAI as ElevenLabsConvAI, ElevenLabsConvAIAdapter, type ConvAIOptions as ElevenLabsConvAIOptions, ElevenLabsModel, ElevenLabsOutputFormat, ElevenLabsTTS as ElevenLabsRestTTS, TTS$6 as ElevenLabsTTS, type ElevenLabsTTSOptions, type ElevenLabsWebSocketOptions, TTS$5 as ElevenLabsWebSocketTTS, type EouTrigger, ErrorCode, EventBus, FallbackLLMProvider, type FallbackLLMProviderOptions, type FilePcmSource, GEMINI_DEFAULT_INPUT_SR, GEMINI_DEFAULT_OUTPUT_SR, GeminiLiveAdapter, type GeminiLiveEventHandler, LLM as GoogleLLM, type GoogleLLMOptions, LLM$2 as GroqLLM, type GroqLLMOptions, Guardrail$1 as Guardrail, type GuardrailOptions, type HookContext, IVRActivity, type IVRActivityOptions, type IVRToolDefinition, type IncomingMessage, type 
InitTracingOptions, TTS as InworldTTS, type InworldTTSOptions, type JobCallback, KrispFrameDuration, KrispSampleRate, KrispVivaFilter, type KrispVivaFilterOptions, type LLMChunk, LLMLoop, type LLMProvider, LMNTAudioFormat, LMNTModel, LMNTSampleRate, TTS$1 as LMNTTTS, type LMNTTTSOptions, type LatencyBreakdown, type LocalCallOptions, type LocalConfig, type LocalOptions, type Logger, type LoopCallback, type MessageHandler, MetricsStore, MinWordsStrategy, type MinWordsStrategyOptions, type ModelPricing, Ngrok, LLM$4 as OpenAILLM, type OpenAILLMOptions, OpenAILLMProvider, type OpenAIMessage, Realtime as OpenAIRealtime, Realtime2 as OpenAIRealtime2, OpenAIRealtime2Adapter, type Realtime2Options as OpenAIRealtime2Options, OpenAIRealtimeAdapter, OpenAIRealtimeAudioFormat, OpenAIRealtimeModel, type RealtimeOptions as OpenAIRealtimeOptions, OpenAIRealtimeVADType, TTS$4 as OpenAITTS, type OpenAITTSOptions, STT$4 as OpenAITranscribeSTT, type OpenAITranscribeSTTOptions, OpenAITranscriptionModel, OpenAIVoice, PRICING_LAST_UPDATED, PRICING_VERSION, type ParamSpec, PartialStreamError, Patter, PatterConnectionError, PatterError, type PatterEventType, PatterTool, type PatterToolExecuteArgs, type PatterToolOptions, type PatterToolResult, PcmCarry, PipelineHookExecutor, type PipelineHooks, type PipelineMessageHandler, Carrier as Plivo, PlivoAdapter, type PlivoCarrierOptions, type InitiateCallOptions as PlivoInitiateCallOptions, type InitiateCallResult as PlivoInitiateCallResult, PricingUnit, type PricingUnitValue, type ProviderPricing, ProvisionError, RateLimitError, type RawPcmSource, type RealtimeConfig, RemoteMessageHandler, RimeAudioFormat, RimeModel, TTS$2 as RimeTTS, type RimeTTSOptions, SPAN_BARGEIN, SPAN_CALL, SPAN_ENDPOINT, SPAN_LLM, SPAN_STT, SPAN_TOOL, SPAN_TTS, type SSEEvent, type STTConfig, type ScheduleHandle, SentenceChunker, type ServeOptions, type SilenceCallback, type SileroSampleRate, SileroVAD, type SileroVADOptions, STT$2 as SonioxSTT, type SonioxSTTOptions$1 as 
SonioxSTTOptions, type Span, type SpeechEventCallback, SpeechEvents, SpeechmaticsAudioEncoding, SpeechmaticsOperatingPoint, STT as SpeechmaticsSTT, type SpeechmaticsSTTOptions, SpeechmaticsSampleRate, SpeechmaticsServerMessage, TurnDetectionMode as SpeechmaticsTurnDetectionMode, StatefulResampler, type StatefulResamplerOptions, Static as StaticTunnel, type TTSConfig, Carrier$1 as Telnyx, TelnyxAdapter, type TelnyxCarrierOptions, type ConfigureNumberOptions as TelnyxConfigureNumberOptions, type EndCallOptions as TelnyxEndCallOptions, type InitiateCallOptions$1 as TelnyxInitiateCallOptions, type InitiateCallResult$1 as TelnyxInitiateCallResult, type ProvisionNumberOptions as TelnyxProvisionNumberOptions, type ProvisionNumberResult as TelnyxProvisionNumberResult, TelnyxSTT, TelnyxSTTInputFormat, TelnyxSTTSampleRate, type Transcript as TelnyxSTTTranscript, TelnyxTTS, TelnyxTTSSampleRate, TelnyxTTSVoice, type TelnyxTranscriptionEngine, TestSession, TfidfLoopDetector, type TfidfLoopDetectorOptions, Tool, type ToolDefinition, type ToolExecutor, type ToolHandler, type ToolOptions, type TunnelHandle, type TurnMetrics, Carrier$2 as Twilio, TwilioAdapter, type TwilioAdapterOptions, type TwilioCarrierOptions, type ConfigureNumberOptions$1 as TwilioConfigureNumberOptions, type InitiateCallOptions$2 as TwilioInitiateCallOptions, type InitiateCallResult$2 as TwilioInitiateCallResult, type ProvisionNumberOptions$1 as TwilioProvisionNumberOptions, type ProvisionNumberResult$1 as TwilioProvisionNumberResult, ULTRAVOX_DEFAULT_API_BASE, ULTRAVOX_DEFAULT_SR, type UltravoxEventHandler, UltravoxRealtimeAdapter, type UserState, STT$5 as WhisperSTT, type WhisperSTTOptions, assemblyai, builtinClipPath, calculateRealtimeCost, calculateSttCost, calculateTelephonyCost, calculateTtsCost, callsToCsv, callsToJson, cartesia, createResampler16kTo8k, createResampler24kTo16k, createResampler24kTo8k, createResampler8kTo16k, deepgram, defineTool, elevenlabs, evaluateStrategies as 
evaluateBargeInStrategies, filterEmoji, filterForTTS, filterMarkdown, formatDtmf, geminiLive, getLogger, guardrail, initTracing, isRemoteUrl, isTracingEnabled, isWebSocketUrl, lmnt, makeAuthMiddleware, mergePricing, mixPcm, mountApi, mountDashboard, mulawToPcm16, notifyDashboard, openaiTts, pcm16ToMulaw, resample16kTo8k, resample24kTo16k, resample8kTo16k, resamplePcm, resetStrategies as resetBargeInStrategies, rime, scheduleCron, scheduleInterval, scheduleOnce, selectSoundFromList, setLogger, soniox, speechmatics, startSpan, startTunnel, tool, ultravox, whisper };
|
|
8832
|
+
export { type AgentOptions, type AgentState, AllProvidersFailedError, type AnthropicConversion, LLM$3 as AnthropicLLM, type AnthropicLLMOptions, type AnthropicMessage, AssemblyAIEncoding, AssemblyAIModel, STT$1 as AssemblyAISTT, type AssemblyAISTTOptions, type AudioConfig, type AudioSource, AuthenticationError, type BackgroundAudioOptions, BackgroundAudioPlayer, type EvaluateContext as BargeInEvaluateContext, type BargeInStrategy, BuiltinAudioClip, type BuiltinAudioClipName, type BuiltinPcmSource, type CallControl, type CallEvent, type CallEventHandler, type CallMetrics, CallMetricsAccumulator, type CallOutcome, type CallRecord, type CallResult, type CarrierKind, type CartesiaEncoding, STT$3 as CartesiaSTT, type CartesiaSTTOptions, TTS$3 as CartesiaTTS, CartesiaTTSModel, type CartesiaTTSOptions, CartesiaTTSVoiceMode, LLM$1 as CerebrasLLM, type CerebrasLLMOptions, ChatContext, type ChatMessage, type ChatRole, CloudflareTunnel, type ConsultConfig, type ConversationStateSnapshot, type CostBreakdown, DEFAULT_MIN_SENTENCE_LEN, DEFAULT_PRICING, DTMF_EVENTS, DeepFilterNetFilter, type DeepFilterNetOptions, DeepgramModel, STT$6 as DeepgramSTT, type DeepgramSTTOptions, DefaultToolExecutor, type DefaultToolExecutorOptions, type DefineToolInput, type DtmfEvent, ConvAI as ElevenLabsConvAI, ElevenLabsConvAIAdapter, type ConvAIOptions as ElevenLabsConvAIOptions, ElevenLabsModel, ElevenLabsOutputFormat, ElevenLabsTTS as ElevenLabsRestTTS, TTS$6 as ElevenLabsTTS, type ElevenLabsTTSOptions, type ElevenLabsWebSocketOptions, TTS$5 as ElevenLabsWebSocketTTS, type EouTrigger, ErrorCode, EventBus, FallbackLLMProvider, type FallbackLLMProviderOptions, type FilePcmSource, GEMINI_DEFAULT_INPUT_SR, GEMINI_DEFAULT_OUTPUT_SR, GeminiLiveAdapter, type GeminiLiveEventHandler, LLM as GoogleLLM, type GoogleLLMOptions, LLM$2 as GroqLLM, type GroqLLMOptions, Guardrail$1 as Guardrail, type GuardrailOptions, type HookContext, IVRActivity, type IVRActivityOptions, type IVRToolDefinition, type 
IncomingMessage, type InitTracingOptions, TTS as InworldTTS, type InworldTTSOptions, type JobCallback, KrispFrameDuration, KrispSampleRate, KrispVivaFilter, type KrispVivaFilterOptions, type LLMChunk, LLMLoop, type LLMProvider, LMNTAudioFormat, LMNTModel, LMNTSampleRate, TTS$1 as LMNTTTS, type LMNTTTSOptions, type LatencyBreakdown, type LocalCallOptions, type LocalConfig, type LocalOptions, type Logger, type LoopCallback, type MessageHandler, MetricsStore, MinWordsStrategy, type MinWordsStrategyOptions, type ModelPricing, Ngrok, type OpenAICompatibleConsult, LLM$4 as OpenAILLM, type OpenAILLMOptions, OpenAILLMProvider, type OpenAIMessage, Realtime as OpenAIRealtime, Realtime2 as OpenAIRealtime2, OpenAIRealtime2Adapter, type Realtime2Options as OpenAIRealtime2Options, OpenAIRealtimeAdapter, OpenAIRealtimeAudioFormat, OpenAIRealtimeModel, type RealtimeOptions as OpenAIRealtimeOptions, OpenAIRealtimeVADType, TTS$4 as OpenAITTS, type OpenAITTSOptions, STT$4 as OpenAITranscribeSTT, type OpenAITranscribeSTTOptions, OpenAITranscriptionModel, OpenAIVoice, PRICING_LAST_UPDATED, PRICING_VERSION, type ParamSpec, PartialStreamError, Patter, PatterConfigError, PatterConnectionError, PatterError, type PatterEventType, PatterTool, type PatterToolExecuteArgs, type PatterToolOptions, type PatterToolResult, PcmCarry, PipelineHookExecutor, type PipelineHooks, type PipelineMessageHandler, Carrier as Plivo, PlivoAdapter, type PlivoCarrierOptions, type InitiateCallOptions as PlivoInitiateCallOptions, type InitiateCallResult as PlivoInitiateCallResult, PricingUnit, type PricingUnitValue, type ProviderPricing, ProvisionError, RateLimitError, type RawPcmSource, type RealtimeConfig, type RealtimeTurnDetection, RemoteMessageHandler, RimeAudioFormat, RimeModel, TTS$2 as RimeTTS, type RimeTTSOptions, SPAN_BARGEIN, SPAN_CALL, SPAN_ENDPOINT, SPAN_LLM, SPAN_STT, SPAN_TOOL, SPAN_TTS, type SSEEvent, type STTConfig, type ScheduleHandle, SentenceChunker, type ServeOptions, type SilenceCallback, type 
SileroSampleRate, SileroVAD, type SileroVADOptions, STT$2 as SonioxSTT, type SonioxSTTOptions$1 as SonioxSTTOptions, type Span, type SpeechEventCallback, SpeechEvents, SpeechmaticsAudioEncoding, SpeechmaticsOperatingPoint, STT as SpeechmaticsSTT, type SpeechmaticsSTTOptions, SpeechmaticsSampleRate, SpeechmaticsServerMessage, TurnDetectionMode as SpeechmaticsTurnDetectionMode, StatefulResampler, type StatefulResamplerOptions, Static as StaticTunnel, type TTSConfig, Carrier$1 as Telnyx, TelnyxAdapter, type TelnyxCarrierOptions, type ConfigureNumberOptions as TelnyxConfigureNumberOptions, type EndCallOptions as TelnyxEndCallOptions, type InitiateCallOptions$1 as TelnyxInitiateCallOptions, type InitiateCallResult$1 as TelnyxInitiateCallResult, type ProvisionNumberOptions as TelnyxProvisionNumberOptions, type ProvisionNumberResult as TelnyxProvisionNumberResult, TelnyxSTT, TelnyxSTTInputFormat, TelnyxSTTSampleRate, type Transcript as TelnyxSTTTranscript, TelnyxTTS, TelnyxTTSSampleRate, TelnyxTTSVoice, type TelnyxTranscriptionEngine, TestSession, TfidfLoopDetector, type TfidfLoopDetectorOptions, Tool, type ToolDefinition, type ToolExecutor, type ToolHandler, type ToolOptions, type TunnelHandle, type TurnMetrics, Carrier$2 as Twilio, TwilioAdapter, type TwilioAdapterOptions, type TwilioCarrierOptions, type ConfigureNumberOptions$1 as TwilioConfigureNumberOptions, type InitiateCallOptions$2 as TwilioInitiateCallOptions, type InitiateCallResult$2 as TwilioInitiateCallResult, type ProvisionNumberOptions$1 as TwilioProvisionNumberOptions, type ProvisionNumberResult$1 as TwilioProvisionNumberResult, ULTRAVOX_DEFAULT_API_BASE, ULTRAVOX_DEFAULT_SR, type UltravoxEventHandler, UltravoxRealtimeAdapter, type UserState, STT$5 as WhisperSTT, type WhisperSTTOptions, assemblyai, builtinClipPath, calculateRealtimeCost, calculateSttCost, calculateTelephonyCost, calculateTtsCost, callsToCsv, callsToJson, cartesia, createResampler16kTo8k, createResampler24kTo16k, createResampler24kTo8k, 
createResampler8kTo16k, deepgram, defineTool, elevenlabs, evaluateStrategies as evaluateBargeInStrategies, filterEmoji, filterForTTS, filterMarkdown, formatDtmf, geminiLive, getLogger, guardrail, initTracing, isRemoteUrl, isTracingEnabled, isWebSocketUrl, lmnt, makeAuthMiddleware, mergePricing, mixPcm, mountApi, mountDashboard, mulawToPcm16, notifyDashboard, openaiTts, openclawConsult, openclawPostCallNotifier, pcm16ToMulaw, resample16kTo8k, resample24kTo16k, resample8kTo16k, resamplePcm, resetStrategies as resetBargeInStrategies, rime, scheduleCron, scheduleInterval, scheduleOnce, selectSoundFromList, setLogger, soniox, speechmatics, startSpan, startTunnel, tool, ultravox, whisper };
|