getpatter 0.4.3 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -10,13 +10,21 @@ interface STTConfig {
10
10
  readonly provider: string;
11
11
  readonly apiKey: string;
12
12
  readonly language: string;
13
- toDict(): Record<string, string>;
13
+ /**
14
+ * Optional — when present, it is called by internal serialisation. Callers may
15
+ * omit it and pass a plain object literal (``{ provider, apiKey, language }``);
16
+ * this keeps parity with the Python SDK, which accepts dataclass-like inputs.
17
+ */
18
+ toDict?(): Record<string, string | Record<string, unknown>>;
19
+ /** Provider-specific knobs (e.g. Deepgram endpointing). */
20
+ options?: Record<string, unknown>;
14
21
  }
15
22
  interface TTSConfig {
16
23
  readonly provider: string;
17
24
  readonly apiKey: string;
18
25
  readonly voice: string;
19
- toDict(): Record<string, string>;
26
+ toDict?(): Record<string, string | Record<string, unknown>>;
27
+ options?: Record<string, unknown>;
20
28
  }
21
29
  type MessageHandler = (msg: IncomingMessage) => Promise<string>;
22
30
  type CallEventHandler = (data: Record<string, unknown>) => Promise<void>;
@@ -99,7 +107,12 @@ interface Call {
99
107
  }> | null;
100
108
  }
101
109
  interface LocalOptions {
102
- mode: 'local';
110
+ /**
111
+ * Optional — when omitted, local mode is auto-detected from the presence of
112
+ * ``twilioSid`` or ``telnyxKey`` (matches the Python SDK which treats
113
+ * ``Patter(twilio_sid=...)`` as local mode by default).
114
+ */
115
+ mode?: 'local';
103
116
  twilioSid?: string;
104
117
  twilioToken?: string;
105
118
  openaiKey?: string;
@@ -113,6 +126,14 @@ interface LocalOptions {
113
126
  * signature verification. When provided, unauthenticated requests are rejected.
114
127
  */
115
128
  telnyxPublicKey?: string;
129
+ /**
130
+ * Provider-level Deepgram API key. When set, agents that don't override
131
+ * ``agent.deepgramKey`` / ``agent.stt`` use this as the default STT key.
132
+ * Mirrors Python's ``Patter(deepgram_key=...)``.
133
+ */
134
+ deepgramKey?: string;
135
+ /** Provider-level ElevenLabs API key (same semantics as ``deepgramKey``). */
136
+ elevenlabsKey?: string;
116
137
  }
117
138
  interface Guardrail {
118
139
  /** Name for logging when triggered */
@@ -197,6 +218,13 @@ interface AgentOptions {
197
218
  audioFilter?: AudioFilter;
198
219
  /** Optional background audio mixer (hold music, thinking cues). Pipeline mode only. */
199
220
  backgroundAudio?: BackgroundAudioPlayer$1;
221
+ /**
222
+ * Minimum sustained voice (ms) before treating caller audio as a barge-in
223
+ * and interrupting TTS. `0` disables barge-in entirely — useful on noisy
224
+ * links (ngrok tunnels, speakerphone) where the agent can hear itself.
225
+ * Default: 300.
226
+ */
227
+ bargeInThresholdMs?: number;
200
228
  }
201
229
  type PipelineMessageHandler = (data: Record<string, unknown>) => Promise<string>;
202
230
  interface ServeOptions {
@@ -235,11 +263,28 @@ interface LocalCallOptions {
235
263
  voicemailMessage?: string;
236
264
  /** Dynamic variables merged into agent.variables before call. Override agent-level variables. */
237
265
  variables?: Record<string, string>;
266
+ /**
267
+ * Ring timeout in seconds. Forwarded to Twilio as `Timeout` and to Telnyx
268
+ * as `timeout_secs`. Defaults to the carrier default (~28 s on Twilio) when
269
+ * omitted. Increase for international routes where the remote carrier
270
+ * silences short US→IT rings.
271
+ */
272
+ ringTimeout?: number;
238
273
  }
239
274
 
275
+ /**
276
+ * Deepgram STT config. Tune latency via ``endpointingMs`` / ``utteranceEndMs``
277
+ * — mirrors Python's ``Patter.deepgram(endpointing_ms=..., utterance_end_ms=...)``.
278
+ */
240
279
  declare function deepgram(opts: {
241
280
  apiKey: string;
242
281
  language?: string;
282
+ model?: string;
283
+ endpointingMs?: number;
284
+ utteranceEndMs?: number | null;
285
+ smartFormat?: boolean;
286
+ interimResults?: boolean;
287
+ vadEvents?: boolean;
243
288
  }): STTConfig;
244
289
  declare function whisper(opts: {
245
290
  apiKey: string;
@@ -253,6 +298,18 @@ declare function openaiTts(opts: {
253
298
  apiKey: string;
254
299
  voice?: string;
255
300
  }): TTSConfig;
301
+ declare function cartesia(opts: {
302
+ apiKey: string;
303
+ voice?: string;
304
+ }): TTSConfig;
305
+ declare function rime(opts: {
306
+ apiKey: string;
307
+ voice?: string;
308
+ }): TTSConfig;
309
+ declare function lmnt(opts: {
310
+ apiKey: string;
311
+ voice?: string;
312
+ }): TTSConfig;
256
313
 
257
314
  declare class Patter {
258
315
  readonly apiKey: string;
@@ -282,6 +339,9 @@ declare class Patter {
282
339
  static whisper: typeof whisper;
283
340
  static elevenlabs: typeof elevenlabs;
284
341
  static openaiTts: typeof openaiTts;
342
+ static cartesia: typeof cartesia;
343
+ static rime: typeof rime;
344
+ static lmnt: typeof lmnt;
285
345
  static guardrail(opts: {
286
346
  name: string;
287
347
  blockedTerms?: string[];
@@ -699,19 +759,65 @@ interface Transcript$4 {
699
759
  readonly confidence: number;
700
760
  }
701
761
  type TranscriptCallback$4 = (transcript: Transcript$4) => void;
762
+ /**
763
+ * Optional tuning knobs for Deepgram live transcription.
764
+ *
765
+ * Mirrors Python's ``DeepgramSTT`` kwargs so callers can lower turn latency
766
+ * without monkey-patching (BUG #13).
767
+ */
768
+ interface DeepgramSTTOptions {
769
+ /** Model name. Default ``nova-3``. */
770
+ readonly model?: string;
771
+ /** Audio encoding (``linear16`` | ``mulaw`` | etc). Default ``linear16``. */
772
+ readonly encoding?: string;
773
+ /** Sample rate in Hz. Default ``16000``. */
774
+ readonly sampleRate?: number;
775
+ /**
776
+ * Voice-activity endpointing threshold in milliseconds.
777
+ * Lower values reduce turn latency at the cost of more false-start cuts.
778
+ * Default ``150``.
779
+ */
780
+ readonly endpointingMs?: number;
781
+ /**
782
+ * End-of-utterance silence window in milliseconds. Deepgram enforces a
783
+ * hard minimum of 1000 ms. Set to ``null`` to disable. Default ``1000``.
784
+ */
785
+ readonly utteranceEndMs?: number | null;
786
+ /** Enable smart formatting (punctuation + numerals). Default ``true``. */
787
+ readonly smartFormat?: boolean;
788
+ /** Emit interim (non-final) transcripts. Default ``true``. */
789
+ readonly interimResults?: boolean;
790
+ /** Emit VAD events (``SpeechStarted`` / ``UtteranceEnd``). Default ``true``. */
791
+ readonly vadEvents?: boolean;
792
+ }
702
793
  declare class DeepgramSTT {
794
+ private ws;
795
+ private callbacks;
796
+ /** Request ID from Deepgram — used to query actual cost post-call. */
797
+ requestId: string;
703
798
  private readonly apiKey;
704
799
  private readonly language;
705
800
  private readonly model;
706
801
  private readonly encoding;
707
802
  private readonly sampleRate;
708
- private ws;
709
- private callbacks;
710
- /** Request ID from Deepgram — used to query actual cost post-call. */
711
- requestId: string;
712
- constructor(apiKey: string, language?: string, model?: string, encoding?: string, sampleRate?: number);
713
- /** Factory for Twilio calls — mulaw 8 kHz. */
714
- static forTwilio(apiKey: string, language?: string, model?: string): DeepgramSTT;
803
+ private readonly endpointingMs;
804
+ private readonly utteranceEndMs;
805
+ private readonly smartFormat;
806
+ private readonly interimResults;
807
+ private readonly vadEvents;
808
+ /**
809
+ * New ergonomic constructor accepting an options object (mirrors Python kwargs).
810
+ *
811
+ * Also accepts the legacy positional form
812
+ * ``(apiKey, language?, model?, encoding?, sampleRate?)`` for backward
813
+ * compatibility with code that predated BUG #13.
814
+ */
815
+ constructor(apiKey: string, language?: string, model?: string, encoding?: string, sampleRate?: number, options?: DeepgramSTTOptions);
816
+ constructor(apiKey: string, options: DeepgramSTTOptions & {
817
+ language?: string;
818
+ });
819
+ /** Factory for Twilio calls — mulaw 8 kHz. Forwards tuning options through. */
820
+ static forTwilio(apiKey: string, language?: string, model?: string, options?: DeepgramSTTOptions): DeepgramSTT;
715
821
  connect(): Promise<void>;
716
822
  sendAudio(audio: Buffer): void;
717
823
  onTranscript(callback: TranscriptCallback$4): void;
@@ -750,6 +856,76 @@ declare class WhisperSTT {
750
856
  private transcribeBuffer;
751
857
  }
752
858
 
859
+ /**
860
+ * In-memory metrics store for the local dashboard.
861
+ *
862
+ * Keeps the last `maxCalls` completed calls and tracks active calls.
863
+ * Supports SSE event subscribers for real-time updates.
864
+ */
865
+
866
+ interface CallRecord {
867
+ call_id: string;
868
+ caller: string;
869
+ callee: string;
870
+ direction: string;
871
+ started_at: number;
872
+ ended_at?: number;
873
+ /**
874
+ * Current lifecycle state: ``initiated`` (pre-registered), ``ringing``,
875
+ * ``in-progress``, ``completed``, ``no-answer``, ``busy``, ``failed``,
876
+ * ``canceled``, or ``webhook_error``.
877
+ */
878
+ status?: string;
879
+ transcript?: Array<{
880
+ role: string;
881
+ text: string;
882
+ timestamp: number;
883
+ }>;
884
+ turns?: unknown[];
885
+ metrics?: Record<string, unknown> | null;
886
+ [key: string]: unknown;
887
+ }
888
+ interface SSEEvent {
889
+ type: string;
890
+ data: Record<string, unknown>;
891
+ }
892
+ declare class MetricsStore extends EventEmitter {
893
+ private readonly maxCalls;
894
+ private calls;
895
+ private activeCalls;
896
+ /**
897
+ * Accepts either a numeric ``maxCalls`` (legacy positional — matches the
898
+ * original TS API) or an options object ``{ maxCalls }`` to align with the
899
+ * Python SDK's keyword-argument style. Plain literals also work:
900
+ * ``new MetricsStore()`` / ``new MetricsStore(100)`` / ``new MetricsStore({ maxCalls: 100 })``.
901
+ */
902
+ constructor(maxCallsOrOpts?: number | {
903
+ maxCalls?: number;
904
+ });
905
+ private publish;
906
+ recordCallStart(data: Record<string, unknown>): void;
907
+ /**
908
+ * Pre-register an outbound call before any webhook fires. Lets the
909
+ * dashboard surface attempts that never reach media (no-answer, busy,
910
+ * carrier-rejected). Mirrors the Python ``record_call_initiated``.
911
+ */
912
+ recordCallInitiated(data: Record<string, unknown>): void;
913
+ /**
914
+ * Update the status of an active or completed call. Terminal states
915
+ * (completed, no-answer, busy, failed, canceled, webhook_error) move the
916
+ * row from active to completed so the UI freezes the live duration timer.
917
+ */
918
+ updateCallStatus(callId: string, status: string, extra?: Record<string, unknown>): void;
919
+ recordTurn(data: Record<string, unknown>): void;
920
+ recordCallEnd(data: Record<string, unknown>, metrics?: Record<string, unknown> | null): void;
921
+ getCalls(limit?: number, offset?: number): CallRecord[];
922
+ getCall(callId: string): CallRecord | null;
923
+ getActiveCalls(): CallRecord[];
924
+ getAggregates(): Record<string, unknown>;
925
+ getCallsInRange(fromTs?: number, toTs?: number): CallRecord[];
926
+ get callCount(): number;
927
+ }
928
+
753
929
  /**
754
930
  * Remote message handler for B2B webhook and WebSocket integration.
755
931
  *
@@ -800,50 +976,6 @@ declare function isRemoteUrl(onMessage: unknown): onMessage is string;
800
976
  /** Check if a URL is a WebSocket URL. */
801
977
  declare function isWebSocketUrl(url: string): boolean;
802
978
 
803
- /**
804
- * In-memory metrics store for the local dashboard.
805
- *
806
- * Keeps the last `maxCalls` completed calls and tracks active calls.
807
- * Supports SSE event subscribers for real-time updates.
808
- */
809
-
810
- interface CallRecord {
811
- call_id: string;
812
- caller: string;
813
- callee: string;
814
- direction: string;
815
- started_at: number;
816
- ended_at?: number;
817
- transcript?: Array<{
818
- role: string;
819
- text: string;
820
- timestamp: number;
821
- }>;
822
- turns?: unknown[];
823
- metrics?: Record<string, unknown> | null;
824
- [key: string]: unknown;
825
- }
826
- interface SSEEvent {
827
- type: string;
828
- data: Record<string, unknown>;
829
- }
830
- declare class MetricsStore extends EventEmitter {
831
- private readonly maxCalls;
832
- private calls;
833
- private activeCalls;
834
- constructor(maxCalls?: number);
835
- private publish;
836
- recordCallStart(data: Record<string, unknown>): void;
837
- recordTurn(data: Record<string, unknown>): void;
838
- recordCallEnd(data: Record<string, unknown>, metrics?: Record<string, unknown> | null): void;
839
- getCalls(limit?: number, offset?: number): CallRecord[];
840
- getCall(callId: string): CallRecord | null;
841
- getActiveCalls(): CallRecord[];
842
- getAggregates(): Record<string, unknown>;
843
- getCallsInRange(fromTs?: number, toTs?: number): CallRecord[];
844
- get callCount(): number;
845
- }
846
-
847
979
  interface LocalConfig {
848
980
  twilioSid?: string;
849
981
  twilioToken?: string;
@@ -1009,6 +1141,27 @@ declare class FallbackLLMProvider implements LLMProvider {
1009
1141
  getAvailability(): ReadonlyArray<boolean>;
1010
1142
  /** Clears all background recovery timers. Call this when shutting down. */
1011
1143
  destroy(): void;
1144
+ /**
1145
+ * Async-friendly disposer. Parity with Python's ``FallbackLLMProvider.aclose()``
1146
+ * — safe to call multiple times, returns a resolved Promise once all probe
1147
+ * timers are cleared. Prefer this in async contexts so awaiting the
1148
+ * shutdown integrates naturally with the owning lifecycle.
1149
+ */
1150
+ aclose(): Promise<void>;
1151
+ /**
1152
+ * Explicit-resource-management hook so callers can write
1153
+ * ``await using fallback = new FallbackLLMProvider([...])`` and have
1154
+ * background probe timers cleared automatically when the block exits.
1155
+ * Mirrors Python's ``async with FallbackLLMProvider(...)``.
1156
+ */
1157
+ [Symbol.asyncDispose](): Promise<void>;
1158
+ /**
1159
+ * Stream only the text deltas, flattening the chunk envelope. Parity with
1160
+ * Python's ``FallbackLLMProvider.complete_stream``. Tool-call and done
1161
+ * markers are filtered out so callers can concatenate the yielded strings
1162
+ * directly.
1163
+ */
1164
+ completeStream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<string, void, unknown>;
1012
1165
  stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
1013
1166
  private tryProviders;
1014
1167
  private markUnavailable;
@@ -1159,12 +1312,33 @@ interface ScheduleHandle {
1159
1312
  cancel(): void;
1160
1313
  readonly pending: boolean;
1161
1314
  }
1162
- /** Schedule ``callback`` on a cron expression (node-cron dialect). */
1163
- declare function scheduleCron(cron: string, callback: JobCallback): Promise<ScheduleHandle>;
1315
+ /** Schedule ``callback`` on a cron expression (node-cron dialect).
1316
+ *
1317
+ * Returns a ``ScheduleHandle`` synchronously (parity with Python
1318
+ * ``schedule_cron``). The handle is "pending" until the lazy ``node-cron``
1319
+ * import resolves; cancelling the handle before then discards the pending
1320
+ * job cleanly. If ``node-cron`` is not installed, the promise exposed on the
1321
+ * handle's ``.ready`` property rejects with a helpful install message.
1322
+ */
1323
+ declare function scheduleCron(cron: string, callback: JobCallback): ScheduleHandle;
1164
1324
  /** Schedule ``callback`` once at the given date. */
1165
1325
  declare function scheduleOnce(at: Date, callback: JobCallback): ScheduleHandle;
1166
- /** Schedule ``callback`` every ``intervalMs`` milliseconds. */
1167
- declare function scheduleInterval(intervalMs: number, callback: JobCallback): ScheduleHandle;
1326
+ /**
1327
+ * Schedule ``callback`` on a recurring interval.
1328
+ *
1329
+ * Accepts either a millisecond number (legacy, matches the original TS API)
1330
+ * or an object with ``seconds`` / ``intervalMs`` for parity with Python's
1331
+ * ``schedule_interval(seconds=...)``.
1332
+ *
1333
+ * Examples:
1334
+ * scheduleInterval(5000, cb) // 5 s, legacy
1335
+ * scheduleInterval({ intervalMs: 5000 }, cb)
1336
+ * scheduleInterval({ seconds: 5 }, cb) // parity with Python
1337
+ */
1338
+ declare function scheduleInterval(intervalOrOpts: number | {
1339
+ seconds?: number;
1340
+ intervalMs?: number;
1341
+ }, callback: JobCallback): ScheduleHandle;
1168
1342
 
1169
1343
  /**
1170
1344
  * Soniox Speech-to-Text adapter for Patter (TypeScript).
@@ -1354,9 +1528,9 @@ declare class CartesiaSTT {
1354
1528
 
1355
1529
  declare class ElevenLabsTTS {
1356
1530
  private readonly apiKey;
1357
- private readonly voiceId;
1358
1531
  private readonly modelId;
1359
1532
  private readonly outputFormat;
1533
+ private readonly voiceId;
1360
1534
  constructor(apiKey: string, voiceId?: string, modelId?: string, outputFormat?: string);
1361
1535
  /**
1362
1536
  * Synthesise text to speech and return the full audio as a single Buffer.
@@ -1389,15 +1563,22 @@ declare class OpenAITTS {
1389
1563
  *
1390
1564
  * OpenAI returns 24 kHz PCM16; each chunk is resampled to 16 kHz before
1391
1565
  * yielding so the output is ready for telephony pipelines.
1566
+ *
1567
+ * The resampler carries state (buffered samples + odd trailing byte)
1568
+ * between chunks — without that state cross-chunk sample alignment drifts
1569
+ * and the caller hears pops / dropped audio (BUG #23, mirror of the
1570
+ * Python `audioop.ratecv` fix).
1392
1571
  */
1393
1572
  synthesizeStream(text: string): AsyncGenerator<Buffer>;
1394
1573
  /**
1395
- * Resample 24 kHz PCM16-LE to 16 kHz by taking 2 out of every 3 samples.
1396
- *
1397
- * For each group of 3 input samples the first is kept as-is and the second
1398
- * output sample is the average of input samples 2 and 3. This matches the
1399
- * Python SDK implementation.
1574
+ * Streaming 24 kHz → 16 kHz resampler (PCM16-LE). Maintains cross-chunk
1575
+ * state so the 3:2 pattern doesn't reset at every network read.
1400
1576
  */
1577
+ static resampleStreaming(audio: Buffer, ctx: {
1578
+ carryByte: number | null;
1579
+ leftover: number[];
1580
+ }): Buffer;
1581
+ /** @deprecated use {@link resampleStreaming} with persistent state. */
1401
1582
  static resample24kTo16k(audio: Buffer): Buffer;
1402
1583
  }
1403
1584