getpatter 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. package/README.md +185 -587
  2. package/dist/chunk-35EVXMGB.mjs +4472 -0
  3. package/dist/chunk-AFUYSNDH.mjs +31 -0
  4. package/dist/chunk-JO5C35FM.mjs +65 -0
  5. package/dist/chunk-OOIUSZB4.mjs +37 -0
  6. package/dist/cli.js +1139 -0
  7. package/dist/index.d.mts +1063 -85
  8. package/dist/index.d.ts +1063 -85
  9. package/dist/index.js +8969 -3904
  10. package/dist/index.mjs +2382 -3354
  11. package/dist/lib-4WCAS54J.mjs +830 -0
  12. package/dist/node-cron-373UVDIO.mjs +935 -0
  13. package/dist/persistence-CYIGNHSU.mjs +7 -0
  14. package/dist/resources/audio/NOTICE +2 -0
  15. package/dist/resources/audio/city-ambience.ogg +0 -0
  16. package/dist/resources/audio/crowded-room.ogg +0 -0
  17. package/dist/resources/audio/forest-ambience.ogg +0 -0
  18. package/dist/resources/audio/hold_music.ogg +0 -0
  19. package/dist/resources/audio/keyboard-typing.ogg +0 -0
  20. package/dist/resources/audio/keyboard-typing2.ogg +0 -0
  21. package/dist/resources/audio/office-ambience.ogg +0 -0
  22. package/dist/resources/silero_vad.onnx +0 -0
  23. package/dist/{test-mode-JMXZSAJS.mjs → test-mode-RH65MMSP.mjs} +2 -1
  24. package/dist/{tunnel-HYSU7EF2.mjs → tunnel-BL7A7GXW.mjs} +2 -1
  25. package/package.json +25 -8
  26. package/src/resources/audio/NOTICE +2 -0
  27. package/src/resources/audio/city-ambience.ogg +0 -0
  28. package/src/resources/audio/crowded-room.ogg +0 -0
  29. package/src/resources/audio/forest-ambience.ogg +0 -0
  30. package/src/resources/audio/hold_music.ogg +0 -0
  31. package/src/resources/audio/keyboard-typing.ogg +0 -0
  32. package/src/resources/audio/keyboard-typing2.ogg +0 -0
  33. package/src/resources/audio/office-ambience.ogg +0 -0
  34. package/dist/chunk-TAATEHKF.mjs +0 -396
  35. package/dist/chunk-VNU4GNW3.mjs +0 -45
package/dist/index.d.ts CHANGED
@@ -124,6 +124,48 @@ interface Guardrail {
124
124
  /** Replacement text spoken when guardrail triggers */
125
125
  replacement?: string;
126
126
  }
127
+ interface HookContext {
128
+ readonly callId: string;
129
+ readonly caller: string;
130
+ readonly callee: string;
131
+ readonly history: ReadonlyArray<{
132
+ role: string;
133
+ text: string;
134
+ }>;
135
+ }
136
+ interface PipelineHooks {
137
+ /** Called with the raw PCM audio chunk before it is forwarded to the STT provider.
138
+ * Return null to drop the chunk (e.g., for custom VAD gating). */
139
+ beforeSendToStt?: (audio: Buffer, ctx: HookContext) => Buffer | null | Promise<Buffer | null>;
140
+ /** Called after STT produces a transcript, before LLM. Return null to skip this turn. */
141
+ afterTranscribe?: (transcript: string, ctx: HookContext) => string | null | Promise<string | null>;
142
+ /** Called before TTS, per-sentence in streaming mode. Return null to skip TTS for this sentence. */
143
+ beforeSynthesize?: (text: string, ctx: HookContext) => string | null | Promise<string | null>;
144
+ /** Called after TTS produces an audio chunk. Return null to discard this chunk. */
145
+ afterSynthesize?: (audio: Buffer, text: string, ctx: HookContext) => Buffer | null | Promise<Buffer | null>;
146
+ }
147
+ /** Voice activity event emitted by a VADProvider. */
148
+ interface VADEvent {
149
+ readonly type: 'speech_start' | 'speech_end' | 'silence';
150
+ readonly confidence?: number;
151
+ readonly durationMs?: number;
152
+ }
153
+ /** Server-side voice activity detector. Integrated before STT in pipeline mode. */
154
+ interface VADProvider {
155
+ processFrame(pcmChunk: Buffer, sampleRate: number): Promise<VADEvent | null>;
156
+ close(): Promise<void>;
157
+ }
158
+ /** Pre-STT audio filter — noise cancellation, gain, EQ. */
159
+ interface AudioFilter {
160
+ process(pcmChunk: Buffer, sampleRate: number): Promise<Buffer>;
161
+ close(): Promise<void>;
162
+ }
163
+ /** Mixes background audio (hold music, thinking cues) with TTS output. */
164
+ interface BackgroundAudioPlayer$1 {
165
+ start(): Promise<void>;
166
+ mix(agentPcm: Buffer, sampleRate: number): Promise<Buffer>;
167
+ stop(): Promise<void>;
168
+ }
127
169
  interface AgentOptions {
128
170
  systemPrompt: string;
129
171
  voice?: string;
@@ -143,6 +185,18 @@ interface AgentOptions {
143
185
  variables?: Record<string, string>;
144
186
  /** Output guardrails — filter AI responses before TTS */
145
187
  guardrails?: Guardrail[];
188
+ /** Pipeline hooks — intercept and transform data at each pipeline stage (pipeline mode only). */
189
+ hooks?: PipelineHooks;
190
+ /** Text transforms applied to LLM output before TTS (pipeline mode only).
191
+ * Each function receives a string and returns the transformed string.
192
+ * Applied in order before the ``beforeSynthesize`` hook. */
193
+ textTransforms?: Array<(text: string) => string>;
194
+ /** Optional server-side VAD (e.g., Silero). Pipeline mode only. */
195
+ vad?: VADProvider;
196
+ /** Optional pre-STT audio filter (noise cancellation). Pipeline mode only. */
197
+ audioFilter?: AudioFilter;
198
+ /** Optional background audio mixer (hold music, thinking cues). Pipeline mode only. */
199
+ backgroundAudio?: BackgroundAudioPlayer$1;
146
200
  }
147
201
  type PipelineMessageHandler = (data: Record<string, unknown>) => Promise<string>;
148
202
  interface ServeOptions {
@@ -269,6 +323,50 @@ declare class Patter {
269
323
  private registerNumber;
270
324
  }
271
325
 
326
+ /**
327
+ * Factory function that builds a {@link ToolDefinition} from a concise
328
+ * parameter spec, auto-generating the full JSON Schema `parameters` object.
329
+ *
330
+ * @example
331
+ * ```ts
332
+ * import { defineTool } from 'getpatter';
333
+ *
334
+ * const getWeather = defineTool({
335
+ * name: 'get_weather',
336
+ * description: 'Get the current weather for a location.',
337
+ * parameters: {
338
+ * location: { type: 'string', description: 'City name or zip code' },
339
+ * unit: { type: 'string', description: 'Temperature unit', default: 'celsius' },
340
+ * },
341
+ * handler: async (args) => {
342
+ * return `Sunny, 22°${(args.unit as string)[0].toUpperCase()}`;
343
+ * },
344
+ * });
345
+ * ```
346
+ */
347
+
348
+ /** Shorthand property spec accepted by {@link defineTool}. */
349
+ interface ParamSpec {
350
+ readonly type: string;
351
+ readonly description?: string;
352
+ /** When present the parameter is *not* required. */
353
+ readonly default?: unknown;
354
+ }
355
+ /** Input accepted by {@link defineTool}. */
356
+ interface DefineToolInput {
357
+ readonly name: string;
358
+ readonly description?: string;
359
+ readonly parameters: Readonly<Record<string, ParamSpec>>;
360
+ readonly handler: (args: Record<string, unknown>, context: Record<string, unknown>) => Promise<string>;
361
+ }
362
+ /**
363
+ * Build a full {@link ToolDefinition} from a concise parameter spec.
364
+ *
365
+ * Parameters that include a `default` value are treated as optional; all
366
+ * others are added to the JSON Schema `required` array.
367
+ */
368
+ declare function defineTool(input: DefineToolInput): ToolDefinition;
369
+
272
370
  interface Logger {
273
371
  info(message: string, ...args: unknown[]): void;
274
372
  warn(message: string, ...args: unknown[]): void;
@@ -278,6 +376,109 @@ interface Logger {
278
376
  declare function getLogger(): Logger;
279
377
  declare function setLogger(logger: Logger): void;
280
378
 
379
+ /**
380
+ * Sentence chunker for streaming TTS in pipeline mode.
381
+ *
382
+ * Accumulates streaming LLM tokens and yields complete sentences.
383
+ * Uses regex-based marker replacement for robust sentence boundary
384
+ * detection, handling abbreviations, acronyms, decimals, websites,
385
+ * ellipsis, and CJK punctuation.
386
+ *
387
+ * Algorithm adapted from LiveKit Agents (Apache 2.0):
388
+ * https://github.com/livekit/agents
389
+ */
390
+ /** Default minimum sentence length before emitting. */
391
+ declare const DEFAULT_MIN_SENTENCE_LEN = 20;
392
+ /**
393
+ * Accumulates streaming tokens and yields complete sentences.
394
+ *
395
+ * @example
396
+ * ```typescript
397
+ * const chunker = new SentenceChunker();
398
+ * for await (const token of llmStream) {
399
+ * for (const sentence of chunker.push(token)) {
400
+ * await tts.synthesizeStream(sentence);
401
+ * }
402
+ * }
403
+ * for (const sentence of chunker.flush()) {
404
+ * await tts.synthesizeStream(sentence);
405
+ * }
406
+ * ```
407
+ */
408
+ declare class SentenceChunker {
409
+ private buffer;
410
+ private readonly minSentenceLen;
411
+ constructor(options?: {
412
+ minSentenceLen?: number;
413
+ });
414
+ /** Feed a token. Returns zero or more complete sentences. */
415
+ push(token: string): string[];
416
+ /** Flush remaining buffer as final sentence(s). Call at end of stream. */
417
+ flush(): string[];
418
+ /** Discard buffered text. Call on interrupt. */
419
+ reset(): void;
420
+ }
421
+
422
+ /**
423
+ * Pipeline hook executor for pipeline mode.
424
+ *
425
+ * Runs user-defined hooks at each stage of the STT → LLM → TTS pipeline.
426
+ * Fail-open: if a hook throws, the error is logged and the original value
427
+ * passes through unchanged.
428
+ */
429
+
430
+ declare class PipelineHookExecutor {
431
+ private readonly hooks;
432
+ constructor(hooks: PipelineHooks | undefined);
433
+ /**
434
+ * Run beforeSendToStt hook. Returns null to drop the audio chunk.
435
+ * If no hook is defined, returns the audio unchanged.
436
+ * Fail-open: on exception, the original audio passes through.
437
+ */
438
+ runBeforeSendToStt(audio: Buffer, ctx: HookContext): Promise<Buffer | null>;
439
+ /**
440
+ * Run afterTranscribe hook. Returns null if hook vetoes the turn.
441
+ * If no hook is defined, returns the transcript unchanged.
442
+ */
443
+ runAfterTranscribe(transcript: string, ctx: HookContext): Promise<string | null>;
444
+ /**
445
+ * Run beforeSynthesize hook. Returns null if hook vetoes TTS for this sentence.
446
+ * If no hook is defined, returns the text unchanged.
447
+ */
448
+ runBeforeSynthesize(text: string, ctx: HookContext): Promise<string | null>;
449
+ /**
450
+ * Run afterSynthesize hook. Returns null if hook vetoes this audio chunk.
451
+ * If no hook is defined, returns the audio unchanged.
452
+ */
453
+ runAfterSynthesize(audio: Buffer, text: string, ctx: HookContext): Promise<Buffer | null>;
454
+ }
455
+
456
+ /**
457
+ * Built-in text transforms for cleaning LLM output before TTS synthesis.
458
+ *
459
+ * These functions strip markdown formatting and emoji characters so that TTS
460
+ * engines produce natural-sounding speech rather than reading aloud syntax
461
+ * like "asterisk asterisk bold asterisk asterisk" or Unicode pictographs.
462
+ */
463
+ /**
464
+ * Remove markdown formatting from text, preserving the readable content.
465
+ *
466
+ * Handles: headers, bold, italic, code blocks/inline, links, images,
467
+ * strikethrough, list markers, block quotes, horizontal rules, HTML tags.
468
+ */
469
+ declare function filterMarkdown(text: string): string;
470
+ /**
471
+ * Remove emoji characters from text, preserving normal text, punctuation,
472
+ * and non-emoji Unicode (CJK, accented characters, etc.).
473
+ */
474
+ declare function filterEmoji(text: string): string;
475
+ /**
476
+ * Combined filter: strip markdown formatting and emoji from text.
477
+ *
478
+ * Intended as a convenience for the most common TTS pre-processing use case.
479
+ */
480
+ declare function filterForTTS(text: string): string;
481
+
281
482
  declare class PatterError extends Error {
282
483
  constructor(message: string);
283
484
  }
@@ -371,10 +572,19 @@ interface CallMetrics {
371
572
  telephony_provider: string;
372
573
  }
373
574
  interface CallControl {
374
- /** Transfer the call to a different number. */
575
+ /** Transfer the call to a different number or SIP URI. */
375
576
  transfer(number: string): Promise<void>;
376
577
  /** Hang up the call. */
377
578
  hangup(): Promise<void>;
579
+ /**
580
+ * Send DTMF digits (for IVR navigation, e.g. "1234#").
581
+ *
582
+ * @param digits String of DTMF digits (0-9, *, #, A-D).
583
+ * @param options Per-call tuning. `delayMs` defaults to `300`.
584
+ */
585
+ sendDtmf?(digits: string, options?: {
586
+ delayMs?: number;
587
+ }): Promise<void>;
378
588
  /** Current call ID. */
379
589
  readonly callId: string;
380
590
  /** Caller number. */
@@ -417,6 +627,8 @@ declare class CallMetricsAccumulator {
417
627
  });
418
628
  /** Configure audio format for STT byte-to-seconds conversion. */
419
629
  configureSttFormat(sampleRate?: number, bytesPerSample?: number): void;
630
+ /** Whether a turn is currently being measured (startTurn called, not yet completed). */
631
+ get turnActive(): boolean;
420
632
  startTurn(): void;
421
633
  recordSttComplete(text: string, audioSeconds?: number): void;
422
634
  recordLlmComplete(): void;
@@ -460,7 +672,7 @@ declare class OpenAIRealtimeAdapter {
460
672
  }> | undefined);
461
673
  connect(): Promise<void>;
462
674
  sendAudio(mulawAudio: Buffer): void;
463
- onEvent(callback: (type: string, data: unknown) => void): void;
675
+ onEvent(callback: (type: string, data: unknown) => void | Promise<void>): void;
464
676
  cancelResponse(): void;
465
677
  sendText(text: string): Promise<void>;
466
678
  sendFunctionResult(callId: string, result: string): Promise<void>;
@@ -477,27 +689,116 @@ declare class ElevenLabsConvAIAdapter {
477
689
  constructor(apiKey: string, agentId?: string, voiceId?: string, _modelId?: string, _language?: string, firstMessage?: string);
478
690
  connect(): Promise<void>;
479
691
  sendAudio(audioBytes: Buffer): void;
480
- onEvent(callback: (type: string, data: unknown) => void): void;
692
+ onEvent(callback: (type: string, data: unknown) => void | Promise<void>): void;
481
693
  close(): void;
482
694
  }
483
695
 
484
- interface LocalConfig {
485
- twilioSid?: string;
486
- twilioToken?: string;
487
- openaiKey?: string;
488
- phoneNumber: string;
489
- webhookUrl: string;
490
- telephonyProvider?: 'twilio' | 'telnyx';
491
- telnyxKey?: string;
492
- telnyxConnectionId?: string;
696
+ interface Transcript$4 {
697
+ readonly text: string;
698
+ readonly isFinal: boolean;
699
+ readonly confidence: number;
700
+ }
701
+ type TranscriptCallback$4 = (transcript: Transcript$4) => void;
702
+ declare class DeepgramSTT {
703
+ private readonly apiKey;
704
+ private readonly language;
705
+ private readonly model;
706
+ private readonly encoding;
707
+ private readonly sampleRate;
708
+ private ws;
709
+ private callbacks;
710
+ /** Request ID from Deepgram — used to query actual cost post-call. */
711
+ requestId: string;
712
+ constructor(apiKey: string, language?: string, model?: string, encoding?: string, sampleRate?: number);
713
+ /** Factory for Twilio calls — mulaw 8 kHz. */
714
+ static forTwilio(apiKey: string, language?: string, model?: string): DeepgramSTT;
715
+ connect(): Promise<void>;
716
+ sendAudio(audio: Buffer): void;
717
+ onTranscript(callback: TranscriptCallback$4): void;
718
+ close(): void;
719
+ }
720
+
721
+ /**
722
+ * OpenAI Whisper STT adapter for the Patter SDK pipeline mode.
723
+ *
724
+ * Buffers incoming PCM16 audio and periodically sends it to the
725
+ * OpenAI Whisper transcription API as a WAV file.
726
+ */
727
+ interface Transcript$3 {
728
+ readonly text: string;
729
+ readonly isFinal: boolean;
730
+ readonly confidence: number;
731
+ }
732
+ type TranscriptCallback$3 = (transcript: Transcript$3) => void;
733
+ declare class WhisperSTT {
734
+ private readonly apiKey;
735
+ private readonly model;
736
+ private readonly language;
737
+ private readonly bufferSize;
738
+ private buffer;
739
+ private callbacks;
740
+ private running;
741
+ private pendingTranscriptions;
742
+ constructor(apiKey: string, model?: string, language?: string, bufferSize?: number);
743
+ /** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
744
+ static forTwilio(apiKey: string, language?: string, model?: string): WhisperSTT;
745
+ connect(): Promise<void>;
746
+ sendAudio(audio: Buffer): void;
747
+ private trackTranscription;
748
+ onTranscript(callback: TranscriptCallback$3): void;
749
+ close(): Promise<void>;
750
+ private transcribeBuffer;
751
+ }
752
+
753
+ /**
754
+ * Remote message handler for B2B webhook and WebSocket integration.
755
+ *
756
+ * Allows onMessage to be a URL string instead of a callable:
757
+ * - HTTP webhook: onMessage="https://api.customer.com/patter/message"
758
+ * - WebSocket: onMessage="ws://localhost:9000/stream"
759
+ */
760
+ declare class RemoteMessageHandler {
761
+ private readonly webhookSecret;
493
762
  /**
494
- * Telnyx Ed25519 public key (base64-encoded, DER/SPKI format) used to verify
495
- * incoming webhook signatures. Obtain from the Telnyx portal under
496
- * API Keys Webhook Keys. When provided, unauthenticated webhook requests
497
- * are rejected with HTTP 403.
763
+ * @param webhookSecret Optional HMAC secret. When provided, outgoing webhook
764
+ * requests include an `X-Patter-Signature` header so the receiver can
765
+ * verify the payload originated from Patter.
498
766
  */
499
- telnyxPublicKey?: string;
767
+ constructor(webhookSecret?: string);
768
+ /**
769
+ * Compute HMAC-SHA256 hex digest for the given body.
770
+ */
771
+ private signPayload;
772
+ /**
773
+ * Release resources held by this handler.
774
+ */
775
+ close(): void;
776
+ /**
777
+ * POST transcript to HTTP webhook, return response text.
778
+ *
779
+ * The webhook receives a JSON payload:
780
+ * { text, call_id, caller, callee, history }
781
+ *
782
+ * The response can be plain text or JSON { text: "..." }.
783
+ *
784
+ * When `webhookSecret` was provided at construction time, the request
785
+ * includes an `X-Patter-Signature` header with the HMAC-SHA256 hex
786
+ * digest of the JSON body.
787
+ */
788
+ callWebhook(url: string, data: Record<string, unknown>): Promise<string>;
789
+ /**
790
+ * Send transcript via WebSocket, yield response chunks.
791
+ *
792
+ * Sends the message data as JSON. Receives one or more JSON frames
793
+ * with { text: "..." } - multiple frames enable streaming.
794
+ * A frame with { done: true } signals end of response.
795
+ */
796
+ callWebSocket(url: string, data: Record<string, unknown>): AsyncGenerator<string, void, unknown>;
500
797
  }
798
+ /** Check if onMessage is a remote URL string. */
799
+ declare function isRemoteUrl(onMessage: unknown): onMessage is string;
800
+ /** Check if a URL is a WebSocket URL. */
801
+ declare function isWebSocketUrl(url: string): boolean;
501
802
 
502
803
  /**
503
804
  * In-memory metrics store for the local dashboard.
@@ -543,6 +844,24 @@ declare class MetricsStore extends EventEmitter {
543
844
  get callCount(): number;
544
845
  }
545
846
 
847
+ interface LocalConfig {
848
+ twilioSid?: string;
849
+ twilioToken?: string;
850
+ openaiKey?: string;
851
+ phoneNumber: string;
852
+ webhookUrl: string;
853
+ telephonyProvider?: 'twilio' | 'telnyx';
854
+ telnyxKey?: string;
855
+ telnyxConnectionId?: string;
856
+ /**
857
+ * Telnyx Ed25519 public key (base64-encoded, DER/SPKI format) used to verify
858
+ * incoming webhook signatures. Obtain from the Telnyx portal under
859
+ * API Keys → Webhook Keys. When provided, unauthenticated webhook requests
860
+ * are rejected with HTTP 403.
861
+ */
862
+ telnyxPublicKey?: string;
863
+ }
864
+
546
865
  /**
547
866
  * Dashboard authentication middleware for Express.
548
867
  *
@@ -592,6 +911,15 @@ declare function callsToJson(calls: CallRecord[]): string;
592
911
  declare function mountDashboard(app: Express, store: MetricsStore, token?: string): void;
593
912
  declare function mountApi(app: Express, store: MetricsStore, token?: string): void;
594
913
 
914
+ /**
915
+ * Dashboard notification for live call updates.
916
+ *
917
+ * When the SDK completes a call, it fires a POST to the standalone dashboard
918
+ * (if running) so calls appear in real time. Data lives only in memory —
919
+ * nothing is written to disk.
920
+ */
921
+ declare function notifyDashboard(callData: Record<string, unknown>, port?: number): void;
922
+
595
923
  /**
596
924
  * Built-in LLM loop for pipeline mode when no onMessage handler is provided.
597
925
  *
@@ -648,54 +976,45 @@ declare class LLMLoop {
648
976
  }
649
977
 
650
978
  /**
651
- * Remote message handler for B2B webhook and WebSocket integration.
979
+ * Fallback LLM provider that tries multiple providers in sequence.
652
980
  *
653
- * Allows onMessage to be a URL string instead of a callable:
654
- * - HTTP webhook: onMessage="https://api.customer.com/patter/message"
655
- * - WebSocket: onMessage="ws://localhost:9000/stream"
981
+ * If the primary provider fails, the next provider is tried, and so on.
982
+ * Each provider gets a configurable number of retries before being skipped.
983
+ * Failed providers are marked unavailable and periodically re-checked in the
984
+ * background.
656
985
  */
657
- declare class RemoteMessageHandler {
658
- private readonly webhookSecret;
659
- /**
660
- * @param webhookSecret Optional HMAC secret. When provided, outgoing webhook
661
- * requests include an `X-Patter-Signature` header so the receiver can
662
- * verify the payload originated from Patter.
663
- */
664
- constructor(webhookSecret?: string);
665
- /**
666
- * Compute HMAC-SHA256 hex digest for the given body.
667
- */
668
- private signPayload;
669
- /**
670
- * Release resources held by this handler.
671
- */
672
- close(): void;
673
- /**
674
- * POST transcript to HTTP webhook, return response text.
675
- *
676
- * The webhook receives a JSON payload:
677
- * { text, call_id, caller, callee, history }
678
- *
679
- * The response can be plain text or JSON { text: "..." }.
680
- *
681
- * When `webhookSecret` was provided at construction time, the request
682
- * includes an `X-Patter-Signature` header with the HMAC-SHA256 hex
683
- * digest of the JSON body.
684
- */
685
- callWebhook(url: string, data: Record<string, unknown>): Promise<string>;
686
- /**
687
- * Send transcript via WebSocket, yield response chunks.
688
- *
689
- * Sends the message data as JSON. Receives one or more JSON frames
690
- * with { text: "..." } - multiple frames enable streaming.
691
- * A frame with { done: true } signals end of response.
692
- */
693
- callWebSocket(url: string, data: Record<string, unknown>): AsyncGenerator<string, void, unknown>;
986
+
987
+ interface FallbackLLMProviderOptions {
988
+ /** Number of retry attempts per provider before moving to the next (default 1). */
989
+ readonly maxRetryPerProvider?: number;
990
+ /** Interval in ms between background recovery probes (default 30_000). */
991
+ readonly recoveryIntervalMs?: number;
992
+ }
993
+ /** Thrown when all providers have been exhausted. */
994
+ declare class AllProvidersFailedError extends Error {
995
+ constructor(message: string);
996
+ }
997
+ /** Thrown when a provider fails after already yielding partial output. */
998
+ declare class PartialStreamError extends Error {
999
+ constructor(message: string);
1000
+ }
1001
+ declare class FallbackLLMProvider implements LLMProvider {
1002
+ private readonly providers;
1003
+ private readonly availability;
1004
+ private readonly maxRetryPerProvider;
1005
+ private readonly recoveryIntervalMs;
1006
+ private readonly recoveryTimers;
1007
+ constructor(providers: ReadonlyArray<LLMProvider>, options?: FallbackLLMProviderOptions);
1008
+ /** Returns a snapshot of per-provider availability. */
1009
+ getAvailability(): ReadonlyArray<boolean>;
1010
+ /** Clears all background recovery timers. Call this when shutting down. */
1011
+ destroy(): void;
1012
+ stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
1013
+ private tryProviders;
1014
+ private markUnavailable;
1015
+ private startRecovery;
1016
+ private stopRecovery;
694
1017
  }
695
- /** Check if onMessage is a remote URL string. */
696
- declare function isRemoteUrl(onMessage: unknown): onMessage is string;
697
- /** Check if a URL is a WebSocket URL. */
698
- declare function isWebSocketUrl(url: string): boolean;
699
1018
 
700
1019
  /**
701
1020
  * Interactive terminal test mode for voice agents.
@@ -714,36 +1033,286 @@ declare class TestSession {
714
1033
  }): Promise<void>;
715
1034
  }
716
1035
 
717
- interface Transcript$1 {
1036
+ /**
1037
+ * Gemini Live realtime adapter.
1038
+ *
1039
+ * Partially adapted (~65% port) from LiveKit Agents
1040
+ * (livekit-plugins-google, Apache 2.0). Reframed to Patter's realtime adapter
1041
+ * surface — connect / sendAudio / onEvent / close — matching OpenAIRealtimeAdapter.
1042
+ *
1043
+ * Uses the @google/genai SDK lazily imported at connect() so consumers that do
1044
+ * not use Gemini Live do not pay the load cost. Install with:
1045
+ *
1046
+ * npm install @google/genai
1047
+ */
1048
+ declare const GEMINI_DEFAULT_INPUT_SR = 16000;
1049
+ declare const GEMINI_DEFAULT_OUTPUT_SR = 24000;
1050
+ type GeminiLiveEventHandler = (type: 'audio' | 'transcript_output' | 'function_call' | 'speech_started' | 'response_done' | 'error', data: unknown) => void | Promise<void>;
1051
+ interface GeminiLiveOptions {
1052
+ model?: string;
1053
+ voice?: string;
1054
+ instructions?: string;
1055
+ language?: string;
1056
+ tools?: Array<{
1057
+ name: string;
1058
+ description: string;
1059
+ parameters: Record<string, unknown>;
1060
+ }>;
1061
+ inputSampleRate?: number;
1062
+ outputSampleRate?: number;
1063
+ temperature?: number;
1064
+ }
1065
+ declare class GeminiLiveAdapter {
1066
+ private readonly apiKey;
1067
+ private readonly model;
1068
+ private readonly voice;
1069
+ private readonly instructions;
1070
+ private readonly language;
1071
+ private readonly tools?;
1072
+ private readonly inputSampleRate;
1073
+ /** Output sample rate — exposed so callers can configure downstream transcoding. */
1074
+ readonly outputSampleRate: number;
1075
+ private readonly temperature;
1076
+ private client;
1077
+ private session;
1078
+ private receiveLoop;
1079
+ private handlers;
1080
+ private running;
1081
+ constructor(apiKey: string, options?: GeminiLiveOptions);
1082
+ connect(): Promise<void>;
1083
+ sendAudio(pcm: Buffer): void;
1084
+ sendText(text: string): Promise<void>;
1085
+ sendFunctionResult(callId: string, result: string): Promise<void>;
1086
+ cancelResponse(): void;
1087
+ onEvent(handler: GeminiLiveEventHandler): void;
1088
+ private emit;
1089
+ private pumpReceive;
1090
+ close(): Promise<void>;
1091
+ }
1092
+
1093
+ /**
1094
+ * Ultravox realtime adapter.
1095
+ *
1096
+ * Partially adapted (~70% port) from LiveKit Agents
1097
+ * (livekit-plugins-ultravox, Apache 2.0). Pure WebSocket protocol — no vendor SDK.
1098
+ *
1099
+ * Reframed to Patter's connect / sendAudio / onEvent / close surface,
1100
+ * matching OpenAIRealtimeAdapter.
1101
+ */
1102
+ declare const ULTRAVOX_DEFAULT_API_BASE = "https://api.ultravox.ai/api";
1103
+ declare const ULTRAVOX_DEFAULT_SR = 16000;
1104
+ type UltravoxEventHandler = (type: 'audio' | 'transcript_input' | 'transcript_output' | 'function_call' | 'speech_started' | 'response_done' | 'error', data: unknown) => void | Promise<void>;
1105
+ interface UltravoxOptions {
1106
+ model?: string;
1107
+ voice?: string;
1108
+ instructions?: string;
1109
+ language?: string;
1110
+ tools?: Array<{
1111
+ name: string;
1112
+ description: string;
1113
+ parameters: Record<string, unknown>;
1114
+ }>;
1115
+ apiBase?: string;
1116
+ sampleRate?: number;
1117
+ firstMessage?: string;
1118
+ }
1119
+ declare class UltravoxRealtimeAdapter {
1120
+ private readonly apiKey;
1121
+ private readonly model;
1122
+ private readonly voice;
1123
+ private readonly instructions;
1124
+ private readonly language;
1125
+ private readonly tools?;
1126
+ private readonly apiBase;
1127
+ private readonly sampleRate;
1128
+ private readonly firstMessage;
1129
+ private ws;
1130
+ private handlers;
1131
+ /** Exposed for diagnostics — true while the underlying socket is open. */
1132
+ running: boolean;
1133
+ constructor(apiKey: string, options?: UltravoxOptions);
1134
+ connect(): Promise<void>;
1135
+ sendAudio(pcm: Buffer): void;
1136
+ sendText(text: string): Promise<void>;
1137
+ sendFunctionResult(callId: string, result: string): Promise<void>;
1138
+ cancelResponse(): void;
1139
+ onEvent(handler: UltravoxEventHandler): void;
1140
+ private emit;
1141
+ private handleMessage;
1142
+ close(): Promise<void>;
1143
+ }
1144
+
1145
+ /**
1146
+ * Thin scheduling wrapper around node-cron (MIT).
1147
+ *
1148
+ * import { scheduleCron, scheduleOnce } from 'getpatter';
1149
+ *
1150
+ * const handle = scheduleCron('* /5 * * * *', async () => doWork());
1151
+ * handle.cancel();
1152
+ *
1153
+ * node-cron is an optional dependency. This module imports it lazily so that
1154
+ * consumers who never schedule anything do not need it installed.
1155
+ */
1156
+ type JobCallback = () => void | Promise<void>;
1157
+ interface ScheduleHandle {
1158
+ readonly jobId: string;
1159
+ cancel(): void;
1160
+ readonly pending: boolean;
1161
+ }
1162
+ /** Schedule ``callback`` on a cron expression (node-cron dialect). */
1163
+ declare function scheduleCron(cron: string, callback: JobCallback): Promise<ScheduleHandle>;
1164
+ /** Schedule ``callback`` once at the given date. */
1165
+ declare function scheduleOnce(at: Date, callback: JobCallback): ScheduleHandle;
1166
+ /** Schedule ``callback`` every ``intervalMs`` milliseconds. */
1167
+ declare function scheduleInterval(intervalMs: number, callback: JobCallback): ScheduleHandle;
1168
+
1169
+ /**
1170
+ * Soniox Speech-to-Text adapter for Patter (TypeScript).
1171
+ *
1172
+ * Pure WebSocket client for the Soniox real-time STT API. Accumulates
1173
+ * `is_final` tokens and flushes them on `<end>`/`<fin>` endpoint tokens,
1174
+ * mirroring the Python `SonioxSTT` adapter.
1175
+ *
1176
+ * Adapted from LiveKit Agents (Apache 2.0):
1177
+ * https://github.com/livekit/agents
1178
+ * (source: livekit-plugins/livekit-plugins-soniox/livekit/plugins/soniox/stt.py
1179
+ * at commit 78a66bcf79c5cea82989401c408f1dff4b961a5b)
1180
+ *
1181
+ * Speechmatics TypeScript adapter is **intentionally not ported**: the
1182
+ * official Speechmatics Voice SDK (`speechmatics.voice`) is Python-only at
1183
+ * the time of writing. Python users should install the optional
1184
+ * `speechmatics` extra; TypeScript users need to wait for an official
1185
+ * upstream SDK before this adapter can land without a WS-handshake reimpl.
1186
+ */
1187
+ interface Transcript$2 {
718
1188
  readonly text: string;
719
1189
  readonly isFinal: boolean;
720
1190
  readonly confidence: number;
721
1191
  }
722
- type TranscriptCallback$1 = (transcript: Transcript$1) => void;
723
- declare class DeepgramSTT {
1192
+ type TranscriptCallback$2 = (transcript: Transcript$2) => void;
1193
+ interface SonioxSTTOptions {
1194
+ model?: string;
1195
+ languageHints?: string[];
1196
+ languageHintsStrict?: boolean;
1197
+ sampleRate?: number;
1198
+ numChannels?: number;
1199
+ enableSpeakerDiarization?: boolean;
1200
+ enableLanguageIdentification?: boolean;
1201
+ maxEndpointDelayMs?: number;
1202
+ clientReferenceId?: string;
1203
+ baseUrl?: string;
1204
+ }
1205
+ declare class SonioxSTT {
1206
+ private ws;
1207
+ private callbacks;
1208
+ private final;
1209
+ private keepaliveTimer;
724
1210
  private readonly apiKey;
725
- private readonly language;
726
1211
  private readonly model;
727
- private readonly encoding;
1212
+ private readonly languageHints?;
1213
+ private readonly languageHintsStrict;
728
1214
  private readonly sampleRate;
1215
+ private readonly numChannels;
1216
+ private readonly enableSpeakerDiarization;
1217
+ private readonly enableLanguageIdentification;
1218
+ private readonly maxEndpointDelayMs;
1219
+ private readonly clientReferenceId?;
1220
+ private readonly baseUrl;
1221
+ constructor(apiKey: string, options?: SonioxSTTOptions);
1222
+ /** Factory for Twilio-style 8 kHz linear PCM. */
1223
+ static forTwilio(apiKey: string, languageHints?: string[]): SonioxSTT;
1224
+ private buildConfig;
1225
+ connect(): Promise<void>;
1226
+ private clearKeepalive;
1227
+ private handleMessage;
1228
+ private emit;
1229
+ sendAudio(audio: Buffer): void;
1230
+ onTranscript(callback: TranscriptCallback$2): void;
1231
+ close(): void;
1232
+ }
1233
+
1234
+ /**
1235
+ * AssemblyAI Universal Streaming STT adapter for the Patter SDK pipeline mode.
1236
+ *
1237
+ * Implements a `DeepgramSTT`-shaped provider using AssemblyAI's v3 streaming
1238
+ * WebSocket API. Pure `ws` transport — does NOT depend on the vendor SDK.
1239
+ *
1240
+ * Algorithm adapted from LiveKit Agents (Apache 2.0):
1241
+ * https://github.com/livekit/agents
1242
+ * Source: livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py
1243
+ * Upstream ref SHA: 78a66bcf79c5cea82989401c408f1dff4b961a5b
1244
+ */
1245
+ interface Transcript$1 {
1246
+ readonly text: string;
1247
+ readonly isFinal: boolean;
1248
+ readonly confidence: number;
1249
+ }
1250
+ type TranscriptCallback$1 = (transcript: Transcript$1) => void;
1251
+ type AssemblyAIEncoding = 'pcm_s16le' | 'pcm_mulaw';
1252
+ type AssemblyAIModel = 'universal-streaming-english' | 'universal-streaming-multilingual' | 'u3-rt-pro';
1253
+ interface AssemblyAISTTOptions {
1254
+ /** One of the AssemblyAI speech models. */
1255
+ readonly model?: AssemblyAIModel;
1256
+ /** PCM encoding: 16-bit little-endian (default) or G.711 mu-law for telephony. */
1257
+ readonly encoding?: AssemblyAIEncoding;
1258
+ /** Sample rate in Hz — 16000 for wideband audio, 8000 for telephony. */
1259
+ readonly sampleRate?: number;
1260
+ /** Override the streaming base URL (e.g. EU: `wss://streaming.eu.assemblyai.com`). */
1261
+ readonly baseUrl?: string;
1262
+ /** Enable automatic language detection (defaults: true for multilingual/u3-rt-pro). */
1263
+ readonly languageDetection?: boolean;
1264
+ /** 0..1 confidence required before end-of-turn is finalized. */
1265
+ readonly endOfTurnConfidenceThreshold?: number;
1266
+ /** Minimum ms of silence required before end-of-turn finalizes. */
1267
+ readonly minTurnSilence?: number;
1268
+ /** Maximum ms of silence before the turn is force-finalized. */
1269
+ readonly maxTurnSilence?: number;
1270
+ /** When true, wait for the formatted transcript before emitting final. */
1271
+ readonly formatTurns?: boolean;
1272
+ /** Bias keywords/phrases. */
1273
+ readonly keytermsPrompt?: readonly string[];
1274
+ /** Text prompt (u3-rt-pro only). */
1275
+ readonly prompt?: string;
1276
+ /** VAD threshold (0..1). */
1277
+ readonly vadThreshold?: number;
1278
+ /** Enable diarization / speaker labels. */
1279
+ readonly speakerLabels?: boolean;
1280
+ /** Max speakers for diarization. */
1281
+ readonly maxSpeakers?: number;
1282
+ /** Domain hint (e.g. "medical"). */
1283
+ readonly domain?: string;
1284
+ }
1285
+ declare class AssemblyAISTT {
1286
+ private readonly apiKey;
1287
+ private readonly options;
729
1288
  private ws;
730
1289
  private callbacks;
731
- /** Request ID from Deepgram used to query actual cost post-call. */
732
- requestId: string;
733
- constructor(apiKey: string, language?: string, model?: string, encoding?: string, sampleRate?: number);
1290
+ /** AssemblyAI session id — set when the `Begin` message arrives. */
1291
+ sessionId: string;
1292
+ /** Unix timestamp when the AssemblyAI session expires. */
1293
+ expiresAt: number;
1294
+ constructor(apiKey: string, options?: AssemblyAISTTOptions);
734
1295
  /** Factory for Twilio calls — mulaw 8 kHz. */
735
- static forTwilio(apiKey: string, language?: string, model?: string): DeepgramSTT;
1296
+ static forTwilio(apiKey: string, model?: AssemblyAIModel): AssemblyAISTT;
1297
+ private buildUrl;
736
1298
  connect(): Promise<void>;
1299
+ private handleEvent;
1300
+ private emit;
737
1301
  sendAudio(audio: Buffer): void;
738
1302
  onTranscript(callback: TranscriptCallback$1): void;
739
1303
  close(): void;
740
1304
  }
741
1305
 
742
1306
  /**
743
- * OpenAI Whisper STT adapter for the Patter SDK pipeline mode.
1307
+ * Cartesia STT (ink-whisper) adapter for the Patter SDK pipeline mode.
744
1308
  *
745
- * Buffers incoming PCM16 audio and periodically sends it to the
746
- * OpenAI Whisper transcription API as a WAV file.
1309
+ * Implements a `DeepgramSTT`-shaped provider using Cartesia's streaming
1310
+ * WebSocket API. Pure `ws` transport — does NOT depend on the vendor SDK.
1311
+ *
1312
+ * Algorithm adapted from LiveKit Agents (Apache 2.0):
1313
+ * https://github.com/livekit/agents
1314
+ * Source: livekit-plugins/livekit-plugins-cartesia/livekit/plugins/cartesia/stt.py
1315
+ * Upstream ref SHA: 78a66bcf79c5cea82989401c408f1dff4b961a5b
747
1316
  */
748
1317
  interface Transcript {
749
1318
  readonly text: string;
@@ -751,22 +1320,36 @@ interface Transcript {
751
1320
  readonly confidence: number;
752
1321
  }
753
1322
  type TranscriptCallback = (transcript: Transcript) => void;
754
- declare class WhisperSTT {
1323
+ /** Cartesia STT currently only accepts 16-bit PCM little-endian. */
1324
+ type CartesiaEncoding = 'pcm_s16le';
1325
+ interface CartesiaSTTOptions {
1326
+ /** Cartesia STT model. Currently only `"ink-whisper"`. */
1327
+ readonly model?: string;
1328
+ /** BCP-47 language code. */
1329
+ readonly language?: string;
1330
+ /** PCM encoding; Cartesia only supports `pcm_s16le`. */
1331
+ readonly encoding?: CartesiaEncoding;
1332
+ /** Sample rate in Hz. Cartesia accepts 8000, 16000, 24000, 44100, 48000. */
1333
+ readonly sampleRate?: number;
1334
+ /** Override base URL (HTTP or WS). Defaults to Cartesia prod. */
1335
+ readonly baseUrl?: string;
1336
+ }
1337
+ declare class CartesiaSTT {
755
1338
  private readonly apiKey;
756
- private readonly model;
757
- private readonly language;
758
- private readonly bufferSize;
759
- private buffer;
1339
+ private readonly options;
1340
+ private ws;
760
1341
  private callbacks;
761
- private running;
762
- constructor(apiKey: string, model?: string, language?: string, bufferSize?: number);
763
- /** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
764
- static forTwilio(apiKey: string, language?: string, model?: string): WhisperSTT;
1342
+ private keepaliveTimer;
1343
+ /** Cartesia request id set from the server transcript events. */
1344
+ requestId: string;
1345
+ constructor(apiKey: string, options?: CartesiaSTTOptions);
1346
+ private buildWsUrl;
765
1347
  connect(): Promise<void>;
1348
+ private handleEvent;
1349
+ private emit;
766
1350
  sendAudio(audio: Buffer): void;
767
1351
  onTranscript(callback: TranscriptCallback): void;
768
1352
  close(): void;
769
- private transcribeBuffer;
770
1353
  }
771
1354
 
772
1355
  declare class ElevenLabsTTS {
@@ -818,6 +1401,111 @@ declare class OpenAITTS {
818
1401
  static resample24kTo16k(audio: Buffer): Buffer;
819
1402
  }
820
1403
 
1404
+ interface CartesiaTTSOptions {
1405
+ model?: string;
1406
+ voice?: string;
1407
+ language?: string;
1408
+ sampleRate?: number;
1409
+ speed?: string | number;
1410
+ emotion?: string | string[];
1411
+ volume?: number;
1412
+ baseUrl?: string;
1413
+ apiVersion?: string;
1414
+ }
1415
+ declare class CartesiaTTS {
1416
+ private readonly apiKey;
1417
+ private readonly model;
1418
+ private readonly voice;
1419
+ private readonly language;
1420
+ private readonly sampleRate;
1421
+ private readonly speed?;
1422
+ private readonly emotion?;
1423
+ private readonly volume?;
1424
+ private readonly baseUrl;
1425
+ private readonly apiVersion;
1426
+ constructor(apiKey: string, opts?: CartesiaTTSOptions);
1427
+ /** Build the JSON payload for the Cartesia bytes endpoint. */
1428
+ private buildPayload;
1429
+ /** Synthesize text and return the concatenated audio buffer. */
1430
+ synthesize(text: string): Promise<Buffer>;
1431
+ /**
1432
+ * Synthesize text and yield raw PCM_S16LE chunks at the configured
1433
+ * `sampleRate` as they arrive from Cartesia.
1434
+ */
1435
+ synthesizeStream(text: string): AsyncGenerator<Buffer>;
1436
+ }
1437
+
1438
+ interface RimeTTSOptions {
1439
+ model?: string;
1440
+ speaker?: string;
1441
+ lang?: string;
1442
+ sampleRate?: number;
1443
+ repetitionPenalty?: number;
1444
+ temperature?: number;
1445
+ topP?: number;
1446
+ maxTokens?: number;
1447
+ speedAlpha?: number;
1448
+ reduceLatency?: boolean;
1449
+ pauseBetweenBrackets?: boolean;
1450
+ phonemizeBetweenBrackets?: boolean;
1451
+ baseUrl?: string;
1452
+ }
1453
+ declare class RimeTTS {
1454
+ private readonly apiKey;
1455
+ private readonly model;
1456
+ private readonly speaker;
1457
+ private readonly lang;
1458
+ private readonly sampleRate;
1459
+ private readonly repetitionPenalty?;
1460
+ private readonly temperature?;
1461
+ private readonly topP?;
1462
+ private readonly maxTokens?;
1463
+ private readonly speedAlpha?;
1464
+ private readonly reduceLatency?;
1465
+ private readonly pauseBetweenBrackets?;
1466
+ private readonly phonemizeBetweenBrackets?;
1467
+ private readonly baseUrl;
1468
+ private readonly totalTimeoutMs;
1469
+ constructor(apiKey: string, opts?: RimeTTSOptions);
1470
+ private buildPayload;
1471
+ synthesize(text: string): Promise<Buffer>;
1472
+ /**
1473
+ * Synthesize text and yield raw PCM_S16LE chunks at the configured
1474
+ * `sampleRate` as they stream in.
1475
+ */
1476
+ synthesizeStream(text: string): AsyncGenerator<Buffer>;
1477
+ }
1478
+
1479
+ type LMNTAudioFormat = 'aac' | 'mp3' | 'mulaw' | 'raw' | 'wav';
1480
+ type LMNTModel = 'blizzard' | 'aurora';
1481
+ type LMNTSampleRate = 8000 | 16000 | 24000;
1482
+ interface LMNTTTSOptions {
1483
+ model?: LMNTModel;
1484
+ voice?: string;
1485
+ language?: string;
1486
+ format?: LMNTAudioFormat;
1487
+ sampleRate?: LMNTSampleRate;
1488
+ temperature?: number;
1489
+ topP?: number;
1490
+ baseUrl?: string;
1491
+ }
1492
+ declare class LMNTTTS {
1493
+ private readonly apiKey;
1494
+ private readonly model;
1495
+ private readonly voice;
1496
+ private readonly language;
1497
+ private readonly format;
1498
+ private readonly sampleRate;
1499
+ private readonly temperature;
1500
+ private readonly topP;
1501
+ private readonly baseUrl;
1502
+ constructor(apiKey: string, opts?: LMNTTTSOptions);
1503
+ private buildPayload;
1504
+ synthesize(text: string): Promise<Buffer>;
1505
+ /** Yield audio chunks as they arrive — raw PCM_S16LE by default. */
1506
+ synthesizeStream(text: string): AsyncGenerator<Buffer>;
1507
+ }
1508
+
821
1509
  /**
822
1510
  * Audio transcoding utilities for Patter TypeScript SDK.
823
1511
  *
@@ -889,4 +1577,294 @@ interface TunnelHandle {
889
1577
  */
890
1578
  declare function startTunnel(port: number, timeoutMs?: number): Promise<TunnelHandle>;
891
1579
 
892
- export { type Agent, type AgentOptions, AuthenticationError, type Call, type CallControl, type CallEventHandler, type CallMetrics, CallMetricsAccumulator, type CallOptions, type CallRecord, type ConnectOptions, type CostBreakdown, type CreateAgentOptions, DEFAULT_PRICING, DeepgramSTT, ElevenLabsConvAIAdapter, ElevenLabsTTS, type Guardrail, type IncomingMessage, type LLMChunk, LLMLoop, type LLMProvider, type LatencyBreakdown, type LocalCallOptions, type LocalConfig, type LocalOptions, type Logger, type MessageHandler, MetricsStore, OpenAILLMProvider, OpenAIRealtimeAdapter, OpenAITTS, Patter, PatterConnectionError, PatterError, type PatterOptions, type PhoneNumber, type PipelineMessageHandler, type ProviderPricing, ProvisionError, RemoteMessageHandler, type SSEEvent, type STTConfig, type ServeOptions, type TTSConfig, TestSession, type ToolDefinition, type TunnelHandle, type TurnMetrics, WhisperSTT, calculateRealtimeCost, calculateSttCost, calculateTelephonyCost, calculateTtsCost, callsToCsv, callsToJson, deepgram, elevenlabs, getLogger, isRemoteUrl, isWebSocketUrl, makeAuthMiddleware, mergePricing, mountApi, mountDashboard, mulawToPcm16, openaiTts, pcm16ToMulaw, resample16kTo8k, resample24kTo16k, resample8kTo16k, setLogger, startTunnel, whisper };
1580
+ /**
1581
+ * Typed conversation history management with truncation support.
1582
+ *
1583
+ * Replaces raw `list[dict]` history with a structured ChatContext class
1584
+ * that provides immutable messages, automatic ID generation, truncation
1585
+ * preserving system prompts, and format conversion for OpenAI / Anthropic.
1586
+ */
1587
+ type ChatRole = "system" | "user" | "assistant" | "tool";
1588
+ interface ChatMessage {
1589
+ readonly id: string;
1590
+ readonly role: ChatRole;
1591
+ readonly content: string;
1592
+ readonly timestamp: number;
1593
+ readonly name?: string;
1594
+ readonly toolCallId?: string;
1595
+ }
1596
+ interface OpenAIMessage {
1597
+ role: string;
1598
+ content: string;
1599
+ name?: string;
1600
+ tool_call_id?: string;
1601
+ }
1602
+ interface AnthropicMessage {
1603
+ role: string;
1604
+ content: string;
1605
+ }
1606
+ interface AnthropicConversion {
1607
+ system: string | undefined;
1608
+ messages: ReadonlyArray<AnthropicMessage>;
1609
+ }
1610
+ interface ChatContextJSON {
1611
+ messages: ReadonlyArray<ChatMessage>;
1612
+ }
1613
+ declare class ChatContext {
1614
+ private items;
1615
+ constructor(systemPrompt?: string);
1616
+ addUser(content: string): ChatMessage;
1617
+ addAssistant(content: string): ChatMessage;
1618
+ addSystem(content: string): ChatMessage;
1619
+ addToolResult(content: string, toolCallId: string): ChatMessage;
1620
+ getMessages(): ReadonlyArray<ChatMessage>;
1621
+ getLastN(n: number): ReadonlyArray<ChatMessage>;
1622
+ get length(): number;
1623
+ /**
1624
+ * Keep the first system message (if any) plus the last `maxMessages`
1625
+ * non-system-first messages. When no system message exists at index 0,
1626
+ * simply keeps the last `maxMessages` messages.
1627
+ */
1628
+ truncate(maxMessages: number): void;
1629
+ toOpenAI(): OpenAIMessage[];
1630
+ /**
1631
+ * Convert to Anthropic format. The first system message (if present)
1632
+ * is extracted into a separate `system` field, and only user/assistant
1633
+ * messages are included in the messages array.
1634
+ */
1635
+ toAnthropic(): AnthropicConversion;
1636
+ copy(): ChatContext;
1637
+ toJSON(): ChatContextJSON;
1638
+ static fromJSON(data: ChatContextJSON): ChatContext;
1639
+ }
1640
+
1641
+ /**
1642
+ * IVR auto-navigation activity for telephony calls (TypeScript port).
1643
+ *
1644
+ * Detects IVR prompts via transcribed speech, forwards DTMF responses
1645
+ * through `CallControl.sendDtmf`, and recovers from two common failure
1646
+ * modes:
1647
+ *
1648
+ * 1. The agent hears the same IVR prompt repeated several times
1649
+ * (loop detection). `TfidfLoopDetector` flags this by comparing the
1650
+ * cosine similarity of recent transcript chunks.
1651
+ * 2. The IVR falls silent while both parties are passive (silence
1652
+ * detection). A debounced timer triggers a follow-up after
1653
+ * `maxSilenceDuration` seconds of combined silence.
1654
+ *
1655
+ * The Python port uses scikit-learn for TF-IDF; TypeScript has no
1656
+ * equivalent battle-tested package in the std library, so we ship a
1657
+ * minimal in-house bag-of-words + cosine-similarity implementation.
1658
+ * It is intentionally simple — enough to match repeated IVR prompts.
1659
+ *
1660
+ * Algorithm adapted from LiveKit Agents (Apache 2.0):
1661
+ * https://github.com/livekit/agents
1662
+ *
1663
+ * Source:
1664
+ * - livekit-agents/livekit/agents/voice/ivr/ivr_activity.py
1665
+ * - livekit-agents/livekit/agents/beta/tools/send_dtmf.py
1666
+ * LiveKit SHA at port time: 78a66bcf79c5cea82989401c408f1dff4b961a5b
1667
+ */
1668
+
1669
+ /** Valid DTMF tone values (keypad characters). */
1670
+ declare const DTMF_EVENTS: readonly ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "*", "#", "A", "B", "C", "D"];
1671
+ type DtmfEvent = (typeof DTMF_EVENTS)[number];
1672
+ /** Join DTMF events into a space-separated debug string. */
1673
+ declare function formatDtmf(events: DtmfEvent[]): string;
1674
+ interface TfidfLoopDetectorOptions {
1675
+ /** Number of recent chunks to keep in the comparison window. */
1676
+ windowSize?: number;
1677
+ /** Cosine similarity above which two chunks are "the same prompt". */
1678
+ similarityThreshold?: number;
1679
+ /** Consecutive near-duplicates required before firing. */
1680
+ consecutiveThreshold?: number;
1681
+ }
1682
+ /**
1683
+ * Detects repeated IVR prompts via cosine similarity on bag-of-words
1684
+ * vectors. Not a full TF-IDF implementation — good enough for catching
1685
+ * IVRs that re-read the same menu.
1686
+ */
1687
+ declare class TfidfLoopDetector {
1688
+ private readonly windowSize;
1689
+ private readonly similarityThreshold;
1690
+ private readonly consecutiveThreshold;
1691
+ private chunks;
1692
+ private consecutiveSimilar;
1693
+ constructor(opts?: TfidfLoopDetectorOptions);
1694
+ reset(): void;
1695
+ addChunk(text: string): void;
1696
+ checkLoopDetection(): boolean;
1697
+ }
1698
+ /** Async callback fired when the TF-IDF detector trips. */
1699
+ type LoopCallback = () => Promise<void> | void;
1700
+ /** Async callback fired after sustained silence. */
1701
+ type SilenceCallback = () => Promise<void> | void;
1702
+ interface IVRActivityOptions {
1703
+ /** Seconds of combined silence before firing `onSilence`. Default `5.0`. */
1704
+ maxSilenceDuration?: number;
1705
+ /** Enable the TF-IDF loop detector. Default `true`. */
1706
+ loopDetector?: boolean;
1707
+ /** Fired when the loop detector trips. */
1708
+ onLoopDetected?: LoopCallback;
1709
+ /** Fired after `maxSilenceDuration` seconds of combined silence. */
1710
+ onSilence?: SilenceCallback;
1711
+ }
1712
+ /** OpenAI-style function tool spec with attached handler. */
1713
+ interface IVRToolDefinition {
1714
+ name: string;
1715
+ description: string;
1716
+ parameters: {
1717
+ type: "object";
1718
+ properties: Record<string, unknown>;
1719
+ required?: string[];
1720
+ };
1721
+ handler: (args: {
1722
+ events: string[];
1723
+ }) => Promise<string>;
1724
+ }
1725
+ /**
1726
+ * Coordinate IVR navigation heuristics for a single call.
1727
+ *
1728
+ * Usage::
1729
+ *
1730
+ * const ivr = new IVRActivity(callControl);
1731
+ * await ivr.start();
1732
+ *
1733
+ * // In the STT loop, on each final transcript:
1734
+ * await ivr.onUserTranscribed(text);
1735
+ *
1736
+ * // When done:
1737
+ * await ivr.stop();
1738
+ */
1739
+ declare class IVRActivity {
1740
+ private readonly callControl;
1741
+ private readonly maxSilenceDurationMs;
1742
+ private readonly loopDetector;
1743
+ private readonly onLoopDetected?;
1744
+ private readonly onSilence?;
1745
+ private currentUserState;
1746
+ private currentAgentState;
1747
+ private readonly debouncedSilence;
1748
+ private lastShouldSchedule;
1749
+ private started;
1750
+ constructor(callControl: CallControl, opts?: IVRActivityOptions);
1751
+ start(): Promise<void>;
1752
+ stop(): Promise<void>;
1753
+ onUserTranscribed(text: string): Promise<void>;
1754
+ noteUserState(state: string): void;
1755
+ noteAgentState(state: string): void;
1756
+ get tools(): IVRToolDefinition[];
1757
+ private scheduleSilenceCheck;
1758
+ private shouldScheduleCheck;
1759
+ private onSilenceDetected;
1760
+ private buildSendDtmfTool;
1761
+ }
1762
+
1763
+ declare const BuiltinAudioClip: {
1764
+ readonly CITY_AMBIENCE: "city-ambience.ogg";
1765
+ readonly FOREST_AMBIENCE: "forest-ambience.ogg";
1766
+ readonly OFFICE_AMBIENCE: "office-ambience.ogg";
1767
+ readonly CROWDED_ROOM: "crowded-room.ogg";
1768
+ readonly KEYBOARD_TYPING: "keyboard-typing.ogg";
1769
+ readonly KEYBOARD_TYPING2: "keyboard-typing2.ogg";
1770
+ readonly HOLD_MUSIC: "hold_music.ogg";
1771
+ };
1772
+ type BuiltinAudioClipName = (typeof BuiltinAudioClip)[keyof typeof BuiltinAudioClip];
1773
+ /** Resolve a bundled clip name to its absolute path on disk. */
1774
+ declare function builtinClipPath(clip: BuiltinAudioClipName): string;
1775
+ /** Raw int16 mono LE PCM already decoded into memory. */
1776
+ interface RawPcmSource {
1777
+ readonly kind: 'pcm';
1778
+ readonly pcm: Buffer;
1779
+ readonly sampleRate: number;
1780
+ readonly volume?: number;
1781
+ readonly probability?: number;
1782
+ }
1783
+ /** File on disk that a user-supplied decoder will turn into raw PCM. */
1784
+ interface FilePcmSource {
1785
+ readonly kind: 'file';
1786
+ readonly path: string;
1787
+ readonly decode: (p: string) => Promise<{
1788
+ pcm: Buffer;
1789
+ sampleRate: number;
1790
+ }>;
1791
+ readonly volume?: number;
1792
+ readonly probability?: number;
1793
+ }
1794
+ /** One of the bundled clips — requires a ``decode`` function at start() time. */
1795
+ interface BuiltinPcmSource {
1796
+ readonly kind: 'builtin';
1797
+ readonly clip: BuiltinAudioClipName;
1798
+ readonly decode: (p: string) => Promise<{
1799
+ pcm: Buffer;
1800
+ sampleRate: number;
1801
+ }>;
1802
+ readonly volume?: number;
1803
+ readonly probability?: number;
1804
+ }
1805
+ type AudioSource = RawPcmSource | FilePcmSource | BuiltinPcmSource;
1806
+ interface AudioConfig {
1807
+ readonly source: AudioSource;
1808
+ /** Probability weight used when ``BackgroundAudioPlayer`` receives a list. */
1809
+ readonly probability?: number;
1810
+ /** Master volume [0, 1] applied on top of the per-source ``volume``. */
1811
+ readonly volume?: number;
1812
+ }
1813
+ interface BackgroundAudioOptions {
1814
+ /** Overall mix ratio [0, 1]. Defaults to 0.1 (LiveKit's hold-music ratio). */
1815
+ readonly volume?: number;
1816
+ /** When true the source restarts on exhaustion. */
1817
+ readonly loop?: boolean;
1818
+ }
1819
+ /**
1820
+ * Return ``agent + bg * ratio`` as a new Buffer of the same length as
1821
+ * ``agent``. Background is zero-padded or truncated to match.
1822
+ */
1823
+ declare function mixPcm(agent: Buffer, bg: Buffer, ratio: number): Buffer;
1824
+ /**
1825
+ * Linear-interpolation resample from ``srcSr`` to ``dstSr``. Input and
1826
+ * output are mono int16 LE PCM buffers. Used for low-fidelity background
1827
+ * audio (hold music at attenuated volume); not suitable for wideband
1828
+ * program audio.
1829
+ */
1830
+ declare function resamplePcm(src: Buffer, srcSr: number, dstSr: number): Buffer;
1831
+ declare function selectSoundFromList(sounds: readonly AudioConfig[]): AudioConfig | null;
1832
+ /**
1833
+ * Mix a background audio clip into an outbound PCM stream.
1834
+ *
1835
+ * Accepts a single :class:`AudioSource`, a single :class:`AudioConfig`, or a
1836
+ * list of :class:`AudioConfig` (in which case one is picked via
1837
+ * probability-weighted random selection). Call ``start()`` before any
1838
+ * ``mix()`` and ``stop()`` to release decoded PCM.
1839
+ */
1840
+ declare class BackgroundAudioPlayer implements BackgroundAudioPlayer$1 {
1841
+ private readonly source;
1842
+ private readonly volume;
1843
+ private readonly loop;
1844
+ private started;
1845
+ private pcm;
1846
+ private sourceSr;
1847
+ private position;
1848
+ private readonly resampleCache;
1849
+ constructor(source: AudioSource | AudioConfig | readonly AudioConfig[], opts?: BackgroundAudioOptions);
1850
+ /**
1851
+ * Decode the configured source and arm the mixer. Subsequent calls are
1852
+ * no-ops while the player is active.
1853
+ */
1854
+ start(): Promise<void>;
1855
+ /**
1856
+ * Return a mix of ``agentPcm`` with the next background chunk. The result
1857
+ * is always exactly ``agentPcm.length`` bytes long. Returns a copy of
1858
+ * ``agentPcm`` when the player is not started, when ``volume == 0``, or
1859
+ * when the source has been exhausted and ``loop`` is false.
1860
+ */
1861
+ mix(agentPcm: Buffer, sampleRate: number): Promise<Buffer>;
1862
+ /** Release all cached PCM and reset the player. */
1863
+ stop(): Promise<void>;
1864
+ private resolveSource;
1865
+ private decodeSource;
1866
+ private applyGain;
1867
+ private resampleTo;
1868
+ }
1869
+
1870
+ export { type Agent, type AgentOptions, AllProvidersFailedError, type AnthropicConversion, type AnthropicMessage, type AssemblyAIEncoding, type AssemblyAIModel, AssemblyAISTT, type AssemblyAISTTOptions, type AudioConfig, type AudioSource, AuthenticationError, type BackgroundAudioOptions, BackgroundAudioPlayer, BuiltinAudioClip, type BuiltinAudioClipName, type BuiltinPcmSource, type Call, type CallControl, type CallEventHandler, type CallMetrics, CallMetricsAccumulator, type CallOptions, type CallRecord, type CartesiaEncoding, CartesiaSTT, type CartesiaSTTOptions, CartesiaTTS, type CartesiaTTSOptions, ChatContext, type ChatMessage, type ChatRole, type ConnectOptions, type CostBreakdown, type CreateAgentOptions, DEFAULT_MIN_SENTENCE_LEN, DEFAULT_PRICING, DTMF_EVENTS, DeepgramSTT, type DefineToolInput, type DtmfEvent, ElevenLabsConvAIAdapter, ElevenLabsTTS, FallbackLLMProvider, type FallbackLLMProviderOptions, type FilePcmSource, GEMINI_DEFAULT_INPUT_SR, GEMINI_DEFAULT_OUTPUT_SR, GeminiLiveAdapter, type GeminiLiveEventHandler, type Guardrail, type HookContext, IVRActivity, type IVRActivityOptions, type IVRToolDefinition, type IncomingMessage, type JobCallback, type LLMChunk, LLMLoop, type LLMProvider, type LMNTAudioFormat, type LMNTModel, type LMNTSampleRate, LMNTTTS, type LMNTTTSOptions, type LatencyBreakdown, type LocalCallOptions, type LocalConfig, type LocalOptions, type Logger, type LoopCallback, type MessageHandler, MetricsStore, OpenAILLMProvider, type OpenAIMessage, OpenAIRealtimeAdapter, OpenAITTS, type ParamSpec, PartialStreamError, Patter, PatterConnectionError, PatterError, type PatterOptions, type PhoneNumber, PipelineHookExecutor, type PipelineHooks, type PipelineMessageHandler, type ProviderPricing, ProvisionError, type RawPcmSource, RemoteMessageHandler, RimeTTS, type RimeTTSOptions, type SSEEvent, type STTConfig, type ScheduleHandle, SentenceChunker, type ServeOptions, type SilenceCallback, SonioxSTT, type SonioxSTTOptions, type TTSConfig, 
TestSession, TfidfLoopDetector, type TfidfLoopDetectorOptions, type ToolDefinition, type TunnelHandle, type TurnMetrics, ULTRAVOX_DEFAULT_API_BASE, ULTRAVOX_DEFAULT_SR, type UltravoxEventHandler, UltravoxRealtimeAdapter, WhisperSTT, builtinClipPath, calculateRealtimeCost, calculateSttCost, calculateTelephonyCost, calculateTtsCost, callsToCsv, callsToJson, deepgram, defineTool, elevenlabs, filterEmoji, filterForTTS, filterMarkdown, formatDtmf, getLogger, isRemoteUrl, isWebSocketUrl, makeAuthMiddleware, mergePricing, mixPcm, mountApi, mountDashboard, mulawToPcm16, notifyDashboard, openaiTts, pcm16ToMulaw, resample16kTo8k, resample24kTo16k, resample8kTo16k, resamplePcm, scheduleCron, scheduleInterval, scheduleOnce, selectSoundFromList, setLogger, startTunnel, whisper };