getpatter 0.5.3 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -2,6 +2,7 @@ import { EventEmitter } from 'events';
2
2
  import { Request, Response, NextFunction, Express } from 'express';
3
3
 
4
4
  /** Twilio carrier credentials holder for Patter. */
5
+ /** Constructor options for the Twilio {@link Carrier}. */
5
6
  interface TwilioCarrierOptions {
6
7
  /** Twilio Account SID. Falls back to TWILIO_ACCOUNT_SID env var. */
7
8
  accountSid?: string;
@@ -13,7 +14,7 @@ interface TwilioCarrierOptions {
13
14
  *
14
15
  * @example
15
16
  * ```ts
16
- * import * as twilio from "getpatter/carriers/twilio";
17
+ * import * as twilio from "getpatter/telephony/twilio";
17
18
  * const carrier = new twilio.Carrier(); // reads env
18
19
  * const carrier = new twilio.Carrier({ accountSid: "AC...", authToken: "..." });
19
20
  * ```
@@ -26,6 +27,7 @@ declare class Carrier$1 {
26
27
  }
27
28
 
28
29
  /** Telnyx carrier credentials holder for Patter. */
30
+ /** Constructor options for the Telnyx {@link Carrier}. */
29
31
  interface TelnyxCarrierOptions {
30
32
  /** Telnyx API key. Falls back to TELNYX_API_KEY env var. */
31
33
  apiKey?: string;
@@ -39,7 +41,7 @@ interface TelnyxCarrierOptions {
39
41
  *
40
42
  * @example
41
43
  * ```ts
42
- * import * as telnyx from "getpatter/carriers/telnyx";
44
+ * import * as telnyx from "getpatter/telephony/telnyx";
43
45
  * const carrier = new telnyx.Carrier(); // reads env
44
46
  * const carrier = new telnyx.Carrier({ apiKey: "KEY...", connectionId: "123" });
45
47
  * ```
@@ -53,6 +55,7 @@ declare class Carrier {
53
55
  }
54
56
 
55
57
  /** OpenAI Realtime engine — marker class for Patter client dispatch. */
58
+ /** Constructor options for the OpenAI `Realtime` engine marker. */
56
59
  interface RealtimeOptions {
57
60
  /** API key. Falls back to OPENAI_API_KEY env var when omitted. */
58
61
  apiKey?: string;
@@ -60,6 +63,21 @@ interface RealtimeOptions {
60
63
  model?: string;
61
64
  /** Voice preset. Defaults to alloy. */
62
65
  voice?: string;
66
+ /**
67
+ * Reasoning-effort tier for `gpt-realtime-2`. When omitted the
68
+ * `session.reasoning` field is not sent and the server default applies.
69
+ * OpenAI recommends `"low"` for production voice flows — higher tiers add
70
+ * measurable per-turn latency. Has no effect on models that ignore the
71
+ * field.
72
+ */
73
+ reasoningEffort?: 'minimal' | 'low' | 'medium' | 'high';
74
+ /**
75
+ * Override for the Realtime session's `input_audio_transcription.model`.
76
+ * Omit to keep the adapter default (`whisper-1`). Use
77
+ * `"gpt-realtime-whisper"` for low-latency transcript partials,
78
+ * `"gpt-4o-transcribe"` for higher accuracy.
79
+ */
80
+ inputAudioTranscriptionModel?: string;
63
81
  }
64
82
  /**
65
83
  * OpenAI Realtime engine marker.
@@ -69,6 +87,11 @@ interface RealtimeOptions {
69
87
  * import * as openai from "getpatter/engines/openai";
70
88
  * const engine = new openai.Realtime(); // reads OPENAI_API_KEY
71
89
  * const engine = new openai.Realtime({ voice: "alloy" });
90
+ * const engine = new openai.Realtime({
91
+ * model: "gpt-realtime-2",
92
+ * reasoningEffort: "low", // gpt-realtime-2 only
93
+ * inputAudioTranscriptionModel: "gpt-realtime-whisper",
94
+ * });
72
95
  * ```
73
96
  */
74
97
  declare class Realtime {
@@ -76,10 +99,13 @@ declare class Realtime {
76
99
  readonly apiKey: string;
77
100
  readonly model: string;
78
101
  readonly voice: string;
102
+ readonly reasoningEffort?: 'minimal' | 'low' | 'medium' | 'high';
103
+ readonly inputAudioTranscriptionModel?: string;
79
104
  constructor(opts?: RealtimeOptions);
80
105
  }
81
106
 
82
107
  /** ElevenLabs ConvAI engine — marker class for Patter client dispatch. */
108
+ /** Constructor options for the ElevenLabs `ConvAI` engine marker. */
83
109
  interface ConvAIOptions {
84
110
  /** API key. Falls back to ELEVENLABS_API_KEY env var when omitted. */
85
111
  apiKey?: string;
@@ -175,6 +201,7 @@ declare class Ngrok {
175
201
  * consumed either form keeps working.
176
202
  */
177
203
 
204
+ /** Options accepted by `new Guardrail(...)` / `guardrail(...)`. */
178
205
  interface GuardrailOptions {
179
206
  /** Name for logging when triggered. */
180
207
  name: string;
@@ -204,7 +231,9 @@ declare class Guardrail$1 {
204
231
  }
205
232
  /** Factory helper mirroring Python's `guardrail(...)` function. */
206
233
  declare function guardrail(opts: GuardrailOptions): Guardrail$1;
234
+ /** Async handler invoked in-process when the LLM calls a `Tool`. */
207
235
  type ToolHandler = (args: Record<string, unknown>, context: Record<string, unknown>) => Promise<string>;
236
+ /** Options accepted by `new Tool(...)` / `tool(...)`. */
208
237
  interface ToolOptions {
209
238
  /** Tool name (visible to the LLM). */
210
239
  name: string;
@@ -283,6 +312,7 @@ interface STTTranscript {
283
312
  /** Which provider event this transcript represents (e.g. ``Results``). */
284
313
  eventType?: string;
285
314
  }
315
+ /** Callback invoked by an `STTAdapter` for each (partial or final) transcript event. */
286
316
  type STTTranscriptCallback = (t: STTTranscript) => Promise<void> | void;
287
317
  /** Shape shared by every STT adapter in the SDK. */
288
318
  interface STTAdapter {
@@ -290,7 +320,20 @@ interface STTAdapter {
290
320
  sendAudio(pcm: Buffer): void | Promise<void>;
291
321
  onTranscript(cb: STTTranscriptCallback): void;
292
322
  close(): void | Promise<void>;
323
+ /**
324
+ * Optional: ask the provider to immediately finalise the in-flight
325
+ * utterance (rather than waiting for its own endpoint timer). Called by
326
+ * ``StreamHandler`` whenever the SDK's VAD signals ``speech_end``, and
327
+ * after a barge-in cancel — both moments where waiting for the
328
+ * provider's endpoint heuristic stalls the next turn.
329
+ *
330
+ * Implementations that do not support utterance-level finalisation
331
+ * (e.g. one-shot transcribers like Whisper) should omit this method
332
+ * entirely; the stream handler does an optional-chained call.
333
+ */
334
+ finalize?(): void | Promise<void>;
293
335
  }
336
+ /** Shape shared by every TTS adapter in the SDK. */
294
337
  interface TTSAdapter {
295
338
  synthesizeStream(text: string): AsyncIterable<Buffer>;
296
339
  }
@@ -303,8 +346,10 @@ interface TTSAdapter {
303
346
  * passes through unchanged.
304
347
  */
305
348
 
349
+ /** Runs user-defined pipeline hooks (`beforeSendToStt`, `afterTranscribe`, …) with fail-open semantics. */
306
350
  declare class PipelineHookExecutor {
307
351
  private readonly hooks;
352
+ private readonly afterLlm;
308
353
  constructor(hooks: PipelineHooks | undefined);
309
354
  /**
310
355
  * Run beforeSendToStt hook. Returns null to drop the audio chunk.
@@ -325,14 +370,47 @@ declare class PipelineHookExecutor {
325
370
  */
326
371
  runBeforeLlm(messages: Array<Record<string, unknown>>, ctx: HookContext): Promise<Array<Record<string, unknown>>>;
327
372
  /**
328
- * Run afterLlm hook. Returns a possibly-modified assistant text.
329
- * Returning ``null`` from the hook means "keep the original".
330
- * Fail-open: on exception, the original text passes through.
373
+ * Tier 1 — per-token sync transform. Returns the (possibly transformed)
374
+ * chunk. Fail-open: on exception or non-string return, the original chunk
375
+ * passes through unchanged. Must be cheap (~0 ms budget).
376
+ */
377
+ runAfterLlmChunk(chunk: string): string;
378
+ /**
379
+ * Tier 2 — per-sentence rewrite. Returns rewritten sentence text, the
380
+ * original sentence (if hook returned `null`), or `null` to drop the
381
+ * sentence entirely (empty string is treated as drop). Fail-open.
382
+ */
383
+ runAfterLlmSentence(sentence: string, ctx: HookContext): Promise<string | null>;
384
+ /**
385
+ * Tier 3 — per-response rewrite. Returns the (possibly rewritten) full
386
+ * response text. Triggered after the LLM stream completes. Caller is
387
+ * responsible for buffering tokens before invocation. Fail-open.
388
+ */
389
+ runAfterLlmResponse(text: string, ctx: HookContext): Promise<string>;
390
+ /**
391
+ * Backward-compatible alias for `runAfterLlmResponse`. Existing call sites
392
+ * in the LLM loop continue to work unchanged.
393
+ *
394
+ * @deprecated Use `runAfterLlmResponse` directly.
331
395
  */
332
396
  runAfterLlm(text: string, ctx: HookContext): Promise<string>;
333
397
  /**
334
- * Whether ``afterLlm`` is configured. Used by the LLM loop to decide
335
- * whether to buffer streaming tokens before yielding them.
398
+ * Whether a per-response (tier 3) `onResponse` transform is configured.
399
+ * The LLM loop uses this to decide whether to buffer streaming tokens
400
+ * before yielding them. Per-token (tier 1) and per-sentence (tier 2)
401
+ * transforms do NOT require buffering.
402
+ */
403
+ hasAfterLlmResponse(): boolean;
404
+ /** Whether a per-sentence (tier 2) transform is configured. */
405
+ hasAfterLlmSentence(): boolean;
406
+ /** Whether a per-token (tier 1) transform is configured. */
407
+ hasAfterLlmChunk(): boolean;
408
+ /**
409
+ * Backward-compatible alias for `hasAfterLlmResponse`. The legacy callable
410
+ * form maps to `onResponse`, so this preserves the original semantic for
411
+ * existing call sites.
412
+ *
413
+ * @deprecated Use `hasAfterLlmResponse` directly.
336
414
  */
337
415
  hasAfterLlm(): boolean;
338
416
  /**
@@ -350,13 +428,15 @@ declare class PipelineHookExecutor {
350
428
  /**
351
429
  * Lightweight in-process event bus for Patter call lifecycle events.
352
430
  *
353
- * Mirrors the Python ``PatterEventBus`` (sdk-py/getpatter/observability/event_bus.py).
431
+ * Mirrors the Python ``PatterEventBus`` (libraries/python/getpatter/observability/event_bus.py).
354
432
  * Consumers subscribe with ``on()`` and receive typed payloads. ``emit()`` is
355
433
  * synchronous but handles async listeners: rejections are surfaced via the
356
434
  * Patter logger rather than being swallowed or crashing the call.
357
435
  */
436
+ /** String tag identifying every event type the `EventBus` knows how to dispatch. */
358
437
  type PatterEventType = 'turn_started' | 'turn_ended' | 'eou_metrics' | 'interruption' | 'llm_metrics' | 'tts_metrics' | 'stt_metrics' | 'metrics_collected' | 'call_ended' | 'transcript_partial' | 'transcript_final' | 'llm_chunk' | 'tts_chunk' | 'tool_call_started';
359
438
  type Listener<T = unknown> = (payload: T) => void | Promise<void>;
439
+ /** In-process pub/sub for Patter call-lifecycle events. */
360
440
  declare class EventBus {
361
441
  private readonly listeners;
362
442
  /**
@@ -370,6 +450,65 @@ declare class EventBus {
370
450
  emit<T = unknown>(event: PatterEventType, payload: T): void;
371
451
  }
372
452
 
453
+ /**
454
+ * Per-tool circuit breaker for the Patter SDK.
455
+ *
456
+ * Trips OPEN after N consecutive failures, rejects calls for a cooldown
457
+ * window so a flaky downstream (DB outage, vendor API rate-limit, dead
458
+ * webhook) doesn't burn LLM tokens on retries that will keep failing.
459
+ * After the cooldown elapses the next call probes (HALF_OPEN); a success
460
+ * resets to CLOSED, a failure reopens. The model receives a structured
461
+ * ``{ error, fallback: true }`` JSON in all rejection paths so it can
462
+ * recover gracefully instead of waiting forever.
463
+ *
464
+ * Lightweight in-memory implementation — one ``CircuitBreakerRegistry``
465
+ * per ``DefaultToolExecutor``, state is per tool name. Not persisted
466
+ * across process restarts (intentional — voice calls are too short for
467
+ * persistence to matter).
468
+ */
469
+ /** Lifecycle states for the breaker. */
470
+ declare const CircuitBreakerState: {
471
+ readonly CLOSED: "closed";
472
+ readonly OPEN: "open";
473
+ readonly HALF_OPEN: "half_open";
474
+ };
475
+ type CircuitBreakerState = (typeof CircuitBreakerState)[keyof typeof CircuitBreakerState];
476
+ /** Tunables for a single per-tool breaker. */
477
+ interface CircuitBreakerOptions {
478
+ /** Consecutive failures that flip CLOSED → OPEN. ``0`` disables. */
479
+ failureThreshold?: number;
480
+ /** Time (ms) the breaker stays OPEN before allowing a probe. */
481
+ cooldownMs?: number;
482
+ }
483
+ interface PerToolState {
484
+ state: CircuitBreakerState;
485
+ consecutiveFailures: number;
486
+ openedAt: number;
487
+ }
488
+ /** Per-name registry tracking circuit state for a fleet of tools. */
489
+ declare class CircuitBreakerRegistry {
490
+ private readonly threshold;
491
+ private readonly cooldownMs;
492
+ private readonly state;
493
+ /** Inject for deterministic tests; defaults to ``Date.now()``. */
494
+ private readonly clock;
495
+ constructor(opts?: CircuitBreakerOptions, clock?: () => number);
496
+ /** Returns ``true`` when this tool is currently allowed to run. */
497
+ allow(toolName: string): boolean;
498
+ /** Mark a successful execution. Resets the breaker to CLOSED. */
499
+ recordSuccess(toolName: string): void;
500
+ /** Mark a failed execution; trips OPEN once threshold is reached. */
501
+ recordFailure(toolName: string): void;
502
+ /**
503
+ * Time until the breaker transitions OPEN → HALF_OPEN, in ms. Returns
504
+ * ``0`` when the breaker is currently allowing calls. Useful for
505
+ * tests and the structured rejection JSON.
506
+ */
507
+ timeUntilHalfOpen(toolName: string): number;
508
+ /** Snapshot for debugging / metrics. */
509
+ snapshot(toolName: string): PerToolState | null;
510
+ }
511
+
373
512
  /**
374
513
  * Built-in LLM loop for pipeline mode when no onMessage handler is provided.
375
514
  *
@@ -387,7 +526,7 @@ interface LlmUsageRecorder {
387
526
  }
388
527
  /**
389
528
  * Pluggable tool executor — mirrors the Python ``ToolExecutor`` in
390
- * ``sdk-py/getpatter/services/tool_executor.py``.
529
+ * ``libraries/python/getpatter/services/tool_executor.py``.
391
530
  *
392
531
  * Implementors receive a fully-resolved ``ToolDefinition`` (handler and/or webhook
393
532
  * URL already validated by the SDK) and MUST return a JSON-stringifiable
@@ -395,28 +534,50 @@ interface LlmUsageRecorder {
395
534
  * ``{ error: "...", fallback: true }`` rather than thrown.
396
535
  */
397
536
  interface ToolExecutor {
398
- execute(toolDef: ToolDefinition, args: Record<string, unknown>, callContext: Record<string, unknown>): Promise<string>;
537
+ execute(toolDef: ToolDefinition, args: Record<string, unknown>, callContext: Record<string, unknown>, onProgress?: (text: string) => void | Promise<void>): Promise<string>;
399
538
  }
539
+ /** Constructor options for `DefaultToolExecutor`. */
400
540
  interface DefaultToolExecutorOptions {
401
541
  /** Total attempts = maxRetries + 1. Default: 2 (i.e. 3 attempts). */
402
542
  maxRetries?: number;
403
- /** Delay between attempts, in ms. */
543
+ /** Delay between attempts, in ms. Each retry waits this × ``2^attempt``. */
404
544
  retryDelayMs?: number;
405
545
  /** Per-request timeout for webhook calls, in ms. */
406
546
  requestTimeoutMs?: number;
547
+ /**
548
+ * Circuit-breaker tunables. Default trips OPEN after 5 consecutive
549
+ * failures and stays OPEN for 30 s. Pass ``{ failureThreshold: 0 }`` to
550
+ * disable entirely (legacy behaviour).
551
+ */
552
+ circuitBreaker?: CircuitBreakerOptions;
407
553
  }
408
554
  /**
409
- * Default executor — webhook with retry/fallback and local handler preference.
555
+ * Default executor — webhook + handler with retry/exponential-backoff
556
+ * and a per-tool circuit breaker.
410
557
  *
411
- * This is the out-of-the-box behavior and is 1:1 equivalent to the previous
412
- * inline logic in ``LLMLoop.executeTool``.
558
+ * Failure modes return a structured ``{ error, fallback: true }`` JSON
559
+ * so the model can recover gracefully (e.g. respond "I couldn't reach
560
+ * the booking system, can I take your number to call you back?")
561
+ * instead of hanging on an exception that never surfaces.
413
562
  */
414
563
  declare class DefaultToolExecutor implements ToolExecutor {
415
564
  private readonly maxRetries;
416
565
  private readonly retryDelayMs;
417
566
  private readonly requestTimeoutMs;
567
+ private readonly breaker;
418
568
  constructor(opts?: DefaultToolExecutorOptions);
419
- execute(toolDef: ToolDefinition, args: Record<string, unknown>, callContext: Record<string, unknown>): Promise<string>;
569
+ /** Expose the breaker for tests + dashboard observability. */
570
+ get circuitBreaker(): CircuitBreakerRegistry;
571
+ execute(toolDef: ToolDefinition, args: Record<string, unknown>, callContext: Record<string, unknown>,
572
+ /**
573
+ * Optional progress sink — invoked with each ``{ progress: string }``
574
+ * value yielded by an async-generator handler. Wired by the stream
575
+ * handler to ``OpenAIRealtimeAdapter.sendText`` so the agent speaks
576
+ * the progress message inline. ``null``/``undefined`` discards
577
+ * progress (function handlers always discard since they have no
578
+ * progress channel).
579
+ */
580
+ onProgress?: (text: string) => void | Promise<void>): Promise<string>;
420
581
  }
421
582
  /** A single streaming chunk yielded by an LLM provider. */
422
583
  interface LLMChunk {
@@ -440,8 +601,21 @@ interface LLMChunk {
440
601
  * invocation. Chunks with the same ``index`` are concatenated.
441
602
  * - ``{ type: "done" }`` — signals the end of the stream (optional).
442
603
  */
604
+ /**
605
+ * Optional knobs passed by the LLM loop into ``provider.stream``. Today the
606
+ * only field is ``signal``: a per-turn AbortSignal that the stream handler
607
+ * trips on barge-in so the underlying ``fetch`` / SDK call is cancelled
608
+ * IMMEDIATELY instead of waiting for the next token. Without this, a
609
+ * barge-in fired while the upstream LLM is still composing its first
610
+ * sentence leaves the fetch open until the provider's own timeout (often
611
+ * 30 s) elapses, blocking the next user transcript and producing the
612
+ * "agent stays silent after interruption" symptom.
613
+ */
614
+ interface LLMStreamOptions {
615
+ signal?: AbortSignal;
616
+ }
443
617
  interface LLMProvider {
444
- stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
618
+ stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<LLMChunk, void, unknown>;
445
619
  }
446
620
  /** Optional sampling kwargs forwarded into the OpenAI Chat Completions body. */
447
621
  interface OpenAILLMSamplingOptions {
@@ -481,8 +655,10 @@ declare class OpenAILLMProvider implements LLMProvider {
481
655
  private readonly presencePenalty?;
482
656
  private readonly stop?;
483
657
  constructor(apiKey: string, model: string, sampling?: OpenAILLMSamplingOptions);
484
- stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
658
+ /** Stream OpenAI Chat Completions chunks for the given messages/tools. */
659
+ stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<LLMChunk, void, unknown>;
485
660
  }
661
+ /** Pipeline-mode LLM driver: runs the chat loop, dispatches tool calls, and emits text deltas. */
486
662
  declare class LLMLoop {
487
663
  private readonly provider;
488
664
  private readonly systemPrompt;
@@ -493,7 +669,8 @@ declare class LLMLoop {
493
669
  private eventBus?;
494
670
  private readonly _providerName;
495
671
  private readonly _modelName;
496
- constructor(apiKey: string, model: string, systemPrompt: string, tools?: ToolDefinition[] | null, llmProvider?: LLMProvider);
672
+ private onToolCall?;
673
+ constructor(apiKey: string, model: string, systemPrompt: string, tools?: ToolDefinition[] | null, llmProvider?: LLMProvider, disablePhonePreamble?: boolean);
497
674
  /**
498
675
  * Swap in a custom tool executor (e.g. different retry policy, metrics
499
676
  * wrapping, tenant-aware fan-out). The default is ``DefaultToolExecutor``.
@@ -505,6 +682,14 @@ declare class LLMLoop {
505
682
  * appears. Set to ``undefined`` to disable.
506
683
  */
507
684
  setEventBus(bus: EventBus | undefined): void;
685
+ /**
686
+ * Set or replace the post-tool-execution observer. The callback is
687
+ * awaited after every successful tool execution with
688
+ * `(name, args, result)`. Pass `undefined` to disable. Mirrors the
689
+ * Python `LLMLoop.set_on_tool_call` setter so callers (e.g. the
690
+ * pipeline `StreamHandler`) can wire the loop after construction.
691
+ */
692
+ setOnToolCall(callback: ((name: string, args: Record<string, unknown>, result: string) => Promise<void>) | undefined): void;
508
693
  /**
509
694
  * Stream LLM response tokens, handling tool calls automatically.
510
695
  * Yields text tokens as they arrive from the LLM.
@@ -516,16 +701,23 @@ declare class LLMLoop {
516
701
  run(userText: string, history: Array<{
517
702
  role: string;
518
703
  text: string;
519
- }>, callContext: Record<string, unknown>, metrics?: LlmUsageRecorder, hookExecutor?: PipelineHookExecutor, hookCtx?: HookContext): AsyncGenerator<string, void, unknown>;
704
+ }>, callContext: Record<string, unknown>, metrics?: LlmUsageRecorder, hookExecutor?: PipelineHookExecutor, hookCtx?: HookContext, opts?: LLMStreamOptions): AsyncGenerator<string, void, unknown>;
520
705
  private executeTool;
521
706
  private buildMessages;
522
707
  }
523
708
 
709
+ /**
710
+ * Public type definitions for the Patter SDK — agent options, pipeline hooks,
711
+ * provider config envelopes, and serve/call request/response shapes.
712
+ */
713
+
714
+ /** Inbound message handed to a `MessageHandler` per turn (legacy single-turn API). */
524
715
  interface IncomingMessage {
525
716
  readonly text: string;
526
717
  readonly callId: string;
527
718
  readonly caller: string;
528
719
  }
720
+ /** STT provider configuration envelope (provider name + key + language + provider-specific options). */
529
721
  interface STTConfig {
530
722
  readonly provider: string;
531
723
  readonly apiKey: string;
@@ -539,6 +731,7 @@ interface STTConfig {
539
731
  /** Provider-specific knobs (e.g. Deepgram endpointing). */
540
732
  options?: Record<string, unknown>;
541
733
  }
734
+ /** TTS provider configuration envelope (provider name + key + voice + provider-specific options). */
542
735
  interface TTSConfig {
543
736
  readonly provider: string;
544
737
  readonly apiKey: string;
@@ -550,17 +743,94 @@ interface TTSConfig {
550
743
  toDict(): Record<string, string | Record<string, unknown>>;
551
744
  options?: Record<string, unknown>;
552
745
  }
746
+ /** Single-turn message handler — receives the user's transcript, returns the agent's reply. */
553
747
  type MessageHandler = (msg: IncomingMessage) => Promise<string>;
748
+ /** Generic call-lifecycle callback (start/end/transcript/metrics). */
554
749
  type CallEventHandler = (data: Record<string, unknown>) => Promise<void>;
750
+ /**
751
+ * Public MCP server configuration. ``string`` is shorthand for
752
+ * ``{ url: <string>, transport: 'streamable-http' }``. Re-exported from
753
+ * ``tools/mcp-client`` to keep a single source of truth.
754
+ */
755
+ type MCPServerConfig = string | {
756
+ readonly url: string;
757
+ readonly transport?: 'streamable-http';
758
+ /** Headers attached to every transport request — typically auth. */
759
+ readonly headers?: Record<string, string>;
760
+ /** Optional logical name for telemetry / log lines. */
761
+ readonly name?: string;
762
+ };
763
+ /** Internal shape of a tool definition (matches `Tool` from `public-api.ts`). */
555
764
  interface ToolDefinition {
556
765
  name: string;
557
766
  description: string;
558
767
  parameters: Record<string, unknown>;
559
768
  /** Webhook URL — called when the LLM invokes this tool. Mutually exclusive with handler. */
560
769
  webhookUrl?: string;
561
- /** Local handler function — when provided, called instead of webhookUrl. */
562
- handler?: (args: Record<string, unknown>, context: Record<string, unknown>) => Promise<string>;
770
+ /**
771
+ * Local handler called instead of ``webhookUrl`` when present.
772
+ *
773
+ * Two forms:
774
+ *
775
+ * - **Async function**: returns the final result as a JSON string.
776
+ * The model receives only the final return value.
777
+ *
778
+ * - **Async generator**: yields zero or more progress updates before
779
+ * returning. Each ``yield`` of ``{ progress: string }`` is spoken
780
+ * inline by the agent (Realtime: via ``adapter.sendText``) so the
781
+ * caller hears live status during long-running tools. The final
782
+ * ``return`` value (or last ``yield`` if no return) is the
783
+ * function-call result sent to the model. Pipeline mode currently
784
+ * ignores the progress yields — the final value is still used as
785
+ * the tool result.
786
+ */
787
+ handler?: ((args: Record<string, unknown>, context: Record<string, unknown>) => Promise<string>) | ((args: Record<string, unknown>, context: Record<string, unknown>) => AsyncGenerator<{
788
+ progress?: string;
789
+ result?: string;
790
+ }, string | void, unknown>);
791
+ /**
792
+ * "Reassurance" filler the agent speaks while a slow tool call runs.
793
+ * Bridges the silence when a handler or webhook takes longer than
794
+ * humans naturally tolerate (~1.5 s) without sounding dead.
795
+ *
796
+ * Two forms:
797
+ * - string: shorthand for ``{ message: <string>, afterMs: 1500 }``.
798
+ * - object: explicit ``{ message, afterMs? }``. ``afterMs`` is the
799
+ * grace window before the reassurance fires; if the tool returns
800
+ * earlier, no message is spoken.
801
+ *
802
+ * Currently honoured only in **Realtime mode** — the SDK enqueues the
803
+ * message via ``OpenAIRealtimeAdapter.sendText`` so the model
804
+ * synthesises it inline. Pipeline mode has no clean injection point
805
+ * mid-turn yet; the option is silently ignored there. Off by default.
806
+ */
807
+ reassurance?: string | {
808
+ message: string;
809
+ afterMs?: number;
810
+ };
811
+ /**
812
+ * Enable OpenAI strict mode for this tool's function schema. When ``true``
813
+ * the model is constrained to emit arguments that exactly match the
814
+ * declared schema — no missing required fields, no extra properties, no
815
+ * type coercion. Defaults to ``false`` for backward compatibility.
816
+ *
817
+ * Strict mode requires the schema to satisfy OpenAI's structural rules:
818
+ * - root must be ``type: "object"``
819
+ * - every nested object must have ``additionalProperties: false``
820
+ * - every property listed in ``properties`` must also be in ``required``
821
+ *
822
+ * Patter validates these requirements at ``agent()`` build time when
823
+ * ``strict: true`` is set; an invalid schema raises immediately rather
824
+ * than failing silently mid-call. Use ``null`` in a union (``["string",
825
+ * "null"]``) to express "optional" — strict mode does not allow truly
826
+ * optional fields.
827
+ *
828
+ * Recommended for any tool whose handler/webhook can't safely tolerate
829
+ * malformed arguments (DB writes, payment, transfers).
830
+ */
831
+ strict?: boolean;
563
832
  }
833
+ /** Constructor options for `new Patter({...})` in local-server mode. */
564
834
  interface LocalOptions {
565
835
  /**
566
836
  * Telephony carrier instance. Required.
@@ -579,6 +849,34 @@ interface LocalOptions {
579
849
  tunnel?: CloudflareTunnel | Static | boolean;
580
850
  phoneNumber: string;
581
851
  webhookUrl?: string;
852
+ /**
853
+ * On-disk persistence for the dashboard's call history. The dashboard
854
+ * itself is in-memory, but enabling ``persist`` writes per-call records
855
+ * (metadata.json, transcript.jsonl, events.jsonl) to disk and rebuilds
856
+ * the in-memory cache on startup so the dashboard survives process
857
+ * restarts without an external database.
858
+ *
859
+ * Accepted values:
860
+ * - omitted / ``false`` (default): no disk writes; the dashboard resets
861
+ * on every restart. Backward-compatible with prior behaviour.
862
+ * - ``true``: write under the platform default location
863
+ * (``~/Library/Application Support/patter`` on macOS,
864
+ * ``%LOCALAPPDATA%\\patter`` on Windows,
865
+ * ``$XDG_DATA_HOME/patter`` on Linux). Equivalent to setting
866
+ * ``PATTER_LOG_DIR=auto``.
867
+ * - string: write under the supplied absolute path. Equivalent to
868
+ * setting ``PATTER_LOG_DIR=<path>``.
869
+ *
870
+ * The ``PATTER_LOG_DIR`` env var still works as a deployment-time
871
+ * fallback: it is honoured only while ``persist`` is unset. When
872
+ * ``persist`` is set explicitly the env var is ignored.
873
+ *
874
+ * Retention: defaults to 30 days, controlled by
875
+ * ``PATTER_LOG_RETENTION_DAYS`` (set to ``0`` to keep forever).
876
+ * Phone numbers are masked by default; control via
877
+ * ``PATTER_LOG_REDACT_PHONE``.
878
+ */
879
+ persist?: boolean | string;
582
880
  /**
583
881
  * @internal — allows ``StreamHandler`` to build the default OpenAI
584
882
  * ``LLMLoop`` when no ``onMessage`` handler is supplied. The
@@ -587,6 +885,7 @@ interface LocalOptions {
587
885
  */
588
886
  openaiKey?: string;
589
887
  }
888
+ /** Internal shape of a guardrail (matches `Guardrail` class from `public-api.ts`). */
590
889
  interface Guardrail {
591
890
  /** Name for logging when triggered */
592
891
  name: string;
@@ -597,6 +896,7 @@ interface Guardrail {
597
896
  /** Replacement text spoken when guardrail triggers */
598
897
  replacement?: string;
599
898
  }
899
+ /** Per-call context passed to every pipeline hook. */
600
900
  interface HookContext {
601
901
  readonly callId: string;
602
902
  readonly caller: string;
@@ -606,6 +906,32 @@ interface HookContext {
606
906
  text: string;
607
907
  }>;
608
908
  }
909
+ /**
910
+ * Streaming-friendly post-LLM transform hook. Three tiers, all optional:
911
+ *
912
+ * - **`onChunk`** — per-token pure transform. Sync, must be fast (~0 ms
913
+ * budget). Use for: regex replace, markdown strip, profanity char-swap.
914
+ * - **`onSentence`** — per-sentence rewrite. Runs between the sentence
915
+ * chunker and TTS. Returns rewritten text or `null` to keep original;
916
+ * ``""`` (empty string) drops the sentence silently. Latency budget
917
+ * ~50–300 ms. Use for: PII redaction, persona overlay, refusal swap.
918
+ * - **`onResponse`** — per-full-response rewrite. **Blocks streaming TTS**
919
+ * until the LLM stream completes, then runs once on the full text.
920
+ * Latency cost: 500 ms – 2 s. Use only when sentence-level rewrite is
921
+ * insufficient (e.g. structured output validation). Avoid in latency-
922
+ * sensitive paths.
923
+ *
924
+ * The legacy single-callable signature `(text, ctx) => string` is still
925
+ * accepted; it maps to `onResponse` and emits a deprecation warning.
926
+ */
927
+ interface AfterLLMHook {
928
+ onChunk?: (chunk: string) => string;
929
+ onSentence?: (sentence: string, ctx: HookContext) => string | null | Promise<string | null>;
930
+ onResponse?: (text: string, ctx: HookContext) => string | null | Promise<string | null>;
931
+ }
932
+ /** Legacy single-callable form of `afterLlm`. Maps to `onResponse`. @deprecated Pass `{ onResponse }` instead. */
933
+ type AfterLLMLegacy = (text: string, ctx: HookContext) => string | null | Promise<string | null>;
934
+ /** Optional callbacks fired at each stage of the STT→LLM→TTS pipeline. */
609
935
  interface PipelineHooks {
610
936
  /** Called with the raw PCM audio chunk before it is forwarded to the STT provider.
611
937
  * Return null to drop the chunk (e.g., for custom VAD gating). */
@@ -616,10 +942,16 @@ interface PipelineHooks {
616
942
  * Return null to keep them, or return a new list to replace
617
943
  * (useful for prompt injection, message filtering, RAG augmentation). */
618
944
  beforeLlm?: (messages: Array<Record<string, unknown>>, ctx: HookContext) => Array<Record<string, unknown>> | null | Promise<Array<Record<string, unknown>> | null>;
619
- /** Called with the final assistant text after the LLM stream completes.
620
- * Return null to keep, or return a new string to replace
621
- * (useful for output validation, redaction, post-processing). */
622
- afterLlm?: (text: string, ctx: HookContext) => string | null | Promise<string | null>;
945
+ /**
946
+ * Post-LLM transform. Pass either:
947
+ * - the new **3-tier object** (`{ onChunk, onSentence, onResponse }`) for
948
+ * streaming-friendly per-chunk / per-sentence / per-response transforms;
949
+ * - or the **legacy callable** `(text, ctx) => string` (deprecated) which
950
+ * maps to `onResponse` semantics and blocks streaming TTS.
951
+ *
952
+ * See `AfterLLMHook` for the full tier contract.
953
+ */
954
+ afterLlm?: AfterLLMHook | AfterLLMLegacy;
623
955
  /** Called before TTS, per-sentence in streaming mode. Return null to skip TTS for this sentence. */
624
956
  beforeSynthesize?: (text: string, ctx: HookContext) => string | null | Promise<string | null>;
625
957
  /** Called after TTS produces an audio chunk. Return null to discard this chunk. */
@@ -647,11 +979,27 @@ interface BackgroundAudioPlayer$1 {
647
979
  mix(agentPcm: Buffer, sampleRate: number): Promise<Buffer>;
648
980
  stop(): Promise<void>;
649
981
  }
982
+ /**
983
+ * Configuration for a local-mode voice AI agent.
984
+ *
985
+ * Several fields (``voice``, ``model``, ``language``) are also carried by
986
+ * engine markers (``OpenAIRealtime``, ``ElevenLabsConvAI``) and by the
987
+ * server-instantiated adapters. When the same setting is set in two places,
988
+ * precedence is:
989
+ *
990
+ * 1. **Explicit field on** ``phone.agent({ voice, model, language })`` always wins.
991
+ * 2. Otherwise, when an ``engine`` is passed, the engine's value is used
992
+ * (see ``Patter.agent()`` for the resolution).
993
+ * 3. Otherwise, the AgentOptions default is used.
994
+ */
995
+ /** Configuration for a local-mode voice AI agent (passed to `phone.agent({...})`). */
650
996
  interface AgentOptions {
651
997
  systemPrompt: string;
652
998
  /**
653
999
  * Voice preset. When ``engine`` is provided, its ``voice`` is used unless
654
- * explicitly overridden here.
1000
+ * explicitly overridden here. Format depends on the engine:
1001
+ * OpenAI Realtime accepts a name (``'alloy'``, ``'echo'``, ...);
1002
+ * ElevenLabs ConvAI accepts a voice ID.
655
1003
  */
656
1004
  voice?: string;
657
1005
  /**
@@ -659,10 +1007,56 @@ interface AgentOptions {
659
1007
  * unless explicitly overridden here.
660
1008
  */
661
1009
  model?: string;
1010
+ /**
1011
+ * BCP-47 language code (e.g. ``'en'``, ``'it'``). Forwarded to STT (in
1012
+ * pipeline mode) and to the engine adapter at call time. STTConfig has its
1013
+ * own ``language`` field for the rare case where STT must use a different
1014
+ * language than the rest of the pipeline.
1015
+ */
662
1016
  language?: string;
663
1017
  firstMessage?: string;
664
1018
  /** Tool definitions — ``Tool`` class instances from ``getpatter``. */
665
1019
  tools?: Array<Tool>;
1020
+ /**
1021
+ * Model Context Protocol (MCP) servers to plug into this agent. Each
1022
+ * server is queried at call start via ``tools/list`` and its tools
1023
+ * are merged into ``tools`` with synthetic handlers that dispatch
1024
+ * back through the MCP client. Lets you connect to existing MCP
1025
+ * servers (Google Workspace, PayPal, GitHub, Postgres, …) without
1026
+ * writing a wrapper handler.
1027
+ *
1028
+ * Each entry is either a URL string (shorthand for
1029
+ * ``{ url, transport: 'streamable-http' }``) or an explicit object
1030
+ * with optional ``headers`` for auth and a ``name`` for telemetry.
1031
+ *
1032
+ * Requires the optional dependency ``@modelcontextprotocol/sdk``.
1033
+ * When unset, MCP is fully disabled and the SDK ships without the
1034
+ * dependency installed.
1035
+ *
1036
+ * Cost: one HTTP handshake + ``tools/list`` round-trip per server at
1037
+ * call start (~50-200 ms × N servers). Future iterations may cache
1038
+ * the discovered list process-wide.
1039
+ */
1040
+ mcpServers?: ReadonlyArray<MCPServerConfig>;
1041
+ /**
1042
+ * When ``true``, ship ``systemPrompt`` to the LLM verbatim. Default
1043
+ * (``false``) prepends a phone-friendly preamble that instructs the
1044
+ * model to avoid markdown, emojis, bullet lists, and verbose replies —
1045
+ * the conventions live phone calls require.
1046
+ */
1047
+ disablePhonePreamble?: boolean;
1048
+ /**
1049
+ * Acoustic echo cancellation. When `true` (pipeline mode only) the SDK
1050
+ * instantiates an `NlmsEchoCanceller` that subtracts the agent's own
1051
+ * TTS bleed from the inbound mic stream before VAD/STT see it.
1052
+ * Strongly recommended for speakerphone / tunnel deployments where the
1053
+ * bleed otherwise keeps VAD permanently in "speaking" state and
1054
+ * barge-in only fires during natural TTS pauses. Off by default —
1055
+ * handset / headset deployments don't have the bleed, and the 0.5–2 s
1056
+ * convergence period would briefly attenuate caller speech if they
1057
+ * spoke before any TTS played.
1058
+ */
1059
+ echoCancellation?: boolean;
666
1060
  /**
667
1061
  * Realtime / ConvAI engine instance. When present, the agent runs in the
668
1062
  * matching mode (``openai_realtime`` or ``elevenlabs_convai``). When absent,
@@ -709,8 +1103,22 @@ interface AgentOptions {
709
1103
  * Default: 300.
710
1104
  */
711
1105
  bargeInThresholdMs?: number;
1106
+ /**
1107
+ * When true, the sentence chunker emits the first clause of each response
1108
+ * on a soft punctuation boundary (",", em-dash, en-dash) once ~40 chars
1109
+ * have accumulated. Saves 200–500 ms TTFA on the first sentence of each
1110
+ * turn at the cost of slightly clipping prosody on the very first chunk.
1111
+ * Hard-disabled when ``language`` starts with ``"it"`` (Italian decimal
1112
+ * comma would split mid-number). Default: false.
1113
+ *
1114
+ * See SentenceChunker constructor for the full guard list (decimal,
1115
+ * currency, balanced delimiter, ellipsis).
1116
+ */
1117
+ aggressiveFirstFlush?: boolean;
712
1118
  }
1119
+ /** Pipeline-mode message handler — given full turn context, returns the agent's reply. */
713
1120
  type PipelineMessageHandler = (data: Record<string, unknown>) => Promise<string>;
1121
+ /** Options for `Patter.serve({...})`. */
714
1122
  interface ServeOptions {
715
1123
  agent: AgentOptions;
716
1124
  port?: number;
@@ -738,12 +1146,73 @@ interface ServeOptions {
738
1146
  dashboardDb?: string;
739
1147
  /** When true (default), persist dashboard data. */
740
1148
  dashboardPersist?: boolean;
1149
+ /**
1150
+ * When true (default), `serve()` calls the carrier's API on startup to
1151
+ * point the configured phone number's webhook URL at this server. Set
1152
+ * to `false` when the webhook is managed externally (Terraform, an edge
1153
+ * gateway / voice-router, or any infra-as-code system) — otherwise every
1154
+ * boot will silently overwrite the externally-managed value.
1155
+ *
1156
+ * Required `false` when:
1157
+ * - Twilio's voice_url should point at a router/gateway in front of
1158
+ * this server rather than directly at it.
1159
+ * - Multiple replicas share the same Twilio number; only one should
1160
+ * write the webhook.
1161
+ * - Compliance forbids the runtime from holding write credentials
1162
+ * against the carrier console.
1163
+ *
1164
+ * Ignored (treated as true) when `tunnel: true`, because the tunnel
1165
+ * hostname is dynamic and only known at runtime — the carrier MUST be
1166
+ * reconfigured for inbound calls to land.
1167
+ */
1168
+ manageWebhook?: boolean;
1169
+ }
1170
+ /**
1171
+ * Normalised AMD (answering-machine detection) result emitted to
1172
+ * ``LocalCallOptions.onMachineDetection`` once the carrier reports back.
1173
+ * The ``raw`` field preserves the provider value verbatim so callers can
1174
+ * apply provider-specific logic; ``classification`` is the SDK's
1175
+ * carrier-agnostic projection that test/acceptance code should check.
1176
+ */
1177
+ interface MachineDetectionResult {
1178
+ readonly call_id: string;
1179
+ readonly carrier: 'twilio' | 'telnyx';
1180
+ /** Carrier-agnostic projection. Use this in app code unless you really need the raw provider value. */
1181
+ readonly classification: 'human' | 'machine' | 'fax' | 'unknown';
1182
+ /**
1183
+ * Raw provider value:
1184
+ * - Twilio: ``human``, ``machine_start``, ``machine_end_beep``,
1185
+ * ``machine_end_silence``, ``machine_end_other``, ``fax``, ``unknown``.
1186
+ * - Telnyx: ``human``, ``machine``, ``not_sure``.
1187
+ */
1188
+ readonly raw: string;
1189
+ /** Unix epoch seconds at which the result was received from the carrier. */
1190
+ readonly detected_at: number;
741
1191
  }
1192
+ /** Options for `Patter.call({...})` to place an outbound call. */
742
1193
  interface LocalCallOptions {
743
1194
  to: string;
744
1195
  agent: AgentOptions;
1196
+ /**
1197
+ * Enable answering-machine detection. **Defaults to ``true``** — the SDK
1198
+ * asks Twilio (``MachineDetection=DetectMessageEnd`` + Async AMD) or
1199
+ * Telnyx (``answering_machine_detection=greeting_end``) to classify
1200
+ * whoever picks up. Async AMD on Twilio adds ~0 answer-latency on human
1201
+ * pickups (the call connects immediately and the result arrives via
1202
+ * webhook 2-5 s later), so ON-by-default is safe. Pass ``false`` to
1203
+ * disable when you want to skip per-call AMD billing or you already
1204
+ * know the destination is a human.
1205
+ */
745
1206
  machineDetection?: boolean;
746
- /** If set, spoken as a voicemail message when AMD detects a machine. Requires machineDetection=true. */
1207
+ /**
1208
+ * Called once when the carrier finishes the AMD check. Fires for both
1209
+ * ``human`` and ``machine`` outcomes. Combine with ``voicemailMessage``
1210
+ * to get both the legacy voicemail-drop AND a result callback (the SDK
1211
+ * fires the callback after the drop is queued). Acceptance tests use
1212
+ * this to mark a run INVALID when ``classification !== 'human'``.
1213
+ */
1214
+ onMachineDetection?: (result: MachineDetectionResult) => void | Promise<void>;
1215
+ /** If set, spoken as a voicemail message when AMD detects a machine. Implicitly enables ``machineDetection``. */
747
1216
  voicemailMessage?: string;
748
1217
  /** Dynamic variables merged into agent.variables before call. Override agent-level variables. */
749
1218
  variables?: Record<string, string>;
@@ -770,6 +1239,7 @@ interface LocalCallOptions {
770
1239
  * the JSONL/JSON files, the store is just a cache on top).
771
1240
  */
772
1241
 
1242
+ /** Snapshot of a call as held by the dashboard store. */
773
1243
  interface CallRecord {
774
1244
  call_id: string;
775
1245
  caller: string;
@@ -792,10 +1262,12 @@ interface CallRecord {
792
1262
  metrics?: Record<string, unknown> | null;
793
1263
  [key: string]: unknown;
794
1264
  }
1265
+ /** Server-Sent-Event payload broadcast by `MetricsStore` for live UI updates. */
795
1266
  interface SSEEvent {
796
1267
  type: string;
797
1268
  data: Record<string, unknown>;
798
1269
  }
1270
+ /** In-memory bounded ring buffer of recent calls plus active-call tracking. */
799
1271
  declare class MetricsStore extends EventEmitter {
800
1272
  private readonly maxCalls;
801
1273
  private calls;
@@ -810,6 +1282,7 @@ declare class MetricsStore extends EventEmitter {
810
1282
  maxCalls?: number;
811
1283
  });
812
1284
  private publish;
1285
+ /** Mark a call as in-progress (creates the row if it does not yet exist). */
813
1286
  recordCallStart(data: Record<string, unknown>): void;
814
1287
  /**
815
1288
  * Pre-register an outbound call before any webhook fires. Lets the
@@ -823,15 +1296,23 @@ declare class MetricsStore extends EventEmitter {
823
1296
  * row from active to completed so the UI freezes the live duration timer.
824
1297
  */
825
1298
  updateCallStatus(callId: string, status: string, extra?: Record<string, unknown>): void;
1299
+ /** Append a single conversation turn to an active call and broadcast it via SSE. */
826
1300
  recordTurn(data: Record<string, unknown>): void;
1301
+ /** Move a call from active to completed and persist its final metrics. */
827
1302
  recordCallEnd(data: Record<string, unknown>, metrics?: Record<string, unknown> | null): void;
1303
+ /** Return a window of completed calls in newest-first order. */
828
1304
  getCalls(limit?: number, offset?: number): CallRecord[];
1305
+ /** Look up a completed call by id (newest match wins). */
829
1306
  getCall(callId: string): CallRecord | null;
830
1307
  /** Look up an active call by id (returns undefined if not active or unknown). */
831
1308
  getActive(callId: string): CallRecord | undefined;
1309
+ /** Return all currently active (not yet ended) calls. */
832
1310
  getActiveCalls(): CallRecord[];
1311
+ /** Compute summary statistics across the buffered call history. */
833
1312
  getAggregates(): Record<string, unknown>;
1313
+ /** Return calls whose `started_at` falls within `[fromTs, toTs]` (Unix seconds). */
834
1314
  getCallsInRange(fromTs?: number, toTs?: number): CallRecord[];
1315
+ /** Number of completed calls currently in the ring buffer. */
835
1316
  get callCount(): number;
836
1317
  /**
837
1318
  * Rebuild the in-memory call list from `metadata.json` files written by
@@ -846,10 +1327,185 @@ declare class MetricsStore extends EventEmitter {
846
1327
  hydrate(logRoot: string | null | undefined): number;
847
1328
  }
848
1329
 
1330
+ /** Async-or-sync callback. Sync return values are silently ignored. */
1331
+ type SpeechEventCallback = (payload: Readonly<Record<string, unknown>>) => void | Promise<void>;
1332
+ type UserState = "listening" | "speaking" | "thinking" | "away";
1333
+ type AgentState = "initializing" | "idle" | "listening" | "thinking" | "speaking";
1334
+ interface ConversationStateSnapshot {
1335
+ readonly user: UserState;
1336
+ readonly agent: AgentState;
1337
+ }
1338
+ type EouTrigger = "vad_silence" | "semantic_turn_detector" | "manual_commit";
1339
+ interface UserSpeechStartedOptions {
1340
+ readonly vadConfidence?: number;
1341
+ readonly audioOffsetMs?: number;
1342
+ readonly timestampMs?: number;
1343
+ }
1344
+ interface UserSpeechEndedOptions extends UserSpeechStartedOptions {
1345
+ readonly speechDurationMs: number;
1346
+ }
1347
+ interface UserSpeechEosOptions {
1348
+ readonly trigger: EouTrigger;
1349
+ readonly trailingSilenceMs?: number;
1350
+ readonly transcriptSoFar?: string;
1351
+ readonly timestampMs?: number;
1352
+ }
1353
+ interface AgentSpeechStartedOptions {
1354
+ readonly ttsProvider?: string;
1355
+ readonly engine?: string;
1356
+ readonly timestampMs?: number;
1357
+ }
1358
+ interface AgentSpeechEndedOptions {
1359
+ readonly speechDurationMs: number;
1360
+ readonly interrupted?: boolean;
1361
+ readonly timestampMs?: number;
1362
+ }
1363
+ interface LlmFirstTokenOptions {
1364
+ readonly llmProvider: string;
1365
+ readonly model: string;
1366
+ readonly timestampMs?: number;
1367
+ }
1368
+ interface AudioOutOptions {
1369
+ readonly ttsProvider: string;
1370
+ readonly timestampMs?: number;
1371
+ }
1372
+ /**
1373
+ * Dispatcher for the seven turn-taking events. A single instance is
1374
+ * shared by every `Patter` instance and survives across calls — the per-turn
1375
+ * state (`turnIdx`, `firstTokenForTurn`, `firstAudioForTurn`) lives here too
1376
+ * so the runner sees a monotonically-increasing turn index across a session.
1377
+ *
1378
+ * Backwards compatibility: every callback defaults to `null`. Existing users
1379
+ * who never set a callback see exactly the previous behaviour and zero
1380
+ * overhead.
1381
+ */
1382
+ declare class SpeechEvents {
1383
+ onUserSpeechStarted: SpeechEventCallback | null;
1384
+ onUserSpeechEnded: SpeechEventCallback | null;
1385
+ onUserSpeechEos: SpeechEventCallback | null;
1386
+ onAgentSpeechStarted: SpeechEventCallback | null;
1387
+ onAgentSpeechEnded: SpeechEventCallback | null;
1388
+ onLlmToken: SpeechEventCallback | null;
1389
+ onAudioOut: SpeechEventCallback | null;
1390
+ private userState;
1391
+ private agentState;
1392
+ private turnIdxValue;
1393
+ private firstTokenForTurn;
1394
+ private firstAudioForTurn;
1395
+ private callStartMs;
1396
+ /** Snapshot of the current per-side state of the call. */
1397
+ get conversationState(): ConversationStateSnapshot;
1398
+ /** Current 0-based turn index. Increments on every EOU commit. */
1399
+ get turnIdx(): number;
1400
+ /** Record the call-start wall-clock for ``audioOffsetMs`` math. */
1401
+ markCallStarted(tsMs?: number): void;
1402
+ /** Reset per-turn cursors. Called automatically on EOU commit. */
1403
+ resetTurnState(): void;
1404
+ /** Fire on the VAD positive edge of the inbound stream.
1405
+ *
1406
+ * Do not coalesce: the runner consumes positive→negative→positive
1407
+ * transitions in order. For server-VAD engines (OpenAI Realtime, Telnyx
1408
+ * Voice AI), forward the upstream signal directly — do not re-run a VAD
1409
+ * layer on top.
1410
+ */
1411
+ fireUserSpeechStarted(opts?: UserSpeechStartedOptions): Promise<void>;
1412
+ /** Fire on the VAD trailing edge (raw — *not* EOU).
1413
+ *
1414
+ * `speechDurationMs` is the length of the segment that just ended; the
1415
+ * runner uses it to compute talk-ratio.
1416
+ */
1417
+ fireUserSpeechEnded(opts: UserSpeechEndedOptions): Promise<void>;
1418
+ /** Fire on the committed end-of-utterance.
1419
+ *
1420
+ * This is the canonical "user finished" signal — VAD edge + trailing
1421
+ * silence + (optionally) a semantic turn-detector model agreement. The
1422
+ * runner uses the timestamp of this event to compute
1423
+ * `eos_to_first_token_ms` (Hamming AI threshold: <800 ms good, >1500 ms
1424
+ * critical).
1425
+ */
1426
+ fireUserSpeechEos(opts: UserSpeechEosOptions): Promise<void>;
1427
+ /** Fire on the FIRST audio chunk of the current agent turn that crosses
1428
+ * to the wire (not the first chunk produced by TTS).
1429
+ *
1430
+ * The user hears the wire chunk, so this is the timestamp the runner
1431
+ * anchors barge-in latency on.
1432
+ */
1433
+ fireAgentSpeechStarted(opts?: AgentSpeechStartedOptions): Promise<void>;
1434
+ /** Fire on the LAST audio chunk of the current agent turn.
1435
+ *
1436
+ * `interrupted=true` marks the turn as cancelled by barge-in; the runner
1437
+ * treats it as the `agent_speech_stopped` half of a barge-in pair.
1438
+ */
1439
+ fireAgentSpeechEnded(opts: AgentSpeechEndedOptions): Promise<void>;
1440
+ /** Fire on the FIRST LLM token of the current turn (TTFT marker).
1441
+ *
1442
+ * Idempotent within a turn — guarded by `firstTokenForTurn`. Combined
1443
+ * with `on_user_speech_eos.timestamp_ms` the runner computes
1444
+ * `eos_to_first_token_ms`.
1445
+ */
1446
+ fireLlmFirstToken(opts: LlmFirstTokenOptions): Promise<void>;
1447
+ /** Fire on the FIRST TTS audio chunk for the current turn.
1448
+ *
1449
+ * Distinct from `fireAgentSpeechStarted`: this is the agent-side buffer
1450
+ * arrival (TTS warmup), not the wire-time chunk. Idempotent within a
1451
+ * turn — guarded by `firstAudioForTurn`.
1452
+ */
1453
+ fireAudioOut(opts: AudioOutOptions): Promise<void>;
1454
+ private resolveOffset;
1455
+ private dispatch;
1456
+ }
1457
+
1458
+ /** Top-level SDK entry point — wraps a carrier + embedded server + agent loop. */
849
1459
  declare class Patter {
850
1460
  private localConfig;
851
1461
  private embeddedServer;
852
1462
  private tunnelHandle;
1463
+ private _tunnelReadyResolve;
1464
+ private _tunnelReadyReject;
1465
+ private _tunnelReady;
1466
+ private _readyResolve;
1467
+ private _readyReject;
1468
+ private _ready;
1469
+ /**
1470
+ * True iff ``localConfig.webhookUrl`` was populated by ``serve()`` from a
1471
+ * freshly-started cloudflared tunnel (rather than by the constructor from
1472
+ * an explicit ``webhookUrl`` / ``StaticTunnel`` config). ``disconnect()``
1473
+ * uses this flag to clear ONLY the auto-assigned hostname so a subsequent
1474
+ * ``serve()`` call (e.g. from a plugin's ``ensureServing`` cycle that
1475
+ * disposes + restarts on agent-identity changes) does not throw
1476
+ * ``Cannot use both tunnel: true and webhookUrl``.
1477
+ */
1478
+ private tunnelOwnsWebhookUrl;
1479
+ /**
1480
+ * Speech-edge events for turn-taking instrumentation. Public surface: the
1481
+ * seven `on*` proxy accessors below plus the `conversationState` snapshot.
1482
+ * Callbacks default to `null` — existing users who never set a callback see exactly
1483
+ * the previous behaviour.
1484
+ *
1485
+ * See `src/_speech-events.ts` for the full event taxonomy and the
1486
+ * industry-alignment table (LiveKit / Pipecat / OpenAI Realtime).
1487
+ */
1488
+ readonly speechEvents: SpeechEvents;
1489
+ get onUserSpeechStarted(): SpeechEventCallback | null;
1490
+ set onUserSpeechStarted(cb: SpeechEventCallback | null);
1491
+ get onUserSpeechEnded(): SpeechEventCallback | null;
1492
+ set onUserSpeechEnded(cb: SpeechEventCallback | null);
1493
+ get onUserSpeechEos(): SpeechEventCallback | null;
1494
+ set onUserSpeechEos(cb: SpeechEventCallback | null);
1495
+ get onAgentSpeechStarted(): SpeechEventCallback | null;
1496
+ set onAgentSpeechStarted(cb: SpeechEventCallback | null);
1497
+ get onAgentSpeechEnded(): SpeechEventCallback | null;
1498
+ set onAgentSpeechEnded(cb: SpeechEventCallback | null);
1499
+ get onLlmToken(): SpeechEventCallback | null;
1500
+ set onLlmToken(cb: SpeechEventCallback | null);
1501
+ get onAudioOut(): SpeechEventCallback | null;
1502
+ set onAudioOut(cb: SpeechEventCallback | null);
1503
+ /**
1504
+ * Snapshot of the current per-side state of the call.
1505
+ * Mirrors LiveKit's `user_state_changed` / `agent_state_changed`
1506
+ * payloads. Read-only and safe to call at any time.
1507
+ */
1508
+ get conversationState(): ConversationStateSnapshot;
853
1509
  /**
854
1510
  * Live `MetricsStore` for the embedded server. Returns `null` before
855
1511
  * `serve()` is called. Exposed so integrations like `PatterTool` can
@@ -857,12 +1513,73 @@ declare class Patter {
857
1513
  * `call_start`, `call_end`).
858
1514
  */
859
1515
  get metricsStore(): MetricsStore | null;
1516
+ /**
1517
+ * Resolves to the public webhook hostname as soon as it is known —
1518
+ * either statically configured or freshly minted by the tunnel.
1519
+ *
1520
+ * **Prefer `phone.ready` for outbound calls.** This promise resolves
1521
+ * before the embedded HTTP / WebSocket server is in `listen` state, so
1522
+ * a `phone.call` placed immediately afterwards can still race the
1523
+ * Twilio Media Streams upgrade and produce a "11100 Invalid URL
1524
+ * format" call drop on answer.
1525
+ *
1526
+ * Kept as a separate signal because some integrations (e.g. webhook
1527
+ * registration) only need the hostname, not the WS server.
1528
+ */
1529
+ get tunnelReady(): Promise<string>;
1530
+ /**
1531
+ * Resolves to the public webhook hostname once the SDK is fully ready
1532
+ * to handle carrier callbacks: tunnel resolved, carrier auto-config
1533
+ * complete, and the embedded HTTP / WS server in `listen` state.
1534
+ *
1535
+ * Use this for outbound calls instead of guessing a `setTimeout` delay after
1536
+ * `void phone.serve(...)`:
1537
+ *
1538
+ * ```ts
1539
+ * void phone.serve({ agent, tunnel: true });
1540
+ * await phone.ready;
1541
+ * await phone.call({ to: '+15550001234', agent });
1542
+ * ```
1543
+ *
1544
+ * Rejects with the underlying exception if `serve()` fails before the
1545
+ * server is listening.
1546
+ */
1547
+ get ready(): Promise<string>;
860
1548
  constructor(options: LocalOptions);
1549
+ /** Resolve user-supplied agent options against engine defaults and return the merged config. */
861
1550
  agent(opts: AgentOptions): AgentOptions;
1551
+ /** Boot the embedded HTTP/WebSocket server, configure the carrier webhook, and resolve `ready`. */
862
1552
  serve(opts: ServeOptions): Promise<void>;
1553
+ private _serveImpl;
1554
+ /** Run the agent in interactive terminal-test mode (no real telephony). */
863
1555
  test(opts: ServeOptions): Promise<void>;
1556
+ /** Place an outbound call via the configured carrier. */
864
1557
  call(options: LocalCallOptions): Promise<void>;
1558
+ /**
1559
+ * Stop the embedded server and any running tunnel. Safe to call multiple
1560
+ * times. Leaves the instance reusable: a subsequent ``serve()`` works as
1561
+ * if the previous lifecycle never happened.
1562
+ */
865
1563
  disconnect(): Promise<void>;
1564
+ /**
1565
+ * Terminate an active call on the configured carrier.
1566
+ *
1567
+ * Posts a hangup to the carrier (Twilio
1568
+ * ``Calls(callSid).update({status:'completed'})`` or Telnyx
1569
+ * ``/v2/calls/{callControlId}/actions/hangup``) so the bridge tears down
1570
+ * gracefully — the SDK's WebSocket handler then fires ``onCallEnd`` with
1571
+ * the final ``CallMetrics`` before the WS closes.
1572
+ *
1573
+ * Use this when the host application needs to end a call programmatically
1574
+ * without going through the LLM tool-call path (e.g. an admin override,
1575
+ * a watchdog, or an integration test runner).
1576
+ *
1577
+ * @param callSid - Carrier-issued call identifier (Twilio Call SID or
1578
+ * Telnyx call_control_id) returned from a previous ``call(...)`` or
1579
+ * captured in the ``onCallStart`` callback's payload.
1580
+ * @throws Error when ``callSid`` is empty or no carrier is configured.
1581
+ */
1582
+ endCall(callSid: string): Promise<void>;
866
1583
  }
867
1584
 
868
1585
  /**
@@ -909,13 +1626,23 @@ interface DefineToolInput {
909
1626
  */
910
1627
  declare function defineTool(input: DefineToolInput): ToolDefinition;
911
1628
 
1629
+ /**
1630
+ * Process-wide logger used by the SDK.
1631
+ *
1632
+ * Provides the in-library logger abstraction (`getLogger`/`setLogger`) and
1633
+ * default console-based implementation. Library code MUST use these helpers
1634
+ * rather than calling `console.*` directly so applications can route logs.
1635
+ */
1636
+ /** Minimal logger interface implemented by the default console logger and any user-supplied replacement. */
912
1637
  interface Logger {
913
1638
  info(message: string, ...args: unknown[]): void;
914
1639
  warn(message: string, ...args: unknown[]): void;
915
1640
  error(message: string, ...args: unknown[]): void;
916
1641
  debug(message: string, ...args: unknown[]): void;
917
1642
  }
1643
+ /** Return the active logger (defaults to a console-backed implementation). */
918
1644
  declare function getLogger(): Logger;
1645
+ /** Replace the process-wide logger; useful for routing SDK logs into a host app's logger. */
919
1646
  declare function setLogger(logger: Logger): void;
920
1647
 
921
1648
  /**
@@ -925,9 +1652,6 @@ declare function setLogger(logger: Logger): void;
925
1652
  * Uses regex-based marker replacement for robust sentence boundary
926
1653
  * detection, handling abbreviations, acronyms, decimals, websites,
927
1654
  * ellipsis, and CJK punctuation.
928
- *
929
- * Algorithm adapted from LiveKit Agents (Apache 2.0):
930
- * https://github.com/livekit/agents
931
1655
  */
932
1656
  /** Default minimum sentence length before emitting. */
933
1657
  declare const DEFAULT_MIN_SENTENCE_LEN = 20;
@@ -951,9 +1675,29 @@ declare class SentenceChunker {
951
1675
  private buffer;
952
1676
  private readonly minSentenceLen;
953
1677
  private readonly minWordsForShortFlush;
1678
+ private readonly aggressiveFirstMinLen;
1679
+ private readonly aggressiveFirstFlush;
1680
+ private readonly language;
1681
+ private isFirstFlush;
954
1682
  constructor(options?: {
955
1683
  minSentenceLen?: number;
956
1684
  minWordsForShortFlush?: number;
1685
+ /**
1686
+ * When true, the chunker emits the first clause of each response on a
1687
+ * soft punctuation boundary (",", em-dash, en-dash) once
1688
+ * `aggressiveFirstMinLen` characters accumulate. Saves 200-500 ms TTFA
1689
+ * on the first sentence of each turn. Subsequent sentences fall through
1690
+ * to the standard sentence-boundary path. Default: false.
1691
+ */
1692
+ aggressiveFirstFlush?: boolean;
1693
+ aggressiveFirstMinLen?: number;
1694
+ /**
1695
+ * BCP-47-ish language tag. Italian uses comma as decimal separator
1696
+ * (3,14) and dot as thousands (1.000) — both invert the English
1697
+ * convention — so aggressive comma flush is hard-disabled when language
1698
+ * starts with "it" regardless of `aggressiveFirstFlush`. Default: "en".
1699
+ */
1700
+ language?: string;
957
1701
  });
958
1702
  /**
959
1703
  * Feed a token. Returns zero or more complete sentences.
@@ -964,10 +1708,11 @@ declare class SentenceChunker {
964
1708
  * sentence, all but the last (potentially incomplete) are emitted.
965
1709
  * - **Short-flush path** — when the buffer is shorter than `minSentenceLen`
966
1710
  * but ends with a sentence terminator AND has at least
967
- * `minWordsForShortFlush` whitespace-separated words, emit it
968
- * immediately. This drops TTS TTFB on short greetings like `"Hi there!"`
969
- * while keeping single-word utterances (`"Sì."`) buffered until
970
- * `flush()`.
1711
+ * `minWordsForShortFlush` whitespace-separated words (default 1 — a
1712
+ * single-word reply like `"Yes."` flushes immediately for low TTS
1713
+ * TTFB). Acronym ("U.S.") and decimal ("f(x) = 2.") guards still block
1714
+ * dangerous cases. Bump `minWordsForShortFlush` to 2+ to keep
1715
+ * single-word utterances buffered until `flush()`.
971
1716
  */
972
1717
  push(token: string): string[];
973
1718
  /**
@@ -975,18 +1720,41 @@ declare class SentenceChunker {
975
1720
  *
976
1721
  * A buffer qualifies when **all** of these hold:
977
1722
  * 1. Last non-whitespace char is a sentence terminator.
978
- * 2. Word count is at least `minWordsForShortFlush` (default 2 keeps
979
- * single-word "Sì." / "Yes." buffered until `flush()`).
1723
+ * 2. Word count is at least `minWordsForShortFlush` (default 1 —
1724
+ * single-word replies like `"Yes."` flush immediately).
980
1725
  * 3. The buffer contains exactly one terminator (the trailing one).
981
1726
  * Multiple terminators mean we may be mid-stream of a longer merged
982
1727
  * utterance like `"Hey! Hi! Hello! This is a sentence."` — let the
983
1728
  * standard path keep merging.
984
1729
  * 4. The char immediately before the terminator is NOT a digit (avoids
985
1730
  * decimal mid-stream like `"f(x) = x * 2."` flushing before `54`).
986
- * 5. The char immediately before the terminator is NOT an uppercase
987
- * ASCII letter (avoids acronym patterns like `"U.S."` / `"U."`).
1731
+ * 5. The trailing word is NOT a short ASCII all-caps acronym of 1-3 chars
1732
+ * (`"U."` / `"U.S."` / `"USA."`).
1733
+ * 6. The trailing word is NOT a known honorific from any of the
1734
+ * per-language `HONORIFICS_*` constants (`"Mr."`, `"Sr."`, `"Dr."`,
1735
+ * `"Hr."`, `"Mme."`, ...).
988
1736
  */
989
1737
  private maybeShortFlush;
1738
+ /**
1739
+ * Try to flush the first clause of the response on a soft punctuation
1740
+ * boundary (comma / em-dash / en-dash) to minimise TTFA.
1741
+ *
1742
+ * Returns the flushed clause text (with terminator) or `null` if no safe
1743
+ * boundary is found. All of these guards must pass:
1744
+ *
1745
+ * 1. **Min length** — buffer ≥ `aggressiveFirstMinLen` (default 40).
1746
+ * 2. **Trailing terminator** — last non-whitespace char in `SOFT_TERMINATORS`.
1747
+ * 3. **Decimal/thousands guard** — refuse if comma is between two digits
1748
+ * or surrounded by digit-thousands grouping.
1749
+ * 4. **Currency guard** — refuse if a currency symbol appears in the
1750
+ * preceding 8 characters.
1751
+ * 5. **Balanced delimiter** — refuse if open parens/brackets/braces or
1752
+ * unmatched double-quotes still pending.
1753
+ * 6. **Ellipsis** — refuse if buffer ends with `...` or `…`.
1754
+ * 7. **Sub-token ambiguity** — only fire when at least one trailing char
1755
+ * after the terminator has arrived.
1756
+ */
1757
+ private maybeAggressiveFirstFlush;
990
1758
  /** Flush remaining buffer as final sentence(s). Call at end of stream. */
991
1759
  flush(): string[];
992
1760
  /** Discard buffered text. Call on interrupt. */
@@ -1019,21 +1787,83 @@ declare function filterEmoji(text: string): string;
1019
1787
  */
1020
1788
  declare function filterForTTS(text: string): string;
1021
1789
 
1790
+ /**
1791
+ * Public error taxonomy for the Patter SDK.
1792
+ *
1793
+ * Every Patter exception carries a stable, machine-readable {@link ErrorCode}
1794
+ * on its `code` property. Downstream code can branch on the code without
1795
+ * relying on class name strings or message parsing.
1796
+ *
1797
+ * The class hierarchy is preserved for backward compatibility — existing
1798
+ * `instanceof PatterConnectionError` checks keep working — and the enum is
1799
+ * purely additive.
1800
+ *
1801
+ * Mirrored byte-for-byte by the Python `ErrorCode` StrEnum in
1802
+ * `libraries/python/getpatter/exceptions.py`.
1803
+ */
1804
+ /**
1805
+ * Stable, machine-readable error codes attached to every Patter exception.
1806
+ *
1807
+ * Values are short, `UPPER_SNAKE_CASE` strings. Existing values must never
1808
+ * change — downstream callers branch on them. New codes are additive.
1809
+ *
1810
+ * This is shipped as a `const` object plus value-union type rather than a
1811
+ * TS `enum` so it's tree-shakeable and compatible with `verbatimModuleSyntax`.
1812
+ */
1813
+ declare const ErrorCode: {
1814
+ /** Invalid constructor args, missing required env var, frozen-config violation. */
1815
+ readonly CONFIG: "CONFIG";
1816
+ /** WebSocket connect failure, HTTP 5xx from provider, network error. */
1817
+ readonly CONNECTION: "CONNECTION";
1818
+ /** Provider rejected our credentials (HTTP 401/403, invalid signature). */
1819
+ readonly AUTH: "AUTH";
1820
+ /** Provider response, voicemail post, or other awaited operation timed out. */
1821
+ readonly TIMEOUT: "TIMEOUT";
1822
+ /** Provider returned HTTP 429. */
1823
+ readonly RATE_LIMIT: "RATE_LIMIT";
1824
+ /** Twilio / Telnyx webhook signature verification failed. */
1825
+ readonly WEBHOOK_VERIFICATION: "WEBHOOK_VERIFICATION";
1826
+ /** Caller passed a malformed phone number, tool arg, etc. */
1827
+ readonly INPUT_VALIDATION: "INPUT_VALIDATION";
1828
+ /** Generic catch-all for unexpected upstream provider failures. */
1829
+ readonly PROVIDER_ERROR: "PROVIDER_ERROR";
1830
+ /** Phone number provisioning, webhook configuration, or carrier setup failed. */
1831
+ readonly PROVISION: "PROVISION";
1832
+ /** Assertion failed / unexpected internal state. Likely a Patter bug. */
1833
+ readonly INTERNAL: "INTERNAL";
1834
+ };
1835
+ type ErrorCode = (typeof ErrorCode)[keyof typeof ErrorCode];
1836
+ /** Base class for every error thrown by the Patter SDK. */
1022
1837
  declare class PatterError extends Error {
1023
- constructor(message: string);
1838
+ /** Stable, machine-readable error code. Subclasses set the default. */
1839
+ readonly code: ErrorCode;
1840
+ constructor(message: string, options?: {
1841
+ code?: ErrorCode;
1842
+ });
1024
1843
  }
1844
+ /** Network / WebSocket / HTTP-level connectivity failure when talking to a provider. */
1025
1845
  declare class PatterConnectionError extends PatterError {
1026
- constructor(message: string);
1846
+ constructor(message: string, options?: {
1847
+ code?: ErrorCode;
1848
+ });
1027
1849
  }
1850
+ /** Provider rejected our credentials (HTTP 401/403, invalid webhook signature, etc.). */
1028
1851
  declare class AuthenticationError extends PatterError {
1029
- constructor(message: string);
1852
+ constructor(message: string, options?: {
1853
+ code?: ErrorCode;
1854
+ });
1030
1855
  }
1856
+ /** Phone-number provisioning or carrier setup failed. */
1031
1857
  declare class ProvisionError extends PatterError {
1032
- constructor(message: string);
1858
+ constructor(message: string, options?: {
1859
+ code?: ErrorCode;
1860
+ });
1033
1861
  }
1034
1862
  /** Thrown when a provider returns HTTP 429 on connect/upgrade. */
1035
1863
  declare class RateLimitError extends PatterConnectionError {
1036
- constructor(message: string);
1864
+ constructor(message: string, options?: {
1865
+ code?: ErrorCode;
1866
+ });
1037
1867
  }
1038
1868
 
1039
1869
  /**
@@ -1080,14 +1910,8 @@ declare function soniox(opts: {
1080
1910
  apiKey: string;
1081
1911
  language?: string;
1082
1912
  }): STTConfig;
1083
- /**
1084
- * Speechmatics STT config helper.
1085
- *
1086
- * NOTE: the Speechmatics adapter is currently Python-only. Calling this helper
1087
- * throws a clear error so callers can switch providers or use the Python SDK
1088
- * until the TS adapter ships.
1089
- */
1090
- declare function speechmatics(_opts: {
1913
+ /** Speechmatics real-time STT config helper. */
1914
+ declare function speechmatics(opts: {
1091
1915
  apiKey: string;
1092
1916
  language?: string;
1093
1917
  }): STTConfig;
@@ -1133,8 +1957,31 @@ declare function geminiLive(opts: {
1133
1957
  voice?: string;
1134
1958
  }): RealtimeConfig;
1135
1959
 
1960
+ /**
1961
+ * Billing units used by ``DEFAULT_PRICING`` entries. String values keep the
1962
+ * pricing table JSON-serialisable and backwards-compatible with consumers
1963
+ * that still compare against the raw strings.
1964
+ */
1965
+ declare const PricingUnit: {
1966
+ readonly MINUTE: "minute";
1967
+ readonly THOUSAND_CHARS: "1k_chars";
1968
+ readonly TOKEN: "token";
1969
+ };
1970
+ /** String value for one of the entries in `PricingUnit`. */
1971
+ type PricingUnitValue = (typeof PricingUnit)[keyof typeof PricingUnit];
1972
+ /** Per-model rate overrides — same shape as `ProviderPricing`, except `unit` is optional and the nested `models` map is omitted. */
1973
+ type ModelPricing = Omit<ProviderPricing, 'unit' | 'models'> & {
1974
+ unit?: PricingUnitValue | string;
1975
+ };
1976
+ /** Single provider's pricing entry inside `DEFAULT_PRICING` or a user override map. */
1136
1977
  interface ProviderPricing {
1137
- unit: string;
1978
+ /**
1979
+ * Billing unit. The library ships with values from {@link PricingUnit},
1980
+ * but the field stays ``string`` so user overrides loaded from JSON /
1981
+ * env config (which are unconstrained at the type system) keep flowing
1982
+ * through ``mergePricing`` without type assertions.
1983
+ */
1984
+ unit: PricingUnitValue | string;
1138
1985
  price?: number;
1139
1986
  audio_input_per_token?: number;
1140
1987
  audio_output_per_token?: number;
@@ -1142,17 +1989,51 @@ interface ProviderPricing {
1142
1989
  text_output_per_token?: number;
1143
1990
  cached_audio_input_per_token?: number;
1144
1991
  cached_text_input_per_token?: number;
1992
+ /**
1993
+ * Per-model rate overrides keyed by model identifier. When the cost-calc
1994
+ * function receives a ``model`` arg, the matching entry overlays the
1995
+ * provider defaults; missing models fall back to the surrounding rates
1996
+ * (legacy behaviour). Longest-prefix match handles versioned IDs like
1997
+ * ``gpt-realtime-2-2026-05`` against ``gpt-realtime-2``. See
1998
+ * {@link resolveProviderRates}.
1999
+ */
2000
+ models?: Record<string, ModelPricing>;
1145
2001
  }
2002
+ /**
2003
+ * Built-in pricing table — overridable via `Patter({ pricing: {...} })`.
2004
+ *
2005
+ * Each provider entry carries provider-level defaults plus an optional
2006
+ * `models` map for per-model overrides. When the cost-calc function gets a
2007
+ * model arg it auto-resolves via {@link resolveProviderRates} (longest-prefix
2008
+ * fallback for versioned model IDs). Empty/unknown model → provider defaults.
2009
+ */
1146
2010
  declare const DEFAULT_PRICING: Record<string, ProviderPricing>;
1147
2011
  /**
1148
2012
  * Merge user overrides into a copy of DEFAULT_PRICING.
1149
- * Performs a shallow per-provider merge.
2013
+ *
2014
+ * Performs a per-provider shallow merge with one exception: the nested
2015
+ * ``models`` dict is itself merged shallowly (per-model entries replace
2016
+ * the default entry but unmentioned models keep their built-in rates).
2017
+ * A user override of ``{ deepgram: { models: { 'nova-2': { price: 0.01 } } } }``
2018
+ * keeps every other Deepgram model rate intact.
1150
2019
  */
1151
2020
  declare function mergePricing(overrides?: Record<string, Partial<ProviderPricing>> | null): Record<string, ProviderPricing>;
1152
- /** Calculate STT cost from audio duration. */
1153
- declare function calculateSttCost(provider: string, audioSeconds: number, pricing: Record<string, ProviderPricing>): number;
1154
- /** Calculate TTS cost from character count. */
1155
- declare function calculateTtsCost(provider: string, characterCount: number, pricing: Record<string, ProviderPricing>): number;
2021
+ /**
2022
+ * Calculate STT cost from audio duration.
2023
+ *
2024
+ * When ``model`` is supplied and the provider entry has a matching
2025
+ * ``models`` override, the per-model rate is used; otherwise falls back
2026
+ * to the provider-level rate (legacy behaviour, model omitted).
2027
+ */
2028
+ declare function calculateSttCost(provider: string, audioSeconds: number, pricing: Record<string, ProviderPricing>, model?: string | null): number;
2029
+ /**
2030
+ * Calculate TTS cost from character count.
2031
+ *
2032
+ * When ``model`` is supplied and the provider entry has a matching
2033
+ * ``models`` override, the per-model rate is used; otherwise falls back
2034
+ * to the provider-level rate (legacy behaviour, model omitted).
2035
+ */
2036
+ declare function calculateTtsCost(provider: string, characterCount: number, pricing: Record<string, ProviderPricing>, model?: string | null): number;
1156
2037
  /**
1157
2038
  * Calculate OpenAI Realtime cost from token usage.
1158
2039
  *
@@ -1176,7 +2057,7 @@ declare function calculateRealtimeCost(usage: {
1176
2057
  audio_tokens?: number;
1177
2058
  text_tokens?: number;
1178
2059
  };
1179
- }, pricing: Record<string, ProviderPricing>): number;
2060
+ }, pricing: Record<string, ProviderPricing>, model?: string | null): number;
1180
2061
  /**
1181
2062
  * Calculate telephony cost from call duration.
1182
2063
  *
@@ -1192,6 +2073,7 @@ declare function calculateTelephonyCost(provider: string, durationSeconds: numbe
1192
2073
  * Port of the Python `CallMetricsAccumulator` from `sdk/patter/services/metrics.py`.
1193
2074
  */
1194
2075
 
2076
+ /** Per-turn latency breakdown across the STT/LLM/TTS pipeline. */
1195
2077
  interface LatencyBreakdown {
1196
2078
  stt_ms: number;
1197
2079
  /**
@@ -1228,7 +2110,21 @@ interface LatencyBreakdown {
1228
2110
  * TTS audio byte sent. Optional — undefined when TTS never completed.
1229
2111
  */
1230
2112
  tts_total_ms?: number;
2113
+ /**
2114
+ * **User-perceived agent response latency**: time from end-of-user-speech
2115
+ * (VAD stop or STT ``speech_final``) to the first audio byte the agent
2116
+ * sent back. Computed as ``endpoint_ms + llm_ttft_ms + tts_ms`` when all
2117
+ * three signals are available — falls back to undefined otherwise.
2118
+ *
2119
+ * This is the metric you should watch for SLO / p95 dashboards. Unlike
2120
+ * ``total_ms`` (which spans the user's entire utterance and therefore
2121
+ * grows with how long the user spoke), ``agent_response_ms`` isolates
2122
+ * the system-controlled latency: silence detection + LLM TTFT + TTS
2123
+ * first byte.
2124
+ */
2125
+ agent_response_ms?: number;
1231
2126
  }
2127
+ /** Per-call cost breakdown by component (STT/TTS/LLM/telephony) plus the total. */
1232
2128
  interface CostBreakdown {
1233
2129
  stt: number;
1234
2130
  tts: number;
@@ -1242,6 +2138,7 @@ interface CostBreakdown {
1242
2138
  */
1243
2139
  llm_cached_savings?: number;
1244
2140
  }
2141
+ /** Metrics captured for a single conversation turn. */
1245
2142
  interface TurnMetrics {
1246
2143
  turn_index: number;
1247
2144
  user_text: string;
@@ -1251,6 +2148,7 @@ interface TurnMetrics {
1251
2148
  tts_characters: number;
1252
2149
  timestamp: number;
1253
2150
  }
2151
+ /** Aggregated metrics for an entire call (turns, costs, latency percentiles). */
1254
2152
  interface CallMetrics {
1255
2153
  call_id: string;
1256
2154
  duration_seconds: number;
@@ -1267,6 +2165,7 @@ interface CallMetrics {
1267
2165
  llm_provider: string;
1268
2166
  telephony_provider: string;
1269
2167
  }
2168
+ /** Programmatic control surface for a live call (transfer, hangup, DTMF). */
1270
2169
  interface CallControl {
1271
2170
  /** Transfer the call to a different number or SIP URI. */
1272
2171
  transfer(number: string): Promise<void>;
@@ -1288,6 +2187,7 @@ interface CallControl {
1288
2187
  /** Callee number. */
1289
2188
  readonly callee: string;
1290
2189
  }
2190
+ /** Mutable per-call accumulator that stamps timestamps and emits final `CallMetrics`. */
1291
2191
  declare class CallMetricsAccumulator {
1292
2192
  callId: string;
1293
2193
  readonly providerMode: string;
@@ -1295,6 +2195,14 @@ declare class CallMetricsAccumulator {
1295
2195
  readonly sttProvider: string;
1296
2196
  readonly ttsProvider: string;
1297
2197
  readonly llmProvider: string;
2198
+ /**
2199
+ * Model identifiers for per-model rate resolution (see pricing.ts). Empty
2200
+ * string means "not known" → cost calc falls back to provider defaults,
2201
+ * matching pre-2026.3 behaviour.
2202
+ */
2203
+ readonly sttModel: string;
2204
+ readonly ttsModel: string;
2205
+ readonly realtimeModel: string;
1298
2206
  private readonly _pricing;
1299
2207
  private readonly _callStart;
1300
2208
  private readonly _turns;
@@ -1349,6 +2257,12 @@ declare class CallMetricsAccumulator {
1349
2257
  sttProvider?: string;
1350
2258
  ttsProvider?: string;
1351
2259
  llmProvider?: string;
2260
+ /** Model identifier for the STT adapter (e.g. ``"nova-3-multilingual"``). */
2261
+ sttModel?: string;
2262
+ /** Model identifier for the TTS adapter (e.g. ``"eleven_multilingual_v2"``). */
2263
+ ttsModel?: string;
2264
+ /** Model identifier for the realtime adapter (e.g. ``"gpt-realtime-2"``). */
2265
+ realtimeModel?: string;
1352
2266
  pricing?: Record<string, Partial<ProviderPricing>> | null;
1353
2267
  eventBus?: EventBus;
1354
2268
  /** When true, only the first TTFB emission per call is forwarded to the event bus. */
@@ -1363,6 +2277,7 @@ declare class CallMetricsAccumulator {
1363
2277
  configureSttFormat(sampleRate?: number, bytesPerSample?: number): void;
1364
2278
  /** Whether a turn is currently being measured (startTurn called, not yet completed). */
1365
2279
  get turnActive(): boolean;
2280
+ /** Begin a new turn — stamps the turn start timestamp and resets per-turn state. */
1366
2281
  startTurn(): void;
1367
2282
  /**
1368
2283
  * Start a new turn only if no turn is currently open.
@@ -1370,6 +2285,7 @@ declare class CallMetricsAccumulator {
1370
2285
  * on the first audio byte rather than just before recordSttComplete().
1371
2286
  */
1372
2287
  startTurnIfIdle(): void;
2288
+ /** Stamp end-of-STT, capture the user's transcript, and accrue billed STT seconds. */
1373
2289
  recordSttComplete(text: string, audioSeconds?: number): void;
1374
2290
  /** Record the timestamp of the first LLM token (TTFT). No-op after first call. */
1375
2291
  recordLlmFirstToken(): void;
@@ -1380,8 +2296,11 @@ declare class CallMetricsAccumulator {
1380
2296
  * No-op after first call.
1381
2297
  */
1382
2298
  recordLlmFirstSentenceComplete(): void;
2299
+ /** Stamp end-of-LLM (last token received). */
1383
2300
  recordLlmComplete(): void;
2301
+ /** Stamp first TTS audio byte sent on the wire (used to compute TTS TTFB). */
1384
2302
  recordTtsFirstByte(): void;
2303
+ /** Record final TTS text length and stamp the last-byte timestamp. */
1385
2304
  recordTtsComplete(text: string): void;
1386
2305
  /**
1387
2306
  * Capture the timestamp when the last TTS audio byte was sent on the wire.
@@ -1401,7 +2320,9 @@ declare class CallMetricsAccumulator {
1401
2320
  * to compute ``bargein_ms``.
1402
2321
  */
1403
2322
  recordTtsStopped(ts?: number): void;
2323
+ /** Close the current turn cleanly and append a `TurnMetrics` record. */
1404
2324
  recordTurnComplete(agentText: string): TurnMetrics;
2325
+ /** Close the current turn as interrupted (barge-in) and return the recorded metrics. */
1405
2326
  recordTurnInterrupted(): TurnMetrics | null;
1406
2327
  /**
1407
2328
  * Record the moment VAD emitted speech_end for the current utterance.
@@ -1435,6 +2356,7 @@ declare class CallMetricsAccumulator {
1435
2356
  * ``transcriptionDelay`` = turnCommitted − vadStopped (ms)
1436
2357
  * ``onUserTurnCompletedDelay`` = caller-supplied delta (ms) or 0
1437
2358
  */
2359
+ /** Emit `EOUMetrics` once VAD-stop, STT-final, and turn-committed timestamps are all known. */
1438
2360
  emitEouMetrics(): void;
1439
2361
  /**
1440
2362
  * Record that a caller utterance started overlapping with agent speech.
@@ -1451,7 +2373,16 @@ declare class CallMetricsAccumulator {
1451
2373
  * @param ts Optional override timestamp in hrTimeMs units.
1452
2374
  */
1453
2375
  recordOverlapEnd(wasInterruption: boolean, ts?: number): void;
2376
+ /** Accumulate inbound STT audio bytes for cost calculation when seconds are unknown. */
1454
2377
  addSttAudioBytes(byteCount: number): void;
2378
+ /**
2379
+ * Record an OpenAI Realtime usage payload and roll up its cost + cached-savings.
2380
+ *
2381
+ * `model` allows the cost calc to pick the per-model rate (e.g.
2382
+ * `gpt-realtime-2`). Defaults to whatever was supplied at construction
2383
+ * time (`this.realtimeModel`); pass an explicit value to override per-call
2384
+ * (the `response.done` payload carries the model used).
2385
+ */
1455
2386
  recordRealtimeUsage(usage: {
1456
2387
  input_token_details?: {
1457
2388
  audio_tokens?: number;
@@ -1465,8 +2396,10 @@ declare class CallMetricsAccumulator {
1465
2396
  audio_tokens?: number;
1466
2397
  text_tokens?: number;
1467
2398
  };
1468
- }): void;
2399
+ }, model?: string | null): void;
2400
+ /** Override the carrier-billed telephony cost (e.g. exact value reported via Twilio API). */
1469
2401
  setActualTelephonyCost(cost: number): void;
2402
+ /** Override the provider-billed STT cost when an exact figure is available. */
1470
2403
  setActualSttCost(cost: number): void;
1471
2404
  /**
1472
2405
  * Accumulate LLM token cost for pipeline mode (non-Realtime).
@@ -1482,7 +2415,9 @@ declare class CallMetricsAccumulator {
1482
2415
  * @param cacheWriteTokens Cache write tokens (billed at cache_write rate if present)
1483
2416
  */
1484
2417
  recordLlmUsage(provider: string, model: string, inputTokens: number, outputTokens: number, cacheReadTokens?: number, cacheWriteTokens?: number): void;
2418
+ /** Finalize the call: flush any in-flight turn, compute aggregates, and return `CallMetrics`. */
1485
2419
  endCall(): CallMetrics;
2420
+ /** Return the cost breakdown for the call so far without ending it. */
1486
2421
  getCostSoFar(): CostBreakdown;
1487
2422
  private _resetTurnState;
1488
2423
  private _computeTurnLatency;
@@ -1499,15 +2434,31 @@ declare class CallMetricsAccumulator {
1499
2434
  private _computePercentileLatency;
1500
2435
  }
1501
2436
 
2437
+ /**
2438
+ * OpenAI Realtime WebSocket adapter for Patter's realtime mode.
2439
+ *
2440
+ * Wraps `wss://api.openai.com/v1/realtime` and exposes the unified
2441
+ * Patter realtime contract (`connect / sendAudio / onEvent / close`) on
2442
+ * {@link OpenAIRealtimeAdapter}. Audio negotiation defaults to
2443
+ * `g711_ulaw` so traffic flows through Twilio/Telnyx without transcoding.
2444
+ */
1502
2445
  /**
1503
2446
  * Supported OpenAI Realtime wire audio formats. See
1504
2447
  * https://platform.openai.com/docs/guides/realtime for the full list.
1505
- * ``g711_ulaw`` matches what Twilio/Telnyx emit natively on the phone leg,
1506
- * so no transcoding is needed. ``pcm16`` is used in the terminal test-mode
1507
- * path and when the telephony provider negotiates L16/16000.
2448
+ * `G711_ULAW` matches what Twilio/Telnyx emit natively on the phone leg, so
2449
+ * no transcoding is needed. `PCM16` is used in the terminal test-mode path
2450
+ * and when the telephony provider negotiates L16/16000.
1508
2451
  */
1509
- type OpenAIRealtimeAudioFormat = 'g711_ulaw' | 'g711_alaw' | 'pcm16';
2452
+ declare const OpenAIRealtimeAudioFormat: {
2453
+ readonly G711_ULAW: "g711_ulaw";
2454
+ readonly G711_ALAW: "g711_alaw";
2455
+ readonly PCM16: "pcm16";
2456
+ };
2457
+ /** Union of {@link OpenAIRealtimeAudioFormat} string values. */
2458
+ type OpenAIRealtimeAudioFormat = (typeof OpenAIRealtimeAudioFormat)[keyof typeof OpenAIRealtimeAudioFormat];
2459
+ /** Callback signature for events emitted by {@link OpenAIRealtimeAdapter}. */
1510
2460
  type RealtimeEventCallback = (type: string, data: unknown) => void | Promise<void>;
2461
+ /** Constructor options for {@link OpenAIRealtimeAdapter}. */
1511
2462
  interface OpenAIRealtimeOptions {
1512
2463
  temperature?: number;
1513
2464
  maxResponseOutputTokens?: number | 'inf';
@@ -1522,7 +2473,15 @@ interface OpenAIRealtimeOptions {
1522
2473
  * Increase for dictation-style flows where the user pauses mid-sentence.
1523
2474
  */
1524
2475
  silenceDurationMs?: number;
2476
+ /**
2477
+ * Reasoning-effort tier for `gpt-realtime-2`. When omitted the field is
2478
+ * not sent and the server default applies. OpenAI recommends `"low"` for
2479
+ * production voice flows — higher tiers add measurable per-turn latency.
2480
+ * Has no effect on models that don't support the `reasoning` field.
2481
+ */
2482
+ reasoningEffort?: 'minimal' | 'low' | 'medium' | 'high';
1525
2483
  }
2484
+ /** Realtime WebSocket adapter for OpenAI's `gpt-realtime` family. */
1526
2485
  declare class OpenAIRealtimeAdapter {
1527
2486
  private readonly apiKey;
1528
2487
  private readonly model;
@@ -1536,13 +2495,17 @@ declare class OpenAIRealtimeAdapter {
1536
2495
  private heartbeat;
1537
2496
  private currentResponseItemId;
1538
2497
  private currentResponseAudioMs;
2498
+ private currentResponseFirstAudioAt;
1539
2499
  private readonly options;
1540
2500
  constructor(apiKey: string, model?: string, voice?: string, instructions?: string, tools?: Array<{
1541
2501
  name: string;
1542
2502
  description: string;
1543
2503
  parameters: Record<string, unknown>;
2504
+ strict?: boolean;
1544
2505
  }> | undefined, audioFormat?: OpenAIRealtimeAudioFormat, options?: OpenAIRealtimeOptions);
2506
+ /** Open the Realtime WebSocket and apply the session configuration. */
1545
2507
  connect(): Promise<void>;
2508
+ /** Append an audio chunk to the realtime input buffer, base64-encoding it for the wire. */
1546
2509
  sendAudio(mulawAudio: Buffer): void;
1547
2510
  /**
1548
2511
  * Register a listener for parsed realtime events.
@@ -1553,14 +2516,54 @@ declare class OpenAIRealtimeAdapter {
1553
2516
  * a Set of callbacks. Use {@link offEvent} to remove one.
1554
2517
  */
1555
2518
  onEvent(callback: RealtimeEventCallback): void;
2519
+ /** Remove a previously registered {@link onEvent} callback. */
1556
2520
  offEvent(callback: RealtimeEventCallback): void;
1557
2521
  private ensureMessageListener;
2522
+ /** Truncate the in-flight assistant turn and cancel the active response.
2523
+ *
2524
+ * ``audio_end_ms`` MUST reflect what the caller actually heard, not what
2525
+ * the server generated. OpenAI streams audio at 5-10x real-time, so the
2526
+ * byte-derived counter overstates playback whenever the consumer cleared
2527
+ * its playout buffer (e.g. ``send_clear``) before the audio reached the
2528
+ * speaker. We bound the truncate point by wall-clock time since the first
2529
+ * chunk of this response — that's the physical maximum a 1x real-time
2530
+ * playback could have produced. Without this cap, OpenAI keeps the full
2531
+ * generated assistant text on the transcript, and the model replays /
2532
+ * resumes from it on the next turn — manifesting as re-greetings and
2533
+ * mid-sentence fragments after a barge-in storm.
2534
+ */
1558
2535
  cancelResponse(): void;
2536
+ /** Inject a user text turn and request a new response. */
1559
2537
  sendText(text: string): Promise<void>;
2538
+ /**
2539
+ * Make the AI speak ``text`` as its opening line.
2540
+ *
2541
+ * Triggers ``response.create`` with explicit ``instructions`` that force
2542
+ * the model to render ``text`` verbatim as its first audio utterance.
2543
+ * This is the correct semantics for ``Agent.firstMessage`` per its
2544
+ * docstring ("What the AI says when the callee answers").
2545
+ *
2546
+ * Without this, ``sendText(firstMessage)`` would inject ``text`` as
2547
+ * ``role: user`` and the AI would *reply* to its own greeting, producing
2548
+ * role-confused openings (e.g. a receptionist agent responding "I'd like
2549
+ * to schedule a haircut" because it took its own first_message as a
2550
+ * customer cue).
2551
+ */
2552
+ sendFirstMessage(text: string): Promise<void>;
2553
+ /** Submit a tool/function-call result and request the next response. */
1560
2554
  sendFunctionResult(callId: string, result: string): Promise<void>;
2555
+ /** Stop the heartbeat, drop listeners, and close the Realtime WebSocket. */
1561
2556
  close(): void;
1562
2557
  }
1563
2558
 
2559
+ /**
2560
+ * ElevenLabs Conversational AI (ConvAI) WebSocket adapter for Patter.
2561
+ *
2562
+ * Wraps the `wss://api.elevenlabs.io/v1/convai/conversation` endpoint and
2563
+ * normalises agent audio + transcript + control events into a single
2564
+ * `onEvent(type, data)` callback. See {@link ElevenLabsConvAIAdapter}.
2565
+ */
2566
+ /** Constructor options for {@link ElevenLabsConvAIAdapter}. */
1564
2567
  interface ElevenLabsConvAIOptions {
1565
2568
  apiKey: string;
1566
2569
  agentId?: string;
@@ -1573,6 +2576,7 @@ interface ElevenLabsConvAIOptions {
1573
2576
  useSignedUrl?: boolean;
1574
2577
  }
1575
2578
  type EventCallback = (type: string, data: unknown) => void | Promise<void>;
2579
+ /** WebSocket adapter for ElevenLabs ConvAI managed-agent conversations. */
1576
2580
  declare class ElevenLabsConvAIAdapter {
1577
2581
  private ws;
1578
2582
  private eventCallback;
@@ -1613,6 +2617,7 @@ declare class ElevenLabsConvAIAdapter {
1613
2617
  */
1614
2618
  static forTelnyx(apiKey: string, agentId: string, options?: Omit<ElevenLabsConvAIOptions, 'apiKey' | 'agentId' | 'outputAudioFormat' | 'inputAudioFormat'>): ElevenLabsConvAIAdapter;
1615
2619
  private fetchSignedUrl;
2620
+ /** Open the ConvAI WebSocket and send the conversation init payload. */
1616
2621
  connect(): Promise<void>;
1617
2622
  private safeInvoke;
1618
2623
  private respondToPing;
@@ -1620,8 +2625,11 @@ declare class ElevenLabsConvAIAdapter {
1620
2625
  private finalizeAgentTurn;
1621
2626
  private scheduleSilenceDone;
1622
2627
  private handleMessage;
2628
+ /** Send a caller-side audio chunk to ConvAI as a base64 `user_audio_chunk`. */
1623
2629
  sendAudio(audioBytes: Buffer): void;
2630
+ /** Register the event callback that receives ConvAI server messages. */
1624
2631
  onEvent(callback: EventCallback): void;
2632
+ /** Close the ConvAI WebSocket and release the event callback. */
1625
2633
  close(): Promise<void>;
1626
2634
  }
1627
2635
 
@@ -1632,6 +2640,7 @@ declare class ElevenLabsConvAIAdapter {
1632
2640
  * - HTTP webhook: onMessage="https://api.customer.com/patter/message"
1633
2641
  * - WebSocket: onMessage="ws://localhost:9000/stream"
1634
2642
  */
2643
+ /** Dispatches per-turn messages to a remote HTTP webhook or WebSocket endpoint. */
1635
2644
  declare class RemoteMessageHandler {
1636
2645
  private readonly webhookSecret;
1637
2646
  /**
@@ -1675,6 +2684,12 @@ declare function isRemoteUrl(onMessage: unknown): onMessage is string;
1675
2684
  /** Check if a URL is a WebSocket URL. */
1676
2685
  declare function isWebSocketUrl(url: string): boolean;
1677
2686
 
2687
+ /**
2688
+ * Embedded HTTP/WebSocket server — wires Express webhooks for the configured
2689
+ * carrier (Twilio or Telnyx) into the per-call `StreamHandler` and dashboard.
2690
+ */
2691
+
2692
+ /** Resolved configuration consumed by `EmbeddedServer` (carrier credentials, webhook URL, etc.). */
1678
2693
  interface LocalConfig {
1679
2694
  twilioSid?: string;
1680
2695
  twilioToken?: string;
@@ -1699,6 +2714,14 @@ interface LocalConfig {
1699
2714
  * Set to false only for local development against mock providers.
1700
2715
  */
1701
2716
  requireSignature?: boolean;
2717
+ /**
2718
+ * Resolved on-disk persistence root for the dashboard's call history,
2719
+ * or ``null`` to disable. Computed by ``client.ts`` from the public
2720
+ * ``LocalOptions.persist`` option (with ``PATTER_LOG_DIR`` env-var
2721
+ * fallback). When ``null``, `CallLogger` is a no-op and the dashboard
2722
+ * is in-memory-only — restarts wipe history.
2723
+ */
2724
+ persistRoot?: string | null;
1702
2725
  }
1703
2726
 
1704
2727
  /**
@@ -1709,6 +2732,7 @@ interface LocalConfig {
1709
2732
  * - ?token=<token> query parameter
1710
2733
  */
1711
2734
 
2735
+ /** Build an Express middleware that gates the dashboard behind a static bearer token. */
1712
2736
  declare function makeAuthMiddleware(token?: string): (req: Request, res: Response, next: NextFunction) => void;
1713
2737
 
1714
2738
  /**
@@ -1747,7 +2771,9 @@ declare function callsToJson(calls: CallRecord[]): string;
1747
2771
  * GET /api/v1/analytics/costs - B2B cost breakdown
1748
2772
  */
1749
2773
 
2774
+ /** Mount the dashboard UI + read-only `/api/dashboard/*` routes onto an Express app. */
1750
2775
  declare function mountDashboard(app: Express, store: MetricsStore, token?: string): void;
2776
+ /** Mount the B2B-style `/api/v1/*` JSON routes onto an Express app. */
1751
2777
  declare function mountApi(app: Express, store: MetricsStore, token?: string): void;
1752
2778
 
1753
2779
  /**
@@ -1758,11 +2784,19 @@ declare function mountApi(app: Express, store: MetricsStore, token?: string): vo
1758
2784
  * nothing is written to disk.
1759
2785
  *
1760
2786
  * TODO(parity): Python's `notify_dashboard` is now an async fire-and-forget
1761
- * coroutine (see sdk-py/getpatter/dashboard/persistence.py). This TS version
2787
+ * coroutine (see libraries/python/getpatter/dashboard/persistence.py). This TS version
1762
2788
  * uses `http.request` which is already non-blocking, but for parity consider
1763
2789
  * exposing this as `async function notifyDashboard(...): Promise<void>` so
1764
2790
  * call sites can `await` or `void` it explicitly, matching the Python API.
1765
2791
  */
2792
+ /**
2793
+ * Fire-and-forget POST a completed call payload into a locally-running dashboard, if any.
2794
+ *
2795
+ * Skipped entirely when ``PATTER_DASHBOARD_NOTIFY`` is set to ``0``/``false``
2796
+ * (case-insensitive). This avoids 404 spam in the receiver's access log
2797
+ * when callers embed Patter alongside their own HTTP server on port
2798
+ * 8000 (e.g. agent-to-agent test runners).
2799
+ */
1766
2800
  declare function notifyDashboard(callData: Record<string, unknown>, port?: number): void;
1767
2801
 
1768
2802
  /**
@@ -1774,6 +2808,7 @@ declare function notifyDashboard(callData: Record<string, unknown>, port?: numbe
1774
2808
  * background.
1775
2809
  */
1776
2810
 
2811
+ /** Constructor options for `FallbackLLMProvider`. */
1777
2812
  interface FallbackLLMProviderOptions {
1778
2813
  /** Number of retry attempts per provider before moving to the next (default 1). */
1779
2814
  readonly maxRetryPerProvider?: number;
@@ -1788,6 +2823,7 @@ declare class AllProvidersFailedError extends Error {
1788
2823
  declare class PartialStreamError extends Error {
1789
2824
  constructor(message: string);
1790
2825
  }
2826
+ /** LLM provider that delegates to a sequence of underlying providers, falling back on failure. */
1791
2827
  declare class FallbackLLMProvider implements LLMProvider {
1792
2828
  private readonly providers;
1793
2829
  private readonly availability;
@@ -1820,6 +2856,7 @@ declare class FallbackLLMProvider implements LLMProvider {
1820
2856
  * directly.
1821
2857
  */
1822
2858
  completeStream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<string, void, unknown>;
2859
+ /** Streaming entry point — yields chunks from the first provider that succeeds. */
1823
2860
  stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
1824
2861
  private tryProviders;
1825
2862
  private markUnavailable;
@@ -1927,6 +2964,7 @@ declare const PARAMETERS_SCHEMA: {
1927
2964
  };
1928
2965
  readonly required: readonly ["to"];
1929
2966
  };
2967
+ /** Constructor options for `PatterTool`. */
1930
2968
  interface PatterToolOptions {
1931
2969
  /**
1932
2970
  * Patter instance to dial through. Must be in local mode (have a `carrier`).
@@ -1950,12 +2988,14 @@ interface PatterToolOptions {
1950
2988
  */
1951
2989
  recording?: boolean;
1952
2990
  }
2991
+ /** Args accepted by `PatterTool.execute()` (and the OpenAI/Anthropic/Hermes tool schemas). */
1953
2992
  interface PatterToolExecuteArgs {
1954
2993
  to: string;
1955
2994
  goal?: string;
1956
2995
  first_message?: string;
1957
2996
  max_duration_sec?: number;
1958
2997
  }
2998
+ /** Result envelope returned by `PatterTool.execute()` once the underlying call ends. */
1959
2999
  interface PatterToolResult {
1960
3000
  call_id: string;
1961
3001
  status: string;
@@ -1968,6 +3008,7 @@ interface PatterToolResult {
1968
3008
  }>;
1969
3009
  metrics?: Record<string, unknown> | null;
1970
3010
  }
3011
+ /** Wraps a live `Patter` instance as a tool callable from external agent frameworks. */
1971
3012
  declare class PatterTool {
1972
3013
  readonly name: string;
1973
3014
  readonly description: string;
@@ -2023,6 +3064,7 @@ declare class PatterTool {
2023
3064
  start(): Promise<void>;
2024
3065
  /** Stop the underlying Patter server (and reject any pending calls). */
2025
3066
  stop(): Promise<void>;
3067
+ /** Place an outbound call and resolve once it ends with the transcript and metrics. */
2026
3068
  execute(args: PatterToolExecuteArgs): Promise<PatterToolResult>;
2027
3069
  /** Issue the outbound dial under the mutex and return its assigned call_id. */
2028
3070
  private acquireCallId;
@@ -2043,7 +3085,9 @@ declare class PatterTool {
2043
3085
  * input/output in the terminal. Useful for rapid agent development.
2044
3086
  */
2045
3087
 
3088
+ /** Drives an interactive terminal-based test "call" against an agent. */
2046
3089
  declare class TestSession {
3090
+ /** Run a REPL-style session that loops user input through the agent's LLM/onMessage handler. */
2047
3091
  run(opts: {
2048
3092
  agent: AgentOptions;
2049
3093
  openaiKey?: string;
@@ -2056,9 +3100,8 @@ declare class TestSession {
2056
3100
  /**
2057
3101
  * Gemini Live realtime adapter.
2058
3102
  *
2059
- * Partially adapted (~65% port) from LiveKit Agents
2060
- * (livekit-plugins-google, Apache 2.0). Reframed to Patter's realtime adapter
2061
- * surface — connect / sendAudio / onEvent / close — matching OpenAIRealtimeAdapter.
3103
+ * Implements Patter's realtime adapter surface (connect / sendAudio /
3104
+ * onEvent / close), matching OpenAIRealtimeAdapter.
2062
3105
  *
2063
3106
  * Uses the @google/genai SDK lazily imported at connect() so consumers that do
2064
3107
  * not use Gemini Live do not pay the load cost. Install with:
@@ -2073,6 +3116,7 @@ declare class TestSession {
2073
3116
  */
2074
3117
  declare const GEMINI_DEFAULT_INPUT_SR = 16000;
2075
3118
  declare const GEMINI_DEFAULT_OUTPUT_SR = 24000;
3119
+ /** Callback signature for events emitted by {@link GeminiLiveAdapter}. */
2076
3120
  type GeminiLiveEventHandler = (type: 'audio' | 'transcript_output' | 'function_call' | 'speech_started' | 'response_done' | 'error', data: unknown) => void | Promise<void>;
2077
3121
  interface GeminiLiveOptions {
2078
3122
  model?: string;
@@ -2088,6 +3132,7 @@ interface GeminiLiveOptions {
2088
3132
  outputSampleRate?: number;
2089
3133
  temperature?: number;
2090
3134
  }
3135
+ /** Realtime adapter for Google's Gemini Live native-audio API. */
2091
3136
  declare class GeminiLiveAdapter {
2092
3137
  private readonly apiKey;
2093
3138
  private readonly model;
@@ -2111,28 +3156,33 @@ declare class GeminiLiveAdapter {
2111
3156
  */
2112
3157
  private pendingToolCalls;
2113
3158
  constructor(apiKey: string, options?: GeminiLiveOptions);
3159
+ /** Lazily import @google/genai, open a Live session, and start the receive loop. */
2114
3160
  connect(): Promise<void>;
3161
+ /** Send a PCM audio chunk to Gemini as base64 inline data. */
2115
3162
  sendAudio(pcm: Buffer): void;
3163
+ /** Send a text turn to Gemini and mark the turn complete. */
2116
3164
  sendText(text: string): Promise<void>;
3165
+ /** Send a tool/function-call result back to Gemini. */
2117
3166
  sendFunctionResult(callId: string, result: string): Promise<void>;
3167
+ /** No-op — Gemini Live barge-in is VAD-driven, not client-cancelled. */
2118
3168
  cancelResponse(): void;
3169
+ /** Register an event handler that receives every Gemini Live event. */
2119
3170
  onEvent(handler: GeminiLiveEventHandler): void;
2120
3171
  private emit;
2121
3172
  private pumpReceive;
3173
+ /** Close the Gemini Live session and stop the receive loop. */
2122
3174
  close(): Promise<void>;
2123
3175
  }
2124
3176
 
2125
3177
  /**
2126
3178
  * Ultravox realtime adapter.
2127
3179
  *
2128
- * Partially adapted (~70% port) from LiveKit Agents
2129
- * (livekit-plugins-ultravox, Apache 2.0). Pure WebSocket protocol no vendor SDK.
2130
- *
2131
- * Reframed to Patter's connect / sendAudio / onEvent / close surface,
2132
- * matching OpenAIRealtimeAdapter.
3180
+ * Pure WebSocket protocol — no vendor SDK. Implements Patter's connect /
3181
+ * sendAudio / onEvent / close surface, matching OpenAIRealtimeAdapter.
2133
3182
  */
2134
3183
  declare const ULTRAVOX_DEFAULT_API_BASE = "https://api.ultravox.ai/api";
2135
3184
  declare const ULTRAVOX_DEFAULT_SR = 16000;
3185
+ /** Callback signature for events emitted by {@link UltravoxRealtimeAdapter}. */
2136
3186
  type UltravoxEventHandler = (type: 'audio' | 'transcript_input' | 'transcript_output' | 'function_call' | 'speech_started' | 'response_done' | 'error', data: unknown) => void | Promise<void>;
2137
3187
  interface UltravoxOptions {
2138
3188
  model?: string;
@@ -2148,6 +3198,7 @@ interface UltravoxOptions {
2148
3198
  sampleRate?: number;
2149
3199
  firstMessage?: string;
2150
3200
  }
3201
+ /** Realtime WebSocket adapter for Ultravox managed-agent calls. */
2151
3202
  declare class UltravoxRealtimeAdapter {
2152
3203
  private readonly apiKey;
2153
3204
  private readonly model;
@@ -2163,14 +3214,21 @@ declare class UltravoxRealtimeAdapter {
2163
3214
  /** Exposed for diagnostics — true while the underlying socket is open. */
2164
3215
  running: boolean;
2165
3216
  constructor(apiKey: string, options?: UltravoxOptions);
3217
+ /** Create the Ultravox call, fetch the joinUrl, and open the WebSocket. */
2166
3218
  connect(): Promise<void>;
3219
+ /** Send a binary PCM audio chunk to the Ultravox call. */
2167
3220
  sendAudio(pcm: Buffer): void;
3221
+ /** Inject a user text message into the Ultravox conversation. */
2168
3222
  sendText(text: string): Promise<void>;
3223
+ /** Send a tool/function-call result back to Ultravox. */
2169
3224
  sendFunctionResult(callId: string, result: string): Promise<void>;
3225
+ /** Clear the playback buffer to interrupt the agent's current response. */
2170
3226
  cancelResponse(): void;
3227
+ /** Register an event handler that receives every Ultravox event. */
2171
3228
  onEvent(handler: UltravoxEventHandler): void;
2172
3229
  private emit;
2173
3230
  private handleMessage;
3231
+ /** Close the Ultravox WebSocket and mark the adapter idle. */
2174
3232
  close(): Promise<void>;
2175
3233
  }
2176
3234
 
@@ -2185,7 +3243,9 @@ declare class UltravoxRealtimeAdapter {
2185
3243
  * node-cron is an optional dependency. This module imports it lazily so that
2186
3244
  * consumers who never schedule anything do not need it installed.
2187
3245
  */
3246
+ /** Callback fired by the scheduler — sync or async, return value ignored. */
2188
3247
  type JobCallback = () => void | Promise<void>;
3248
+ /** Handle returned by `scheduleCron`/`scheduleOnce`/`scheduleInterval` for cancellation. */
2189
3249
  interface ScheduleHandle {
2190
3250
  readonly jobId: string;
2191
3251
  cancel(): void;
@@ -2225,29 +3285,34 @@ declare function scheduleInterval(intervalOrOpts: number | {
2225
3285
  * Pure WebSocket client for the Soniox real-time STT API. Accumulates
2226
3286
  * `is_final` tokens and flushes them on `<end>`/`<fin>` endpoint tokens,
2227
3287
  * mirroring the Python `SonioxSTT` adapter.
2228
- *
2229
- * Adapted from LiveKit Agents (Apache 2.0):
2230
- * https://github.com/livekit/agents
2231
- * (source: livekit-plugins/livekit-plugins-soniox/livekit/plugins/soniox/stt.py
2232
- * at commit 78a66bcf79c5cea82989401c408f1dff4b961a5b)
2233
- *
2234
- * Speechmatics TypeScript adapter is **intentionally not ported**: the
2235
- * official Speechmatics Voice SDK (`speechmatics.voice`) is Python-only at
2236
- * the time of writing. Python users should install the optional
2237
- * `speechmatics` extra; TypeScript users need to wait for an official
2238
- * upstream SDK before this adapter can land without a WS-handshake reimpl.
2239
3288
  */
2240
- interface Transcript$4 {
3289
+ /** Known Soniox real-time STT models. */
3290
+ declare const SonioxModel: {
3291
+ readonly STT_RT_V4: "stt-rt-v4";
3292
+ readonly STT_RT_V3: "stt-rt-v3";
3293
+ readonly STT_RT_V2: "stt-rt-v2";
3294
+ };
3295
+ type SonioxModel = (typeof SonioxModel)[keyof typeof SonioxModel];
3296
+ /** Common PCM sample rates for Soniox streaming input. */
3297
+ declare const SonioxSampleRate: {
3298
+ readonly HZ_8000: 8000;
3299
+ readonly HZ_16000: 16000;
3300
+ readonly HZ_24000: 24000;
3301
+ };
3302
+ type SonioxSampleRate = (typeof SonioxSampleRate)[keyof typeof SonioxSampleRate];
3303
+ /** Patter-normalised transcript event emitted by {@link SonioxSTT}. */
3304
+ interface Transcript$6 {
2241
3305
  readonly text: string;
2242
3306
  readonly isFinal: boolean;
2243
3307
  readonly confidence: number;
2244
3308
  }
2245
- type TranscriptCallback$4 = (transcript: Transcript$4) => void;
3309
+ type TranscriptCallback$6 = (transcript: Transcript$6) => void;
3310
+ /** Constructor options for {@link SonioxSTT}. */
2246
3311
  interface SonioxSTTOptions$1 {
2247
- model?: string;
3312
+ model?: SonioxModel | string;
2248
3313
  languageHints?: string[];
2249
3314
  languageHintsStrict?: boolean;
2250
- sampleRate?: number;
3315
+ sampleRate?: SonioxSampleRate | number;
2251
3316
  numChannels?: number;
2252
3317
  enableSpeakerDiarization?: boolean;
2253
3318
  enableLanguageIdentification?: boolean;
@@ -2255,6 +3320,7 @@ interface SonioxSTTOptions$1 {
2255
3320
  clientReferenceId?: string;
2256
3321
  baseUrl?: string;
2257
3322
  }
3323
+ /** Streaming STT adapter for Soniox's real-time WebSocket API. */
2258
3324
  declare class SonioxSTT {
2259
3325
  private ws;
2260
3326
  private callbacks;
@@ -2275,12 +3341,16 @@ declare class SonioxSTT {
2275
3341
  /** Factory for Twilio-style 8 kHz linear PCM. */
2276
3342
  static forTwilio(apiKey: string, languageHints?: string[]): SonioxSTT;
2277
3343
  private buildConfig;
3344
+ /** Open the streaming WebSocket and send the initial config payload. */
2278
3345
  connect(): Promise<void>;
2279
3346
  private clearKeepalive;
2280
3347
  private handleMessage;
2281
3348
  private emit;
3349
+ /** Send a binary PCM16-LE audio chunk to Soniox for transcription. */
2282
3350
  sendAudio(audio: Buffer): void;
2283
- onTranscript(callback: TranscriptCallback$4): void;
3351
+ /** Register a transcript listener (max 10 concurrent listeners). */
3352
+ onTranscript(callback: TranscriptCallback$6): void;
3353
+ /** Send the empty-frame stream terminator and close the WebSocket. */
2284
3354
  close(): void;
2285
3355
  }
2286
3356
 
@@ -2289,17 +3359,36 @@ declare class SonioxSTT {
2289
3359
  *
2290
3360
  * Pure `ws` transport — does NOT depend on the vendor SDK.
2291
3361
  */
2292
- interface Transcript$3 {
3362
+ /** Patter-normalised transcript event emitted by {@link AssemblyAISTT}. */
3363
+ interface Transcript$5 {
2293
3364
  readonly text: string;
2294
3365
  readonly isFinal: boolean;
2295
3366
  readonly confidence: number;
2296
3367
  /** Optional event hint, e.g. `"SpeechStarted"` for barge-in signals. */
2297
3368
  readonly eventType?: string;
2298
3369
  }
2299
- type TranscriptCallback$3 = (transcript: Transcript$3) => void;
2300
- type AssemblyAIEncoding = 'pcm_s16le' | 'pcm_mulaw';
2301
- type AssemblyAIModel = 'universal-streaming-english' | 'universal-streaming-multilingual' | 'u3-rt-pro' | 'whisper-rt';
2302
- type AssemblyAIDomain = 'general' | 'medical-v1';
3370
+ type TranscriptCallback$5 = (transcript: Transcript$5) => void;
3371
+ /** Audio encodings accepted by AssemblyAI's v3 streaming endpoint. */
3372
+ declare const AssemblyAIEncoding: {
3373
+ readonly PCM_S16LE: "pcm_s16le";
3374
+ readonly PCM_MULAW: "pcm_mulaw";
3375
+ };
3376
+ type AssemblyAIEncoding = (typeof AssemblyAIEncoding)[keyof typeof AssemblyAIEncoding];
3377
+ /** Known AssemblyAI Universal Streaming speech models. */
3378
+ declare const AssemblyAIModel: {
3379
+ readonly UNIVERSAL_STREAMING_ENGLISH: "universal-streaming-english";
3380
+ readonly UNIVERSAL_STREAMING_MULTILINGUAL: "universal-streaming-multilingual";
3381
+ readonly U3_RT_PRO: "u3-rt-pro";
3382
+ readonly WHISPER_RT: "whisper-rt";
3383
+ };
3384
+ type AssemblyAIModel = (typeof AssemblyAIModel)[keyof typeof AssemblyAIModel];
3385
+ /** Valid `domain` values for AssemblyAI's v3 streaming endpoint. */
3386
+ declare const AssemblyAIDomain: {
3387
+ readonly GENERAL: "general";
3388
+ readonly MEDICAL_V1: "medical-v1";
3389
+ };
3390
+ type AssemblyAIDomain = (typeof AssemblyAIDomain)[keyof typeof AssemblyAIDomain];
3391
+ /** Constructor options for {@link AssemblyAISTT}. */
2303
3392
  interface AssemblyAISTTOptions$1 {
2304
3393
  /** One of the AssemblyAI speech models. */
2305
3394
  readonly model?: AssemblyAIModel;
@@ -2337,6 +3426,7 @@ interface AssemblyAISTTOptions$1 {
2337
3426
  /** Domain hint — must be `"general"` or `"medical-v1"`. */
2338
3427
  readonly domain?: AssemblyAIDomain;
2339
3428
  }
3429
+ /** Streaming STT adapter for AssemblyAI's v3 Universal Streaming API. */
2340
3430
  declare class AssemblyAISTT {
2341
3431
  private readonly apiKey;
2342
3432
  private readonly options;
@@ -2345,6 +3435,22 @@ declare class AssemblyAISTT {
2345
3435
  private closing;
2346
3436
  private reconnectAttempts;
2347
3437
  private terminationResolve;
3438
+ /**
3439
+ * Coalescing buffer for inbound audio frames. AssemblyAI's v3
3440
+ * streaming endpoint requires each ws frame to carry 50–1000 ms of
3441
+ * audio (server emits error 3007 below 50 ms — observed in the
3442
+ * field as a fully-billed call with zero transcripts). Twilio sends
3443
+ * 20 ms frames, so the SDK must batch ~3 frames before forwarding.
3444
+ *
3445
+ * We accumulate raw bytes here until the cumulative duration crosses
3446
+ * the configured target (default 60 ms — comfortably above the 50 ms
3447
+ * floor with one frame of headroom against jitter), then flush in a
3448
+ * single `ws.send()`.
3449
+ */
3450
+ private chunkBuffer;
3451
+ private chunkBufferBytes;
3452
+ /** Target send size in bytes — recomputed lazily once encoding/sample-rate is known. */
3453
+ private chunkBufferTargetBytes;
2348
3454
  /** AssemblyAI session id — set when the `Begin` message arrives. */
2349
3455
  sessionId: string | null;
2350
3456
  /** Unix timestamp when the AssemblyAI session expires. */
@@ -2354,13 +3460,21 @@ declare class AssemblyAISTT {
2354
3460
  static forTwilio(apiKey: string, model?: AssemblyAIModel): AssemblyAISTT;
2355
3461
  private buildUrl;
2356
3462
  private buildHeaders;
3463
+ /** Open the streaming WebSocket and arm message handlers. */
2357
3464
  connect(): Promise<void>;
2358
3465
  private awaitOpen;
2359
3466
  private attachHandlers;
2360
3467
  private reconnect;
2361
3468
  private handleEvent;
2362
3469
  private emit;
3470
+ /** Send a binary PCM/mu-law audio chunk to AssemblyAI for transcription. */
2363
3471
  sendAudio(audio: Buffer): void;
3472
+ /**
3473
+ * Compute the byte count corresponding to ~60 ms of audio for the
3474
+ * configured encoding / sample rate. Sits one Twilio frame (20 ms)
3475
+ * above AssemblyAI's 50 ms floor so jitter never dips below.
3476
+ */
3477
+ private computeTargetChunkBytes;
2364
3478
  private estimateChunkDurationMs;
2365
3479
  /**
2366
3480
  * Send an `UpdateConfiguration` frame to change settings mid-stream.
@@ -2374,7 +3488,9 @@ declare class AssemblyAISTT {
2374
3488
  }): void;
2375
3489
  /** Force the server to finalize the current turn (for barge-in). */
2376
3490
  forceEndpoint(): void;
2377
- onTranscript(callback: TranscriptCallback$3): () => void;
3491
+ /** Register a transcript listener. Returns an unsubscribe function. */
3492
+ onTranscript(callback: TranscriptCallback$5): () => void;
3493
+ /** Send a Terminate frame, wait briefly for ack, and close the socket. */
2378
3494
  close(): Promise<void>;
2379
3495
  }
2380
3496
 
@@ -2383,32 +3499,50 @@ declare class AssemblyAISTT {
2383
3499
  *
2384
3500
  * Implements a `DeepgramSTT`-shaped provider using Cartesia's streaming
2385
3501
  * WebSocket API. Pure `ws` transport — does NOT depend on the vendor SDK.
2386
- *
2387
- * Algorithm adapted from LiveKit Agents (Apache 2.0):
2388
- * https://github.com/livekit/agents
2389
- * Source: livekit-plugins/livekit-plugins-cartesia/livekit/plugins/cartesia/stt.py
2390
- * Upstream ref SHA: 78a66bcf79c5cea82989401c408f1dff4b961a5b
2391
3502
  */
2392
- interface Transcript$2 {
3503
+ /** Patter-normalised transcript event emitted by {@link CartesiaSTT}. */
3504
+ interface Transcript$4 {
2393
3505
  readonly text: string;
2394
3506
  readonly isFinal: boolean;
2395
3507
  readonly confidence: number;
2396
3508
  }
2397
- type TranscriptCallback$2 = (transcript: Transcript$2) => void;
3509
+ type TranscriptCallback$4 = (transcript: Transcript$4) => void;
3510
+ /** Known Cartesia STT models. */
3511
+ declare const CartesiaSTTModel: {
3512
+ readonly INK_WHISPER: "ink-whisper";
3513
+ };
3514
+ type CartesiaSTTModel = (typeof CartesiaSTTModel)[keyof typeof CartesiaSTTModel];
3515
+ /** Audio encodings accepted by Cartesia's STT websocket endpoint. */
3516
+ declare const CartesiaSTTEncoding: {
3517
+ readonly PCM_S16LE: "pcm_s16le";
3518
+ };
3519
+ type CartesiaSTTEncoding = (typeof CartesiaSTTEncoding)[keyof typeof CartesiaSTTEncoding];
3520
+ /** Common PCM sample rates accepted by Cartesia STT. */
3521
+ declare const CartesiaSTTSampleRate: {
3522
+ readonly HZ_8000: 8000;
3523
+ readonly HZ_16000: 16000;
3524
+ readonly HZ_24000: 24000;
3525
+ readonly HZ_44100: 44100;
3526
+ readonly HZ_48000: 48000;
3527
+ };
3528
+ type CartesiaSTTSampleRate = (typeof CartesiaSTTSampleRate)[keyof typeof CartesiaSTTSampleRate];
2398
3529
  /** Cartesia STT currently only accepts 16-bit PCM little-endian. */
3530
+ /** Legacy encoding alias kept for callers using the bare string form. */
2399
3531
  type CartesiaEncoding = 'pcm_s16le';
3532
+ /** Constructor options for {@link CartesiaSTT}. */
2400
3533
  interface CartesiaSTTOptions$1 {
2401
3534
  /** Cartesia STT model. Currently only `"ink-whisper"`. */
2402
- readonly model?: string;
3535
+ readonly model?: CartesiaSTTModel | string;
2403
3536
  /** BCP-47 language code. */
2404
3537
  readonly language?: string;
2405
3538
  /** PCM encoding; Cartesia only supports `pcm_s16le`. */
2406
- readonly encoding?: CartesiaEncoding;
3539
+ readonly encoding?: CartesiaSTTEncoding | CartesiaEncoding;
2407
3540
  /** Sample rate in Hz. Cartesia accepts 8000, 16000, 24000, 44100, 48000. */
2408
- readonly sampleRate?: number;
3541
+ readonly sampleRate?: CartesiaSTTSampleRate | number;
2409
3542
  /** Override base URL (HTTP or WS). Defaults to Cartesia prod. */
2410
3543
  readonly baseUrl?: string;
2411
3544
  }
3545
+ /** Streaming STT adapter for Cartesia's ink-whisper WebSocket API. */
2412
3546
  declare class CartesiaSTT {
2413
3547
  private readonly apiKey;
2414
3548
  private readonly options;
@@ -2422,13 +3556,16 @@ declare class CartesiaSTT {
2422
3556
  requestId: string | null;
2423
3557
  constructor(apiKey: string, options?: CartesiaSTTOptions$1);
2424
3558
  private buildWsUrl;
3559
+ /** Open the streaming WebSocket and arm message + keepalive handlers. */
2425
3560
  connect(): Promise<void>;
2426
3561
  private handleEvent;
2427
3562
  private emit;
3563
+ /** Send a binary PCM16-LE audio chunk to Cartesia for transcription. */
2428
3564
  sendAudio(audio: Buffer): void;
2429
- onTranscript(callback: TranscriptCallback$2): void;
3565
+ /** Register a transcript listener. */
3566
+ onTranscript(callback: TranscriptCallback$4): void;
2430
3567
  /** Remove a previously registered transcript callback. */
2431
- offTranscript(callback: TranscriptCallback$2): void;
3568
+ offTranscript(callback: TranscriptCallback$4): void;
2432
3569
  /**
2433
3570
  * Synchronous best-effort close. Sends `finalize` and closes the socket
2434
3571
  * without waiting for the server to flush any remaining transcripts.
@@ -2446,9 +3583,35 @@ declare class CartesiaSTT {
2446
3583
  closeAsync(): Promise<void>;
2447
3584
  }
2448
3585
 
2449
- type LMNTAudioFormat = 'aac' | 'mp3' | 'mulaw' | 'raw' | 'wav';
2450
- type LMNTModel = 'blizzard' | 'aurora';
2451
- type LMNTSampleRate = 8000 | 16000 | 24000;
3586
+ /**
3587
+ * LMNT TTS provider — HTTP `/v1/ai/speech/bytes` endpoint.
3588
+ *
3589
+ * Defaults to `format='raw'` (PCM_S16LE) at 16 kHz so the output drops
3590
+ * directly into Patter's telephony pipeline without transcoding.
3591
+ */
3592
+ /** Supported LMNT audio output formats. `RAW` is PCM_S16LE. */
3593
+ declare const LMNTAudioFormat: {
3594
+ readonly AAC: "aac";
3595
+ readonly MP3: "mp3";
3596
+ readonly MULAW: "mulaw";
3597
+ readonly RAW: "raw";
3598
+ readonly WAV: "wav";
3599
+ };
3600
+ type LMNTAudioFormat = (typeof LMNTAudioFormat)[keyof typeof LMNTAudioFormat];
3601
+ /** LMNT TTS model families. */
3602
+ declare const LMNTModel: {
3603
+ readonly BLIZZARD: "blizzard";
3604
+ readonly AURORA: "aurora";
3605
+ };
3606
+ type LMNTModel = (typeof LMNTModel)[keyof typeof LMNTModel];
3607
+ /** Supported PCM sample rates for LMNT raw output. */
3608
+ declare const LMNTSampleRate: {
3609
+ readonly HZ_8000: 8000;
3610
+ readonly HZ_16000: 16000;
3611
+ readonly HZ_24000: 24000;
3612
+ };
3613
+ type LMNTSampleRate = (typeof LMNTSampleRate)[keyof typeof LMNTSampleRate];
3614
+ /** Constructor options for {@link LMNTTTS}. */
2452
3615
  interface LMNTTTSOptions$1 {
2453
3616
  model?: LMNTModel;
2454
3617
  voice?: string;
@@ -2459,6 +3622,7 @@ interface LMNTTTSOptions$1 {
2459
3622
  topP?: number;
2460
3623
  baseUrl?: string;
2461
3624
  }
3625
+ /** LMNT TTS adapter backed by the `/v1/ai/speech/bytes` HTTP streaming endpoint. */
2462
3626
  declare class LMNTTTS {
2463
3627
  private readonly apiKey;
2464
3628
  private readonly model;
@@ -2471,12 +3635,23 @@ declare class LMNTTTS {
2471
3635
  private readonly baseUrl;
2472
3636
  constructor(apiKey: string, opts?: LMNTTTSOptions$1);
2473
3637
  private buildPayload;
3638
+ /** Synthesize text and return the concatenated audio buffer. */
2474
3639
  synthesize(text: string): Promise<Buffer>;
2475
3640
  /** Yield audio chunks as they arrive — raw PCM_S16LE by default. */
2476
3641
  synthesizeStream(text: string): AsyncGenerator<Buffer>;
2477
3642
  }
2478
3643
 
3644
+ /**
3645
+ * Deepgram streaming STT adapter for the Patter SDK pipeline mode.
3646
+ *
3647
+ * Pure `ws` transport — connects to `wss://api.deepgram.com/v1/listen` with
3648
+ * a long-lived KeepAlive pump and emits Patter-normalised {@link Transcript}
3649
+ * events through {@link DeepgramSTT.onTranscript}. See {@link DeepgramSTT}
3650
+ * for the public class.
3651
+ */
3652
+ /** Which Deepgram server event a {@link Transcript} represents. */
2479
3653
  type TranscriptEventType = 'Results' | 'UtteranceEnd' | 'SpeechStarted';
3654
+ /** Per-word timing/confidence record returned by Deepgram in `words[]`. */
2480
3655
  interface DeepgramWord {
2481
3656
  readonly word?: string;
2482
3657
  readonly start?: number;
@@ -2485,7 +3660,8 @@ interface DeepgramWord {
2485
3660
  readonly punctuated_word?: string;
2486
3661
  readonly speaker?: number;
2487
3662
  }
2488
- interface Transcript$1 {
3663
+ /** Patter-normalised transcript event emitted by {@link DeepgramSTT}. */
3664
+ interface Transcript$3 {
2489
3665
  readonly text: string;
2490
3666
  readonly isFinal: boolean;
2491
3667
  readonly confidence: number;
@@ -2500,8 +3676,8 @@ interface Transcript$1 {
2500
3676
  /** Which provider event this Transcript represents. Default ``Results``. */
2501
3677
  readonly eventType?: TranscriptEventType;
2502
3678
  }
2503
- type TranscriptCallback$1 = (transcript: Transcript$1) => void;
2504
- type ErrorCallback = (error: Error) => void;
3679
+ type TranscriptCallback$3 = (transcript: Transcript$3) => void;
3680
+ type ErrorCallback$1 = (error: Error) => void;
2505
3681
  /**
2506
3682
  * Optional tuning knobs for Deepgram live transcription.
2507
3683
  *
@@ -2539,6 +3715,7 @@ interface DeepgramSTTOptions$1 {
2539
3715
  /** Emit VAD events (``SpeechStarted`` / ``UtteranceEnd``). Default ``true``. */
2540
3716
  readonly vadEvents?: boolean;
2541
3717
  }
3718
+ /** Streaming STT adapter for Deepgram's `/v1/listen` WebSocket API. */
2542
3719
  declare class DeepgramSTT {
2543
3720
  private ws;
2544
3721
  private readonly transcriptCallbacks;
@@ -2572,6 +3749,7 @@ declare class DeepgramSTT {
2572
3749
  /** Factory for Twilio calls — mulaw 8 kHz. Forwards tuning options through. */
2573
3750
  static forTwilio(apiKey: string, language?: string, model?: string, options?: DeepgramSTTOptions$1): DeepgramSTT;
2574
3751
  private buildUrl;
3752
+ /** Open the streaming WebSocket and arm message + keepalive handlers. */
2575
3753
  connect(): Promise<void>;
2576
3754
  private openSocket;
2577
3755
  private clearKeepalive;
@@ -2580,11 +3758,31 @@ declare class DeepgramSTT {
2580
3758
  private emitError;
2581
3759
  private handleError;
2582
3760
  private handleClose;
3761
+ /** Send a binary audio chunk to Deepgram for transcription. */
2583
3762
  sendAudio(audio: Buffer): void;
2584
- onTranscript(callback: TranscriptCallback$1): void;
2585
- offTranscript(callback: TranscriptCallback$1): void;
2586
- onError(callback: ErrorCallback): void;
2587
- offError(callback: ErrorCallback): void;
3763
+ private audioSentCount;
3764
+ private audioDroppedCount;
3765
+ /** Register a transcript listener. */
3766
+ onTranscript(callback: TranscriptCallback$3): void;
3767
+ /** Remove a previously registered transcript listener. */
3768
+ offTranscript(callback: TranscriptCallback$3): void;
3769
+ /** Register an error listener for socket / API failures. */
3770
+ onError(callback: ErrorCallback$1): void;
3771
+ /** Remove a previously registered error listener. */
3772
+ offError(callback: ErrorCallback$1): void;
3773
+ /**
3774
+ * Force Deepgram to immediately emit a final ``Results`` frame for the
3775
+ * in-flight utterance, rather than waiting for its own endpoint
3776
+ * heuristic (utterance_end_ms ~1 s + natural-pause endpointing).
3777
+ * Called by the SDK on VAD ``speech_end`` and after barge-in cancel —
3778
+ * both moments where the SDK already knows the user has stopped
3779
+ * speaking and waiting for Deepgram's own endpointing only adds
3780
+ * dead air.
3781
+ *
3782
+ * Idempotent: safe to call when the socket is closed/closing.
3783
+ */
3784
+ finalize(): void;
3785
+ /** Send Finalize, briefly drain trailing transcripts, then close the socket. */
2588
3786
  close(): void;
2589
3787
  }
2590
3788
 
@@ -2605,7 +3803,7 @@ type DeepgramSTTOptions = DeepgramSTTOptions$1 & {
2605
3803
  * const stt = new deepgram.STT({ apiKey: "dg_...", endpointingMs: 80 });
2606
3804
  * ```
2607
3805
  */
2608
- declare class STT$5 extends DeepgramSTT {
3806
+ declare class STT$6 extends DeepgramSTT {
2609
3807
  static readonly providerKey = "deepgram";
2610
3808
  constructor(opts?: DeepgramSTTOptions);
2611
3809
  }
@@ -2616,13 +3814,16 @@ declare class STT$5 extends DeepgramSTT {
2616
3814
  * Buffers incoming PCM16 audio and periodically sends it to the
2617
3815
  * OpenAI Whisper transcription API as a WAV file.
2618
3816
  */
2619
- interface Transcript {
3817
+ /** Patter-normalised transcript event emitted by {@link WhisperSTT}. */
3818
+ interface Transcript$2 {
2620
3819
  readonly text: string;
2621
3820
  readonly isFinal: boolean;
2622
3821
  readonly confidence: number;
2623
3822
  }
2624
- type TranscriptCallback = (transcript: Transcript) => void;
3823
+ type TranscriptCallback$2 = (transcript: Transcript$2) => void;
3824
+ /** Response format requested from `POST /v1/audio/transcriptions`. */
2625
3825
  type WhisperResponseFormat = 'json' | 'verbose_json';
3826
+ /** Buffered STT adapter for OpenAI's Whisper transcription HTTP API. */
2626
3827
  declare class WhisperSTT {
2627
3828
  private readonly apiKey;
2628
3829
  private readonly model;
@@ -2649,7 +3850,9 @@ declare class WhisperSTT {
2649
3850
  constructor(apiKey: string, language?: string, model?: string, bufferSize?: number, responseFormat?: WhisperResponseFormat);
2650
3851
  /** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
2651
3852
  static forTwilio(apiKey: string, language?: string, model?: string): WhisperSTT;
3853
+ /** Reset the audio buffer and arm the adapter for incoming chunks. */
2652
3854
  connect(): Promise<void>;
3855
+ /** Buffer a PCM16 chunk; flushes to Whisper once `bufferSize` bytes are reached. */
2653
3856
  sendAudio(audio: Buffer): void;
2654
3857
  private flushChunks;
2655
3858
  private trackTranscription;
@@ -2658,14 +3861,17 @@ declare class WhisperSTT {
2658
3861
  * which capped at 10 and silently replaced the last one, we now keep all
2659
3862
  * registered callbacks in a Set; use {@link offTranscript} to remove one.
2660
3863
  */
2661
- onTranscript(callback: TranscriptCallback): void;
2662
- offTranscript(callback: TranscriptCallback): void;
3864
+ onTranscript(callback: TranscriptCallback$2): void;
3865
+ /** Remove a previously registered transcript listener. */
3866
+ offTranscript(callback: TranscriptCallback$2): void;
3867
+ /** Flush any buffered audio, await pending transcriptions, and clear listeners. */
2663
3868
  close(): Promise<void>;
2664
3869
  private transcribeBuffer;
2665
3870
  }
2666
3871
 
2667
3872
  /** OpenAI Whisper STT for Patter pipeline mode. */
2668
3873
 
3874
+ /** Constructor options for the Whisper `STT` adapter. */
2669
3875
  interface WhisperSTTOptions {
2670
3876
  /** API key. Falls back to OPENAI_API_KEY env var when omitted. */
2671
3877
  apiKey?: string;
@@ -2685,7 +3891,7 @@ interface WhisperSTTOptions {
2685
3891
  * const stt = new whisper.STT({ apiKey: "sk-...", language: "en" });
2686
3892
  * ```
2687
3893
  */
2688
- declare class STT$4 extends WhisperSTT {
3894
+ declare class STT$5 extends WhisperSTT {
2689
3895
  static readonly providerKey = "whisper";
2690
3896
  constructor(opts?: WhisperSTTOptions);
2691
3897
  }
@@ -2705,6 +3911,7 @@ declare class STT$4 extends WhisperSTT {
2705
3911
  * ``whisper-1``.
2706
3912
  */
2707
3913
 
3914
+ /** STT adapter restricted to OpenAI's GPT-4o Transcribe model family. */
2708
3915
  declare class OpenAITranscribeSTT extends WhisperSTT {
2709
3916
  /**
2710
3917
  * @param apiKey OpenAI API key.
@@ -2719,6 +3926,7 @@ declare class OpenAITranscribeSTT extends WhisperSTT {
2719
3926
 
2720
3927
  /** OpenAI GPT-4o Transcribe STT for Patter pipeline mode. */
2721
3928
 
3929
+ /** Constructor options for the OpenAI Transcribe `STT` adapter. */
2722
3930
  interface OpenAITranscribeSTTOptions {
2723
3931
  /** API key. Falls back to OPENAI_API_KEY env var when omitted. */
2724
3932
  apiKey?: string;
@@ -2742,13 +3950,14 @@ interface OpenAITranscribeSTTOptions {
2742
3950
  * const stt = new openaiTranscribe.STT({ apiKey: "sk-...", language: "en" });
2743
3951
  * ```
2744
3952
  */
2745
- declare class STT$3 extends OpenAITranscribeSTT {
3953
+ declare class STT$4 extends OpenAITranscribeSTT {
2746
3954
  static readonly providerKey = "openai_transcribe";
2747
3955
  constructor(opts?: OpenAITranscribeSTTOptions);
2748
3956
  }
2749
3957
 
2750
3958
  /** Cartesia streaming STT for Patter pipeline mode. */
2751
3959
 
3960
+ /** Constructor options for the Cartesia `STT` adapter. */
2752
3961
  interface CartesiaSTTOptions {
2753
3962
  /** API key. Falls back to CARTESIA_API_KEY env var when omitted. */
2754
3963
  apiKey?: string;
@@ -2768,13 +3977,14 @@ interface CartesiaSTTOptions {
2768
3977
  * const stt = new cartesia.STT({ apiKey: "..." });
2769
3978
  * ```
2770
3979
  */
2771
- declare class STT$2 extends CartesiaSTT {
3980
+ declare class STT$3 extends CartesiaSTT {
2772
3981
  static readonly providerKey = "cartesia_stt";
2773
3982
  constructor(opts?: CartesiaSTTOptions);
2774
3983
  }
2775
3984
 
2776
3985
  /** Soniox streaming STT for Patter pipeline mode. */
2777
3986
 
3987
+ /** Constructor options for the Soniox `STT` adapter. */
2778
3988
  interface SonioxSTTOptions {
2779
3989
  /** API key. Falls back to SONIOX_API_KEY env var when omitted. */
2780
3990
  apiKey?: string;
@@ -2799,13 +4009,14 @@ interface SonioxSTTOptions {
2799
4009
  * const stt = new soniox.STT({ apiKey: "..." });
2800
4010
  * ```
2801
4011
  */
2802
- declare class STT$1 extends SonioxSTT {
4012
+ declare class STT$2 extends SonioxSTT {
2803
4013
  static readonly providerKey = "soniox";
2804
4014
  constructor(opts?: SonioxSTTOptions);
2805
4015
  }
2806
4016
 
2807
4017
  /** AssemblyAI Universal Streaming STT for Patter pipeline mode. */
2808
4018
 
4019
+ /** Constructor options for the AssemblyAI `STT` adapter. */
2809
4020
  interface AssemblyAISTTOptions {
2810
4021
  /** API key. Falls back to ASSEMBLYAI_API_KEY env var when omitted. */
2811
4022
  apiKey?: string;
@@ -2814,6 +4025,17 @@ interface AssemblyAISTTOptions {
2814
4025
  sampleRate?: number;
2815
4026
  baseUrl?: string;
2816
4027
  languageDetection?: boolean;
4028
+ /**
4029
+ * BCP-47 language hint (e.g. ``"it"``, ``"en"``). AssemblyAI does NOT
4030
+ * expose a per-call language override — the language is determined by
4031
+ * the chosen ``model`` (English-only models reject non-English audio,
4032
+ * multilingual models auto-detect). This field is accepted for
4033
+ * cross-provider parity with ``DeepgramSTT``/``WhisperSTT``/
4034
+ * ``OpenAITranscribeSTT``/``CartesiaSTT`` but is currently a no-op:
4035
+ * pick a multilingual ``model`` (e.g. ``universal-streaming-pro``)
4036
+ * and the provider will detect the spoken language (e.g. Italian) automatically.
4037
+ */
4038
+ language?: string;
2817
4039
  endOfTurnConfidenceThreshold?: number;
2818
4040
  minTurnSilence?: number;
2819
4041
  maxTurnSilence?: number;
@@ -2835,31 +4057,234 @@ interface AssemblyAISTTOptions {
2835
4057
  * const stt = new assemblyai.STT({ apiKey: "..." });
2836
4058
  * ```
2837
4059
  */
2838
- declare class STT extends AssemblyAISTT {
4060
+ declare class STT$1 extends AssemblyAISTT {
2839
4061
  static readonly providerKey = "assemblyai";
2840
4062
  constructor(opts?: AssemblyAISTTOptions);
2841
4063
  }
2842
4064
 
4065
+ /**
4066
+ * Speechmatics Speech-to-Text adapter for the Patter SDK pipeline mode.
4067
+ *
4068
+ * Streams PCM audio to the Speechmatics real-time WebSocket API
4069
+ * (`wss://eu.rt.speechmatics.com/v2`) and emits Patter-normalised
4070
+ * {@link Transcript} events. Mirrors `SpeechmaticsSTT` in the Python SDK.
4071
+ *
4072
+ * Divergence from Python: the Python adapter wraps the official
4073
+ * `speechmatics-voice` Python SDK (Voice Agent presets, smart turn
4074
+ * detection, etc.). No equivalent Node SDK is published, so this TypeScript
4075
+ * adapter speaks the underlying RT v2 wire protocol directly via `ws`.
4076
+ * The user-facing options (`turnDetectionMode`, `endOfUtteranceSilenceTrigger`,
4077
+ * `maxDelay`, `includePartials`, `enableDiarization`, `additionalVocab`,
4078
+ * `operatingPoint`, `domain`, `outputLocale`, `language`, `sampleRate`) all
4079
+ * map 1:1 onto the Python adapter so callers can switch SDKs without
4080
+ * changing their config.
4081
+ */
4082
+ /** Patter-normalised transcript event emitted by {@link SpeechmaticsSTT}. */
4083
+ interface Transcript$1 {
4084
+ readonly text: string;
4085
+ readonly isFinal: boolean;
4086
+ readonly confidence: number;
4087
+ }
4088
+ type TranscriptCallback$1 = (transcript: Transcript$1) => void;
4089
+ type ErrorCallback = (error: Error) => void;
4090
+ /**
4091
+ * Endpoint / turn-detection handling mode. Mirrors the values accepted by
4092
+ * Python's `TurnDetectionMode`. Maps onto Speechmatics's
4093
+ * `conversation_config` knobs on the wire.
4094
+ */
4095
+ declare const TurnDetectionMode: {
4096
+ readonly EXTERNAL: "external";
4097
+ readonly FIXED: "fixed";
4098
+ readonly ADAPTIVE: "adaptive";
4099
+ readonly SMART_TURN: "smart_turn";
4100
+ };
4101
+ type TurnDetectionMode = (typeof TurnDetectionMode)[keyof typeof TurnDetectionMode];
4102
+ /** Common PCM sample rates for Speechmatics streaming input. */
4103
+ declare const SpeechmaticsSampleRate: {
4104
+ readonly HZ_8000: 8000;
4105
+ readonly HZ_16000: 16000;
4106
+ readonly HZ_44100: 44100;
4107
+ };
4108
+ type SpeechmaticsSampleRate = (typeof SpeechmaticsSampleRate)[keyof typeof SpeechmaticsSampleRate];
4109
+ /** Audio encodings accepted by Speechmatics's real-time API. */
4110
+ declare const SpeechmaticsAudioEncoding: {
4111
+ readonly PCM_S16LE: "pcm_s16le";
4112
+ };
4113
+ type SpeechmaticsAudioEncoding = (typeof SpeechmaticsAudioEncoding)[keyof typeof SpeechmaticsAudioEncoding];
4114
+ /** Speechmatics operating points (accuracy vs latency trade-off). */
4115
+ declare const SpeechmaticsOperatingPoint: {
4116
+ readonly ENHANCED: "enhanced";
4117
+ readonly STANDARD: "standard";
4118
+ };
4119
+ type SpeechmaticsOperatingPoint = (typeof SpeechmaticsOperatingPoint)[keyof typeof SpeechmaticsOperatingPoint];
4120
+ /** Speechmatics RT server-message type names emitted to the client. */
4121
+ declare const SpeechmaticsServerMessage: {
4122
+ readonly RECOGNITION_STARTED: "RecognitionStarted";
4123
+ readonly ADD_PARTIAL_TRANSCRIPT: "AddPartialTranscript";
4124
+ readonly ADD_TRANSCRIPT: "AddTranscript";
4125
+ readonly END_OF_UTTERANCE: "EndOfUtterance";
4126
+ readonly END_OF_TRANSCRIPT: "EndOfTranscript";
4127
+ readonly AUDIO_ADDED: "AudioAdded";
4128
+ readonly INFO: "Info";
4129
+ readonly WARNING: "Warning";
4130
+ readonly ERROR: "Error";
4131
+ };
4132
+ type SpeechmaticsServerMessage = (typeof SpeechmaticsServerMessage)[keyof typeof SpeechmaticsServerMessage];
4133
+ /** Constructor options for {@link SpeechmaticsSTT}. */
4134
+ interface SpeechmaticsSTTOptions$1 {
4135
+ /** Override the realtime endpoint (default `wss://eu.rt.speechmatics.com/v2`). */
4136
+ readonly baseUrl?: string;
4137
+ /** BCP-47 language code. Default `"en"`. */
4138
+ readonly language?: string;
4139
+ /** Endpoint / turn-detection mode. Default `"adaptive"`. */
4140
+ readonly turnDetectionMode?: TurnDetectionMode;
4141
+ /** PCM sample rate (Hz). Default 16000. */
4142
+ readonly sampleRate?: SpeechmaticsSampleRate | number;
4143
+ /** Attach speaker IDs to transcripts. Default `false`. */
4144
+ readonly enableDiarization?: boolean;
4145
+ /** Max latency in seconds before the engine emits finals. Range 0.7..4.0. */
4146
+ readonly maxDelay?: number;
4147
+ /** Silence (s) that triggers EOU. Range (0, 2). */
4148
+ readonly endOfUtteranceSilenceTrigger?: number;
4149
+ /** Max EOU delay (s); must exceed `endOfUtteranceSilenceTrigger`. */
4150
+ readonly endOfUtteranceMaxDelay?: number;
4151
+ /** Include partial transcripts in interim output. Default `true`. */
4152
+ readonly includePartials?: boolean;
4153
+ /** Additional vocabulary entries (`{content, sounds_like?}`). */
4154
+ readonly additionalVocab?: ReadonlyArray<Record<string, unknown>>;
4155
+ /** Operating point (`enhanced` | `standard`). */
4156
+ readonly operatingPoint?: SpeechmaticsOperatingPoint;
4157
+ /** Optional Speechmatics domain (e.g. `"finance"`). */
4158
+ readonly domain?: string;
4159
+ /** Optional output locale (e.g. `"en-GB"`). */
4160
+ readonly outputLocale?: string;
4161
+ }
4162
+ /**
4163
+ * Streaming STT adapter for Speechmatics's RT v2 WebSocket API.
4164
+ *
4165
+ * @example
4166
+ * ```ts
4167
+ * const stt = new SpeechmaticsSTT('sm_api_key', { language: 'en' });
4168
+ * stt.onTranscript((t) => console.log(t.text, t.isFinal));
4169
+ * await stt.connect();
4170
+ * stt.sendAudio(pcm16Chunk);
4171
+ * stt.close();
4172
+ * ```
4173
+ */
4174
+ declare class SpeechmaticsSTT {
4175
+ private ws;
4176
+ private readonly transcriptCallbacks;
4177
+ private readonly errorCallbacks;
4178
+ private running;
4179
+ /** Sequence number of the last audio chunk acknowledged via `AudioAdded`. */
4180
+ private lastSeqNo;
4181
+ private readonly apiKey;
4182
+ private readonly baseUrl;
4183
+ private readonly language;
4184
+ private readonly turnDetectionMode;
4185
+ private readonly sampleRate;
4186
+ private readonly enableDiarization;
4187
+ private readonly maxDelay;
4188
+ private readonly endOfUtteranceSilenceTrigger;
4189
+ private readonly endOfUtteranceMaxDelay;
4190
+ private readonly includePartials;
4191
+ private readonly additionalVocab;
4192
+ private readonly operatingPoint;
4193
+ private readonly domain;
4194
+ private readonly outputLocale;
4195
+ constructor(apiKey: string, options?: SpeechmaticsSTTOptions$1);
4196
+ /** Build the JSON `StartRecognition` payload sent on connect. */
4197
+ private buildStartRecognition;
4198
+ /** Open the streaming WebSocket and send the `StartRecognition` frame. */
4199
+ connect(): Promise<void>;
4200
+ /** Send a binary PCM16-LE audio chunk to Speechmatics for transcription. */
4201
+ sendAudio(audio: Buffer): void;
4202
+ /** Register a transcript listener. */
4203
+ onTranscript(callback: TranscriptCallback$1): void;
4204
+ /** Remove a previously registered transcript listener. */
4205
+ offTranscript(callback: TranscriptCallback$1): void;
4206
+ /** Register an error listener for socket / API failures. */
4207
+ onError(callback: ErrorCallback): void;
4208
+ /** Remove a previously registered error listener. */
4209
+ offError(callback: ErrorCallback): void;
4210
+ private handleMessage;
4211
+ /** Translate a Speechmatics transcript message into a Patter `Transcript`. */
4212
+ private toTranscript;
4213
+ private emitTranscript;
4214
+ private emitError;
4215
+ private handleError;
4216
+ private handleClose;
4217
+ /** Send `EndOfStream` and close the WebSocket. Idempotent. */
4218
+ close(): void;
4219
+ }
4220
+
4221
+ /** Speechmatics streaming STT for Patter pipeline mode. */
4222
+
4223
+ type SpeechmaticsSTTOptions = SpeechmaticsSTTOptions$1 & {
4224
+ /** API key. Falls back to SPEECHMATICS_API_KEY env var when omitted. */
4225
+ apiKey?: string;
4226
+ };
4227
+ /**
4228
+ * Speechmatics streaming STT.
4229
+ *
4230
+ * @example
4231
+ * ```ts
4232
+ * import * as speechmatics from "getpatter/stt/speechmatics";
4233
+ * const stt = new speechmatics.STT(); // reads SPEECHMATICS_API_KEY
4234
+ * const stt = new speechmatics.STT({ apiKey: "sm_...", language: "en" });
4235
+ * ```
4236
+ */
4237
+ declare class STT extends SpeechmaticsSTT {
4238
+ static readonly providerKey = "speechmatics";
4239
+ constructor(opts?: SpeechmaticsSTTOptions);
4240
+ }
4241
+
2843
4242
  /**
2844
4243
  * Known stable ElevenLabs voice models (from the official ElevenLabs API
2845
- * reference). Provided as a string-literal union for autocomplete + type
2846
- * narrowing; the public ``modelId`` option also accepts ``string`` so
2847
- * users can pass forward-compat IDs we haven't enumerated yet.
2848
- *
2849
- * - ``eleven_v3`` — newest, highest quality (slower TTFT than Flash).
2850
- * - ``eleven_flash_v2_5`` — current default, fastest (~75 ms TTFT).
2851
- * - ``eleven_turbo_v2_5`` — balanced quality/speed.
2852
- * - ``eleven_multilingual_v2`` — best multilingual support.
2853
- * - ``eleven_monolingual_v1`` — legacy English-only.
2854
- */
2855
- type ElevenLabsModel = 'eleven_v3' | 'eleven_flash_v2_5' | 'eleven_turbo_v2_5' | 'eleven_multilingual_v2' | 'eleven_monolingual_v1';
2856
- type ElevenLabsOutputFormat = 'mp3_22050_32' | 'mp3_44100_32' | 'mp3_44100_64' | 'mp3_44100_96' | 'mp3_44100_128' | 'mp3_44100_192' | 'pcm_8000' | 'pcm_16000' | 'pcm_22050' | 'pcm_24000' | 'pcm_44100' | 'ulaw_8000';
4244
+ * reference). Exposed as a typed `as const` object so callers can pass
4245
+ * `ElevenLabsModel.FLASH_V2_5` and get autocomplete / static checking; the
4246
+ * public `modelId` option also accepts an arbitrary `string` so users can
4247
+ * pass forward-compat IDs we haven't enumerated yet.
4248
+ *
4249
+ * - `V3` — newest, highest quality (slower TTFT than Flash).
4250
+ * - `FLASH_V2_5` — current default, fastest (~75 ms TTFT).
4251
+ * - `TURBO_V2_5` — balanced quality/speed.
4252
+ * - `MULTILINGUAL_V2` — best multilingual support.
4253
+ * - `MONOLINGUAL_V1` — legacy English-only.
4254
+ */
4255
+ declare const ElevenLabsModel: {
4256
+ readonly V3: "eleven_v3";
4257
+ readonly FLASH_V2_5: "eleven_flash_v2_5";
4258
+ readonly TURBO_V2_5: "eleven_turbo_v2_5";
4259
+ readonly MULTILINGUAL_V2: "eleven_multilingual_v2";
4260
+ readonly MONOLINGUAL_V1: "eleven_monolingual_v1";
4261
+ };
4262
+ /** Union of {@link ElevenLabsModel} string values. */
4263
+ type ElevenLabsModel = (typeof ElevenLabsModel)[keyof typeof ElevenLabsModel];
4264
+ declare const ElevenLabsOutputFormat: {
4265
+ readonly MP3_22050_32: "mp3_22050_32";
4266
+ readonly MP3_44100_32: "mp3_44100_32";
4267
+ readonly MP3_44100_64: "mp3_44100_64";
4268
+ readonly MP3_44100_96: "mp3_44100_96";
4269
+ readonly MP3_44100_128: "mp3_44100_128";
4270
+ readonly MP3_44100_192: "mp3_44100_192";
4271
+ readonly PCM_8000: "pcm_8000";
4272
+ readonly PCM_16000: "pcm_16000";
4273
+ readonly PCM_22050: "pcm_22050";
4274
+ readonly PCM_24000: "pcm_24000";
4275
+ readonly PCM_44100: "pcm_44100";
4276
+ readonly ULAW_8000: "ulaw_8000";
4277
+ };
4278
+ /** Union of {@link ElevenLabsOutputFormat} string values. */
4279
+ type ElevenLabsOutputFormat = (typeof ElevenLabsOutputFormat)[keyof typeof ElevenLabsOutputFormat];
4280
+ /** ElevenLabs voice tuning knobs forwarded as `voice_settings` in the request. */
2857
4281
  interface ElevenLabsVoiceSettings {
2858
4282
  stability?: number;
2859
4283
  similarity_boost?: number;
2860
4284
  style?: number;
2861
4285
  use_speaker_boost?: boolean;
2862
4286
  }
4287
+ /** Constructor options for {@link ElevenLabsTTS}. */
2863
4288
  interface ElevenLabsTTSOptions$1 {
2864
4289
  voiceId?: string;
2865
4290
  /**
@@ -2951,16 +4376,25 @@ declare class ElevenLabsTTS {
2951
4376
 
2952
4377
  /** ElevenLabs TTS for Patter pipeline mode. */
2953
4378
 
4379
+ /** Constructor options for the ElevenLabs `TTS` adapter. */
2954
4380
  interface ElevenLabsTTSOptions {
2955
4381
  /** API key. Falls back to ELEVENLABS_API_KEY env var when omitted. */
2956
- apiKey?: string;
2957
- voiceId?: string;
4382
+ readonly apiKey?: string;
4383
+ readonly voiceId?: string;
2958
4384
  /**
2959
4385
  * ElevenLabs voice model ID. Default is ``eleven_flash_v2_5`` (lowest TTFT).
2960
4386
  * Pass ``eleven_v3`` for highest quality, or any string for forward-compat.
2961
4387
  */
2962
- modelId?: ElevenLabsModel | string;
2963
- outputFormat?: string;
4388
+ readonly modelId?: ElevenLabsModel | string;
4389
+ readonly outputFormat?: string;
4390
+ /**
4391
+ * BCP-47 language code (e.g. `"it"`, `"es"`). Forwarded to ElevenLabs as
4392
+ * the `language_code` request body field — required for multilingual /
4393
+ * Flash v2.5 voices to render the right accent.
4394
+ */
4395
+ readonly languageCode?: string;
4396
+ /** ElevenLabs `voice_settings` object (stability, similarity_boost, …). */
4397
+ readonly voiceSettings?: Record<string, unknown>;
2964
4398
  }
2965
4399
  /** Options for the carrier-specific factories — same as the constructor minus `outputFormat`. */
2966
4400
  type ElevenLabsCarrierOptions = Omit<ElevenLabsTTSOptions, "outputFormat">;
@@ -2979,17 +4413,180 @@ type ElevenLabsCarrierOptions = Omit<ElevenLabsTTSOptions, "outputFormat">;
2979
4413
  * 16 kHz, native Telnyx default) on phone calls to skip the SDK-side
2980
4414
  * resampling / transcoding step.
2981
4415
  */
2982
- declare class TTS$4 extends ElevenLabsTTS {
4416
+ declare class TTS$6 extends ElevenLabsTTS {
2983
4417
  static readonly providerKey = "elevenlabs";
2984
4418
  constructor(opts?: ElevenLabsTTSOptions);
2985
4419
  /** Pipeline TTS pre-configured for Twilio Media Streams (`ulaw_8000`). */
2986
- static forTwilio(opts?: ElevenLabsCarrierOptions): TTS$4;
2987
- static forTwilio(apiKey: string, options?: Omit<ElevenLabsTTSOptions, "outputFormat">): TTS$4;
4420
+ static forTwilio(opts?: ElevenLabsCarrierOptions): TTS$6;
4421
+ static forTwilio(apiKey: string, options?: Omit<ElevenLabsTTSOptions, "outputFormat">): TTS$6;
2988
4422
  /** Pipeline TTS pre-configured for Telnyx (`pcm_16000`). */
2989
- static forTelnyx(opts?: ElevenLabsCarrierOptions): TTS$4;
2990
- static forTelnyx(apiKey: string, options?: Omit<ElevenLabsTTSOptions, "outputFormat">): TTS$4;
4423
+ static forTelnyx(opts?: ElevenLabsCarrierOptions): TTS$6;
4424
+ static forTelnyx(apiKey: string, options?: Omit<ElevenLabsTTSOptions, "outputFormat">): TTS$6;
4425
+ }
4426
+
4427
+ /**
4428
+ * WebSocket-based ElevenLabs TTS provider — opt-in low-latency variant.
4429
+ *
4430
+ * Targets the ElevenLabs streaming-input WebSocket endpoint
4431
+ * (`/v1/text-to-speech/{voice_id}/stream-input`) instead of the HTTP
4432
+ * `/stream` endpoint used by `ElevenLabsTTS`. Saves the HTTP request setup
4433
+ * time per utterance (~50 ms) and avoids the HTTP cold-start TLS handshake
4434
+ * when calls are bursty.
4435
+ *
4436
+ * API matches `ElevenLabsTTS` (`synthesizeStream(text)` returns an
4437
+ * `AsyncGenerator<Buffer>`) so it can be passed anywhere a TTSAdapter is
4438
+ * expected.
4439
+ *
4440
+ * Behaviour notes
4441
+ * - WebSocket is opened **per-utterance** (matches HTTP semantics). A
4442
+ * future revision may pool a WS across utterances of the same call
4443
+ * session — see roadmap Phase 5b.
4444
+ * - `auto_mode=true` is enabled by default. Pass `autoMode: false` to
4445
+ * send a custom `chunk_length_schedule`.
4446
+ * - `outputFormat` is exposed as a query parameter so `ulaw_8000` (Twilio
4447
+ * native) and `pcm_16000` (Telnyx native) work without resampling.
4448
+ * - `eleven_v3` is **not** supported — the WS endpoint rejects it.
4449
+ * - `optimize_streaming_latency` is officially deprecated and is not
4450
+ * exposed.
4451
+ */
4452
+
4453
+ /** Constructor options for {@link ElevenLabsWebSocketTTS}. */
4454
+ interface ElevenLabsWebSocketTTSOptions {
4455
+ apiKey: string;
4456
+ voiceId?: string;
4457
+ modelId?: ElevenLabsModel | string;
4458
+ outputFormat?: string;
4459
+ voiceSettings?: Record<string, unknown>;
4460
+ languageCode?: string;
4461
+ /** Let the server pick chunk timing. Default true. */
4462
+ autoMode?: boolean;
4463
+ /** WS keep-alive timeout in seconds (5–180). Default 60. */
4464
+ inactivityTimeout?: number;
4465
+ /**
4466
+ * Manual chunk schedule, only used when ``autoMode: false``. Each value
4467
+ * must be 5–500. ElevenLabs default is ``[120, 160, 250, 290]``.
4468
+ */
4469
+ chunkLengthSchedule?: number[];
4470
+ /** Outgoing audio re-chunk size in bytes. Default 4096. */
4471
+ chunkSize?: number;
4472
+ }
4473
+ /** WebSocket-based ElevenLabs TTS adapter — opt-in low-latency variant. */
4474
+ declare class ElevenLabsWebSocketTTS implements TTSAdapter {
4475
+ static readonly providerKey = "elevenlabs_ws";
4476
+ readonly apiKey: string;
4477
+ readonly voiceId: string;
4478
+ readonly modelId: string;
4479
+ readonly voiceSettings?: Record<string, unknown>;
4480
+ readonly languageCode?: string;
4481
+ readonly autoMode: boolean;
4482
+ readonly inactivityTimeout: number;
4483
+ readonly chunkLengthSchedule?: number[];
4484
+ readonly chunkSize: number;
4485
+ /**
4486
+ * The wire format requested over the ElevenLabs WS. Initially set from
4487
+ * the constructor; ``setTelephonyCarrier`` may auto-flip it to the
4488
+ * carrier's native codec when the caller did NOT pass ``outputFormat``
4489
+ * explicitly.
4490
+ */
4491
+ private _outputFormat;
4492
+ private readonly _outputFormatExplicit;
4493
+ /** Public read-only view of the (possibly auto-flipped) wire format. */
4494
+ get outputFormat(): string;
4495
+ constructor(opts: ElevenLabsWebSocketTTSOptions);
4496
+ /**
4497
+ * Hook called by ``StreamHandler`` to advise the carrier wire format.
4498
+ *
4499
+ * When the user did NOT pass an explicit ``outputFormat`` in the
4500
+ * constructor options, this flips the format to the carrier's native
4501
+ * wire codec — saving a client-side transcode step. Calling with an
4502
+ * unknown carrier (``""`` / ``"custom"``) is a no-op.
4503
+ *
4504
+ * When ``outputFormat`` was explicitly passed (incl. via the
4505
+ * ``forTwilio`` / ``forTelnyx`` factories), this method is a no-op —
4506
+ * the user's choice always wins.
4507
+ */
4508
+ setTelephonyCarrier(carrier: string): void;
4509
+ /** Pre-configured for Twilio Media Streams (`ulaw_8000`). */
4510
+ static forTwilio(opts: Omit<ElevenLabsWebSocketTTSOptions, 'outputFormat'>): ElevenLabsWebSocketTTS;
4511
+ /** Pre-configured for Telnyx (`pcm_16000`). */
4512
+ static forTelnyx(opts: Omit<ElevenLabsWebSocketTTSOptions, 'outputFormat'>): ElevenLabsWebSocketTTS;
4513
+ private buildUrl;
4514
+ /**
4515
+ * Single-shot synthesis: open WS, send text, yield bytes, close.
4516
+ *
4517
+ * Resilience contract:
4518
+ * - Connection bounded by ``CONNECT_TIMEOUT_MS`` (5s, was 15s).
4519
+ * - Each idle wait bounded by ``FRAME_TIMEOUT_MS`` (30s) so a stalled
4520
+ * server cannot keep the generator alive indefinitely.
4521
+ * - Permanent error handler attached BEFORE the open await — prevents
4522
+ * ``uncaughtException`` if an error fires after the once-listener
4523
+ * resolves.
4524
+ * - All event listeners removed in ``finally`` (no closure leak past
4525
+ * socket close).
4526
+ * - Server-reported ``error`` raises ``ElevenLabsTTSError``.
4527
+ * - Per-frame audio payload capped at ``MAX_AUDIO_B64_BYTES``.
4528
+ * - Best-effort EOS ``{"text":""}`` sent in finally (not immediately
4529
+ * after flush — auto_mode could otherwise truncate the tail audio).
4530
+ */
4531
+ synthesizeStream(text: string): AsyncGenerator<Buffer>;
4532
+ /** No-op — connections are per-utterance and torn down inside synthesizeStream. */
4533
+ close(): Promise<void>;
2991
4534
  }
2992
4535
 
4536
+ /** ElevenLabs WebSocket TTS for Patter pipeline mode (opt-in low-latency). */
4537
+
4538
+ /** Constructor options for the ElevenLabs WebSocket `TTS` adapter. */
4539
+ interface ElevenLabsWebSocketOptions {
4540
+ /** API key. Falls back to ELEVENLABS_API_KEY env var when omitted. */
4541
+ apiKey?: string;
4542
+ voiceId?: string;
4543
+ modelId?: ElevenLabsModel | string;
4544
+ outputFormat?: string;
4545
+ /** Let the server pick chunk timing. Default true. */
4546
+ autoMode?: boolean;
4547
+ voiceSettings?: Record<string, unknown>;
4548
+ languageCode?: string;
4549
+ /** WS keep-alive timeout in seconds (5–180). Default 60. */
4550
+ inactivityTimeout?: number;
4551
+ /** Manual chunk schedule, only used when ``autoMode: false``. */
4552
+ chunkLengthSchedule?: number[];
4553
+ }
4554
+ /** Options for the carrier-specific factories — same as the constructor minus `outputFormat`. */
4555
+ type ElevenLabsWebSocketCarrierOptions = Omit<ElevenLabsWebSocketOptions, 'outputFormat'>;
4556
+ /**
4557
+ * ElevenLabs streaming TTS over WebSocket.
4558
+ *
4559
+ * Drop-in replacement for `getpatter/tts/elevenlabs.TTS` (HTTP) using the
4560
+ * `stream-input` WebSocket endpoint. Saves the per-utterance HTTP request
4561
+ * setup time; otherwise behaves identically.
4562
+ *
4563
+ * @example
4564
+ * ```ts
4565
+ * import * as elevenlabsWs from "getpatter/tts/elevenlabs-ws";
4566
+ * const tts = new elevenlabsWs.TTS(); // reads ELEVENLABS_API_KEY
4567
+ * const tts = elevenlabsWs.TTS.forTwilio({ apiKey: "..." });
4568
+ * ```
4569
+ *
4570
+ * **Telephony optimisation** — use {@link TTS.forTwilio} (μ-law @ 8 kHz)
4571
+ * or {@link TTS.forTelnyx} (PCM @ 16 kHz) on phone calls.
4572
+ */
4573
+ declare class TTS$5 extends ElevenLabsWebSocketTTS {
4574
+ static readonly providerKey = "elevenlabs_ws";
4575
+ constructor(opts?: ElevenLabsWebSocketOptions);
4576
+ /** WebSocket TTS pre-configured for Twilio Media Streams (`ulaw_8000`). */
4577
+ static forTwilio(opts?: ElevenLabsWebSocketCarrierOptions): TTS$5;
4578
+ /** WebSocket TTS pre-configured for Telnyx (`pcm_16000`). */
4579
+ static forTelnyx(opts?: ElevenLabsWebSocketCarrierOptions): TTS$5;
4580
+ }
4581
+
4582
+ /**
4583
+ * OpenAI TTS adapter for Patter — HTTP `/v1/audio/speech` endpoint.
4584
+ *
4585
+ * Wraps `gpt-4o-mini-tts` (and legacy `tts-1*`) and ships a stateful
4586
+ * 24 kHz → 16/8 kHz resampler with anti-alias LPF so the output drops
4587
+ * directly into the telephony pipeline. See {@link OpenAITTS}.
4588
+ */
4589
+ /** OpenAI TTS adapter with built-in streaming resample to 16/8 kHz. */
2993
4590
  declare class OpenAITTS {
2994
4591
  private readonly apiKey;
2995
4592
  private readonly voice;
@@ -2997,7 +4594,8 @@ declare class OpenAITTS {
2997
4594
  private readonly instructions;
2998
4595
  private readonly speed;
2999
4596
  private readonly antiAlias;
3000
- constructor(apiKey: string, voice?: string, model?: string, instructions?: string | null, speed?: number | null, antiAlias?: boolean);
4597
+ private readonly targetSampleRate;
4598
+ constructor(apiKey: string, voice?: string, model?: string, instructions?: string | null, speed?: number | null, antiAlias?: boolean, targetSampleRate?: number);
3001
4599
  /**
3002
4600
  * Synthesise text to speech and return the full audio as a single Buffer.
3003
4601
  *
@@ -3017,29 +4615,36 @@ declare class OpenAITTS {
3017
4615
  */
3018
4616
  synthesizeStream(text: string): AsyncGenerator<Buffer>;
3019
4617
  /**
3020
- * Streaming 24 kHz → 16 kHz resampler (PCM16-LE). Applies a single-pole
3021
- * lowpass ahead of the 3:2 decimation and carries filter + sample state
3022
- * across chunks so the cadence doesn't reset at every network read.
4618
+ * Streaming 24 kHz → {16, 8} kHz resampler (PCM16-LE). Applies a single-pole
4619
+ * lowpass ahead of the decimation and carries filter + sample state across
4620
+ * chunks so the cadence doesn't reset at every network read.
4621
+ *
4622
+ * Output rate is selected by ``ctx.targetSampleRate``:
4623
+ * 16000 → 3:2 decimation (sample 0 + mid(1,2)) [default]
4624
+ * 8000 → 3:1 decimation (sample 0 only) [fix #46]
3023
4625
  *
3024
- * ``ctx.lpfEnabled`` (default true on the streaming path, false for the
3025
- * legacy static helper) controls whether the LPF is engaged — we keep
3026
- * the helper bit-exact for the downsample-only tests while the real
3027
- * streaming path gets anti-alias filtering.
4626
+ * ``ctx.lpfEnabled`` controls whether the LPF is engaged — kept disabled
4627
+ * for the legacy static helper so the bit-exact downsample-only tests
4628
+ * remain valid; the real streaming path always engages it.
3028
4629
  */
3029
4630
  static resampleStreaming(audio: Buffer, ctx: ResampleCtx): Buffer;
3030
4631
  /** @deprecated use {@link resampleStreaming} with persistent state. */
3031
4632
  static resample24kTo16k(audio: Buffer): Buffer;
3032
4633
  }
4634
+ /** Streaming-resample state passed across calls to {@link OpenAITTS.resampleStreaming}. */
3033
4635
  interface ResampleCtx {
3034
4636
  carryByte: number | null;
3035
4637
  leftover: number[];
3036
4638
  lpfPrev: number;
3037
4639
  /** Enable the single-pole lowpass ahead of decimation. Default true. */
3038
4640
  lpfEnabled?: boolean;
4641
+ /** Final output sample rate. 16000 = 3:2 decimation, 8000 = 3:1. */
4642
+ targetSampleRate?: number;
3039
4643
  }
3040
4644
 
3041
4645
  /** OpenAI TTS for Patter pipeline mode. */
3042
4646
 
4647
+ /** Constructor options for the OpenAI `TTS` adapter. */
3043
4648
  interface OpenAITTSOptions {
3044
4649
  /** API key. Falls back to OPENAI_API_KEY env var when omitted. */
3045
4650
  apiKey?: string;
@@ -3066,22 +4671,70 @@ interface OpenAITTSOptions {
3066
4671
  * const tts = new openai.TTS({ apiKey: "sk-...", voice: "alloy" });
3067
4672
  * ```
3068
4673
  */
3069
- declare class TTS$3 extends OpenAITTS {
4674
+ declare class TTS$4 extends OpenAITTS {
3070
4675
  static readonly providerKey = "openai_tts";
3071
4676
  constructor(opts?: OpenAITTSOptions);
3072
4677
  }
3073
4678
 
4679
+ /**
4680
+ * Cartesia TTS provider — HTTP `/tts/bytes` endpoint.
4681
+ *
4682
+ * Cartesia also offers a WebSocket streaming mode with word timestamps;
4683
+ * this provider focuses on the chunked-bytes HTTP API which maps cleanly
4684
+ * onto Patter's `synthesize(text)` contract and keeps the provider
4685
+ * dependency-free (just `fetch`).
4686
+ *
4687
+ * Default model is `sonic-3` (GA snapshot `sonic-3-2026-01-12`) — Cartesia's
4688
+ * current GA model with a documented ~90 ms TTFB target. Voice IDs from the
4689
+ * sonic-2 generation (including the default Katie voice) remain compatible.
4690
+ *
4691
+ * **Telephony optimization** — the constructor default
4692
+ * `sampleRate=16000` is correct for web playback, dashboard previews, and
4693
+ * 16 kHz pipelines. For real phone calls, use the carrier-specific
4694
+ * factories instead:
4695
+ *
4696
+ * - {@link CartesiaTTS.forTwilio} requests `sampleRate=8000` natively from
4697
+ * Cartesia. Twilio's media-stream WebSocket expects μ-law @ 8 kHz, so
4698
+ * the SDK normally resamples 16 kHz → 8 kHz before doing the PCM →
4699
+ * μ-law transcode in `TwilioAudioSender`. Asking Cartesia for 8 kHz
4700
+ * PCM at the source skips the resample step (saves ~10–30 ms first-
4701
+ * byte plus per-frame CPU and removes a potential aliasing source).
4702
+ * The PCM → μ-law transcode still happens client-side.
4703
+ * - {@link CartesiaTTS.forTelnyx} requests `sampleRate=16000`. Telnyx
4704
+ * negotiates L16/16000 on its bidirectional media WebSocket, so
4705
+ * 16 kHz PCM is already the format used end-to-end and no
4706
+ * transcoding happens. This is the same as the bare-constructor
4707
+ * default and exists for API symmetry with the Twilio factory.
4708
+ */
4709
+ /** Known Cartesia TTS models. */
4710
+ declare const CartesiaTTSModel: {
4711
+ readonly SONIC_3: "sonic-3";
4712
+ readonly SONIC_2: "sonic-2";
4713
+ readonly SONIC: "sonic";
4714
+ };
4715
+ type CartesiaTTSModel = (typeof CartesiaTTSModel)[keyof typeof CartesiaTTSModel];
4716
+ /** Common PCM sample rates accepted by the Cartesia bytes endpoint. */
4717
+ declare const CartesiaTTSSampleRate: {
4718
+ readonly HZ_8000: 8000;
4719
+ readonly HZ_16000: 16000;
4720
+ readonly HZ_22050: 22050;
4721
+ readonly HZ_24000: 24000;
4722
+ readonly HZ_44100: 44100;
4723
+ };
4724
+ type CartesiaTTSSampleRate = (typeof CartesiaTTSSampleRate)[keyof typeof CartesiaTTSSampleRate];
4725
+ /** Constructor options for {@link CartesiaTTS}. */
3074
4726
  interface CartesiaTTSOptions$1 {
3075
- model?: string;
4727
+ model?: CartesiaTTSModel | string;
3076
4728
  voice?: string;
3077
4729
  language?: string;
3078
- sampleRate?: number;
4730
+ sampleRate?: CartesiaTTSSampleRate | number;
3079
4731
  speed?: string | number;
3080
4732
  emotion?: string | string[];
3081
4733
  volume?: number;
3082
4734
  baseUrl?: string;
3083
4735
  apiVersion?: string;
3084
4736
  }
4737
+ /** Cartesia TTS provider backed by the HTTP `/tts/bytes` streaming endpoint. */
3085
4738
  declare class CartesiaTTS {
3086
4739
  private readonly apiKey;
3087
4740
  private readonly model;
@@ -3126,6 +4779,7 @@ declare class CartesiaTTS {
3126
4779
 
3127
4780
  /** Cartesia TTS for Patter pipeline mode. */
3128
4781
 
4782
+ /** Constructor options for the Cartesia `TTS` adapter. */
3129
4783
  interface CartesiaTTSOptions {
3130
4784
  /** API key. Falls back to CARTESIA_API_KEY env var when omitted. */
3131
4785
  apiKey?: string;
@@ -3160,17 +4814,18 @@ type CartesiaCarrierOptions = Omit<CartesiaTTSOptions, "sampleRate">;
3160
4814
  * or {@link TTS.forTelnyx} (PCM @ 16 kHz, native Telnyx default) on
3161
4815
  * phone calls.
3162
4816
  */
3163
- declare class TTS$2 extends CartesiaTTS {
4817
+ declare class TTS$3 extends CartesiaTTS {
3164
4818
  static readonly providerKey = "cartesia_tts";
3165
4819
  constructor(opts?: CartesiaTTSOptions);
3166
4820
  /** Pipeline TTS pre-configured for Twilio Media Streams (PCM @ 8 kHz). */
3167
- static forTwilio(opts?: CartesiaCarrierOptions): TTS$2;
3168
- static forTwilio(apiKey: string, options?: Omit<CartesiaTTSOptions, "sampleRate">): TTS$2;
4821
+ static forTwilio(opts?: CartesiaCarrierOptions): TTS$3;
4822
+ static forTwilio(apiKey: string, options?: Omit<CartesiaTTSOptions, "sampleRate">): TTS$3;
3169
4823
  /** Pipeline TTS pre-configured for Telnyx (PCM @ 16 kHz). */
3170
- static forTelnyx(opts?: CartesiaCarrierOptions): TTS$2;
3171
- static forTelnyx(apiKey: string, options?: Omit<CartesiaTTSOptions, "sampleRate">): TTS$2;
4824
+ static forTelnyx(opts?: CartesiaCarrierOptions): TTS$3;
4825
+ static forTelnyx(apiKey: string, options?: Omit<CartesiaTTSOptions, "sampleRate">): TTS$3;
3172
4826
  }
3173
4827
 
4828
+ /** Constructor options for {@link RimeTTS}. */
3174
4829
  interface RimeTTSOptions$1 {
3175
4830
  model?: string;
3176
4831
  speaker?: string;
@@ -3186,6 +4841,7 @@ interface RimeTTSOptions$1 {
3186
4841
  phonemizeBetweenBrackets?: boolean;
3187
4842
  baseUrl?: string;
3188
4843
  }
4844
+ /** Rime TTS adapter for the `users.rime.ai/v1/rime-tts` HTTP streaming endpoint. */
3189
4845
  declare class RimeTTS {
3190
4846
  private readonly apiKey;
3191
4847
  private readonly model;
@@ -3204,6 +4860,7 @@ declare class RimeTTS {
3204
4860
  private readonly totalTimeoutMs;
3205
4861
  constructor(apiKey: string, opts?: RimeTTSOptions$1);
3206
4862
  private buildPayload;
4863
+ /** Synthesize text and return the concatenated audio buffer. */
3207
4864
  synthesize(text: string): Promise<Buffer>;
3208
4865
  /**
3209
4866
  * Synthesize text and yield raw PCM_S16LE chunks at the configured
@@ -3214,6 +4871,7 @@ declare class RimeTTS {
3214
4871
 
3215
4872
  /** Rime TTS for Patter pipeline mode. */
3216
4873
 
4874
+ /** Constructor options for the Rime `TTS` adapter. */
3217
4875
  interface RimeTTSOptions {
3218
4876
  /** API key. Falls back to RIME_API_KEY env var when omitted. */
3219
4877
  apiKey?: string;
@@ -3241,13 +4899,14 @@ interface RimeTTSOptions {
3241
4899
  * const tts = new rime.TTS({ apiKey: "...", speaker: "astra" });
3242
4900
  * ```
3243
4901
  */
3244
- declare class TTS$1 extends RimeTTS {
4902
+ declare class TTS$2 extends RimeTTS {
3245
4903
  static readonly providerKey = "rime";
3246
4904
  constructor(opts?: RimeTTSOptions);
3247
4905
  }
3248
4906
 
3249
4907
  /** LMNT TTS for Patter pipeline mode. */
3250
4908
 
4909
+ /** Constructor options for the LMNT `TTS` adapter. */
3251
4910
  interface LMNTTTSOptions {
3252
4911
  /** API key. Falls back to LMNT_API_KEY env var when omitted. */
3253
4912
  apiKey?: string;
@@ -3270,13 +4929,135 @@ interface LMNTTTSOptions {
3270
4929
  * const tts = new lmnt.TTS({ apiKey: "...", voice: "leah" });
3271
4930
  * ```
3272
4931
  */
3273
- declare class TTS extends LMNTTTS {
4932
+ declare class TTS$1 extends LMNTTTS {
3274
4933
  static readonly providerKey = "lmnt";
3275
4934
  constructor(opts?: LMNTTTSOptions);
3276
4935
  }
3277
4936
 
4937
+ /**
4938
+ * Inworld TTS provider — HTTP NDJSON streaming endpoint.
4939
+ *
4940
+ * Calls `POST https://api.inworld.ai/tts/v1/voice:stream`. The response is
4941
+ * NDJSON: one JSON object per line of the form
4942
+ * `{"result": {"audioContent": "<base64-PCM_S16LE>", "timestampInfo": ...}}`
4943
+ *
4944
+ * The default config requests `audioEncoding=PCM` at 16 kHz so the output drops
4945
+ * straight into the Patter pipeline without transcoding. Inworld TTS-2 is the
4946
+ * default model — pass `model: "inworld-tts-1.5-max"` for the prior generation.
4947
+ */
4948
+ /** Inworld TTS model families. */
4949
+ declare const InworldModel: {
4950
+ readonly TTS_2: "inworld-tts-2";
4951
+ readonly TTS_1_5_MAX: "inworld-tts-1.5-max";
4952
+ readonly TTS_1_5_MINI: "inworld-tts-1.5-mini";
4953
+ readonly TTS_1_MAX: "inworld-tts-1-max";
4954
+ readonly TTS_1: "inworld-tts-1";
4955
+ };
4956
+ type InworldModel = (typeof InworldModel)[keyof typeof InworldModel];
4957
+ /** Inworld audio encoding values accepted by the REST API. */
4958
+ declare const InworldAudioEncoding: {
4959
+ readonly PCM: "PCM";
4960
+ readonly LINEAR16: "LINEAR16";
4961
+ readonly OGG_OPUS: "OGG_OPUS";
4962
+ readonly MP3: "MP3";
4963
+ };
4964
+ type InworldAudioEncoding = (typeof InworldAudioEncoding)[keyof typeof InworldAudioEncoding];
4965
+ /** TTS-2 stability mode (ignored by older models). */
4966
+ declare const InworldDeliveryMode: {
4967
+ readonly EXPRESSIVE: "EXPRESSIVE";
4968
+ readonly BALANCED: "BALANCED";
4969
+ readonly STABLE: "STABLE";
4970
+ };
4971
+ type InworldDeliveryMode = (typeof InworldDeliveryMode)[keyof typeof InworldDeliveryMode];
4972
+ /** Constructor options for {@link InworldTTS}. */
4973
+ interface InworldTTSOptions$1 {
4974
+ /** Model id. Defaults to `"inworld-tts-2"`. */
4975
+ model?: InworldModel | string;
4976
+ /** Voice name (e.g. `"Ashley"`, `"Olivia"`, `"Craig"`, `"Remy"`). */
4977
+ voice?: string;
4978
+ /** BCP-47 language tag, e.g. `"en"`, `"it"`, `"es"`. */
4979
+ language?: string;
4980
+ /** Output audio encoding. Defaults to `"PCM"` (raw PCM_S16LE). */
4981
+ audioEncoding?: InworldAudioEncoding | string;
4982
+ /** Output sample rate in Hz. Defaults to 16000. */
4983
+ sampleRate?: number;
4984
+ /** Bitrate hint (bits/sec) — used for OGG_OPUS / MP3. Default 64000. */
4985
+ bitrate?: number;
4986
+ /** Sampling temperature 0.0–2.0 (TTS-1.5 only — ignored by TTS-2). */
4987
+ temperature?: number;
4988
+ /** Speaking rate multiplier 0.5–1.5. Default 1.0. */
4989
+ speakingRate?: number;
4990
+ /** Stability mode for TTS-2 (`EXPRESSIVE` / `BALANCED` / `STABLE`). */
4991
+ deliveryMode?: InworldDeliveryMode | string;
4992
+ /** Override the REST endpoint (e.g. for on-prem deployments). */
4993
+ baseUrl?: string;
4994
+ }
4995
+ /**
4996
+ * Inworld TTS over the `/tts/v1/voice:stream` HTTP NDJSON endpoint.
4997
+ *
4998
+ * The Inworld dashboard provides a Base64 token that is already in the form
4999
+ * expected by the `Authorization: Basic <token>` header — pass it as-is. If
5000
+ * you only have the raw API key string, base64-encode `${apiKey}:` yourself
5001
+ * before calling the constructor.
5002
+ */
5003
+ declare class InworldTTS {
5004
+ private readonly authToken;
5005
+ private readonly model;
5006
+ private readonly voice;
5007
+ private readonly language?;
5008
+ private readonly audioEncoding;
5009
+ private readonly sampleRate;
5010
+ private readonly bitrate;
5011
+ private readonly temperature?;
5012
+ private readonly speakingRate;
5013
+ private readonly deliveryMode?;
5014
+ private readonly baseUrl;
5015
+ constructor(authToken: string, opts?: InworldTTSOptions$1);
5016
+ private buildPayload;
5017
+ /** Synthesize text and return the concatenated audio buffer. */
5018
+ synthesize(text: string): Promise<Buffer>;
5019
+ /**
5020
+ * Yield audio chunks as they arrive. With the default `audioEncoding=PCM`
5021
+ * these are raw PCM_S16LE bytes at `sampleRate`.
5022
+ */
5023
+ synthesizeStream(text: string): AsyncGenerator<Buffer>;
5024
+ }
5025
+
5026
+ /** Inworld TTS for Patter pipeline mode. */
5027
+
5028
+ /** Constructor options for the Inworld `TTS` adapter. */
5029
+ interface InworldTTSOptions {
5030
+ /** Inworld Base64 auth token. Falls back to INWORLD_API_KEY env var. */
5031
+ apiKey?: string;
5032
+ model?: InworldModel | string;
5033
+ voice?: string;
5034
+ language?: string;
5035
+ audioEncoding?: InworldAudioEncoding | string;
5036
+ sampleRate?: number;
5037
+ bitrate?: number;
5038
+ temperature?: number;
5039
+ speakingRate?: number;
5040
+ deliveryMode?: InworldDeliveryMode | string;
5041
+ baseUrl?: string;
5042
+ }
5043
+ /**
5044
+ * Inworld TTS — defaults to the TTS-2 model.
5045
+ *
5046
+ * @example
5047
+ * ```ts
5048
+ * import * as inworld from "getpatter/tts/inworld";
5049
+ * const tts = new inworld.TTS(); // reads INWORLD_API_KEY
5050
+ * const tts = new inworld.TTS({ apiKey: "...", voice: "Olivia", language: "en" });
5051
+ * ```
5052
+ */
5053
+ declare class TTS extends InworldTTS {
5054
+ static readonly providerKey = "inworld";
5055
+ constructor(opts?: InworldTTSOptions);
5056
+ }
5057
+
3278
5058
  /** OpenAI LLM for Patter pipeline mode. */
3279
5059
 
5060
+ /** Constructor options for the OpenAI Chat Completions `LLM` adapter. */
3280
5061
  interface OpenAILLMOptions {
3281
5062
  /** API key. Falls back to OPENAI_API_KEY env var when omitted. */
3282
5063
  apiKey?: string;
@@ -3327,16 +5108,8 @@ declare class LLM$4 extends OpenAILLMProvider {
3327
5108
  * Anthropic shape and the vendor event stream is normalised back into
3328
5109
  * Patter's ``{ type: 'text' | 'tool_call' | 'done' }`` chunk protocol.
3329
5110
  *
3330
- * Portions adapted from LiveKit Agents
3331
- * (https://github.com/livekit/agents, commit
3332
- * 78a66bcf79c5cea82989401c408f1dff4b961a5b,
3333
- * file livekit-plugins/livekit-plugins-anthropic/livekit/plugins/anthropic/llm.py),
3334
- * licensed under Apache License 2.0. Copyright 2023 LiveKit, Inc.
3335
- *
3336
- * Adaptations from the LiveKit source:
3337
- * * Ported the Python async class pair (``llm.LLM`` /
3338
- * ``llm.LLMStream``) into a single TypeScript class that satisfies
3339
- * Patter's ``LLMProvider`` interface.
5111
+ * Implementation notes:
5112
+ * * Single TypeScript class satisfying Patter's ``LLMProvider`` interface.
3340
5113
  * * Uses native ``fetch`` + SSE parsing instead of the official
3341
5114
  * ``@anthropic-ai/sdk`` to keep Patter's runtime dependencies lean
3342
5115
  * (mirrors how ``OpenAILLMProvider`` is implemented in
@@ -3346,6 +5119,7 @@ declare class LLM$4 extends OpenAILLMProvider {
3346
5119
  * chunk protocol.
3347
5120
  */
3348
5121
 
5122
+ /** Constructor options for {@link AnthropicLLMProvider}. */
3349
5123
  interface AnthropicLLMOptions$1 {
3350
5124
  apiKey: string;
3351
5125
  model?: string;
@@ -3377,11 +5151,13 @@ declare class AnthropicLLMProvider implements LLMProvider {
3377
5151
  private readonly anthropicVersion;
3378
5152
  private readonly promptCaching;
3379
5153
  constructor(options: AnthropicLLMOptions$1);
3380
- stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
5154
+ /** Stream Patter-format LLM chunks for the given OpenAI-style chat history. */
5155
+ stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<LLMChunk, void, unknown>;
3381
5156
  }
3382
5157
 
3383
5158
  /** Anthropic Claude LLM for Patter pipeline mode. */
3384
5159
 
5160
+ /** Constructor options for the Anthropic Claude `LLM` adapter. */
3385
5161
  interface AnthropicLLMOptions {
3386
5162
  /** API key. Falls back to ANTHROPIC_API_KEY env var when omitted. */
3387
5163
  apiKey?: string;
@@ -3430,20 +5206,11 @@ declare class LLM$3 extends AnthropicLLMProvider {
3430
5206
  *
3431
5207
  * Groq exposes an OpenAI-compatible Chat Completions API. We reuse the
3432
5208
  * streaming code path by implementing the same SSE parser as
3433
- * ``OpenAILLMProvider`` but pointed at ``api.groq.com``.
3434
- *
3435
- * Portions adapted from LiveKit Agents
3436
- * (https://github.com/livekit/agents, commit
3437
- * 78a66bcf79c5cea82989401c408f1dff4b961a5b,
3438
- * file livekit-plugins/livekit-plugins-groq/livekit/plugins/groq/services.py),
3439
- * licensed under Apache License 2.0. Copyright LiveKit, Inc.
3440
- *
3441
- * Adaptations from the LiveKit source:
3442
- * * Ported the Python ``groq.LLM`` subclass (which subclasses the
3443
- * LiveKit OpenAI plugin) into a tiny TypeScript wrapper that swaps
3444
- * the base URL and defaults to ``llama-3.3-70b-versatile``.
5209
+ * ``OpenAILLMProvider`` but pointed at ``api.groq.com``. Defaults to
5210
+ * ``llama-3.3-70b-versatile``.
3445
5211
  */
3446
5212
 
5213
+ /** Constructor options for {@link GroqLLMProvider}. */
3447
5214
  interface GroqLLMOptions$1 {
3448
5215
  apiKey: string;
3449
5216
  model?: string;
@@ -3485,11 +5252,13 @@ declare class GroqLLMProvider implements LLMProvider {
3485
5252
  private readonly presencePenalty?;
3486
5253
  private readonly stop?;
3487
5254
  constructor(options: GroqLLMOptions$1);
3488
- stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
5255
+ /** Stream Patter-format LLM chunks from the Groq chat completions API. */
5256
+ stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<LLMChunk, void, unknown>;
3489
5257
  }
3490
5258
 
3491
5259
  /** Groq LLM for Patter pipeline mode. */
3492
5260
 
5261
+ /** Constructor options for the Groq `LLM` adapter. */
3493
5262
  interface GroqLLMOptions {
3494
5263
  /** API key. Falls back to GROQ_API_KEY env var when omitted. */
3495
5264
  apiKey?: string;
@@ -3542,21 +5311,14 @@ declare class LLM$2 extends GroqLLMProvider {
3542
5311
  * compression to reduce TTFT for requests with large prompts
3543
5312
  * (see https://inference-docs.cerebras.ai/payload-optimization).
3544
5313
  *
3545
- * Portions adapted from LiveKit Agents
3546
- * (https://github.com/livekit/agents, commit
3547
- * 78a66bcf79c5cea82989401c408f1dff4b961a5b,
3548
- * file livekit-plugins/livekit-plugins-cerebras/livekit/plugins/cerebras/llm.py),
3549
- * licensed under Apache License 2.0. Copyright 2026 LiveKit, Inc.
3550
- *
3551
- * Adaptations from the LiveKit source:
3552
- * * LiveKit's ``cerebras.LLM`` subclasses the LiveKit OpenAI plugin.
3553
- * Patter's analogue is a tiny wrapper around ``fetch`` that swaps
3554
- * the base URL and default model.
3555
- * * The msgpack payload optimisation from LiveKit is Python-only
3556
- * (msgpack in Node land isn't as standard); only gzip compression
3557
- * is ported. Enable with ``gzipCompression: true``.
5314
+ * Implementation notes:
5315
+ * * Tiny wrapper around ``fetch`` that swaps the base URL and default
5316
+ * model relative to the OpenAI-compatible API.
5317
+ * * Gzip compression of the request body is supported via
5318
+ * ``gzipCompression: true`` (default).
3558
5319
  */
3559
5320
 
5321
+ /** Constructor options for {@link CerebrasLLMProvider}. */
3560
5322
  interface CerebrasLLMOptions$1 {
3561
5323
  apiKey: string;
3562
5324
  model?: string;
@@ -3624,11 +5386,13 @@ declare class CerebrasLLMProvider implements LLMProvider {
3624
5386
  private readonly presencePenalty?;
3625
5387
  private readonly stop?;
3626
5388
  constructor(options: CerebrasLLMOptions$1);
3627
- stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
5389
+ /** Stream Patter-format LLM chunks from the Cerebras chat completions API. */
5390
+ stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<LLMChunk, void, unknown>;
3628
5391
  }
3629
5392
 
3630
5393
  /** Cerebras LLM for Patter pipeline mode. */
3631
5394
 
5395
+ /** Constructor options for the Cerebras `LLM` adapter. */
3632
5396
  interface CerebrasLLMOptions {
3633
5397
  /** API key. Falls back to CEREBRAS_API_KEY env var when omitted. */
3634
5398
  apiKey?: string;
@@ -3685,23 +5449,16 @@ declare class LLM$1 extends CerebrasLLMProvider {
3685
5449
  * and ``tools`` shapes, and streamed response parts are normalised to
3686
5450
  * Patter's ``{ type: 'text' | 'tool_call' | 'done' }`` chunks.
3687
5451
  *
3688
- * Portions adapted from LiveKit Agents
3689
- * (https://github.com/livekit/agents, commit
3690
- * 78a66bcf79c5cea82989401c408f1dff4b961a5b,
3691
- * file livekit-plugins/livekit-plugins-google/livekit/plugins/google/llm.py),
3692
- * licensed under Apache License 2.0. Copyright 2023 LiveKit, Inc.
3693
- *
3694
- * Adaptations from the LiveKit source:
3695
- * * LiveKit uses the ``google-genai`` Python SDK. The TypeScript port
3696
- * uses native ``fetch`` against the REST SSE endpoint so we don't
5452
+ * Implementation notes:
5453
+ * * Uses native ``fetch`` against the REST SSE endpoint so we don't
3697
5454
  * pull in a large SDK dependency.
3698
- * * Collapsed the Python ``llm.LLM`` / ``llm.LLMStream`` pair into a
3699
- * single class that satisfies Patter's ``LLMProvider`` interface.
3700
- * * Dropped Vertex AI support (which requires GCP auth) only the
3701
- * Developer API (API key) path is ported. Vertex can be added by a
3702
- * follow-up PR once credential plumbing is in place.
5455
+ * * Single class that satisfies Patter's ``LLMProvider`` interface.
5456
+ * * Vertex AI support (which requires GCP auth) is not included — only
5457
+ * the Developer API (API key) path is supported. Vertex can be added
5458
+ * by a follow-up PR once credential plumbing is in place.
3703
5459
  */
3704
5460
 
5461
+ /** Constructor options for {@link GoogleLLMProvider}. */
3705
5462
  interface GoogleLLMOptions$1 {
3706
5463
  apiKey: string;
3707
5464
  model?: string;
@@ -3717,11 +5474,13 @@ declare class GoogleLLMProvider implements LLMProvider {
3717
5474
  private readonly temperature?;
3718
5475
  private readonly maxOutputTokens?;
3719
5476
  constructor(options: GoogleLLMOptions$1);
3720
- stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
5477
+ /** Stream Patter-format LLM chunks from the Gemini SSE endpoint. */
5478
+ stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<LLMChunk, void, unknown>;
3721
5479
  }
3722
5480
 
3723
5481
  /** Google Gemini LLM for Patter pipeline mode. */
3724
5482
 
5483
+ /** Constructor options for the Google Gemini `LLM` adapter. */
3725
5484
  interface GoogleLLMOptions {
3726
5485
  /**
3727
5486
  * API key. Falls back to ``GEMINI_API_KEY`` first, then ``GOOGLE_API_KEY``.
@@ -3754,28 +5513,24 @@ declare class LLM extends GoogleLLMProvider {
3754
5513
  }
3755
5514
 
3756
5515
  /**
3757
- * Silero VAD provider (TypeScript port).
5516
+ * Silero VAD provider.
3758
5517
  *
3759
5518
  * Acoustic voice activity detection backed by the Silero ONNX model. Buffers
3760
5519
  * incoming int16 LE PCM frames, runs inference on fixed-size windows
3761
5520
  * (256 samples at 8 kHz, 512 at 16 kHz), applies an exponential probability
3762
5521
  * filter, and emits VADEvent transitions (speech_start / speech_end).
3763
5522
  *
3764
- * Ported from LiveKit Agents (Apache 2.0):
3765
- * https://github.com/livekit/agents
3766
- * Sources:
3767
- * - livekit-plugins/livekit-plugins-silero/livekit/plugins/silero/vad.py
3768
- * - livekit-plugins/livekit-plugins-silero/livekit/plugins/silero/onnx_model.py
3769
- *
3770
- * Adaptations for Patter:
5523
+ * Notes:
3771
5524
  * - Input is raw PCM `Buffer` (int16 LE, mono) via
3772
- * `processFrame(pcmChunk, sampleRate)`, not `livekit.rtc.AudioFrame`.
5525
+ * `processFrame(pcmChunk, sampleRate)`.
3773
5526
  * - onnxruntime-node is loaded lazily as an optional dependency.
3774
- * - Emits `VADEvent` (Patter protocol) instead of LiveKit event types.
5527
+ * - Emits `VADEvent` (Patter protocol).
3775
5528
  */
3776
5529
 
3777
5530
  declare const SUPPORTED_SAMPLE_RATES: readonly [8000, 16000];
5531
+ /** Sample rates supported by the bundled Silero ONNX model (8 kHz or 16 kHz). */
3778
5532
  type SileroSampleRate = (typeof SUPPORTED_SAMPLE_RATES)[number];
5533
+ /** Options accepted by {@link SileroVAD.load}. */
3779
5534
  interface SileroVADOptions {
3780
5535
  minSpeechDuration?: number;
3781
5536
  minSilenceDuration?: number;
@@ -3790,13 +5545,16 @@ interface SileroVADOptions {
3790
5545
  * Minimal structural type for the subset of `onnxruntime-node` we depend on.
3791
5546
  * Declared locally so consumers don't need the package installed at build time.
3792
5547
  */
5548
+ /** Minimal subset of `onnxruntime-node`'s `InferenceSession` that Silero needs. */
3793
5549
  interface OnnxInferenceSession {
3794
5550
  run(feeds: Record<string, OnnxTensor>): Promise<Record<string, OnnxTensor>>;
3795
5551
  }
5552
+ /** Minimal subset of an `onnxruntime-node` tensor used by Silero inference. */
3796
5553
  interface OnnxTensor {
3797
5554
  readonly data: Float32Array | BigInt64Array;
3798
5555
  readonly dims: readonly number[];
3799
5556
  }
5557
+ /** Minimal `onnxruntime-node` module surface accepted by {@link SileroVAD}. */
3800
5558
  interface OnnxRuntime {
3801
5559
  InferenceSession: {
3802
5560
  create(pathOrBuffer: string | Uint8Array, options?: Record<string, unknown>): Promise<OnnxInferenceSession>;
@@ -3822,21 +5580,50 @@ declare class SileroVAD implements VADProvider {
3822
5580
  private closed;
3823
5581
  private constructor();
3824
5582
  /**
3825
- * Load the Silero VAD model. Defaults match the LiveKit Silero plugin.
5583
+ * Load the Silero VAD model.
3826
5584
  * Throws if `onnxruntime-node` is not installed.
3827
5585
  */
3828
5586
  static load(options?: SileroVADOptions): Promise<SileroVAD>;
5587
+ /**
5588
+ * Convenience factory for telephony pipelines.
5589
+ *
5590
+ * Identical to {@link SileroVAD.load} but pins `sampleRate` to 16000 Hz
5591
+ * — the only sample rate Patter's pipeline-mode audio bus uses (8 kHz
5592
+ * mulaw from Twilio is upsampled to 16 kHz PCM before reaching the
5593
+ * VAD). Every other parameter mirrors the upstream Silero VAD
5594
+ * defaults from `snakers4/silero-vad` (`get_speech_timestamps` /
5595
+ * `VADIterator`):
5596
+ *
5597
+ * - `activationThreshold = 0.5` — upstream `threshold`
5598
+ * - `deactivationThreshold = 0.35` — upstream `neg_threshold = threshold - 0.15`
5599
+ * - `minSpeechDuration = 0.25` — upstream `min_speech_duration_ms = 250`
5600
+ * - `minSilenceDuration = 0.1` — upstream `min_silence_duration_ms = 100`
5601
+ * - `prefixPaddingDuration = 0.03` — upstream `speech_pad_ms = 30`
5602
+ *
5603
+ * Override any field by passing `options`. Deployments that experience
5604
+ * truncation on natural pauses can raise `minSilenceDuration` (e.g.
5605
+ * 0.5–1.0 s) per call site rather than as a global default.
5606
+ *
5607
+ * @example
5608
+ * ```ts
5609
+ * const vad = await SileroVAD.forPhoneCall();
5610
+ * // or, if natural-pause truncation is observed:
5611
+ * const vad = await SileroVAD.forPhoneCall({ minSilenceDuration: 0.5 });
5612
+ * ```
5613
+ */
5614
+ static forPhoneCall(options?: SileroVADOptions): Promise<SileroVAD>;
3829
5615
  /**
3830
5616
  * Internal factory used by tests — bypasses onnxruntime-node loading.
3831
5617
  * @internal
3832
5618
  */
3833
5619
  static fromOnnxModel(runtime: OnnxRuntime, session: OnnxInferenceSession, options: Required<Omit<SileroVADOptions, 'onnxFilePath' | 'forceCpu'>>): SileroVAD;
5620
+ /** Sample rate (Hz) the underlying ONNX model was loaded with. */
3834
5621
  get sampleRate(): SileroSampleRate;
3835
5622
  /**
3836
5623
  * Number of int16 PCM samples that must be provided per call to
3837
5624
  * processFrame for the model to run one inference window.
3838
5625
  *
3839
- * Constraint (ported from LiveKit Agents / Silero ONNX spec):
5626
+ * Constraint (Silero ONNX spec):
3840
5627
  * - 16 000 Hz → 512 samples (32 ms)
3841
5628
  * - 8 000 Hz → 256 samples (32 ms)
3842
5629
  *
@@ -3847,8 +5634,10 @@ declare class SileroVAD implements VADProvider {
3847
5634
  * passing exactly one window per call minimises heap allocation.
3848
5635
  */
3849
5636
  numFramesRequired(): number;
5637
+ /** Run VAD on a PCM16 chunk; returns a transition event or null if no change. */
3850
5638
  processFrame(pcmChunk: Buffer, sampleRate: number): Promise<VADEvent | null>;
3851
5639
  private advanceState;
5640
+ /** Mark the VAD as closed; subsequent processFrame calls throw. */
3852
5641
  close(): Promise<void>;
3853
5642
  }
3854
5643
 
@@ -3924,6 +5713,8 @@ interface StatefulResamplerOptions {
3924
5713
  * - 16 000 → 8 000 Hz (2:1 decimation with 5-tap FIR anti-alias)
3925
5714
  * - 8 000 → 16 000 Hz (1:2 linear interpolation)
3926
5715
  * - 24 000 → 16 000 Hz (3:2 linear interpolation)
5716
+ * - 24 000 → 8 000 Hz (3:1 decimation with linear interpolation;
5717
+ * collapses 24k→16k→8k chain — fix #46)
3927
5718
  *
3928
5719
  * All methods accept and return Buffer (PCM16-LE, mono by default).
3929
5720
  */
@@ -3998,6 +5789,10 @@ declare class StatefulResampler {
3998
5789
  * handled using `resample24Last`.
3999
5790
  */
4000
5791
  private _resample24kTo16k;
5792
+ /** 3:1 decimation — collapses the 24k→16k→8k chain into a single step. */
5793
+ private _resample24kTo8k;
5794
+ /** Shared phase-stepping resampler used by 24→16 (step 1.5) and 24→8 (step 3). */
5795
+ private _resample24kStep;
4001
5796
  }
4002
5797
  /** Create a stateful 16 kHz → 8 kHz downsampling resampler. */
4003
5798
  declare function createResampler16kTo8k(): StatefulResampler;
@@ -4005,6 +5800,8 @@ declare function createResampler16kTo8k(): StatefulResampler;
4005
5800
  declare function createResampler8kTo16k(): StatefulResampler;
4006
5801
  /** Create a stateful 24 kHz → 16 kHz resampler (3:2 linear interpolation). */
4007
5802
  declare function createResampler24kTo16k(): StatefulResampler;
5803
+ /** Create a stateful 24 kHz → 8 kHz resampler (3:1 decimation, fix #46). */
5804
+ declare function createResampler24kTo8k(): StatefulResampler;
4008
5805
  /**
4009
5806
  * Upsample 8 kHz PCM16 to 16 kHz using linear interpolation.
4010
5807
  *
@@ -4051,6 +5848,7 @@ declare function resample24kTo16k(pcm24k: Buffer): Buffer;
4051
5848
  *
4052
5849
  * Install: npm install cloudflared
4053
5850
  */
5851
+ /** Handle returned by `startTunnel` exposing the public hostname and a stopper. */
4054
5852
  interface TunnelHandle {
4055
5853
  /** Public hostname (no protocol), e.g. "random-name.trycloudflare.com" */
4056
5854
  hostname: string;
@@ -4073,7 +5871,9 @@ declare function startTunnel(port: number, timeoutMs?: number): Promise<TunnelHa
4073
5871
  * that provides immutable messages, automatic ID generation, truncation
4074
5872
  * preserving system prompts, and format conversion for OpenAI / Anthropic.
4075
5873
  */
5874
+ /** Role tag attached to every `ChatMessage`. */
4076
5875
  type ChatRole = "system" | "user" | "assistant" | "tool";
5876
+ /** Single immutable entry in a `ChatContext` history. */
4077
5877
  interface ChatMessage {
4078
5878
  readonly id: string;
4079
5879
  readonly role: ChatRole;
@@ -4082,16 +5882,19 @@ interface ChatMessage {
4082
5882
  readonly name?: string;
4083
5883
  readonly toolCallId?: string;
4084
5884
  }
5885
+ /** Wire shape produced by `ChatContext.toOpenAI()` (matches OpenAI Chat Completions). */
4085
5886
  interface OpenAIMessage {
4086
5887
  role: string;
4087
5888
  content: string;
4088
5889
  name?: string;
4089
5890
  tool_call_id?: string;
4090
5891
  }
5892
+ /** Single message in `AnthropicConversion.messages`. */
4091
5893
  interface AnthropicMessage {
4092
5894
  role: string;
4093
5895
  content: string;
4094
5896
  }
5897
+ /** Result of `ChatContext.toAnthropic()` — system prompt extracted from the message list. */
4095
5898
  interface AnthropicConversion {
4096
5899
  system: string | undefined;
4097
5900
  messages: ReadonlyArray<AnthropicMessage>;
@@ -4099,15 +5902,23 @@ interface AnthropicConversion {
4099
5902
  interface ChatContextJSON {
4100
5903
  messages: ReadonlyArray<ChatMessage>;
4101
5904
  }
5905
+ /** Mutable conversation history with system-prompt-aware truncation and provider conversion helpers. */
4102
5906
  declare class ChatContext {
4103
5907
  private items;
4104
5908
  constructor(systemPrompt?: string);
5909
+ /** Append a user message and return the created `ChatMessage`. */
4105
5910
  addUser(content: string): ChatMessage;
5911
+ /** Append an assistant message and return the created `ChatMessage`. */
4106
5912
  addAssistant(content: string): ChatMessage;
5913
+ /** Append a system message and return the created `ChatMessage`. */
4107
5914
  addSystem(content: string): ChatMessage;
5915
+ /** Append a tool-result message tied to a tool-call id. */
4108
5916
  addToolResult(content: string, toolCallId: string): ChatMessage;
5917
+ /** Return a snapshot of all messages currently in the context. */
4109
5918
  getMessages(): ReadonlyArray<ChatMessage>;
5919
+ /** Return the last `n` messages (or `[]` when `n <= 0`). */
4110
5920
  getLastN(n: number): ReadonlyArray<ChatMessage>;
5921
+ /** Number of messages currently in the context. */
4111
5922
  get length(): number;
4112
5923
  /**
4113
5924
  * Keep the first system message (if any) plus the last `maxMessages`
@@ -4115,6 +5926,7 @@ declare class ChatContext {
4115
5926
  * simply keeps the last `maxMessages` messages.
4116
5927
  */
4117
5928
  truncate(maxMessages: number): void;
5929
+ /** Convert the conversation to the OpenAI Chat Completions message format. */
4118
5930
  toOpenAI(): OpenAIMessage[];
4119
5931
  /**
4120
5932
  * Convert to Anthropic format. The first system message (if present)
@@ -4122,8 +5934,11 @@ declare class ChatContext {
4122
5934
  * messages are included in the messages array.
4123
5935
  */
4124
5936
  toAnthropic(): AnthropicConversion;
5937
+ /** Return a new `ChatContext` with the same messages (independent storage). */
4125
5938
  copy(): ChatContext;
5939
+ /** Serialize the context to a JSON-safe object. */
4126
5940
  toJSON(): ChatContextJSON;
5941
+ /** Reconstruct a `ChatContext` from the result of `toJSON()`. */
4127
5942
  static fromJSON(data: ChatContextJSON): ChatContext;
4128
5943
  }
4129
5944
 
@@ -4145,21 +5960,15 @@ declare class ChatContext {
4145
5960
  * equivalent battle-tested package in the std library, so we ship a
4146
5961
  * minimal in-house bag-of-words + cosine-similarity implementation.
4147
5962
  * It is intentionally simple — enough to match repeated IVR prompts.
4148
- *
4149
- * Algorithm adapted from LiveKit Agents (Apache 2.0):
4150
- * https://github.com/livekit/agents
4151
- *
4152
- * Source:
4153
- * - livekit-agents/livekit/agents/voice/ivr/ivr_activity.py
4154
- * - livekit-agents/livekit/agents/beta/tools/send_dtmf.py
4155
- * LiveKit SHA at port time: 78a66bcf79c5cea82989401c408f1dff4b961a5b
4156
5963
  */
4157
5964
 
4158
5965
  /** Valid DTMF tone values (keypad characters). */
4159
5966
  declare const DTMF_EVENTS: readonly ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "*", "#", "A", "B", "C", "D"];
5967
+ /** Single DTMF tone value (a member of `DTMF_EVENTS`). */
4160
5968
  type DtmfEvent = (typeof DTMF_EVENTS)[number];
4161
5969
  /** Join DTMF events into a space-separated debug string. */
4162
5970
  declare function formatDtmf(events: DtmfEvent[]): string;
5971
+ /** Constructor options for `TfidfLoopDetector`. */
4163
5972
  interface TfidfLoopDetectorOptions {
4164
5973
  /** Number of recent chunks to keep in the comparison window. */
4165
5974
  windowSize?: number;
@@ -4180,14 +5989,18 @@ declare class TfidfLoopDetector {
4180
5989
  private chunks;
4181
5990
  private consecutiveSimilar;
4182
5991
  constructor(opts?: TfidfLoopDetectorOptions);
5992
+ /** Forget all previously observed chunks and reset the consecutive-hit counter. */
4183
5993
  reset(): void;
5994
+ /** Record a new transcript chunk in the rolling window. */
4184
5995
  addChunk(text: string): void;
5996
+ /** Returns true once the most recent chunks look like a repeated IVR prompt. */
4185
5997
  checkLoopDetection(): boolean;
4186
5998
  }
4187
5999
  /** Async callback fired when the TF-IDF detector trips. */
4188
6000
  type LoopCallback = () => Promise<void> | void;
4189
6001
  /** Async callback fired after sustained silence. */
4190
6002
  type SilenceCallback = () => Promise<void> | void;
6003
+ /** Constructor options for `IVRActivity`. */
4191
6004
  interface IVRActivityOptions {
4192
6005
  /** Seconds of combined silence before firing `onSilence`. Default `5.0`. */
4193
6006
  maxSilenceDuration?: number;
@@ -4237,11 +6050,17 @@ declare class IVRActivity {
4237
6050
  private lastShouldSchedule;
4238
6051
  private started;
4239
6052
  constructor(callControl: CallControl, opts?: IVRActivityOptions);
6053
+ /** Begin tracking transcripts and silence; call once per call. */
4240
6054
  start(): Promise<void>;
6055
+ /** Stop tracking and cancel any pending silence timer. */
4241
6056
  stop(): Promise<void>;
6057
+ /** Feed a final user-side transcript chunk into the loop detector. */
4242
6058
  onUserTranscribed(text: string): Promise<void>;
6059
+ /** Record the current user-turn state (e.g. `"listening"`, `"away"`). */
4243
6060
  noteUserState(state: string): void;
6061
+ /** Record the current agent-turn state (e.g. `"idle"`, `"listening"`). */
4244
6062
  noteAgentState(state: string): void;
6063
+ /** Tool definitions to expose to the LLM (currently only `send_dtmf_events`). */
4245
6064
  get tools(): IVRToolDefinition[];
4246
6065
  private scheduleSilenceCheck;
4247
6066
  private shouldScheduleCheck;
@@ -4249,6 +6068,30 @@ declare class IVRActivity {
4249
6068
  private buildSendDtmfTool;
4250
6069
  }
4251
6070
 
6071
+ /**
6072
+ * Background-audio mixer for the Patter TypeScript SDK. Patter routes
6073
+ * outbound PCM through the pipeline stream handler, so this module exposes
6074
+ * a ``start / mix / stop`` API that does no I/O of its own. See
6075
+ * {@link BackgroundAudioPlayer} for the public class.
6076
+ *
6077
+ * Notes:
6078
+ *
6079
+ * - PCM mixing is a ~40-line pure-JavaScript routine operating on
6080
+ * ``Buffer`` (see :func:`mixPcm` below). Clipping is done against the
6081
+ * int16 range.
6082
+ * - ``.ogg`` decoding is not done in this module. Node does not bundle a
6083
+ * Vorbis decoder and shipping a native one would triple the SDK size.
6084
+ * Instead, callers supply a :class:`RawPcmSource` (pre-decoded int16
6085
+ * mono LE PCM at a known sample rate) OR a :class:`DecodedSource` via a
6086
+ * user-supplied decoder. The Python SDK ships the bundled ``.ogg``
6087
+ * clips and their decoder; the TS package exposes the raw files next to
6088
+ * this module for users who wire up their own decoder.
6089
+ *
6090
+ * Attribution for the bundled audio clips themselves is preserved in
6091
+ * ``src/resources/audio/NOTICE``.
6092
+ */
6093
+
6094
+ /** Names of the .ogg clips bundled with the SDK under ``resources/audio/``. */
4252
6095
  declare const BuiltinAudioClip: {
4253
6096
  readonly CITY_AMBIENCE: "city-ambience.ogg";
4254
6097
  readonly FOREST_AMBIENCE: "forest-ambience.ogg";
@@ -4258,6 +6101,7 @@ declare const BuiltinAudioClip: {
4258
6101
  readonly KEYBOARD_TYPING2: "keyboard-typing2.ogg";
4259
6102
  readonly HOLD_MUSIC: "hold_music.ogg";
4260
6103
  };
6104
+ /** Filename of one of the bundled clips (e.g. ``"city-ambience.ogg"``). */
4261
6105
  type BuiltinAudioClipName = (typeof BuiltinAudioClip)[keyof typeof BuiltinAudioClip];
4262
6106
  /** Resolve a bundled clip name to its absolute path on disk. */
4263
6107
  declare function builtinClipPath(clip: BuiltinAudioClipName): string;
@@ -4291,7 +6135,9 @@ interface BuiltinPcmSource {
4291
6135
  readonly volume?: number;
4292
6136
  readonly probability?: number;
4293
6137
  }
6138
+ /** Tagged union of every input shape accepted by the player. */
4294
6139
  type AudioSource = RawPcmSource | FilePcmSource | BuiltinPcmSource;
6140
+ /** A source plus optional probability weight + volume for list-style players. */
4295
6141
  interface AudioConfig {
4296
6142
  readonly source: AudioSource;
4297
6143
  /** Probability weight used when ``BackgroundAudioPlayer`` receives a list. */
@@ -4299,8 +6145,9 @@ interface AudioConfig {
4299
6145
  /** Master volume [0, 1] applied on top of the per-source ``volume``. */
4300
6146
  readonly volume?: number;
4301
6147
  }
6148
+ /** Constructor options for {@link BackgroundAudioPlayer}. */
4302
6149
  interface BackgroundAudioOptions {
4303
- /** Overall mix ratio [0, 1]. Defaults to 0.1 (LiveKit's hold-music ratio). */
6150
+ /** Overall mix ratio [0, 1]. Defaults to 0.1 (typical hold-music ratio). */
4304
6151
  readonly volume?: number;
4305
6152
  /** When true the source restarts on exhaustion. */
4306
6153
  readonly loop?: boolean;
@@ -4317,6 +6164,7 @@ declare function mixPcm(agent: Buffer, bg: Buffer, ratio: number): Buffer;
4317
6164
  * program audio.
4318
6165
  */
4319
6166
  declare function resamplePcm(src: Buffer, srcSr: number, dstSr: number): Buffer;
6167
+ /** Probability-weighted random pick from a list of {@link AudioConfig}. */
4320
6168
  declare function selectSoundFromList(sounds: readonly AudioConfig[]): AudioConfig | null;
4321
6169
  /**
4322
6170
  * Mix a background audio clip into an outbound PCM stream.
@@ -4356,26 +6204,31 @@ declare class BackgroundAudioPlayer implements BackgroundAudioPlayer$1 {
4356
6204
  private resampleTo;
4357
6205
  }
4358
6206
 
6207
+ /** Constructor options for {@link TwilioAdapter}. */
4359
6208
  interface TwilioAdapterOptions {
4360
6209
  /** Optional Twilio edge region (e.g. ``ie1`` for Ireland). */
4361
6210
  region?: string;
4362
6211
  }
6212
+ /** Options accepted by {@link TwilioAdapter.provisionNumber}. */
4363
6213
  interface ProvisionNumberOptions$1 {
4364
6214
  /** ISO-3166-1 alpha-2 country code, e.g. ``"US"``. */
4365
6215
  countryCode: string;
4366
6216
  /** Optional North-American area code (e.g. ``"415"``). */
4367
6217
  areaCode?: string;
4368
6218
  }
6219
+ /** Result returned by {@link TwilioAdapter.provisionNumber}. */
4369
6220
  interface ProvisionNumberResult$1 {
4370
6221
  readonly phoneNumber: string;
4371
6222
  readonly sid: string;
4372
6223
  }
6224
+ /** Options accepted by {@link TwilioAdapter.configureNumber}. */
4373
6225
  interface ConfigureNumberOptions$1 {
4374
6226
  /** URL Twilio should hit when the number receives a call. */
4375
6227
  voiceUrl: string;
4376
6228
  /** Optional status callback URL for call lifecycle events. */
4377
6229
  statusCallback?: string;
4378
6230
  }
6231
+ /** Options accepted by {@link TwilioAdapter.initiateCall}. */
4379
6232
  interface InitiateCallOptions$1 {
4380
6233
  from: string;
4381
6234
  to: string;
@@ -4397,9 +6250,11 @@ interface InitiateCallOptions$1 {
4397
6250
  /** Raw extra form parameters forwarded to the Calls endpoint. */
4398
6251
  extraParams?: Record<string, string>;
4399
6252
  }
6253
+ /** Result returned by {@link TwilioAdapter.initiateCall}. */
4400
6254
  interface InitiateCallResult$1 {
4401
6255
  readonly callSid: string;
4402
6256
  }
6257
+ /** Direct REST adapter for Twilio Programmable Voice & Numbers API. */
4403
6258
  declare class TwilioAdapter {
4404
6259
  readonly accountSid: string;
4405
6260
  readonly region: string | undefined;
@@ -4426,18 +6281,22 @@ declare class TwilioAdapter {
4426
6281
  endCall(callSid: string): Promise<void>;
4427
6282
  }
4428
6283
 
6284
+ /** Options accepted by {@link TelnyxAdapter.provisionNumber}. */
4429
6285
  interface ProvisionNumberOptions {
4430
6286
  /** ISO-3166-1 alpha-2 country code (e.g. ``"US"``). */
4431
6287
  countryCode: string;
4432
6288
  }
6289
+ /** Result returned by {@link TelnyxAdapter.provisionNumber}. */
4433
6290
  interface ProvisionNumberResult {
4434
6291
  readonly phoneNumber: string;
4435
6292
  readonly orderId: string;
4436
6293
  }
6294
+ /** Options accepted by {@link TelnyxAdapter.configureNumber}. */
4437
6295
  interface ConfigureNumberOptions {
4438
6296
  /** Telnyx Call Control Application / Connection ID. */
4439
6297
  connectionId: string;
4440
6298
  }
6299
+ /** Options accepted by {@link TelnyxAdapter.initiateCall}. */
4441
6300
  interface InitiateCallOptions {
4442
6301
  from: string;
4443
6302
  to: string;
@@ -4446,13 +6305,16 @@ interface InitiateCallOptions {
4446
6305
  /** Opaque state string that Telnyx echoes back on webhooks. Base64-encoded on wire. */
4447
6306
  clientState?: string;
4448
6307
  }
6308
+ /** Result returned by {@link TelnyxAdapter.initiateCall}. */
4449
6309
  interface InitiateCallResult {
4450
6310
  readonly callControlId: string;
4451
6311
  }
6312
+ /** Options accepted by {@link TelnyxAdapter.endCall}. */
4452
6313
  interface EndCallOptions {
4453
6314
  /** Idempotency key for the hangup command. */
4454
6315
  commandId?: string;
4455
6316
  }
6317
+ /** Direct REST adapter for Telnyx Call Control & Numbers API. */
4456
6318
  declare class TelnyxAdapter {
4457
6319
  private readonly apiKey;
4458
6320
  readonly connectionId: string | undefined;
@@ -4479,6 +6341,102 @@ declare class TelnyxAdapter {
4479
6341
  endCall(callControlId: string, opts?: EndCallOptions): Promise<void>;
4480
6342
  }
4481
6343
 
6344
+ /**
6345
+ * Telnyx Speech-to-Text adapter (WebSocket streaming).
6346
+ *
6347
+ * Bridges the Telnyx `/v2/speech-to-text/transcription` WebSocket API to the
6348
+ * Patter SDK pipeline-mode STT interface. Implemented in TypeScript
6349
+ * (`ws` + `Buffer`) with a callback-based interface matching the other
6350
+ * Patter STT providers (Deepgram, Whisper).
6351
+ */
6352
+ /** Patter-normalised transcript event emitted by {@link TelnyxSTT}. */
6353
+ interface Transcript {
6354
+ readonly text: string;
6355
+ readonly isFinal: boolean;
6356
+ readonly confidence: number;
6357
+ }
6358
+ type TranscriptCallback = (transcript: Transcript) => void;
6359
+ /** Backing transcription engine accepted by Telnyx STT. */
6360
+ type TelnyxTranscriptionEngine = 'telnyx' | 'google' | 'deepgram' | 'azure';
6361
+ /** Common PCM sample rates accepted by Telnyx STT. */
6362
+ declare const TelnyxSTTSampleRate: {
6363
+ readonly HZ_8000: 8000;
6364
+ readonly HZ_16000: 16000;
6365
+ readonly HZ_24000: 24000;
6366
+ };
6367
+ /** Union of {@link TelnyxSTTSampleRate} integer values. */
6368
+ type TelnyxSTTSampleRate = (typeof TelnyxSTTSampleRate)[keyof typeof TelnyxSTTSampleRate];
6369
+ /** Input audio formats accepted by Telnyx STT. */
6370
+ declare const TelnyxSTTInputFormat: {
6371
+ readonly WAV: "wav";
6372
+ };
6373
+ /** Union of {@link TelnyxSTTInputFormat} string values. */
6374
+ type TelnyxSTTInputFormat = (typeof TelnyxSTTInputFormat)[keyof typeof TelnyxSTTInputFormat];
6375
+ /** Streaming STT adapter for Telnyx's `/v2/speech-to-text` WebSocket. */
6376
+ declare class TelnyxSTT {
6377
+ private readonly apiKey;
6378
+ private readonly language;
6379
+ private readonly transcriptionEngine;
6380
+ private readonly sampleRate;
6381
+ private readonly baseUrl;
6382
+ private ws;
6383
+ private callbacks;
6384
+ private headerSent;
6385
+ constructor(apiKey: string, language?: string, transcriptionEngine?: TelnyxTranscriptionEngine, sampleRate?: number, baseUrl?: string);
6386
+ /** Open the streaming WebSocket and arm message handlers. */
6387
+ connect(): Promise<void>;
6388
+ /** Send a binary PCM16 audio chunk; emits the WAV header on the first call. */
6389
+ sendAudio(audio: Buffer): void;
6390
+ /** Register a transcript listener (max 10 concurrent listeners). */
6391
+ onTranscript(callback: TranscriptCallback): void;
6392
+ /** Close the streaming WebSocket. */
6393
+ close(): void;
6394
+ }
6395
+
6396
+ /**
6397
+ * Telnyx Text-to-Speech adapter (WebSocket streaming).
6398
+ *
6399
+ * Bridges the Telnyx `/v2/text-to-speech/speech` WebSocket API to the
6400
+ * Patter SDK pipeline-mode TTS interface. Implemented in TypeScript
6401
+ * (`ws` + `Buffer`) with the same `synthesize` / `synthesizeStream`
6402
+ * method shape used by the other Patter TTS providers (ElevenLabs,
6403
+ * OpenAI). The stream yields raw MP3 bytes.
6404
+ */
6405
+ /** Common Telnyx NaturalHD voices accepted by the TTS endpoint. */
6406
+ declare const TelnyxTTSVoice: {
6407
+ readonly NATURAL_HD_ASTRA: "Telnyx.NaturalHD.astra";
6408
+ readonly NATURAL_HD_LUNA: "Telnyx.NaturalHD.luna";
6409
+ readonly NATURAL_HD_ATLAS: "Telnyx.NaturalHD.atlas";
6410
+ readonly NATURAL_HD_HERA: "Telnyx.NaturalHD.hera";
6411
+ readonly NATURAL_HD_ZEUS: "Telnyx.NaturalHD.zeus";
6412
+ };
6413
+ /** Union of {@link TelnyxTTSVoice} string values. */
6414
+ type TelnyxTTSVoice = (typeof TelnyxTTSVoice)[keyof typeof TelnyxTTSVoice];
6415
+ /** Sample rates supported by the Telnyx TTS WebSocket endpoint. */
6416
+ declare const TelnyxTTSSampleRate: {
6417
+ readonly HZ_8000: 8000;
6418
+ readonly HZ_16000: 16000;
6419
+ readonly HZ_24000: 24000;
6420
+ };
6421
+ /** Union of {@link TelnyxTTSSampleRate} integer values. */
6422
+ type TelnyxTTSSampleRate = (typeof TelnyxTTSSampleRate)[keyof typeof TelnyxTTSSampleRate];
6423
+ /** Streaming TTS adapter for Telnyx's `/v2/text-to-speech/speech` WebSocket. */
6424
+ declare class TelnyxTTS {
6425
+ private readonly apiKey;
6426
+ private readonly voice;
6427
+ private readonly baseUrl;
6428
+ constructor(apiKey: string, voice?: string, baseUrl?: string);
6429
+ /** Collect every audio chunk into a single Buffer. */
6430
+ synthesize(text: string): Promise<Buffer>;
6431
+ /**
6432
+ * Stream MP3-encoded audio chunks as they arrive from Telnyx.
6433
+ *
6434
+ * The server sends JSON frames of the shape `{"audio": "<base64-mp3>"}`.
6435
+ * Callers that need PCM must decode the MP3 bytes (e.g. via `ffmpeg`).
6436
+ */
6437
+ synthesizeStream(text: string): AsyncGenerator<Buffer>;
6438
+ }
6439
+
4482
6440
  declare const SPAN_CALL = "getpatter.call";
4483
6441
  declare const SPAN_STT = "getpatter.stt";
4484
6442
  declare const SPAN_LLM = "getpatter.llm";
@@ -4495,6 +6453,7 @@ interface Span {
4495
6453
  recordException(exception: unknown): void;
4496
6454
  end(): void;
4497
6455
  }
6456
+ /** Options for `initTracing()`. */
4498
6457
  interface InitTracingOptions {
4499
6458
  serviceName?: string;
4500
6459
  otlpEndpoint?: string;
@@ -4545,4 +6504,4 @@ interface CallEvent {
4545
6504
  readonly direction?: string;
4546
6505
  }
4547
6506
 
4548
- export { type AgentOptions, AllProvidersFailedError, type AnthropicConversion, LLM$3 as AnthropicLLM, type AnthropicLLMOptions, type AnthropicMessage, type AssemblyAIEncoding, type AssemblyAIModel, STT as AssemblyAISTT, type AssemblyAISTTOptions, type AudioConfig, type AudioSource, AuthenticationError, type BackgroundAudioOptions, BackgroundAudioPlayer, BuiltinAudioClip, type BuiltinAudioClipName, type BuiltinPcmSource, type CallControl, type CallEvent, type CallEventHandler, type CallMetrics, CallMetricsAccumulator, type CallRecord, type CartesiaEncoding, STT$2 as CartesiaSTT, type CartesiaSTTOptions, TTS$2 as CartesiaTTS, type CartesiaTTSOptions, LLM$1 as CerebrasLLM, type CerebrasLLMOptions, ChatContext, type ChatMessage, type ChatRole, CloudflareTunnel, type CostBreakdown, DEFAULT_MIN_SENTENCE_LEN, DEFAULT_PRICING, DTMF_EVENTS, STT$5 as DeepgramSTT, type DeepgramSTTOptions, DefaultToolExecutor, type DefaultToolExecutorOptions, type DefineToolInput, type DtmfEvent, ConvAI as ElevenLabsConvAI, ElevenLabsConvAIAdapter, type ConvAIOptions as ElevenLabsConvAIOptions, TTS$4 as ElevenLabsTTS, type ElevenLabsTTSOptions, EventBus, FallbackLLMProvider, type FallbackLLMProviderOptions, type FilePcmSource, GEMINI_DEFAULT_INPUT_SR, GEMINI_DEFAULT_OUTPUT_SR, GeminiLiveAdapter, type GeminiLiveEventHandler, LLM as GoogleLLM, type GoogleLLMOptions, LLM$2 as GroqLLM, type GroqLLMOptions, Guardrail$1 as Guardrail, type GuardrailOptions, type HookContext, IVRActivity, type IVRActivityOptions, type IVRToolDefinition, type IncomingMessage, type InitTracingOptions, type JobCallback, type LLMChunk, LLMLoop, type LLMProvider, type LMNTAudioFormat, type LMNTModel, type LMNTSampleRate, TTS as LMNTTTS, type LMNTTTSOptions, type LatencyBreakdown, type LocalCallOptions, type LocalConfig, type LocalOptions, type Logger, type LoopCallback, type MessageHandler, MetricsStore, Ngrok, LLM$4 as OpenAILLM, type OpenAILLMOptions, OpenAILLMProvider, type OpenAIMessage, Realtime as OpenAIRealtime, 
OpenAIRealtimeAdapter, type RealtimeOptions as OpenAIRealtimeOptions, TTS$3 as OpenAITTS, type OpenAITTSOptions, STT$3 as OpenAITranscribeSTT, type OpenAITranscribeSTTOptions, type ParamSpec, PartialStreamError, Patter, PatterConnectionError, PatterError, type PatterEventType, PatterTool, type PatterToolExecuteArgs, type PatterToolOptions, type PatterToolResult, PcmCarry, PipelineHookExecutor, type PipelineHooks, type PipelineMessageHandler, type ProviderPricing, ProvisionError, RateLimitError, type RawPcmSource, type RealtimeConfig, RemoteMessageHandler, TTS$1 as RimeTTS, type RimeTTSOptions, SPAN_BARGEIN, SPAN_CALL, SPAN_ENDPOINT, SPAN_LLM, SPAN_STT, SPAN_TOOL, SPAN_TTS, type SSEEvent, type STTConfig, type ScheduleHandle, SentenceChunker, type ServeOptions, type SilenceCallback, type SileroSampleRate, SileroVAD, type SileroVADOptions, STT$1 as SonioxSTT, type SonioxSTTOptions$1 as SonioxSTTOptions, type Span, StatefulResampler, type StatefulResamplerOptions, Static as StaticTunnel, type TTSConfig, Carrier as Telnyx, TelnyxAdapter, type TelnyxCarrierOptions, type ConfigureNumberOptions as TelnyxConfigureNumberOptions, type EndCallOptions as TelnyxEndCallOptions, type InitiateCallOptions as TelnyxInitiateCallOptions, type InitiateCallResult as TelnyxInitiateCallResult, type ProvisionNumberOptions as TelnyxProvisionNumberOptions, type ProvisionNumberResult as TelnyxProvisionNumberResult, TestSession, TfidfLoopDetector, type TfidfLoopDetectorOptions, Tool, type ToolDefinition, type ToolExecutor, type ToolHandler, type ToolOptions, type TunnelHandle, type TurnMetrics, Carrier$1 as Twilio, TwilioAdapter, type TwilioAdapterOptions, type TwilioCarrierOptions, type ConfigureNumberOptions$1 as TwilioConfigureNumberOptions, type InitiateCallOptions$1 as TwilioInitiateCallOptions, type InitiateCallResult$1 as TwilioInitiateCallResult, type ProvisionNumberOptions$1 as TwilioProvisionNumberOptions, type ProvisionNumberResult$1 as TwilioProvisionNumberResult, 
ULTRAVOX_DEFAULT_API_BASE, ULTRAVOX_DEFAULT_SR, type UltravoxEventHandler, UltravoxRealtimeAdapter, STT$4 as WhisperSTT, type WhisperSTTOptions, assemblyai, builtinClipPath, calculateRealtimeCost, calculateSttCost, calculateTelephonyCost, calculateTtsCost, callsToCsv, callsToJson, cartesia, createResampler16kTo8k, createResampler24kTo16k, createResampler8kTo16k, deepgram, defineTool, elevenlabs, filterEmoji, filterForTTS, filterMarkdown, formatDtmf, geminiLive, getLogger, guardrail, initTracing, isRemoteUrl, isTracingEnabled, isWebSocketUrl, lmnt, makeAuthMiddleware, mergePricing, mixPcm, mountApi, mountDashboard, mulawToPcm16, notifyDashboard, openaiTts, pcm16ToMulaw, resample16kTo8k, resample24kTo16k, resample8kTo16k, resamplePcm, rime, scheduleCron, scheduleInterval, scheduleOnce, selectSoundFromList, setLogger, soniox, speechmatics, startSpan, startTunnel, tool, ultravox, whisper };
6507
+ export { type AgentOptions, type AgentState, AllProvidersFailedError, type AnthropicConversion, LLM$3 as AnthropicLLM, type AnthropicLLMOptions, type AnthropicMessage, AssemblyAIEncoding, AssemblyAIModel, STT$1 as AssemblyAISTT, type AssemblyAISTTOptions, type AudioConfig, type AudioSource, AuthenticationError, type BackgroundAudioOptions, BackgroundAudioPlayer, BuiltinAudioClip, type BuiltinAudioClipName, type BuiltinPcmSource, type CallControl, type CallEvent, type CallEventHandler, type CallMetrics, CallMetricsAccumulator, type CallRecord, type CartesiaEncoding, STT$3 as CartesiaSTT, type CartesiaSTTOptions, TTS$3 as CartesiaTTS, type CartesiaTTSOptions, LLM$1 as CerebrasLLM, type CerebrasLLMOptions, ChatContext, type ChatMessage, type ChatRole, CloudflareTunnel, type ConversationStateSnapshot, type CostBreakdown, DEFAULT_MIN_SENTENCE_LEN, DEFAULT_PRICING, DTMF_EVENTS, STT$6 as DeepgramSTT, type DeepgramSTTOptions, DefaultToolExecutor, type DefaultToolExecutorOptions, type DefineToolInput, type DtmfEvent, ConvAI as ElevenLabsConvAI, ElevenLabsConvAIAdapter, type ConvAIOptions as ElevenLabsConvAIOptions, TTS$6 as ElevenLabsTTS, type ElevenLabsTTSOptions, type ElevenLabsWebSocketOptions, TTS$5 as ElevenLabsWebSocketTTS, type EouTrigger, ErrorCode, EventBus, FallbackLLMProvider, type FallbackLLMProviderOptions, type FilePcmSource, GEMINI_DEFAULT_INPUT_SR, GEMINI_DEFAULT_OUTPUT_SR, GeminiLiveAdapter, type GeminiLiveEventHandler, LLM as GoogleLLM, type GoogleLLMOptions, LLM$2 as GroqLLM, type GroqLLMOptions, Guardrail$1 as Guardrail, type GuardrailOptions, type HookContext, IVRActivity, type IVRActivityOptions, type IVRToolDefinition, type IncomingMessage, type InitTracingOptions, TTS as InworldTTS, type InworldTTSOptions, type JobCallback, type LLMChunk, LLMLoop, type LLMProvider, LMNTAudioFormat, LMNTModel, LMNTSampleRate, TTS$1 as LMNTTTS, type LMNTTTSOptions, type LatencyBreakdown, type LocalCallOptions, type LocalConfig, type LocalOptions, type Logger, type 
LoopCallback, type MessageHandler, MetricsStore, Ngrok, LLM$4 as OpenAILLM, type OpenAILLMOptions, OpenAILLMProvider, type OpenAIMessage, Realtime as OpenAIRealtime, OpenAIRealtimeAdapter, type RealtimeOptions as OpenAIRealtimeOptions, TTS$4 as OpenAITTS, type OpenAITTSOptions, STT$4 as OpenAITranscribeSTT, type OpenAITranscribeSTTOptions, type ParamSpec, PartialStreamError, Patter, PatterConnectionError, PatterError, type PatterEventType, PatterTool, type PatterToolExecuteArgs, type PatterToolOptions, type PatterToolResult, PcmCarry, PipelineHookExecutor, type PipelineHooks, type PipelineMessageHandler, type ProviderPricing, ProvisionError, RateLimitError, type RawPcmSource, type RealtimeConfig, RemoteMessageHandler, TTS$2 as RimeTTS, type RimeTTSOptions, SPAN_BARGEIN, SPAN_CALL, SPAN_ENDPOINT, SPAN_LLM, SPAN_STT, SPAN_TOOL, SPAN_TTS, type SSEEvent, type STTConfig, type ScheduleHandle, SentenceChunker, type ServeOptions, type SilenceCallback, type SileroSampleRate, SileroVAD, type SileroVADOptions, STT$2 as SonioxSTT, type SonioxSTTOptions$1 as SonioxSTTOptions, type Span, type SpeechEventCallback, SpeechEvents, SpeechmaticsAudioEncoding, SpeechmaticsOperatingPoint, STT as SpeechmaticsSTT, type SpeechmaticsSTTOptions, SpeechmaticsSampleRate, SpeechmaticsServerMessage, TurnDetectionMode as SpeechmaticsTurnDetectionMode, StatefulResampler, type StatefulResamplerOptions, Static as StaticTunnel, type TTSConfig, Carrier as Telnyx, TelnyxAdapter, type TelnyxCarrierOptions, type ConfigureNumberOptions as TelnyxConfigureNumberOptions, type EndCallOptions as TelnyxEndCallOptions, type InitiateCallOptions as TelnyxInitiateCallOptions, type InitiateCallResult as TelnyxInitiateCallResult, type ProvisionNumberOptions as TelnyxProvisionNumberOptions, type ProvisionNumberResult as TelnyxProvisionNumberResult, TelnyxSTT, TelnyxSTTInputFormat, TelnyxSTTSampleRate, type Transcript as TelnyxSTTTranscript, TelnyxTTS, TelnyxTTSSampleRate, TelnyxTTSVoice, type 
TelnyxTranscriptionEngine, TestSession, TfidfLoopDetector, type TfidfLoopDetectorOptions, Tool, type ToolDefinition, type ToolExecutor, type ToolHandler, type ToolOptions, type TunnelHandle, type TurnMetrics, Carrier$1 as Twilio, TwilioAdapter, type TwilioAdapterOptions, type TwilioCarrierOptions, type ConfigureNumberOptions$1 as TwilioConfigureNumberOptions, type InitiateCallOptions$1 as TwilioInitiateCallOptions, type InitiateCallResult$1 as TwilioInitiateCallResult, type ProvisionNumberOptions$1 as TwilioProvisionNumberOptions, type ProvisionNumberResult$1 as TwilioProvisionNumberResult, ULTRAVOX_DEFAULT_API_BASE, ULTRAVOX_DEFAULT_SR, type UltravoxEventHandler, UltravoxRealtimeAdapter, type UserState, STT$5 as WhisperSTT, type WhisperSTTOptions, assemblyai, builtinClipPath, calculateRealtimeCost, calculateSttCost, calculateTelephonyCost, calculateTtsCost, callsToCsv, callsToJson, cartesia, createResampler16kTo8k, createResampler24kTo16k, createResampler24kTo8k, createResampler8kTo16k, deepgram, defineTool, elevenlabs, filterEmoji, filterForTTS, filterMarkdown, formatDtmf, geminiLive, getLogger, guardrail, initTracing, isRemoteUrl, isTracingEnabled, isWebSocketUrl, lmnt, makeAuthMiddleware, mergePricing, mixPcm, mountApi, mountDashboard, mulawToPcm16, notifyDashboard, openaiTts, pcm16ToMulaw, resample16kTo8k, resample24kTo16k, resample8kTo16k, resamplePcm, rime, scheduleCron, scheduleInterval, scheduleOnce, selectSoundFromList, setLogger, soniox, speechmatics, startSpan, startTunnel, tool, ultravox, whisper };