getpatter 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -135,6 +135,35 @@ declare class Static {
135
135
  hostname: string;
136
136
  });
137
137
  }
138
+ /**
139
+ * Ngrok tunnel marker — parity with the Python ``getpatter.tunnels.Ngrok``.
140
+ *
141
+ * Patter does not bundle the ngrok binary or auto-provision tunnels. This
142
+ * marker exists so applications can pass an existing ngrok hostname through
143
+ * the same code path as ``Static`` / ``CloudflareTunnel``. Constructing one
144
+ * without a hostname is allowed (mirrors the Python type), but ``start()``
145
+ * will throw — the user is expected to either pass a hostname or run the
146
+ * tunnel themselves and feed the resulting URL via ``Static``.
147
+ *
148
+ * @example
149
+ * ```ts
150
+ * import { Ngrok } from "getpatter/tunnels";
151
+ * const tunnel = new Ngrok({ hostname: "abc.ngrok.io" });
152
+ * ```
153
+ */
154
+ declare class Ngrok {
155
+ readonly kind: "ngrok";
156
+ readonly hostname: string;
157
+ constructor(opts?: {
158
+ hostname?: string;
159
+ });
160
+ /**
161
+ * Returns the configured hostname or throws if the marker was constructed
162
+ * without one. Patter does not start ngrok itself — the user is expected
163
+ * to either supply a hostname or run ngrok out-of-band.
164
+ */
165
+ start(): string;
166
+ }
138
167
 
139
168
  /**
140
169
  * Public API primitives — `Tool` and `Guardrail` classes, plus the
@@ -224,9 +253,35 @@ declare function tool(opts: ToolOptions): Tool;
224
253
  * functions so the Twilio/Telnyx bridges have a single dispatch point.
225
254
  */
226
255
 
256
+ /** Per-word timings / metadata (Deepgram-shaped). Optional on every adapter. */
257
+ interface STTWord {
258
+ readonly word?: string;
259
+ readonly start?: number;
260
+ readonly end?: number;
261
+ readonly confidence?: number;
262
+ readonly punctuated_word?: string;
263
+ readonly speaker?: number;
264
+ }
265
+ /**
266
+ * Facade transcript shape — widened to surface richer provider fields
267
+ * (Deepgram emits all of them) without forcing adapters that only know
268
+ * ``text``/``isFinal`` to change. All non-text fields are optional.
269
+ */
227
270
  interface STTTranscript {
228
271
  text: string;
229
272
  isFinal?: boolean;
273
+ /** Overall transcript confidence in [0, 1]. */
274
+ confidence?: number;
275
+ /** Provider-side end-of-utterance hint (faster than ``isFinal``). */
276
+ speechFinal?: boolean;
277
+ /** True when the result was produced in response to a Finalize command. */
278
+ fromFinalize?: boolean;
279
+ /** Provider request id (Deepgram populates this from the Metadata frame). */
280
+ requestId?: string;
281
+ /** Per-word timings / metadata when the provider emits them. */
282
+ words?: ReadonlyArray<STTWord>;
283
+ /** Which provider event this transcript represents (e.g. ``Results``). */
284
+ eventType?: string;
230
285
  }
231
286
  type STTTranscriptCallback = (t: STTTranscript) => Promise<void> | void;
232
287
  /** Shape shared by every STT adapter in the SDK. */
@@ -240,6 +295,81 @@ interface TTSAdapter {
240
295
  synthesizeStream(text: string): AsyncIterable<Buffer>;
241
296
  }
242
297
 
298
+ /**
299
+ * Pipeline hook executor for pipeline mode.
300
+ *
301
+ * Runs user-defined hooks at each stage of the STT → LLM → TTS pipeline.
302
+ * Fail-open: if a hook throws, the error is logged and the original value
303
+ * passes through unchanged.
304
+ */
305
+
306
+ declare class PipelineHookExecutor {
307
+ private readonly hooks;
308
+ constructor(hooks: PipelineHooks | undefined);
309
+ /**
310
+ * Run beforeSendToStt hook. Returns null to drop the audio chunk.
311
+ * If no hook is defined, returns the audio unchanged.
312
+ * Fail-open: on exception, the original audio passes through.
313
+ */
314
+ runBeforeSendToStt(audio: Buffer, ctx: HookContext): Promise<Buffer | null>;
315
+ /**
316
+ * Run afterTranscribe hook. Returns null if hook vetoes the turn.
317
+ * If no hook is defined, returns the transcript unchanged.
318
+ */
319
+ runAfterTranscribe(transcript: string, ctx: HookContext): Promise<string | null>;
320
+ /**
321
+ * Run beforeLlm hook. Returns a possibly-modified messages list.
322
+ * Returning ``null`` from the hook means "keep the original" — the LLM
323
+ * call is too important to be silently vetoed.
324
+ * Fail-open: on exception, the original messages pass through.
325
+ */
326
+ runBeforeLlm(messages: Array<Record<string, unknown>>, ctx: HookContext): Promise<Array<Record<string, unknown>>>;
327
+ /**
328
+ * Run afterLlm hook. Returns a possibly-modified assistant text.
329
+ * Returning ``null`` from the hook means "keep the original".
330
+ * Fail-open: on exception, the original text passes through.
331
+ */
332
+ runAfterLlm(text: string, ctx: HookContext): Promise<string>;
333
+ /**
334
+ * Whether ``afterLlm`` is configured. Used by the LLM loop to decide
335
+ * whether to buffer streaming tokens before yielding them.
336
+ */
337
+ hasAfterLlm(): boolean;
338
+ /**
339
+ * Run beforeSynthesize hook. Returns null if hook vetoes TTS for this sentence.
340
+ * If no hook is defined, returns the text unchanged.
341
+ */
342
+ runBeforeSynthesize(text: string, ctx: HookContext): Promise<string | null>;
343
+ /**
344
+ * Run afterSynthesize hook. Returns null if hook vetoes this audio chunk.
345
+ * If no hook is defined, returns the audio unchanged.
346
+ */
347
+ runAfterSynthesize(audio: Buffer, text: string, ctx: HookContext): Promise<Buffer | null>;
348
+ }
349
+
350
+ /**
351
+ * Lightweight in-process event bus for Patter call lifecycle events.
352
+ *
353
+ * Mirrors the Python ``PatterEventBus`` (sdk-py/getpatter/observability/event_bus.py).
354
+ * Consumers subscribe with ``on()`` and receive typed payloads. ``emit()`` is
355
+ * synchronous but handles async listeners: rejections are surfaced via the
356
+ * Patter logger rather than being swallowed or crashing the call.
357
+ */
358
+ type PatterEventType = 'turn_started' | 'turn_ended' | 'eou_metrics' | 'interruption' | 'llm_metrics' | 'tts_metrics' | 'stt_metrics' | 'metrics_collected' | 'call_ended' | 'transcript_partial' | 'transcript_final' | 'llm_chunk' | 'tts_chunk' | 'tool_call_started';
359
+ type Listener<T = unknown> = (payload: T) => void | Promise<void>;
360
+ declare class EventBus {
361
+ private readonly listeners;
362
+ /**
363
+ * Subscribe to an event type. Returns an unsubscribe function.
364
+ */
365
+ on<T = unknown>(event: PatterEventType, cb: Listener<T>): () => void;
366
+ /**
367
+ * Emit an event synchronously. Async listeners are fire-and-forget with
368
+ * rejection logging so a badly-behaved observer never stalls the call path.
369
+ */
370
+ emit<T = unknown>(event: PatterEventType, payload: T): void;
371
+ }
372
+
243
373
  /**
244
374
  * Built-in LLM loop for pipeline mode when no onMessage handler is provided.
245
375
  *
@@ -248,14 +378,58 @@ interface TTSAdapter {
248
378
  * ``OpenAILLMProvider`` which preserves full backward compatibility.
249
379
  */
250
380
 
381
+ /**
382
+ * Minimal interface for recording LLM usage chunks.
383
+ * Avoids a circular import from metrics.ts.
384
+ */
385
+ interface LlmUsageRecorder {
386
+ recordLlmUsage(provider: string, model: string, inputTokens: number, outputTokens: number, cacheReadTokens?: number, cacheCreationTokens?: number): void;
387
+ }
388
+ /**
389
+ * Pluggable tool executor — mirrors the Python ``ToolExecutor`` in
390
+ * ``sdk-py/getpatter/services/tool_executor.py``.
391
+ *
392
+ * Implementors receive a fully-resolved ``ToolDefinition`` (handler and/or webhook
393
+ * URL already validated by the SDK) and MUST return a JSON-stringifiable
394
+ * result. Errors should be returned as JSON like
395
+ * ``{ error: "...", fallback: true }`` rather than thrown.
396
+ */
397
+ interface ToolExecutor {
398
+ execute(toolDef: ToolDefinition, args: Record<string, unknown>, callContext: Record<string, unknown>): Promise<string>;
399
+ }
400
+ interface DefaultToolExecutorOptions {
401
+ /** Total attempts = maxRetries + 1. Default: 2 (i.e. 3 attempts). */
402
+ maxRetries?: number;
403
+ /** Delay between attempts, in ms. */
404
+ retryDelayMs?: number;
405
+ /** Per-request timeout for webhook calls, in ms. */
406
+ requestTimeoutMs?: number;
407
+ }
408
+ /**
409
+ * Default executor — webhook with retry/fallback and local handler preference.
410
+ *
411
+ * This is the out-of-the-box behavior and is 1:1 equivalent to the previous
412
+ * inline logic in ``LLMLoop.executeTool``.
413
+ */
414
+ declare class DefaultToolExecutor implements ToolExecutor {
415
+ private readonly maxRetries;
416
+ private readonly retryDelayMs;
417
+ private readonly requestTimeoutMs;
418
+ constructor(opts?: DefaultToolExecutorOptions);
419
+ execute(toolDef: ToolDefinition, args: Record<string, unknown>, callContext: Record<string, unknown>): Promise<string>;
420
+ }
251
421
  /** A single streaming chunk yielded by an LLM provider. */
252
422
  interface LLMChunk {
253
- type: 'text' | 'tool_call' | 'done';
423
+ type: 'text' | 'tool_call' | 'done' | 'usage';
254
424
  content?: string;
255
425
  index?: number;
256
426
  id?: string;
257
427
  name?: string;
258
428
  arguments?: string;
429
+ inputTokens?: number;
430
+ outputTokens?: number;
431
+ cacheReadInputTokens?: number;
432
+ cacheCreationInputTokens?: number;
259
433
  }
260
434
  /**
261
435
  * Interface that any LLM provider must satisfy.
@@ -269,11 +443,44 @@ interface LLMChunk {
269
443
  interface LLMProvider {
270
444
  stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
271
445
  }
446
+ /** Optional sampling kwargs forwarded into the OpenAI Chat Completions body. */
447
+ interface OpenAILLMSamplingOptions {
448
+ /** Sampling temperature [0, 2]. */
449
+ temperature?: number;
450
+ /** Max tokens in the assistant response (sent as ``max_completion_tokens``). */
451
+ maxTokens?: number;
452
+ /** OpenAI-style ``response_format`` for JSON mode / structured outputs. */
453
+ responseFormat?: Record<string, unknown>;
454
+ /** Whether to allow parallel tool calls. */
455
+ parallelToolCalls?: boolean;
456
+ /** ``"auto" | "none" | "required"`` or a specific tool object. */
457
+ toolChoice?: string | Record<string, unknown>;
458
+ /** Sampling seed for reproducible outputs. */
459
+ seed?: number;
460
+ /** Nucleus sampling cutoff in [0, 1]. */
461
+ topP?: number;
462
+ /** Penalty in [-2, 2] applied to repeated tokens. */
463
+ frequencyPenalty?: number;
464
+ /** Penalty in [-2, 2] applied to seen tokens. */
465
+ presencePenalty?: number;
466
+ /** Stop sequence(s). */
467
+ stop?: string | string[];
468
+ }
272
469
  /** LLM provider backed by OpenAI Chat Completions (streaming). */
273
470
  declare class OpenAILLMProvider implements LLMProvider {
274
471
  private readonly apiKey;
275
- private readonly model;
276
- constructor(apiKey: string, model: string);
472
+ readonly model: string;
473
+ private readonly temperature?;
474
+ private readonly maxTokens?;
475
+ private readonly responseFormat?;
476
+ private readonly parallelToolCalls?;
477
+ private readonly toolChoice?;
478
+ private readonly seed?;
479
+ private readonly topP?;
480
+ private readonly frequencyPenalty?;
481
+ private readonly presencePenalty?;
482
+ private readonly stop?;
483
+ constructor(apiKey: string, model: string, sampling?: OpenAILLMSamplingOptions);
277
484
  stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
278
485
  }
279
486
  declare class LLMLoop {
@@ -282,15 +489,34 @@ declare class LLMLoop {
282
489
  private readonly tools;
283
490
  private readonly openaiTools;
284
491
  private readonly toolMap;
492
+ private toolExecutor;
493
+ private eventBus?;
494
+ private readonly _providerName;
495
+ private readonly _modelName;
285
496
  constructor(apiKey: string, model: string, systemPrompt: string, tools?: ToolDefinition[] | null, llmProvider?: LLMProvider);
497
+ /**
498
+ * Swap in a custom tool executor (e.g. different retry policy, metrics
499
+ * wrapping, tenant-aware fan-out). The default is ``DefaultToolExecutor``.
500
+ */
501
+ setToolExecutor(executor: ToolExecutor): void;
502
+ /**
503
+ * Wire an {@link EventBus} so the loop emits ``llm_chunk`` per text
504
+ * token and ``tool_call_started`` the first time each tool-call index
505
+ * appears. Set to ``undefined`` to disable.
506
+ */
507
+ setEventBus(bus: EventBus | undefined): void;
286
508
  /**
287
509
  * Stream LLM response tokens, handling tool calls automatically.
288
510
  * Yields text tokens as they arrive from the LLM.
511
+ *
512
+ * @param metrics Optional usage recorder — when provided, usage chunks
513
+ * from the provider are forwarded to {@link LlmUsageRecorder.recordLlmUsage}
514
+ * so token costs are included in the call cost breakdown (fix 10).
289
515
  */
290
516
  run(userText: string, history: Array<{
291
517
  role: string;
292
518
  text: string;
293
- }>, callContext: Record<string, unknown>): AsyncGenerator<string, void, unknown>;
519
+ }>, callContext: Record<string, unknown>, metrics?: LlmUsageRecorder, hookExecutor?: PipelineHookExecutor, hookCtx?: HookContext): AsyncGenerator<string, void, unknown>;
294
520
  private executeTool;
295
521
  private buildMessages;
296
522
  }
@@ -305,11 +531,11 @@ interface STTConfig {
305
531
  readonly apiKey: string;
306
532
  readonly language: string;
307
533
  /**
308
- * Optional when present, called by internal serialisation. Not required for
309
- * callers that pass a plain object literal (``{ provider, apiKey, language }``)
310
- * to maintain parity with the Python SDK, which accepts dataclass-like inputs.
534
+ * Serialise the config into a JSON-compatible dict for the wire protocol.
535
+ * Mandatory — matches Python's ``STTConfig.to_dict()``. Concrete classes
536
+ * returned by ``stt(...)``/``deepgram(...)`` etc. all implement it.
311
537
  */
312
- toDict?(): Record<string, string | Record<string, unknown>>;
538
+ toDict(): Record<string, string | Record<string, unknown>>;
313
539
  /** Provider-specific knobs (e.g. Deepgram endpointing). */
314
540
  options?: Record<string, unknown>;
315
541
  }
@@ -317,36 +543,15 @@ interface TTSConfig {
317
543
  readonly provider: string;
318
544
  readonly apiKey: string;
319
545
  readonly voice: string;
320
- toDict?(): Record<string, string | Record<string, unknown>>;
546
+ /**
547
+ * Serialise the config into a JSON-compatible dict for the wire protocol.
548
+ * Mandatory — matches Python's ``TTSConfig.to_dict()``.
549
+ */
550
+ toDict(): Record<string, string | Record<string, unknown>>;
321
551
  options?: Record<string, unknown>;
322
552
  }
323
553
  type MessageHandler = (msg: IncomingMessage) => Promise<string>;
324
554
  type CallEventHandler = (data: Record<string, unknown>) => Promise<void>;
325
- interface PatterOptions {
326
- apiKey: string;
327
- backendUrl?: string;
328
- restUrl?: string;
329
- }
330
- interface ConnectOptions {
331
- onMessage: MessageHandler;
332
- onCallStart?: CallEventHandler;
333
- onCallEnd?: CallEventHandler;
334
- provider?: string;
335
- providerKey?: string;
336
- providerSecret?: string;
337
- number?: string;
338
- country?: string;
339
- stt?: STTConfig;
340
- tts?: TTSConfig;
341
- }
342
- interface CallOptions {
343
- to: string;
344
- onMessage?: MessageHandler;
345
- firstMessage?: string;
346
- fromNumber?: string;
347
- agentId?: string;
348
- machineDetection?: boolean;
349
- }
350
555
  interface ToolDefinition {
351
556
  name: string;
352
557
  description: string;
@@ -356,58 +561,9 @@ interface ToolDefinition {
356
561
  /** Local handler function — when provided, called instead of webhookUrl. */
357
562
  handler?: (args: Record<string, unknown>, context: Record<string, unknown>) => Promise<string>;
358
563
  }
359
- interface CreateAgentOptions {
360
- name: string;
361
- systemPrompt: string;
362
- model?: string;
363
- voice?: string;
364
- voiceProvider?: string;
365
- language?: string;
366
- firstMessage?: string;
367
- tools?: ToolDefinition[];
368
- }
369
- interface Agent {
370
- id: string;
371
- name: string;
372
- systemPrompt: string;
373
- model: string;
374
- voice: string;
375
- voiceProvider: string;
376
- language: string;
377
- firstMessage: string | null;
378
- tools: ToolDefinition[] | null;
379
- }
380
- interface PhoneNumber {
381
- id: string;
382
- number: string;
383
- provider: string;
384
- country: string;
385
- status: string;
386
- agentId: string | null;
387
- }
388
- interface Call {
389
- id: string;
390
- direction: string;
391
- caller: string;
392
- callee: string;
393
- startedAt: string;
394
- endedAt: string | null;
395
- durationSeconds: number | null;
396
- status: string;
397
- transcript: Array<{
398
- role: string;
399
- text: string;
400
- timestamp: string;
401
- }> | null;
402
- }
403
564
  interface LocalOptions {
404
565
  /**
405
- * Local mode is auto-detected when a ``carrier`` is passed. Pass
406
- * ``mode: 'local'`` to force local mode explicitly.
407
- */
408
- mode?: 'local';
409
- /**
410
- * Telephony carrier instance. Required for local mode.
566
+ * Telephony carrier instance. Required.
411
567
  *
412
568
  * @example
413
569
  * ```ts
@@ -456,6 +612,14 @@ interface PipelineHooks {
456
612
  beforeSendToStt?: (audio: Buffer, ctx: HookContext) => Buffer | null | Promise<Buffer | null>;
457
613
  /** Called after STT produces a transcript, before LLM. Return null to skip this turn. */
458
614
  afterTranscribe?: (transcript: string, ctx: HookContext) => string | null | Promise<string | null>;
615
+ /** Called with the messages list before the LLM call.
616
+ * Return null to keep them, or return a new list to replace
617
+ * (useful for prompt injection, message filtering, RAG augmentation). */
618
+ beforeLlm?: (messages: Array<Record<string, unknown>>, ctx: HookContext) => Array<Record<string, unknown>> | null | Promise<Array<Record<string, unknown>> | null>;
619
+ /** Called with the final assistant text after the LLM stream completes.
620
+ * Return null to keep, or return a new string to replace
621
+ * (useful for output validation, redaction, post-processing). */
622
+ afterLlm?: (text: string, ctx: HookContext) => string | null | Promise<string | null>;
459
623
  /** Called before TTS, per-sentence in streaming mode. Return null to skip TTS for this sentence. */
460
624
  beforeSynthesize?: (text: string, ctx: HookContext) => string | null | Promise<string | null>;
461
625
  /** Called after TTS produces an audio chunk. Return null to discard this chunk. */
@@ -585,38 +749,120 @@ interface LocalCallOptions {
585
749
  variables?: Record<string, string>;
586
750
  /**
587
751
  * Ring timeout in seconds. Forwarded to Twilio as `Timeout` and to Telnyx
588
- * as `timeout_secs`. Defaults to the carrier default (~28 s on Twilio) when
589
- * omitted. Increase for international routes where the remote carrier
590
- * silences short US→IT rings.
752
+ * as `timeout_secs`. Defaults to **25 s** — the production-recommended
753
+ * value that limits phantom calls. Pass `60` for legacy carrier-default
754
+ * parity, or `null` to omit the parameter entirely (carrier picks its
755
+ * own default).
591
756
  */
592
- ringTimeout?: number;
757
+ ringTimeout?: number | null;
758
+ }
759
+
760
+ /**
761
+ * In-memory metrics store for the local dashboard.
762
+ *
763
+ * Keeps the last `maxCalls` completed calls and tracks active calls.
764
+ * Supports SSE event subscribers for real-time updates.
765
+ *
766
+ * Optional disk hydration: when `CallLogger` writes per-call records under
767
+ * `<root>/calls/YYYY/MM/DD/<call_id>/metadata.json`, calling
768
+ * `hydrate(logRoot)` on a fresh store rebuilds the in-memory list from those
769
+ * files so the dashboard survives process restarts (the persistence is in
770
+ * the JSONL/JSON files, the store is just a cache on top).
771
+ */
772
+
773
+ interface CallRecord {
774
+ call_id: string;
775
+ caller: string;
776
+ callee: string;
777
+ direction: string;
778
+ started_at: number;
779
+ ended_at?: number;
780
+ /**
781
+ * Current lifecycle state: ``initiated`` (pre-registered), ``ringing``,
782
+ * ``in-progress``, ``completed``, ``no-answer``, ``busy``, ``failed``,
783
+ * ``canceled``, or ``webhook_error``.
784
+ */
785
+ status?: string;
786
+ transcript?: Array<{
787
+ role: string;
788
+ text: string;
789
+ timestamp: number;
790
+ }>;
791
+ turns?: unknown[];
792
+ metrics?: Record<string, unknown> | null;
793
+ [key: string]: unknown;
794
+ }
795
+ interface SSEEvent {
796
+ type: string;
797
+ data: Record<string, unknown>;
798
+ }
799
+ declare class MetricsStore extends EventEmitter {
800
+ private readonly maxCalls;
801
+ private calls;
802
+ private activeCalls;
803
+ /**
804
+ * Accepts either a numeric ``maxCalls`` (legacy positional — matches the
805
+ * original TS API) or an options object ``{ maxCalls }`` to align with the
806
+ * Python SDK's keyword-argument style. Plain literals also work:
807
+ * ``new MetricsStore()`` / ``new MetricsStore(100)`` / ``new MetricsStore({ maxCalls: 100 })``.
808
+ */
809
+ constructor(maxCallsOrOpts?: number | {
810
+ maxCalls?: number;
811
+ });
812
+ private publish;
813
+ recordCallStart(data: Record<string, unknown>): void;
814
+ /**
815
+ * Pre-register an outbound call before any webhook fires. Lets the
816
+ * dashboard surface attempts that never reach media (no-answer, busy,
817
+ * carrier-rejected). Mirrors the Python ``record_call_initiated``.
818
+ */
819
+ recordCallInitiated(data: Record<string, unknown>): void;
820
+ /**
821
+ * Update the status of an active or completed call. Terminal states
822
+ * (completed, no-answer, busy, failed, canceled, webhook_error) move the
823
+ * row from active to completed so the UI freezes the live duration timer.
824
+ */
825
+ updateCallStatus(callId: string, status: string, extra?: Record<string, unknown>): void;
826
+ recordTurn(data: Record<string, unknown>): void;
827
+ recordCallEnd(data: Record<string, unknown>, metrics?: Record<string, unknown> | null): void;
828
+ getCalls(limit?: number, offset?: number): CallRecord[];
829
+ getCall(callId: string): CallRecord | null;
830
+ /** Look up an active call by id (returns undefined if not active or unknown). */
831
+ getActive(callId: string): CallRecord | undefined;
832
+ getActiveCalls(): CallRecord[];
833
+ getAggregates(): Record<string, unknown>;
834
+ getCallsInRange(fromTs?: number, toTs?: number): CallRecord[];
835
+ get callCount(): number;
836
+ /**
837
+ * Rebuild the in-memory call list from `metadata.json` files written by
838
+ * `CallLogger` under `<logRoot>/calls/YYYY/MM/DD/<call_id>/`. Idempotent:
839
+ * call_ids already in the store are skipped. Errors per file are logged
840
+ * and swallowed so a single corrupt entry doesn't block hydration.
841
+ *
842
+ * Returns the number of calls newly added to the store.
843
+ *
844
+ * Safe to call before any traffic; intended to run once at server startup.
845
+ */
846
+ hydrate(logRoot: string | null | undefined): number;
593
847
  }
594
848
 
595
849
  declare class Patter {
596
- readonly apiKey: string;
597
- private readonly backendUrl;
598
- private readonly restUrl;
599
- private readonly connection;
600
- private readonly mode;
601
850
  private localConfig;
602
851
  private embeddedServer;
603
852
  private tunnelHandle;
604
- constructor(options: PatterOptions | LocalOptions);
853
+ /**
854
+ * Live `MetricsStore` for the embedded server. Returns `null` before
855
+ * `serve()` is called. Exposed so integrations like `PatterTool` can
856
+ * subscribe to per-call lifecycle events (`call_initiated`,
857
+ * `call_start`, `call_end`).
858
+ */
859
+ get metricsStore(): MetricsStore | null;
860
+ constructor(options: LocalOptions);
605
861
  agent(opts: AgentOptions): AgentOptions;
606
862
  serve(opts: ServeOptions): Promise<void>;
607
863
  test(opts: ServeOptions): Promise<void>;
608
- connect(options: ConnectOptions): Promise<void>;
609
- call(options: CallOptions | LocalCallOptions): Promise<void>;
864
+ call(options: LocalCallOptions): Promise<void>;
610
865
  disconnect(): Promise<void>;
611
- createAgent(opts: CreateAgentOptions): Promise<Agent>;
612
- listAgents(): Promise<Agent[]>;
613
- buyNumber(opts?: {
614
- country?: string;
615
- provider?: string;
616
- }): Promise<PhoneNumber>;
617
- assignAgent(numberId: string, agentId: string): Promise<void>;
618
- listCalls(limit?: number): Promise<Call[]>;
619
- private registerNumber;
620
866
  }
621
867
 
622
868
  /**
@@ -704,51 +950,49 @@ declare const DEFAULT_MIN_SENTENCE_LEN = 20;
704
950
  declare class SentenceChunker {
705
951
  private buffer;
706
952
  private readonly minSentenceLen;
953
+ private readonly minWordsForShortFlush;
707
954
  constructor(options?: {
708
955
  minSentenceLen?: number;
956
+ minWordsForShortFlush?: number;
709
957
  });
710
- /** Feed a token. Returns zero or more complete sentences. */
958
+ /**
959
+ * Feed a token. Returns zero or more complete sentences.
960
+ *
961
+ * Two emission paths:
962
+ * - **Standard path** — when the buffer is at least `minSentenceLen`
963
+ * characters long and the regex tokenizer reports more than one
964
+ * sentence, all but the last (potentially incomplete) are emitted.
965
+ * - **Short-flush path** — when the buffer is shorter than `minSentenceLen`
966
+ * but ends with a sentence terminator AND has at least
967
+ * `minWordsForShortFlush` whitespace-separated words, emit it
968
+ * immediately. This drops TTS TTFB on short greetings like `"Hi there!"`
969
+ * while keeping single-word utterances (`"Sì."`) buffered until
970
+ * `flush()`.
971
+ */
711
972
  push(token: string): string[];
973
+ /**
974
+ * Emit the buffer when it's a short, complete single-sentence utterance.
975
+ *
976
+ * A buffer qualifies when **all** of these hold:
977
+ * 1. Last non-whitespace char is a sentence terminator.
978
+ * 2. Word count is at least `minWordsForShortFlush` (default 2 — keeps
979
+ * single-word "Sì." / "Yes." buffered until `flush()`).
980
+ * 3. The buffer contains exactly one terminator (the trailing one).
981
+ * Multiple terminators mean we may be mid-stream of a longer merged
982
+ * utterance like `"Hey! Hi! Hello! This is a sentence."` — let the
983
+ * standard path keep merging.
984
+ * 4. The char immediately before the terminator is NOT a digit (avoids
985
+ * splitting a decimal mid-stream, e.g. flushing `"f(x) = x * 2."` before `54` arrives).
986
+ * 5. The char immediately before the terminator is NOT an uppercase
987
+ * ASCII letter (avoids acronym patterns like `"U.S."` / `"U."`).
988
+ */
989
+ private maybeShortFlush;
712
990
  /** Flush remaining buffer as final sentence(s). Call at end of stream. */
713
991
  flush(): string[];
714
992
  /** Discard buffered text. Call on interrupt. */
715
993
  reset(): void;
716
994
  }
717
995
 
718
- /**
719
- * Pipeline hook executor for pipeline mode.
720
- *
721
- * Runs user-defined hooks at each stage of the STT → LLM → TTS pipeline.
722
- * Fail-open: if a hook throws, the error is logged and the original value
723
- * passes through unchanged.
724
- */
725
-
726
- declare class PipelineHookExecutor {
727
- private readonly hooks;
728
- constructor(hooks: PipelineHooks | undefined);
729
- /**
730
- * Run beforeSendToStt hook. Returns null to drop the audio chunk.
731
- * If no hook is defined, returns the audio unchanged.
732
- * Fail-open: on exception, the original audio passes through.
733
- */
734
- runBeforeSendToStt(audio: Buffer, ctx: HookContext): Promise<Buffer | null>;
735
- /**
736
- * Run afterTranscribe hook. Returns null if hook vetoes the turn.
737
- * If no hook is defined, returns the transcript unchanged.
738
- */
739
- runAfterTranscribe(transcript: string, ctx: HookContext): Promise<string | null>;
740
- /**
741
- * Run beforeSynthesize hook. Returns null if hook vetoes TTS for this sentence.
742
- * If no hook is defined, returns the text unchanged.
743
- */
744
- runBeforeSynthesize(text: string, ctx: HookContext): Promise<string | null>;
745
- /**
746
- * Run afterSynthesize hook. Returns null if hook vetoes this audio chunk.
747
- * If no hook is defined, returns the audio unchanged.
748
- */
749
- runAfterSynthesize(audio: Buffer, text: string, ctx: HookContext): Promise<Buffer | null>;
750
- }
751
-
752
996
  /**
753
997
  * Built-in text transforms for cleaning LLM output before TTS synthesis.
754
998
  *
@@ -787,7 +1031,23 @@ declare class AuthenticationError extends PatterError {
787
1031
  declare class ProvisionError extends PatterError {
788
1032
  constructor(message: string);
789
1033
  }
1034
+ /** Thrown when a provider returns HTTP 429 on connect/upgrade. */
1035
+ declare class RateLimitError extends PatterConnectionError {
1036
+ constructor(message: string);
1037
+ }
790
1038
 
1039
+ /**
1040
+ * Config envelope for realtime / ConvAI pipelines — mirrors the wire-level
1041
+ * shape consumed by the backend. Kept narrow on purpose so callers can pass a
1042
+ * plain object literal if they prefer.
1043
+ */
1044
+ interface RealtimeConfig {
1045
+ readonly provider: string;
1046
+ readonly apiKey: string;
1047
+ readonly model?: string;
1048
+ readonly voice?: string;
1049
+ readonly options?: Record<string, unknown>;
1050
+ }
791
1051
  /**
792
1052
  * Deepgram STT config builder. Tune latency via ``endpointingMs`` /
793
1053
  * ``utteranceEndMs``. Internal only — public code should use ``DeepgramSTT``
@@ -815,13 +1075,64 @@ declare function openaiTts(opts: {
815
1075
  apiKey: string;
816
1076
  voice?: string;
817
1077
  }): TTSConfig;
818
-
1078
+ /** Soniox real-time STT config helper. */
1079
+ declare function soniox(opts: {
1080
+ apiKey: string;
1081
+ language?: string;
1082
+ }): STTConfig;
1083
+ /**
1084
+ * Speechmatics STT config helper.
1085
+ *
1086
+ * NOTE: the Speechmatics adapter is currently Python-only. Calling this helper
1087
+ * throws a clear error so callers can switch providers or use the Python SDK
1088
+ * until the TS adapter ships.
1089
+ */
1090
+ declare function speechmatics(_opts: {
1091
+ apiKey: string;
1092
+ language?: string;
1093
+ }): STTConfig;
1094
+ /** AssemblyAI real-time STT config helper. */
1095
+ declare function assemblyai(opts: {
1096
+ apiKey: string;
1097
+ language?: string;
1098
+ }): STTConfig;
1099
+ /** Cartesia TTS config helper. Default voice matches Python SDK. */
1100
+ declare function cartesia(opts: {
1101
+ apiKey: string;
1102
+ voice?: string;
1103
+ }): TTSConfig;
1104
+ /** Rime TTS config helper. */
1105
+ declare function rime(opts: {
1106
+ apiKey: string;
1107
+ voice?: string;
1108
+ }): TTSConfig;
1109
+ /** LMNT TTS config helper. */
1110
+ declare function lmnt(opts: {
1111
+ apiKey: string;
1112
+ voice?: string;
1113
+ }): TTSConfig;
819
1114
  /**
820
- * Default provider pricing and merge utilities.
1115
+ * Ultravox realtime engine config helper.
821
1116
  *
822
- * Pricing is based on public provider rates (as of early 2025).
823
- * Developers can override any provider's pricing.
1117
+ * Returns a ``RealtimeConfig`` envelope that the backend can dispatch. For
1118
+ * programmatic control over a live session use ``UltravoxRealtimeAdapter``
1119
+ * directly.
824
1120
  */
1121
+ declare function ultravox(opts: {
1122
+ apiKey: string;
1123
+ model?: string;
1124
+ voice?: string;
1125
+ }): RealtimeConfig;
1126
+ /**
1127
+ * Google Gemini Live realtime engine config helper. See
1128
+ * ``GeminiLiveAdapter`` for direct session control.
1129
+ */
1130
+ declare function geminiLive(opts: {
1131
+ apiKey: string;
1132
+ model?: string;
1133
+ voice?: string;
1134
+ }): RealtimeConfig;
1135
+
825
1136
  interface ProviderPricing {
826
1137
  unit: string;
827
1138
  price?: number;
@@ -829,6 +1140,8 @@ interface ProviderPricing {
829
1140
  audio_output_per_token?: number;
830
1141
  text_input_per_token?: number;
831
1142
  text_output_per_token?: number;
1143
+ cached_audio_input_per_token?: number;
1144
+ cached_text_input_per_token?: number;
832
1145
  }
833
1146
  declare const DEFAULT_PRICING: Record<string, ProviderPricing>;
834
1147
  /**
@@ -840,18 +1153,37 @@ declare function mergePricing(overrides?: Record<string, Partial<ProviderPricing
840
1153
  declare function calculateSttCost(provider: string, audioSeconds: number, pricing: Record<string, ProviderPricing>): number;
841
1154
  /** Calculate TTS cost from character count. */
842
1155
  declare function calculateTtsCost(provider: string, characterCount: number, pricing: Record<string, ProviderPricing>): number;
843
- /** Calculate OpenAI Realtime cost from token usage. */
1156
+ /**
1157
+ * Calculate OpenAI Realtime cost from token usage.
1158
+ *
1159
+ * OpenAI bills the cached portion of ``input_token_details.audio_tokens`` and
1160
+ * ``.text_tokens`` at the reduced cached rate (typically ~3% of full for audio,
1161
+ * ~10% of full for text on the mini model). ``cached_tokens_details`` is a
1162
+ * nested breakdown of the same ``input_token_details`` totals — the cached
1163
+ * counts are already INCLUDED in the top-level totals, so we subtract them
1164
+ * out before applying the full rate and add them back at the cached rate.
1165
+ */
844
1166
  declare function calculateRealtimeCost(usage: {
845
1167
  input_token_details?: {
846
1168
  audio_tokens?: number;
847
1169
  text_tokens?: number;
1170
+ cached_tokens_details?: {
1171
+ audio_tokens?: number;
1172
+ text_tokens?: number;
1173
+ };
848
1174
  };
849
1175
  output_token_details?: {
850
1176
  audio_tokens?: number;
851
1177
  text_tokens?: number;
852
1178
  };
853
1179
  }, pricing: Record<string, ProviderPricing>): number;
854
- /** Calculate telephony cost from call duration. */
1180
+ /**
1181
+ * Calculate telephony cost from call duration.
1182
+ *
1183
+ * Twilio bills in whole-minute increments (any partial minute is rounded up
1184
+ * to the next full minute per twilio.com/help/223132307). Telnyx bills
1185
+ * per-second. We detect Twilio by provider name and apply the round-up.
1186
+ */
855
1187
  declare function calculateTelephonyCost(provider: string, durationSeconds: number, pricing: Record<string, ProviderPricing>): number;
856
1188
 
857
1189
  /**
@@ -862,9 +1194,40 @@ declare function calculateTelephonyCost(provider: string, durationSeconds: numbe
862
1194
 
863
1195
  interface LatencyBreakdown {
864
1196
  stt_ms: number;
1197
+ /**
1198
+ * Backwards-compatible LLM bucket. With the split below, this now reflects
1199
+ * the user-perceived first-token latency (TTFT) when streaming is available
1200
+ * and the full generation time otherwise. Prefer ``llm_ttft_ms`` /
1201
+ * ``llm_total_ms`` in new code.
1202
+ */
865
1203
  llm_ms: number;
1204
+ /** Time-to-first-token (UX-facing latency): stt_complete → first LLM token. */
1205
+ llm_ttft_ms?: number;
1206
+ /**
1207
+ * Total LLM generation time: stt_complete → last LLM token. Distinct from
1208
+ * ``llm_ms`` so cost/throughput analysis and TTFT can be tracked separately.
1209
+ */
1210
+ llm_total_ms?: number;
866
1211
  tts_ms: number;
867
1212
  total_ms: number;
1213
+ /**
1214
+ * Endpoint latency: time from end-of-user-speech (VAD stop or STT
1215
+ * ``speech_final``) to LLM dispatch. Captures the silence-detection +
1216
+ * transcript-finalization gap. Optional — undefined when the source signal
1217
+ * is missing.
1218
+ */
1219
+ endpoint_ms?: number;
1220
+ /**
1221
+ * Barge-in latency: time from user-interrupt detection to TTS playback
1222
+ * actually halting (i.e. after ``sendClear`` returned). Optional — only
1223
+ * populated on interrupted turns.
1224
+ */
1225
+ bargein_ms?: number;
1226
+ /**
1227
+ * Total TTS time: LLM-first-token (or first-sentence boundary) to last
1228
+ * TTS audio byte sent. Optional — undefined when TTS never completed.
1229
+ */
1230
+ tts_total_ms?: number;
868
1231
  }
869
1232
  interface CostBreakdown {
870
1233
  stt: number;
@@ -872,6 +1235,12 @@ interface CostBreakdown {
872
1235
  llm: number;
873
1236
  telephony: number;
874
1237
  total: number;
1238
+ /**
1239
+ * Amount saved on LLM cost thanks to OpenAI Realtime prompt caching.
1240
+ * ``llm`` above is the net cost AFTER this discount. Dashboards can
1241
+ * render ``saved $X (pct%)`` next to the LLM line when > 0.
1242
+ */
1243
+ llm_cached_savings?: number;
875
1244
  }
876
1245
  interface TurnMetrics {
877
1246
  turn_index: number;
@@ -889,6 +1258,9 @@ interface CallMetrics {
889
1258
  cost: CostBreakdown;
890
1259
  latency_avg: LatencyBreakdown;
891
1260
  latency_p95: LatencyBreakdown;
1261
+ latency_p50?: LatencyBreakdown;
1262
+ latency_p90?: LatencyBreakdown;
1263
+ latency_p99?: LatencyBreakdown;
892
1264
  provider_mode: string;
893
1265
  stt_provider: string;
894
1266
  tts_provider: string;
@@ -928,18 +1300,48 @@ declare class CallMetricsAccumulator {
928
1300
  private readonly _turns;
929
1301
  private _turnStart;
930
1302
  private _sttComplete;
1303
+ private _llmFirstToken;
1304
+ private _llmFirstSentenceComplete;
931
1305
  private _llmComplete;
932
1306
  private _ttsFirstByte;
1307
+ /** Last TTS audio byte sent (hrTimeMs). Stamped by ``recordTtsComplete`` /
1308
+ * ``recordTtsCompleteTs``. Used to compute ``tts_total_ms``. */
1309
+ private _ttsLastByte;
1310
+ /** Endpoint signal (hrTimeMs) — VAD stop or STT speech_final, whichever
1311
+ * fires first. Used to compute ``endpoint_ms``. */
1312
+ private _endpointSignalAt;
1313
+ /** Monotonic stamp of LLM dispatch (paired with ``_endpointSignalAt``). */
1314
+ private _turnCommittedMono;
1315
+ /** Barge-in detected timestamp (hrTimeMs). */
1316
+ private _bargeinDetectedAt;
1317
+ /** TTS-stopped timestamp after barge-in (hrTimeMs). */
1318
+ private _bargeinStoppedAt;
933
1319
  private _turnUserText;
934
1320
  private _turnSttAudioSeconds;
935
1321
  private _totalSttAudioSeconds;
936
1322
  private _totalTtsCharacters;
937
1323
  private _totalRealtimeCost;
1324
+ private _totalRealtimeCachedSavings;
938
1325
  private _sttByteCount;
939
1326
  private _sttSampleRate;
940
1327
  private _sttBytesPerSample;
941
1328
  private _actualTelephonyCost;
942
1329
  private _actualSttCost;
1330
+ private _totalLlmCost;
1331
+ private _eventBus;
1332
+ /** Timestamp (hrTimeMs) when VAD emitted speech_end. */
1333
+ private _vadStoppedAt;
1334
+ /** Timestamp (hrTimeMs) when STT emitted its final transcript. */
1335
+ private _sttFinalAt;
1336
+ /** Timestamp (hrTimeMs) when the transcript was committed to the LLM. */
1337
+ private _turnCommittedAt;
1338
+ /** Delta (ms) from turn-committed until the on_user_turn_completed hook finished. */
1339
+ private _onUserTurnCompletedDelayMs;
1340
+ private _numInterruptions;
1341
+ private _numBackchannels;
1342
+ private _overlapStartedAt;
1343
+ private _reportOnlyInitialTtfb;
1344
+ private _initialTtfbEmitted;
943
1345
  constructor(opts: {
944
1346
  callId: string;
945
1347
  providerMode: string;
@@ -948,23 +1350,116 @@ declare class CallMetricsAccumulator {
948
1350
  ttsProvider?: string;
949
1351
  llmProvider?: string;
950
1352
  pricing?: Record<string, Partial<ProviderPricing>> | null;
1353
+ eventBus?: EventBus;
1354
+ /** When true, only the first TTFB emission per call is forwarded to the event bus. */
1355
+ reportOnlyInitialTtfb?: boolean;
951
1356
  });
1357
+ /**
1358
+ * Attach (or replace) an EventBus after construction.
1359
+ * Useful when the bus is created after the accumulator (e.g. in tests).
1360
+ */
1361
+ attachEventBus(bus: EventBus): void;
952
1362
  /** Configure audio format for STT byte-to-seconds conversion. */
953
1363
  configureSttFormat(sampleRate?: number, bytesPerSample?: number): void;
954
1364
  /** Whether a turn is currently being measured (startTurn called, not yet completed). */
955
1365
  get turnActive(): boolean;
956
1366
  startTurn(): void;
1367
+ /**
1368
+ * Start a new turn only if no turn is currently open.
1369
+ * Use this at inbound-audio ingestion points so the turn timer begins
1370
+ * on the first audio byte rather than just before recordSttComplete().
1371
+ */
1372
+ startTurnIfIdle(): void;
957
1373
  recordSttComplete(text: string, audioSeconds?: number): void;
1374
+ /** Record the timestamp of the first LLM token (TTFT). No-op after first call. */
1375
+ recordLlmFirstToken(): void;
1376
+ /**
1377
+ * Record when the sentence chunker emits the first complete sentence.
1378
+ * Used as the TTS span start so tts_ms reflects true TTS-provider latency
1379
+ * rather than the gap from llm_complete (which fires after the full response).
1380
+ * No-op after first call.
1381
+ */
1382
+ recordLlmFirstSentenceComplete(): void;
958
1383
  recordLlmComplete(): void;
959
1384
  recordTtsFirstByte(): void;
960
1385
  recordTtsComplete(text: string): void;
1386
+ /**
1387
+ * Capture the timestamp when the last TTS audio byte was sent on the wire.
1388
+ * Useful when the caller wants to record the timing without bumping the
1389
+ * character counter (e.g. interrupted turns where audio actually went out
1390
+ * but synthesis was truncated).
1391
+ */
1392
+ recordTtsCompleteTs(ts?: number): void;
1393
+ /**
1394
+ * Mark the moment a user interrupt (barge-in) was detected. Pairs with
1395
+ * ``recordTtsStopped`` to compute ``bargein_ms``.
1396
+ */
1397
+ recordBargeinDetected(ts?: number): void;
1398
+ /**
1399
+ * Mark the moment TTS playback was actually halted after a barge-in. Call
1400
+ * this *after* ``sendClear`` returns. Pairs with ``recordBargeinDetected``
1401
+ * to compute ``bargein_ms``.
1402
+ */
1403
+ recordTtsStopped(ts?: number): void;
961
1404
  recordTurnComplete(agentText: string): TurnMetrics;
962
1405
  recordTurnInterrupted(): TurnMetrics | null;
1406
+ /**
1407
+ * Record the moment VAD emitted speech_end for the current utterance.
1408
+ * @param ts Optional override timestamp in hrTimeMs units (defaults to now).
1409
+ */
1410
+ recordVadStop(ts?: number): void;
1411
+ /**
1412
+ * Record the moment the STT provider delivered its final transcript.
1413
+ * Aliased to the same instant as recordSttComplete() when called from
1414
+ * the standard pipeline; can be called independently for custom pipelines.
1415
+ * @param ts Optional override timestamp in hrTimeMs units.
1416
+ */
1417
+ recordSttFinalTimestamp(ts?: number): void;
1418
+ /**
1419
+ * Record the moment the transcript was committed to the LLM (turn start).
1420
+ * After this call, ``emitEouMetrics()`` can produce a complete EOUMetrics payload.
1421
+ * @param ts Optional override timestamp in hrTimeMs units.
1422
+ */
1423
+ recordTurnCommitted(ts?: number): void;
1424
+ /**
1425
+ * Record the delta (ms) between turn-committed and when on_user_turn_completed
1426
+ * pipeline hook finished. Stored for inclusion in the next ``emitEouMetrics``
1427
+ * call (or an explicit re-emit if desired).
1428
+ */
1429
+ recordOnUserTurnCompletedDelay(delayMs: number): void;
1430
+ /**
1431
+ * Compute and emit EOUMetrics when all three prerequisite timestamps are
1432
+ * available (VAD stop, STT final, turn committed).
1433
+ *
1434
+ * ``endOfUtteranceDelay`` = sttFinal − vadStopped (ms)
1435
+ * ``transcriptionDelay`` = turnCommitted − vadStopped (ms)
1436
+ * ``onUserTurnCompletedDelay`` = caller-supplied delta (ms) or 0
1437
+ */
1438
+ emitEouMetrics(): void;
1439
+ /**
1440
+ * Record that a caller utterance started overlapping with agent speech.
1441
+ * Call this when VAD detects speech_start during TTS playback.
1442
+ * @param ts Optional override timestamp in hrTimeMs units.
1443
+ */
1444
+ recordOverlapStart(ts?: number): void;
1445
+ /**
1446
+ * Record that the overlap ended. Emits ``InterruptionMetrics`` via the
1447
+ * event bus.
1448
+ *
1449
+ * @param wasInterruption true → barge-in (increments ``numInterruptions``),
1450
+ * false → backchannel (increments ``numBackchannels``).
1451
+ * @param ts Optional override timestamp in hrTimeMs units.
1452
+ */
1453
+ recordOverlapEnd(wasInterruption: boolean, ts?: number): void;
963
1454
  addSttAudioBytes(byteCount: number): void;
964
1455
  recordRealtimeUsage(usage: {
965
1456
  input_token_details?: {
966
1457
  audio_tokens?: number;
967
1458
  text_tokens?: number;
1459
+ cached_tokens_details?: {
1460
+ audio_tokens?: number;
1461
+ text_tokens?: number;
1462
+ };
968
1463
  };
969
1464
  output_token_details?: {
970
1465
  audio_tokens?: number;
@@ -973,118 +1468,161 @@ declare class CallMetricsAccumulator {
973
1468
  }): void;
974
1469
  setActualTelephonyCost(cost: number): void;
975
1470
  setActualSttCost(cost: number): void;
1471
+ /**
1472
+ * Accumulate LLM token cost for pipeline mode (non-Realtime).
1473
+ *
1474
+ * Called by LLMLoop.run() when a usage chunk arrives from the provider.
1475
+ * Mirrors Python's CallMetricsAccumulator.record_llm_usage().
1476
+ *
1477
+ * @param provider LLM provider key (e.g. 'openai', 'anthropic')
1478
+ * @param model Model name (e.g. 'gpt-4o-mini')
1479
+ * @param inputTokens Total input tokens (includes cached)
1480
+ * @param outputTokens Total output tokens
1481
+ * @param cacheReadTokens Cached input tokens (subtracted from input before billing full rate)
1482
+ * @param cacheWriteTokens Cache write tokens (billed at cache_write rate if present)
1483
+ */
1484
+ recordLlmUsage(provider: string, model: string, inputTokens: number, outputTokens: number, cacheReadTokens?: number, cacheWriteTokens?: number): void;
976
1485
  endCall(): CallMetrics;
977
1486
  getCostSoFar(): CostBreakdown;
978
1487
  private _resetTurnState;
979
1488
  private _computeTurnLatency;
980
1489
  private _computeCost;
1490
+ /**
1491
+ * Turns eligible for latency statistics.
1492
+ *
1493
+ * Excludes turns marked ``[interrupted]`` (barge-in, cancelled replacements)
1494
+ * because their recorded latency either reflects partial state or zero —
1495
+ * including them would drag every p95/avg bucket toward meaningless numbers.
1496
+ */
1497
+ private _completedTurns;
981
1498
  private _computeAverageLatency;
982
- private _computeP95Latency;
1499
+ private _computePercentileLatency;
983
1500
  }
984
1501
 
1502
+ /**
1503
+ * Supported OpenAI Realtime wire audio formats. See
1504
+ * https://platform.openai.com/docs/guides/realtime for the full list.
1505
+ * ``g711_ulaw`` matches what Twilio/Telnyx emit natively on the phone leg,
1506
+ * so no transcoding is needed. ``pcm16`` is used in the terminal test-mode
1507
+ * path and when the telephony provider negotiates L16/16000.
1508
+ */
1509
+ type OpenAIRealtimeAudioFormat = 'g711_ulaw' | 'g711_alaw' | 'pcm16';
1510
+ type RealtimeEventCallback = (type: string, data: unknown) => void | Promise<void>;
1511
+ interface OpenAIRealtimeOptions {
1512
+ temperature?: number;
1513
+ maxResponseOutputTokens?: number | 'inf';
1514
+ modalities?: string[];
1515
+ toolChoice?: string | Record<string, unknown>;
1516
+ inputAudioTranscriptionModel?: string;
1517
+ vadType?: 'server_vad' | 'semantic_vad';
1518
+ /**
1519
+ * Trailing silence (ms) the server VAD waits for before treating the user's
1520
+ * turn as complete. Defaults to 300 — OpenAI's documented sweet-spot for
1521
+ * snappier turn-taking, ~200 ms faster than the previous 500 default.
1522
+ * Increase for dictation-style flows where the user pauses mid-sentence.
1523
+ */
1524
+ silenceDurationMs?: number;
1525
+ }
985
1526
  declare class OpenAIRealtimeAdapter {
986
1527
  private readonly apiKey;
987
1528
  private readonly model;
988
1529
  private readonly voice;
989
1530
  private readonly instructions;
990
1531
  private readonly tools?;
1532
+ private readonly audioFormat;
991
1533
  private ws;
1534
+ private readonly eventCallbacks;
1535
+ private messageListenerAttached;
1536
+ private heartbeat;
1537
+ private currentResponseItemId;
1538
+ private currentResponseAudioMs;
1539
+ private readonly options;
992
1540
  constructor(apiKey: string, model?: string, voice?: string, instructions?: string, tools?: Array<{
993
1541
  name: string;
994
1542
  description: string;
995
1543
  parameters: Record<string, unknown>;
996
- }> | undefined);
1544
+ }> | undefined, audioFormat?: OpenAIRealtimeAudioFormat, options?: OpenAIRealtimeOptions);
997
1545
  connect(): Promise<void>;
998
1546
  sendAudio(mulawAudio: Buffer): void;
999
- onEvent(callback: (type: string, data: unknown) => void | Promise<void>): void;
1547
+ /**
1548
+ * Register a listener for parsed realtime events.
1549
+ *
1550
+ * Previously every call attached a new ``ws.on('message')`` handler,
1551
+ * which leaked listeners across retries and multi-consumer hooks. We now
1552
+ * route all traffic through a single persistent handler that fans out to
1553
+ * a Set of callbacks. Use {@link offEvent} to remove one.
1554
+ */
1555
+ onEvent(callback: RealtimeEventCallback): void;
1556
+ offEvent(callback: RealtimeEventCallback): void;
1557
+ private ensureMessageListener;
1000
1558
  cancelResponse(): void;
1001
1559
  sendText(text: string): Promise<void>;
1002
1560
  sendFunctionResult(callId: string, result: string): Promise<void>;
1003
1561
  close(): void;
1004
1562
  }
1005
1563
 
1564
+ interface ElevenLabsConvAIOptions {
1565
+ apiKey: string;
1566
+ agentId?: string;
1567
+ voiceId?: string;
1568
+ modelId?: string;
1569
+ language?: string;
1570
+ firstMessage?: string;
1571
+ outputAudioFormat?: string;
1572
+ inputAudioFormat?: string;
1573
+ useSignedUrl?: boolean;
1574
+ }
1575
+ type EventCallback = (type: string, data: unknown) => void | Promise<void>;
1006
1576
  declare class ElevenLabsConvAIAdapter {
1577
+ private ws;
1578
+ private eventCallback;
1007
1579
  private readonly apiKey;
1008
1580
  private readonly agentId;
1009
1581
  private readonly voiceId;
1582
+ readonly modelId: string;
1583
+ private readonly language;
1010
1584
  private readonly firstMessage;
1011
- private ws;
1012
- private eventCallback;
1013
- constructor(apiKey: string, agentId?: string, voiceId?: string, _modelId?: string, _language?: string, firstMessage?: string);
1014
- connect(): Promise<void>;
1015
- sendAudio(audioBytes: Buffer): void;
1016
- onEvent(callback: (type: string, data: unknown) => void | Promise<void>): void;
1017
- close(): void;
1018
- }
1019
-
1020
- /**
1021
- * In-memory metrics store for the local dashboard.
1022
- *
1023
- * Keeps the last `maxCalls` completed calls and tracks active calls.
1024
- * Supports SSE event subscribers for real-time updates.
1025
- */
1026
-
1027
- interface CallRecord {
1028
- call_id: string;
1029
- caller: string;
1030
- callee: string;
1031
- direction: string;
1032
- started_at: number;
1033
- ended_at?: number;
1034
- /**
1035
- * Current lifecycle state: ``initiated`` (pre-registered), ``ringing``,
1036
- * ``in-progress``, ``completed``, ``no-answer``, ``busy``, ``failed``,
1037
- * ``canceled``, or ``webhook_error``.
1038
- */
1039
- status?: string;
1040
- transcript?: Array<{
1041
- role: string;
1042
- text: string;
1043
- timestamp: number;
1044
- }>;
1045
- turns?: unknown[];
1046
- metrics?: Record<string, unknown> | null;
1047
- [key: string]: unknown;
1048
- }
1049
- interface SSEEvent {
1050
- type: string;
1051
- data: Record<string, unknown>;
1052
- }
1053
- declare class MetricsStore extends EventEmitter {
1054
- private readonly maxCalls;
1055
- private calls;
1056
- private activeCalls;
1057
- /**
1058
- * Accepts either a numeric ``maxCalls`` (legacy positional — matches the
1059
- * original TS API) or an options object ``{ maxCalls }`` to align with the
1060
- * Python SDK's keyword-argument style. Plain literals also work:
1061
- * ``new MetricsStore()`` / ``new MetricsStore(100)`` / ``new MetricsStore({ maxCalls: 100 })``.
1062
- */
1063
- constructor(maxCallsOrOpts?: number | {
1064
- maxCalls?: number;
1065
- });
1066
- private publish;
1067
- recordCallStart(data: Record<string, unknown>): void;
1585
+ readonly outputAudioFormat: string | undefined;
1586
+ readonly inputAudioFormat: string | undefined;
1587
+ private readonly useSignedUrl;
1588
+ conversationId: string | null;
1589
+ agentOutputAudioFormat: string | null;
1590
+ userInputAudioFormat: string | null;
1591
+ private agentSpeaking;
1592
+ private silenceTimer;
1593
+ private closePromise;
1594
+ constructor(apiKey: string, agentId?: string, voiceId?: string, firstMessage?: string);
1595
+ constructor(options: ElevenLabsConvAIOptions);
1068
1596
  /**
1069
- * Pre-register an outbound call before any webhook fires. Lets the
1070
- * dashboard surface attempts that never reach media (no-answer, busy,
1071
- * carrier-rejected). Mirrors the Python ``record_call_initiated``.
1597
+ * Build an adapter pre-configured for Twilio Media Streams.
1598
+ *
1599
+ * Negotiates `ulaw_8000` for both `outputAudioFormat` and
1600
+ * `inputAudioFormat`, matching Twilio's μ-law @ 8 kHz wire format. The
1601
+ * SDK's stream handler detects this and skips the 8 kHz → 16 kHz inbound
1602
+ * resample and the 16 kHz → 8 kHz / PCM → μ-law outbound transcode.
1603
+ * Saves ~30–80 ms first-byte plus per-frame CPU on every turn.
1072
1604
  */
1073
- recordCallInitiated(data: Record<string, unknown>): void;
1605
+ static forTwilio(apiKey: string, agentId: string, options?: Omit<ElevenLabsConvAIOptions, 'apiKey' | 'agentId' | 'outputAudioFormat' | 'inputAudioFormat'>): ElevenLabsConvAIAdapter;
1074
1606
  /**
1075
- * Update the status of an active or completed call. Terminal states
1076
- * (completed, no-answer, busy, failed, canceled, webhook_error) move the
1077
- * row from active to completed so the UI freezes the live duration timer.
1607
+ * Build an adapter pre-configured for Telnyx bidirectional media.
1608
+ *
1609
+ * Telnyx negotiates PCMU @ 8 kHz when `streaming_start` sets
1610
+ * `stream_bidirectional_codec=PCMU` (the SDK default). Picking
1611
+ * `ulaw_8000` on both ConvAI directions removes every transcode on the
1612
+ * audio path — same optimization as `forTwilio`.
1078
1613
  */
1079
- updateCallStatus(callId: string, status: string, extra?: Record<string, unknown>): void;
1080
- recordTurn(data: Record<string, unknown>): void;
1081
- recordCallEnd(data: Record<string, unknown>, metrics?: Record<string, unknown> | null): void;
1082
- getCalls(limit?: number, offset?: number): CallRecord[];
1083
- getCall(callId: string): CallRecord | null;
1084
- getActiveCalls(): CallRecord[];
1085
- getAggregates(): Record<string, unknown>;
1086
- getCallsInRange(fromTs?: number, toTs?: number): CallRecord[];
1087
- get callCount(): number;
1614
+ static forTelnyx(apiKey: string, agentId: string, options?: Omit<ElevenLabsConvAIOptions, 'apiKey' | 'agentId' | 'outputAudioFormat' | 'inputAudioFormat'>): ElevenLabsConvAIAdapter;
1615
+ private fetchSignedUrl;
1616
+ connect(): Promise<void>;
1617
+ private safeInvoke;
1618
+ private respondToPing;
1619
+ private clearSilenceTimer;
1620
+ private finalizeAgentTurn;
1621
+ private scheduleSilenceDone;
1622
+ private handleMessage;
1623
+ sendAudio(audioBytes: Buffer): void;
1624
+ onEvent(callback: EventCallback): void;
1625
+ close(): Promise<void>;
1088
1626
  }
1089
1627
 
1090
1628
  /**
@@ -1153,6 +1691,14 @@ interface LocalConfig {
1153
1691
  * are rejected with HTTP 403.
1154
1692
  */
1155
1693
  telnyxPublicKey?: string;
1694
+ /**
1695
+ * SECURITY: require valid webhook signatures on both Twilio and Telnyx
1696
+ * inbound webhooks. When true (the default), a missing credential
1697
+ * (twilioToken / telnyxPublicKey) causes the webhook to return
1698
+ * 503 Service Unavailable instead of silently accepting the request.
1699
+ * Set to false only for local development against mock providers.
1700
+ */
1701
+ requireSignature?: boolean;
1156
1702
  }
1157
1703
 
1158
1704
  /**
@@ -1210,6 +1756,12 @@ declare function mountApi(app: Express, store: MetricsStore, token?: string): vo
1210
1756
  * When the SDK completes a call, it fires a POST to the standalone dashboard
1211
1757
  * (if running) so calls appear in real time. Data lives only in memory —
1212
1758
  * nothing is written to disk.
1759
+ *
1760
+ * TODO(parity): Python's `notify_dashboard` is now an async fire-and-forget
1761
+ * coroutine (see sdk-py/getpatter/dashboard/persistence.py). This TS version
1762
+ * uses `http.request` which is already non-blocking, but for parity consider
1763
+ * exposing this as `async function notifyDashboard(...): Promise<void>` so
1764
+ * call sites can `await` or `void` it explicitly, matching the Python API.
1213
1765
  */
1214
1766
  declare function notifyDashboard(callData: Record<string, unknown>, port?: number): void;
1215
1767
 
@@ -1275,6 +1827,215 @@ declare class FallbackLLMProvider implements LLMProvider {
1275
1827
  private stopRecovery;
1276
1828
  }
1277
1829
 
1830
+ /**
1831
+ * PatterTool — wrap a live Patter instance as a tool callable from external
1832
+ * agent frameworks (OpenAI Assistants, Anthropic Claude tool-use, LangChain,
1833
+ * Hermes Agent, MCP, generic OpenAI-compatible endpoints).
1834
+ *
1835
+ * Pattern this enables: a customer already runs an agent in their existing
1836
+ * stack (LangChain, OpenAI Assistant, Hermes Agent, …) and wants the agent
1837
+ * to *make phone calls* during a conversation. With this tool, the customer
1838
+ * registers `make_phone_call` and the agent's tool-call loop can dial out
1839
+ * via Patter, get a transcript + cost back, and continue reasoning.
1840
+ *
1841
+ * ## Design
1842
+ *
1843
+ * Each `PatterTool` wraps one `Patter` instance (carrier + agent + serve).
1844
+ * The tool exposes:
1845
+ *
1846
+ * - `openaiSchema()` — OpenAI / chat-completions tool spec
1847
+ * - `anthropicSchema()` — Anthropic Claude tool spec
1848
+ * - `hermesSchema()` — Hermes Agent / Nous registry schema (alias for
1849
+ * anthropicSchema; same JSON-Schema shape)
1850
+ * - `execute(args)` — dial outbound, await call end, return summary
1851
+ * - `hermesHandler()` — `(args) => Promise<string>` wrapper that
1852
+ * returns a JSON string and `{"error": "..."}` on
1853
+ * failure (matches Hermes' tool contract)
1854
+ *
1855
+ * ## Usage (OpenAI / Anthropic)
1856
+ *
1857
+ * ```ts
1858
+ * import { Patter, Twilio, DeepgramSTT, GroqLLM, ElevenLabsTTS } from 'getpatter';
1859
+ * import { PatterTool } from 'getpatter/integrations';
1860
+ *
1861
+ * const phone = new Patter({
1862
+ * carrier: new Twilio(),
1863
+ * phoneNumber: process.env.TWILIO_PHONE_NUMBER!,
1864
+ * webhookUrl: 'agent.example.com',
1865
+ * });
1866
+ *
1867
+ * const tool = new PatterTool({
1868
+ * phone,
1869
+ * agent: { stt: new DeepgramSTT(), llm: new GroqLLM(), tts: new ElevenLabsTTS() },
1870
+ * });
1871
+ *
1872
+ * await tool.start(); // boots phone.serve() once
1873
+ *
1874
+ * // Register with your LLM
1875
+ * const tools = [tool.openaiSchema()];
1876
+ *
1877
+ * // When the LLM emits a tool_call:
1878
+ * const result = await tool.execute({
1879
+ * to: '+15551234567',
1880
+ * goal: 'Book a dentist appointment for next Tuesday afternoon.',
1881
+ * });
1882
+ * // → { call_id, status, duration_seconds, cost_usd, transcript, … }
1883
+ * ```
1884
+ *
1885
+ * ## Usage (Hermes Agent)
1886
+ *
1887
+ * Hermes' contract: handler takes `args: dict` + kwargs, returns a JSON
1888
+ * string. The TS SDK is meant to be invoked from Python via your own bridge
1889
+ * (HTTP, MCP, subprocess); this `hermesSchema()` + `hermesHandler()` pair
1890
+ * matches the Python adapter shipped under `getpatter.integrations` so the
1891
+ * two SDKs stay in lockstep.
1892
+ *
1893
+ * For pure-Python Hermes setups, use `PatterTool` from `getpatter.integrations`
1894
+ * directly inside a `tools/patter.py` module:
1895
+ *
1896
+ * ```python
1897
+ * from tools.registry import registry
1898
+ * from getpatter.integrations import PatterTool
1899
+ *
1900
+ * tool = PatterTool(phone=...)
1901
+ * tool.register_hermes(registry)
1902
+ * ```
1903
+ */
1904
+
1905
+ /** JSON-Schema of the call args. Identical wire shape across openai/anthropic/hermes. */
1906
+ declare const PARAMETERS_SCHEMA: {
1907
+ readonly type: "object";
1908
+ readonly properties: {
1909
+ readonly to: {
1910
+ readonly type: "string";
1911
+ readonly description: "Destination phone number in E.164 format (e.g. \"+15551234567\"). Required.";
1912
+ };
1913
+ readonly goal: {
1914
+ readonly type: "string";
1915
+ readonly description: "What the agent should accomplish on the call. Becomes the in-call agent's system prompt for this single call.";
1916
+ };
1917
+ readonly first_message: {
1918
+ readonly type: "string";
1919
+ readonly description: "Optional first message the agent speaks when the callee answers. Defaults to a generic greeting.";
1920
+ };
1921
+ readonly max_duration_sec: {
1922
+ readonly type: "integer";
1923
+ readonly description: "Hard timeout for the call in seconds. Default 180. The call is force-ended at this deadline whether or not it has resolved.";
1924
+ readonly minimum: 5;
1925
+ readonly maximum: 1800;
1926
+ };
1927
+ };
1928
+ readonly required: readonly ["to"];
1929
+ };
1930
+ interface PatterToolOptions {
1931
+ /**
1932
+ * Patter instance to dial through. Must be in local mode (have a `carrier`).
1933
+ * The tool boots `phone.serve()` on `start()`; do not call `serve()` yourself.
1934
+ */
1935
+ phone: Patter;
1936
+ /**
1937
+ * Default agent config used for outbound calls. Per-call overrides come from
1938
+ * `execute({ goal, first_message })`.
1939
+ */
1940
+ agent?: AgentOptions;
1941
+ /** Tool name shown to the LLM. Default `'make_phone_call'`. */
1942
+ name?: string;
1943
+ /** Tool description for the LLM. Default tuned for English assistants. */
1944
+ description?: string;
1945
+ /** Default per-call timeout in seconds. Default 180. */
1946
+ maxDurationSec?: number;
1947
+ /**
1948
+ * Optional pass-through for `phone.serve()`'s `recording` flag — record all
1949
+ * outbound calls placed via this tool.
1950
+ */
1951
+ recording?: boolean;
1952
+ }
1953
+ interface PatterToolExecuteArgs {
1954
+ to: string;
1955
+ goal?: string;
1956
+ first_message?: string;
1957
+ max_duration_sec?: number;
1958
+ }
1959
+ interface PatterToolResult {
1960
+ call_id: string;
1961
+ status: string;
1962
+ duration_seconds: number;
1963
+ cost_usd?: number;
1964
+ transcript: Array<{
1965
+ role: string;
1966
+ text: string;
1967
+ timestamp?: number;
1968
+ }>;
1969
+ metrics?: Record<string, unknown> | null;
1970
+ }
1971
+ declare class PatterTool {
1972
+ readonly name: string;
1973
+ readonly description: string;
1974
+ private readonly phone;
1975
+ private readonly agent;
1976
+ private readonly maxDurationSec;
1977
+ private readonly recording;
1978
+ private started;
1979
+ /** Resolver for the next `call_initiated` SSE event. Only set inside the
1980
+ * dial mutex (`dialQueue`), so two parallel `execute()` calls never share
1981
+ * it and never lose a dispatch. */
1982
+ private pendingDial;
1983
+ /** Mutex that serializes the dial → call_id capture critical section.
1984
+ * Each `execute()` chains a continuation onto this promise so the
1985
+ * `pendingDial` slot is owned by exactly one caller at a time. */
1986
+ private dialQueue;
1987
+ /** Captured SSE listener so `stop()` can detach it (prevents leaks when
1988
+ * the underlying Patter instance outlives this tool). */
1989
+ private sseListener;
1990
+ /** Captured Patter metrics store, for cleanup in `stop()`. */
1991
+ private metricsStoreRef;
1992
+ /** call_id → pending promise machinery. */
1993
+ private readonly pending;
1994
+ private readonly bus;
1995
+ /** How long to wait for the `call_initiated` SSE before failing the dial. */
1996
+ private static readonly DIAL_CAPTURE_TIMEOUT_MS;
1997
+ constructor(opts: PatterToolOptions);
1998
+ /** OpenAI Chat Completions / Assistants tool spec. */
1999
+ openaiSchema(): {
2000
+ type: 'function';
2001
+ function: {
2002
+ name: string;
2003
+ description: string;
2004
+ parameters: typeof PARAMETERS_SCHEMA;
2005
+ };
2006
+ };
2007
+ /** Anthropic Messages API tool spec. */
2008
+ anthropicSchema(): {
2009
+ name: string;
2010
+ description: string;
2011
+ input_schema: typeof PARAMETERS_SCHEMA;
2012
+ };
2013
+ /**
2014
+ * Hermes Agent (Nous Research) registry schema. Same JSON-Schema shape as
2015
+ * Anthropic's; Hermes consumes it via `registry.register({ schema: ... })`.
2016
+ */
2017
+ hermesSchema(): {
2018
+ name: string;
2019
+ description: string;
2020
+ parameters: typeof PARAMETERS_SCHEMA;
2021
+ };
2022
+ /** Start the underlying Patter server. Idempotent. */
2023
+ start(): Promise<void>;
2024
+ /** Stop the underlying Patter server (and reject any pending calls). */
2025
+ stop(): Promise<void>;
2026
+ execute(args: PatterToolExecuteArgs): Promise<PatterToolResult>;
2027
+ /** Issue the outbound dial under the mutex and return its assigned call_id. */
2028
+ private acquireCallId;
2029
+ /**
2030
+ * Hermes-style handler: `(args) => Promise<string>` returning a JSON
2031
+ * string with either the result envelope or an `{"error": "..."}` payload.
2032
+ * Mirrors the Python `PatterTool.hermes_handler` so cross-SDK adapters share
2033
+ * the same wire contract.
2034
+ */
2035
+ hermesHandler(): (args: PatterToolExecuteArgs) => Promise<string>;
2036
+ private onCallEndHandler;
2037
+ }
2038
+
1278
2039
  /**
1279
2040
  * Interactive terminal test mode for voice agents.
1280
2041
  *
@@ -1303,6 +2064,12 @@ declare class TestSession {
1303
2064
  * not use Gemini Live do not pay the load cost. Install with:
1304
2065
  *
1305
2066
  * npm install @google/genai
2067
+ *
2068
+ * NOTE: Native-audio Gemini Live models are **v1alpha-only**. We pass
2069
+ * `httpOptions: { apiVersion: 'v1alpha' }` when constructing the client.
2070
+ * When Google promotes native audio to GA, switch to `v1beta` / `v1` and
2071
+ * update the default model below.
2072
+ * See: https://ai.google.dev/gemini-api/docs/live
1306
2073
  */
1307
2074
  declare const GEMINI_DEFAULT_INPUT_SR = 16000;
1308
2075
  declare const GEMINI_DEFAULT_OUTPUT_SR = 24000;
@@ -1337,6 +2104,12 @@ declare class GeminiLiveAdapter {
1337
2104
  private receiveLoop;
1338
2105
  private handlers;
1339
2106
  private running;
2107
+ /**
2108
+ * Tracks call_id -> function name so tool responses can be sent back with
2109
+ * the correct `name` field (Gemini expects the original function name,
2110
+ * not the call_id).
2111
+ */
2112
+ private pendingToolCalls;
1340
2113
  constructor(apiKey: string, options?: GeminiLiveOptions);
1341
2114
  connect(): Promise<void>;
1342
2115
  sendAudio(pcm: Buffer): void;
@@ -1514,22 +2287,19 @@ declare class SonioxSTT {
1514
2287
  /**
1515
2288
  * AssemblyAI Universal Streaming STT adapter for the Patter SDK pipeline mode.
1516
2289
  *
1517
- * Implements a `DeepgramSTT`-shaped provider using AssemblyAI's v3 streaming
1518
- * WebSocket API. Pure `ws` transport — does NOT depend on the vendor SDK.
1519
- *
1520
- * Algorithm adapted from LiveKit Agents (Apache 2.0):
1521
- * https://github.com/livekit/agents
1522
- * Source: livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py
1523
- * Upstream ref SHA: 78a66bcf79c5cea82989401c408f1dff4b961a5b
2290
+ * Pure `ws` transport — does NOT depend on the vendor SDK.
1524
2291
  */
1525
2292
  interface Transcript$3 {
1526
2293
  readonly text: string;
1527
2294
  readonly isFinal: boolean;
1528
2295
  readonly confidence: number;
2296
+ /** Optional event hint, e.g. `"SpeechStarted"` for barge-in signals. */
2297
+ readonly eventType?: string;
1529
2298
  }
1530
2299
  type TranscriptCallback$3 = (transcript: Transcript$3) => void;
1531
2300
  type AssemblyAIEncoding = 'pcm_s16le' | 'pcm_mulaw';
1532
- type AssemblyAIModel = 'universal-streaming-english' | 'universal-streaming-multilingual' | 'u3-rt-pro';
2301
+ type AssemblyAIModel = 'universal-streaming-english' | 'universal-streaming-multilingual' | 'u3-rt-pro' | 'whisper-rt';
2302
+ type AssemblyAIDomain = 'general' | 'medical-v1';
1533
2303
  interface AssemblyAISTTOptions$1 {
1534
2304
  /** One of the AssemblyAI speech models. */
1535
2305
  readonly model?: AssemblyAIModel;
@@ -1539,6 +2309,11 @@ interface AssemblyAISTTOptions$1 {
1539
2309
  readonly sampleRate?: number;
1540
2310
  /** Override the streaming base URL (e.g. EU: `wss://streaming.eu.assemblyai.com`). */
1541
2311
  readonly baseUrl?: string;
2312
+ /**
2313
+ * Authenticate via `?token=<apiKey>` in the URL instead of the
2314
+ * `Authorization` header. Default `false`.
2315
+ */
2316
+ readonly useQueryToken?: boolean;
1542
2317
  /** Enable automatic language detection (defaults: true for multilingual/u3-rt-pro). */
1543
2318
  readonly languageDetection?: boolean;
1544
2319
  /** 0..1 confidence required before end-of-turn is finalized. */
@@ -1553,34 +2328,54 @@ interface AssemblyAISTTOptions$1 {
1553
2328
  readonly keytermsPrompt?: readonly string[];
1554
2329
  /** Text prompt (u3-rt-pro only). */
1555
2330
  readonly prompt?: string;
1556
- /** VAD threshold (0..1). */
2331
+ /** Accepted for backward compatibility but NOT sent — not a valid v3 param. */
1557
2332
  readonly vadThreshold?: number;
1558
2333
  /** Enable diarization / speaker labels. */
1559
2334
  readonly speakerLabels?: boolean;
1560
2335
  /** Max speakers for diarization. */
1561
2336
  readonly maxSpeakers?: number;
1562
- /** Domain hint (e.g. "medical"). */
1563
- readonly domain?: string;
2337
+ /** Domain hint — must be `"general"` or `"medical-v1"`. */
2338
+ readonly domain?: AssemblyAIDomain;
1564
2339
  }
1565
2340
  declare class AssemblyAISTT {
1566
2341
  private readonly apiKey;
1567
2342
  private readonly options;
1568
2343
  private ws;
1569
- private callbacks;
2344
+ private readonly callbacks;
2345
+ private closing;
2346
+ private reconnectAttempts;
2347
+ private terminationResolve;
1570
2348
  /** AssemblyAI session id — set when the `Begin` message arrives. */
1571
- sessionId: string;
2349
+ sessionId: string | null;
1572
2350
  /** Unix timestamp when the AssemblyAI session expires. */
1573
- expiresAt: number;
2351
+ expiresAt: number | null;
1574
2352
  constructor(apiKey: string, options?: AssemblyAISTTOptions$1);
1575
2353
  /** Factory for Twilio calls — mulaw 8 kHz. */
1576
2354
  static forTwilio(apiKey: string, model?: AssemblyAIModel): AssemblyAISTT;
1577
2355
  private buildUrl;
2356
+ private buildHeaders;
1578
2357
  connect(): Promise<void>;
2358
+ private awaitOpen;
2359
+ private attachHandlers;
2360
+ private reconnect;
1579
2361
  private handleEvent;
1580
2362
  private emit;
1581
2363
  sendAudio(audio: Buffer): void;
1582
- onTranscript(callback: TranscriptCallback$3): void;
1583
- close(): void;
2364
+ private estimateChunkDurationMs;
2365
+ /**
2366
+ * Send an `UpdateConfiguration` frame to change settings mid-stream.
2367
+ * Only defined fields are included.
2368
+ */
2369
+ updateConfiguration(params: {
2370
+ keytermsPrompt?: readonly string[];
2371
+ prompt?: string;
2372
+ minTurnSilence?: number;
2373
+ maxTurnSilence?: number;
2374
+ }): void;
2375
+ /** Force the server to finalize the current turn (for barge-in). */
2376
+ forceEndpoint(): void;
2377
+ onTranscript(callback: TranscriptCallback$3): () => void;
2378
+ close(): Promise<void>;
1584
2379
  }
1585
2380
 
1586
2381
  /**
@@ -1620,8 +2415,11 @@ declare class CartesiaSTT {
1620
2415
  private ws;
1621
2416
  private callbacks;
1622
2417
  private keepaliveTimer;
1623
- /** Cartesia request id — set from the server transcript events. */
1624
- requestId: string;
2418
+ /**
2419
+ * Cartesia request id — set from the server transcript events.
2420
+ * `null` until the first transcript event arrives (matches Python's `None`).
2421
+ */
2422
+ requestId: string | null;
1625
2423
  constructor(apiKey: string, options?: CartesiaSTTOptions$1);
1626
2424
  private buildWsUrl;
1627
2425
  connect(): Promise<void>;
@@ -1629,7 +2427,23 @@ declare class CartesiaSTT {
1629
2427
  private emit;
1630
2428
  sendAudio(audio: Buffer): void;
1631
2429
  onTranscript(callback: TranscriptCallback$2): void;
2430
+ /** Remove a previously registered transcript callback. */
2431
+ offTranscript(callback: TranscriptCallback$2): void;
2432
+ /**
2433
+ * Synchronous best-effort close. Sends `finalize` and closes the socket
2434
+ * without waiting for the server to flush any remaining transcripts.
2435
+ *
2436
+ * Limitation: any transcript events produced between the `finalize` send
2437
+ * and the socket close may be dropped. Callers that need to guarantee all
2438
+ * transcripts are delivered should await {@link closeAsync} instead.
2439
+ */
1632
2440
  close(): void;
2441
+ /**
2442
+ * Graceful close that awaits the `finalize` send and the socket closing
2443
+ * handshake, matching the Python adapter's behavior. Use this when you
2444
+ * need any in-flight transcripts to be flushed before teardown.
2445
+ */
2446
+ closeAsync(): Promise<void>;
1633
2447
  }
1634
2448
 
1635
2449
  type LMNTAudioFormat = 'aac' | 'mp3' | 'mulaw' | 'raw' | 'wav';
@@ -1662,12 +2476,32 @@ declare class LMNTTTS {
1662
2476
  synthesizeStream(text: string): AsyncGenerator<Buffer>;
1663
2477
  }
1664
2478
 
2479
+ type TranscriptEventType = 'Results' | 'UtteranceEnd' | 'SpeechStarted';
2480
+ interface DeepgramWord {
2481
+ readonly word?: string;
2482
+ readonly start?: number;
2483
+ readonly end?: number;
2484
+ readonly confidence?: number;
2485
+ readonly punctuated_word?: string;
2486
+ readonly speaker?: number;
2487
+ }
1665
2488
  interface Transcript$1 {
1666
2489
  readonly text: string;
1667
2490
  readonly isFinal: boolean;
1668
2491
  readonly confidence: number;
2492
+ /** Deepgram VAD hint — faster end-of-utterance than ``isFinal``. */
2493
+ readonly speechFinal?: boolean;
2494
+ /** True when this Results frame was produced in response to a Finalize. */
2495
+ readonly fromFinalize?: boolean;
2496
+ /** Deepgram request id, populated from the initial Metadata frame. */
2497
+ readonly requestId?: string;
2498
+ /** Per-word timings/metadata when Deepgram emits them. */
2499
+ readonly words?: ReadonlyArray<DeepgramWord>;
2500
+ /** Which provider event this Transcript represents. Default ``Results``. */
2501
+ readonly eventType?: TranscriptEventType;
1669
2502
  }
1670
2503
  type TranscriptCallback$1 = (transcript: Transcript$1) => void;
2504
+ type ErrorCallback = (error: Error) => void;
1671
2505
  /**
1672
2506
  * Optional tuning knobs for Deepgram live transcription.
1673
2507
  *
@@ -1692,7 +2526,13 @@ interface DeepgramSTTOptions$1 {
1692
2526
  * hard minimum of 1000 ms. Set to ``null`` to disable. Default ``1000``.
1693
2527
  */
1694
2528
  readonly utteranceEndMs?: number | null;
1695
- /** Enable smart formatting (punctuation + numerals). Default ``true``. */
2529
+ /**
2530
+ * Enable smart formatting (punctuation + numerals). Default ``false`` —
2531
+ * smart formatting adds roughly 50–150 ms to TTFT on each final transcript
2532
+ * and is rarely useful for telephony pipelines that pass the text straight
2533
+ * to an LLM. Set to ``true`` for use cases (dashboards, raw transcripts)
2534
+ * where the formatted text is surfaced directly to humans.
2535
+ */
1696
2536
  readonly smartFormat?: boolean;
1697
2537
  /** Emit interim (non-final) transcripts. Default ``true``. */
1698
2538
  readonly interimResults?: boolean;
@@ -1701,7 +2541,11 @@ interface DeepgramSTTOptions$1 {
1701
2541
  }
1702
2542
  declare class DeepgramSTT {
1703
2543
  private ws;
1704
- private callbacks;
2544
+ private readonly transcriptCallbacks;
2545
+ private readonly errorCallbacks;
2546
+ private keepaliveTimer;
2547
+ private running;
2548
+ private reconnectAttempted;
1705
2549
  /** Request ID from Deepgram — used to query actual cost post-call. */
1706
2550
  requestId: string;
1707
2551
  private readonly apiKey;
@@ -1727,27 +2571,30 @@ declare class DeepgramSTT {
1727
2571
  });
1728
2572
  /** Factory for Twilio calls — mulaw 8 kHz. Forwards tuning options through. */
1729
2573
  static forTwilio(apiKey: string, language?: string, model?: string, options?: DeepgramSTTOptions$1): DeepgramSTT;
2574
+ private buildUrl;
1730
2575
  connect(): Promise<void>;
2576
+ private openSocket;
2577
+ private clearKeepalive;
2578
+ private handleMessage;
2579
+ private emitTranscript;
2580
+ private emitError;
2581
+ private handleError;
2582
+ private handleClose;
1731
2583
  sendAudio(audio: Buffer): void;
1732
2584
  onTranscript(callback: TranscriptCallback$1): void;
2585
+ offTranscript(callback: TranscriptCallback$1): void;
2586
+ onError(callback: ErrorCallback): void;
2587
+ offError(callback: ErrorCallback): void;
1733
2588
  close(): void;
1734
2589
  }
1735
2590
 
1736
2591
  /** Deepgram streaming STT for Patter pipeline mode. */
1737
2592
 
1738
- interface DeepgramSTTOptions {
2593
+ type DeepgramSTTOptions = DeepgramSTTOptions$1 & {
1739
2594
  /** API key. Falls back to DEEPGRAM_API_KEY env var when omitted. */
1740
2595
  apiKey?: string;
1741
2596
  language?: string;
1742
- model?: string;
1743
- encoding?: string;
1744
- sampleRate?: number;
1745
- endpointingMs?: number;
1746
- utteranceEndMs?: number | null;
1747
- smartFormat?: boolean;
1748
- interimResults?: boolean;
1749
- vadEvents?: boolean;
1750
- }
2597
+ };
1751
2598
  /**
1752
2599
  * Deepgram streaming STT.
1753
2600
  *
@@ -1758,7 +2605,8 @@ interface DeepgramSTTOptions {
1758
2605
  * const stt = new deepgram.STT({ apiKey: "dg_...", endpointingMs: 80 });
1759
2606
  * ```
1760
2607
  */
1761
- declare class STT$4 extends DeepgramSTT {
2608
+ declare class STT$5 extends DeepgramSTT {
2609
+ static readonly providerKey = "deepgram";
1762
2610
  constructor(opts?: DeepgramSTTOptions);
1763
2611
  }
1764
2612
 
@@ -1774,22 +2622,44 @@ interface Transcript {
1774
2622
  readonly confidence: number;
1775
2623
  }
1776
2624
  type TranscriptCallback = (transcript: Transcript) => void;
2625
+ type WhisperResponseFormat = 'json' | 'verbose_json';
1777
2626
  declare class WhisperSTT {
1778
2627
  private readonly apiKey;
1779
2628
  private readonly model;
1780
2629
  private readonly language;
1781
2630
  private readonly bufferSize;
1782
- private buffer;
2631
+ private readonly responseFormat;
2632
+ private chunks;
2633
+ private bufferedBytes;
1783
2634
  private callbacks;
1784
2635
  private running;
1785
2636
  private pendingTranscriptions;
1786
- constructor(apiKey: string, model?: string, language?: string, bufferSize?: number);
2637
+ /**
2638
+ * @param apiKey OpenAI API key.
2639
+ * @param language ISO-639-1 language code (e.g. ``"en"``, ``"it"``). Optional.
2640
+ * @param model One of ``whisper-1``, ``gpt-4o-transcribe``, ``gpt-4o-mini-transcribe``.
2641
+ * @param bufferSize Bytes of PCM16 to buffer before each transcription request.
2642
+ * @param responseFormat ``"json"`` (default) or ``"verbose_json"``.
2643
+ *
2644
+ * Argument order matches the Python SDK's ``WhisperSTT(api_key, language, model, response_format)``
2645
+ * for cross-language parity. Pre-0.5.3 the TS positional order was
2646
+ * ``(apiKey, model, language, bufferSize)`` — callers using
2647
+ * the old order will need to swap ``language`` and ``model``.
2648
+ */
2649
+ constructor(apiKey: string, language?: string, model?: string, bufferSize?: number, responseFormat?: WhisperResponseFormat);
1787
2650
  /** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
1788
2651
  static forTwilio(apiKey: string, language?: string, model?: string): WhisperSTT;
1789
2652
  connect(): Promise<void>;
1790
2653
  sendAudio(audio: Buffer): void;
2654
+ private flushChunks;
1791
2655
  private trackTranscription;
2656
+ /**
2657
+ * Register a transcript listener. Unlike the previous implementation
2658
+ * which capped at 10 and silently replaced the last one, we now keep all
2659
+ * registered callbacks in a Set; use {@link offTranscript} to remove one.
2660
+ */
1792
2661
  onTranscript(callback: TranscriptCallback): void;
2662
+ offTranscript(callback: TranscriptCallback): void;
1793
2663
  close(): Promise<void>;
1794
2664
  private transcribeBuffer;
1795
2665
  }
@@ -1802,6 +2672,8 @@ interface WhisperSTTOptions {
1802
2672
  model?: string;
1803
2673
  language?: string;
1804
2674
  bufferSize?: number;
2675
+ /** ``"verbose_json"`` exposes segment-level confidence / timestamps. */
2676
+ responseFormat?: WhisperResponseFormat;
1805
2677
  }
1806
2678
  /**
1807
2679
  * OpenAI Whisper STT.
@@ -1813,10 +2685,68 @@ interface WhisperSTTOptions {
1813
2685
  * const stt = new whisper.STT({ apiKey: "sk-...", language: "en" });
1814
2686
  * ```
1815
2687
  */
1816
- declare class STT$3 extends WhisperSTT {
2688
+ declare class STT$4 extends WhisperSTT {
2689
+ static readonly providerKey = "whisper";
1817
2690
  constructor(opts?: WhisperSTTOptions);
1818
2691
  }
1819
2692
 
2693
+ /**
2694
+ * OpenAI GPT-4o Transcribe STT adapter for the Patter SDK pipeline mode.
2695
+ *
2696
+ * First-class wrapper around OpenAI's ``gpt-4o-transcribe`` /
2697
+ * ``gpt-4o-mini-transcribe`` models. They share the
2698
+ * ``POST /v1/audio/transcriptions`` endpoint with Whisper-1 but offer ~10x
2699
+ * lower latency and stronger multilingual quality, making them a drop-in
2700
+ * replacement for ``WhisperSTT`` whenever speed matters.
2701
+ *
2702
+ * Use this class instead of ``WhisperSTT`` when you specifically want the
2703
+ * GPT-4o Transcribe family — it restricts the accepted models so
2704
+ * misconfigured calls fail fast instead of silently dropping back to
2705
+ * ``whisper-1``.
2706
+ */
2707
+
2708
+ declare class OpenAITranscribeSTT extends WhisperSTT {
2709
+ /**
2710
+ * @param apiKey OpenAI API key.
2711
+ * @param language ISO-639-1 language code (e.g. ``"en"``, ``"it"``). Optional.
2712
+ * @param model One of ``gpt-4o-transcribe`` (default), ``gpt-4o-mini-transcribe``.
2713
+ * ``"whisper-1"`` is intentionally rejected here — use ``WhisperSTT`` for that.
2714
+ * @param bufferSize Bytes of PCM16 to buffer before each transcription request.
2715
+ * @param responseFormat ``"json"`` (default) or ``"verbose_json"``.
2716
+ */
2717
+ constructor(apiKey: string, language?: string, model?: string, bufferSize?: number, responseFormat?: WhisperResponseFormat);
2718
+ }
2719
+
2720
+ /** OpenAI GPT-4o Transcribe STT for Patter pipeline mode. */
2721
+
2722
+ interface OpenAITranscribeSTTOptions {
2723
+ /** API key. Falls back to OPENAI_API_KEY env var when omitted. */
2724
+ apiKey?: string;
2725
+ /** ``gpt-4o-transcribe`` (default) or ``gpt-4o-mini-transcribe``. */
2726
+ model?: string;
2727
+ language?: string;
2728
+ bufferSize?: number;
2729
+ /** ``"verbose_json"`` exposes segment-level confidence / timestamps. */
2730
+ responseFormat?: WhisperResponseFormat;
2731
+ }
2732
+ /**
2733
+ * OpenAI GPT-4o Transcribe STT — ~10x faster than Whisper-1.
2734
+ *
2735
+ * Drop-in replacement for ``whisper.STT`` with stronger multilingual
2736
+ * quality and significantly lower latency.
2737
+ *
2738
+ * @example
2739
+ * ```ts
2740
+ * import * as openaiTranscribe from "getpatter/stt/openai-transcribe";
2741
+ * const stt = new openaiTranscribe.STT(); // reads OPENAI_API_KEY
2742
+ * const stt = new openaiTranscribe.STT({ apiKey: "sk-...", language: "en" });
2743
+ * ```
2744
+ */
2745
+ declare class STT$3 extends OpenAITranscribeSTT {
2746
+ static readonly providerKey = "openai_transcribe";
2747
+ constructor(opts?: OpenAITranscribeSTTOptions);
2748
+ }
2749
+
1820
2750
  /** Cartesia streaming STT for Patter pipeline mode. */
1821
2751
 
1822
2752
  interface CartesiaSTTOptions {
@@ -1839,6 +2769,7 @@ interface CartesiaSTTOptions {
1839
2769
  * ```
1840
2770
  */
1841
2771
  declare class STT$2 extends CartesiaSTT {
2772
+ static readonly providerKey = "cartesia_stt";
1842
2773
  constructor(opts?: CartesiaSTTOptions);
1843
2774
  }
1844
2775
 
@@ -1869,6 +2800,7 @@ interface SonioxSTTOptions {
1869
2800
  * ```
1870
2801
  */
1871
2802
  declare class STT$1 extends SonioxSTT {
2803
+ static readonly providerKey = "soniox";
1872
2804
  constructor(opts?: SonioxSTTOptions);
1873
2805
  }
1874
2806
 
@@ -1891,7 +2823,7 @@ interface AssemblyAISTTOptions {
1891
2823
  vadThreshold?: number;
1892
2824
  speakerLabels?: boolean;
1893
2825
  maxSpeakers?: number;
1894
- domain?: string;
2826
+ domain?: AssemblyAIDomain;
1895
2827
  }
1896
2828
  /**
1897
2829
  * AssemblyAI Universal Streaming STT.
@@ -1904,15 +2836,103 @@ interface AssemblyAISTTOptions {
1904
2836
  * ```
1905
2837
  */
1906
2838
  declare class STT extends AssemblyAISTT {
2839
+ static readonly providerKey = "assemblyai";
1907
2840
  constructor(opts?: AssemblyAISTTOptions);
1908
2841
  }
1909
2842
 
2843
+ /**
2844
+ * Known stable ElevenLabs voice models (from the official ElevenLabs API
2845
+ * reference). Provided as a string-literal union for autocomplete + type
2846
+ * narrowing; the public ``modelId`` option also accepts ``string`` so
2847
+ * users can pass forward-compat IDs we haven't enumerated yet.
2848
+ *
2849
+ * - ``eleven_v3`` — newest, highest quality (slower TTFT than Flash).
2850
+ * - ``eleven_flash_v2_5`` — current default, fastest (~75 ms TTFT).
2851
+ * - ``eleven_turbo_v2_5`` — balanced quality/speed.
2852
+ * - ``eleven_multilingual_v2`` — best multilingual support.
2853
+ * - ``eleven_monolingual_v1`` — legacy English-only.
2854
+ */
2855
+ type ElevenLabsModel = 'eleven_v3' | 'eleven_flash_v2_5' | 'eleven_turbo_v2_5' | 'eleven_multilingual_v2' | 'eleven_monolingual_v1';
2856
+ type ElevenLabsOutputFormat = 'mp3_22050_32' | 'mp3_44100_32' | 'mp3_44100_64' | 'mp3_44100_96' | 'mp3_44100_128' | 'mp3_44100_192' | 'pcm_8000' | 'pcm_16000' | 'pcm_22050' | 'pcm_24000' | 'pcm_44100' | 'ulaw_8000';
2857
+ interface ElevenLabsVoiceSettings {
2858
+ stability?: number;
2859
+ similarity_boost?: number;
2860
+ style?: number;
2861
+ use_speaker_boost?: boolean;
2862
+ }
2863
+ interface ElevenLabsTTSOptions$1 {
2864
+ voiceId?: string;
2865
+ /**
2866
+ * ElevenLabs voice model ID. The default ``eleven_flash_v2_5`` has the
2867
+ * lowest TTFT (~75 ms). Pass ``eleven_v3`` for highest quality, or any
2868
+ * arbitrary string for forward-compat with future models.
2869
+ */
2870
+ modelId?: ElevenLabsModel | string;
2871
+ outputFormat?: ElevenLabsOutputFormat;
2872
+ voiceSettings?: ElevenLabsVoiceSettings;
2873
+ languageCode?: string;
2874
+ chunkSize?: number;
2875
+ }
2876
+ /**
2877
+ * ElevenLabs streaming TTS adapter.
2878
+ *
2879
+ * Supported `modelId` values are autocompleted via {@link ElevenLabsModel}.
2880
+ * Default is `eleven_flash_v2_5` (lowest TTFT, ~75 ms).
2881
+ *
2882
+ * **Telephony optimization** — the constructor default
2883
+ * `outputFormat='pcm_16000'` is correct for web playback, dashboard
2884
+ * previews, and 16 kHz pipelines. For real phone calls, use the
2885
+ * carrier-specific factories instead:
2886
+ *
2887
+ * - {@link ElevenLabsTTS.forTwilio} emits `ulaw_8000` natively. Twilio's
2888
+ * media-stream WebSocket expects μ-law @ 8 kHz, so the SDK normally
2889
+ * resamples 16 kHz → 8 kHz and PCM → μ-law before sending. Asking
2890
+ * ElevenLabs to produce μ-law directly skips that step (saves
2891
+ * ~30–80 ms first-byte plus per-frame CPU and avoids any resampling
2892
+ * aliasing).
2893
+ * - {@link ElevenLabsTTS.forTelnyx} emits `pcm_16000`. Telnyx negotiates
2894
+ * L16/16000 on its bidirectional media WebSocket, so 16 kHz PCM is
2895
+ * already the format used end-to-end and no transcoding happens.
2896
+ * ElevenLabs *also* supports `ulaw_8000` if your Telnyx profile is
2897
+ * pinned to PCMU/8000 — pass `outputFormat: 'ulaw_8000'` explicitly
2898
+ * in that case.
2899
+ */
1910
2900
  declare class ElevenLabsTTS {
1911
2901
  private readonly apiKey;
2902
+ private readonly voiceId;
1912
2903
  private readonly modelId;
1913
2904
  private readonly outputFormat;
1914
- private readonly voiceId;
1915
- constructor(apiKey: string, voiceId?: string, modelId?: string, outputFormat?: string);
2905
+ private readonly voiceSettings;
2906
+ private readonly languageCode;
2907
+ private readonly chunkSize;
2908
+ constructor(apiKey: string, voiceId?: string, modelId?: string, outputFormat?: ElevenLabsOutputFormat | string);
2909
+ constructor(apiKey: string, options: ElevenLabsTTSOptions$1);
2910
+ /**
2911
+ * Construct an instance pre-configured for Twilio Media Streams.
2912
+ *
2913
+ * Sets `outputFormat='ulaw_8000'` so ElevenLabs emits μ-law @ 8 kHz
2914
+ * directly — the exact wire format Twilio's media stream uses — letting
2915
+ * the SDK skip the 16 kHz→8 kHz resample and PCM→μ-law conversion in
2916
+ * `TwilioAudioSender`. Saves ~30–80 ms first-byte and per-frame CPU,
2917
+ * and removes a potential aliasing source.
2918
+ *
2919
+ * `voiceSettings` defaults to a low-bandwidth-friendly profile
2920
+ * (speaker boost off, modest stability) which sounds cleaner at 8 kHz
2921
+ * μ-law than the studio default. Pass an explicit object to override.
2922
+ */
2923
+ static forTwilio(apiKey: string, options?: Omit<ElevenLabsTTSOptions$1, 'outputFormat'>): ElevenLabsTTS;
2924
+ /**
2925
+ * Construct an instance pre-configured for Telnyx bidirectional media.
2926
+ *
2927
+ * Telnyx's default media-streaming codec is L16 PCM @ 16 kHz, which
2928
+ * matches our default Telnyx handler. We pick `pcm_16000` so the audio
2929
+ * flows end-to-end with zero resampling or transcoding.
2930
+ *
2931
+ * Trade-off: if your Telnyx profile is pinned to PCMU/8000 (μ-law),
2932
+ * construct `ElevenLabsTTS` directly with `outputFormat: 'ulaw_8000'`
2933
+ * — Telnyx supports that natively too.
2934
+ */
2935
+ static forTelnyx(apiKey: string, options?: Omit<ElevenLabsTTSOptions$1, 'outputFormat'>): ElevenLabsTTS;
1916
2936
  /**
1917
2937
  * Synthesise text to speech and return the full audio as a single Buffer.
1918
2938
  *
@@ -1923,7 +2943,8 @@ declare class ElevenLabsTTS {
1923
2943
  * Synthesise text and yield audio chunks as they arrive (streaming).
1924
2944
  *
1925
2945
  * The yielded buffers are raw PCM at 16 kHz (or whatever `outputFormat` is
1926
- * configured to).
2946
+ * configured to). `chunkSize` controls the maximum yield size — 512 is a
2947
+ * good choice for low-latency telephony.
1927
2948
  */
1928
2949
  synthesizeStream(text: string): AsyncGenerator<Buffer>;
1929
2950
  }
@@ -1934,9 +2955,15 @@ interface ElevenLabsTTSOptions {
1934
2955
  /** API key. Falls back to ELEVENLABS_API_KEY env var when omitted. */
1935
2956
  apiKey?: string;
1936
2957
  voiceId?: string;
1937
- modelId?: string;
2958
+ /**
2959
+ * ElevenLabs voice model ID. Default is ``eleven_flash_v2_5`` (lowest TTFT).
2960
+ * Pass ``eleven_v3`` for highest quality, or any string for forward-compat.
2961
+ */
2962
+ modelId?: ElevenLabsModel | string;
1938
2963
  outputFormat?: string;
1939
2964
  }
2965
+ /** Options for the carrier-specific factories — same as the constructor minus `outputFormat`. */
2966
+ type ElevenLabsCarrierOptions = Omit<ElevenLabsTTSOptions, "outputFormat">;
1940
2967
  /**
1941
2968
  * ElevenLabs TTS.
1942
2969
  *
@@ -1946,16 +2973,31 @@ interface ElevenLabsTTSOptions {
1946
2973
  * const tts = new elevenlabs.TTS(); // reads ELEVENLABS_API_KEY
1947
2974
  * const tts = new elevenlabs.TTS({ apiKey: "...", voiceId: "rachel" });
1948
2975
  * ```
2976
+ *
2977
+ * **Telephony optimization** — use {@link TTS.forTwilio} (μ-law @ 8 kHz,
2978
+ * native Twilio Media Streams format) or {@link TTS.forTelnyx} (PCM @
2979
+ * 16 kHz, native Telnyx default) on phone calls to skip the SDK-side
2980
+ * resampling / transcoding step.
1949
2981
  */
1950
2982
  declare class TTS$4 extends ElevenLabsTTS {
2983
+ static readonly providerKey = "elevenlabs";
1951
2984
  constructor(opts?: ElevenLabsTTSOptions);
2985
+ /** Pipeline TTS pre-configured for Twilio Media Streams (`ulaw_8000`). */
2986
+ static forTwilio(opts?: ElevenLabsCarrierOptions): TTS$4;
2987
+ static forTwilio(apiKey: string, options?: Omit<ElevenLabsTTSOptions, "outputFormat">): TTS$4;
2988
+ /** Pipeline TTS pre-configured for Telnyx (`pcm_16000`). */
2989
+ static forTelnyx(opts?: ElevenLabsCarrierOptions): TTS$4;
2990
+ static forTelnyx(apiKey: string, options?: Omit<ElevenLabsTTSOptions, "outputFormat">): TTS$4;
1952
2991
  }
1953
2992
 
1954
2993
  declare class OpenAITTS {
1955
2994
  private readonly apiKey;
1956
2995
  private readonly voice;
1957
2996
  private readonly model;
1958
- constructor(apiKey: string, voice?: string, model?: string);
2997
+ private readonly instructions;
2998
+ private readonly speed;
2999
+ private readonly antiAlias;
3000
+ constructor(apiKey: string, voice?: string, model?: string, instructions?: string | null, speed?: number | null, antiAlias?: boolean);
1959
3001
  /**
1960
3002
  * Synthesise text to speech and return the full audio as a single Buffer.
1961
3003
  *
@@ -1965,26 +3007,36 @@ declare class OpenAITTS {
1965
3007
  /**
1966
3008
  * Synthesise text and yield audio chunks as they arrive (streaming).
1967
3009
  *
1968
- * OpenAI returns 24 kHz PCM16; each chunk is resampled to 16 kHz before
1969
- * yielding so the output is ready for telephony pipelines.
3010
+ * OpenAI returns 24 kHz PCM16; each chunk is lowpass-filtered then
3011
+ * decimated 3:2 to 16 kHz before yielding so the output is ready for
3012
+ * telephony pipelines.
1970
3013
  *
1971
- * The resampler carries state (buffered samples + odd trailing byte)
1972
- * between chunks without that state cross-chunk sample alignment drifts
1973
- * and the caller hears pops / dropped audio (BUG #23, mirror of the
1974
- * Python `audioop.ratecv` fix).
3014
+ * The resampler carries state (filter memory + buffered samples + odd
3015
+ * trailing byte) between chunks so cross-chunk sample alignment and
3016
+ * filter phase don't reset on every network read.
1975
3017
  */
1976
3018
  synthesizeStream(text: string): AsyncGenerator<Buffer>;
1977
3019
  /**
1978
- * Streaming 24 kHz → 16 kHz resampler (PCM16-LE). Maintains cross-chunk
1979
- * state so the 3:2 pattern doesn't reset at every network read.
3020
+ * Streaming 24 kHz → 16 kHz resampler (PCM16-LE). Applies a single-pole
3021
+ * lowpass ahead of the 3:2 decimation and carries filter + sample state
3022
+ * across chunks so the cadence doesn't reset at every network read.
3023
+ *
3024
+ * ``ctx.lpfEnabled`` (default true on the streaming path, false for the
3025
+ * legacy static helper) controls whether the LPF is engaged — we keep
3026
+ * the helper bit-exact for the downsample-only tests while the real
3027
+ * streaming path gets anti-alias filtering.
1980
3028
  */
1981
- static resampleStreaming(audio: Buffer, ctx: {
1982
- carryByte: number | null;
1983
- leftover: number[];
1984
- }): Buffer;
3029
+ static resampleStreaming(audio: Buffer, ctx: ResampleCtx): Buffer;
1985
3030
  /** @deprecated use {@link resampleStreaming} with persistent state. */
1986
3031
  static resample24kTo16k(audio: Buffer): Buffer;
1987
3032
  }
3033
+ interface ResampleCtx {
3034
+ carryByte: number | null;
3035
+ leftover: number[];
3036
+ lpfPrev: number;
3037
+ /** Enable the single-pole lowpass ahead of decimation. Default true. */
3038
+ lpfEnabled?: boolean;
3039
+ }
1988
3040
 
1989
3041
  /** OpenAI TTS for Patter pipeline mode. */
1990
3042
 
@@ -1993,6 +3045,16 @@ interface OpenAITTSOptions {
1993
3045
  apiKey?: string;
1994
3046
  voice?: string;
1995
3047
  model?: string;
3048
+ /** Voice-direction prompt (only honoured for gpt-4o-mini-tts and newer). */
3049
+ instructions?: string;
3050
+ /** Speech speed multiplier, must be in [0.25, 4.0] when set. */
3051
+ speed?: number;
3052
+ /**
3053
+ * Enable anti-aliasing LPF ahead of the 3:2 decimation. Defaults to
3054
+ * ``false`` for backwards-compatibility; set to ``true`` for cleaner
3055
+ * audio on sibilants / fricatives.
3056
+ */
3057
+ antiAlias?: boolean;
1996
3058
  }
1997
3059
  /**
1998
3060
  * OpenAI TTS.
@@ -2005,6 +3067,7 @@ interface OpenAITTSOptions {
2005
3067
  * ```
2006
3068
  */
2007
3069
  declare class TTS$3 extends OpenAITTS {
3070
+ static readonly providerKey = "openai_tts";
2008
3071
  constructor(opts?: OpenAITTSOptions);
2009
3072
  }
2010
3073
 
@@ -2031,6 +3094,25 @@ declare class CartesiaTTS {
2031
3094
  private readonly baseUrl;
2032
3095
  private readonly apiVersion;
2033
3096
  constructor(apiKey: string, opts?: CartesiaTTSOptions$1);
3097
+ /**
3098
+ * Construct an instance pre-configured for Twilio Media Streams.
3099
+ *
3100
+ * Sets `sampleRate=8000` so Cartesia emits PCM_S16LE @ 8 kHz directly.
3101
+ * Twilio's media stream uses μ-law @ 8 kHz so the SDK still does the
3102
+ * PCM → μ-law transcode client-side, but the 16 kHz → 8 kHz resample
3103
+ * step is skipped. Saves ~10–30 ms first-byte plus per-frame CPU and
3104
+ * removes a potential aliasing source.
3105
+ */
3106
+ static forTwilio(apiKey: string, options?: Omit<CartesiaTTSOptions$1, 'sampleRate'>): CartesiaTTS;
3107
+ /**
3108
+ * Construct an instance pre-configured for Telnyx bidirectional media.
3109
+ *
3110
+ * Sets `sampleRate=16000` to match Telnyx's L16/16000 default codec —
3111
+ * audio flows end-to-end with zero resampling or transcoding. Same as
3112
+ * the bare-constructor default; exists for API symmetry with
3113
+ * {@link CartesiaTTS.forTwilio}.
3114
+ */
3115
+ static forTelnyx(apiKey: string, options?: Omit<CartesiaTTSOptions$1, 'sampleRate'>): CartesiaTTS;
2034
3116
  /** Build the JSON payload for the Cartesia bytes endpoint. */
2035
3117
  private buildPayload;
2036
3118
  /** Synthesize text and return the concatenated audio buffer. */
@@ -2057,8 +3139,14 @@ interface CartesiaTTSOptions {
2057
3139
  baseUrl?: string;
2058
3140
  apiVersion?: string;
2059
3141
  }
3142
+ /** Options for the carrier-specific factories — same as the constructor minus `sampleRate`. */
3143
+ type CartesiaCarrierOptions = Omit<CartesiaTTSOptions, "sampleRate">;
2060
3144
  /**
2061
- * Cartesia TTS (sonic-2).
3145
+ * Cartesia TTS (sonic-3 GA, ~90 ms TTFB).
3146
+ *
3147
+ * The default model is `sonic-3` — Cartesia's current GA model. Voice IDs
3148
+ * from the previous `sonic-2` family (including the default Katie voice)
3149
+ * remain compatible.
2062
3150
  *
2063
3151
  * @example
2064
3152
  * ```ts
@@ -2066,9 +3154,21 @@ interface CartesiaTTSOptions {
2066
3154
  * const tts = new cartesia.TTS(); // reads CARTESIA_API_KEY
2067
3155
  * const tts = new cartesia.TTS({ apiKey: "..." });
2068
3156
  * ```
3157
+ *
3158
+ * **Telephony optimization** — use {@link TTS.forTwilio} (PCM @ 8 kHz,
3159
+ * skipping the SDK-side 16 kHz → 8 kHz resample before μ-law transcoding)
3160
+ * or {@link TTS.forTelnyx} (PCM @ 16 kHz, native Telnyx default) on
3161
+ * phone calls.
2069
3162
  */
2070
3163
  declare class TTS$2 extends CartesiaTTS {
3164
+ static readonly providerKey = "cartesia_tts";
2071
3165
  constructor(opts?: CartesiaTTSOptions);
3166
+ /** Pipeline TTS pre-configured for Twilio Media Streams (PCM @ 8 kHz). */
3167
+ static forTwilio(opts?: CartesiaCarrierOptions): TTS$2;
3168
+ static forTwilio(apiKey: string, options?: Omit<CartesiaTTSOptions, "sampleRate">): TTS$2;
3169
+ /** Pipeline TTS pre-configured for Telnyx (PCM @ 16 kHz). */
3170
+ static forTelnyx(opts?: CartesiaCarrierOptions): TTS$2;
3171
+ static forTelnyx(apiKey: string, options?: Omit<CartesiaTTSOptions, "sampleRate">): TTS$2;
2072
3172
  }
2073
3173
 
2074
3174
  interface RimeTTSOptions$1 {
@@ -2142,6 +3242,7 @@ interface RimeTTSOptions {
2142
3242
  * ```
2143
3243
  */
2144
3244
  declare class TTS$1 extends RimeTTS {
3245
+ static readonly providerKey = "rime";
2145
3246
  constructor(opts?: RimeTTSOptions);
2146
3247
  }
2147
3248
 
@@ -2170,6 +3271,7 @@ interface LMNTTTSOptions {
2170
3271
  * ```
2171
3272
  */
2172
3273
  declare class TTS extends LMNTTTS {
3274
+ static readonly providerKey = "lmnt";
2173
3275
  constructor(opts?: LMNTTTSOptions);
2174
3276
  }
2175
3277
 
@@ -2180,6 +3282,26 @@ interface OpenAILLMOptions {
2180
3282
  apiKey?: string;
2181
3283
  /** Chat Completions model id. Defaults to ``"gpt-4o-mini"``. */
2182
3284
  model?: string;
3285
+ /** Sampling temperature [0, 2]. */
3286
+ temperature?: number;
3287
+ /** Max tokens in the assistant response (sent as ``max_completion_tokens``). */
3288
+ maxTokens?: number;
3289
+ /** OpenAI-style ``response_format`` for JSON mode / structured outputs. */
3290
+ responseFormat?: Record<string, unknown>;
3291
+ /** Whether to allow parallel tool calls. */
3292
+ parallelToolCalls?: boolean;
3293
+ /** ``"auto" | "none" | "required"`` or a specific tool object. */
3294
+ toolChoice?: string | Record<string, unknown>;
3295
+ /** Sampling seed for reproducible outputs. */
3296
+ seed?: number;
3297
+ /** Nucleus sampling cutoff in [0, 1]. */
3298
+ topP?: number;
3299
+ /** Penalty in [-2, 2] applied to repeated tokens. */
3300
+ frequencyPenalty?: number;
3301
+ /** Penalty in [-2, 2] applied to seen tokens. */
3302
+ presencePenalty?: number;
3303
+ /** Stop sequence(s). */
3304
+ stop?: string | string[];
2183
3305
  }
2184
3306
  /**
2185
3307
  * OpenAI Chat Completions LLM provider.
@@ -2188,10 +3310,11 @@ interface OpenAILLMOptions {
2188
3310
  * ```ts
2189
3311
  * import * as openai from "getpatter/llm/openai";
2190
3312
  * const llm = new openai.LLM(); // reads OPENAI_API_KEY
2191
- * const llm = new openai.LLM({ apiKey: "sk-...", model: "gpt-4o-mini" });
3313
+ * const llm = new openai.LLM({ apiKey: "sk-...", model: "gpt-4o-mini", temperature: 0.4 });
2192
3314
  * ```
2193
3315
  */
2194
3316
  declare class LLM$4 extends OpenAILLMProvider {
3317
+ static readonly providerKey = "openai";
2195
3318
  constructor(opts?: OpenAILLMOptions);
2196
3319
  }
2197
3320
 
@@ -2230,6 +3353,19 @@ interface AnthropicLLMOptions$1 {
2230
3353
  temperature?: number;
2231
3354
  baseUrl?: string;
2232
3355
  anthropicVersion?: string;
3356
+ /**
3357
+ * Enable Anthropic prompt caching for the system prompt and tools.
3358
+ * Defaults to ``true`` — for voice agents with long instruction-dense
3359
+ * system prompts, the cache saves ~100-400 ms TTFT and ~90% of input-
3360
+ * token cost on every cached turn. The cache lives ~5 minutes; the
3361
+ * first request writes it, subsequent requests within that window
3362
+ * hit it.
3363
+ *
3364
+ * Disable when the system prompt + tools combined are smaller than
3365
+ * Anthropic's minimum cacheable size (~1024 tokens for Sonnet/Opus,
3366
+ * ~2048 for Haiku) — caching has no effect below that threshold.
3367
+ */
3368
+ promptCaching?: boolean;
2233
3369
  }
2234
3370
  /** LLM provider backed by Anthropic's Messages API (streaming). */
2235
3371
  declare class AnthropicLLMProvider implements LLMProvider {
@@ -2239,6 +3375,7 @@ declare class AnthropicLLMProvider implements LLMProvider {
2239
3375
  private readonly temperature?;
2240
3376
  private readonly url;
2241
3377
  private readonly anthropicVersion;
3378
+ private readonly promptCaching;
2242
3379
  constructor(options: AnthropicLLMOptions$1);
2243
3380
  stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
2244
3381
  }
@@ -2248,7 +3385,7 @@ declare class AnthropicLLMProvider implements LLMProvider {
2248
3385
  interface AnthropicLLMOptions {
2249
3386
  /** API key. Falls back to ANTHROPIC_API_KEY env var when omitted. */
2250
3387
  apiKey?: string;
2251
- /** Anthropic Messages API model id (e.g. ``"claude-3-5-sonnet-20241022"``). */
3388
+ /** Anthropic Messages API model id (e.g. ``"claude-haiku-4-5-20251001"``). */
2252
3389
  model?: string;
2253
3390
  /** Maximum number of tokens to sample. Defaults to the adapter default. */
2254
3391
  maxTokens?: number;
@@ -2258,18 +3395,33 @@ interface AnthropicLLMOptions {
2258
3395
  baseUrl?: string;
2259
3396
  /** ``anthropic-version`` header override. */
2260
3397
  anthropicVersion?: string;
3398
+ /**
3399
+ * Enable Anthropic prompt caching (default: ``true``). For voice
3400
+ * agents with long instruction-dense system prompts, the cache saves
3401
+ * ~100-400 ms TTFT and ~90% input-token cost per cached turn. Disable
3402
+ * if your system prompt + tools are below Anthropic's minimum
3403
+ * cacheable size (~1024 tokens for Sonnet/Opus, ~2048 for Haiku) —
3404
+ * caching has no effect below that threshold.
3405
+ */
3406
+ promptCaching?: boolean;
2261
3407
  }
2262
3408
  /**
2263
3409
  * Anthropic Claude LLM provider (Messages API, streaming).
2264
3410
  *
3411
+ * Prompt caching is **enabled by default**. The first request writes
3412
+ * the cache; subsequent requests within ~5 minutes hit it. Pass
3413
+ * ``{ promptCaching: false }`` to opt out.
3414
+ *
2265
3415
  * @example
2266
3416
  * ```ts
2267
3417
  * import * as anthropic from "getpatter/llm/anthropic";
2268
3418
  * const llm = new anthropic.LLM(); // reads ANTHROPIC_API_KEY
2269
- * const llm = new anthropic.LLM({ apiKey: "sk-ant-...", model: "claude-3-5-sonnet-20241022" });
3419
+ * const llm = new anthropic.LLM({ apiKey: "sk-ant-...", model: "claude-haiku-4-5-20251001" });
3420
+ * const llm = new anthropic.LLM({ promptCaching: false }); // opt out of caching
2270
3421
  * ```
2271
3422
  */
2272
3423
  declare class LLM$3 extends AnthropicLLMProvider {
3424
+ static readonly providerKey = "anthropic";
2273
3425
  constructor(opts?: AnthropicLLMOptions);
2274
3426
  }
2275
3427
 
@@ -2296,12 +3448,42 @@ interface GroqLLMOptions$1 {
2296
3448
  apiKey: string;
2297
3449
  model?: string;
2298
3450
  baseUrl?: string;
3451
+ /** Sampling temperature [0, 2]. */
3452
+ temperature?: number;
3453
+ /** Max tokens in the assistant response (sent as ``max_completion_tokens``). */
3454
+ maxTokens?: number;
3455
+ /** OpenAI-style ``response_format`` for JSON mode / structured outputs. */
3456
+ responseFormat?: Record<string, unknown>;
3457
+ /** Whether to allow parallel tool calls. */
3458
+ parallelToolCalls?: boolean;
3459
+ /** ``"auto" | "none" | "required"`` or a specific tool object. */
3460
+ toolChoice?: string | Record<string, unknown>;
3461
+ /** Sampling seed. */
3462
+ seed?: number;
3463
+ /** Nucleus sampling cutoff in [0, 1]. */
3464
+ topP?: number;
3465
+ /** Penalty in [-2, 2] applied to repeated tokens. */
3466
+ frequencyPenalty?: number;
3467
+ /** Penalty in [-2, 2] applied to seen tokens. */
3468
+ presencePenalty?: number;
3469
+ /** Stop sequence(s). */
3470
+ stop?: string | string[];
2299
3471
  }
2300
3472
  /** LLM provider backed by Groq's OpenAI-compatible Chat Completions API. */
2301
3473
  declare class GroqLLMProvider implements LLMProvider {
2302
3474
  private readonly apiKey;
2303
- private readonly model;
3475
+ readonly model: string;
2304
3476
  private readonly baseUrl;
3477
+ private readonly temperature?;
3478
+ private readonly maxTokens?;
3479
+ private readonly responseFormat?;
3480
+ private readonly parallelToolCalls?;
3481
+ private readonly toolChoice?;
3482
+ private readonly seed?;
3483
+ private readonly topP?;
3484
+ private readonly frequencyPenalty?;
3485
+ private readonly presencePenalty?;
3486
+ private readonly stop?;
2305
3487
  constructor(options: GroqLLMOptions$1);
2306
3488
  stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
2307
3489
  }
@@ -2315,6 +3497,26 @@ interface GroqLLMOptions {
2315
3497
  model?: string;
2316
3498
  /** Override the OpenAI-compatible base URL (rarely needed). */
2317
3499
  baseUrl?: string;
3500
+ /** Sampling temperature [0, 2]. */
3501
+ temperature?: number;
3502
+ /** Max tokens in the assistant response (sent as ``max_completion_tokens``). */
3503
+ maxTokens?: number;
3504
+ /** OpenAI-style ``response_format`` for JSON mode / structured outputs. */
3505
+ responseFormat?: Record<string, unknown>;
3506
+ /** Whether to allow parallel tool calls. */
3507
+ parallelToolCalls?: boolean;
3508
+ /** ``"auto" | "none" | "required"`` or a specific tool object. */
3509
+ toolChoice?: string | Record<string, unknown>;
3510
+ /** Sampling seed. */
3511
+ seed?: number;
3512
+ /** Nucleus sampling cutoff in [0, 1]. */
3513
+ topP?: number;
3514
+ /** Penalty in [-2, 2] applied to repeated tokens. */
3515
+ frequencyPenalty?: number;
3516
+ /** Penalty in [-2, 2] applied to seen tokens. */
3517
+ presencePenalty?: number;
3518
+ /** Stop sequence(s). */
3519
+ stop?: string | string[];
2318
3520
  }
2319
3521
  /**
2320
3522
  * Groq LLM provider (OpenAI-compatible Chat Completions, streaming).
@@ -2327,6 +3529,7 @@ interface GroqLLMOptions {
2327
3529
  * ```
2328
3530
  */
2329
3531
  declare class LLM$2 extends GroqLLMProvider {
3532
+ static readonly providerKey = "groq";
2330
3533
  constructor(opts?: GroqLLMOptions);
2331
3534
  }
2332
3535
 
@@ -2358,15 +3561,68 @@ interface CerebrasLLMOptions$1 {
2358
3561
  apiKey: string;
2359
3562
  model?: string;
2360
3563
  baseUrl?: string;
2361
- /** Gzip request payloads for faster TTFT on large prompts. */
3564
+ /**
3565
+ * Gzip request payloads for faster TTFT on large prompts. Defaults to
3566
+ * ``true`` (parity with Python SDK) — set ``false`` to disable.
3567
+ *
3568
+ * msgpack encoding is Python-only; TS uses gzip alone, which captures
3569
+ * ~85% of the TTFT win.
3570
+ */
2362
3571
  gzipCompression?: boolean;
3572
+ /** Sampling temperature [0, 2]. */
3573
+ temperature?: number;
3574
+ /** Max tokens in the assistant response (sent as ``max_completion_tokens``). */
3575
+ maxTokens?: number;
3576
+ /**
3577
+ * Optional OpenAI-style ``response_format`` for JSON mode / structured
3578
+ * outputs, e.g. ``{ type: 'json_schema', json_schema: { ... } }``.
3579
+ * See https://inference-docs.cerebras.ai/capabilities/structured-outputs.
3580
+ */
3581
+ responseFormat?: Record<string, unknown>;
3582
+ /** Whether to allow parallel tool calls. */
3583
+ parallelToolCalls?: boolean;
3584
+ /** ``"auto" | "none" | "required"`` or a specific tool object. */
3585
+ toolChoice?: string | Record<string, unknown>;
3586
+ /** Sampling seed for reproducible outputs. */
3587
+ seed?: number;
3588
+ /** Nucleus sampling cutoff in [0, 1]. */
3589
+ topP?: number;
3590
+ /** Penalty in [-2, 2] applied to repeated tokens. */
3591
+ frequencyPenalty?: number;
3592
+ /** Penalty in [-2, 2] applied to seen tokens. */
3593
+ presencePenalty?: number;
3594
+ /** Stop sequence(s). */
3595
+ stop?: string | string[];
2363
3596
  }
2364
- /** LLM provider backed by Cerebras's OpenAI-compatible Inference API. */
3597
+ /**
3598
+ * LLM provider backed by Cerebras's OpenAI-compatible Inference API.
3599
+ *
3600
+ * Available models on Cerebras (verified against
3601
+ * https://inference-docs.cerebras.ai/models/overview):
3602
+ *
3603
+ * Production:
3604
+ * - gpt-oss-120b (default — highest throughput on Cerebras, no deprecation)
3605
+ * - llama3.1-8b (smaller context alternative; deprecating 2026-05-27)
3606
+ *
3607
+ * Preview (opt-in):
3608
+ * - qwen-3-235b-a22b-instruct-2507 (multilingual, strong on European languages)
3609
+ * - zai-glm-4.7
3610
+ */
2365
3611
  declare class CerebrasLLMProvider implements LLMProvider {
2366
3612
  private readonly apiKey;
2367
- private readonly model;
3613
+ readonly model: string;
2368
3614
  private readonly baseUrl;
2369
3615
  private readonly gzipCompression;
3616
+ private readonly temperature?;
3617
+ private readonly maxTokens?;
3618
+ private readonly responseFormat?;
3619
+ private readonly parallelToolCalls?;
3620
+ private readonly toolChoice?;
3621
+ private readonly seed?;
3622
+ private readonly topP?;
3623
+ private readonly frequencyPenalty?;
3624
+ private readonly presencePenalty?;
3625
+ private readonly stop?;
2370
3626
  constructor(options: CerebrasLLMOptions$1);
2371
3627
  stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
2372
3628
  }
@@ -2376,12 +3632,32 @@ declare class CerebrasLLMProvider implements LLMProvider {
2376
3632
  interface CerebrasLLMOptions {
2377
3633
  /** API key. Falls back to CEREBRAS_API_KEY env var when omitted. */
2378
3634
  apiKey?: string;
2379
- /** Model id (e.g. ``"llama3.1-8b"``). */
3635
+ /** Model id (e.g. ``"gpt-oss-120b"``). */
2380
3636
  model?: string;
2381
3637
  /** Override the OpenAI-compatible base URL (rarely needed). */
2382
3638
  baseUrl?: string;
2383
3639
  /** Gzip request payloads for faster TTFT on large prompts. */
2384
3640
  gzipCompression?: boolean;
3641
+ /** Sampling temperature [0, 2]. */
3642
+ temperature?: number;
3643
+ /** Max tokens in the assistant response (sent as ``max_completion_tokens``). */
3644
+ maxTokens?: number;
3645
+ /** OpenAI-style ``response_format`` for JSON mode / structured outputs. */
3646
+ responseFormat?: Record<string, unknown>;
3647
+ /** Whether to allow parallel tool calls. */
3648
+ parallelToolCalls?: boolean;
3649
+ /** ``"auto" | "none" | "required"`` or a specific tool object. */
3650
+ toolChoice?: string | Record<string, unknown>;
3651
+ /** Sampling seed for reproducible outputs. */
3652
+ seed?: number;
3653
+ /** Nucleus sampling cutoff in [0, 1]. */
3654
+ topP?: number;
3655
+ /** Penalty in [-2, 2] applied to repeated tokens. */
3656
+ frequencyPenalty?: number;
3657
+ /** Penalty in [-2, 2] applied to seen tokens. */
3658
+ presencePenalty?: number;
3659
+ /** Stop sequence(s). */
3660
+ stop?: string | string[];
2385
3661
  }
2386
3662
  /**
2387
3663
  * Cerebras LLM provider (OpenAI-compatible Inference API, streaming).
@@ -2390,10 +3666,13 @@ interface CerebrasLLMOptions {
2390
3666
  * ```ts
2391
3667
  * import * as cerebras from "getpatter/llm/cerebras";
2392
3668
  * const llm = new cerebras.LLM(); // reads CEREBRAS_API_KEY
3669
+ * const llm = new cerebras.LLM({ apiKey: "csk-...", model: "gpt-oss-120b" });
3670
+ * // smaller-context alternative:
2393
3671
  * const llm = new cerebras.LLM({ apiKey: "csk-...", model: "llama3.1-8b" });
2394
3672
  * ```
2395
3673
  */
2396
3674
  declare class LLM$1 extends CerebrasLLMProvider {
3675
+ static readonly providerKey = "cerebras";
2397
3676
  constructor(opts?: CerebrasLLMOptions);
2398
3677
  }
2399
3678
 
@@ -2433,7 +3712,7 @@ interface GoogleLLMOptions$1 {
2433
3712
  /** LLM provider backed by Google Gemini (Developer API, streaming SSE). */
2434
3713
  declare class GoogleLLMProvider implements LLMProvider {
2435
3714
  private readonly apiKey;
2436
- private readonly model;
3715
+ readonly model: string;
2437
3716
  private readonly baseUrl;
2438
3717
  private readonly temperature?;
2439
3718
  private readonly maxOutputTokens?;
@@ -2470,9 +3749,109 @@ interface GoogleLLMOptions {
2470
3749
  * ```
2471
3750
  */
2472
3751
  declare class LLM extends GoogleLLMProvider {
3752
+ static readonly providerKey = "google";
2473
3753
  constructor(opts?: GoogleLLMOptions);
2474
3754
  }
2475
3755
 
3756
+ /**
3757
+ * Silero VAD provider (TypeScript port).
3758
+ *
3759
+ * Acoustic voice activity detection backed by the Silero ONNX model. Buffers
3760
+ * incoming int16 LE PCM frames, runs inference on fixed-size windows
3761
+ * (256 samples at 8 kHz, 512 at 16 kHz), applies an exponential probability
3762
+ * filter, and emits VADEvent transitions (speech_start / speech_end).
3763
+ *
3764
+ * Ported from LiveKit Agents (Apache 2.0):
3765
+ * https://github.com/livekit/agents
3766
+ * Sources:
3767
+ * - livekit-plugins/livekit-plugins-silero/livekit/plugins/silero/vad.py
3768
+ * - livekit-plugins/livekit-plugins-silero/livekit/plugins/silero/onnx_model.py
3769
+ *
3770
+ * Adaptations for Patter:
3771
+ * - Input is raw PCM `Buffer` (int16 LE, mono) via
3772
+ * `processFrame(pcmChunk, sampleRate)`, not `livekit.rtc.AudioFrame`.
3773
+ * - onnxruntime-node is loaded lazily as an optional dependency.
3774
+ * - Emits `VADEvent` (Patter protocol) instead of LiveKit event types.
3775
+ */
3776
+
3777
+ declare const SUPPORTED_SAMPLE_RATES: readonly [8000, 16000];
3778
+ type SileroSampleRate = (typeof SUPPORTED_SAMPLE_RATES)[number];
3779
+ interface SileroVADOptions {
3780
+ minSpeechDuration?: number;
3781
+ minSilenceDuration?: number;
3782
+ prefixPaddingDuration?: number;
3783
+ activationThreshold?: number;
3784
+ deactivationThreshold?: number;
3785
+ sampleRate?: SileroSampleRate;
3786
+ forceCpu?: boolean;
3787
+ onnxFilePath?: string;
3788
+ }
3789
+ /**
3790
+ * Minimal structural type for the subset of `onnxruntime-node` we depend on.
3791
+ * Declared locally so consumers don't need the package installed at build time.
3792
+ */
3793
+ interface OnnxInferenceSession {
3794
+ run(feeds: Record<string, OnnxTensor>): Promise<Record<string, OnnxTensor>>;
3795
+ }
3796
+ interface OnnxTensor {
3797
+ readonly data: Float32Array | BigInt64Array;
3798
+ readonly dims: readonly number[];
3799
+ }
3800
+ interface OnnxRuntime {
3801
+ InferenceSession: {
3802
+ create(pathOrBuffer: string | Uint8Array, options?: Record<string, unknown>): Promise<OnnxInferenceSession>;
3803
+ };
3804
+ Tensor: new (type: 'float32' | 'int64', data: Float32Array | BigInt64Array, dims: readonly number[]) => OnnxTensor;
3805
+ }
3806
+ /**
3807
+ * Silero-based `VADProvider`. Load via `SileroVAD.load()`:
3808
+ *
3809
+ * const vad = await SileroVAD.load({ sampleRate: 16000 });
3810
+ * const evt = await vad.processFrame(pcm, 16000);
3811
+ * if (evt && evt.type === 'speech_start') { ... }
3812
+ * await vad.close();
3813
+ */
3814
+ declare class SileroVAD implements VADProvider {
3815
+ private readonly model;
3816
+ private readonly opts;
3817
+ private pending;
3818
+ private expFilter;
3819
+ private pubSpeaking;
3820
+ private speechThresholdDuration;
3821
+ private silenceThresholdDuration;
3822
+ private closed;
3823
+ private constructor();
3824
+ /**
3825
+ * Load the Silero VAD model. Defaults match the LiveKit Silero plugin.
3826
+ * Throws if `onnxruntime-node` is not installed.
3827
+ */
3828
+ static load(options?: SileroVADOptions): Promise<SileroVAD>;
3829
+ /**
3830
+ * Internal factory used by tests — bypasses onnxruntime-node loading.
3831
+ * @internal
3832
+ */
3833
+ static fromOnnxModel(runtime: OnnxRuntime, session: OnnxInferenceSession, options: Required<Omit<SileroVADOptions, 'onnxFilePath' | 'forceCpu'>>): SileroVAD;
3834
+ get sampleRate(): SileroSampleRate;
3835
+ /**
3836
+ * Number of int16 PCM samples that must be provided per call to
3837
+ * processFrame for the model to run one inference window.
3838
+ *
3839
+ * Constraint (ported from LiveKit Agents / Silero ONNX spec):
3840
+ * - 16 000 Hz → 512 samples (32 ms)
3841
+ * - 8 000 Hz → 256 samples (32 ms)
3842
+ *
3843
+ * Callers that feed raw audio in fixed-size chunks (e.g. WebSocket frames)
3844
+ * should buffer incoming audio until at least numFramesRequired() int16
3845
+ * samples are available before calling processFrame. The provider
3846
+ * internally buffers partial windows so smaller chunks are also safe, but
3847
+ * passing exactly one window per call minimises heap allocation.
3848
+ */
3849
+ numFramesRequired(): number;
3850
+ processFrame(pcmChunk: Buffer, sampleRate: number): Promise<VADEvent | null>;
3851
+ private advanceState;
3852
+ close(): Promise<void>;
3853
+ }
3854
+
2476
3855
  /**
2477
3856
  * Audio transcoding utilities for Patter TypeScript SDK.
2478
3857
  *
@@ -2495,6 +3874,137 @@ declare function mulawToPcm16(mulawData: Buffer): Buffer;
2495
3874
  * If the input length is odd, the trailing byte is ignored.
2496
3875
  */
2497
3876
  declare function pcm16ToMulaw(pcmData: Buffer): Buffer;
3877
+ /**
3878
+ * Buffers a trailing odd byte across chunk boundaries so that downstream
3879
+ * consumers (resamplers, encoders) always receive even-length (2-byte-aligned)
3880
+ * PCM16 buffers.
3881
+ *
3882
+ * Mirror of the Python-side PcmCarry helper. Typical usage:
3883
+ *
3884
+ * ```ts
3885
+ * const carry = new PcmCarry();
3886
+ * for (const raw of stream) {
3887
+ * const aligned = carry.push(raw);
3888
+ * if (aligned.length > 0) process(aligned);
3889
+ * }
3890
+ * const tail = carry.flush();
3891
+ * if (tail.length > 0) process(tail);
3892
+ * ```
3893
+ */
3894
+ declare class PcmCarry {
3895
+ private pending;
3896
+ /**
3897
+ * Prepend any carried odd byte, return the even-length prefix, and stash
3898
+ * any new trailing odd byte for the next call.
3899
+ *
3900
+ * Returns a zero-length buffer when no complete sample is yet available.
3901
+ */
3902
+ push(chunk: Buffer): Buffer;
3903
+ /**
3904
+ * Return any pending byte as a 1-byte buffer (rare in practice — only if
3905
+ * the entire stream had an odd byte count), then reset internal state.
3906
+ */
3907
+ flush(): Buffer;
3908
+ /** Reset carry state without flushing. */
3909
+ reset(): void;
3910
+ }
3911
+ /** Options for constructing a {@link StatefulResampler}. */
3912
+ interface StatefulResamplerOptions {
3913
+ srcRate: number;
3914
+ dstRate: number;
3915
+ /** Number of channels (default 1 / mono). */
3916
+ channels?: number;
3917
+ }
3918
+ /**
3919
+ * Stateful PCM16 resampler that carries tail state across chunk boundaries,
3920
+ * eliminating the boundary discontinuities present in the legacy one-shot
3921
+ * helpers.
3922
+ *
3923
+ * Supported conversions:
3924
+ * - 16 000 → 8 000 Hz (2:1 decimation with 5-tap FIR anti-alias)
3925
+ * - 8 000 → 16 000 Hz (1:2 linear interpolation)
3926
+ * - 24 000 → 16 000 Hz (3:2 linear interpolation)
3927
+ *
3928
+ * All methods accept and return Buffer (PCM16-LE, mono by default).
3929
+ */
3930
+ declare class StatefulResampler {
3931
+ private readonly srcRate;
3932
+ private readonly dstRate;
3933
+ private firHistory;
3934
+ private firHistoryValid;
3935
+ private firPendingSample;
3936
+ private upsampleLast;
3937
+ private upsampleHasHistory;
3938
+ private resample24Last;
3939
+ private resample24Phase;
3940
+ private resample24HasHistory;
3941
+ private readonly carry;
3942
+ constructor(opts: StatefulResamplerOptions);
3943
+ /**
3944
+ * Process a chunk of PCM16-LE samples.
3945
+ *
3946
+ * Handles odd-byte inputs via an internal carry buffer. Returns an even-byte-
3947
+ * aligned output buffer; may return a zero-length buffer if not enough
3948
+ * aligned input is available yet.
3949
+ */
3950
+ process(pcm: Buffer): Buffer;
3951
+ /**
3952
+ * Flush internal state and return any remaining output samples.
3953
+ *
3954
+ * For 8k→16k: the deferred last sample is emitted duplicated (matching
3955
+ * the stateless helper's end-of-stream behaviour).
3956
+ * For 16k→8k: any pending odd sample is processed with edge-replication.
3957
+ * Resets all state after flushing.
3958
+ */
3959
+ flush(): Buffer;
3960
+ /** Reset all carried state (e.g. at call boundaries). */
3961
+ reset(): void;
3962
+ /**
3963
+ * 2:1 decimation with a 5-tap binomial FIR anti-alias filter.
3964
+ *
3965
+ * FIR coefficients: [1, 4, 6, 4, 1] / 16 (cutoff ~Fs/4 = 4 kHz).
3966
+ *
3967
+ * Cross-chunk state:
3968
+ * - `firHistory[0]` = s_{-2}, `firHistory[1]` = s_{-1} relative to the
3969
+ * virtual stream (seeded to first-sample on the very first call).
3970
+ * - `firPendingSample` = a lone input sample carried from a chunk whose
3971
+ * sample count was odd; it will become the first input of the next chunk.
3972
+ *
3973
+ * Decimation: outputs are at even positions (0, 2, 4 …) in the virtual
3974
+ * extended stream, so every 2 input samples yield 1 output. An odd-sample-
3975
+ * count chunk leaves 1 sample in `firPendingSample`; the next chunk
3976
+ * prepends it so the output cadence is unbroken.
3977
+ */
3978
+ private _downsample16kTo8k;
3979
+ /**
3980
+ * 1:2 linear-interpolation upsampler.
3981
+ *
3982
+ * For the first chunk (no history): emits 2*(N-1) samples and defers the
3983
+ * last sample. For subsequent chunks (with history): emits the deferred
3984
+ * sample + its interpolated midpoint THEN 2*(N-1) samples from the new
3985
+ * chunk, deferring the new last sample. Total across K chunks + flush =
3986
+ * 2*total_input_samples (correct output length).
3987
+ *
3988
+ * Call flush() after the final chunk to emit the last deferred sample
3989
+ * pair (self-duplicate at end of stream).
3990
+ */
3991
+ private _upsample8kTo16k;
3992
+ /**
3993
+ * 3:2 linear-interpolation decimator (ratio srcRate/dstRate = 1.5).
3994
+ *
3995
+ * `resample24Phase` tracks the fractional input position of the next output
3996
+ * sample relative to the START of the next chunk. Negative phase means the
3997
+ * next output straddles the previous/current chunk boundary; those are
3998
+ * handled using `resample24Last`.
3999
+ */
4000
+ private _resample24kTo16k;
4001
+ }
4002
+ /** Create a stateful 16 kHz → 8 kHz downsampling resampler. */
4003
+ declare function createResampler16kTo8k(): StatefulResampler;
4004
+ /** Create a stateful 8 kHz → 16 kHz upsampling resampler. */
4005
+ declare function createResampler8kTo16k(): StatefulResampler;
4006
+ /** Create a stateful 24 kHz → 16 kHz resampler (3:2 linear interpolation). */
4007
+ declare function createResampler24kTo16k(): StatefulResampler;
2498
4008
  /**
2499
4009
  * Upsample 8 kHz PCM16 to 16 kHz using linear interpolation.
2500
4010
  *
@@ -2503,21 +4013,33 @@ declare function pcm16ToMulaw(pcmData: Buffer): Buffer;
2503
4013
  * is duplicated to fill the final position.
2504
4014
  *
2505
4015
  * Output length = input length * 2.
4016
+ *
4017
+ * @deprecated Use {@link StatefulResampler} or {@link createResampler8kTo16k}
4018
+ * for streaming pipelines where chunk-boundary continuity matters.
2506
4019
  */
2507
4020
  declare function resample8kTo16k(pcm8k: Buffer): Buffer;
2508
4021
  /**
2509
- * Downsample 16 kHz PCM16 to 8 kHz by taking every 2nd sample.
4022
+ * Downsample 16 kHz PCM16 to 8 kHz with anti-aliasing.
4023
+ *
4024
+ * Uses a 5-tap binomial low-pass FIR filter ([1, 4, 6, 4, 1] / 16) applied
4025
+ * to every pair of input samples before decimating by 2.
2510
4026
  *
2511
4027
  * Output length = input length / 2.
4028
+ *
4029
+ * @deprecated Use {@link StatefulResampler} or {@link createResampler16kTo8k}
4030
+ * for streaming pipelines where chunk-boundary continuity matters.
2512
4031
  */
2513
4032
  declare function resample16kTo8k(pcm16k: Buffer): Buffer;
2514
4033
  /**
2515
- * Downsample 24 kHz PCM16 to 16 kHz by taking 2 of every 3 samples.
4034
+ * Downsample 24 kHz PCM16 to 16 kHz with linear interpolation.
2516
4035
  *
2517
- * Matches the Python backend approach: for every group of 3 input samples,
2518
- * output the 1st and 2nd, skip the 3rd.
4036
+ * For a 3:2 ratio, each output sample is a weighted blend of the two
4037
+ * neighbouring input samples rather than a raw pick-every-third.
2519
4038
  *
2520
4039
  * Output length = floor(inputSamples * 2 / 3) * 2 bytes.
4040
+ *
4041
+ * @deprecated Use {@link StatefulResampler} or {@link OpenAITTS.resampleStreaming}
4042
+ * for anti-aliased resampling.
2521
4043
  */
2522
4044
  declare function resample24kTo16k(pcm24k: Buffer): Buffer;
2523
4045
 
@@ -2834,4 +4356,193 @@ declare class BackgroundAudioPlayer implements BackgroundAudioPlayer$1 {
2834
4356
  private resampleTo;
2835
4357
  }
2836
4358
 
2837
- export { type Agent, type AgentOptions, AllProvidersFailedError, type AnthropicConversion, LLM$3 as AnthropicLLM, type AnthropicLLMOptions, type AnthropicMessage, type AssemblyAIEncoding, type AssemblyAIModel, STT as AssemblyAISTT, type AssemblyAISTTOptions, type AudioConfig, type AudioSource, AuthenticationError, type BackgroundAudioOptions, BackgroundAudioPlayer, BuiltinAudioClip, type BuiltinAudioClipName, type BuiltinPcmSource, type Call, type CallControl, type CallEventHandler, type CallMetrics, CallMetricsAccumulator, type CallOptions, type CallRecord, type CartesiaEncoding, STT$2 as CartesiaSTT, type CartesiaSTTOptions, TTS$2 as CartesiaTTS, type CartesiaTTSOptions, LLM$1 as CerebrasLLM, type CerebrasLLMOptions, ChatContext, type ChatMessage, type ChatRole, CloudflareTunnel, type ConnectOptions, type CostBreakdown, type CreateAgentOptions, DEFAULT_MIN_SENTENCE_LEN, DEFAULT_PRICING, DTMF_EVENTS, STT$4 as DeepgramSTT, type DeepgramSTTOptions, type DefineToolInput, type DtmfEvent, ConvAI as ElevenLabsConvAI, ElevenLabsConvAIAdapter, type ConvAIOptions as ElevenLabsConvAIOptions, TTS$4 as ElevenLabsTTS, type ElevenLabsTTSOptions, FallbackLLMProvider, type FallbackLLMProviderOptions, type FilePcmSource, GEMINI_DEFAULT_INPUT_SR, GEMINI_DEFAULT_OUTPUT_SR, GeminiLiveAdapter, type GeminiLiveEventHandler, LLM as GoogleLLM, type GoogleLLMOptions, LLM$2 as GroqLLM, type GroqLLMOptions, Guardrail$1 as Guardrail, type GuardrailOptions, type HookContext, IVRActivity, type IVRActivityOptions, type IVRToolDefinition, type IncomingMessage, type JobCallback, type LLMChunk, LLMLoop, type LLMProvider, type LMNTAudioFormat, type LMNTModel, type LMNTSampleRate, TTS as LMNTTTS, type LMNTTTSOptions, type LatencyBreakdown, type LocalCallOptions, type LocalConfig, type LocalOptions, type Logger, type LoopCallback, type MessageHandler, MetricsStore, LLM$4 as OpenAILLM, type OpenAILLMOptions, OpenAILLMProvider, type OpenAIMessage, Realtime as OpenAIRealtime, OpenAIRealtimeAdapter, 
type RealtimeOptions as OpenAIRealtimeOptions, TTS$3 as OpenAITTS, type OpenAITTSOptions, type ParamSpec, PartialStreamError, Patter, PatterConnectionError, PatterError, type PatterOptions, type PhoneNumber, PipelineHookExecutor, type PipelineHooks, type PipelineMessageHandler, type ProviderPricing, ProvisionError, type RawPcmSource, RemoteMessageHandler, TTS$1 as RimeTTS, type RimeTTSOptions, type SSEEvent, type STTConfig, type ScheduleHandle, SentenceChunker, type ServeOptions, type SilenceCallback, STT$1 as SonioxSTT, type SonioxSTTOptions$1 as SonioxSTTOptions, Static as StaticTunnel, type TTSConfig, Carrier as Telnyx, type TelnyxCarrierOptions, TestSession, TfidfLoopDetector, type TfidfLoopDetectorOptions, Tool, type ToolDefinition, type ToolHandler, type ToolOptions, type TunnelHandle, type TurnMetrics, Carrier$1 as Twilio, type TwilioCarrierOptions, ULTRAVOX_DEFAULT_API_BASE, ULTRAVOX_DEFAULT_SR, type UltravoxEventHandler, UltravoxRealtimeAdapter, STT$3 as WhisperSTT, type WhisperSTTOptions, builtinClipPath, calculateRealtimeCost, calculateSttCost, calculateTelephonyCost, calculateTtsCost, callsToCsv, callsToJson, deepgram, defineTool, elevenlabs, filterEmoji, filterForTTS, filterMarkdown, formatDtmf, getLogger, guardrail, isRemoteUrl, isWebSocketUrl, makeAuthMiddleware, mergePricing, mixPcm, mountApi, mountDashboard, mulawToPcm16, notifyDashboard, openaiTts, pcm16ToMulaw, resample16kTo8k, resample24kTo16k, resample8kTo16k, resamplePcm, scheduleCron, scheduleInterval, scheduleOnce, selectSoundFromList, setLogger, startTunnel, tool, whisper };
4359
+ interface TwilioAdapterOptions { // Optional settings object; accepted as the third argument of the ``TwilioAdapter`` constructor.
4360
+ /** Optional Twilio edge region (e.g. ``ie1`` for Ireland). */
4361
+ region?: string;
4362
+ }
4363
+ interface ProvisionNumberOptions$1 { // Argument of ``TwilioAdapter.provisionNumber``; exported as ``TwilioProvisionNumberOptions``.
4364
+ /** ISO-3166-1 alpha-2 country code, e.g. ``"US"``. */
4365
+ countryCode: string;
4366
+ /** Optional North-American area code (e.g. ``"415"``). */
4367
+ areaCode?: string;
4368
+ }
4369
+ interface ProvisionNumberResult$1 { // Resolved value of ``TwilioAdapter.provisionNumber``; exported as ``TwilioProvisionNumberResult``.
4370
+ readonly phoneNumber: string;
4371
+ readonly sid: string; // NOTE(review): presumably the value ``TwilioAdapter.configureNumber`` takes as ``phoneNumberSid`` — confirm.
4372
+ }
4373
+ interface ConfigureNumberOptions$1 { // Second argument of ``TwilioAdapter.configureNumber``; exported as ``TwilioConfigureNumberOptions``.
4374
+ /** URL Twilio should hit when the number receives a call. */
4375
+ voiceUrl: string;
4376
+ /** Optional status callback URL for call lifecycle events. */
4377
+ statusCallback?: string;
4378
+ }
4379
+ interface InitiateCallOptions$1 { // Argument of ``TwilioAdapter.initiateCall``; exported as ``TwilioInitiateCallOptions``.
4380
+ from: string;
4381
+ to: string;
4382
+ /**
4383
+ * TwiML or absolute URL Twilio should request when the call connects.
4384
+ * Mutually exclusive with ``streamUrl`` — provide exactly one. The type itself does not enforce this: both fields are declared optional.
4385
+ */
4386
+ url?: string;
4387
+ /**
4388
+ * Optional WebSocket stream URL. When provided (and ``url`` is not), the
4389
+ * adapter auto-builds a ``<Response><Connect><Stream>`` TwiML document
4390
+ * via {@link TwilioAdapter.generateStreamTwiml} and sends it as the ``Twiml`` form
4391
+ * parameter. Mirrors the Python adapter's ``stream_url`` convenience path.
4392
+ */
4393
+ streamUrl?: string;
4394
+ statusCallback?: string; // NOTE(review): presumably a status callback URL, as on ``ConfigureNumberOptions$1.statusCallback`` — confirm.
4395
+ /** Value accepted by Twilio's ``MachineDetection`` parameter. Note that ``'false'`` is a string literal here, not the boolean ``false``. */
4396
+ machineDetection?: 'Enable' | 'DetectMessageEnd' | 'false';
4397
+ /** Raw extra form parameters forwarded to the Calls endpoint. */
4398
+ extraParams?: Record<string, string>;
4399
+ }
4400
+ interface InitiateCallResult$1 { // Resolved value of ``TwilioAdapter.initiateCall``; exported as ``TwilioInitiateCallResult``.
4401
+ readonly callSid: string; // The Twilio call SID, per the ``initiateCall`` doc comment.
4402
+ }
4403
+ declare class TwilioAdapter { // Declares number provisioning/configuration and outbound-call control methods; every instance method returns a Promise.
4404
+ readonly accountSid: string;
4405
+ readonly region: string | undefined; // NOTE(review): presumably taken from ``opts.region`` — confirm in the implementation.
4406
+ private readonly baseUrl; // private member: its type is omitted from the declaration output
4407
+ private readonly authHeader; // private member: its type is omitted from the declaration output
4408
+ constructor(accountSid: string, authToken: string, opts?: TwilioAdapterOptions);
4409
+ private request; // private member: its signature is omitted from the declaration output
4410
+ /**
4411
+ * Provision a local phone number in the given country (``opts.countryCode``).
4412
+ *
4413
+ * Lists available local numbers, then purchases the first match.
4414
+ */
4415
+ provisionNumber(opts: ProvisionNumberOptions$1): Promise<ProvisionNumberResult$1>;
4416
+ /** Update an already-purchased number to point at our voice webhook. */
4417
+ configureNumber(phoneNumberSid: string, opts: ConfigureNumberOptions$1): Promise<void>;
4418
+ /** Place an outbound call. Returns the Twilio call SID (resolved as ``callSid`` on the result object). */
4419
+ initiateCall(opts: InitiateCallOptions$1): Promise<InitiateCallResult$1>;
4420
+ /**
4421
+ * Build a minimal ``<Response><Connect><Stream url="..."/></Connect></Response>``
4422
+ * TwiML document. Mirrors the Python adapter's ``generate_stream_twiml``. Static and synchronous: returns the document as a string.
4423
+ */
4424
+ static generateStreamTwiml(streamUrl: string): string;
4425
+ /** Force-complete an in-progress call. */
4426
+ endCall(callSid: string): Promise<void>;
4427
+ }
4428
+
4429
+ interface ProvisionNumberOptions { // Argument of ``TelnyxAdapter.provisionNumber``; exported as ``TelnyxProvisionNumberOptions``.
4430
+ /** ISO-3166-1 alpha-2 country code (e.g. ``"US"``). */
4431
+ countryCode: string;
4432
+ }
4433
+ interface ProvisionNumberResult { // Resolved value of ``TelnyxAdapter.provisionNumber``; exported as ``TelnyxProvisionNumberResult``.
4434
+ readonly phoneNumber: string; // the reserved E.164 number, per the ``provisionNumber`` doc comment
4435
+ readonly orderId: string; // the order ID, per the ``provisionNumber`` doc comment
4436
+ }
4437
+ interface ConfigureNumberOptions { // Second argument of ``TelnyxAdapter.configureNumber``; exported as ``TelnyxConfigureNumberOptions``.
4438
+ /** Telnyx Call Control Application / Connection ID. */
4439
+ connectionId: string;
4440
+ }
4441
+ interface InitiateCallOptions { // Argument of ``TelnyxAdapter.initiateCall``; exported as ``TelnyxInitiateCallOptions``.
4442
+ from: string;
4443
+ to: string;
4444
+ /** Override ``connectionId`` at dial time. Falls back to the adapter default. NOTE(review): presumably the ``connectionId`` given to the ``TelnyxAdapter`` constructor — confirm. */
4445
+ connectionId?: string;
4446
+ /** Opaque state string that Telnyx echoes back on webhooks. Base64-encoded on wire. */
4447
+ clientState?: string;
4448
+ }
4449
+ interface InitiateCallResult { // Resolved value of ``TelnyxAdapter.initiateCall``; exported as ``TelnyxInitiateCallResult``.
4450
+ readonly callControlId: string; // NOTE(review): presumably the id ``TelnyxAdapter.endCall`` expects as ``callControlId`` — confirm.
4451
+ }
4452
+ interface EndCallOptions { // Optional second argument of ``TelnyxAdapter.endCall``; exported as ``TelnyxEndCallOptions``.
4453
+ /** Idempotency key for the hangup command. */
4454
+ commandId?: string;
4455
+ }
4456
+ declare class TelnyxAdapter { // Declares number provisioning/configuration and outbound-call control methods; every public method returns a Promise.
4457
+ private readonly apiKey; // private member: its type is omitted from the declaration output
4458
+ readonly connectionId: string | undefined; // NOTE(review): presumably the optional constructor argument of the same name — confirm.
4459
+ private readonly baseUrl; // private member: its type is omitted from the declaration output
4460
+ constructor(apiKey: string, connectionId?: string);
4461
+ private request; // private member: its signature is omitted from the declaration output
4462
+ /**
4463
+ * Search available numbers for ``countryCode`` and place an order for the
4464
+ * first match. Returns both the reserved E.164 number and the order ID (as ``phoneNumber`` and ``orderId`` on the result).
4465
+ */
4466
+ provisionNumber(opts: ProvisionNumberOptions): Promise<ProvisionNumberResult>;
4467
+ /** Attach a number to a Call Control Application. Takes the phone number itself plus ``opts.connectionId``. */
4468
+ configureNumber(phoneNumber: string, opts: ConfigureNumberOptions): Promise<void>;
4469
+ /**
4470
+ * Place an outbound call on the Call Control Application.
4471
+ *
4472
+ * Note: we intentionally do NOT pass ``stream_url`` here — audio streaming
4473
+ * is configured on the Application itself (or started explicitly via a
4474
+ * ``streaming_start`` command). Passing ``stream_url`` on dial is a
4475
+ * deprecated code path that Telnyx rejects in newer API versions.
4476
+ */
4477
+ initiateCall(opts: InitiateCallOptions): Promise<InitiateCallResult>;
4478
+ /** Hang up an in-progress call. ``opts.commandId`` optionally supplies an idempotency key for the hangup command. */
4479
+ endCall(callControlId: string, opts?: EndCallOptions): Promise<void>;
4480
+ }
4481
+
4482
+ declare const SPAN_CALL = "getpatter.call"; // Span-name constants, all prefixed ``getpatter.``; the ``startSpan`` doc example passes ``SPAN_LLM`` as the span name.
4483
+ declare const SPAN_STT = "getpatter.stt";
4484
+ declare const SPAN_LLM = "getpatter.llm";
4485
+ declare const SPAN_TTS = "getpatter.tts";
4486
+ declare const SPAN_TOOL = "getpatter.tool";
4487
+ declare const SPAN_ENDPOINT = "getpatter.endpoint";
4488
+ declare const SPAN_BARGEIN = "getpatter.bargein";
4489
+ /**
4490
+ * Minimal span surface area — subset of the OTel ``Span`` API the Patter SDK
4491
+ * relies on. We keep this narrow so the no-op fallback stays trivial. This is the return type of ``startSpan``.
4492
+ */
4493
+ interface Span {
4494
+ setAttribute(key: string, value: unknown): void;
4495
+ recordException(exception: unknown): void; // accepts ``unknown``, so a caught value can be passed without narrowing
4496
+ end(): void; // per the ``startSpan`` doc comment, callers must call this (use try/finally)
4497
+ }
4498
+ interface InitTracingOptions { // Optional argument of ``initTracing``; every field is optional.
4499
+ serviceName?: string;
4500
+ otlpEndpoint?: string; // NOTE(review): presumably the endpoint for the OTLP/HTTP exporter mentioned in the ``initTracing`` doc — confirm.
4501
+ resourceAttributes?: Record<string, string>;
4502
+ }
4503
+ /**
4504
+ * Initialize tracing. Returns ``true`` when OTel is wired, ``false`` otherwise
4505
+ * (which covers both "env flag off" and "peer dep missing"). NOTE(review): the env flag's name is not stated in this declaration — check ``./tracing.ts``.
4506
+ *
4507
+ * If the optional SDK packages (``@opentelemetry/sdk-trace-node``,
4508
+ * ``@opentelemetry/sdk-trace-base``, ``@opentelemetry/exporter-trace-otlp-http``)
4509
+ * are installed, a ``NodeTracerProvider`` with OTLP/HTTP exporter is wired up
4510
+ * automatically. Otherwise, spans produced via ``startSpan`` are still created
4511
+ * against whatever global provider ``@opentelemetry/api`` resolves to (which
4512
+ * may be a no-op if the host hasn't registered one).
4513
+ */
4514
+ declare function initTracing(options?: InitTracingOptions): boolean; // ``options`` may be omitted entirely
4515
+ /** Reports whether tracing is active: true only if the env flag is set AND the tracer initialized cleanly. */
4516
+ declare function isTracingEnabled(): boolean;
4517
+ /**
4518
+ * Start a span (``name`` is e.g. one of the ``SPAN_*`` constants). Callers must ``end()`` the returned span — use try/finally:
4519
+ *
4520
+ * ```ts
4521
+ * const span = startSpan(SPAN_LLM, { 'llm.model': 'gpt-4o' });
4522
+ * try { ... } finally { span.end(); }
4523
+ * ```
4524
+ *
4525
+ * Returns a no-op span when tracing is disabled or unavailable, so the try/finally pattern above applies in both cases.
4526
+ */
4527
+ declare function startSpan(name: string, attrs?: Record<string, unknown>): Span;
4528
+
4529
+ /**
4530
+ * Observability entrypoint — re-exports the tracing API.
4531
+ *
4532
+ * See ``./tracing.ts`` for the implementation.
4533
+ */
4534
+
4535
+ /**
4536
+ * Call lifecycle event — TS mirror of ``getpatter.models.CallEvent``. Only ``callId`` is required; the remaining fields are optional.
4537
+ *
4538
+ * Kept in the observability namespace because the primary consumers are
4539
+ * metrics/tracing sinks (e.g. dashboard ingestion).
4540
+ */
4541
+ interface CallEvent {
4542
+ readonly callId: string;
4543
+ readonly caller?: string;
4544
+ readonly callee?: string;
4545
+ readonly direction?: string; // typed as a plain string, not a literal union — NOTE(review): presumably inbound/outbound; confirm.
4546
+ }
4547
+
4548
+ export { type AgentOptions, AllProvidersFailedError, type AnthropicConversion, LLM$3 as AnthropicLLM, type AnthropicLLMOptions, type AnthropicMessage, type AssemblyAIEncoding, type AssemblyAIModel, STT as AssemblyAISTT, type AssemblyAISTTOptions, type AudioConfig, type AudioSource, AuthenticationError, type BackgroundAudioOptions, BackgroundAudioPlayer, BuiltinAudioClip, type BuiltinAudioClipName, type BuiltinPcmSource, type CallControl, type CallEvent, type CallEventHandler, type CallMetrics, CallMetricsAccumulator, type CallRecord, type CartesiaEncoding, STT$2 as CartesiaSTT, type CartesiaSTTOptions, TTS$2 as CartesiaTTS, type CartesiaTTSOptions, LLM$1 as CerebrasLLM, type CerebrasLLMOptions, ChatContext, type ChatMessage, type ChatRole, CloudflareTunnel, type CostBreakdown, DEFAULT_MIN_SENTENCE_LEN, DEFAULT_PRICING, DTMF_EVENTS, STT$5 as DeepgramSTT, type DeepgramSTTOptions, DefaultToolExecutor, type DefaultToolExecutorOptions, type DefineToolInput, type DtmfEvent, ConvAI as ElevenLabsConvAI, ElevenLabsConvAIAdapter, type ConvAIOptions as ElevenLabsConvAIOptions, TTS$4 as ElevenLabsTTS, type ElevenLabsTTSOptions, EventBus, FallbackLLMProvider, type FallbackLLMProviderOptions, type FilePcmSource, GEMINI_DEFAULT_INPUT_SR, GEMINI_DEFAULT_OUTPUT_SR, GeminiLiveAdapter, type GeminiLiveEventHandler, LLM as GoogleLLM, type GoogleLLMOptions, LLM$2 as GroqLLM, type GroqLLMOptions, Guardrail$1 as Guardrail, type GuardrailOptions, type HookContext, IVRActivity, type IVRActivityOptions, type IVRToolDefinition, type IncomingMessage, type InitTracingOptions, type JobCallback, type LLMChunk, LLMLoop, type LLMProvider, type LMNTAudioFormat, type LMNTModel, type LMNTSampleRate, TTS as LMNTTTS, type LMNTTTSOptions, type LatencyBreakdown, type LocalCallOptions, type LocalConfig, type LocalOptions, type Logger, type LoopCallback, type MessageHandler, MetricsStore, Ngrok, LLM$4 as OpenAILLM, type OpenAILLMOptions, OpenAILLMProvider, type OpenAIMessage, Realtime as OpenAIRealtime, 
OpenAIRealtimeAdapter, type RealtimeOptions as OpenAIRealtimeOptions, TTS$3 as OpenAITTS, type OpenAITTSOptions, STT$3 as OpenAITranscribeSTT, type OpenAITranscribeSTTOptions, type ParamSpec, PartialStreamError, Patter, PatterConnectionError, PatterError, type PatterEventType, PatterTool, type PatterToolExecuteArgs, type PatterToolOptions, type PatterToolResult, PcmCarry, PipelineHookExecutor, type PipelineHooks, type PipelineMessageHandler, type ProviderPricing, ProvisionError, RateLimitError, type RawPcmSource, type RealtimeConfig, RemoteMessageHandler, TTS$1 as RimeTTS, type RimeTTSOptions, SPAN_BARGEIN, SPAN_CALL, SPAN_ENDPOINT, SPAN_LLM, SPAN_STT, SPAN_TOOL, SPAN_TTS, type SSEEvent, type STTConfig, type ScheduleHandle, SentenceChunker, type ServeOptions, type SilenceCallback, type SileroSampleRate, SileroVAD, type SileroVADOptions, STT$1 as SonioxSTT, type SonioxSTTOptions$1 as SonioxSTTOptions, type Span, StatefulResampler, type StatefulResamplerOptions, Static as StaticTunnel, type TTSConfig, Carrier as Telnyx, TelnyxAdapter, type TelnyxCarrierOptions, type ConfigureNumberOptions as TelnyxConfigureNumberOptions, type EndCallOptions as TelnyxEndCallOptions, type InitiateCallOptions as TelnyxInitiateCallOptions, type InitiateCallResult as TelnyxInitiateCallResult, type ProvisionNumberOptions as TelnyxProvisionNumberOptions, type ProvisionNumberResult as TelnyxProvisionNumberResult, TestSession, TfidfLoopDetector, type TfidfLoopDetectorOptions, Tool, type ToolDefinition, type ToolExecutor, type ToolHandler, type ToolOptions, type TunnelHandle, type TurnMetrics, Carrier$1 as Twilio, TwilioAdapter, type TwilioAdapterOptions, type TwilioCarrierOptions, type ConfigureNumberOptions$1 as TwilioConfigureNumberOptions, type InitiateCallOptions$1 as TwilioInitiateCallOptions, type InitiateCallResult$1 as TwilioInitiateCallResult, type ProvisionNumberOptions$1 as TwilioProvisionNumberOptions, type ProvisionNumberResult$1 as TwilioProvisionNumberResult, 
ULTRAVOX_DEFAULT_API_BASE, ULTRAVOX_DEFAULT_SR, type UltravoxEventHandler, UltravoxRealtimeAdapter, STT$4 as WhisperSTT, type WhisperSTTOptions, assemblyai, builtinClipPath, calculateRealtimeCost, calculateSttCost, calculateTelephonyCost, calculateTtsCost, callsToCsv, callsToJson, cartesia, createResampler16kTo8k, createResampler24kTo16k, createResampler8kTo16k, deepgram, defineTool, elevenlabs, filterEmoji, filterForTTS, filterMarkdown, formatDtmf, geminiLive, getLogger, guardrail, initTracing, isRemoteUrl, isTracingEnabled, isWebSocketUrl, lmnt, makeAuthMiddleware, mergePricing, mixPcm, mountApi, mountDashboard, mulawToPcm16, notifyDashboard, openaiTts, pcm16ToMulaw, resample16kTo8k, resample24kTo16k, resample8kTo16k, resamplePcm, rime, scheduleCron, scheduleInterval, scheduleOnce, selectSoundFromList, setLogger, soniox, speechmatics, startSpan, startTunnel, tool, ultravox, whisper };