getpatter 0.5.4 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +5 -2
- package/dist/aec-PJJMUM5E.mjs +228 -0
- package/dist/{banner-3GNZ6VQK.mjs → banner-UYW6UM3J.mjs} +4 -1
- package/dist/{carrier-config-33HQ2W4V.mjs → carrier-config-4ZKVYAWV.mjs} +5 -2
- package/dist/{chunk-AFUYSNDH.mjs → chunk-6GR5MHHQ.mjs} +9 -0
- package/dist/chunk-CYLJVT5G.mjs +7031 -0
- package/dist/{chunk-FIFIWBL7.mjs → chunk-JUQ5WQTQ.mjs} +2157 -883
- package/dist/{chunk-VJVDG4V5.mjs → chunk-MVOQFAEO.mjs} +5 -0
- package/dist/chunk-N565J3CF.mjs +69 -0
- package/dist/chunk-X3364LSI.mjs +363 -0
- package/dist/{chunk-SEMKNPCD.mjs → chunk-XS45BAQL.mjs} +5 -1
- package/dist/cli.js +32 -621
- package/dist/client-2GJVZT42.mjs +8935 -0
- package/dist/dashboard/ui.html +63 -0
- package/dist/{dist-YRCCJQ26.mjs → dist-RYMPCILF.mjs} +28 -2
- package/dist/index.d.mts +2199 -240
- package/dist/index.d.ts +2199 -240
- package/dist/index.js +28942 -7073
- package/dist/index.mjs +2337 -447
- package/dist/{node-cron-6PRPSBG5.mjs → node-cron-JFWQQRBU.mjs} +23 -2
- package/dist/persistence-LVIAHESK.mjs +7 -0
- package/dist/silero-vad-YLCXT5GQ.mjs +7 -0
- package/dist/streamableHttp-WKNGHDVO.mjs +1496 -0
- package/dist/test-mode-Y7YG5LFZ.mjs +8 -0
- package/dist/tunnel-43CHWPVQ.mjs +8 -0
- package/package.json +7 -7
- package/src/dashboard/ui.html +63 -0
- package/dist/chunk-QHHBUCMT.mjs +0 -25
- package/dist/persistence-LQBYQPQQ.mjs +0 -7
- package/dist/test-mode-MVJ3SKG4.mjs +0 -8
- package/dist/tunnel-UVR3PPAU.mjs +0 -8
package/dist/index.d.ts
CHANGED
|
@@ -2,6 +2,7 @@ import { EventEmitter } from 'events';
|
|
|
2
2
|
import { Request, Response, NextFunction, Express } from 'express';
|
|
3
3
|
|
|
4
4
|
/** Twilio carrier credentials holder for Patter. */
|
|
5
|
+
/** Constructor options for the Twilio {@link Carrier}. */
|
|
5
6
|
interface TwilioCarrierOptions {
|
|
6
7
|
/** Twilio Account SID. Falls back to TWILIO_ACCOUNT_SID env var. */
|
|
7
8
|
accountSid?: string;
|
|
@@ -13,7 +14,7 @@ interface TwilioCarrierOptions {
|
|
|
13
14
|
*
|
|
14
15
|
* @example
|
|
15
16
|
* ```ts
|
|
16
|
-
* import * as twilio from "getpatter/
|
|
17
|
+
* import * as twilio from "getpatter/telephony/twilio";
|
|
17
18
|
* const carrier = new twilio.Carrier(); // reads env
|
|
18
19
|
* const carrier = new twilio.Carrier({ accountSid: "AC...", authToken: "..." });
|
|
19
20
|
* ```
|
|
@@ -26,6 +27,7 @@ declare class Carrier$1 {
|
|
|
26
27
|
}
|
|
27
28
|
|
|
28
29
|
/** Telnyx carrier credentials holder for Patter. */
|
|
30
|
+
/** Constructor options for the Telnyx {@link Carrier}. */
|
|
29
31
|
interface TelnyxCarrierOptions {
|
|
30
32
|
/** Telnyx API key. Falls back to TELNYX_API_KEY env var. */
|
|
31
33
|
apiKey?: string;
|
|
@@ -39,7 +41,7 @@ interface TelnyxCarrierOptions {
|
|
|
39
41
|
*
|
|
40
42
|
* @example
|
|
41
43
|
* ```ts
|
|
42
|
-
* import * as telnyx from "getpatter/
|
|
44
|
+
* import * as telnyx from "getpatter/telephony/telnyx";
|
|
43
45
|
* const carrier = new telnyx.Carrier(); // reads env
|
|
44
46
|
* const carrier = new telnyx.Carrier({ apiKey: "KEY...", connectionId: "123" });
|
|
45
47
|
* ```
|
|
@@ -53,6 +55,7 @@ declare class Carrier {
|
|
|
53
55
|
}
|
|
54
56
|
|
|
55
57
|
/** OpenAI Realtime engine — marker class for Patter client dispatch. */
|
|
58
|
+
/** Constructor options for the OpenAI `Realtime` engine marker. */
|
|
56
59
|
interface RealtimeOptions {
|
|
57
60
|
/** API key. Falls back to OPENAI_API_KEY env var when omitted. */
|
|
58
61
|
apiKey?: string;
|
|
@@ -60,6 +63,21 @@ interface RealtimeOptions {
|
|
|
60
63
|
model?: string;
|
|
61
64
|
/** Voice preset. Defaults to alloy. */
|
|
62
65
|
voice?: string;
|
|
66
|
+
/**
|
|
67
|
+
* Reasoning-effort tier for `gpt-realtime-2`. When omitted the
|
|
68
|
+
* `session.reasoning` field is not sent and the server default applies.
|
|
69
|
+
* OpenAI recommends `"low"` for production voice flows — higher tiers add
|
|
70
|
+
* measurable per-turn latency. Has no effect on models that ignore the
|
|
71
|
+
* field.
|
|
72
|
+
*/
|
|
73
|
+
reasoningEffort?: 'minimal' | 'low' | 'medium' | 'high';
|
|
74
|
+
/**
|
|
75
|
+
* Override for the Realtime session's `input_audio_transcription.model`.
|
|
76
|
+
* Omit to keep the adapter default (`whisper-1`). Use
|
|
77
|
+
* `"gpt-realtime-whisper"` for low-latency transcript partials,
|
|
78
|
+
* `"gpt-4o-transcribe"` for higher accuracy.
|
|
79
|
+
*/
|
|
80
|
+
inputAudioTranscriptionModel?: string;
|
|
63
81
|
}
|
|
64
82
|
/**
|
|
65
83
|
* OpenAI Realtime engine marker.
|
|
@@ -69,6 +87,11 @@ interface RealtimeOptions {
|
|
|
69
87
|
* import * as openai from "getpatter/engines/openai";
|
|
70
88
|
* const engine = new openai.Realtime(); // reads OPENAI_API_KEY
|
|
71
89
|
* const engine = new openai.Realtime({ voice: "alloy" });
|
|
90
|
+
* const engine = new openai.Realtime({
|
|
91
|
+
* model: "gpt-realtime-2",
|
|
92
|
+
* reasoningEffort: "low", // gpt-realtime-2 only
|
|
93
|
+
* inputAudioTranscriptionModel: "gpt-realtime-whisper",
|
|
94
|
+
* });
|
|
72
95
|
* ```
|
|
73
96
|
*/
|
|
74
97
|
declare class Realtime {
|
|
@@ -76,10 +99,13 @@ declare class Realtime {
|
|
|
76
99
|
readonly apiKey: string;
|
|
77
100
|
readonly model: string;
|
|
78
101
|
readonly voice: string;
|
|
102
|
+
readonly reasoningEffort?: 'minimal' | 'low' | 'medium' | 'high';
|
|
103
|
+
readonly inputAudioTranscriptionModel?: string;
|
|
79
104
|
constructor(opts?: RealtimeOptions);
|
|
80
105
|
}
|
|
81
106
|
|
|
82
107
|
/** ElevenLabs ConvAI engine — marker class for Patter client dispatch. */
|
|
108
|
+
/** Constructor options for the ElevenLabs `ConvAI` engine marker. */
|
|
83
109
|
interface ConvAIOptions {
|
|
84
110
|
/** API key. Falls back to ELEVENLABS_API_KEY env var when omitted. */
|
|
85
111
|
apiKey?: string;
|
|
@@ -175,6 +201,7 @@ declare class Ngrok {
|
|
|
175
201
|
* consumed either form keeps working.
|
|
176
202
|
*/
|
|
177
203
|
|
|
204
|
+
/** Options accepted by `new Guardrail(...)` / `guardrail(...)`. */
|
|
178
205
|
interface GuardrailOptions {
|
|
179
206
|
/** Name for logging when triggered. */
|
|
180
207
|
name: string;
|
|
@@ -204,7 +231,9 @@ declare class Guardrail$1 {
|
|
|
204
231
|
}
|
|
205
232
|
/** Factory helper mirroring Python's `guardrail(...)` function. */
|
|
206
233
|
declare function guardrail(opts: GuardrailOptions): Guardrail$1;
|
|
234
|
+
/** Async handler invoked in-process when the LLM calls a `Tool`. */
|
|
207
235
|
type ToolHandler = (args: Record<string, unknown>, context: Record<string, unknown>) => Promise<string>;
|
|
236
|
+
/** Options accepted by `new Tool(...)` / `tool(...)`. */
|
|
208
237
|
interface ToolOptions {
|
|
209
238
|
/** Tool name (visible to the LLM). */
|
|
210
239
|
name: string;
|
|
@@ -283,6 +312,7 @@ interface STTTranscript {
|
|
|
283
312
|
/** Which provider event this transcript represents (e.g. ``Results``). */
|
|
284
313
|
eventType?: string;
|
|
285
314
|
}
|
|
315
|
+
/** Callback invoked by an `STTAdapter` for each (partial or final) transcript event. */
|
|
286
316
|
type STTTranscriptCallback = (t: STTTranscript) => Promise<void> | void;
|
|
287
317
|
/** Shape shared by every STT adapter in the SDK. */
|
|
288
318
|
interface STTAdapter {
|
|
@@ -290,7 +320,20 @@ interface STTAdapter {
|
|
|
290
320
|
sendAudio(pcm: Buffer): void | Promise<void>;
|
|
291
321
|
onTranscript(cb: STTTranscriptCallback): void;
|
|
292
322
|
close(): void | Promise<void>;
|
|
323
|
+
/**
|
|
324
|
+
* Optional: ask the provider to immediately finalise the in-flight
|
|
325
|
+
* utterance (rather than waiting for its own endpoint timer). Called by
|
|
326
|
+
* ``StreamHandler`` whenever the SDK's VAD signals ``speech_end``, and
|
|
327
|
+
* after a barge-in cancel — both moments where waiting for the
|
|
328
|
+
* provider's endpoint heuristic stalls the next turn.
|
|
329
|
+
*
|
|
330
|
+
* Implementations that do not support utterance-level finalisation
|
|
331
|
+
* (e.g. one-shot transcribers like Whisper) should omit this method
|
|
332
|
+
* entirely; the stream handler does an optional-chained call.
|
|
333
|
+
*/
|
|
334
|
+
finalize?(): void | Promise<void>;
|
|
293
335
|
}
|
|
336
|
+
/** Shape shared by every TTS adapter in the SDK. */
|
|
294
337
|
interface TTSAdapter {
|
|
295
338
|
synthesizeStream(text: string): AsyncIterable<Buffer>;
|
|
296
339
|
}
|
|
@@ -303,8 +346,10 @@ interface TTSAdapter {
|
|
|
303
346
|
* passes through unchanged.
|
|
304
347
|
*/
|
|
305
348
|
|
|
349
|
+
/** Runs user-defined pipeline hooks (`beforeSendToStt`, `afterTranscribe`, …) with fail-open semantics. */
|
|
306
350
|
declare class PipelineHookExecutor {
|
|
307
351
|
private readonly hooks;
|
|
352
|
+
private readonly afterLlm;
|
|
308
353
|
constructor(hooks: PipelineHooks | undefined);
|
|
309
354
|
/**
|
|
310
355
|
* Run beforeSendToStt hook. Returns null to drop the audio chunk.
|
|
@@ -325,14 +370,47 @@ declare class PipelineHookExecutor {
|
|
|
325
370
|
*/
|
|
326
371
|
runBeforeLlm(messages: Array<Record<string, unknown>>, ctx: HookContext): Promise<Array<Record<string, unknown>>>;
|
|
327
372
|
/**
|
|
328
|
-
*
|
|
329
|
-
*
|
|
330
|
-
*
|
|
373
|
+
* Tier 1 — per-token sync transform. Returns the (possibly transformed)
|
|
374
|
+
* chunk. Fail-open: on exception or non-string return, the original chunk
|
|
375
|
+
* passes through unchanged. Must be cheap (~0 ms budget).
|
|
376
|
+
*/
|
|
377
|
+
runAfterLlmChunk(chunk: string): string;
|
|
378
|
+
/**
|
|
379
|
+
* Tier 2 — per-sentence rewrite. Returns rewritten sentence text, the
|
|
380
|
+
* original sentence (if hook returned `null`), or `null` to drop the
|
|
381
|
+
* sentence entirely (empty string is treated as drop). Fail-open.
|
|
382
|
+
*/
|
|
383
|
+
runAfterLlmSentence(sentence: string, ctx: HookContext): Promise<string | null>;
|
|
384
|
+
/**
|
|
385
|
+
* Tier 3 — per-response rewrite. Returns the (possibly rewritten) full
|
|
386
|
+
* response text. Triggered after the LLM stream completes. Caller is
|
|
387
|
+
* responsible for buffering tokens before invocation. Fail-open.
|
|
388
|
+
*/
|
|
389
|
+
runAfterLlmResponse(text: string, ctx: HookContext): Promise<string>;
|
|
390
|
+
/**
|
|
391
|
+
* Backward-compatible alias for `runAfterLlmResponse`. Existing call sites
|
|
392
|
+
* in the LLM loop continue to work unchanged.
|
|
393
|
+
*
|
|
394
|
+
* @deprecated Use `runAfterLlmResponse` directly.
|
|
331
395
|
*/
|
|
332
396
|
runAfterLlm(text: string, ctx: HookContext): Promise<string>;
|
|
333
397
|
/**
|
|
334
|
-
* Whether
|
|
335
|
-
* whether to buffer streaming tokens
|
|
398
|
+
* Whether a per-response (tier 3) `onResponse` transform is configured.
|
|
399
|
+
* The LLM loop uses this to decide whether to buffer streaming tokens
|
|
400
|
+
* before yielding them. Per-token (tier 1) and per-sentence (tier 2)
|
|
401
|
+
* transforms do NOT require buffering.
|
|
402
|
+
*/
|
|
403
|
+
hasAfterLlmResponse(): boolean;
|
|
404
|
+
/** Whether a per-sentence (tier 2) transform is configured. */
|
|
405
|
+
hasAfterLlmSentence(): boolean;
|
|
406
|
+
/** Whether a per-token (tier 1) transform is configured. */
|
|
407
|
+
hasAfterLlmChunk(): boolean;
|
|
408
|
+
/**
|
|
409
|
+
* Backward-compatible alias for `hasAfterLlmResponse`. The legacy callable
|
|
410
|
+
* form maps to `onResponse`, so this preserves the original semantic for
|
|
411
|
+
* existing call sites.
|
|
412
|
+
*
|
|
413
|
+
* @deprecated Use `hasAfterLlmResponse` directly.
|
|
336
414
|
*/
|
|
337
415
|
hasAfterLlm(): boolean;
|
|
338
416
|
/**
|
|
@@ -350,13 +428,15 @@ declare class PipelineHookExecutor {
|
|
|
350
428
|
/**
|
|
351
429
|
* Lightweight in-process event bus for Patter call lifecycle events.
|
|
352
430
|
*
|
|
353
|
-
* Mirrors the Python ``PatterEventBus`` (
|
|
431
|
+
* Mirrors the Python ``PatterEventBus`` (libraries/python/getpatter/observability/event_bus.py).
|
|
354
432
|
* Consumers subscribe with ``on()`` and receive typed payloads. ``emit()`` is
|
|
355
433
|
* synchronous but handles async listeners: rejections are surfaced via the
|
|
356
434
|
* Patter logger rather than being swallowed or crashing the call.
|
|
357
435
|
*/
|
|
436
|
+
/** String tag identifying every event type the `EventBus` knows how to dispatch. */
|
|
358
437
|
type PatterEventType = 'turn_started' | 'turn_ended' | 'eou_metrics' | 'interruption' | 'llm_metrics' | 'tts_metrics' | 'stt_metrics' | 'metrics_collected' | 'call_ended' | 'transcript_partial' | 'transcript_final' | 'llm_chunk' | 'tts_chunk' | 'tool_call_started';
|
|
359
438
|
type Listener<T = unknown> = (payload: T) => void | Promise<void>;
|
|
439
|
+
/** In-process pub/sub for Patter call-lifecycle events. */
|
|
360
440
|
declare class EventBus {
|
|
361
441
|
private readonly listeners;
|
|
362
442
|
/**
|
|
@@ -370,6 +450,65 @@ declare class EventBus {
|
|
|
370
450
|
emit<T = unknown>(event: PatterEventType, payload: T): void;
|
|
371
451
|
}
|
|
372
452
|
|
|
453
|
+
/**
|
|
454
|
+
* Per-tool circuit breaker for the Patter SDK.
|
|
455
|
+
*
|
|
456
|
+
* Trips OPEN after N consecutive failures, rejects calls for a cooldown
|
|
457
|
+
* window so a flaky downstream (DB outage, vendor API rate-limit, dead
|
|
458
|
+
* webhook) doesn't burn LLM tokens on retries that will keep failing.
|
|
459
|
+
* After the cooldown elapses the next call probes (HALF_OPEN); a success
|
|
460
|
+
* resets to CLOSED, a failure reopens. The model receives a structured
|
|
461
|
+
* ``{ error, fallback: true }`` JSON in all rejection paths so it can
|
|
462
|
+
* recover gracefully instead of waiting forever.
|
|
463
|
+
*
|
|
464
|
+
* Lightweight in-memory implementation — one ``CircuitBreakerRegistry``
|
|
465
|
+
* per ``DefaultToolExecutor``, state is per tool name. Not persisted
|
|
466
|
+
* across process restarts (intentional — voice calls are too short for
|
|
467
|
+
* persistence to matter).
|
|
468
|
+
*/
|
|
469
|
+
/** Lifecycle states for the breaker. */
|
|
470
|
+
declare const CircuitBreakerState: {
|
|
471
|
+
readonly CLOSED: "closed";
|
|
472
|
+
readonly OPEN: "open";
|
|
473
|
+
readonly HALF_OPEN: "half_open";
|
|
474
|
+
};
|
|
475
|
+
type CircuitBreakerState = (typeof CircuitBreakerState)[keyof typeof CircuitBreakerState];
|
|
476
|
+
/** Tunables for a single per-tool breaker. */
|
|
477
|
+
interface CircuitBreakerOptions {
|
|
478
|
+
/** Consecutive failures that flip CLOSED → OPEN. ``0`` disables. */
|
|
479
|
+
failureThreshold?: number;
|
|
480
|
+
/** Time (ms) the breaker stays OPEN before allowing a probe. */
|
|
481
|
+
cooldownMs?: number;
|
|
482
|
+
}
|
|
483
|
+
interface PerToolState {
|
|
484
|
+
state: CircuitBreakerState;
|
|
485
|
+
consecutiveFailures: number;
|
|
486
|
+
openedAt: number;
|
|
487
|
+
}
|
|
488
|
+
/** Per-name registry tracking circuit state for a fleet of tools. */
|
|
489
|
+
declare class CircuitBreakerRegistry {
|
|
490
|
+
private readonly threshold;
|
|
491
|
+
private readonly cooldownMs;
|
|
492
|
+
private readonly state;
|
|
493
|
+
/** Inject for deterministic tests; defaults to ``Date.now()``. */
|
|
494
|
+
private readonly clock;
|
|
495
|
+
constructor(opts?: CircuitBreakerOptions, clock?: () => number);
|
|
496
|
+
/** Returns ``true`` when this tool is currently allowed to run. */
|
|
497
|
+
allow(toolName: string): boolean;
|
|
498
|
+
/** Mark a successful execution. Resets the breaker to CLOSED. */
|
|
499
|
+
recordSuccess(toolName: string): void;
|
|
500
|
+
/** Mark a failed execution; trips OPEN once threshold is reached. */
|
|
501
|
+
recordFailure(toolName: string): void;
|
|
502
|
+
/**
|
|
503
|
+
* Time until the breaker transitions OPEN → HALF_OPEN, in ms. Returns
|
|
504
|
+
* ``0`` when the breaker is currently allowing calls. Useful for
|
|
505
|
+
* tests and the structured rejection JSON.
|
|
506
|
+
*/
|
|
507
|
+
timeUntilHalfOpen(toolName: string): number;
|
|
508
|
+
/** Snapshot for debugging / metrics. */
|
|
509
|
+
snapshot(toolName: string): PerToolState | null;
|
|
510
|
+
}
|
|
511
|
+
|
|
373
512
|
/**
|
|
374
513
|
* Built-in LLM loop for pipeline mode when no onMessage handler is provided.
|
|
375
514
|
*
|
|
@@ -387,7 +526,7 @@ interface LlmUsageRecorder {
|
|
|
387
526
|
}
|
|
388
527
|
/**
|
|
389
528
|
* Pluggable tool executor — mirrors the Python ``ToolExecutor`` in
|
|
390
|
-
* ``
|
|
529
|
+
* ``libraries/python/getpatter/services/tool_executor.py``.
|
|
391
530
|
*
|
|
392
531
|
* Implementors receive a fully-resolved ``ToolDefinition`` (handler +/ webhook
|
|
393
532
|
* URL already validated by the SDK) and MUST return a JSON-stringifiable
|
|
@@ -395,28 +534,50 @@ interface LlmUsageRecorder {
|
|
|
395
534
|
* ``{ error: "...", fallback: true }`` rather than thrown.
|
|
396
535
|
*/
|
|
397
536
|
interface ToolExecutor {
|
|
398
|
-
execute(toolDef: ToolDefinition, args: Record<string, unknown>, callContext: Record<string, unknown>): Promise<string>;
|
|
537
|
+
execute(toolDef: ToolDefinition, args: Record<string, unknown>, callContext: Record<string, unknown>, onProgress?: (text: string) => void | Promise<void>): Promise<string>;
|
|
399
538
|
}
|
|
539
|
+
/** Constructor options for `DefaultToolExecutor`. */
|
|
400
540
|
interface DefaultToolExecutorOptions {
|
|
401
541
|
/** Total attempts = maxRetries + 1. Default: 2 (i.e. 3 attempts). */
|
|
402
542
|
maxRetries?: number;
|
|
403
|
-
/** Delay between attempts, in ms. */
|
|
543
|
+
/** Delay between attempts, in ms. Each retry waits this × ``2^attempt``. */
|
|
404
544
|
retryDelayMs?: number;
|
|
405
545
|
/** Per-request timeout for webhook calls, in ms. */
|
|
406
546
|
requestTimeoutMs?: number;
|
|
547
|
+
/**
|
|
548
|
+
* Circuit-breaker tunables. Default trips OPEN after 5 consecutive
|
|
549
|
+
* failures and stays OPEN for 30 s. Pass ``{ failureThreshold: 0 }`` to
|
|
550
|
+
* disable entirely (legacy behaviour).
|
|
551
|
+
*/
|
|
552
|
+
circuitBreaker?: CircuitBreakerOptions;
|
|
407
553
|
}
|
|
408
554
|
/**
|
|
409
|
-
* Default executor — webhook with retry/
|
|
555
|
+
* Default executor — webhook + handler with retry/exponential-backoff
|
|
556
|
+
* and a per-tool circuit breaker.
|
|
410
557
|
*
|
|
411
|
-
*
|
|
412
|
-
*
|
|
558
|
+
* Failure modes return a structured ``{ error, fallback: true }`` JSON
|
|
559
|
+
* so the model can recover gracefully (e.g. respond "I couldn't reach
|
|
560
|
+
* the booking system, can I take your number to call you back?")
|
|
561
|
+
* instead of hanging on an exception that never surfaces.
|
|
413
562
|
*/
|
|
414
563
|
declare class DefaultToolExecutor implements ToolExecutor {
|
|
415
564
|
private readonly maxRetries;
|
|
416
565
|
private readonly retryDelayMs;
|
|
417
566
|
private readonly requestTimeoutMs;
|
|
567
|
+
private readonly breaker;
|
|
418
568
|
constructor(opts?: DefaultToolExecutorOptions);
|
|
419
|
-
|
|
569
|
+
/** Expose the breaker for tests + dashboard observability. */
|
|
570
|
+
get circuitBreaker(): CircuitBreakerRegistry;
|
|
571
|
+
execute(toolDef: ToolDefinition, args: Record<string, unknown>, callContext: Record<string, unknown>,
|
|
572
|
+
/**
|
|
573
|
+
* Optional progress sink — invoked with each ``{ progress: string }``
|
|
574
|
+
* value yielded by an async-generator handler. Wired by the stream
|
|
575
|
+
* handler to ``OpenAIRealtimeAdapter.sendText`` so the agent speaks
|
|
576
|
+
* the progress message inline. ``null``/``undefined`` discards
|
|
577
|
+
* progress (function handlers always discard since they have no
|
|
578
|
+
* progress channel).
|
|
579
|
+
*/
|
|
580
|
+
onProgress?: (text: string) => void | Promise<void>): Promise<string>;
|
|
420
581
|
}
|
|
421
582
|
/** A single streaming chunk yielded by an LLM provider. */
|
|
422
583
|
interface LLMChunk {
|
|
@@ -440,8 +601,21 @@ interface LLMChunk {
|
|
|
440
601
|
* invocation. Chunks with the same ``index`` are concatenated.
|
|
441
602
|
* - ``{ type: "done" }`` — signals the end of the stream (optional).
|
|
442
603
|
*/
|
|
604
|
+
/**
|
|
605
|
+
* Optional knobs passed by the LLM loop into ``provider.stream``. Today the
|
|
606
|
+
* only field is ``signal``: a per-turn AbortSignal that the stream handler
|
|
607
|
+
* trips on barge-in so the underlying ``fetch`` / SDK call is cancelled
|
|
608
|
+
* IMMEDIATELY instead of waiting for the next token. Without this, a
|
|
609
|
+
* barge-in fired while the upstream LLM is still composing its first
|
|
610
|
+
* sentence leaves the fetch open until the provider's own timeout (often
|
|
611
|
+
* 30 s) elapses, blocking the next user transcript and producing the
|
|
612
|
+
* "agent stays silent after interruption" symptom.
|
|
613
|
+
*/
|
|
614
|
+
interface LLMStreamOptions {
|
|
615
|
+
signal?: AbortSignal;
|
|
616
|
+
}
|
|
443
617
|
interface LLMProvider {
|
|
444
|
-
stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
|
|
618
|
+
stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<LLMChunk, void, unknown>;
|
|
445
619
|
}
|
|
446
620
|
/** Optional sampling kwargs forwarded into the OpenAI Chat Completions body. */
|
|
447
621
|
interface OpenAILLMSamplingOptions {
|
|
@@ -481,8 +655,10 @@ declare class OpenAILLMProvider implements LLMProvider {
|
|
|
481
655
|
private readonly presencePenalty?;
|
|
482
656
|
private readonly stop?;
|
|
483
657
|
constructor(apiKey: string, model: string, sampling?: OpenAILLMSamplingOptions);
|
|
484
|
-
|
|
658
|
+
/** Stream OpenAI Chat Completions chunks for the given messages/tools. */
|
|
659
|
+
stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<LLMChunk, void, unknown>;
|
|
485
660
|
}
|
|
661
|
+
/** Pipeline-mode LLM driver: runs the chat loop, dispatches tool calls, and emits text deltas. */
|
|
486
662
|
declare class LLMLoop {
|
|
487
663
|
private readonly provider;
|
|
488
664
|
private readonly systemPrompt;
|
|
@@ -493,7 +669,8 @@ declare class LLMLoop {
|
|
|
493
669
|
private eventBus?;
|
|
494
670
|
private readonly _providerName;
|
|
495
671
|
private readonly _modelName;
|
|
496
|
-
|
|
672
|
+
private onToolCall?;
|
|
673
|
+
constructor(apiKey: string, model: string, systemPrompt: string, tools?: ToolDefinition[] | null, llmProvider?: LLMProvider, disablePhonePreamble?: boolean);
|
|
497
674
|
/**
|
|
498
675
|
* Swap in a custom tool executor (e.g. different retry policy, metrics
|
|
499
676
|
* wrapping, tenant-aware fan-out). The default is ``DefaultToolExecutor``.
|
|
@@ -505,6 +682,14 @@ declare class LLMLoop {
|
|
|
505
682
|
* appears. Set to ``undefined`` to disable.
|
|
506
683
|
*/
|
|
507
684
|
setEventBus(bus: EventBus | undefined): void;
|
|
685
|
+
/**
|
|
686
|
+
* Set or replace the post-tool-execution observer. The callback is
|
|
687
|
+
* awaited after every successful tool execution with
|
|
688
|
+
* `(name, args, result)`. Pass `undefined` to disable. Mirrors the
|
|
689
|
+
* Python `LLMLoop.set_on_tool_call` setter so callers (e.g. the
|
|
690
|
+
* pipeline `StreamHandler`) can wire the loop after construction.
|
|
691
|
+
*/
|
|
692
|
+
setOnToolCall(callback: ((name: string, args: Record<string, unknown>, result: string) => Promise<void>) | undefined): void;
|
|
508
693
|
/**
|
|
509
694
|
* Stream LLM response tokens, handling tool calls automatically.
|
|
510
695
|
* Yields text tokens as they arrive from the LLM.
|
|
@@ -516,16 +701,23 @@ declare class LLMLoop {
|
|
|
516
701
|
run(userText: string, history: Array<{
|
|
517
702
|
role: string;
|
|
518
703
|
text: string;
|
|
519
|
-
}>, callContext: Record<string, unknown>, metrics?: LlmUsageRecorder, hookExecutor?: PipelineHookExecutor, hookCtx?: HookContext): AsyncGenerator<string, void, unknown>;
|
|
704
|
+
}>, callContext: Record<string, unknown>, metrics?: LlmUsageRecorder, hookExecutor?: PipelineHookExecutor, hookCtx?: HookContext, opts?: LLMStreamOptions): AsyncGenerator<string, void, unknown>;
|
|
520
705
|
private executeTool;
|
|
521
706
|
private buildMessages;
|
|
522
707
|
}
|
|
523
708
|
|
|
709
|
+
/**
|
|
710
|
+
* Public type definitions for the Patter SDK — agent options, pipeline hooks,
|
|
711
|
+
* provider config envelopes, and serve/call request/response shapes.
|
|
712
|
+
*/
|
|
713
|
+
|
|
714
|
+
/** Inbound message handed to a `MessageHandler` per turn (legacy single-turn API). */
|
|
524
715
|
interface IncomingMessage {
|
|
525
716
|
readonly text: string;
|
|
526
717
|
readonly callId: string;
|
|
527
718
|
readonly caller: string;
|
|
528
719
|
}
|
|
720
|
+
/** STT provider configuration envelope (provider name + key + language + provider-specific options). */
|
|
529
721
|
interface STTConfig {
|
|
530
722
|
readonly provider: string;
|
|
531
723
|
readonly apiKey: string;
|
|
@@ -539,6 +731,7 @@ interface STTConfig {
|
|
|
539
731
|
/** Provider-specific knobs (e.g. Deepgram endpointing). */
|
|
540
732
|
options?: Record<string, unknown>;
|
|
541
733
|
}
|
|
734
|
+
/** TTS provider configuration envelope (provider name + key + voice + provider-specific options). */
|
|
542
735
|
interface TTSConfig {
|
|
543
736
|
readonly provider: string;
|
|
544
737
|
readonly apiKey: string;
|
|
@@ -550,17 +743,94 @@ interface TTSConfig {
|
|
|
550
743
|
toDict(): Record<string, string | Record<string, unknown>>;
|
|
551
744
|
options?: Record<string, unknown>;
|
|
552
745
|
}
|
|
746
|
+
/** Single-turn message handler — receives the user's transcript, returns the agent's reply. */
|
|
553
747
|
type MessageHandler = (msg: IncomingMessage) => Promise<string>;
|
|
748
|
+
/** Generic call-lifecycle callback (start/end/transcript/metrics). */
|
|
554
749
|
type CallEventHandler = (data: Record<string, unknown>) => Promise<void>;
|
|
750
|
+
/**
|
|
751
|
+
* Public MCP server configuration. ``string`` is shorthand for
|
|
752
|
+
* ``{ url: <string>, transport: 'streamable-http' }``. Re-exported from
|
|
753
|
+
* ``tools/mcp-client`` to keep a single source of truth.
|
|
754
|
+
*/
|
|
755
|
+
type MCPServerConfig = string | {
|
|
756
|
+
readonly url: string;
|
|
757
|
+
readonly transport?: 'streamable-http';
|
|
758
|
+
/** Headers attached to every transport request — typically auth. */
|
|
759
|
+
readonly headers?: Record<string, string>;
|
|
760
|
+
/** Optional logical name for telemetry / log lines. */
|
|
761
|
+
readonly name?: string;
|
|
762
|
+
};
|
|
763
|
+
/** Internal shape of a tool definition (matches `Tool` from `public-api.ts`). */
|
|
555
764
|
interface ToolDefinition {
|
|
556
765
|
name: string;
|
|
557
766
|
description: string;
|
|
558
767
|
parameters: Record<string, unknown>;
|
|
559
768
|
/** Webhook URL — called when the LLM invokes this tool. Mutually exclusive with handler. */
|
|
560
769
|
webhookUrl?: string;
|
|
561
|
-
/**
|
|
562
|
-
|
|
770
|
+
/**
|
|
771
|
+
* Local handler — called instead of ``webhookUrl`` when present.
|
|
772
|
+
*
|
|
773
|
+
* Two forms:
|
|
774
|
+
*
|
|
775
|
+
* - **Async function**: returns the final result as a JSON string.
|
|
776
|
+
* The model receives only the final return value.
|
|
777
|
+
*
|
|
778
|
+
* - **Async generator**: yields zero or more progress updates before
|
|
779
|
+
* returning. Each ``yield`` of ``{ progress: string }`` is spoken
|
|
780
|
+
* inline by the agent (Realtime: via ``adapter.sendText``) so the
|
|
781
|
+
* caller hears live status during long-running tools. The final
|
|
782
|
+
* ``return`` value (or last ``yield`` if no return) is the
|
|
783
|
+
* function-call result sent to the model. Pipeline mode currently
|
|
784
|
+
* ignores the progress yields — the final value is still used as
|
|
785
|
+
* the tool result.
|
|
786
|
+
*/
|
|
787
|
+
handler?: ((args: Record<string, unknown>, context: Record<string, unknown>) => Promise<string>) | ((args: Record<string, unknown>, context: Record<string, unknown>) => AsyncGenerator<{
|
|
788
|
+
progress?: string;
|
|
789
|
+
result?: string;
|
|
790
|
+
}, string | void, unknown>);
|
|
791
|
+
/**
|
|
792
|
+
* "Reassurance" filler the agent speaks while a slow tool call runs.
|
|
793
|
+
* Bridges the silence when a handler or webhook takes longer than
|
|
794
|
+
* humans naturally tolerate (~1.5 s) without sounding dead.
|
|
795
|
+
*
|
|
796
|
+
* Two forms:
|
|
797
|
+
* - string: shorthand for ``{ message: <string>, afterMs: 1500 }``.
|
|
798
|
+
* - object: explicit ``{ message, afterMs? }``. ``afterMs`` is the
|
|
799
|
+
* grace window before the reassurance fires; if the tool returns
|
|
800
|
+
* earlier, no message is spoken.
|
|
801
|
+
*
|
|
802
|
+
* Currently honoured only in **Realtime mode** — the SDK enqueues the
|
|
803
|
+
* message via ``OpenAIRealtimeAdapter.sendText`` so the model
|
|
804
|
+
* synthesises it inline. Pipeline mode has no clean injection point
|
|
805
|
+
* mid-turn yet; the option is silently ignored there. Off by default.
|
|
806
|
+
*/
|
|
807
|
+
reassurance?: string | {
|
|
808
|
+
message: string;
|
|
809
|
+
afterMs?: number;
|
|
810
|
+
};
|
|
811
|
+
/**
|
|
812
|
+
* Enable OpenAI strict mode for this tool's function schema. When ``true``
|
|
813
|
+
* the model is constrained to emit arguments that exactly match the
|
|
814
|
+
* declared schema — no missing required fields, no extra properties, no
|
|
815
|
+
* type coercion. Defaults to ``false`` for backward compatibility.
|
|
816
|
+
*
|
|
817
|
+
* Strict mode requires the schema to satisfy OpenAI's structural rules:
|
|
818
|
+
* - root must be ``type: "object"``
|
|
819
|
+
* - every nested object must have ``additionalProperties: false``
|
|
820
|
+
* - every property listed in ``properties`` must also be in ``required``
|
|
821
|
+
*
|
|
822
|
+
* Patter validates these requirements at ``agent()`` build time when
|
|
823
|
+
* ``strict: true`` is set; an invalid schema raises immediately rather
|
|
824
|
+
* than failing silently mid-call. Use ``null`` in a union (``["string",
|
|
825
|
+
* "null"]``) to express "optional" — strict mode does not allow truly
|
|
826
|
+
* optional fields.
|
|
827
|
+
*
|
|
828
|
+
* Recommended for any tool whose handler/webhook can't safely tolerate
|
|
829
|
+
* malformed arguments (DB writes, payment, transfers).
|
|
830
|
+
*/
|
|
831
|
+
strict?: boolean;
|
|
563
832
|
}
|
|
833
|
+
/** Constructor options for `new Patter({...})` in local-server mode. */
|
|
564
834
|
interface LocalOptions {
|
|
565
835
|
/**
|
|
566
836
|
* Telephony carrier instance. Required.
|
|
@@ -579,6 +849,34 @@ interface LocalOptions {
|
|
|
579
849
|
tunnel?: CloudflareTunnel | Static | boolean;
|
|
580
850
|
phoneNumber: string;
|
|
581
851
|
webhookUrl?: string;
|
|
852
|
+
/**
|
|
853
|
+
* On-disk persistence for the dashboard's call history. The dashboard
|
|
854
|
+
* itself is in-memory, but enabling ``persist`` writes per-call records
|
|
855
|
+
* (metadata.json, transcript.jsonl, events.jsonl) to disk and rebuilds
|
|
856
|
+
* the in-memory cache on startup so the dashboard survives process
|
|
857
|
+
* restarts without an external database.
|
|
858
|
+
*
|
|
859
|
+
* Accepted values:
|
|
860
|
+
* - omitted / ``false`` (default): no disk writes; the dashboard resets
|
|
861
|
+
* on every restart. Backward-compatible with prior behaviour.
|
|
862
|
+
* - ``true``: write under the platform default location
|
|
863
|
+
* (``~/Library/Application Support/patter`` on macOS,
|
|
864
|
+
* ``%LOCALAPPDATA%\\patter`` on Windows,
|
|
865
|
+
* ``$XDG_DATA_HOME/patter`` on Linux). Equivalent to setting
|
|
866
|
+
* ``PATTER_LOG_DIR=auto``.
|
|
867
|
+
* - string: write under the supplied absolute path. Equivalent to
|
|
868
|
+
* setting ``PATTER_LOG_DIR=<path>``.
|
|
869
|
+
*
|
|
870
|
+
* The ``PATTER_LOG_DIR`` env var still works as a deployment-time
|
|
871
|
+
* override and is honoured whenever ``persist`` is left unset. When
|
|
872
|
+
* ``persist`` is set explicitly the env var is ignored.
|
|
873
|
+
*
|
|
874
|
+
* Retention: defaults to 30 days, controlled by
|
|
875
|
+
* ``PATTER_LOG_RETENTION_DAYS`` (set to ``0`` to keep forever).
|
|
876
|
+
* Phone numbers are masked by default; control via
|
|
877
|
+
* ``PATTER_LOG_REDACT_PHONE``.
|
|
878
|
+
*/
|
|
879
|
+
persist?: boolean | string;
|
|
582
880
|
/**
|
|
583
881
|
* @internal — allows ``StreamHandler`` to build the default OpenAI
|
|
584
882
|
* ``LLMLoop`` when no ``onMessage`` handler is supplied. The
|
|
@@ -587,6 +885,7 @@ interface LocalOptions {
|
|
|
587
885
|
*/
|
|
588
886
|
openaiKey?: string;
|
|
589
887
|
}
|
|
888
|
+
/** Internal shape of a guardrail (matches `Guardrail` class from `public-api.ts`). */
|
|
590
889
|
interface Guardrail {
|
|
591
890
|
/** Name for logging when triggered */
|
|
592
891
|
name: string;
|
|
@@ -597,6 +896,7 @@ interface Guardrail {
|
|
|
597
896
|
/** Replacement text spoken when guardrail triggers */
|
|
598
897
|
replacement?: string;
|
|
599
898
|
}
|
|
899
|
+
/** Per-call context passed to every pipeline hook. */
|
|
600
900
|
interface HookContext {
|
|
601
901
|
readonly callId: string;
|
|
602
902
|
readonly caller: string;
|
|
@@ -606,6 +906,32 @@ interface HookContext {
|
|
|
606
906
|
text: string;
|
|
607
907
|
}>;
|
|
608
908
|
}
|
|
909
|
+
/**
|
|
910
|
+
* Streaming-friendly post-LLM transform hook. Three tiers, all optional:
|
|
911
|
+
*
|
|
912
|
+
* - **`onChunk`** — per-token pure transform. Sync, must be fast (~0 ms
|
|
913
|
+
* budget). Use for: regex replace, markdown strip, profanity char-swap.
|
|
914
|
+
* - **`onSentence`** — per-sentence rewrite. Runs between the sentence
|
|
915
|
+
* chunker and TTS. Returns rewritten text or `null` to keep original;
|
|
916
|
+
* ``""`` (empty string) drops the sentence silently. Latency budget
|
|
917
|
+
* ~50–300 ms. Use for: PII redaction, persona overlay, refusal swap.
|
|
918
|
+
* - **`onResponse`** — per-full-response rewrite. **Blocks streaming TTS**
|
|
919
|
+
* until the LLM stream completes, then runs once on the full text.
|
|
920
|
+
* Latency cost: 500 ms – 2 s. Use only when sentence-level rewrite is
|
|
921
|
+
* insufficient (e.g. structured output validation). Avoid in latency-
|
|
922
|
+
* sensitive paths.
|
|
923
|
+
*
|
|
924
|
+
* The legacy single-callable signature `(text, ctx) => string | null` (sync or async) is still
|
|
925
|
+
* accepted; it maps to `onResponse` and emits a deprecation warning.
|
|
926
|
+
*/
|
|
927
|
+
interface AfterLLMHook {
|
|
928
|
+
onChunk?: (chunk: string) => string;
|
|
929
|
+
onSentence?: (sentence: string, ctx: HookContext) => string | null | Promise<string | null>;
|
|
930
|
+
onResponse?: (text: string, ctx: HookContext) => string | null | Promise<string | null>;
|
|
931
|
+
}
|
|
932
|
+
/** Legacy single-callable form of after_llm. Maps to `onResponse`. @deprecated Pass `{ onResponse }` instead. */
|
|
933
|
+
type AfterLLMLegacy = (text: string, ctx: HookContext) => string | null | Promise<string | null>;
|
|
934
|
+
/** Optional callbacks fired at each stage of the STT→LLM→TTS pipeline. */
|
|
609
935
|
interface PipelineHooks {
|
|
610
936
|
/** Called with the raw PCM audio chunk before it is forwarded to the STT provider.
|
|
611
937
|
* Return null to drop the chunk (e.g., for custom VAD gating). */
|
|
@@ -616,10 +942,16 @@ interface PipelineHooks {
|
|
|
616
942
|
* Return null to keep them, or return a new list to replace
|
|
617
943
|
* (useful for prompt injection, message filtering, RAG augmentation). */
|
|
618
944
|
beforeLlm?: (messages: Array<Record<string, unknown>>, ctx: HookContext) => Array<Record<string, unknown>> | null | Promise<Array<Record<string, unknown>> | null>;
|
|
619
|
-
/**
|
|
620
|
-
*
|
|
621
|
-
*
|
|
622
|
-
|
|
945
|
+
/**
|
|
946
|
+
* Post-LLM transform. Pass either:
|
|
947
|
+
* - the new **3-tier object** (`{ onChunk, onSentence, onResponse }`) for
|
|
948
|
+
* streaming-friendly per-chunk / per-sentence / per-response transforms;
|
|
949
|
+
* - or the **legacy callable** `(text, ctx) => string | null` (sync or async; deprecated) which
|
|
950
|
+
* maps to `onResponse` semantics and blocks streaming TTS.
|
|
951
|
+
*
|
|
952
|
+
* See `AfterLLMHook` for the full tier contract.
|
|
953
|
+
*/
|
|
954
|
+
afterLlm?: AfterLLMHook | AfterLLMLegacy;
|
|
623
955
|
/** Called before TTS, per-sentence in streaming mode. Return null to skip TTS for this sentence. */
|
|
624
956
|
beforeSynthesize?: (text: string, ctx: HookContext) => string | null | Promise<string | null>;
|
|
625
957
|
/** Called after TTS produces an audio chunk. Return null to discard this chunk. */
|
|
@@ -647,11 +979,27 @@ interface BackgroundAudioPlayer$1 {
|
|
|
647
979
|
mix(agentPcm: Buffer, sampleRate: number): Promise<Buffer>;
|
|
648
980
|
stop(): Promise<void>;
|
|
649
981
|
}
|
|
982
|
+
/**
|
|
983
|
+
* Configuration for a local-mode voice AI agent.
|
|
984
|
+
*
|
|
985
|
+
* Several fields (``voice``, ``model``, ``language``) are also carried by
|
|
986
|
+
* engine markers (``OpenAIRealtime``, ``ElevenLabsConvAI``) and by the
|
|
987
|
+
* server-instantiated adapters. When the same setting is set in two places,
|
|
988
|
+
* precedence is:
|
|
989
|
+
*
|
|
990
|
+
* 1. **Explicit field on** ``phone.agent({ voice, model, language })`` always wins.
|
|
991
|
+
* 2. Otherwise, when an ``engine`` is passed, the engine's value is used
|
|
992
|
+
* (see ``Patter.agent()`` for the resolution).
|
|
993
|
+
* 3. Otherwise, the AgentOptions default is used.
|
|
994
|
+
*/
|
|
995
|
+
/** Configuration for a local-mode voice AI agent (passed to `phone.agent({...})`). */
|
|
650
996
|
interface AgentOptions {
|
|
651
997
|
systemPrompt: string;
|
|
652
998
|
/**
|
|
653
999
|
* Voice preset. When ``engine`` is provided, its ``voice`` is used unless
|
|
654
|
-
* explicitly overridden here.
|
|
1000
|
+
* explicitly overridden here. Format depends on the engine:
|
|
1001
|
+
* OpenAI Realtime accepts a name (``'alloy'``, ``'echo'``, ...);
|
|
1002
|
+
* ElevenLabs ConvAI accepts a voice ID.
|
|
655
1003
|
*/
|
|
656
1004
|
voice?: string;
|
|
657
1005
|
/**
|
|
@@ -659,10 +1007,56 @@ interface AgentOptions {
|
|
|
659
1007
|
* unless explicitly overridden here.
|
|
660
1008
|
*/
|
|
661
1009
|
model?: string;
|
|
1010
|
+
/**
|
|
1011
|
+
* BCP-47 language code (e.g. ``'en'``, ``'it'``). Forwarded to STT (in
|
|
1012
|
+
* pipeline mode) and to the engine adapter at call time. STTConfig has its
|
|
1013
|
+
* own ``language`` field for the rare case where STT must use a different
|
|
1014
|
+
* language than the rest of the pipeline.
|
|
1015
|
+
*/
|
|
662
1016
|
language?: string;
|
|
663
1017
|
firstMessage?: string;
|
|
664
1018
|
/** Tool definitions — ``Tool`` class instances from ``getpatter``. */
|
|
665
1019
|
tools?: Array<Tool>;
|
|
1020
|
+
/**
|
|
1021
|
+
* Model Context Protocol (MCP) servers to plug into this agent. Each
|
|
1022
|
+
* server is queried at call start via ``tools/list`` and its tools
|
|
1023
|
+
* are merged into ``tools`` with synthetic handlers that dispatch
|
|
1024
|
+
* back through the MCP client. Lets you connect to existing MCP
|
|
1025
|
+
* servers (Google Workspace, PayPal, GitHub, Postgres, …) without
|
|
1026
|
+
* writing a wrapper handler.
|
|
1027
|
+
*
|
|
1028
|
+
* Each entry is either a URL string (shorthand for
|
|
1029
|
+
* ``{ url, transport: 'streamable-http' }``) or an explicit object
|
|
1030
|
+
* with optional ``headers`` for auth and a ``name`` for telemetry.
|
|
1031
|
+
*
|
|
1032
|
+
* Requires the optional dependency ``@modelcontextprotocol/sdk``.
|
|
1033
|
+
* When unset, MCP is fully disabled and the SDK ships without the
|
|
1034
|
+
* dependency installed.
|
|
1035
|
+
*
|
|
1036
|
+
* Cost: one HTTP handshake + ``tools/list`` round-trip per server at
|
|
1037
|
+
* call start (~50-200 ms × N servers). Future iterations may cache
|
|
1038
|
+
* the discovered list process-wide.
|
|
1039
|
+
*/
|
|
1040
|
+
mcpServers?: ReadonlyArray<MCPServerConfig>;
|
|
1041
|
+
/**
|
|
1042
|
+
* When ``true``, ship ``systemPrompt`` to the LLM verbatim. Default
|
|
1043
|
+
* (``false``) prepends a phone-friendly preamble that instructs the
|
|
1044
|
+
* model to avoid markdown, emojis, bullet lists, and verbose replies —
|
|
1045
|
+
* the conventions live phone calls require.
|
|
1046
|
+
*/
|
|
1047
|
+
disablePhonePreamble?: boolean;
|
|
1048
|
+
/**
|
|
1049
|
+
* Acoustic echo cancellation. When `true` (pipeline mode only) the SDK
|
|
1050
|
+
* instantiates an `NlmsEchoCanceller` that subtracts the agent's own
|
|
1051
|
+
* TTS bleed from the inbound mic stream before VAD/STT see it.
|
|
1052
|
+
* Strongly recommended for speakerphone / tunnel deployments where the
|
|
1053
|
+
* bleed otherwise keeps VAD permanently in "speaking" state and
|
|
1054
|
+
* barge-in only fires during natural TTS pauses. Off by default —
|
|
1055
|
+
* handset / headset deployments don't have the bleed, and the 0.5–2 s
|
|
1056
|
+
* convergence period would briefly attenuate caller speech if they
|
|
1057
|
+
* spoke before any TTS played.
|
|
1058
|
+
*/
|
|
1059
|
+
echoCancellation?: boolean;
|
|
666
1060
|
/**
|
|
667
1061
|
* Realtime / ConvAI engine instance. When present, the agent runs in the
|
|
668
1062
|
* matching mode (``openai_realtime`` or ``elevenlabs_convai``). When absent,
|
|
@@ -709,8 +1103,22 @@ interface AgentOptions {
|
|
|
709
1103
|
* Default: 300.
|
|
710
1104
|
*/
|
|
711
1105
|
bargeInThresholdMs?: number;
|
|
1106
|
+
/**
|
|
1107
|
+
* When true, the sentence chunker emits the first clause of each response
|
|
1108
|
+
* on a soft punctuation boundary (",", em-dash, en-dash) once ~40 chars
|
|
1109
|
+
* have accumulated. Saves 200–500 ms TTFA on the first sentence of each
|
|
1110
|
+
* turn at the cost of slightly clipping prosody on the very first chunk.
|
|
1111
|
+
* Hard-disabled when ``language`` starts with ``"it"`` (Italian decimal
|
|
1112
|
+
* comma would split mid-number). Default: false.
|
|
1113
|
+
*
|
|
1114
|
+
* See SentenceChunker constructor for the full guard list (decimal,
|
|
1115
|
+
* currency, balanced delimiter, ellipsis).
|
|
1116
|
+
*/
|
|
1117
|
+
aggressiveFirstFlush?: boolean;
|
|
712
1118
|
}
|
|
1119
|
+
/** Pipeline-mode message handler — given full turn context, returns the agent's reply. */
|
|
713
1120
|
type PipelineMessageHandler = (data: Record<string, unknown>) => Promise<string>;
|
|
1121
|
+
/** Options for `Patter.serve({...})`. */
|
|
714
1122
|
interface ServeOptions {
|
|
715
1123
|
agent: AgentOptions;
|
|
716
1124
|
port?: number;
|
|
@@ -738,12 +1146,73 @@ interface ServeOptions {
|
|
|
738
1146
|
dashboardDb?: string;
|
|
739
1147
|
/** When true (default), persist dashboard data. */
|
|
740
1148
|
dashboardPersist?: boolean;
|
|
1149
|
+
/**
|
|
1150
|
+
* When true (default), `serve()` calls the carrier's API on startup to
|
|
1151
|
+
* point the configured phone number's webhook URL at this server. Set
|
|
1152
|
+
* to `false` when the webhook is managed externally (Terraform, an edge
|
|
1153
|
+
* gateway / voice-router, or any infra-as-code system) — otherwise every
|
|
1154
|
+
* boot will silently overwrite the externally-managed value.
|
|
1155
|
+
*
|
|
1156
|
+
* Must be set to `false` when:
|
|
1157
|
+
* - Twilio's voice_url should point at a router/gateway in front of
|
|
1158
|
+
* this server rather than directly at it.
|
|
1159
|
+
* - Multiple replicas share the same Twilio number; only one should
|
|
1160
|
+
* write the webhook.
|
|
1161
|
+
* - Compliance forbids the runtime from holding write credentials
|
|
1162
|
+
* against the carrier console.
|
|
1163
|
+
*
|
|
1164
|
+
* Ignored (treated as true) when `tunnel: true`, because the tunnel
|
|
1165
|
+
* hostname is dynamic and only known at runtime — the carrier MUST be
|
|
1166
|
+
* reconfigured for inbound calls to land.
|
|
1167
|
+
*/
|
|
1168
|
+
manageWebhook?: boolean;
|
|
1169
|
+
}
|
|
1170
|
+
/**
|
|
1171
|
+
* Normalised AMD (answering-machine detection) result emitted to
|
|
1172
|
+
* ``LocalCallOptions.onMachineDetection`` once the carrier reports back.
|
|
1173
|
+
* The ``raw`` field preserves the provider value verbatim so callers can
|
|
1174
|
+
* apply provider-specific logic; ``classification`` is the SDK's
|
|
1175
|
+
* carrier-agnostic projection that test/acceptance code should check.
|
|
1176
|
+
*/
|
|
1177
|
+
interface MachineDetectionResult {
|
|
1178
|
+
readonly call_id: string;
|
|
1179
|
+
readonly carrier: 'twilio' | 'telnyx';
|
|
1180
|
+
/** Carrier-agnostic projection. Use this in app code unless you really need the raw provider value. */
|
|
1181
|
+
readonly classification: 'human' | 'machine' | 'fax' | 'unknown';
|
|
1182
|
+
/**
|
|
1183
|
+
* Raw provider value:
|
|
1184
|
+
* - Twilio: ``human``, ``machine_start``, ``machine_end_beep``,
|
|
1185
|
+
* ``machine_end_silence``, ``machine_end_other``, ``fax``, ``unknown``.
|
|
1186
|
+
* - Telnyx: ``human``, ``machine``, ``not_sure``.
|
|
1187
|
+
*/
|
|
1188
|
+
readonly raw: string;
|
|
1189
|
+
/** Unix epoch seconds at which the result was received from the carrier. */
|
|
1190
|
+
readonly detected_at: number;
|
|
741
1191
|
}
|
|
1192
|
+
/** Options for `Patter.call({...})` to place an outbound call. */
|
|
742
1193
|
interface LocalCallOptions {
|
|
743
1194
|
to: string;
|
|
744
1195
|
agent: AgentOptions;
|
|
1196
|
+
/**
|
|
1197
|
+
* Enable answering-machine detection. **Defaults to ``true``** — the SDK
|
|
1198
|
+
* asks Twilio (``MachineDetection=DetectMessageEnd`` + Async AMD) or
|
|
1199
|
+
* Telnyx (``answering_machine_detection=greeting_end``) to classify
|
|
1200
|
+
* whoever picks up. Async AMD on Twilio adds ~0 answer-latency on human
|
|
1201
|
+
* pickups (the call connects immediately and the result arrives via
|
|
1202
|
+
* webhook 2-5 s later), so ON-by-default is safe. Pass ``false`` to
|
|
1203
|
+
* disable when you want to skip per-call AMD billing or you already
|
|
1204
|
+
* know the destination is a human.
|
|
1205
|
+
*/
|
|
745
1206
|
machineDetection?: boolean;
|
|
746
|
-
/**
|
|
1207
|
+
/**
|
|
1208
|
+
* Called once when the carrier finishes the AMD check. Fires for both
|
|
1209
|
+
* ``human`` and ``machine`` outcomes. Combine with ``voicemailMessage``
|
|
1210
|
+
* to get both the legacy voicemail-drop AND a result callback (the SDK
|
|
1211
|
+
* fires the callback after the drop is queued). Acceptance tests use
|
|
1212
|
+
* this to mark a run INVALID when ``classification !== 'human'``.
|
|
1213
|
+
*/
|
|
1214
|
+
onMachineDetection?: (result: MachineDetectionResult) => void | Promise<void>;
|
|
1215
|
+
/** If set, spoken as a voicemail message when AMD detects a machine. Implicitly enables ``machineDetection``. */
|
|
747
1216
|
voicemailMessage?: string;
|
|
748
1217
|
/** Dynamic variables merged into agent.variables before call. Override agent-level variables. */
|
|
749
1218
|
variables?: Record<string, string>;
|
|
@@ -770,6 +1239,7 @@ interface LocalCallOptions {
|
|
|
770
1239
|
* the JSONL/JSON files, the store is just a cache on top).
|
|
771
1240
|
*/
|
|
772
1241
|
|
|
1242
|
+
/** Snapshot of a call as held by the dashboard store. */
|
|
773
1243
|
interface CallRecord {
|
|
774
1244
|
call_id: string;
|
|
775
1245
|
caller: string;
|
|
@@ -792,10 +1262,12 @@ interface CallRecord {
|
|
|
792
1262
|
metrics?: Record<string, unknown> | null;
|
|
793
1263
|
[key: string]: unknown;
|
|
794
1264
|
}
|
|
1265
|
+
/** Server-Sent-Event payload broadcast by `MetricsStore` for live UI updates. */
|
|
795
1266
|
interface SSEEvent {
|
|
796
1267
|
type: string;
|
|
797
1268
|
data: Record<string, unknown>;
|
|
798
1269
|
}
|
|
1270
|
+
/** In-memory bounded ring buffer of recent calls plus active-call tracking. */
|
|
799
1271
|
declare class MetricsStore extends EventEmitter {
|
|
800
1272
|
private readonly maxCalls;
|
|
801
1273
|
private calls;
|
|
@@ -810,6 +1282,7 @@ declare class MetricsStore extends EventEmitter {
|
|
|
810
1282
|
maxCalls?: number;
|
|
811
1283
|
});
|
|
812
1284
|
private publish;
|
|
1285
|
+
/** Mark a call as in-progress (creates the row if it does not yet exist). */
|
|
813
1286
|
recordCallStart(data: Record<string, unknown>): void;
|
|
814
1287
|
/**
|
|
815
1288
|
* Pre-register an outbound call before any webhook fires. Lets the
|
|
@@ -823,15 +1296,23 @@ declare class MetricsStore extends EventEmitter {
|
|
|
823
1296
|
* row from active to completed so the UI freezes the live duration timer.
|
|
824
1297
|
*/
|
|
825
1298
|
updateCallStatus(callId: string, status: string, extra?: Record<string, unknown>): void;
|
|
1299
|
+
/** Append a single conversation turn to an active call and broadcast it via SSE. */
|
|
826
1300
|
recordTurn(data: Record<string, unknown>): void;
|
|
1301
|
+
/** Move a call from active to completed and persist its final metrics. */
|
|
827
1302
|
recordCallEnd(data: Record<string, unknown>, metrics?: Record<string, unknown> | null): void;
|
|
1303
|
+
/** Return a window of completed calls in newest-first order. */
|
|
828
1304
|
getCalls(limit?: number, offset?: number): CallRecord[];
|
|
1305
|
+
/** Look up a completed call by id (newest match wins). */
|
|
829
1306
|
getCall(callId: string): CallRecord | null;
|
|
830
1307
|
/** Look up an active call by id (returns undefined if not active or unknown). */
|
|
831
1308
|
getActive(callId: string): CallRecord | undefined;
|
|
1309
|
+
/** Return all currently active (not yet ended) calls. */
|
|
832
1310
|
getActiveCalls(): CallRecord[];
|
|
1311
|
+
/** Compute summary statistics across the buffered call history. */
|
|
833
1312
|
getAggregates(): Record<string, unknown>;
|
|
1313
|
+
/** Return calls whose `started_at` falls within `[fromTs, toTs]` (Unix seconds). */
|
|
834
1314
|
getCallsInRange(fromTs?: number, toTs?: number): CallRecord[];
|
|
1315
|
+
/** Number of completed calls currently in the ring buffer. */
|
|
835
1316
|
get callCount(): number;
|
|
836
1317
|
/**
|
|
837
1318
|
* Rebuild the in-memory call list from `metadata.json` files written by
|
|
@@ -846,10 +1327,185 @@ declare class MetricsStore extends EventEmitter {
|
|
|
846
1327
|
hydrate(logRoot: string | null | undefined): number;
|
|
847
1328
|
}
|
|
848
1329
|
|
|
1330
|
+
/** Async-or-sync callback. Sync return values are silently ignored. */
|
|
1331
|
+
type SpeechEventCallback = (payload: Readonly<Record<string, unknown>>) => void | Promise<void>;
|
|
1332
|
+
type UserState = "listening" | "speaking" | "thinking" | "away";
|
|
1333
|
+
type AgentState = "initializing" | "idle" | "listening" | "thinking" | "speaking";
|
|
1334
|
+
interface ConversationStateSnapshot {
|
|
1335
|
+
readonly user: UserState;
|
|
1336
|
+
readonly agent: AgentState;
|
|
1337
|
+
}
|
|
1338
|
+
type EouTrigger = "vad_silence" | "semantic_turn_detector" | "manual_commit";
|
|
1339
|
+
interface UserSpeechStartedOptions {
|
|
1340
|
+
readonly vadConfidence?: number;
|
|
1341
|
+
readonly audioOffsetMs?: number;
|
|
1342
|
+
readonly timestampMs?: number;
|
|
1343
|
+
}
|
|
1344
|
+
interface UserSpeechEndedOptions extends UserSpeechStartedOptions {
|
|
1345
|
+
readonly speechDurationMs: number;
|
|
1346
|
+
}
|
|
1347
|
+
interface UserSpeechEosOptions {
|
|
1348
|
+
readonly trigger: EouTrigger;
|
|
1349
|
+
readonly trailingSilenceMs?: number;
|
|
1350
|
+
readonly transcriptSoFar?: string;
|
|
1351
|
+
readonly timestampMs?: number;
|
|
1352
|
+
}
|
|
1353
|
+
interface AgentSpeechStartedOptions {
|
|
1354
|
+
readonly ttsProvider?: string;
|
|
1355
|
+
readonly engine?: string;
|
|
1356
|
+
readonly timestampMs?: number;
|
|
1357
|
+
}
|
|
1358
|
+
interface AgentSpeechEndedOptions {
|
|
1359
|
+
readonly speechDurationMs: number;
|
|
1360
|
+
readonly interrupted?: boolean;
|
|
1361
|
+
readonly timestampMs?: number;
|
|
1362
|
+
}
|
|
1363
|
+
interface LlmFirstTokenOptions {
|
|
1364
|
+
readonly llmProvider: string;
|
|
1365
|
+
readonly model: string;
|
|
1366
|
+
readonly timestampMs?: number;
|
|
1367
|
+
}
|
|
1368
|
+
interface AudioOutOptions {
|
|
1369
|
+
readonly ttsProvider: string;
|
|
1370
|
+
readonly timestampMs?: number;
|
|
1371
|
+
}
|
|
1372
|
+
/**
|
|
1373
|
+
* Dispatcher for the seven turn-taking events. A single instance is
|
|
1374
|
+
* shared by every `Patter` instance and survives across calls — the per-turn
|
|
1375
|
+
* state (`turnIdx`, `firstTokenForTurn`, `firstAudioForTurn`) lives here too
|
|
1376
|
+
* so the runner sees a monotonically-increasing turn index across a session.
|
|
1377
|
+
*
|
|
1378
|
+
* Backwards compatibility: every callback defaults to `null`. Existing users
|
|
1379
|
+
* who never set a callback see exactly the previous behaviour and zero
|
|
1380
|
+
* overhead.
|
|
1381
|
+
*/
|
|
1382
|
+
declare class SpeechEvents {
|
|
1383
|
+
onUserSpeechStarted: SpeechEventCallback | null;
|
|
1384
|
+
onUserSpeechEnded: SpeechEventCallback | null;
|
|
1385
|
+
onUserSpeechEos: SpeechEventCallback | null;
|
|
1386
|
+
onAgentSpeechStarted: SpeechEventCallback | null;
|
|
1387
|
+
onAgentSpeechEnded: SpeechEventCallback | null;
|
|
1388
|
+
onLlmToken: SpeechEventCallback | null;
|
|
1389
|
+
onAudioOut: SpeechEventCallback | null;
|
|
1390
|
+
private userState;
|
|
1391
|
+
private agentState;
|
|
1392
|
+
private turnIdxValue;
|
|
1393
|
+
private firstTokenForTurn;
|
|
1394
|
+
private firstAudioForTurn;
|
|
1395
|
+
private callStartMs;
|
|
1396
|
+
/** Snapshot of the current per-side state of the call. */
|
|
1397
|
+
get conversationState(): ConversationStateSnapshot;
|
|
1398
|
+
/** Current 0-based turn index. Increments on every EOU commit. */
|
|
1399
|
+
get turnIdx(): number;
|
|
1400
|
+
/** Record the call-start wall-clock for ``audioOffsetMs`` math. */
|
|
1401
|
+
markCallStarted(tsMs?: number): void;
|
|
1402
|
+
/** Reset per-turn cursors. Called automatically on EOU commit. */
|
|
1403
|
+
resetTurnState(): void;
|
|
1404
|
+
/** Fire on the VAD positive edge of the inbound stream.
|
|
1405
|
+
*
|
|
1406
|
+
* Do not coalesce: the runner consumes positive→negative→positive
|
|
1407
|
+
* transitions in order. For server-VAD engines (OpenAI Realtime, Telnyx
|
|
1408
|
+
* Voice AI), forward the upstream signal directly — do not re-run a VAD
|
|
1409
|
+
* layer on top.
|
|
1410
|
+
*/
|
|
1411
|
+
fireUserSpeechStarted(opts?: UserSpeechStartedOptions): Promise<void>;
|
|
1412
|
+
/** Fire on the VAD trailing edge (raw — *not* EOU).
|
|
1413
|
+
*
|
|
1414
|
+
* `speechDurationMs` is the length of the segment that just ended; the
|
|
1415
|
+
* runner uses it to compute talk-ratio.
|
|
1416
|
+
*/
|
|
1417
|
+
fireUserSpeechEnded(opts: UserSpeechEndedOptions): Promise<void>;
|
|
1418
|
+
/** Fire on the committed end-of-utterance.
|
|
1419
|
+
*
|
|
1420
|
+
* This is the canonical "user finished" signal — VAD edge + trailing
|
|
1421
|
+
* silence + (optionally) a semantic turn-detector model agreement. The
|
|
1422
|
+
* runner uses the timestamp of this event to compute
|
|
1423
|
+
* `eos_to_first_token_ms` (Hamming AI threshold: <800 ms good, >1500 ms
|
|
1424
|
+
* critical).
|
|
1425
|
+
*/
|
|
1426
|
+
fireUserSpeechEos(opts: UserSpeechEosOptions): Promise<void>;
|
|
1427
|
+
/** Fire on the FIRST audio chunk of the current agent turn that crosses
|
|
1428
|
+
* to the wire (not the first chunk produced by TTS).
|
|
1429
|
+
*
|
|
1430
|
+
* The user hears the wire chunk, so this is the timestamp the runner
|
|
1431
|
+
* anchors barge-in latency on.
|
|
1432
|
+
*/
|
|
1433
|
+
fireAgentSpeechStarted(opts?: AgentSpeechStartedOptions): Promise<void>;
|
|
1434
|
+
/** Fire on the LAST audio chunk of the current agent turn.
|
|
1435
|
+
*
|
|
1436
|
+
* `interrupted=true` marks the turn as cancelled by barge-in; the runner
|
|
1437
|
+
* treats it as the `agent_speech_stopped` half of a barge-in pair.
|
|
1438
|
+
*/
|
|
1439
|
+
fireAgentSpeechEnded(opts: AgentSpeechEndedOptions): Promise<void>;
|
|
1440
|
+
/** Fire on the FIRST LLM token of the current turn (TTFT marker).
|
|
1441
|
+
*
|
|
1442
|
+
* Idempotent within a turn — guarded by `firstTokenForTurn`. Combined
|
|
1443
|
+
* with `on_user_speech_eos.timestamp_ms` the runner computes
|
|
1444
|
+
* `eos_to_first_token_ms`.
|
|
1445
|
+
*/
|
|
1446
|
+
fireLlmFirstToken(opts: LlmFirstTokenOptions): Promise<void>;
|
|
1447
|
+
/** Fire on the FIRST TTS audio chunk for the current turn.
|
|
1448
|
+
*
|
|
1449
|
+
* Distinct from `fireAgentSpeechStarted`: this is the agent-side buffer
|
|
1450
|
+
* arrival (TTS warmup), not the wire-time chunk. Idempotent within a
|
|
1451
|
+
* turn — guarded by `firstAudioForTurn`.
|
|
1452
|
+
*/
|
|
1453
|
+
fireAudioOut(opts: AudioOutOptions): Promise<void>;
|
|
1454
|
+
private resolveOffset;
|
|
1455
|
+
private dispatch;
|
|
1456
|
+
}
|
|
1457
|
+
|
|
1458
|
+
/** Top-level SDK entry point — wraps a carrier + embedded server + agent loop. */
|
|
849
1459
|
declare class Patter {
|
|
850
1460
|
private localConfig;
|
|
851
1461
|
private embeddedServer;
|
|
852
1462
|
private tunnelHandle;
|
|
1463
|
+
private _tunnelReadyResolve;
|
|
1464
|
+
private _tunnelReadyReject;
|
|
1465
|
+
private _tunnelReady;
|
|
1466
|
+
private _readyResolve;
|
|
1467
|
+
private _readyReject;
|
|
1468
|
+
private _ready;
|
|
1469
|
+
/**
|
|
1470
|
+
* True iff ``localConfig.webhookUrl`` was populated by ``serve()`` from a
|
|
1471
|
+
* freshly-started cloudflared tunnel (rather than by the constructor from
|
|
1472
|
+
* an explicit ``webhookUrl`` / ``StaticTunnel`` config). ``disconnect()``
|
|
1473
|
+
* uses this flag to clear ONLY the auto-assigned hostname so a subsequent
|
|
1474
|
+
* ``serve()`` call (e.g. from a plugin's ``ensureServing`` cycle that
|
|
1475
|
+
* disposes + restarts on agent-identity changes) does not throw
|
|
1476
|
+
* ``Cannot use both tunnel: true and webhookUrl``.
|
|
1477
|
+
*/
|
|
1478
|
+
private tunnelOwnsWebhookUrl;
|
|
1479
|
+
/**
|
|
1480
|
+
* Speech-edge events for turn-taking instrumentation. Public surface: the
|
|
1481
|
+
* seven `on*` proxy accessors below plus the `conversationState` snapshot.
|
|
1482
|
+
* Defaults are no-ops — existing users who never set a callback see exactly
|
|
1483
|
+
* the previous behaviour.
|
|
1484
|
+
*
|
|
1485
|
+
* See `src/_speech-events.ts` for the full event taxonomy and the
|
|
1486
|
+
* industry-alignment table (LiveKit / Pipecat / OpenAI Realtime).
|
|
1487
|
+
*/
|
|
1488
|
+
readonly speechEvents: SpeechEvents;
|
|
1489
|
+
get onUserSpeechStarted(): SpeechEventCallback | null;
|
|
1490
|
+
set onUserSpeechStarted(cb: SpeechEventCallback | null);
|
|
1491
|
+
get onUserSpeechEnded(): SpeechEventCallback | null;
|
|
1492
|
+
set onUserSpeechEnded(cb: SpeechEventCallback | null);
|
|
1493
|
+
get onUserSpeechEos(): SpeechEventCallback | null;
|
|
1494
|
+
set onUserSpeechEos(cb: SpeechEventCallback | null);
|
|
1495
|
+
get onAgentSpeechStarted(): SpeechEventCallback | null;
|
|
1496
|
+
set onAgentSpeechStarted(cb: SpeechEventCallback | null);
|
|
1497
|
+
get onAgentSpeechEnded(): SpeechEventCallback | null;
|
|
1498
|
+
set onAgentSpeechEnded(cb: SpeechEventCallback | null);
|
|
1499
|
+
get onLlmToken(): SpeechEventCallback | null;
|
|
1500
|
+
set onLlmToken(cb: SpeechEventCallback | null);
|
|
1501
|
+
get onAudioOut(): SpeechEventCallback | null;
|
|
1502
|
+
set onAudioOut(cb: SpeechEventCallback | null);
|
|
1503
|
+
/**
|
|
1504
|
+
* Snapshot of the current per-side state of the call.
|
|
1505
|
+
* Mirrors LiveKit's `user_state_changed` / `agent_state_changed`
|
|
1506
|
+
* payloads. Read-only and safe to call at any time.
|
|
1507
|
+
*/
|
|
1508
|
+
get conversationState(): ConversationStateSnapshot;
|
|
853
1509
|
/**
|
|
854
1510
|
* Live `MetricsStore` for the embedded server. Returns `null` before
|
|
855
1511
|
* `serve()` is called. Exposed so integrations like `PatterTool` can
|
|
@@ -857,12 +1513,73 @@ declare class Patter {
|
|
|
857
1513
|
* `call_start`, `call_end`).
|
|
858
1514
|
*/
|
|
859
1515
|
get metricsStore(): MetricsStore | null;
|
|
1516
|
+
/**
|
|
1517
|
+
* Resolves to the public webhook hostname as soon as it is known —
|
|
1518
|
+
* either statically configured or freshly minted by the tunnel.
|
|
1519
|
+
*
|
|
1520
|
+
* **Prefer `phone.ready` for outbound calls.** This promise resolves
|
|
1521
|
+
* before the embedded HTTP / WebSocket server is in `listen` state, so
|
|
1522
|
+
* a `phone.call` placed immediately afterwards can still race the
|
|
1523
|
+
* Twilio Media Streams upgrade and produce a "11100 Invalid URL
|
|
1524
|
+
* format" call drop on answer.
|
|
1525
|
+
*
|
|
1526
|
+
* Kept as a separate signal because some integrations (e.g. webhook
|
|
1527
|
+
* registration) only need the hostname, not the WS server.
|
|
1528
|
+
*/
|
|
1529
|
+
get tunnelReady(): Promise<string>;
|
|
1530
|
+
/**
|
|
1531
|
+
* Resolves to the public webhook hostname once the SDK is fully ready
|
|
1532
|
+
* to handle carrier callbacks: tunnel resolved, carrier auto-config
|
|
1533
|
+
* complete, and the embedded HTTP / WS server in `listen` state.
|
|
1534
|
+
*
|
|
1535
|
+
* Use this for outbound calls instead of guessing `setTimeout` after
|
|
1536
|
+
* `void phone.serve(...)`:
|
|
1537
|
+
*
|
|
1538
|
+
* ```ts
|
|
1539
|
+
* void phone.serve({ agent, tunnel: true });
|
|
1540
|
+
* await phone.ready;
|
|
1541
|
+
* await phone.call({ to: '+15550001234', agent });
|
|
1542
|
+
* ```
|
|
1543
|
+
*
|
|
1544
|
+
* Rejects with the underlying exception if `serve()` fails before the
|
|
1545
|
+
* server is listening.
|
|
1546
|
+
*/
|
|
1547
|
+
get ready(): Promise<string>;
|
|
860
1548
|
constructor(options: LocalOptions);
|
|
1549
|
+
/** Resolve user-supplied agent options against engine defaults and return the merged config. */
|
|
861
1550
|
agent(opts: AgentOptions): AgentOptions;
|
|
1551
|
+
/** Boot the embedded HTTP/WebSocket server, configure the carrier webhook, and resolve `ready`. */
|
|
862
1552
|
serve(opts: ServeOptions): Promise<void>;
|
|
1553
|
+
private _serveImpl;
|
|
1554
|
+
/** Run the agent in interactive terminal-test mode (no real telephony). */
|
|
863
1555
|
test(opts: ServeOptions): Promise<void>;
|
|
1556
|
+
/** Place an outbound call via the configured carrier. */
|
|
864
1557
|
call(options: LocalCallOptions): Promise<void>;
|
|
1558
|
+
/**
|
|
1559
|
+
* Stop the embedded server and any running tunnel. Safe to call multiple
|
|
1560
|
+
* times. Leaves the instance reusable: a subsequent ``serve()`` works as
|
|
1561
|
+
* if the previous lifecycle never happened.
|
|
1562
|
+
*/
|
|
865
1563
|
disconnect(): Promise<void>;
|
|
1564
|
+
/**
|
|
1565
|
+
* Terminate an active call on the configured carrier.
|
|
1566
|
+
*
|
|
1567
|
+
* Posts a hangup to the carrier (Twilio
|
|
1568
|
+
* ``Calls(callSid).update({status:'completed'})`` or Telnyx
|
|
1569
|
+
* ``/v2/calls/{callControlId}/actions/hangup``) so the bridge tears down
|
|
1570
|
+
* gracefully — the SDK's WebSocket handler then fires ``onCallEnd`` with
|
|
1571
|
+
* the final ``CallMetrics`` before the WS closes.
|
|
1572
|
+
*
|
|
1573
|
+
* Use this when the host application needs to end a call programmatically
|
|
1574
|
+
* without going through the LLM tool-call path (e.g. an admin override,
|
|
1575
|
+
* a watchdog, or an integration test runner).
|
|
1576
|
+
*
|
|
1577
|
+
* @param callSid - Carrier-issued call identifier (Twilio Call SID or
|
|
1578
|
+
* Telnyx call_control_id) — ``call(...)`` resolves to ``void``, so use the id
|
|
1579
|
+
* captured in the ``onCallStart`` callback's payload.
|
|
1580
|
+
* @throws Error when ``callSid`` is empty or no carrier is configured.
|
|
1581
|
+
*/
|
|
1582
|
+
endCall(callSid: string): Promise<void>;
|
|
866
1583
|
}
|
|
867
1584
|
|
|
868
1585
|
/**
|
|
@@ -909,13 +1626,23 @@ interface DefineToolInput {
|
|
|
909
1626
|
*/
|
|
910
1627
|
declare function defineTool(input: DefineToolInput): ToolDefinition;
|
|
911
1628
|
|
|
1629
|
+
/**
|
|
1630
|
+
* Process-wide logger used by the SDK.
|
|
1631
|
+
*
|
|
1632
|
+
* Provides the in-library logger abstraction (`getLogger`/`setLogger`) and
|
|
1633
|
+
* a default console-based implementation. Library code MUST use these helpers
|
|
1634
|
+
* rather than calling `console.*` directly so applications can route logs.
|
|
1635
|
+
*/
|
|
1636
|
+
/** Minimal logger interface implemented by the default console logger and any user-supplied replacement. */
|
|
912
1637
|
interface Logger {
|
|
913
1638
|
info(message: string, ...args: unknown[]): void;
|
|
914
1639
|
warn(message: string, ...args: unknown[]): void;
|
|
915
1640
|
error(message: string, ...args: unknown[]): void;
|
|
916
1641
|
debug(message: string, ...args: unknown[]): void;
|
|
917
1642
|
}
|
|
1643
|
+
/** Return the active logger (defaults to a console-backed implementation). */
|
|
918
1644
|
declare function getLogger(): Logger;
|
|
1645
|
+
/** Replace the process-wide logger; useful for routing SDK logs into a host app's logger. */
|
|
919
1646
|
declare function setLogger(logger: Logger): void;
|
|
920
1647
|
|
|
921
1648
|
/**
|
|
@@ -925,9 +1652,6 @@ declare function setLogger(logger: Logger): void;
|
|
|
925
1652
|
* Uses regex-based marker replacement for robust sentence boundary
|
|
926
1653
|
* detection, handling abbreviations, acronyms, decimals, websites,
|
|
927
1654
|
* ellipsis, and CJK punctuation.
|
|
928
|
-
*
|
|
929
|
-
* Algorithm adapted from LiveKit Agents (Apache 2.0):
|
|
930
|
-
* https://github.com/livekit/agents
|
|
931
1655
|
*/
|
|
932
1656
|
/** Default minimum sentence length before emitting. */
|
|
933
1657
|
declare const DEFAULT_MIN_SENTENCE_LEN = 20;
|
|
@@ -951,9 +1675,29 @@ declare class SentenceChunker {
|
|
|
951
1675
|
private buffer;
|
|
952
1676
|
private readonly minSentenceLen;
|
|
953
1677
|
private readonly minWordsForShortFlush;
|
|
1678
|
+
private readonly aggressiveFirstMinLen;
|
|
1679
|
+
private readonly aggressiveFirstFlush;
|
|
1680
|
+
private readonly language;
|
|
1681
|
+
private isFirstFlush;
|
|
954
1682
|
constructor(options?: {
|
|
955
1683
|
minSentenceLen?: number;
|
|
956
1684
|
minWordsForShortFlush?: number;
|
|
1685
|
+
/**
|
|
1686
|
+
* When true, the chunker emits the first clause of each response on a
|
|
1687
|
+
* soft punctuation boundary (",", em-dash, en-dash) once
|
|
1688
|
+
* `aggressiveFirstMinLen` characters accumulate. Saves 200-500 ms TTFA
|
|
1689
|
+
* on the first sentence of each turn. Subsequent sentences fall through
|
|
1690
|
+
* to the standard sentence-boundary path. Default: false.
|
|
1691
|
+
*/
|
|
1692
|
+
aggressiveFirstFlush?: boolean;
|
|
1693
|
+
aggressiveFirstMinLen?: number;
|
|
1694
|
+
/**
|
|
1695
|
+
* BCP-47-ish language tag. Italian uses comma as decimal separator
|
|
1696
|
+
* (3,14) and dot as thousands (1.000) — both invert the English
|
|
1697
|
+
* convention — so aggressive comma flush is hard-disabled when language
|
|
1698
|
+
* starts with "it" regardless of `aggressiveFirstFlush`. Default: "en".
|
|
1699
|
+
*/
|
|
1700
|
+
language?: string;
|
|
957
1701
|
});
|
|
958
1702
|
/**
|
|
959
1703
|
* Feed a token. Returns zero or more complete sentences.
|
|
@@ -964,10 +1708,11 @@ declare class SentenceChunker {
|
|
|
964
1708
|
* sentence, all but the last (potentially incomplete) are emitted.
|
|
965
1709
|
* - **Short-flush path** — when the buffer is shorter than `minSentenceLen`
|
|
966
1710
|
* but ends with a sentence terminator AND has at least
|
|
967
|
-
* `minWordsForShortFlush` whitespace-separated words
|
|
968
|
-
*
|
|
969
|
-
*
|
|
970
|
-
* `
|
|
1711
|
+
* `minWordsForShortFlush` whitespace-separated words (default 1 — a
|
|
1712
|
+
* single-word reply like `"Yes."` flushes immediately for low TTS
|
|
1713
|
+
* TTFB). Acronym ("U.S.") and decimal ("f(x) = 2.") guards still block
|
|
1714
|
+
* dangerous cases. Bump `minWordsForShortFlush` to 2+ to keep
|
|
1715
|
+
* single-word utterances buffered until `flush()`.
|
|
971
1716
|
*/
|
|
972
1717
|
push(token: string): string[];
|
|
973
1718
|
/**
|
|
@@ -975,18 +1720,41 @@ declare class SentenceChunker {
|
|
|
975
1720
|
*
|
|
976
1721
|
* A buffer qualifies when **all** of these hold:
|
|
977
1722
|
* 1. Last non-whitespace char is a sentence terminator.
|
|
978
|
-
* 2. Word count is at least `minWordsForShortFlush` (default
|
|
979
|
-
* single-word
|
|
1723
|
+
* 2. Word count is at least `minWordsForShortFlush` (default 1 —
|
|
1724
|
+
* single-word replies like `"Yes."` flush immediately).
|
|
980
1725
|
* 3. The buffer contains exactly one terminator (the trailing one).
|
|
981
1726
|
* Multiple terminators mean we may be mid-stream of a longer merged
|
|
982
1727
|
* utterance like `"Hey! Hi! Hello! This is a sentence."` — let the
|
|
983
1728
|
* standard path keep merging.
|
|
984
1729
|
* 4. The char immediately before the terminator is NOT a digit (avoids
|
|
985
1730
|
* decimal mid-stream like `"f(x) = x * 2."` flushing before `54`).
|
|
986
|
-
* 5. The
|
|
987
|
-
*
|
|
1731
|
+
* 5. The trailing word is NOT a short ASCII all-caps acronym of 1-3 chars
|
|
1732
|
+
* (`"U."` / `"U.S."` / `"USA."`).
|
|
1733
|
+
* 6. The trailing word is NOT a known honorific from any of the
|
|
1734
|
+
* per-language `HONORIFICS_*` constants (`"Mr."`, `"Sr."`, `"Dr."`,
|
|
1735
|
+
* `"Hr."`, `"Mme."`, ...).
|
|
988
1736
|
*/
|
|
989
1737
|
private maybeShortFlush;
|
|
1738
|
+
/**
|
|
1739
|
+
* Try to flush the first clause of the response on a soft punctuation
|
|
1740
|
+
* boundary (comma / em-dash / en-dash) to minimise TTFA.
|
|
1741
|
+
*
|
|
1742
|
+
* Returns the flushed clause text (with terminator) or `null` if no safe
|
|
1743
|
+
* boundary is found. All of these guards must pass:
|
|
1744
|
+
*
|
|
1745
|
+
* 1. **Min length** — buffer ≥ `aggressiveFirstMinLen` (default 40).
|
|
1746
|
+
* 2. **Trailing terminator** — last non-whitespace char in `SOFT_TERMINATORS`.
|
|
1747
|
+
* 3. **Decimal/thousands guard** — refuse if comma is between two digits
|
|
1748
|
+
* or surrounded by digit-thousands grouping.
|
|
1749
|
+
* 4. **Currency guard** — refuse if a currency symbol appears in the
|
|
1750
|
+
* preceding 8 characters.
|
|
1751
|
+
* 5. **Balanced delimiter** — refuse if open parens/brackets/braces or
|
|
1752
|
+
* unmatched double-quotes are still pending.
|
|
1753
|
+
* 6. **Ellipsis** — refuse if buffer ends with `...` or `…`.
|
|
1754
|
+
* 7. **Sub-token ambiguity** — only fire when at least one trailing char
|
|
1755
|
+
* after the terminator has arrived.
|
|
1756
|
+
*/
|
|
1757
|
+
private maybeAggressiveFirstFlush;
|
|
990
1758
|
/** Flush remaining buffer as final sentence(s). Call at end of stream. */
|
|
991
1759
|
flush(): string[];
|
|
992
1760
|
/** Discard buffered text. Call on interrupt. */
|
|
@@ -1019,21 +1787,83 @@ declare function filterEmoji(text: string): string;
|
|
|
1019
1787
|
*/
|
|
1020
1788
|
declare function filterForTTS(text: string): string;
|
|
1021
1789
|
|
|
1790
|
+
/**
|
|
1791
|
+
* Public error taxonomy for the Patter SDK.
|
|
1792
|
+
*
|
|
1793
|
+
* Every Patter exception carries a stable, machine-readable {@link ErrorCode}
|
|
1794
|
+
* on its `code` property. Downstream code can branch on the code without
|
|
1795
|
+
* relying on class name strings or message parsing.
|
|
1796
|
+
*
|
|
1797
|
+
* The class hierarchy is preserved for backward compatibility — existing
|
|
1798
|
+
* `instanceof PatterConnectionError` checks keep working — and the enum is
|
|
1799
|
+
* purely additive.
|
|
1800
|
+
*
|
|
1801
|
+
* Mirrored byte-for-byte by the Python `ErrorCode` StrEnum in
|
|
1802
|
+
* `libraries/python/getpatter/exceptions.py`.
|
|
1803
|
+
*/
|
|
1804
|
+
/**
|
|
1805
|
+
* Stable, machine-readable error codes attached to every Patter exception.
|
|
1806
|
+
*
|
|
1807
|
+
* Values are short, `UPPER_SNAKE_CASE` strings. Existing values must never
|
|
1808
|
+
* change — downstream callers branch on them. New codes are additive.
|
|
1809
|
+
*
|
|
1810
|
+
* This is shipped as a `const` object plus value-union type rather than a
|
|
1811
|
+
* TS `enum` so it's tree-shakeable and compatible with `verbatimModuleSyntax`.
|
|
1812
|
+
*/
|
|
1813
|
+
declare const ErrorCode: {
|
|
1814
|
+
/** Invalid constructor args, missing required env var, frozen-config violation. */
|
|
1815
|
+
readonly CONFIG: "CONFIG";
|
|
1816
|
+
/** WebSocket connect failure, HTTP 5xx from provider, network error. */
|
|
1817
|
+
readonly CONNECTION: "CONNECTION";
|
|
1818
|
+
/** Provider rejected our credentials (HTTP 401/403, invalid signature). */
|
|
1819
|
+
readonly AUTH: "AUTH";
|
|
1820
|
+
/** Provider response, voicemail post, or other awaited operation timed out. */
|
|
1821
|
+
readonly TIMEOUT: "TIMEOUT";
|
|
1822
|
+
/** Provider returned HTTP 429. */
|
|
1823
|
+
readonly RATE_LIMIT: "RATE_LIMIT";
|
|
1824
|
+
/** Twilio / Telnyx webhook signature verification failed. */
|
|
1825
|
+
readonly WEBHOOK_VERIFICATION: "WEBHOOK_VERIFICATION";
|
|
1826
|
+
/** Caller passed a malformed phone number, tool arg, etc. */
|
|
1827
|
+
readonly INPUT_VALIDATION: "INPUT_VALIDATION";
|
|
1828
|
+
/** Generic catch-all for unexpected upstream provider failures. */
|
|
1829
|
+
readonly PROVIDER_ERROR: "PROVIDER_ERROR";
|
|
1830
|
+
/** Phone number provisioning, webhook configuration, or carrier setup failed. */
|
|
1831
|
+
readonly PROVISION: "PROVISION";
|
|
1832
|
+
/** Assertion failed / unexpected internal state. Likely a Patter bug. */
|
|
1833
|
+
readonly INTERNAL: "INTERNAL";
|
|
1834
|
+
};
|
|
1835
|
+
type ErrorCode = (typeof ErrorCode)[keyof typeof ErrorCode];
|
|
1836
|
+
/** Base class for every error thrown by the Patter SDK. */
|
|
1022
1837
|
declare class PatterError extends Error {
|
|
1023
|
-
|
|
1838
|
+
/** Stable, machine-readable error code. Subclasses set the default. */
|
|
1839
|
+
readonly code: ErrorCode;
|
|
1840
|
+
constructor(message: string, options?: {
|
|
1841
|
+
code?: ErrorCode;
|
|
1842
|
+
});
|
|
1024
1843
|
}
|
|
1844
|
+
/** Network / WebSocket / HTTP-level connectivity failure when talking to a provider. */
|
|
1025
1845
|
declare class PatterConnectionError extends PatterError {
|
|
1026
|
-
constructor(message: string
|
|
1846
|
+
constructor(message: string, options?: {
|
|
1847
|
+
code?: ErrorCode;
|
|
1848
|
+
});
|
|
1027
1849
|
}
|
|
1850
|
+
/** Provider rejected our credentials (HTTP 401/403, invalid webhook signature, etc.). */
|
|
1028
1851
|
declare class AuthenticationError extends PatterError {
|
|
1029
|
-
constructor(message: string
|
|
1852
|
+
constructor(message: string, options?: {
|
|
1853
|
+
code?: ErrorCode;
|
|
1854
|
+
});
|
|
1030
1855
|
}
|
|
1856
|
+
/** Phone-number provisioning or carrier setup failed. */
|
|
1031
1857
|
declare class ProvisionError extends PatterError {
|
|
1032
|
-
constructor(message: string
|
|
1858
|
+
constructor(message: string, options?: {
|
|
1859
|
+
code?: ErrorCode;
|
|
1860
|
+
});
|
|
1033
1861
|
}
|
|
1034
1862
|
/** Thrown when a provider returns HTTP 429 on connect/upgrade. */
|
|
1035
1863
|
declare class RateLimitError extends PatterConnectionError {
|
|
1036
|
-
constructor(message: string
|
|
1864
|
+
constructor(message: string, options?: {
|
|
1865
|
+
code?: ErrorCode;
|
|
1866
|
+
});
|
|
1037
1867
|
}
|
|
1038
1868
|
|
|
1039
1869
|
/**
|
|
@@ -1080,14 +1910,8 @@ declare function soniox(opts: {
|
|
|
1080
1910
|
apiKey: string;
|
|
1081
1911
|
language?: string;
|
|
1082
1912
|
}): STTConfig;
|
|
1083
|
-
/**
|
|
1084
|
-
|
|
1085
|
-
*
|
|
1086
|
-
* NOTE: the Speechmatics adapter is currently Python-only. Calling this helper
|
|
1087
|
-
* throws a clear error so callers can switch providers or use the Python SDK
|
|
1088
|
-
* until the TS adapter ships.
|
|
1089
|
-
*/
|
|
1090
|
-
declare function speechmatics(_opts: {
|
|
1913
|
+
/** Speechmatics real-time STT config helper. */
|
|
1914
|
+
declare function speechmatics(opts: {
|
|
1091
1915
|
apiKey: string;
|
|
1092
1916
|
language?: string;
|
|
1093
1917
|
}): STTConfig;
|
|
@@ -1133,8 +1957,31 @@ declare function geminiLive(opts: {
|
|
|
1133
1957
|
voice?: string;
|
|
1134
1958
|
}): RealtimeConfig;
|
|
1135
1959
|
|
|
1960
|
+
/**
|
|
1961
|
+
* Billing units used by ``DEFAULT_PRICING`` entries. String values keep the
|
|
1962
|
+
* pricing table JSON-serialisable and backwards-compatible with consumers
|
|
1963
|
+
* that still compare against the raw strings.
|
|
1964
|
+
*/
|
|
1965
|
+
declare const PricingUnit: {
|
|
1966
|
+
readonly MINUTE: "minute";
|
|
1967
|
+
readonly THOUSAND_CHARS: "1k_chars";
|
|
1968
|
+
readonly TOKEN: "token";
|
|
1969
|
+
};
|
|
1970
|
+
/** String value for one of the entries in `PricingUnit`. */
|
|
1971
|
+
type PricingUnitValue = (typeof PricingUnit)[keyof typeof PricingUnit];
|
|
1972
|
+
/** Per-model rate overrides — same shape as `ProviderPricing` minus the nested `models` map, with `unit` optional. */
|
|
1973
|
+
type ModelPricing = Omit<ProviderPricing, 'unit' | 'models'> & {
|
|
1974
|
+
unit?: PricingUnitValue | string;
|
|
1975
|
+
};
|
|
1976
|
+
/** Single provider's pricing entry inside `DEFAULT_PRICING` or a user override map. */
|
|
1136
1977
|
interface ProviderPricing {
|
|
1137
|
-
|
|
1978
|
+
/**
|
|
1979
|
+
* Billing unit. The library ships with values from {@link PricingUnit},
|
|
1980
|
+
* but the field stays ``string`` so user overrides loaded from JSON /
|
|
1981
|
+
* env config (which are unconstrained at the type system) keep flowing
|
|
1982
|
+
* through ``mergePricing`` without type assertions.
|
|
1983
|
+
*/
|
|
1984
|
+
unit: PricingUnitValue | string;
|
|
1138
1985
|
price?: number;
|
|
1139
1986
|
audio_input_per_token?: number;
|
|
1140
1987
|
audio_output_per_token?: number;
|
|
@@ -1142,17 +1989,51 @@ interface ProviderPricing {
|
|
|
1142
1989
|
text_output_per_token?: number;
|
|
1143
1990
|
cached_audio_input_per_token?: number;
|
|
1144
1991
|
cached_text_input_per_token?: number;
|
|
1992
|
+
/**
|
|
1993
|
+
* Per-model rate overrides keyed by model identifier. When the cost-calc
|
|
1994
|
+
* function receives a ``model`` arg, the matching entry overlays the
|
|
1995
|
+
* provider defaults; missing models fall back to the surrounding rates
|
|
1996
|
+
* (legacy behaviour). Longest-prefix match handles versioned IDs like
|
|
1997
|
+
* ``gpt-realtime-2-2026-05`` against ``gpt-realtime-2``. See
|
|
1998
|
+
* {@link resolveProviderRates}.
|
|
1999
|
+
*/
|
|
2000
|
+
models?: Record<string, ModelPricing>;
|
|
1145
2001
|
}
|
|
2002
|
+
/**
|
|
2003
|
+
* Built-in pricing table — overridable via `Patter({ pricing: {...} })`.
|
|
2004
|
+
*
|
|
2005
|
+
* Each provider entry carries provider-level defaults plus an optional
|
|
2006
|
+
* `models` map for per-model overrides. When the cost-calc function gets a
|
|
2007
|
+
* model arg it auto-resolves via {@link resolveProviderRates} (longest-prefix
|
|
2008
|
+
* fallback for versioned model IDs). Empty/unknown model → provider defaults.
|
|
2009
|
+
*/
|
|
1146
2010
|
declare const DEFAULT_PRICING: Record<string, ProviderPricing>;
|
|
1147
2011
|
/**
|
|
1148
2012
|
* Merge user overrides into a copy of DEFAULT_PRICING.
|
|
1149
|
-
*
|
|
2013
|
+
*
|
|
2014
|
+
* Performs a per-provider shallow merge with one exception: the nested
|
|
2015
|
+
* ``models`` map is itself merged shallowly (per-model entries replace
|
|
2016
|
+
* the default entry but unmentioned models keep their built-in rates).
|
|
2017
|
+
* A user override of ``{ deepgram: { models: { 'nova-2': { price: 0.01 } } } }``
|
|
2018
|
+
* keeps every other Deepgram model rate intact.
|
|
1150
2019
|
*/
|
|
1151
2020
|
declare function mergePricing(overrides?: Record<string, Partial<ProviderPricing>> | null): Record<string, ProviderPricing>;
|
|
1152
|
-
/**
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
2021
|
+
/**
|
|
2022
|
+
* Calculate STT cost from audio duration.
|
|
2023
|
+
*
|
|
2024
|
+
* When ``model`` is supplied and the provider entry has a matching
|
|
2025
|
+
* ``models`` override, the per-model rate is used; otherwise falls back
|
|
2026
|
+
* to the provider-level rate (legacy behaviour, model omitted).
|
|
2027
|
+
*/
|
|
2028
|
+
declare function calculateSttCost(provider: string, audioSeconds: number, pricing: Record<string, ProviderPricing>, model?: string | null): number;
|
|
2029
|
+
/**
|
|
2030
|
+
* Calculate TTS cost from character count.
|
|
2031
|
+
*
|
|
2032
|
+
* When ``model`` is supplied and the provider entry has a matching
|
|
2033
|
+
* ``models`` override, the per-model rate is used; otherwise falls back
|
|
2034
|
+
* to the provider-level rate (legacy behaviour, model omitted).
|
|
2035
|
+
*/
|
|
2036
|
+
declare function calculateTtsCost(provider: string, characterCount: number, pricing: Record<string, ProviderPricing>, model?: string | null): number;
|
|
1156
2037
|
/**
|
|
1157
2038
|
* Calculate OpenAI Realtime cost from token usage.
|
|
1158
2039
|
*
|
|
@@ -1176,7 +2057,7 @@ declare function calculateRealtimeCost(usage: {
|
|
|
1176
2057
|
audio_tokens?: number;
|
|
1177
2058
|
text_tokens?: number;
|
|
1178
2059
|
};
|
|
1179
|
-
}, pricing: Record<string, ProviderPricing
|
|
2060
|
+
}, pricing: Record<string, ProviderPricing>, model?: string | null): number;
|
|
1180
2061
|
/**
|
|
1181
2062
|
* Calculate telephony cost from call duration.
|
|
1182
2063
|
*
|
|
@@ -1192,6 +2073,7 @@ declare function calculateTelephonyCost(provider: string, durationSeconds: numbe
|
|
|
1192
2073
|
* Port of the Python `CallMetricsAccumulator` from `sdk/patter/services/metrics.py`.
|
|
1193
2074
|
*/
|
|
1194
2075
|
|
|
2076
|
+
/** Per-turn latency breakdown across the STT/LLM/TTS pipeline. */
|
|
1195
2077
|
interface LatencyBreakdown {
|
|
1196
2078
|
stt_ms: number;
|
|
1197
2079
|
/**
|
|
@@ -1228,7 +2110,21 @@ interface LatencyBreakdown {
|
|
|
1228
2110
|
* TTS audio byte sent. Optional — undefined when TTS never completed.
|
|
1229
2111
|
*/
|
|
1230
2112
|
tts_total_ms?: number;
|
|
2113
|
+
/**
|
|
2114
|
+
* **User-perceived agent response latency**: time from end-of-user-speech
|
|
2115
|
+
* (VAD stop or STT ``speech_final``) to the first audio byte the agent
|
|
2116
|
+
* sent back. Computed as ``endpoint_ms + llm_ttft_ms + tts_ms`` when all
|
|
2117
|
+
* three signals are available — falls back to undefined otherwise.
|
|
2118
|
+
*
|
|
2119
|
+
* This is the metric you should watch for SLO / p95 dashboards. Unlike
|
|
2120
|
+
* ``total_ms`` (which spans the user's entire utterance and therefore
|
|
2121
|
+
* grows with how long the user spoke), ``agent_response_ms`` isolates
|
|
2122
|
+
* the system-controlled latency: silence detection + LLM TTFT + TTS
|
|
2123
|
+
* first byte.
|
|
2124
|
+
*/
|
|
2125
|
+
agent_response_ms?: number;
|
|
1231
2126
|
}
|
|
2127
|
+
/** Per-call cost breakdown by component (STT/TTS/LLM/telephony) plus the total. */
|
|
1232
2128
|
interface CostBreakdown {
|
|
1233
2129
|
stt: number;
|
|
1234
2130
|
tts: number;
|
|
@@ -1242,6 +2138,7 @@ interface CostBreakdown {
|
|
|
1242
2138
|
*/
|
|
1243
2139
|
llm_cached_savings?: number;
|
|
1244
2140
|
}
|
|
2141
|
+
/** Metrics captured for a single conversation turn. */
|
|
1245
2142
|
interface TurnMetrics {
|
|
1246
2143
|
turn_index: number;
|
|
1247
2144
|
user_text: string;
|
|
@@ -1251,6 +2148,7 @@ interface TurnMetrics {
|
|
|
1251
2148
|
tts_characters: number;
|
|
1252
2149
|
timestamp: number;
|
|
1253
2150
|
}
|
|
2151
|
+
/** Aggregated metrics for an entire call (turns, costs, latency percentiles). */
|
|
1254
2152
|
interface CallMetrics {
|
|
1255
2153
|
call_id: string;
|
|
1256
2154
|
duration_seconds: number;
|
|
@@ -1267,6 +2165,7 @@ interface CallMetrics {
|
|
|
1267
2165
|
llm_provider: string;
|
|
1268
2166
|
telephony_provider: string;
|
|
1269
2167
|
}
|
|
2168
|
+
/** Programmatic control surface for a live call (transfer, hangup, DTMF). */
|
|
1270
2169
|
interface CallControl {
|
|
1271
2170
|
/** Transfer the call to a different number or SIP URI. */
|
|
1272
2171
|
transfer(number: string): Promise<void>;
|
|
@@ -1288,6 +2187,7 @@ interface CallControl {
|
|
|
1288
2187
|
/** Callee number. */
|
|
1289
2188
|
readonly callee: string;
|
|
1290
2189
|
}
|
|
2190
|
+
/** Mutable per-call accumulator that stamps timestamps and emits final `CallMetrics`. */
|
|
1291
2191
|
declare class CallMetricsAccumulator {
|
|
1292
2192
|
callId: string;
|
|
1293
2193
|
readonly providerMode: string;
|
|
@@ -1295,6 +2195,14 @@ declare class CallMetricsAccumulator {
|
|
|
1295
2195
|
readonly sttProvider: string;
|
|
1296
2196
|
readonly ttsProvider: string;
|
|
1297
2197
|
readonly llmProvider: string;
|
|
2198
|
+
/**
|
|
2199
|
+
* Model identifiers for per-model rate resolution (see pricing.ts). Empty
|
|
2200
|
+
* string means "not known" → cost calc falls back to provider defaults,
|
|
2201
|
+
* matching pre-2026.3 behaviour.
|
|
2202
|
+
*/
|
|
2203
|
+
readonly sttModel: string;
|
|
2204
|
+
readonly ttsModel: string;
|
|
2205
|
+
readonly realtimeModel: string;
|
|
1298
2206
|
private readonly _pricing;
|
|
1299
2207
|
private readonly _callStart;
|
|
1300
2208
|
private readonly _turns;
|
|
@@ -1349,6 +2257,12 @@ declare class CallMetricsAccumulator {
|
|
|
1349
2257
|
sttProvider?: string;
|
|
1350
2258
|
ttsProvider?: string;
|
|
1351
2259
|
llmProvider?: string;
|
|
2260
|
+
/** Model identifier for the STT adapter (e.g. ``"nova-3-multilingual"``). */
|
|
2261
|
+
sttModel?: string;
|
|
2262
|
+
/** Model identifier for the TTS adapter (e.g. ``"eleven_multilingual_v2"``). */
|
|
2263
|
+
ttsModel?: string;
|
|
2264
|
+
/** Model identifier for the realtime adapter (e.g. ``"gpt-realtime-2"``). */
|
|
2265
|
+
realtimeModel?: string;
|
|
1352
2266
|
pricing?: Record<string, Partial<ProviderPricing>> | null;
|
|
1353
2267
|
eventBus?: EventBus;
|
|
1354
2268
|
/** When true, only the first TTFB emission per call is forwarded to the event bus. */
|
|
@@ -1363,6 +2277,7 @@ declare class CallMetricsAccumulator {
|
|
|
1363
2277
|
configureSttFormat(sampleRate?: number, bytesPerSample?: number): void;
|
|
1364
2278
|
/** Whether a turn is currently being measured (startTurn called, not yet completed). */
|
|
1365
2279
|
get turnActive(): boolean;
|
|
2280
|
+
/** Begin a new turn — stamps the turn start timestamp and resets per-turn state. */
|
|
1366
2281
|
startTurn(): void;
|
|
1367
2282
|
/**
|
|
1368
2283
|
* Start a new turn only if no turn is currently open.
|
|
@@ -1370,6 +2285,7 @@ declare class CallMetricsAccumulator {
|
|
|
1370
2285
|
* on the first audio byte rather than just before recordSttComplete().
|
|
1371
2286
|
*/
|
|
1372
2287
|
startTurnIfIdle(): void;
|
|
2288
|
+
/** Stamp end-of-STT, capture the user's transcript, and accrue billed STT seconds. */
|
|
1373
2289
|
recordSttComplete(text: string, audioSeconds?: number): void;
|
|
1374
2290
|
/** Record the timestamp of the first LLM token (TTFT). No-op after first call. */
|
|
1375
2291
|
recordLlmFirstToken(): void;
|
|
@@ -1380,8 +2296,11 @@ declare class CallMetricsAccumulator {
|
|
|
1380
2296
|
* No-op after first call.
|
|
1381
2297
|
*/
|
|
1382
2298
|
recordLlmFirstSentenceComplete(): void;
|
|
2299
|
+
/** Stamp end-of-LLM (last token received). */
|
|
1383
2300
|
recordLlmComplete(): void;
|
|
2301
|
+
/** Stamp first TTS audio byte sent on the wire (used to compute TTS TTFB). */
|
|
1384
2302
|
recordTtsFirstByte(): void;
|
|
2303
|
+
/** Record final TTS text length and stamp the last-byte timestamp. */
|
|
1385
2304
|
recordTtsComplete(text: string): void;
|
|
1386
2305
|
/**
|
|
1387
2306
|
* Capture the timestamp when the last TTS audio byte was sent on the wire.
|
|
@@ -1401,7 +2320,9 @@ declare class CallMetricsAccumulator {
|
|
|
1401
2320
|
* to compute ``bargein_ms``.
|
|
1402
2321
|
*/
|
|
1403
2322
|
recordTtsStopped(ts?: number): void;
|
|
2323
|
+
/** Close the current turn cleanly and append a `TurnMetrics` record. */
|
|
1404
2324
|
recordTurnComplete(agentText: string): TurnMetrics;
|
|
2325
|
+
/** Close the current turn as interrupted (barge-in) and return the recorded metrics. */
|
|
1405
2326
|
recordTurnInterrupted(): TurnMetrics | null;
|
|
1406
2327
|
/**
|
|
1407
2328
|
* Record the moment VAD emitted speech_end for the current utterance.
|
|
@@ -1435,6 +2356,7 @@ declare class CallMetricsAccumulator {
|
|
|
1435
2356
|
* ``transcriptionDelay`` = turnCommitted − vadStopped (ms)
|
|
1436
2357
|
* ``onUserTurnCompletedDelay`` = caller-supplied delta (ms) or 0
|
|
1437
2358
|
*/
|
|
2359
|
+
/** Emit `EOUMetrics` once VAD-stop, STT-final, and turn-committed timestamps are all known. */
|
|
1438
2360
|
emitEouMetrics(): void;
|
|
1439
2361
|
/**
|
|
1440
2362
|
* Record that a caller utterance started overlapping with agent speech.
|
|
@@ -1451,7 +2373,16 @@ declare class CallMetricsAccumulator {
|
|
|
1451
2373
|
* @param ts Optional override timestamp in hrTimeMs units.
|
|
1452
2374
|
*/
|
|
1453
2375
|
recordOverlapEnd(wasInterruption: boolean, ts?: number): void;
|
|
2376
|
+
/** Accumulate inbound STT audio bytes for cost calculation when seconds are unknown. */
|
|
1454
2377
|
addSttAudioBytes(byteCount: number): void;
|
|
2378
|
+
/**
|
|
2379
|
+
* Record an OpenAI Realtime usage payload and roll up its cost + cached-savings.
|
|
2380
|
+
*
|
|
2381
|
+
* `model` allows the cost calc to pick the per-model rate (e.g.
|
|
2382
|
+
* `gpt-realtime-2`). Defaults to whatever was supplied at construction
|
|
2383
|
+
* time (`this.realtimeModel`); pass an explicit value to override per-call
|
|
2384
|
+
* (the `response.done` payload carries the model used).
|
|
2385
|
+
*/
|
|
1455
2386
|
recordRealtimeUsage(usage: {
|
|
1456
2387
|
input_token_details?: {
|
|
1457
2388
|
audio_tokens?: number;
|
|
@@ -1465,8 +2396,10 @@ declare class CallMetricsAccumulator {
|
|
|
1465
2396
|
audio_tokens?: number;
|
|
1466
2397
|
text_tokens?: number;
|
|
1467
2398
|
};
|
|
1468
|
-
}): void;
|
|
2399
|
+
}, model?: string | null): void;
|
|
2400
|
+
/** Override the carrier-billed telephony cost (e.g. exact value reported via Twilio API). */
|
|
1469
2401
|
setActualTelephonyCost(cost: number): void;
|
|
2402
|
+
/** Override the provider-billed STT cost when an exact figure is available. */
|
|
1470
2403
|
setActualSttCost(cost: number): void;
|
|
1471
2404
|
/**
|
|
1472
2405
|
* Accumulate LLM token cost for pipeline mode (non-Realtime).
|
|
@@ -1482,7 +2415,9 @@ declare class CallMetricsAccumulator {
|
|
|
1482
2415
|
* @param cacheWriteTokens Cache write tokens (billed at cache_write rate if present)
|
|
1483
2416
|
*/
|
|
1484
2417
|
recordLlmUsage(provider: string, model: string, inputTokens: number, outputTokens: number, cacheReadTokens?: number, cacheWriteTokens?: number): void;
|
|
2418
|
+
/** Finalize the call: flush any in-flight turn, compute aggregates, and return `CallMetrics`. */
|
|
1485
2419
|
endCall(): CallMetrics;
|
|
2420
|
+
/** Return the cost breakdown for the call so far without ending it. */
|
|
1486
2421
|
getCostSoFar(): CostBreakdown;
|
|
1487
2422
|
private _resetTurnState;
|
|
1488
2423
|
private _computeTurnLatency;
|
|
@@ -1499,15 +2434,31 @@ declare class CallMetricsAccumulator {
|
|
|
1499
2434
|
private _computePercentileLatency;
|
|
1500
2435
|
}
|
|
1501
2436
|
|
|
2437
|
+
/**
|
|
2438
|
+
* OpenAI Realtime WebSocket adapter for Patter's realtime mode.
|
|
2439
|
+
*
|
|
2440
|
+
* Wraps `wss://api.openai.com/v1/realtime` and exposes the unified
|
|
2441
|
+
* Patter realtime contract (`connect / sendAudio / onEvent / close`) on
|
|
2442
|
+
* {@link OpenAIRealtimeAdapter}. Audio negotiation defaults to
|
|
2443
|
+
* `g711_ulaw` so traffic flows through Twilio/Telnyx without transcoding.
|
|
2444
|
+
*/
|
|
1502
2445
|
/**
|
|
1503
2446
|
* Supported OpenAI Realtime wire audio formats. See
|
|
1504
2447
|
* https://platform.openai.com/docs/guides/realtime for the full list.
|
|
1505
|
-
*
|
|
1506
|
-
*
|
|
1507
|
-
*
|
|
2448
|
+
* `G711_ULAW` matches what Twilio/Telnyx emit natively on the phone leg, so
|
|
2449
|
+
* no transcoding is needed. `PCM16` is used in the terminal test-mode path
|
|
2450
|
+
* and when the telephony provider negotiates L16/16000.
|
|
1508
2451
|
*/
|
|
1509
|
-
|
|
2452
|
+
declare const OpenAIRealtimeAudioFormat: {
|
|
2453
|
+
readonly G711_ULAW: "g711_ulaw";
|
|
2454
|
+
readonly G711_ALAW: "g711_alaw";
|
|
2455
|
+
readonly PCM16: "pcm16";
|
|
2456
|
+
};
|
|
2457
|
+
/** Union of {@link OpenAIRealtimeAudioFormat} string values. */
|
|
2458
|
+
type OpenAIRealtimeAudioFormat = (typeof OpenAIRealtimeAudioFormat)[keyof typeof OpenAIRealtimeAudioFormat];
|
|
2459
|
+
/** Callback signature for events emitted by {@link OpenAIRealtimeAdapter}. */
|
|
1510
2460
|
type RealtimeEventCallback = (type: string, data: unknown) => void | Promise<void>;
|
|
2461
|
+
/** Constructor options for {@link OpenAIRealtimeAdapter}. */
|
|
1511
2462
|
interface OpenAIRealtimeOptions {
|
|
1512
2463
|
temperature?: number;
|
|
1513
2464
|
maxResponseOutputTokens?: number | 'inf';
|
|
@@ -1522,7 +2473,15 @@ interface OpenAIRealtimeOptions {
|
|
|
1522
2473
|
* Increase for dictation-style flows where the user pauses mid-sentence.
|
|
1523
2474
|
*/
|
|
1524
2475
|
silenceDurationMs?: number;
|
|
2476
|
+
/**
|
|
2477
|
+
* Reasoning-effort tier for `gpt-realtime-2`. When omitted the field is
|
|
2478
|
+
* not sent and the server default applies. OpenAI recommends `"low"` for
|
|
2479
|
+
* production voice flows — higher tiers add measurable per-turn latency.
|
|
2480
|
+
* Has no effect on models that don't support the `reasoning` field.
|
|
2481
|
+
*/
|
|
2482
|
+
reasoningEffort?: 'minimal' | 'low' | 'medium' | 'high';
|
|
1525
2483
|
}
|
|
2484
|
+
/** Realtime WebSocket adapter for OpenAI's `gpt-realtime` family. */
|
|
1526
2485
|
declare class OpenAIRealtimeAdapter {
|
|
1527
2486
|
private readonly apiKey;
|
|
1528
2487
|
private readonly model;
|
|
@@ -1536,13 +2495,17 @@ declare class OpenAIRealtimeAdapter {
|
|
|
1536
2495
|
private heartbeat;
|
|
1537
2496
|
private currentResponseItemId;
|
|
1538
2497
|
private currentResponseAudioMs;
|
|
2498
|
+
private currentResponseFirstAudioAt;
|
|
1539
2499
|
private readonly options;
|
|
1540
2500
|
constructor(apiKey: string, model?: string, voice?: string, instructions?: string, tools?: Array<{
|
|
1541
2501
|
name: string;
|
|
1542
2502
|
description: string;
|
|
1543
2503
|
parameters: Record<string, unknown>;
|
|
2504
|
+
strict?: boolean;
|
|
1544
2505
|
}> | undefined, audioFormat?: OpenAIRealtimeAudioFormat, options?: OpenAIRealtimeOptions);
|
|
2506
|
+
/** Open the Realtime WebSocket and apply the session configuration. */
|
|
1545
2507
|
connect(): Promise<void>;
|
|
2508
|
+
/** Append an audio chunk to the realtime input buffer, base64-encoded on the wire. */
|
|
1546
2509
|
sendAudio(mulawAudio: Buffer): void;
|
|
1547
2510
|
/**
|
|
1548
2511
|
* Register a listener for parsed realtime events.
|
|
@@ -1553,14 +2516,54 @@ declare class OpenAIRealtimeAdapter {
|
|
|
1553
2516
|
* a Set of callbacks. Use {@link offEvent} to remove one.
|
|
1554
2517
|
*/
|
|
1555
2518
|
onEvent(callback: RealtimeEventCallback): void;
|
|
2519
|
+
/** Remove a previously registered {@link onEvent} callback. */
|
|
1556
2520
|
offEvent(callback: RealtimeEventCallback): void;
|
|
1557
2521
|
private ensureMessageListener;
|
|
2522
|
+
/** Truncate the in-flight assistant turn and cancel the active response.
|
|
2523
|
+
*
|
|
2524
|
+
* ``audio_end_ms`` MUST reflect what the caller actually heard, not what
|
|
2525
|
+
* the server generated. OpenAI streams audio at 5-10x real-time, so the
|
|
2526
|
+
* byte-derived counter overstates playback whenever the consumer cleared
|
|
2527
|
+
* its playout buffer (e.g. ``send_clear``) before the audio reached the
|
|
2528
|
+
* speaker. We bound the truncate point by wall-clock time since the first
|
|
2529
|
+
* chunk of this response — that's the physical maximum a 1x real-time
|
|
2530
|
+
* playback could have produced. Without this cap, OpenAI keeps the full
|
|
2531
|
+
* generated assistant text on the transcript, and the model replays /
|
|
2532
|
+
* resumes from it on the next turn — manifesting as re-greetings and
|
|
2533
|
+
* mid-sentence fragments after a barge-in storm.
|
|
2534
|
+
*/
|
|
1558
2535
|
cancelResponse(): void;
|
|
2536
|
+
/** Inject a user text turn and request a new response. */
|
|
1559
2537
|
sendText(text: string): Promise<void>;
|
|
2538
|
+
/**
|
|
2539
|
+
* Make the AI speak ``text`` as its opening line.
|
|
2540
|
+
*
|
|
2541
|
+
* Triggers ``response.create`` with explicit ``instructions`` that force
|
|
2542
|
+
* the model to render ``text`` verbatim as its first audio utterance.
|
|
2543
|
+
* This is the correct semantics for ``Agent.firstMessage`` per its
|
|
2544
|
+
* docstring ("What the AI says when the callee answers").
|
|
2545
|
+
*
|
|
2546
|
+
* Without this, ``sendText(firstMessage)`` would inject ``text`` as
|
|
2547
|
+
* ``role: user`` and the AI would *reply* to its own greeting, producing
|
|
2548
|
+
* role-confused openings (e.g. a receptionist agent responding "I'd like
|
|
2549
|
+
* to schedule a haircut" because it took its own first_message as a
|
|
2550
|
+
* customer cue).
|
|
2551
|
+
*/
|
|
2552
|
+
sendFirstMessage(text: string): Promise<void>;
|
|
2553
|
+
/** Submit a tool/function-call result and request the next response. */
|
|
1560
2554
|
sendFunctionResult(callId: string, result: string): Promise<void>;
|
|
2555
|
+
/** Stop the heartbeat, drop listeners, and close the Realtime WebSocket. */
|
|
1561
2556
|
close(): void;
|
|
1562
2557
|
}
|
|
1563
2558
|
|
|
2559
|
+
/**
|
|
2560
|
+
* ElevenLabs Conversational AI (ConvAI) WebSocket adapter for Patter.
|
|
2561
|
+
*
|
|
2562
|
+
* Wraps the `wss://api.elevenlabs.io/v1/convai/conversation` endpoint and
|
|
2563
|
+
* normalises agent audio + transcript + control events into a single
|
|
2564
|
+
* `onEvent(type, data)` callback. See {@link ElevenLabsConvAIAdapter}.
|
|
2565
|
+
*/
|
|
2566
|
+
/** Constructor options for {@link ElevenLabsConvAIAdapter}. */
|
|
1564
2567
|
interface ElevenLabsConvAIOptions {
|
|
1565
2568
|
apiKey: string;
|
|
1566
2569
|
agentId?: string;
|
|
@@ -1573,6 +2576,7 @@ interface ElevenLabsConvAIOptions {
|
|
|
1573
2576
|
useSignedUrl?: boolean;
|
|
1574
2577
|
}
|
|
1575
2578
|
type EventCallback = (type: string, data: unknown) => void | Promise<void>;
|
|
2579
|
+
/** WebSocket adapter for ElevenLabs ConvAI managed-agent conversations. */
|
|
1576
2580
|
declare class ElevenLabsConvAIAdapter {
|
|
1577
2581
|
private ws;
|
|
1578
2582
|
private eventCallback;
|
|
@@ -1613,6 +2617,7 @@ declare class ElevenLabsConvAIAdapter {
|
|
|
1613
2617
|
*/
|
|
1614
2618
|
static forTelnyx(apiKey: string, agentId: string, options?: Omit<ElevenLabsConvAIOptions, 'apiKey' | 'agentId' | 'outputAudioFormat' | 'inputAudioFormat'>): ElevenLabsConvAIAdapter;
|
|
1615
2619
|
private fetchSignedUrl;
|
|
2620
|
+
/** Open the ConvAI WebSocket and send the conversation init payload. */
|
|
1616
2621
|
connect(): Promise<void>;
|
|
1617
2622
|
private safeInvoke;
|
|
1618
2623
|
private respondToPing;
|
|
@@ -1620,8 +2625,11 @@ declare class ElevenLabsConvAIAdapter {
|
|
|
1620
2625
|
private finalizeAgentTurn;
|
|
1621
2626
|
private scheduleSilenceDone;
|
|
1622
2627
|
private handleMessage;
|
|
2628
|
+
/** Send a caller-side audio chunk to ConvAI as a base64 `user_audio_chunk`. */
|
|
1623
2629
|
sendAudio(audioBytes: Buffer): void;
|
|
2630
|
+
/** Register the event callback that receives ConvAI server messages. */
|
|
1624
2631
|
onEvent(callback: EventCallback): void;
|
|
2632
|
+
/** Close the ConvAI WebSocket and release the event callback. */
|
|
1625
2633
|
close(): Promise<void>;
|
|
1626
2634
|
}
|
|
1627
2635
|
|
|
@@ -1632,6 +2640,7 @@ declare class ElevenLabsConvAIAdapter {
|
|
|
1632
2640
|
* - HTTP webhook: onMessage="https://api.customer.com/patter/message"
|
|
1633
2641
|
* - WebSocket: onMessage="ws://localhost:9000/stream"
|
|
1634
2642
|
*/
|
|
2643
|
+
/** Dispatches per-turn messages to a remote HTTP webhook or WebSocket endpoint. */
|
|
1635
2644
|
declare class RemoteMessageHandler {
|
|
1636
2645
|
private readonly webhookSecret;
|
|
1637
2646
|
/**
|
|
@@ -1675,6 +2684,12 @@ declare function isRemoteUrl(onMessage: unknown): onMessage is string;
|
|
|
1675
2684
|
/** Check if a URL is a WebSocket URL. */
|
|
1676
2685
|
declare function isWebSocketUrl(url: string): boolean;
|
|
1677
2686
|
|
|
2687
|
+
/**
|
|
2688
|
+
* Embedded HTTP/WebSocket server — wires Express webhooks for the configured
|
|
2689
|
+
* carrier (Twilio or Telnyx) into the per-call `StreamHandler` and dashboard.
|
|
2690
|
+
*/
|
|
2691
|
+
|
|
2692
|
+
/** Resolved configuration consumed by `EmbeddedServer` (carrier credentials, webhook URL, etc.). */
|
|
1678
2693
|
interface LocalConfig {
|
|
1679
2694
|
twilioSid?: string;
|
|
1680
2695
|
twilioToken?: string;
|
|
@@ -1699,6 +2714,14 @@ interface LocalConfig {
|
|
|
1699
2714
|
* Set to false only for local development against mock providers.
|
|
1700
2715
|
*/
|
|
1701
2716
|
requireSignature?: boolean;
|
|
2717
|
+
/**
|
|
2718
|
+
* Resolved on-disk persistence root for the dashboard's call history,
|
|
2719
|
+
* or ``null`` to disable. Computed by ``client.ts`` from the public
|
|
2720
|
+
* ``LocalOptions.persist`` option (with ``PATTER_LOG_DIR`` env-var
|
|
2721
|
+
* fallback). When ``null``, `CallLogger` is a no-op and the dashboard
|
|
2722
|
+
* is in-memory-only — restarts wipe history.
|
|
2723
|
+
*/
|
|
2724
|
+
persistRoot?: string | null;
|
|
1702
2725
|
}
|
|
1703
2726
|
|
|
1704
2727
|
/**
|
|
@@ -1709,6 +2732,7 @@ interface LocalConfig {
|
|
|
1709
2732
|
* - ?token=<token> query parameter
|
|
1710
2733
|
*/
|
|
1711
2734
|
|
|
2735
|
+
/** Build an Express middleware that gates the dashboard behind a static bearer token. */
|
|
1712
2736
|
declare function makeAuthMiddleware(token?: string): (req: Request, res: Response, next: NextFunction) => void;
|
|
1713
2737
|
|
|
1714
2738
|
/**
|
|
@@ -1747,7 +2771,9 @@ declare function callsToJson(calls: CallRecord[]): string;
|
|
|
1747
2771
|
* GET /api/v1/analytics/costs - B2B cost breakdown
|
|
1748
2772
|
*/
|
|
1749
2773
|
|
|
2774
|
+
/** Mount the dashboard UI + read-only `/api/dashboard/*` routes onto an Express app. */
|
|
1750
2775
|
declare function mountDashboard(app: Express, store: MetricsStore, token?: string): void;
|
|
2776
|
+
/** Mount the B2B-style `/api/v1/*` JSON routes onto an Express app. */
|
|
1751
2777
|
declare function mountApi(app: Express, store: MetricsStore, token?: string): void;
|
|
1752
2778
|
|
|
1753
2779
|
/**
|
|
@@ -1758,11 +2784,19 @@ declare function mountApi(app: Express, store: MetricsStore, token?: string): vo
|
|
|
1758
2784
|
* nothing is written to disk.
|
|
1759
2785
|
*
|
|
1760
2786
|
* TODO(parity): Python's `notify_dashboard` is now an async fire-and-forget
|
|
1761
|
-
* coroutine (see
|
|
2787
|
+
* coroutine (see libraries/python/getpatter/dashboard/persistence.py). This TS version
|
|
1762
2788
|
* uses `http.request` which is already non-blocking, but for parity consider
|
|
1763
2789
|
* exposing this as `async function notifyDashboard(...): Promise<void>` so
|
|
1764
2790
|
* call sites can `await` or `void` it explicitly, matching the Python API.
|
|
1765
2791
|
*/
|
|
2792
|
+
/**
|
|
2793
|
+
* Fire-and-forget POST a completed call payload into a locally-running dashboard, if any.
|
|
2794
|
+
*
|
|
2795
|
+
* Skipped entirely when ``PATTER_DASHBOARD_NOTIFY`` is set to ``0``/``false``
|
|
2796
|
+
* (case-insensitive). This avoids 404 spam in the receiver's access log
|
|
2797
|
+
* when callers embed Patter alongside their own HTTP server on port
|
|
2798
|
+
* 8000 (e.g. agent-to-agent test runners).
|
|
2799
|
+
*/
|
|
1766
2800
|
declare function notifyDashboard(callData: Record<string, unknown>, port?: number): void;
|
|
1767
2801
|
|
|
1768
2802
|
/**
|
|
@@ -1774,6 +2808,7 @@ declare function notifyDashboard(callData: Record<string, unknown>, port?: numbe
|
|
|
1774
2808
|
* background.
|
|
1775
2809
|
*/
|
|
1776
2810
|
|
|
2811
|
+
/** Constructor options for `FallbackLLMProvider`. */
|
|
1777
2812
|
interface FallbackLLMProviderOptions {
|
|
1778
2813
|
/** Number of retry attempts per provider before moving to the next (default 1). */
|
|
1779
2814
|
readonly maxRetryPerProvider?: number;
|
|
@@ -1788,6 +2823,7 @@ declare class AllProvidersFailedError extends Error {
|
|
|
1788
2823
|
declare class PartialStreamError extends Error {
|
|
1789
2824
|
constructor(message: string);
|
|
1790
2825
|
}
|
|
2826
|
+
/** LLM provider that delegates to a sequence of underlying providers, falling back on failure. */
|
|
1791
2827
|
declare class FallbackLLMProvider implements LLMProvider {
|
|
1792
2828
|
private readonly providers;
|
|
1793
2829
|
private readonly availability;
|
|
@@ -1820,6 +2856,7 @@ declare class FallbackLLMProvider implements LLMProvider {
|
|
|
1820
2856
|
* directly.
|
|
1821
2857
|
*/
|
|
1822
2858
|
completeStream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<string, void, unknown>;
|
|
2859
|
+
/** Streaming entry point — yields chunks from the first provider that succeeds. */
|
|
1823
2860
|
stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
|
|
1824
2861
|
private tryProviders;
|
|
1825
2862
|
private markUnavailable;
|
|
@@ -1927,6 +2964,7 @@ declare const PARAMETERS_SCHEMA: {
|
|
|
1927
2964
|
};
|
|
1928
2965
|
readonly required: readonly ["to"];
|
|
1929
2966
|
};
|
|
2967
|
+
/** Constructor options for `PatterTool`. */
|
|
1930
2968
|
interface PatterToolOptions {
|
|
1931
2969
|
/**
|
|
1932
2970
|
* Patter instance to dial through. Must be in local mode (have a `carrier`).
|
|
@@ -1950,12 +2988,14 @@ interface PatterToolOptions {
|
|
|
1950
2988
|
*/
|
|
1951
2989
|
recording?: boolean;
|
|
1952
2990
|
}
|
|
2991
|
+
/** Args accepted by `PatterTool.execute()` (and the OpenAI/Anthropic/Hermes tool schemas). */
|
|
1953
2992
|
interface PatterToolExecuteArgs {
|
|
1954
2993
|
to: string;
|
|
1955
2994
|
goal?: string;
|
|
1956
2995
|
first_message?: string;
|
|
1957
2996
|
max_duration_sec?: number;
|
|
1958
2997
|
}
|
|
2998
|
+
/** Result envelope returned by `PatterTool.execute()` once the underlying call ends. */
|
|
1959
2999
|
interface PatterToolResult {
|
|
1960
3000
|
call_id: string;
|
|
1961
3001
|
status: string;
|
|
@@ -1968,6 +3008,7 @@ interface PatterToolResult {
|
|
|
1968
3008
|
}>;
|
|
1969
3009
|
metrics?: Record<string, unknown> | null;
|
|
1970
3010
|
}
|
|
3011
|
+
/** Wraps a live `Patter` instance as a tool callable from external agent frameworks. */
|
|
1971
3012
|
declare class PatterTool {
|
|
1972
3013
|
readonly name: string;
|
|
1973
3014
|
readonly description: string;
|
|
@@ -2023,6 +3064,7 @@ declare class PatterTool {
|
|
|
2023
3064
|
start(): Promise<void>;
|
|
2024
3065
|
/** Stop the underlying Patter server (and reject any pending calls). */
|
|
2025
3066
|
stop(): Promise<void>;
|
|
3067
|
+
/** Place an outbound call and resolve once it ends with the transcript and metrics. */
|
|
2026
3068
|
execute(args: PatterToolExecuteArgs): Promise<PatterToolResult>;
|
|
2027
3069
|
/** Issue the outbound dial under the mutex and return its assigned call_id. */
|
|
2028
3070
|
private acquireCallId;
|
|
@@ -2043,7 +3085,9 @@ declare class PatterTool {
|
|
|
2043
3085
|
* input/output in the terminal. Useful for rapid agent development.
|
|
2044
3086
|
*/
|
|
2045
3087
|
|
|
3088
|
+
/** Drives an interactive terminal-based test "call" against an agent. */
|
|
2046
3089
|
declare class TestSession {
|
|
3090
|
+
/** Run a REPL-style session that loops user input through the agent's LLM/onMessage handler. */
|
|
2047
3091
|
run(opts: {
|
|
2048
3092
|
agent: AgentOptions;
|
|
2049
3093
|
openaiKey?: string;
|
|
@@ -2056,9 +3100,8 @@ declare class TestSession {
|
|
|
2056
3100
|
/**
|
|
2057
3101
|
* Gemini Live realtime adapter.
|
|
2058
3102
|
*
|
|
2059
|
-
*
|
|
2060
|
-
*
|
|
2061
|
-
* surface — connect / sendAudio / onEvent / close — matching OpenAIRealtimeAdapter.
|
|
3103
|
+
* Implements Patter's realtime adapter surface — connect / sendAudio /
|
|
3104
|
+
* onEvent / close — matching OpenAIRealtimeAdapter.
|
|
2062
3105
|
*
|
|
2063
3106
|
* Uses the @google/genai SDK lazily imported at connect() so consumers that do
|
|
2064
3107
|
* not use Gemini Live do not pay the load cost. Install with:
|
|
@@ -2073,6 +3116,7 @@ declare class TestSession {
|
|
|
2073
3116
|
*/
|
|
2074
3117
|
declare const GEMINI_DEFAULT_INPUT_SR = 16000;
|
|
2075
3118
|
declare const GEMINI_DEFAULT_OUTPUT_SR = 24000;
|
|
3119
|
+
/** Callback signature for events emitted by {@link GeminiLiveAdapter}. */
|
|
2076
3120
|
type GeminiLiveEventHandler = (type: 'audio' | 'transcript_output' | 'function_call' | 'speech_started' | 'response_done' | 'error', data: unknown) => void | Promise<void>;
|
|
2077
3121
|
interface GeminiLiveOptions {
|
|
2078
3122
|
model?: string;
|
|
@@ -2088,6 +3132,7 @@ interface GeminiLiveOptions {
|
|
|
2088
3132
|
outputSampleRate?: number;
|
|
2089
3133
|
temperature?: number;
|
|
2090
3134
|
}
|
|
3135
|
+
/** Realtime adapter for Google's Gemini Live native-audio API. */
|
|
2091
3136
|
declare class GeminiLiveAdapter {
|
|
2092
3137
|
private readonly apiKey;
|
|
2093
3138
|
private readonly model;
|
|
@@ -2111,28 +3156,33 @@ declare class GeminiLiveAdapter {
|
|
|
2111
3156
|
*/
|
|
2112
3157
|
private pendingToolCalls;
|
|
2113
3158
|
constructor(apiKey: string, options?: GeminiLiveOptions);
|
|
3159
|
+
/** Lazily import @google/genai, open a Live session, and start the receive loop. */
|
|
2114
3160
|
connect(): Promise<void>;
|
|
3161
|
+
/** Send a PCM audio chunk to Gemini as base64 inline data. */
|
|
2115
3162
|
sendAudio(pcm: Buffer): void;
|
|
3163
|
+
/** Send a text turn to Gemini and mark the turn complete. */
|
|
2116
3164
|
sendText(text: string): Promise<void>;
|
|
3165
|
+
/** Send a tool/function-call result back to Gemini. */
|
|
2117
3166
|
sendFunctionResult(callId: string, result: string): Promise<void>;
|
|
3167
|
+
/** No-op — Gemini Live barge-in is VAD-driven, not client-cancelled. */
|
|
2118
3168
|
cancelResponse(): void;
|
|
3169
|
+
/** Register an event handler that receives every Gemini Live event. */
|
|
2119
3170
|
onEvent(handler: GeminiLiveEventHandler): void;
|
|
2120
3171
|
private emit;
|
|
2121
3172
|
private pumpReceive;
|
|
3173
|
+
/** Close the Gemini Live session and stop the receive loop. */
|
|
2122
3174
|
close(): Promise<void>;
|
|
2123
3175
|
}
|
|
2124
3176
|
|
|
2125
3177
|
/**
|
|
2126
3178
|
* Ultravox realtime adapter.
|
|
2127
3179
|
*
|
|
2128
|
-
*
|
|
2129
|
-
*
|
|
2130
|
-
*
|
|
2131
|
-
* Reframed to Patter's connect / sendAudio / onEvent / close surface,
|
|
2132
|
-
* matching OpenAIRealtimeAdapter.
|
|
3180
|
+
* Pure WebSocket protocol — no vendor SDK. Implements Patter's connect /
|
|
3181
|
+
* sendAudio / onEvent / close surface, matching OpenAIRealtimeAdapter.
|
|
2133
3182
|
*/
|
|
2134
3183
|
declare const ULTRAVOX_DEFAULT_API_BASE = "https://api.ultravox.ai/api";
|
|
2135
3184
|
declare const ULTRAVOX_DEFAULT_SR = 16000;
|
|
3185
|
+
/** Callback signature for events emitted by {@link UltravoxRealtimeAdapter}. */
|
|
2136
3186
|
type UltravoxEventHandler = (type: 'audio' | 'transcript_input' | 'transcript_output' | 'function_call' | 'speech_started' | 'response_done' | 'error', data: unknown) => void | Promise<void>;
|
|
2137
3187
|
interface UltravoxOptions {
|
|
2138
3188
|
model?: string;
|
|
@@ -2148,6 +3198,7 @@ interface UltravoxOptions {
|
|
|
2148
3198
|
sampleRate?: number;
|
|
2149
3199
|
firstMessage?: string;
|
|
2150
3200
|
}
|
|
3201
|
+
/** Realtime WebSocket adapter for Ultravox managed-agent calls. */
|
|
2151
3202
|
declare class UltravoxRealtimeAdapter {
|
|
2152
3203
|
private readonly apiKey;
|
|
2153
3204
|
private readonly model;
|
|
@@ -2163,14 +3214,21 @@ declare class UltravoxRealtimeAdapter {
|
|
|
2163
3214
|
/** Exposed for diagnostics — true while the underlying socket is open. */
|
|
2164
3215
|
running: boolean;
|
|
2165
3216
|
constructor(apiKey: string, options?: UltravoxOptions);
|
|
3217
|
+
/** Create the Ultravox call, fetch the joinUrl, and open the WebSocket. */
|
|
2166
3218
|
connect(): Promise<void>;
|
|
3219
|
+
/** Send a binary PCM audio chunk to the Ultravox call. */
|
|
2167
3220
|
sendAudio(pcm: Buffer): void;
|
|
3221
|
+
/** Inject a user text message into the Ultravox conversation. */
|
|
2168
3222
|
sendText(text: string): Promise<void>;
|
|
3223
|
+
/** Send a tool/function-call result back to Ultravox. */
|
|
2169
3224
|
sendFunctionResult(callId: string, result: string): Promise<void>;
|
|
3225
|
+
/** Clear the playback buffer to interrupt the agent's current response. */
|
|
2170
3226
|
cancelResponse(): void;
|
|
3227
|
+
/** Register an event handler that receives every Ultravox event. */
|
|
2171
3228
|
onEvent(handler: UltravoxEventHandler): void;
|
|
2172
3229
|
private emit;
|
|
2173
3230
|
private handleMessage;
|
|
3231
|
+
/** Close the Ultravox WebSocket and mark the adapter idle. */
|
|
2174
3232
|
close(): Promise<void>;
|
|
2175
3233
|
}
|
|
2176
3234
|
|
|
@@ -2185,7 +3243,9 @@ declare class UltravoxRealtimeAdapter {
|
|
|
2185
3243
|
* node-cron is an optional dependency. This module imports it lazily so that
|
|
2186
3244
|
* consumers who never schedule anything do not need it installed.
|
|
2187
3245
|
*/
|
|
3246
|
+
/** Callback fired by the scheduler — sync or async, return value ignored. */
|
|
2188
3247
|
type JobCallback = () => void | Promise<void>;
|
|
3248
|
+
/** Handle returned by `scheduleCron`/`scheduleOnce`/`scheduleInterval` for cancellation. */
|
|
2189
3249
|
interface ScheduleHandle {
|
|
2190
3250
|
readonly jobId: string;
|
|
2191
3251
|
cancel(): void;
|
|
@@ -2225,29 +3285,34 @@ declare function scheduleInterval(intervalOrOpts: number | {
|
|
|
2225
3285
|
* Pure WebSocket client for the Soniox real-time STT API. Accumulates
|
|
2226
3286
|
* `is_final` tokens and flushes them on `<end>`/`<fin>` endpoint tokens,
|
|
2227
3287
|
* mirroring the Python `SonioxSTT` adapter.
|
|
2228
|
-
*
|
|
2229
|
-
* Adapted from LiveKit Agents (Apache 2.0):
|
|
2230
|
-
* https://github.com/livekit/agents
|
|
2231
|
-
* (source: livekit-plugins/livekit-plugins-soniox/livekit/plugins/soniox/stt.py
|
|
2232
|
-
* at commit 78a66bcf79c5cea82989401c408f1dff4b961a5b)
|
|
2233
|
-
*
|
|
2234
|
-
* Speechmatics TypeScript adapter is **intentionally not ported**: the
|
|
2235
|
-
* official Speechmatics Voice SDK (`speechmatics.voice`) is Python-only at
|
|
2236
|
-
* the time of writing. Python users should install the optional
|
|
2237
|
-
* `speechmatics` extra; TypeScript users need to wait for an official
|
|
2238
|
-
* upstream SDK before this adapter can land without a WS-handshake reimpl.
|
|
2239
3288
|
*/
|
|
2240
|
-
|
|
3289
|
+
/** Known Soniox real-time STT models. */
|
|
3290
|
+
declare const SonioxModel: {
|
|
3291
|
+
readonly STT_RT_V4: "stt-rt-v4";
|
|
3292
|
+
readonly STT_RT_V3: "stt-rt-v3";
|
|
3293
|
+
readonly STT_RT_V2: "stt-rt-v2";
|
|
3294
|
+
};
|
|
3295
|
+
type SonioxModel = (typeof SonioxModel)[keyof typeof SonioxModel];
|
|
3296
|
+
/** Common PCM sample rates for Soniox streaming input. */
|
|
3297
|
+
declare const SonioxSampleRate: {
|
|
3298
|
+
readonly HZ_8000: 8000;
|
|
3299
|
+
readonly HZ_16000: 16000;
|
|
3300
|
+
readonly HZ_24000: 24000;
|
|
3301
|
+
};
|
|
3302
|
+
type SonioxSampleRate = (typeof SonioxSampleRate)[keyof typeof SonioxSampleRate];
|
|
3303
|
+
/** Patter-normalised transcript event emitted by {@link SonioxSTT}. */
|
|
3304
|
+
interface Transcript$6 {
|
|
2241
3305
|
readonly text: string;
|
|
2242
3306
|
readonly isFinal: boolean;
|
|
2243
3307
|
readonly confidence: number;
|
|
2244
3308
|
}
|
|
2245
|
-
type TranscriptCallback$
|
|
3309
|
+
type TranscriptCallback$6 = (transcript: Transcript$6) => void;
|
|
3310
|
+
/** Constructor options for {@link SonioxSTT}. */
|
|
2246
3311
|
interface SonioxSTTOptions$1 {
|
|
2247
|
-
model?: string;
|
|
3312
|
+
model?: SonioxModel | string;
|
|
2248
3313
|
languageHints?: string[];
|
|
2249
3314
|
languageHintsStrict?: boolean;
|
|
2250
|
-
sampleRate?: number;
|
|
3315
|
+
sampleRate?: SonioxSampleRate | number;
|
|
2251
3316
|
numChannels?: number;
|
|
2252
3317
|
enableSpeakerDiarization?: boolean;
|
|
2253
3318
|
enableLanguageIdentification?: boolean;
|
|
@@ -2255,6 +3320,7 @@ interface SonioxSTTOptions$1 {
|
|
|
2255
3320
|
clientReferenceId?: string;
|
|
2256
3321
|
baseUrl?: string;
|
|
2257
3322
|
}
|
|
3323
|
+
/** Streaming STT adapter for Soniox's real-time WebSocket API. */
|
|
2258
3324
|
declare class SonioxSTT {
|
|
2259
3325
|
private ws;
|
|
2260
3326
|
private callbacks;
|
|
@@ -2275,12 +3341,16 @@ declare class SonioxSTT {
|
|
|
2275
3341
|
/** Factory for Twilio-style 8 kHz linear PCM. */
|
|
2276
3342
|
static forTwilio(apiKey: string, languageHints?: string[]): SonioxSTT;
|
|
2277
3343
|
private buildConfig;
|
|
3344
|
+
/** Open the streaming WebSocket and send the initial config payload. */
|
|
2278
3345
|
connect(): Promise<void>;
|
|
2279
3346
|
private clearKeepalive;
|
|
2280
3347
|
private handleMessage;
|
|
2281
3348
|
private emit;
|
|
3349
|
+
/** Send a binary PCM16-LE audio chunk to Soniox for transcription. */
|
|
2282
3350
|
sendAudio(audio: Buffer): void;
|
|
2283
|
-
|
|
3351
|
+
/** Register a transcript listener (max 10 concurrent listeners). */
|
|
3352
|
+
onTranscript(callback: TranscriptCallback$6): void;
|
|
3353
|
+
/** Send the empty-frame stream terminator and close the WebSocket. */
|
|
2284
3354
|
close(): void;
|
|
2285
3355
|
}
|
|
2286
3356
|
|
|
@@ -2289,17 +3359,36 @@ declare class SonioxSTT {
|
|
|
2289
3359
|
*
|
|
2290
3360
|
* Pure `ws` transport — does NOT depend on the vendor SDK.
|
|
2291
3361
|
*/
|
|
2292
|
-
|
|
3362
|
+
/** Patter-normalised transcript event emitted by {@link AssemblyAISTT}. */
|
|
3363
|
+
interface Transcript$5 {
|
|
2293
3364
|
readonly text: string;
|
|
2294
3365
|
readonly isFinal: boolean;
|
|
2295
3366
|
readonly confidence: number;
|
|
2296
3367
|
/** Optional event hint, e.g. `"SpeechStarted"` for barge-in signals. */
|
|
2297
3368
|
readonly eventType?: string;
|
|
2298
3369
|
}
|
|
2299
|
-
type TranscriptCallback$
|
|
2300
|
-
|
|
2301
|
-
|
|
2302
|
-
|
|
3370
|
+
type TranscriptCallback$5 = (transcript: Transcript$5) => void;
|
|
3371
|
+
/** Audio encodings accepted by AssemblyAI's v3 streaming endpoint. */
|
|
3372
|
+
declare const AssemblyAIEncoding: {
|
|
3373
|
+
readonly PCM_S16LE: "pcm_s16le";
|
|
3374
|
+
readonly PCM_MULAW: "pcm_mulaw";
|
|
3375
|
+
};
|
|
3376
|
+
type AssemblyAIEncoding = (typeof AssemblyAIEncoding)[keyof typeof AssemblyAIEncoding];
|
|
3377
|
+
/** Known AssemblyAI Universal Streaming speech models. */
|
|
3378
|
+
declare const AssemblyAIModel: {
|
|
3379
|
+
readonly UNIVERSAL_STREAMING_ENGLISH: "universal-streaming-english";
|
|
3380
|
+
readonly UNIVERSAL_STREAMING_MULTILINGUAL: "universal-streaming-multilingual";
|
|
3381
|
+
readonly U3_RT_PRO: "u3-rt-pro";
|
|
3382
|
+
readonly WHISPER_RT: "whisper-rt";
|
|
3383
|
+
};
|
|
3384
|
+
type AssemblyAIModel = (typeof AssemblyAIModel)[keyof typeof AssemblyAIModel];
|
|
3385
|
+
/** Valid `domain` values for AssemblyAI's v3 streaming endpoint. */
|
|
3386
|
+
declare const AssemblyAIDomain: {
|
|
3387
|
+
readonly GENERAL: "general";
|
|
3388
|
+
readonly MEDICAL_V1: "medical-v1";
|
|
3389
|
+
};
|
|
3390
|
+
type AssemblyAIDomain = (typeof AssemblyAIDomain)[keyof typeof AssemblyAIDomain];
|
|
3391
|
+
/** Constructor options for {@link AssemblyAISTT}. */
|
|
2303
3392
|
interface AssemblyAISTTOptions$1 {
|
|
2304
3393
|
/** One of the AssemblyAI speech models. */
|
|
2305
3394
|
readonly model?: AssemblyAIModel;
|
|
@@ -2337,6 +3426,7 @@ interface AssemblyAISTTOptions$1 {
|
|
|
2337
3426
|
/** Domain hint — must be `"general"` or `"medical-v1"`. */
|
|
2338
3427
|
readonly domain?: AssemblyAIDomain;
|
|
2339
3428
|
}
|
|
3429
|
+
/** Streaming STT adapter for AssemblyAI's v3 Universal Streaming API. */
|
|
2340
3430
|
declare class AssemblyAISTT {
|
|
2341
3431
|
private readonly apiKey;
|
|
2342
3432
|
private readonly options;
|
|
@@ -2345,6 +3435,22 @@ declare class AssemblyAISTT {
|
|
|
2345
3435
|
private closing;
|
|
2346
3436
|
private reconnectAttempts;
|
|
2347
3437
|
private terminationResolve;
|
|
3438
|
+
/**
|
|
3439
|
+
* Coalescing buffer for inbound audio frames. AssemblyAI's v3
|
|
3440
|
+
* streaming endpoint requires each ws frame to carry 50–1000 ms of
|
|
3441
|
+
* audio (server emits error 3007 below 50 ms — observed in the
|
|
3442
|
+
* field as a fully-billed call with zero transcripts). Twilio sends
|
|
3443
|
+
* 20 ms frames, so the SDK must batch ~3 frames before forwarding.
|
|
3444
|
+
*
|
|
3445
|
+
* We accumulate raw bytes here until the cumulative duration crosses
|
|
3446
|
+
* the configured target (default 60 ms — comfortably above the 50 ms
|
|
3447
|
+
* floor with one frame of headroom against jitter), then flush in a
|
|
3448
|
+
* single `ws.send()`.
|
|
3449
|
+
*/
|
|
3450
|
+
private chunkBuffer;
|
|
3451
|
+
private chunkBufferBytes;
|
|
3452
|
+
/** Target send size in bytes — recomputed lazily once encoding/sample-rate is known. */
|
|
3453
|
+
private chunkBufferTargetBytes;
|
|
2348
3454
|
/** AssemblyAI session id — set when the `Begin` message arrives. */
|
|
2349
3455
|
sessionId: string | null;
|
|
2350
3456
|
/** Unix timestamp when the AssemblyAI session expires. */
|
|
@@ -2354,13 +3460,21 @@ declare class AssemblyAISTT {
|
|
|
2354
3460
|
static forTwilio(apiKey: string, model?: AssemblyAIModel): AssemblyAISTT;
|
|
2355
3461
|
private buildUrl;
|
|
2356
3462
|
private buildHeaders;
|
|
3463
|
+
/** Open the streaming WebSocket and arm message handlers. */
|
|
2357
3464
|
connect(): Promise<void>;
|
|
2358
3465
|
private awaitOpen;
|
|
2359
3466
|
private attachHandlers;
|
|
2360
3467
|
private reconnect;
|
|
2361
3468
|
private handleEvent;
|
|
2362
3469
|
private emit;
|
|
3470
|
+
/** Send a binary PCM/mu-law audio chunk to AssemblyAI for transcription. */
|
|
2363
3471
|
sendAudio(audio: Buffer): void;
|
|
3472
|
+
/**
|
|
3473
|
+
* Compute the byte count corresponding to ~60 ms of audio for the
|
|
3474
|
+
* configured encoding / sample rate. That is three 20 ms Twilio frames —
|
|
3475
|
+
* 10 ms above AssemblyAI's 50 ms floor, so jitter never dips below.
|
|
3476
|
+
*/
|
|
3477
|
+
private computeTargetChunkBytes;
|
|
2364
3478
|
private estimateChunkDurationMs;
|
|
2365
3479
|
/**
|
|
2366
3480
|
* Send an `UpdateConfiguration` frame to change settings mid-stream.
|
|
@@ -2374,7 +3488,9 @@ declare class AssemblyAISTT {
|
|
|
2374
3488
|
}): void;
|
|
2375
3489
|
/** Force the server to finalize the current turn (for barge-in). */
|
|
2376
3490
|
forceEndpoint(): void;
|
|
2377
|
-
|
|
3491
|
+
/** Register a transcript listener. Returns an unsubscribe function. */
|
|
3492
|
+
onTranscript(callback: TranscriptCallback$5): () => void;
|
|
3493
|
+
/** Send a Terminate frame, wait briefly for ack, and close the socket. */
|
|
2378
3494
|
close(): Promise<void>;
|
|
2379
3495
|
}
|
|
2380
3496
|
|
|
@@ -2383,32 +3499,50 @@ declare class AssemblyAISTT {
|
|
|
2383
3499
|
*
|
|
2384
3500
|
* Implements a `DeepgramSTT`-shaped provider using Cartesia's streaming
|
|
2385
3501
|
* WebSocket API. Pure `ws` transport — does NOT depend on the vendor SDK.
|
|
2386
|
-
*
|
|
2387
|
-
* Algorithm adapted from LiveKit Agents (Apache 2.0):
|
|
2388
|
-
* https://github.com/livekit/agents
|
|
2389
|
-
* Source: livekit-plugins/livekit-plugins-cartesia/livekit/plugins/cartesia/stt.py
|
|
2390
|
-
* Upstream ref SHA: 78a66bcf79c5cea82989401c408f1dff4b961a5b
|
|
2391
3502
|
*/
|
|
2392
|
-
|
|
3503
|
+
/** Patter-normalised transcript event emitted by {@link CartesiaSTT}. */
|
|
3504
|
+
interface Transcript$4 {
|
|
2393
3505
|
readonly text: string;
|
|
2394
3506
|
readonly isFinal: boolean;
|
|
2395
3507
|
readonly confidence: number;
|
|
2396
3508
|
}
|
|
2397
|
-
type TranscriptCallback$
|
|
3509
|
+
type TranscriptCallback$4 = (transcript: Transcript$4) => void;
|
|
3510
|
+
/** Known Cartesia STT models. */
|
|
3511
|
+
declare const CartesiaSTTModel: {
|
|
3512
|
+
readonly INK_WHISPER: "ink-whisper";
|
|
3513
|
+
};
|
|
3514
|
+
type CartesiaSTTModel = (typeof CartesiaSTTModel)[keyof typeof CartesiaSTTModel];
|
|
3515
|
+
/** Audio encodings accepted by Cartesia's STT websocket endpoint. */
|
|
3516
|
+
declare const CartesiaSTTEncoding: {
|
|
3517
|
+
readonly PCM_S16LE: "pcm_s16le";
|
|
3518
|
+
};
|
|
3519
|
+
type CartesiaSTTEncoding = (typeof CartesiaSTTEncoding)[keyof typeof CartesiaSTTEncoding];
|
|
3520
|
+
/** Common PCM sample rates accepted by Cartesia STT. */
|
|
3521
|
+
declare const CartesiaSTTSampleRate: {
|
|
3522
|
+
readonly HZ_8000: 8000;
|
|
3523
|
+
readonly HZ_16000: 16000;
|
|
3524
|
+
readonly HZ_24000: 24000;
|
|
3525
|
+
readonly HZ_44100: 44100;
|
|
3526
|
+
readonly HZ_48000: 48000;
|
|
3527
|
+
};
|
|
3528
|
+
type CartesiaSTTSampleRate = (typeof CartesiaSTTSampleRate)[keyof typeof CartesiaSTTSampleRate];
|
|
2398
3529
|
/** Cartesia STT currently only accepts 16-bit PCM little-endian. */
|
|
3530
|
+
/** Legacy encoding alias kept for callers using the bare string form. */
|
|
2399
3531
|
type CartesiaEncoding = 'pcm_s16le';
|
|
3532
|
+
/** Constructor options for {@link CartesiaSTT}. */
|
|
2400
3533
|
interface CartesiaSTTOptions$1 {
|
|
2401
3534
|
/** Cartesia STT model. Currently only `"ink-whisper"`. */
|
|
2402
|
-
readonly model?: string;
|
|
3535
|
+
readonly model?: CartesiaSTTModel | string;
|
|
2403
3536
|
/** BCP-47 language code. */
|
|
2404
3537
|
readonly language?: string;
|
|
2405
3538
|
/** PCM encoding; Cartesia only supports `pcm_s16le`. */
|
|
2406
|
-
readonly encoding?: CartesiaEncoding;
|
|
3539
|
+
readonly encoding?: CartesiaSTTEncoding | CartesiaEncoding;
|
|
2407
3540
|
/** Sample rate in Hz. Cartesia accepts 8000, 16000, 24000, 44100, 48000. */
|
|
2408
|
-
readonly sampleRate?: number;
|
|
3541
|
+
readonly sampleRate?: CartesiaSTTSampleRate | number;
|
|
2409
3542
|
/** Override base URL (HTTP or WS). Defaults to Cartesia prod. */
|
|
2410
3543
|
readonly baseUrl?: string;
|
|
2411
3544
|
}
|
|
3545
|
+
/** Streaming STT adapter for Cartesia's ink-whisper WebSocket API. */
|
|
2412
3546
|
declare class CartesiaSTT {
|
|
2413
3547
|
private readonly apiKey;
|
|
2414
3548
|
private readonly options;
|
|
@@ -2422,13 +3556,16 @@ declare class CartesiaSTT {
|
|
|
2422
3556
|
requestId: string | null;
|
|
2423
3557
|
constructor(apiKey: string, options?: CartesiaSTTOptions$1);
|
|
2424
3558
|
private buildWsUrl;
|
|
3559
|
+
/** Open the streaming WebSocket and arm message + keepalive handlers. */
|
|
2425
3560
|
connect(): Promise<void>;
|
|
2426
3561
|
private handleEvent;
|
|
2427
3562
|
private emit;
|
|
3563
|
+
/** Send a binary PCM16-LE audio chunk to Cartesia for transcription. */
|
|
2428
3564
|
sendAudio(audio: Buffer): void;
|
|
2429
|
-
|
|
3565
|
+
/** Register a transcript listener. */
|
|
3566
|
+
onTranscript(callback: TranscriptCallback$4): void;
|
|
2430
3567
|
/** Remove a previously registered transcript callback. */
|
|
2431
|
-
offTranscript(callback: TranscriptCallback$
|
|
3568
|
+
offTranscript(callback: TranscriptCallback$4): void;
|
|
2432
3569
|
/**
|
|
2433
3570
|
* Synchronous best-effort close. Sends `finalize` and closes the socket
|
|
2434
3571
|
* without waiting for the server to flush any remaining transcripts.
|
|
@@ -2446,9 +3583,35 @@ declare class CartesiaSTT {
|
|
|
2446
3583
|
closeAsync(): Promise<void>;
|
|
2447
3584
|
}
|
|
2448
3585
|
|
|
2449
|
-
|
|
2450
|
-
|
|
2451
|
-
|
|
3586
|
+
/**
|
|
3587
|
+
* LMNT TTS provider — HTTP `/v1/ai/speech/bytes` endpoint.
|
|
3588
|
+
*
|
|
3589
|
+
* Defaults to `format='raw'` (PCM_S16LE) at 16 kHz so the output drops
|
|
3590
|
+
* directly into Patter's telephony pipeline without transcoding.
|
|
3591
|
+
*/
|
|
3592
|
+
/** Supported LMNT audio output formats. `RAW` is PCM_S16LE. */
|
|
3593
|
+
declare const LMNTAudioFormat: {
|
|
3594
|
+
readonly AAC: "aac";
|
|
3595
|
+
readonly MP3: "mp3";
|
|
3596
|
+
readonly MULAW: "mulaw";
|
|
3597
|
+
readonly RAW: "raw";
|
|
3598
|
+
readonly WAV: "wav";
|
|
3599
|
+
};
|
|
3600
|
+
type LMNTAudioFormat = (typeof LMNTAudioFormat)[keyof typeof LMNTAudioFormat];
|
|
3601
|
+
/** LMNT TTS model families. */
|
|
3602
|
+
declare const LMNTModel: {
|
|
3603
|
+
readonly BLIZZARD: "blizzard";
|
|
3604
|
+
readonly AURORA: "aurora";
|
|
3605
|
+
};
|
|
3606
|
+
type LMNTModel = (typeof LMNTModel)[keyof typeof LMNTModel];
|
|
3607
|
+
/** Supported PCM sample rates for LMNT raw output. */
|
|
3608
|
+
declare const LMNTSampleRate: {
|
|
3609
|
+
readonly HZ_8000: 8000;
|
|
3610
|
+
readonly HZ_16000: 16000;
|
|
3611
|
+
readonly HZ_24000: 24000;
|
|
3612
|
+
};
|
|
3613
|
+
type LMNTSampleRate = (typeof LMNTSampleRate)[keyof typeof LMNTSampleRate];
|
|
3614
|
+
/** Constructor options for {@link LMNTTTS}. */
|
|
2452
3615
|
interface LMNTTTSOptions$1 {
|
|
2453
3616
|
model?: LMNTModel;
|
|
2454
3617
|
voice?: string;
|
|
@@ -2459,6 +3622,7 @@ interface LMNTTTSOptions$1 {
|
|
|
2459
3622
|
topP?: number;
|
|
2460
3623
|
baseUrl?: string;
|
|
2461
3624
|
}
|
|
3625
|
+
/** LMNT TTS adapter backed by the `/v1/ai/speech/bytes` HTTP streaming endpoint. */
|
|
2462
3626
|
declare class LMNTTTS {
|
|
2463
3627
|
private readonly apiKey;
|
|
2464
3628
|
private readonly model;
|
|
@@ -2471,12 +3635,23 @@ declare class LMNTTTS {
|
|
|
2471
3635
|
private readonly baseUrl;
|
|
2472
3636
|
constructor(apiKey: string, opts?: LMNTTTSOptions$1);
|
|
2473
3637
|
private buildPayload;
|
|
3638
|
+
/** Synthesize text and return the concatenated audio buffer. */
|
|
2474
3639
|
synthesize(text: string): Promise<Buffer>;
|
|
2475
3640
|
/** Yield audio chunks as they arrive — raw PCM_S16LE by default. */
|
|
2476
3641
|
synthesizeStream(text: string): AsyncGenerator<Buffer>;
|
|
2477
3642
|
}
|
|
2478
3643
|
|
|
3644
|
+
/**
|
|
3645
|
+
* Deepgram streaming STT adapter for the Patter SDK pipeline mode.
|
|
3646
|
+
*
|
|
3647
|
+
* Pure `ws` transport — connects to `wss://api.deepgram.com/v1/listen` with
|
|
3648
|
+
* a long-lived KeepAlive pump and emits Patter-normalised {@link Transcript}
|
|
3649
|
+
* events through {@link DeepgramSTT.onTranscript}. See {@link DeepgramSTT}
|
|
3650
|
+
* for the public class.
|
|
3651
|
+
*/
|
|
3652
|
+
/** Which Deepgram server event a {@link Transcript} represents. */
|
|
2479
3653
|
type TranscriptEventType = 'Results' | 'UtteranceEnd' | 'SpeechStarted';
|
|
3654
|
+
/** Per-word timing/confidence record returned by Deepgram in `words[]`. */
|
|
2480
3655
|
interface DeepgramWord {
|
|
2481
3656
|
readonly word?: string;
|
|
2482
3657
|
readonly start?: number;
|
|
@@ -2485,7 +3660,8 @@ interface DeepgramWord {
|
|
|
2485
3660
|
readonly punctuated_word?: string;
|
|
2486
3661
|
readonly speaker?: number;
|
|
2487
3662
|
}
|
|
2488
|
-
|
|
3663
|
+
/** Patter-normalised transcript event emitted by {@link DeepgramSTT}. */
|
|
3664
|
+
interface Transcript$3 {
|
|
2489
3665
|
readonly text: string;
|
|
2490
3666
|
readonly isFinal: boolean;
|
|
2491
3667
|
readonly confidence: number;
|
|
@@ -2500,8 +3676,8 @@ interface Transcript$1 {
|
|
|
2500
3676
|
/** Which provider event this Transcript represents. Default ``Results``. */
|
|
2501
3677
|
readonly eventType?: TranscriptEventType;
|
|
2502
3678
|
}
|
|
2503
|
-
type TranscriptCallback$
|
|
2504
|
-
type ErrorCallback = (error: Error) => void;
|
|
3679
|
+
type TranscriptCallback$3 = (transcript: Transcript$3) => void;
|
|
3680
|
+
type ErrorCallback$1 = (error: Error) => void;
|
|
2505
3681
|
/**
|
|
2506
3682
|
* Optional tuning knobs for Deepgram live transcription.
|
|
2507
3683
|
*
|
|
@@ -2539,6 +3715,7 @@ interface DeepgramSTTOptions$1 {
|
|
|
2539
3715
|
/** Emit VAD events (``SpeechStarted`` / ``UtteranceEnd``). Default ``true``. */
|
|
2540
3716
|
readonly vadEvents?: boolean;
|
|
2541
3717
|
}
|
|
3718
|
+
/** Streaming STT adapter for Deepgram's `/v1/listen` WebSocket API. */
|
|
2542
3719
|
declare class DeepgramSTT {
|
|
2543
3720
|
private ws;
|
|
2544
3721
|
private readonly transcriptCallbacks;
|
|
@@ -2572,6 +3749,7 @@ declare class DeepgramSTT {
|
|
|
2572
3749
|
/** Factory for Twilio calls — mulaw 8 kHz. Forwards tuning options through. */
|
|
2573
3750
|
static forTwilio(apiKey: string, language?: string, model?: string, options?: DeepgramSTTOptions$1): DeepgramSTT;
|
|
2574
3751
|
private buildUrl;
|
|
3752
|
+
/** Open the streaming WebSocket and arm message + keepalive handlers. */
|
|
2575
3753
|
connect(): Promise<void>;
|
|
2576
3754
|
private openSocket;
|
|
2577
3755
|
private clearKeepalive;
|
|
@@ -2580,11 +3758,31 @@ declare class DeepgramSTT {
|
|
|
2580
3758
|
private emitError;
|
|
2581
3759
|
private handleError;
|
|
2582
3760
|
private handleClose;
|
|
3761
|
+
/** Send a binary audio chunk to Deepgram for transcription. */
|
|
2583
3762
|
sendAudio(audio: Buffer): void;
|
|
2584
|
-
|
|
2585
|
-
|
|
2586
|
-
|
|
2587
|
-
|
|
3763
|
+
private audioSentCount;
|
|
3764
|
+
private audioDroppedCount;
|
|
3765
|
+
/** Register a transcript listener. */
|
|
3766
|
+
onTranscript(callback: TranscriptCallback$3): void;
|
|
3767
|
+
/** Remove a previously registered transcript listener. */
|
|
3768
|
+
offTranscript(callback: TranscriptCallback$3): void;
|
|
3769
|
+
/** Register an error listener for socket / API failures. */
|
|
3770
|
+
onError(callback: ErrorCallback$1): void;
|
|
3771
|
+
/** Remove a previously registered error listener. */
|
|
3772
|
+
offError(callback: ErrorCallback$1): void;
|
|
3773
|
+
/**
|
|
3774
|
+
* Force Deepgram to immediately emit a final ``Results`` frame for the
|
|
3775
|
+
* in-flight utterance, rather than waiting for its own endpoint
|
|
3776
|
+
* heuristic (utterance_end_ms ~1 s + natural-pause endpointing).
|
|
3777
|
+
* Called by the SDK on VAD ``speech_end`` and after barge-in cancel —
|
|
3778
|
+
* both moments where the SDK already knows the user has stopped
|
|
3779
|
+
* speaking and waiting for Deepgram's own endpointing only adds
|
|
3780
|
+
* dead air.
|
|
3781
|
+
*
|
|
3782
|
+
* Idempotent: safe to call when the socket is closed/closing.
|
|
3783
|
+
*/
|
|
3784
|
+
finalize(): void;
|
|
3785
|
+
/** Send Finalize, briefly drain trailing transcripts, then close the socket. */
|
|
2588
3786
|
close(): void;
|
|
2589
3787
|
}
|
|
2590
3788
|
|
|
@@ -2605,7 +3803,7 @@ type DeepgramSTTOptions = DeepgramSTTOptions$1 & {
|
|
|
2605
3803
|
* const stt = new deepgram.STT({ apiKey: "dg_...", endpointingMs: 80 });
|
|
2606
3804
|
* ```
|
|
2607
3805
|
*/
|
|
2608
|
-
declare class STT$
|
|
3806
|
+
declare class STT$6 extends DeepgramSTT {
|
|
2609
3807
|
static readonly providerKey = "deepgram";
|
|
2610
3808
|
constructor(opts?: DeepgramSTTOptions);
|
|
2611
3809
|
}
|
|
@@ -2616,13 +3814,16 @@ declare class STT$5 extends DeepgramSTT {
|
|
|
2616
3814
|
* Buffers incoming PCM16 audio and periodically sends it to the
|
|
2617
3815
|
* OpenAI Whisper transcription API as a WAV file.
|
|
2618
3816
|
*/
|
|
2619
|
-
|
|
3817
|
+
/** Patter-normalised transcript event emitted by {@link WhisperSTT}. */
|
|
3818
|
+
interface Transcript$2 {
|
|
2620
3819
|
readonly text: string;
|
|
2621
3820
|
readonly isFinal: boolean;
|
|
2622
3821
|
readonly confidence: number;
|
|
2623
3822
|
}
|
|
2624
|
-
type TranscriptCallback = (transcript: Transcript) => void;
|
|
3823
|
+
type TranscriptCallback$2 = (transcript: Transcript$2) => void;
|
|
3824
|
+
/** Response format requested from `POST /v1/audio/transcriptions`. */
|
|
2625
3825
|
type WhisperResponseFormat = 'json' | 'verbose_json';
|
|
3826
|
+
/** Buffered STT adapter for OpenAI's Whisper transcription HTTP API. */
|
|
2626
3827
|
declare class WhisperSTT {
|
|
2627
3828
|
private readonly apiKey;
|
|
2628
3829
|
private readonly model;
|
|
@@ -2649,7 +3850,9 @@ declare class WhisperSTT {
|
|
|
2649
3850
|
constructor(apiKey: string, language?: string, model?: string, bufferSize?: number, responseFormat?: WhisperResponseFormat);
|
|
2650
3851
|
/** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
|
|
2651
3852
|
static forTwilio(apiKey: string, language?: string, model?: string): WhisperSTT;
|
|
3853
|
+
/** Reset the audio buffer and arm the adapter for incoming chunks. */
|
|
2652
3854
|
connect(): Promise<void>;
|
|
3855
|
+
/** Buffer a PCM16 chunk; flushes to Whisper once `bufferSize` bytes are reached. */
|
|
2653
3856
|
sendAudio(audio: Buffer): void;
|
|
2654
3857
|
private flushChunks;
|
|
2655
3858
|
private trackTranscription;
|
|
@@ -2658,14 +3861,17 @@ declare class WhisperSTT {
|
|
|
2658
3861
|
* which capped at 10 and silently replaced the last one, we now keep all
|
|
2659
3862
|
* registered callbacks in a Set; use {@link offTranscript} to remove one.
|
|
2660
3863
|
*/
|
|
2661
|
-
onTranscript(callback: TranscriptCallback): void;
|
|
2662
|
-
|
|
3864
|
+
onTranscript(callback: TranscriptCallback$2): void;
|
|
3865
|
+
/** Remove a previously registered transcript listener. */
|
|
3866
|
+
offTranscript(callback: TranscriptCallback$2): void;
|
|
3867
|
+
/** Flush any buffered audio, await pending transcriptions, and clear listeners. */
|
|
2663
3868
|
close(): Promise<void>;
|
|
2664
3869
|
private transcribeBuffer;
|
|
2665
3870
|
}
|
|
2666
3871
|
|
|
2667
3872
|
/** OpenAI Whisper STT for Patter pipeline mode. */
|
|
2668
3873
|
|
|
3874
|
+
/** Constructor options for the Whisper `STT` adapter. */
|
|
2669
3875
|
interface WhisperSTTOptions {
|
|
2670
3876
|
/** API key. Falls back to OPENAI_API_KEY env var when omitted. */
|
|
2671
3877
|
apiKey?: string;
|
|
@@ -2685,7 +3891,7 @@ interface WhisperSTTOptions {
|
|
|
2685
3891
|
* const stt = new whisper.STT({ apiKey: "sk-...", language: "en" });
|
|
2686
3892
|
* ```
|
|
2687
3893
|
*/
|
|
2688
|
-
declare class STT$
|
|
3894
|
+
declare class STT$5 extends WhisperSTT {
|
|
2689
3895
|
static readonly providerKey = "whisper";
|
|
2690
3896
|
constructor(opts?: WhisperSTTOptions);
|
|
2691
3897
|
}
|
|
@@ -2705,6 +3911,7 @@ declare class STT$4 extends WhisperSTT {
|
|
|
2705
3911
|
* ``whisper-1``.
|
|
2706
3912
|
*/
|
|
2707
3913
|
|
|
3914
|
+
/** STT adapter restricted to OpenAI's GPT-4o Transcribe model family. */
|
|
2708
3915
|
declare class OpenAITranscribeSTT extends WhisperSTT {
|
|
2709
3916
|
/**
|
|
2710
3917
|
* @param apiKey OpenAI API key.
|
|
@@ -2719,6 +3926,7 @@ declare class OpenAITranscribeSTT extends WhisperSTT {
|
|
|
2719
3926
|
|
|
2720
3927
|
/** OpenAI GPT-4o Transcribe STT for Patter pipeline mode. */
|
|
2721
3928
|
|
|
3929
|
+
/** Constructor options for the OpenAI Transcribe `STT` adapter. */
|
|
2722
3930
|
interface OpenAITranscribeSTTOptions {
|
|
2723
3931
|
/** API key. Falls back to OPENAI_API_KEY env var when omitted. */
|
|
2724
3932
|
apiKey?: string;
|
|
@@ -2742,13 +3950,14 @@ interface OpenAITranscribeSTTOptions {
|
|
|
2742
3950
|
* const stt = new openaiTranscribe.STT({ apiKey: "sk-...", language: "en" });
|
|
2743
3951
|
* ```
|
|
2744
3952
|
*/
|
|
2745
|
-
declare class STT$
|
|
3953
|
+
declare class STT$4 extends OpenAITranscribeSTT {
|
|
2746
3954
|
static readonly providerKey = "openai_transcribe";
|
|
2747
3955
|
constructor(opts?: OpenAITranscribeSTTOptions);
|
|
2748
3956
|
}
|
|
2749
3957
|
|
|
2750
3958
|
/** Cartesia streaming STT for Patter pipeline mode. */
|
|
2751
3959
|
|
|
3960
|
+
/** Constructor options for the Cartesia `STT` adapter. */
|
|
2752
3961
|
interface CartesiaSTTOptions {
|
|
2753
3962
|
/** API key. Falls back to CARTESIA_API_KEY env var when omitted. */
|
|
2754
3963
|
apiKey?: string;
|
|
@@ -2768,13 +3977,14 @@ interface CartesiaSTTOptions {
|
|
|
2768
3977
|
* const stt = new cartesia.STT({ apiKey: "..." });
|
|
2769
3978
|
* ```
|
|
2770
3979
|
*/
|
|
2771
|
-
declare class STT$
|
|
3980
|
+
declare class STT$3 extends CartesiaSTT {
|
|
2772
3981
|
static readonly providerKey = "cartesia_stt";
|
|
2773
3982
|
constructor(opts?: CartesiaSTTOptions);
|
|
2774
3983
|
}
|
|
2775
3984
|
|
|
2776
3985
|
/** Soniox streaming STT for Patter pipeline mode. */
|
|
2777
3986
|
|
|
3987
|
+
/** Constructor options for the Soniox `STT` adapter. */
|
|
2778
3988
|
interface SonioxSTTOptions {
|
|
2779
3989
|
/** API key. Falls back to SONIOX_API_KEY env var when omitted. */
|
|
2780
3990
|
apiKey?: string;
|
|
@@ -2799,13 +4009,14 @@ interface SonioxSTTOptions {
|
|
|
2799
4009
|
* const stt = new soniox.STT({ apiKey: "..." });
|
|
2800
4010
|
* ```
|
|
2801
4011
|
*/
|
|
2802
|
-
declare class STT$
|
|
4012
|
+
declare class STT$2 extends SonioxSTT {
|
|
2803
4013
|
static readonly providerKey = "soniox";
|
|
2804
4014
|
constructor(opts?: SonioxSTTOptions);
|
|
2805
4015
|
}
|
|
2806
4016
|
|
|
2807
4017
|
/** AssemblyAI Universal Streaming STT for Patter pipeline mode. */
|
|
2808
4018
|
|
|
4019
|
+
/** Constructor options for the AssemblyAI `STT` adapter. */
|
|
2809
4020
|
interface AssemblyAISTTOptions {
|
|
2810
4021
|
/** API key. Falls back to ASSEMBLYAI_API_KEY env var when omitted. */
|
|
2811
4022
|
apiKey?: string;
|
|
@@ -2814,6 +4025,17 @@ interface AssemblyAISTTOptions {
|
|
|
2814
4025
|
sampleRate?: number;
|
|
2815
4026
|
baseUrl?: string;
|
|
2816
4027
|
languageDetection?: boolean;
|
|
4028
|
+
/**
|
|
4029
|
+
* BCP-47 language hint (e.g. ``"it"``, ``"en"``). AssemblyAI does NOT
|
|
4030
|
+
* expose a per-call language override — the language is determined by
|
|
4031
|
+
* the chosen ``model`` (English-only models reject non-English audio,
|
|
4032
|
+
* multilingual models auto-detect). This field is accepted for
|
|
4033
|
+
* cross-provider parity with ``DeepgramSTT``/``WhisperSTT``/
|
|
4034
|
+
* ``OpenAITranscribeSTT``/``CartesiaSTT`` but is currently a no-op:
|
|
4035
|
+
* pick a multilingual ``model`` (e.g. ``universal-streaming-pro``)
|
|
4036
|
+
* and the provider will detect Italian automatically.
|
|
4037
|
+
*/
|
|
4038
|
+
language?: string;
|
|
2817
4039
|
endOfTurnConfidenceThreshold?: number;
|
|
2818
4040
|
minTurnSilence?: number;
|
|
2819
4041
|
maxTurnSilence?: number;
|
|
@@ -2835,31 +4057,234 @@ interface AssemblyAISTTOptions {
|
|
|
2835
4057
|
* const stt = new assemblyai.STT({ apiKey: "..." });
|
|
2836
4058
|
* ```
|
|
2837
4059
|
*/
|
|
2838
|
-
declare class STT extends AssemblyAISTT {
|
|
4060
|
+
declare class STT$1 extends AssemblyAISTT {
|
|
2839
4061
|
static readonly providerKey = "assemblyai";
|
|
2840
4062
|
constructor(opts?: AssemblyAISTTOptions);
|
|
2841
4063
|
}
|
|
2842
4064
|
|
|
4065
|
+
/**
|
|
4066
|
+
* Speechmatics Speech-to-Text adapter for the Patter SDK pipeline mode.
|
|
4067
|
+
*
|
|
4068
|
+
* Streams PCM audio to the Speechmatics real-time WebSocket API
|
|
4069
|
+
* (`wss://eu.rt.speechmatics.com/v2`) and emits Patter-normalised
|
|
4070
|
+
* {@link Transcript} events. Mirrors `SpeechmaticsSTT` in the Python SDK.
|
|
4071
|
+
*
|
|
4072
|
+
* Divergence from Python: the Python adapter wraps the official
|
|
4073
|
+
* `speechmatics-voice` Python SDK (Voice Agent presets, smart turn
|
|
4074
|
+
* detection, etc.). No equivalent Node SDK is published, so this TypeScript
|
|
4075
|
+
* adapter speaks the underlying RT v2 wire protocol directly via `ws`.
|
|
4076
|
+
* The user-facing options (`turnDetectionMode`, `endOfUtteranceSilenceTrigger`,
|
|
4077
|
+
* `maxDelay`, `enablePartials`, `enableDiarization`, `additionalVocab`,
|
|
4078
|
+
* `operatingPoint`, `domain`, `outputLocale`, `language`, `sampleRate`) all
|
|
4079
|
+
* map 1:1 onto the Python adapter so callers can switch SDKs without
|
|
4080
|
+
* changing their config.
|
|
4081
|
+
*/
|
|
4082
|
+
/** Patter-normalised transcript event emitted by {@link SpeechmaticsSTT}. */
|
|
4083
|
+
interface Transcript$1 {
|
|
4084
|
+
readonly text: string;
|
|
4085
|
+
readonly isFinal: boolean;
|
|
4086
|
+
readonly confidence: number;
|
|
4087
|
+
}
|
|
4088
|
+
type TranscriptCallback$1 = (transcript: Transcript$1) => void;
|
|
4089
|
+
type ErrorCallback = (error: Error) => void;
|
|
4090
|
+
/**
|
|
4091
|
+
* Endpoint / turn-detection handling mode. Mirrors the values accepted by
|
|
4092
|
+
* Python's `TurnDetectionMode`. Maps onto Speechmatics's
|
|
4093
|
+
* `conversation_config` knobs on the wire.
|
|
4094
|
+
*/
|
|
4095
|
+
declare const TurnDetectionMode: {
|
|
4096
|
+
readonly EXTERNAL: "external";
|
|
4097
|
+
readonly FIXED: "fixed";
|
|
4098
|
+
readonly ADAPTIVE: "adaptive";
|
|
4099
|
+
readonly SMART_TURN: "smart_turn";
|
|
4100
|
+
};
|
|
4101
|
+
type TurnDetectionMode = (typeof TurnDetectionMode)[keyof typeof TurnDetectionMode];
|
|
4102
|
+
/** Common PCM sample rates for Speechmatics streaming input. */
|
|
4103
|
+
declare const SpeechmaticsSampleRate: {
|
|
4104
|
+
readonly HZ_8000: 8000;
|
|
4105
|
+
readonly HZ_16000: 16000;
|
|
4106
|
+
readonly HZ_44100: 44100;
|
|
4107
|
+
};
|
|
4108
|
+
type SpeechmaticsSampleRate = (typeof SpeechmaticsSampleRate)[keyof typeof SpeechmaticsSampleRate];
|
|
4109
|
+
/** Audio encodings accepted by Speechmatics's real-time API. */
|
|
4110
|
+
declare const SpeechmaticsAudioEncoding: {
|
|
4111
|
+
readonly PCM_S16LE: "pcm_s16le";
|
|
4112
|
+
};
|
|
4113
|
+
type SpeechmaticsAudioEncoding = (typeof SpeechmaticsAudioEncoding)[keyof typeof SpeechmaticsAudioEncoding];
|
|
4114
|
+
/** Speechmatics operating points (accuracy vs latency trade-off). */
|
|
4115
|
+
declare const SpeechmaticsOperatingPoint: {
|
|
4116
|
+
readonly ENHANCED: "enhanced";
|
|
4117
|
+
readonly STANDARD: "standard";
|
|
4118
|
+
};
|
|
4119
|
+
type SpeechmaticsOperatingPoint = (typeof SpeechmaticsOperatingPoint)[keyof typeof SpeechmaticsOperatingPoint];
|
|
4120
|
+
/** Speechmatics RT server-message type names emitted to the client. */
|
|
4121
|
+
declare const SpeechmaticsServerMessage: {
|
|
4122
|
+
readonly RECOGNITION_STARTED: "RecognitionStarted";
|
|
4123
|
+
readonly ADD_PARTIAL_TRANSCRIPT: "AddPartialTranscript";
|
|
4124
|
+
readonly ADD_TRANSCRIPT: "AddTranscript";
|
|
4125
|
+
readonly END_OF_UTTERANCE: "EndOfUtterance";
|
|
4126
|
+
readonly END_OF_TRANSCRIPT: "EndOfTranscript";
|
|
4127
|
+
readonly AUDIO_ADDED: "AudioAdded";
|
|
4128
|
+
readonly INFO: "Info";
|
|
4129
|
+
readonly WARNING: "Warning";
|
|
4130
|
+
readonly ERROR: "Error";
|
|
4131
|
+
};
|
|
4132
|
+
type SpeechmaticsServerMessage = (typeof SpeechmaticsServerMessage)[keyof typeof SpeechmaticsServerMessage];
|
|
4133
|
+
/** Constructor options for {@link SpeechmaticsSTT}. */
|
|
4134
|
+
interface SpeechmaticsSTTOptions$1 {
|
|
4135
|
+
/** Override the realtime endpoint (default `wss://eu.rt.speechmatics.com/v2`). */
|
|
4136
|
+
readonly baseUrl?: string;
|
|
4137
|
+
/** BCP-47 language code. Default `"en"`. */
|
|
4138
|
+
readonly language?: string;
|
|
4139
|
+
/** Endpoint / turn-detection mode. Default `"adaptive"`. */
|
|
4140
|
+
readonly turnDetectionMode?: TurnDetectionMode;
|
|
4141
|
+
/** PCM sample rate (Hz). Default 16000. */
|
|
4142
|
+
readonly sampleRate?: SpeechmaticsSampleRate | number;
|
|
4143
|
+
/** Attach speaker IDs to transcripts. Default `false`. */
|
|
4144
|
+
readonly enableDiarization?: boolean;
|
|
4145
|
+
/** Max latency in seconds before the engine emits finals. Range 0.7..4.0. */
|
|
4146
|
+
readonly maxDelay?: number;
|
|
4147
|
+
/** Silence (s) that triggers EOU. Range (0, 2). */
|
|
4148
|
+
readonly endOfUtteranceSilenceTrigger?: number;
|
|
4149
|
+
/** Max EOU delay (s); must exceed `endOfUtteranceSilenceTrigger`. */
|
|
4150
|
+
readonly endOfUtteranceMaxDelay?: number;
|
|
4151
|
+
/** Include partial transcripts in interim output. Default `true`. */
|
|
4152
|
+
readonly includePartials?: boolean;
|
|
4153
|
+
/** Additional vocabulary entries (`{content, sounds_like?}`). */
|
|
4154
|
+
readonly additionalVocab?: ReadonlyArray<Record<string, unknown>>;
|
|
4155
|
+
/** Operating point (`enhanced` | `standard`). */
|
|
4156
|
+
readonly operatingPoint?: SpeechmaticsOperatingPoint;
|
|
4157
|
+
/** Optional Speechmatics domain (e.g. `"finance"`). */
|
|
4158
|
+
readonly domain?: string;
|
|
4159
|
+
/** Optional output locale (e.g. `"en-GB"`). */
|
|
4160
|
+
readonly outputLocale?: string;
|
|
4161
|
+
}
|
|
4162
|
+
/**
|
|
4163
|
+
* Streaming STT adapter for Speechmatics's RT v2 WebSocket API.
|
|
4164
|
+
*
|
|
4165
|
+
* @example
|
|
4166
|
+
* ```ts
|
|
4167
|
+
* const stt = new SpeechmaticsSTT('sm_api_key', { language: 'en' });
|
|
4168
|
+
* stt.onTranscript((t) => console.log(t.text, t.isFinal));
|
|
4169
|
+
* await stt.connect();
|
|
4170
|
+
* stt.sendAudio(pcm16Chunk);
|
|
4171
|
+
* stt.close();
|
|
4172
|
+
* ```
|
|
4173
|
+
*/
|
|
4174
|
+
declare class SpeechmaticsSTT {
|
|
4175
|
+
private ws;
|
|
4176
|
+
private readonly transcriptCallbacks;
|
|
4177
|
+
private readonly errorCallbacks;
|
|
4178
|
+
private running;
|
|
4179
|
+
/** Sequence number of the last audio chunk acknowledged via `AudioAdded`. */
|
|
4180
|
+
private lastSeqNo;
|
|
4181
|
+
private readonly apiKey;
|
|
4182
|
+
private readonly baseUrl;
|
|
4183
|
+
private readonly language;
|
|
4184
|
+
private readonly turnDetectionMode;
|
|
4185
|
+
private readonly sampleRate;
|
|
4186
|
+
private readonly enableDiarization;
|
|
4187
|
+
private readonly maxDelay;
|
|
4188
|
+
private readonly endOfUtteranceSilenceTrigger;
|
|
4189
|
+
private readonly endOfUtteranceMaxDelay;
|
|
4190
|
+
private readonly includePartials;
|
|
4191
|
+
private readonly additionalVocab;
|
|
4192
|
+
private readonly operatingPoint;
|
|
4193
|
+
private readonly domain;
|
|
4194
|
+
private readonly outputLocale;
|
|
4195
|
+
constructor(apiKey: string, options?: SpeechmaticsSTTOptions$1);
|
|
4196
|
+
/** Build the JSON `StartRecognition` payload sent on connect. */
|
|
4197
|
+
private buildStartRecognition;
|
|
4198
|
+
/** Open the streaming WebSocket and send the `StartRecognition` frame. */
|
|
4199
|
+
connect(): Promise<void>;
|
|
4200
|
+
/** Send a binary PCM16-LE audio chunk to Speechmatics for transcription. */
|
|
4201
|
+
sendAudio(audio: Buffer): void;
|
|
4202
|
+
/** Register a transcript listener. */
|
|
4203
|
+
onTranscript(callback: TranscriptCallback$1): void;
|
|
4204
|
+
/** Remove a previously registered transcript listener. */
|
|
4205
|
+
offTranscript(callback: TranscriptCallback$1): void;
|
|
4206
|
+
/** Register an error listener for socket / API failures. */
|
|
4207
|
+
onError(callback: ErrorCallback): void;
|
|
4208
|
+
/** Remove a previously registered error listener. */
|
|
4209
|
+
offError(callback: ErrorCallback): void;
|
|
4210
|
+
private handleMessage;
|
|
4211
|
+
/** Translate a Speechmatics transcript message into a Patter `Transcript`. */
|
|
4212
|
+
private toTranscript;
|
|
4213
|
+
private emitTranscript;
|
|
4214
|
+
private emitError;
|
|
4215
|
+
private handleError;
|
|
4216
|
+
private handleClose;
|
|
4217
|
+
/** Send `EndOfStream` and close the WebSocket. Idempotent. */
|
|
4218
|
+
close(): void;
|
|
4219
|
+
}
|
|
4220
|
+
|
|
4221
|
+
/** Speechmatics streaming STT for Patter pipeline mode. */
|
|
4222
|
+
|
|
4223
|
+
type SpeechmaticsSTTOptions = SpeechmaticsSTTOptions$1 & {
|
|
4224
|
+
/** API key. Falls back to SPEECHMATICS_API_KEY env var when omitted. */
|
|
4225
|
+
apiKey?: string;
|
|
4226
|
+
};
|
|
4227
|
+
/**
|
|
4228
|
+
* Speechmatics streaming STT.
|
|
4229
|
+
*
|
|
4230
|
+
* @example
|
|
4231
|
+
* ```ts
|
|
4232
|
+
* import * as speechmatics from "getpatter/stt/speechmatics";
|
|
4233
|
+
* const stt = new speechmatics.STT(); // reads SPEECHMATICS_API_KEY
|
|
4234
|
+
* const stt = new speechmatics.STT({ apiKey: "sm_...", language: "en" });
|
|
4235
|
+
* ```
|
|
4236
|
+
*/
|
|
4237
|
+
declare class STT extends SpeechmaticsSTT {
|
|
4238
|
+
static readonly providerKey = "speechmatics";
|
|
4239
|
+
constructor(opts?: SpeechmaticsSTTOptions);
|
|
4240
|
+
}
|
|
4241
|
+
|
|
2843
4242
|
/**
|
|
2844
4243
|
* Known stable ElevenLabs voice models (from the official ElevenLabs API
|
|
2845
|
-
* reference).
|
|
2846
|
-
*
|
|
2847
|
-
*
|
|
2848
|
-
*
|
|
2849
|
-
*
|
|
2850
|
-
* -
|
|
2851
|
-
* -
|
|
2852
|
-
* -
|
|
2853
|
-
* -
|
|
2854
|
-
|
|
2855
|
-
|
|
2856
|
-
|
|
4244
|
+
* reference). Exposed as a typed `as const` object so callers can pass
|
|
4245
|
+
* `ElevenLabsModel.FLASH_V2_5` and get autocomplete / static checking; the
|
|
4246
|
+
* public `modelId` option also accepts an arbitrary `string` so users can
|
|
4247
|
+
* pass forward-compat IDs we haven't enumerated yet.
|
|
4248
|
+
*
|
|
4249
|
+
* - `V3` — newest, highest quality (slower TTFT than Flash).
|
|
4250
|
+
* - `FLASH_V2_5` — current default, fastest (~75 ms TTFT).
|
|
4251
|
+
* - `TURBO_V2_5` — balanced quality/speed.
|
|
4252
|
+
* - `MULTILINGUAL_V2` — best multilingual support.
|
|
4253
|
+
* - `MONOLINGUAL_V1` — legacy English-only.
|
|
4254
|
+
*/
|
|
4255
|
+
declare const ElevenLabsModel: {
|
|
4256
|
+
readonly V3: "eleven_v3";
|
|
4257
|
+
readonly FLASH_V2_5: "eleven_flash_v2_5";
|
|
4258
|
+
readonly TURBO_V2_5: "eleven_turbo_v2_5";
|
|
4259
|
+
readonly MULTILINGUAL_V2: "eleven_multilingual_v2";
|
|
4260
|
+
readonly MONOLINGUAL_V1: "eleven_monolingual_v1";
|
|
4261
|
+
};
|
|
4262
|
+
/** Union of {@link ElevenLabsModel} string values. */
|
|
4263
|
+
type ElevenLabsModel = (typeof ElevenLabsModel)[keyof typeof ElevenLabsModel];
|
|
4264
|
+
declare const ElevenLabsOutputFormat: {
|
|
4265
|
+
readonly MP3_22050_32: "mp3_22050_32";
|
|
4266
|
+
readonly MP3_44100_32: "mp3_44100_32";
|
|
4267
|
+
readonly MP3_44100_64: "mp3_44100_64";
|
|
4268
|
+
readonly MP3_44100_96: "mp3_44100_96";
|
|
4269
|
+
readonly MP3_44100_128: "mp3_44100_128";
|
|
4270
|
+
readonly MP3_44100_192: "mp3_44100_192";
|
|
4271
|
+
readonly PCM_8000: "pcm_8000";
|
|
4272
|
+
readonly PCM_16000: "pcm_16000";
|
|
4273
|
+
readonly PCM_22050: "pcm_22050";
|
|
4274
|
+
readonly PCM_24000: "pcm_24000";
|
|
4275
|
+
readonly PCM_44100: "pcm_44100";
|
|
4276
|
+
readonly ULAW_8000: "ulaw_8000";
|
|
4277
|
+
};
|
|
4278
|
+
/** Union of {@link ElevenLabsOutputFormat} string values. */
|
|
4279
|
+
type ElevenLabsOutputFormat = (typeof ElevenLabsOutputFormat)[keyof typeof ElevenLabsOutputFormat];
|
|
4280
|
+
/** ElevenLabs voice tuning knobs forwarded as `voice_settings` in the request. */
|
|
2857
4281
|
interface ElevenLabsVoiceSettings {
|
|
2858
4282
|
stability?: number;
|
|
2859
4283
|
similarity_boost?: number;
|
|
2860
4284
|
style?: number;
|
|
2861
4285
|
use_speaker_boost?: boolean;
|
|
2862
4286
|
}
|
|
4287
|
+
/** Constructor options for {@link ElevenLabsTTS}. */
|
|
2863
4288
|
interface ElevenLabsTTSOptions$1 {
|
|
2864
4289
|
voiceId?: string;
|
|
2865
4290
|
/**
|
|
@@ -2951,16 +4376,25 @@ declare class ElevenLabsTTS {
|
|
|
2951
4376
|
|
|
2952
4377
|
/** ElevenLabs TTS for Patter pipeline mode. */
|
|
2953
4378
|
|
|
4379
|
+
/** Constructor options for the ElevenLabs `TTS` adapter. */
|
|
2954
4380
|
interface ElevenLabsTTSOptions {
|
|
2955
4381
|
/** API key. Falls back to ELEVENLABS_API_KEY env var when omitted. */
|
|
2956
|
-
apiKey?: string;
|
|
2957
|
-
voiceId?: string;
|
|
4382
|
+
readonly apiKey?: string;
|
|
4383
|
+
readonly voiceId?: string;
|
|
2958
4384
|
/**
|
|
2959
4385
|
* ElevenLabs voice model ID. Default is ``eleven_flash_v2_5`` (lowest TTFT).
|
|
2960
4386
|
* Pass ``eleven_v3`` for highest quality, or any string for forward-compat.
|
|
2961
4387
|
*/
|
|
2962
|
-
modelId?: ElevenLabsModel | string;
|
|
2963
|
-
outputFormat?: string;
|
|
4388
|
+
readonly modelId?: ElevenLabsModel | string;
|
|
4389
|
+
readonly outputFormat?: string;
|
|
4390
|
+
/**
|
|
4391
|
+
* BCP-47 language code (e.g. `"it"`, `"es"`). Forwarded to ElevenLabs as
|
|
4392
|
+
* the `language_code` request body field — required for multilingual /
|
|
4393
|
+
* Flash v2.5 voices to render the right accent.
|
|
4394
|
+
*/
|
|
4395
|
+
readonly languageCode?: string;
|
|
4396
|
+
/** ElevenLabs `voice_settings` object (stability, similarity_boost, …). */
|
|
4397
|
+
readonly voiceSettings?: Record<string, unknown>;
|
|
2964
4398
|
}
|
|
2965
4399
|
/** Options for the carrier-specific factories — same as the constructor minus `outputFormat`. */
|
|
2966
4400
|
type ElevenLabsCarrierOptions = Omit<ElevenLabsTTSOptions, "outputFormat">;
|
|
@@ -2979,17 +4413,180 @@ type ElevenLabsCarrierOptions = Omit<ElevenLabsTTSOptions, "outputFormat">;
|
|
|
2979
4413
|
* 16 kHz, native Telnyx default) on phone calls to skip the SDK-side
|
|
2980
4414
|
* resampling / transcoding step.
|
|
2981
4415
|
*/
|
|
2982
|
-
declare class TTS$
|
|
4416
|
+
declare class TTS$6 extends ElevenLabsTTS {
|
|
2983
4417
|
static readonly providerKey = "elevenlabs";
|
|
2984
4418
|
constructor(opts?: ElevenLabsTTSOptions);
|
|
2985
4419
|
/** Pipeline TTS pre-configured for Twilio Media Streams (`ulaw_8000`). */
|
|
2986
|
-
static forTwilio(opts?: ElevenLabsCarrierOptions): TTS$
|
|
2987
|
-
static forTwilio(apiKey: string, options?: Omit<ElevenLabsTTSOptions, "outputFormat">): TTS$
|
|
4420
|
+
static forTwilio(opts?: ElevenLabsCarrierOptions): TTS$6;
|
|
4421
|
+
static forTwilio(apiKey: string, options?: Omit<ElevenLabsTTSOptions, "outputFormat">): TTS$6;
|
|
2988
4422
|
/** Pipeline TTS pre-configured for Telnyx (`pcm_16000`). */
|
|
2989
|
-
static forTelnyx(opts?: ElevenLabsCarrierOptions): TTS$
|
|
2990
|
-
static forTelnyx(apiKey: string, options?: Omit<ElevenLabsTTSOptions, "outputFormat">): TTS$
|
|
4423
|
+
static forTelnyx(opts?: ElevenLabsCarrierOptions): TTS$6;
|
|
4424
|
+
static forTelnyx(apiKey: string, options?: Omit<ElevenLabsTTSOptions, "outputFormat">): TTS$6;
|
|
4425
|
+
}
|
|
4426
|
+
|
|
4427
|
+
/**
|
|
4428
|
+
* WebSocket-based ElevenLabs TTS provider — opt-in low-latency variant.
|
|
4429
|
+
*
|
|
4430
|
+
* Targets the ElevenLabs streaming-input WebSocket endpoint
|
|
4431
|
+
* (`/v1/text-to-speech/{voice_id}/stream-input`) instead of the HTTP
|
|
4432
|
+
* `/stream` endpoint used by `ElevenLabsTTS`. Saves the HTTP request setup
|
|
4433
|
+
* time per utterance (~50 ms) and avoids the HTTP cold-start TLS handshake
|
|
4434
|
+
* when calls are bursty.
|
|
4435
|
+
*
|
|
4436
|
+
* API matches `ElevenLabsTTS` (`synthesizeStream(text)` returns an
|
|
4437
|
+
* `AsyncGenerator<Buffer>`) so it can be passed anywhere a TTSAdapter is
|
|
4438
|
+
* expected.
|
|
4439
|
+
*
|
|
4440
|
+
* Behaviour notes
|
|
4441
|
+
* - WebSocket is opened **per-utterance** (matches HTTP semantics). A
|
|
4442
|
+
* future revision may pool a WS across utterances of the same call
|
|
4443
|
+
* session — see roadmap Phase 5b.
|
|
4444
|
+
* - `auto_mode=true` is enabled by default. Pass `autoMode: false` to
|
|
4445
|
+
* send a custom `chunk_length_schedule`.
|
|
4446
|
+
* - `outputFormat` is exposed as a query parameter so `ulaw_8000` (Twilio
|
|
4447
|
+
* native) and `pcm_16000` (Telnyx native) work without resampling.
|
|
4448
|
+
* - `eleven_v3` is **not** supported — the WS endpoint rejects it.
|
|
4449
|
+
* - `optimize_streaming_latency` is officially deprecated and is not
|
|
4450
|
+
* exposed.
|
|
4451
|
+
*/
|
|
4452
|
+
|
|
4453
|
+
/** Constructor options for {@link ElevenLabsWebSocketTTS}. */
|
|
4454
|
+
interface ElevenLabsWebSocketTTSOptions {
|
|
4455
|
+
apiKey: string;
|
|
4456
|
+
voiceId?: string;
|
|
4457
|
+
modelId?: ElevenLabsModel | string;
|
|
4458
|
+
outputFormat?: string;
|
|
4459
|
+
voiceSettings?: Record<string, unknown>;
|
|
4460
|
+
languageCode?: string;
|
|
4461
|
+
/** Let the server pick chunk timing. Default true. */
|
|
4462
|
+
autoMode?: boolean;
|
|
4463
|
+
/** WS keep-alive timeout in seconds (5–180). Default 60. */
|
|
4464
|
+
inactivityTimeout?: number;
|
|
4465
|
+
/**
|
|
4466
|
+
* Manual chunk schedule, only used when ``autoMode: false``. Each value
|
|
4467
|
+
* must be 5–500. ElevenLabs default is ``[120, 160, 250, 290]``.
|
|
4468
|
+
*/
|
|
4469
|
+
chunkLengthSchedule?: number[];
|
|
4470
|
+
/** Outgoing audio re-chunk size in bytes. Default 4096. */
|
|
4471
|
+
chunkSize?: number;
|
|
4472
|
+
}
|
|
4473
|
+
/** WebSocket-based ElevenLabs TTS adapter — opt-in low-latency variant. */
|
|
4474
|
+
declare class ElevenLabsWebSocketTTS implements TTSAdapter {
|
|
4475
|
+
static readonly providerKey = "elevenlabs_ws";
|
|
4476
|
+
readonly apiKey: string;
|
|
4477
|
+
readonly voiceId: string;
|
|
4478
|
+
readonly modelId: string;
|
|
4479
|
+
readonly voiceSettings?: Record<string, unknown>;
|
|
4480
|
+
readonly languageCode?: string;
|
|
4481
|
+
readonly autoMode: boolean;
|
|
4482
|
+
readonly inactivityTimeout: number;
|
|
4483
|
+
readonly chunkLengthSchedule?: number[];
|
|
4484
|
+
readonly chunkSize: number;
|
|
4485
|
+
/**
|
|
4486
|
+
* The wire format requested over the ElevenLabs WS. Initially set from
|
|
4487
|
+
* the constructor; ``setTelephonyCarrier`` may auto-flip it to the
|
|
4488
|
+
* carrier's native codec when the caller did NOT pass ``outputFormat``
|
|
4489
|
+
* explicitly.
|
|
4490
|
+
*/
|
|
4491
|
+
private _outputFormat;
|
|
4492
|
+
private readonly _outputFormatExplicit;
|
|
4493
|
+
/** Public read-only view of the (possibly auto-flipped) wire format. */
|
|
4494
|
+
get outputFormat(): string;
|
|
4495
|
+
constructor(opts: ElevenLabsWebSocketTTSOptions);
|
|
4496
|
+
/**
|
|
4497
|
+
* Hook called by ``StreamHandler`` to advise the carrier wire format.
|
|
4498
|
+
*
|
|
4499
|
+
* When the user did NOT pass an explicit ``outputFormat`` in the
|
|
4500
|
+
* constructor options, this flips the format to the carrier's native
|
|
4501
|
+
* wire codec — saving a client-side transcode step. Calling with an
|
|
4502
|
+
* unknown carrier (``""`` / ``"custom"``) is a no-op.
|
|
4503
|
+
*
|
|
4504
|
+
* When ``outputFormat`` was explicitly passed (incl. via the
|
|
4505
|
+
* ``forTwilio`` / ``forTelnyx`` factories), this method is a no-op —
|
|
4506
|
+
* the user's choice always wins.
|
|
4507
|
+
*/
|
|
4508
|
+
setTelephonyCarrier(carrier: string): void;
|
|
4509
|
+
/** Pre-configured for Twilio Media Streams (`ulaw_8000`). */
|
|
4510
|
+
static forTwilio(opts: Omit<ElevenLabsWebSocketTTSOptions, 'outputFormat'>): ElevenLabsWebSocketTTS;
|
|
4511
|
+
/** Pre-configured for Telnyx (`pcm_16000`). */
|
|
4512
|
+
static forTelnyx(opts: Omit<ElevenLabsWebSocketTTSOptions, 'outputFormat'>): ElevenLabsWebSocketTTS;
|
|
4513
|
+
private buildUrl;
|
|
4514
|
+
/**
|
|
4515
|
+
* Single-shot synthesis: open WS, send text, yield bytes, close.
|
|
4516
|
+
*
|
|
4517
|
+
* Resilience contract:
|
|
4518
|
+
* - Connection bounded by ``CONNECT_TIMEOUT_MS`` (5s, was 15s).
|
|
4519
|
+
* - Each idle wait bounded by ``FRAME_TIMEOUT_MS`` (30s) so a stalled
|
|
4520
|
+
* server cannot keep the generator alive indefinitely.
|
|
4521
|
+
* - Permanent error handler attached BEFORE the open await — prevents
|
|
4522
|
+
* ``uncaughtException`` if an error fires after the once-listener
|
|
4523
|
+
* resolves.
|
|
4524
|
+
* - All event listeners removed in ``finally`` (no closure leak past
|
|
4525
|
+
* socket close).
|
|
4526
|
+
* - Server-reported ``error`` raises ``ElevenLabsTTSError``.
|
|
4527
|
+
* - Per-frame audio payload capped at ``MAX_AUDIO_B64_BYTES``.
|
|
4528
|
+
* - Best-effort EOS ``{"text":""}`` sent in finally (not immediately
|
|
4529
|
+
* after flush — auto_mode could otherwise truncate the tail audio).
|
|
4530
|
+
*/
|
|
4531
|
+
synthesizeStream(text: string): AsyncGenerator<Buffer>;
|
|
4532
|
+
/** No-op — connections are per-utterance and torn down inside synthesizeStream. */
|
|
4533
|
+
close(): Promise<void>;
|
|
2991
4534
|
}
|
|
2992
4535
|
|
|
4536
|
+
/** ElevenLabs WebSocket TTS for Patter pipeline mode (opt-in low-latency). */
|
|
4537
|
+
|
|
4538
|
+
/** Constructor options for the ElevenLabs WebSocket `TTS` adapter. */
|
|
4539
|
+
interface ElevenLabsWebSocketOptions {
|
|
4540
|
+
/** API key. Falls back to ELEVENLABS_API_KEY env var when omitted. */
|
|
4541
|
+
apiKey?: string;
|
|
4542
|
+
voiceId?: string;
|
|
4543
|
+
modelId?: ElevenLabsModel | string;
|
|
4544
|
+
outputFormat?: string;
|
|
4545
|
+
/** Let the server pick chunk timing. Default true. */
|
|
4546
|
+
autoMode?: boolean;
|
|
4547
|
+
voiceSettings?: Record<string, unknown>;
|
|
4548
|
+
languageCode?: string;
|
|
4549
|
+
/** WS keep-alive timeout in seconds (5–180). Default 60. */
|
|
4550
|
+
inactivityTimeout?: number;
|
|
4551
|
+
/** Manual chunk schedule, only used when ``autoMode: false``. */
|
|
4552
|
+
chunkLengthSchedule?: number[];
|
|
4553
|
+
}
|
|
4554
|
+
/** Options for the carrier-specific factories — same as the constructor minus `outputFormat`. */
|
|
4555
|
+
type ElevenLabsWebSocketCarrierOptions = Omit<ElevenLabsWebSocketOptions, 'outputFormat'>;
|
|
4556
|
+
/**
|
|
4557
|
+
* ElevenLabs streaming TTS over WebSocket.
|
|
4558
|
+
*
|
|
4559
|
+
* Drop-in replacement for `getpatter/tts/elevenlabs.TTS` (HTTP) using the
|
|
4560
|
+
* `stream-input` WebSocket endpoint. Saves the per-utterance HTTP request
|
|
4561
|
+
* setup time; otherwise behaves identically.
|
|
4562
|
+
*
|
|
4563
|
+
* @example
|
|
4564
|
+
* ```ts
|
|
4565
|
+
* import * as elevenlabsWs from "getpatter/tts/elevenlabs-ws";
|
|
4566
|
+
* const tts = new elevenlabsWs.TTS(); // reads ELEVENLABS_API_KEY
|
|
4567
|
+
* const tts = elevenlabsWs.TTS.forTwilio({ apiKey: "..." });
|
|
4568
|
+
* ```
|
|
4569
|
+
*
|
|
4570
|
+
* **Telephony optimisation** — use {@link TTS.forTwilio} (μ-law @ 8 kHz)
|
|
4571
|
+
* or {@link TTS.forTelnyx} (PCM @ 16 kHz) on phone calls.
|
|
4572
|
+
*/
|
|
4573
|
+
declare class TTS$5 extends ElevenLabsWebSocketTTS {
|
|
4574
|
+
static readonly providerKey = "elevenlabs_ws";
|
|
4575
|
+
constructor(opts?: ElevenLabsWebSocketOptions);
|
|
4576
|
+
/** WebSocket TTS pre-configured for Twilio Media Streams (`ulaw_8000`). */
|
|
4577
|
+
static forTwilio(opts?: ElevenLabsWebSocketCarrierOptions): TTS$5;
|
|
4578
|
+
/** WebSocket TTS pre-configured for Telnyx (`pcm_16000`). */
|
|
4579
|
+
static forTelnyx(opts?: ElevenLabsWebSocketCarrierOptions): TTS$5;
|
|
4580
|
+
}
|
|
4581
|
+
|
|
4582
|
+
/**
|
|
4583
|
+
* OpenAI TTS adapter for Patter — HTTP `/v1/audio/speech` endpoint.
|
|
4584
|
+
*
|
|
4585
|
+
* Wraps `gpt-4o-mini-tts` (and legacy `tts-1*`) and ships a stateful
|
|
4586
|
+
* 24 kHz → 16/8 kHz resampler with anti-alias LPF so the output drops
|
|
4587
|
+
* directly into the telephony pipeline. See {@link OpenAITTS}.
|
|
4588
|
+
*/
|
|
4589
|
+
/** OpenAI TTS adapter with built-in streaming resample to 16/8 kHz. */
|
|
2993
4590
|
declare class OpenAITTS {
|
|
2994
4591
|
private readonly apiKey;
|
|
2995
4592
|
private readonly voice;
|
|
@@ -2997,7 +4594,8 @@ declare class OpenAITTS {
|
|
|
2997
4594
|
private readonly instructions;
|
|
2998
4595
|
private readonly speed;
|
|
2999
4596
|
private readonly antiAlias;
|
|
3000
|
-
|
|
4597
|
+
private readonly targetSampleRate;
|
|
4598
|
+
constructor(apiKey: string, voice?: string, model?: string, instructions?: string | null, speed?: number | null, antiAlias?: boolean, targetSampleRate?: number);
|
|
3001
4599
|
/**
|
|
3002
4600
|
* Synthesise text to speech and return the full audio as a single Buffer.
|
|
3003
4601
|
*
|
|
@@ -3017,29 +4615,36 @@ declare class OpenAITTS {
|
|
|
3017
4615
|
*/
|
|
3018
4616
|
synthesizeStream(text: string): AsyncGenerator<Buffer>;
|
|
3019
4617
|
/**
|
|
3020
|
-
* Streaming 24 kHz → 16 kHz resampler (PCM16-LE). Applies a single-pole
|
|
3021
|
-
* lowpass ahead of the
|
|
3022
|
-
*
|
|
4618
|
+
* Streaming 24 kHz → {16, 8} kHz resampler (PCM16-LE). Applies a single-pole
|
|
4619
|
+
* lowpass ahead of the decimation and carries filter + sample state across
|
|
4620
|
+
* chunks so the cadence doesn't reset at every network read.
|
|
4621
|
+
*
|
|
4622
|
+
* Output rate is selected by ``ctx.targetSampleRate``:
|
|
4623
|
+
* 16000 → 3:2 decimation (sample 0 + mid(1,2)) [default]
|
|
4624
|
+
* 8000 → 3:1 decimation (sample 0 only) [fix #46]
|
|
3023
4625
|
*
|
|
3024
|
-
* ``ctx.lpfEnabled``
|
|
3025
|
-
* legacy static helper
|
|
3026
|
-
*
|
|
3027
|
-
* streaming path gets anti-alias filtering.
|
|
4626
|
+
* ``ctx.lpfEnabled`` controls whether the LPF is engaged — kept disabled
|
|
4627
|
+
* for the legacy static helper so the bit-exact downsample-only tests
|
|
4628
|
+
* remain valid; the real streaming path always engages it.
|
|
3028
4629
|
*/
|
|
3029
4630
|
static resampleStreaming(audio: Buffer, ctx: ResampleCtx): Buffer;
|
|
3030
4631
|
/** @deprecated use {@link resampleStreaming} with persistent state. */
|
|
3031
4632
|
static resample24kTo16k(audio: Buffer): Buffer;
|
|
3032
4633
|
}
|
|
4634
|
+
/** Streaming-resample state passed across calls to {@link OpenAITTS.resampleStreaming}. */
|
|
3033
4635
|
interface ResampleCtx {
|
|
3034
4636
|
carryByte: number | null;
|
|
3035
4637
|
leftover: number[];
|
|
3036
4638
|
lpfPrev: number;
|
|
3037
4639
|
/** Enable the single-pole lowpass ahead of decimation. Default true. */
|
|
3038
4640
|
lpfEnabled?: boolean;
|
|
4641
|
+
/** Final output sample rate. 16000 = 3:2 decimation, 8000 = 3:1. */
|
|
4642
|
+
targetSampleRate?: number;
|
|
3039
4643
|
}
|
|
3040
4644
|
|
|
3041
4645
|
/** OpenAI TTS for Patter pipeline mode. */
|
|
3042
4646
|
|
|
4647
|
+
/** Constructor options for the OpenAI `TTS` adapter. */
|
|
3043
4648
|
interface OpenAITTSOptions {
|
|
3044
4649
|
/** API key. Falls back to OPENAI_API_KEY env var when omitted. */
|
|
3045
4650
|
apiKey?: string;
|
|
@@ -3066,22 +4671,70 @@ interface OpenAITTSOptions {
|
|
|
3066
4671
|
* const tts = new openai.TTS({ apiKey: "sk-...", voice: "alloy" });
|
|
3067
4672
|
* ```
|
|
3068
4673
|
*/
|
|
3069
|
-
declare class TTS$
|
|
4674
|
+
declare class TTS$4 extends OpenAITTS {
|
|
3070
4675
|
static readonly providerKey = "openai_tts";
|
|
3071
4676
|
constructor(opts?: OpenAITTSOptions);
|
|
3072
4677
|
}
|
|
3073
4678
|
|
|
4679
|
+
/**
|
|
4680
|
+
* Cartesia TTS provider — HTTP `/tts/bytes` endpoint.
|
|
4681
|
+
*
|
|
4682
|
+
* Cartesia also offers a WebSocket streaming mode with word timestamps;
|
|
4683
|
+
* this provider focuses on the chunked-bytes HTTP API which maps cleanly
|
|
4684
|
+
* onto Patter's `synthesize(text)` contract and keeps the provider
|
|
4685
|
+
* dependency-free (just `fetch`).
|
|
4686
|
+
*
|
|
4687
|
+
* Default model is `sonic-3` (GA snapshot `sonic-3-2026-01-12`) — Cartesia's
|
|
4688
|
+
* current GA model with a documented ~90 ms TTFB target. Voice IDs from the
|
|
4689
|
+
* sonic-2 generation (including the default Katie voice) remain compatible.
|
|
4690
|
+
*
|
|
4691
|
+
* **Telephony optimization** — the constructor default
|
|
4692
|
+
* `sampleRate=16000` is correct for web playback, dashboard previews, and
|
|
4693
|
+
* 16 kHz pipelines. For real phone calls, use the carrier-specific
|
|
4694
|
+
* factories instead:
|
|
4695
|
+
*
|
|
4696
|
+
* - {@link CartesiaTTS.forTwilio} requests `sampleRate=8000` natively from
|
|
4697
|
+
* Cartesia. Twilio's media-stream WebSocket expects μ-law @ 8 kHz, so
|
|
4698
|
+
* the SDK normally resamples 16 kHz → 8 kHz before doing the PCM →
|
|
4699
|
+
* μ-law transcode in `TwilioAudioSender`. Asking Cartesia for 8 kHz
|
|
4700
|
+
* PCM at the source skips the resample step (saves ~10–30 ms first-
|
|
4701
|
+
* byte plus per-frame CPU and removes a potential aliasing source).
|
|
4702
|
+
* The PCM → μ-law transcode still happens client-side.
|
|
4703
|
+
* - {@link CartesiaTTS.forTelnyx} requests `sampleRate=16000`. Telnyx
|
|
4704
|
+
* negotiates L16/16000 on its bidirectional media WebSocket, so
|
|
4705
|
+
* 16 kHz PCM is already the format used end-to-end and no
|
|
4706
|
+
* transcoding happens. This is the same as the bare-constructor
|
|
4707
|
+
* default and exists for API symmetry with the Twilio factory.
|
|
4708
|
+
*/
|
|
4709
|
+
/** Known Cartesia TTS models. */
|
|
4710
|
+
declare const CartesiaTTSModel: {
|
|
4711
|
+
readonly SONIC_3: "sonic-3";
|
|
4712
|
+
readonly SONIC_2: "sonic-2";
|
|
4713
|
+
readonly SONIC: "sonic";
|
|
4714
|
+
};
|
|
4715
|
+
type CartesiaTTSModel = (typeof CartesiaTTSModel)[keyof typeof CartesiaTTSModel];
|
|
4716
|
+
/** Common PCM sample rates accepted by the Cartesia bytes endpoint. */
|
|
4717
|
+
declare const CartesiaTTSSampleRate: {
|
|
4718
|
+
readonly HZ_8000: 8000;
|
|
4719
|
+
readonly HZ_16000: 16000;
|
|
4720
|
+
readonly HZ_22050: 22050;
|
|
4721
|
+
readonly HZ_24000: 24000;
|
|
4722
|
+
readonly HZ_44100: 44100;
|
|
4723
|
+
};
|
|
4724
|
+
type CartesiaTTSSampleRate = (typeof CartesiaTTSSampleRate)[keyof typeof CartesiaTTSSampleRate];
|
|
4725
|
+
/** Constructor options for {@link CartesiaTTS}. */
|
|
3074
4726
|
interface CartesiaTTSOptions$1 {
|
|
3075
|
-
model?: string;
|
|
4727
|
+
model?: CartesiaTTSModel | string;
|
|
3076
4728
|
voice?: string;
|
|
3077
4729
|
language?: string;
|
|
3078
|
-
sampleRate?: number;
|
|
4730
|
+
sampleRate?: CartesiaTTSSampleRate | number;
|
|
3079
4731
|
speed?: string | number;
|
|
3080
4732
|
emotion?: string | string[];
|
|
3081
4733
|
volume?: number;
|
|
3082
4734
|
baseUrl?: string;
|
|
3083
4735
|
apiVersion?: string;
|
|
3084
4736
|
}
|
|
4737
|
+
/** Cartesia TTS provider backed by the HTTP `/tts/bytes` streaming endpoint. */
|
|
3085
4738
|
declare class CartesiaTTS {
|
|
3086
4739
|
private readonly apiKey;
|
|
3087
4740
|
private readonly model;
|
|
@@ -3126,6 +4779,7 @@ declare class CartesiaTTS {
|
|
|
3126
4779
|
|
|
3127
4780
|
/** Cartesia TTS for Patter pipeline mode. */
|
|
3128
4781
|
|
|
4782
|
+
/** Constructor options for the Cartesia `TTS` adapter. */
|
|
3129
4783
|
interface CartesiaTTSOptions {
|
|
3130
4784
|
/** API key. Falls back to CARTESIA_API_KEY env var when omitted. */
|
|
3131
4785
|
apiKey?: string;
|
|
@@ -3160,17 +4814,18 @@ type CartesiaCarrierOptions = Omit<CartesiaTTSOptions, "sampleRate">;
|
|
|
3160
4814
|
* or {@link TTS.forTelnyx} (PCM @ 16 kHz, native Telnyx default) on
|
|
3161
4815
|
* phone calls.
|
|
3162
4816
|
*/
|
|
3163
|
-
declare class TTS$
|
|
4817
|
+
declare class TTS$3 extends CartesiaTTS {
|
|
3164
4818
|
static readonly providerKey = "cartesia_tts";
|
|
3165
4819
|
constructor(opts?: CartesiaTTSOptions);
|
|
3166
4820
|
/** Pipeline TTS pre-configured for Twilio Media Streams (PCM @ 8 kHz). */
|
|
3167
|
-
static forTwilio(opts?: CartesiaCarrierOptions): TTS$
|
|
3168
|
-
static forTwilio(apiKey: string, options?: Omit<CartesiaTTSOptions, "sampleRate">): TTS$
|
|
4821
|
+
static forTwilio(opts?: CartesiaCarrierOptions): TTS$3;
|
|
4822
|
+
static forTwilio(apiKey: string, options?: Omit<CartesiaTTSOptions, "sampleRate">): TTS$3;
|
|
3169
4823
|
/** Pipeline TTS pre-configured for Telnyx (PCM @ 16 kHz). */
|
|
3170
|
-
static forTelnyx(opts?: CartesiaCarrierOptions): TTS$
|
|
3171
|
-
static forTelnyx(apiKey: string, options?: Omit<CartesiaTTSOptions, "sampleRate">): TTS$
|
|
4824
|
+
static forTelnyx(opts?: CartesiaCarrierOptions): TTS$3;
|
|
4825
|
+
static forTelnyx(apiKey: string, options?: Omit<CartesiaTTSOptions, "sampleRate">): TTS$3;
|
|
3172
4826
|
}
|
|
3173
4827
|
|
|
4828
|
+
/** Constructor options for {@link RimeTTS}. */
|
|
3174
4829
|
interface RimeTTSOptions$1 {
|
|
3175
4830
|
model?: string;
|
|
3176
4831
|
speaker?: string;
|
|
@@ -3186,6 +4841,7 @@ interface RimeTTSOptions$1 {
|
|
|
3186
4841
|
phonemizeBetweenBrackets?: boolean;
|
|
3187
4842
|
baseUrl?: string;
|
|
3188
4843
|
}
|
|
4844
|
+
/** Rime TTS adapter for the `users.rime.ai/v1/rime-tts` HTTP streaming endpoint. */
|
|
3189
4845
|
declare class RimeTTS {
|
|
3190
4846
|
private readonly apiKey;
|
|
3191
4847
|
private readonly model;
|
|
@@ -3204,6 +4860,7 @@ declare class RimeTTS {
|
|
|
3204
4860
|
private readonly totalTimeoutMs;
|
|
3205
4861
|
constructor(apiKey: string, opts?: RimeTTSOptions$1);
|
|
3206
4862
|
private buildPayload;
|
|
4863
|
+
/** Synthesize text and return the concatenated audio buffer. */
|
|
3207
4864
|
synthesize(text: string): Promise<Buffer>;
|
|
3208
4865
|
/**
|
|
3209
4866
|
* Synthesize text and yield raw PCM_S16LE chunks at the configured
|
|
@@ -3214,6 +4871,7 @@ declare class RimeTTS {
|
|
|
3214
4871
|
|
|
3215
4872
|
/** Rime TTS for Patter pipeline mode. */
|
|
3216
4873
|
|
|
4874
|
+
/** Constructor options for the Rime `TTS` adapter. */
|
|
3217
4875
|
interface RimeTTSOptions {
|
|
3218
4876
|
/** API key. Falls back to RIME_API_KEY env var when omitted. */
|
|
3219
4877
|
apiKey?: string;
|
|
@@ -3241,13 +4899,14 @@ interface RimeTTSOptions {
|
|
|
3241
4899
|
* const tts = new rime.TTS({ apiKey: "...", speaker: "astra" });
|
|
3242
4900
|
* ```
|
|
3243
4901
|
*/
|
|
3244
|
-
declare class TTS$
|
|
4902
|
+
declare class TTS$2 extends RimeTTS {
|
|
3245
4903
|
static readonly providerKey = "rime";
|
|
3246
4904
|
constructor(opts?: RimeTTSOptions);
|
|
3247
4905
|
}
|
|
3248
4906
|
|
|
3249
4907
|
/** LMNT TTS for Patter pipeline mode. */
|
|
3250
4908
|
|
|
4909
|
+
/** Constructor options for the LMNT `TTS` adapter. */
|
|
3251
4910
|
interface LMNTTTSOptions {
|
|
3252
4911
|
/** API key. Falls back to LMNT_API_KEY env var when omitted. */
|
|
3253
4912
|
apiKey?: string;
|
|
@@ -3270,13 +4929,135 @@ interface LMNTTTSOptions {
|
|
|
3270
4929
|
* const tts = new lmnt.TTS({ apiKey: "...", voice: "leah" });
|
|
3271
4930
|
* ```
|
|
3272
4931
|
*/
|
|
3273
|
-
declare class TTS extends LMNTTTS {
|
|
4932
|
+
declare class TTS$1 extends LMNTTTS {
|
|
3274
4933
|
static readonly providerKey = "lmnt";
|
|
3275
4934
|
constructor(opts?: LMNTTTSOptions);
|
|
3276
4935
|
}
|
|
3277
4936
|
|
|
4937
|
+
/**
|
|
4938
|
+
* Inworld TTS provider — HTTP NDJSON streaming endpoint.
|
|
4939
|
+
*
|
|
4940
|
+
* Calls `POST https://api.inworld.ai/tts/v1/voice:stream`. The response is
|
|
4941
|
+
* NDJSON: one JSON object per line of the form
|
|
4942
|
+
* `{"result": {"audioContent": "<base64-PCM_S16LE>", "timestampInfo": ...}}`
|
|
4943
|
+
*
|
|
4944
|
+
* The default config requests `audioEncoding=PCM` at 16 kHz so the output drops
|
|
4945
|
+
* straight into the Patter pipeline without transcoding. Inworld TTS-2 is the
|
|
4946
|
+
* default model — pass `model: "inworld-tts-1.5-max"` for the prior generation.
|
|
4947
|
+
*/
|
|
4948
|
+
/** Inworld TTS model families. */
|
|
4949
|
+
declare const InworldModel: {
|
|
4950
|
+
readonly TTS_2: "inworld-tts-2";
|
|
4951
|
+
readonly TTS_1_5_MAX: "inworld-tts-1.5-max";
|
|
4952
|
+
readonly TTS_1_5_MINI: "inworld-tts-1.5-mini";
|
|
4953
|
+
readonly TTS_1_MAX: "inworld-tts-1-max";
|
|
4954
|
+
readonly TTS_1: "inworld-tts-1";
|
|
4955
|
+
};
|
|
4956
|
+
type InworldModel = (typeof InworldModel)[keyof typeof InworldModel];
|
|
4957
|
+
/** Inworld audio encoding values accepted by the REST API. */
|
|
4958
|
+
declare const InworldAudioEncoding: {
|
|
4959
|
+
readonly PCM: "PCM";
|
|
4960
|
+
readonly LINEAR16: "LINEAR16";
|
|
4961
|
+
readonly OGG_OPUS: "OGG_OPUS";
|
|
4962
|
+
readonly MP3: "MP3";
|
|
4963
|
+
};
|
|
4964
|
+
type InworldAudioEncoding = (typeof InworldAudioEncoding)[keyof typeof InworldAudioEncoding];
|
|
4965
|
+
/** TTS-2 stability mode (ignored by older models). */
|
|
4966
|
+
declare const InworldDeliveryMode: {
|
|
4967
|
+
readonly EXPRESSIVE: "EXPRESSIVE";
|
|
4968
|
+
readonly BALANCED: "BALANCED";
|
|
4969
|
+
readonly STABLE: "STABLE";
|
|
4970
|
+
};
|
|
4971
|
+
type InworldDeliveryMode = (typeof InworldDeliveryMode)[keyof typeof InworldDeliveryMode];
|
|
4972
|
+
/** Constructor options for {@link InworldTTS}. */
|
|
4973
|
+
interface InworldTTSOptions$1 {
|
|
4974
|
+
/** Model id. Defaults to `"inworld-tts-2"`. */
|
|
4975
|
+
model?: InworldModel | string;
|
|
4976
|
+
/** Voice name (e.g. `"Ashley"`, `"Olivia"`, `"Craig"`, `"Remy"`). */
|
|
4977
|
+
voice?: string;
|
|
4978
|
+
/** BCP-47 language tag, e.g. `"en"`, `"it"`, `"es"`. */
|
|
4979
|
+
language?: string;
|
|
4980
|
+
/** Output audio encoding. Defaults to `"PCM"` (raw PCM_S16LE). */
|
|
4981
|
+
audioEncoding?: InworldAudioEncoding | string;
|
|
4982
|
+
/** Output sample rate in Hz. Defaults to 16000. */
|
|
4983
|
+
sampleRate?: number;
|
|
4984
|
+
/** Bitrate hint (bits/sec) — used for OGG_OPUS / MP3. Default 64000. */
|
|
4985
|
+
bitrate?: number;
|
|
4986
|
+
/** Sampling temperature 0.0–2.0 (TTS-1.5 only — ignored by TTS-2). */
|
|
4987
|
+
temperature?: number;
|
|
4988
|
+
/** Speaking rate multiplier 0.5–1.5. Default 1.0. */
|
|
4989
|
+
speakingRate?: number;
|
|
4990
|
+
/** Stability mode for TTS-2 (`EXPRESSIVE` / `BALANCED` / `STABLE`). */
|
|
4991
|
+
deliveryMode?: InworldDeliveryMode | string;
|
|
4992
|
+
/** Override the REST endpoint (e.g. for on-prem deployments). */
|
|
4993
|
+
baseUrl?: string;
|
|
4994
|
+
}
|
|
4995
|
+
/**
|
|
4996
|
+
* Inworld TTS over the `/tts/v1/voice:stream` HTTP NDJSON endpoint.
|
|
4997
|
+
*
|
|
4998
|
+
* The Inworld dashboard provides a Base64 token that is already in the form
|
|
4999
|
+
* expected by the `Authorization: Basic <token>` header — pass it as-is. If
|
|
5000
|
+
* you only have the raw API key string, base64-encode `${apiKey}:` yourself
|
|
5001
|
+
* before calling the constructor.
|
|
5002
|
+
*/
|
|
5003
|
+
declare class InworldTTS {
|
|
5004
|
+
private readonly authToken;
|
|
5005
|
+
private readonly model;
|
|
5006
|
+
private readonly voice;
|
|
5007
|
+
private readonly language?;
|
|
5008
|
+
private readonly audioEncoding;
|
|
5009
|
+
private readonly sampleRate;
|
|
5010
|
+
private readonly bitrate;
|
|
5011
|
+
private readonly temperature?;
|
|
5012
|
+
private readonly speakingRate;
|
|
5013
|
+
private readonly deliveryMode?;
|
|
5014
|
+
private readonly baseUrl;
|
|
5015
|
+
constructor(authToken: string, opts?: InworldTTSOptions$1);
|
|
5016
|
+
private buildPayload;
|
|
5017
|
+
/** Synthesize text and return the concatenated audio buffer. */
|
|
5018
|
+
synthesize(text: string): Promise<Buffer>;
|
|
5019
|
+
/**
|
|
5020
|
+
* Yield audio chunks as they arrive. With the default `audioEncoding=PCM`
|
|
5021
|
+
* these are raw PCM_S16LE bytes at `sampleRate`.
|
|
5022
|
+
*/
|
|
5023
|
+
synthesizeStream(text: string): AsyncGenerator<Buffer>;
|
|
5024
|
+
}
|
|
5025
|
+
|
|
5026
|
+
/** Inworld TTS for Patter pipeline mode. */
|
|
5027
|
+
|
|
5028
|
+
/** Constructor options for the Inworld `TTS` adapter. */
|
|
5029
|
+
interface InworldTTSOptions {
|
|
5030
|
+
/** Inworld Base64 auth token. Falls back to INWORLD_API_KEY env var. */
|
|
5031
|
+
apiKey?: string;
|
|
5032
|
+
model?: InworldModel | string;
|
|
5033
|
+
voice?: string;
|
|
5034
|
+
language?: string;
|
|
5035
|
+
audioEncoding?: InworldAudioEncoding | string;
|
|
5036
|
+
sampleRate?: number;
|
|
5037
|
+
bitrate?: number;
|
|
5038
|
+
temperature?: number;
|
|
5039
|
+
speakingRate?: number;
|
|
5040
|
+
deliveryMode?: InworldDeliveryMode | string;
|
|
5041
|
+
baseUrl?: string;
|
|
5042
|
+
}
|
|
5043
|
+
/**
|
|
5044
|
+
* Inworld TTS — defaults to the TTS-2 model.
|
|
5045
|
+
*
|
|
5046
|
+
* @example
|
|
5047
|
+
* ```ts
|
|
5048
|
+
* import * as inworld from "getpatter/tts/inworld";
|
|
5049
|
+
* const tts = new inworld.TTS(); // reads INWORLD_API_KEY
|
|
5050
|
+
* const tts = new inworld.TTS({ apiKey: "...", voice: "Olivia", language: "en" });
|
|
5051
|
+
* ```
|
|
5052
|
+
*/
|
|
5053
|
+
declare class TTS extends InworldTTS {
|
|
5054
|
+
static readonly providerKey = "inworld";
|
|
5055
|
+
constructor(opts?: InworldTTSOptions);
|
|
5056
|
+
}
|
|
5057
|
+
|
|
3278
5058
|
/** OpenAI LLM for Patter pipeline mode. */
|
|
3279
5059
|
|
|
5060
|
+
/** Constructor options for the OpenAI Chat Completions `LLM` adapter. */
|
|
3280
5061
|
interface OpenAILLMOptions {
|
|
3281
5062
|
/** API key. Falls back to OPENAI_API_KEY env var when omitted. */
|
|
3282
5063
|
apiKey?: string;
|
|
@@ -3327,16 +5108,8 @@ declare class LLM$4 extends OpenAILLMProvider {
|
|
|
3327
5108
|
* Anthropic shape and the vendor event stream is normalised back into
|
|
3328
5109
|
* Patter's ``{ type: 'text' | 'tool_call' | 'done' }`` chunk protocol.
|
|
3329
5110
|
*
|
|
3330
|
-
*
|
|
3331
|
-
*
|
|
3332
|
-
* 78a66bcf79c5cea82989401c408f1dff4b961a5b,
|
|
3333
|
-
* file livekit-plugins/livekit-plugins-anthropic/livekit/plugins/anthropic/llm.py),
|
|
3334
|
-
* licensed under Apache License 2.0. Copyright 2023 LiveKit, Inc.
|
|
3335
|
-
*
|
|
3336
|
-
* Adaptations from the LiveKit source:
|
|
3337
|
-
* * Ported the Python async class pair (``llm.LLM`` /
|
|
3338
|
-
* ``llm.LLMStream``) into a single TypeScript class that satisfies
|
|
3339
|
-
* Patter's ``LLMProvider`` interface.
|
|
5111
|
+
* Implementation notes:
|
|
5112
|
+
* * Single TypeScript class satisfying Patter's ``LLMProvider`` interface.
|
|
3340
5113
|
* * Uses native ``fetch`` + SSE parsing instead of the official
|
|
3341
5114
|
* ``@anthropic-ai/sdk`` to keep Patter's runtime dependencies lean
|
|
3342
5115
|
* (mirrors how ``OpenAILLMProvider`` is implemented in
|
|
@@ -3346,6 +5119,7 @@ declare class LLM$4 extends OpenAILLMProvider {
|
|
|
3346
5119
|
* chunk protocol.
|
|
3347
5120
|
*/
|
|
3348
5121
|
|
|
5122
|
+
/** Constructor options for {@link AnthropicLLMProvider}. */
|
|
3349
5123
|
interface AnthropicLLMOptions$1 {
|
|
3350
5124
|
apiKey: string;
|
|
3351
5125
|
model?: string;
|
|
@@ -3377,11 +5151,13 @@ declare class AnthropicLLMProvider implements LLMProvider {
|
|
|
3377
5151
|
private readonly anthropicVersion;
|
|
3378
5152
|
private readonly promptCaching;
|
|
3379
5153
|
constructor(options: AnthropicLLMOptions$1);
|
|
3380
|
-
|
|
5154
|
+
/** Stream Patter-format LLM chunks for the given OpenAI-style chat history. */
|
|
5155
|
+
stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<LLMChunk, void, unknown>;
|
|
3381
5156
|
}
|
|
3382
5157
|
|
|
3383
5158
|
/** Anthropic Claude LLM for Patter pipeline mode. */
|
|
3384
5159
|
|
|
5160
|
+
/** Constructor options for the Anthropic Claude `LLM` adapter. */
|
|
3385
5161
|
interface AnthropicLLMOptions {
|
|
3386
5162
|
/** API key. Falls back to ANTHROPIC_API_KEY env var when omitted. */
|
|
3387
5163
|
apiKey?: string;
|
|
@@ -3430,20 +5206,11 @@ declare class LLM$3 extends AnthropicLLMProvider {
|
|
|
3430
5206
|
*
|
|
3431
5207
|
* Groq exposes an OpenAI-compatible Chat Completions API. We reuse the
|
|
3432
5208
|
* streaming code path by implementing the same SSE parser as
|
|
3433
|
-
* ``OpenAILLMProvider`` but pointed at ``api.groq.com``.
|
|
3434
|
-
*
|
|
3435
|
-
* Portions adapted from LiveKit Agents
|
|
3436
|
-
* (https://github.com/livekit/agents, commit
|
|
3437
|
-
* 78a66bcf79c5cea82989401c408f1dff4b961a5b,
|
|
3438
|
-
* file livekit-plugins/livekit-plugins-groq/livekit/plugins/groq/services.py),
|
|
3439
|
-
* licensed under Apache License 2.0. Copyright LiveKit, Inc.
|
|
3440
|
-
*
|
|
3441
|
-
* Adaptations from the LiveKit source:
|
|
3442
|
-
* * Ported the Python ``groq.LLM`` subclass (which subclasses the
|
|
3443
|
-
* LiveKit OpenAI plugin) into a tiny TypeScript wrapper that swaps
|
|
3444
|
-
* the base URL and defaults to ``llama-3.3-70b-versatile``.
|
|
5209
|
+
* ``OpenAILLMProvider`` but pointed at ``api.groq.com``. Defaults to
|
|
5210
|
+
* ``llama-3.3-70b-versatile``.
|
|
3445
5211
|
*/
|
|
3446
5212
|
|
|
5213
|
+
/** Constructor options for {@link GroqLLMProvider}. */
|
|
3447
5214
|
interface GroqLLMOptions$1 {
|
|
3448
5215
|
apiKey: string;
|
|
3449
5216
|
model?: string;
|
|
@@ -3485,11 +5252,13 @@ declare class GroqLLMProvider implements LLMProvider {
|
|
|
3485
5252
|
private readonly presencePenalty?;
|
|
3486
5253
|
private readonly stop?;
|
|
3487
5254
|
constructor(options: GroqLLMOptions$1);
|
|
3488
|
-
|
|
5255
|
+
/** Stream Patter-format LLM chunks from the Groq chat completions API. */
|
|
5256
|
+
stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<LLMChunk, void, unknown>;
|
|
3489
5257
|
}
|
|
3490
5258
|
|
|
3491
5259
|
/** Groq LLM for Patter pipeline mode. */
|
|
3492
5260
|
|
|
5261
|
+
/** Constructor options for the Groq `LLM` adapter. */
|
|
3493
5262
|
interface GroqLLMOptions {
|
|
3494
5263
|
/** API key. Falls back to GROQ_API_KEY env var when omitted. */
|
|
3495
5264
|
apiKey?: string;
|
|
@@ -3542,21 +5311,14 @@ declare class LLM$2 extends GroqLLMProvider {
|
|
|
3542
5311
|
* compression to reduce TTFT for requests with large prompts
|
|
3543
5312
|
* (see https://inference-docs.cerebras.ai/payload-optimization).
|
|
3544
5313
|
*
|
|
3545
|
-
*
|
|
3546
|
-
*
|
|
3547
|
-
*
|
|
3548
|
-
*
|
|
3549
|
-
*
|
|
3550
|
-
*
|
|
3551
|
-
* Adaptations from the LiveKit source:
|
|
3552
|
-
* * LiveKit's ``cerebras.LLM`` subclasses the LiveKit OpenAI plugin.
|
|
3553
|
-
* Patter's analogue is a tiny wrapper around ``fetch`` that swaps
|
|
3554
|
-
* the base URL and default model.
|
|
3555
|
-
* * The msgpack payload optimisation from LiveKit is Python-only
|
|
3556
|
-
* (msgpack in Node land isn't as standard); only gzip compression
|
|
3557
|
-
* is ported. Enable with ``gzipCompression: true``.
|
|
5314
|
+
* Implementation notes:
|
|
5315
|
+
* * Tiny wrapper around ``fetch`` that swaps the base URL and default
|
|
5316
|
+
* model relative to the OpenAI-compatible API.
|
|
5317
|
+
* * Gzip compression of the request body is supported via
|
|
5318
|
+
* ``gzipCompression: true`` (default).
|
|
3558
5319
|
*/
|
|
3559
5320
|
|
|
5321
|
+
/** Constructor options for {@link CerebrasLLMProvider}. */
|
|
3560
5322
|
interface CerebrasLLMOptions$1 {
|
|
3561
5323
|
apiKey: string;
|
|
3562
5324
|
model?: string;
|
|
@@ -3624,11 +5386,13 @@ declare class CerebrasLLMProvider implements LLMProvider {
|
|
|
3624
5386
|
private readonly presencePenalty?;
|
|
3625
5387
|
private readonly stop?;
|
|
3626
5388
|
constructor(options: CerebrasLLMOptions$1);
|
|
3627
|
-
|
|
5389
|
+
/** Stream Patter-format LLM chunks from the Cerebras chat completions API. */
|
|
5390
|
+
stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<LLMChunk, void, unknown>;
|
|
3628
5391
|
}
|
|
3629
5392
|
|
|
3630
5393
|
/** Cerebras LLM for Patter pipeline mode. */
|
|
3631
5394
|
|
|
5395
|
+
/** Constructor options for the Cerebras `LLM` adapter. */
|
|
3632
5396
|
interface CerebrasLLMOptions {
|
|
3633
5397
|
/** API key. Falls back to CEREBRAS_API_KEY env var when omitted. */
|
|
3634
5398
|
apiKey?: string;
|
|
@@ -3685,23 +5449,16 @@ declare class LLM$1 extends CerebrasLLMProvider {
|
|
|
3685
5449
|
* and ``tools`` shapes, and streamed response parts are normalised to
|
|
3686
5450
|
* Patter's ``{ type: 'text' | 'tool_call' | 'done' }`` chunks.
|
|
3687
5451
|
*
|
|
3688
|
-
*
|
|
3689
|
-
*
|
|
3690
|
-
* 78a66bcf79c5cea82989401c408f1dff4b961a5b,
|
|
3691
|
-
* file livekit-plugins/livekit-plugins-google/livekit/plugins/google/llm.py),
|
|
3692
|
-
* licensed under Apache License 2.0. Copyright 2023 LiveKit, Inc.
|
|
3693
|
-
*
|
|
3694
|
-
* Adaptations from the LiveKit source:
|
|
3695
|
-
* * LiveKit uses the ``google-genai`` Python SDK. The TypeScript port
|
|
3696
|
-
* uses native ``fetch`` against the REST SSE endpoint so we don't
|
|
5452
|
+
* Implementation notes:
|
|
5453
|
+
* * Uses native ``fetch`` against the REST SSE endpoint so we don't
|
|
3697
5454
|
* pull in a large SDK dependency.
|
|
3698
|
-
* *
|
|
3699
|
-
*
|
|
3700
|
-
*
|
|
3701
|
-
*
|
|
3702
|
-
* follow-up PR once credential plumbing is in place.
|
|
5455
|
+
* * Single class that satisfies Patter's ``LLMProvider`` interface.
|
|
5456
|
+
* * Vertex AI support (which requires GCP auth) is not included — only
|
|
5457
|
+
* the Developer API (API key) path is supported. Vertex can be added
|
|
5458
|
+
* by a follow-up PR once credential plumbing is in place.
|
|
3703
5459
|
*/
|
|
3704
5460
|
|
|
5461
|
+
/** Constructor options for {@link GoogleLLMProvider}. */
|
|
3705
5462
|
interface GoogleLLMOptions$1 {
|
|
3706
5463
|
apiKey: string;
|
|
3707
5464
|
model?: string;
|
|
@@ -3717,11 +5474,13 @@ declare class GoogleLLMProvider implements LLMProvider {
|
|
|
3717
5474
|
private readonly temperature?;
|
|
3718
5475
|
private readonly maxOutputTokens?;
|
|
3719
5476
|
constructor(options: GoogleLLMOptions$1);
|
|
3720
|
-
|
|
5477
|
+
/** Stream Patter-format LLM chunks from the Gemini SSE endpoint. */
|
|
5478
|
+
stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null, opts?: LLMStreamOptions): AsyncGenerator<LLMChunk, void, unknown>;
|
|
3721
5479
|
}
|
|
3722
5480
|
|
|
3723
5481
|
/** Google Gemini LLM for Patter pipeline mode. */
|
|
3724
5482
|
|
|
5483
|
+
/** Constructor options for the Google Gemini `LLM` adapter. */
|
|
3725
5484
|
interface GoogleLLMOptions {
|
|
3726
5485
|
/**
|
|
3727
5486
|
* API key. Falls back to ``GEMINI_API_KEY`` first, then ``GOOGLE_API_KEY``.
|
|
@@ -3754,28 +5513,24 @@ declare class LLM extends GoogleLLMProvider {
|
|
|
3754
5513
|
}
|
|
3755
5514
|
|
|
3756
5515
|
/**
|
|
3757
|
-
* Silero VAD provider
|
|
5516
|
+
* Silero VAD provider.
|
|
3758
5517
|
*
|
|
3759
5518
|
* Acoustic voice activity detection backed by the Silero ONNX model. Buffers
|
|
3760
5519
|
* incoming int16 LE PCM frames, runs inference on fixed-size windows
|
|
3761
5520
|
* (256 samples at 8 kHz, 512 at 16 kHz), applies an exponential probability
|
|
3762
5521
|
* filter, and emits VADEvent transitions (speech_start / speech_end).
|
|
3763
5522
|
*
|
|
3764
|
-
*
|
|
3765
|
-
* https://github.com/livekit/agents
|
|
3766
|
-
* Sources:
|
|
3767
|
-
* - livekit-plugins/livekit-plugins-silero/livekit/plugins/silero/vad.py
|
|
3768
|
-
* - livekit-plugins/livekit-plugins-silero/livekit/plugins/silero/onnx_model.py
|
|
3769
|
-
*
|
|
3770
|
-
* Adaptations for Patter:
|
|
5523
|
+
* Notes:
|
|
3771
5524
|
* - Input is raw PCM `Buffer` (int16 LE, mono) via
|
|
3772
|
-
* `processFrame(pcmChunk, sampleRate)
|
|
5525
|
+
* `processFrame(pcmChunk, sampleRate)`.
|
|
3773
5526
|
* - onnxruntime-node is loaded lazily as an optional dependency.
|
|
3774
|
-
* - Emits `VADEvent` (Patter protocol)
|
|
5527
|
+
* - Emits `VADEvent` (Patter protocol).
|
|
3775
5528
|
*/
|
|
3776
5529
|
|
|
3777
5530
|
declare const SUPPORTED_SAMPLE_RATES: readonly [8000, 16000];
|
|
5531
|
+
/** Sample rates supported by the bundled Silero ONNX model (8 kHz or 16 kHz). */
|
|
3778
5532
|
type SileroSampleRate = (typeof SUPPORTED_SAMPLE_RATES)[number];
|
|
5533
|
+
/** Options accepted by {@link SileroVAD.load}. */
|
|
3779
5534
|
interface SileroVADOptions {
|
|
3780
5535
|
minSpeechDuration?: number;
|
|
3781
5536
|
minSilenceDuration?: number;
|
|
@@ -3790,13 +5545,16 @@ interface SileroVADOptions {
|
|
|
3790
5545
|
* Minimal structural type for the subset of `onnxruntime-node` we depend on.
|
|
3791
5546
|
* Declared locally so consumers don't need the package installed at build time.
|
|
3792
5547
|
*/
|
|
5548
|
+
/** Minimal subset of `onnxruntime-node`'s `InferenceSession` that Silero needs. */
|
|
3793
5549
|
interface OnnxInferenceSession {
|
|
3794
5550
|
run(feeds: Record<string, OnnxTensor>): Promise<Record<string, OnnxTensor>>;
|
|
3795
5551
|
}
|
|
5552
|
+
/** Minimal subset of an `onnxruntime-node` tensor used by Silero inference. */
|
|
3796
5553
|
interface OnnxTensor {
|
|
3797
5554
|
readonly data: Float32Array | BigInt64Array;
|
|
3798
5555
|
readonly dims: readonly number[];
|
|
3799
5556
|
}
|
|
5557
|
+
/** Minimal `onnxruntime-node` module surface accepted by {@link SileroVAD}. */
|
|
3800
5558
|
interface OnnxRuntime {
|
|
3801
5559
|
InferenceSession: {
|
|
3802
5560
|
create(pathOrBuffer: string | Uint8Array, options?: Record<string, unknown>): Promise<OnnxInferenceSession>;
|
|
@@ -3822,21 +5580,50 @@ declare class SileroVAD implements VADProvider {
|
|
|
3822
5580
|
private closed;
|
|
3823
5581
|
private constructor();
|
|
3824
5582
|
/**
|
|
3825
|
-
* Load the Silero VAD model.
|
|
5583
|
+
* Load the Silero VAD model.
|
|
3826
5584
|
* Throws if `onnxruntime-node` is not installed.
|
|
3827
5585
|
*/
|
|
3828
5586
|
static load(options?: SileroVADOptions): Promise<SileroVAD>;
|
|
5587
|
+
/**
|
|
5588
|
+
* Convenience factory for telephony pipelines.
|
|
5589
|
+
*
|
|
5590
|
+
* Identical to {@link SileroVAD.load} but pins `sampleRate` to 16000 Hz
|
|
5591
|
+
* — the only sample rate Patter's pipeline-mode audio bus uses (8 kHz
|
|
5592
|
+
* mulaw from Twilio is upsampled to 16 kHz PCM before reaching the
|
|
5593
|
+
* VAD). Every other parameter mirrors the upstream Silero VAD
|
|
5594
|
+
* defaults from `snakers4/silero-vad` (`get_speech_timestamps` /
|
|
5595
|
+
* `VADIterator`):
|
|
5596
|
+
*
|
|
5597
|
+
* - `activationThreshold = 0.5` — upstream `threshold`
|
|
5598
|
+
* - `deactivationThreshold = 0.35` — upstream `neg_threshold = threshold - 0.15`
|
|
5599
|
+
* - `minSpeechDuration = 0.25` — upstream `min_speech_duration_ms = 250`
|
|
5600
|
+
* - `minSilenceDuration = 0.1` — upstream `min_silence_duration_ms = 100`
|
|
5601
|
+
* - `prefixPaddingDuration = 0.03` — upstream `speech_pad_ms = 30`
|
|
5602
|
+
*
|
|
5603
|
+
* Override any field by passing `options`. Deployments that experience
|
|
5604
|
+
* truncation on natural pauses can raise `minSilenceDuration` (e.g.
|
|
5605
|
+
* 0.5–1.0 s) per call site rather than as a global default.
|
|
5606
|
+
*
|
|
5607
|
+
* @example
|
|
5608
|
+
* ```ts
|
|
5609
|
+
* const vad = await SileroVAD.forPhoneCall();
|
|
5610
|
+
* // or, if natural-pause truncation is observed:
|
|
5611
|
+
* const vad = await SileroVAD.forPhoneCall({ minSilenceDuration: 0.5 });
|
|
5612
|
+
* ```
|
|
5613
|
+
*/
|
|
5614
|
+
static forPhoneCall(options?: SileroVADOptions): Promise<SileroVAD>;
|
|
3829
5615
|
/**
|
|
3830
5616
|
* Internal factory used by tests — bypasses onnxruntime-node loading.
|
|
3831
5617
|
* @internal
|
|
3832
5618
|
*/
|
|
3833
5619
|
static fromOnnxModel(runtime: OnnxRuntime, session: OnnxInferenceSession, options: Required<Omit<SileroVADOptions, 'onnxFilePath' | 'forceCpu'>>): SileroVAD;
|
|
5620
|
+
/** Sample rate (Hz) the underlying ONNX model was loaded with. */
|
|
3834
5621
|
get sampleRate(): SileroSampleRate;
|
|
3835
5622
|
/**
|
|
3836
5623
|
* Number of int16 PCM samples that must be provided per call to
|
|
3837
5624
|
* processFrame for the model to run one inference window.
|
|
3838
5625
|
*
|
|
3839
|
-
* Constraint (
|
|
5626
|
+
* Constraint (Silero ONNX spec):
|
|
3840
5627
|
* - 16 000 Hz → 512 samples (32 ms)
|
|
3841
5628
|
* - 8 000 Hz → 256 samples (32 ms)
|
|
3842
5629
|
*
|
|
@@ -3847,8 +5634,10 @@ declare class SileroVAD implements VADProvider {
|
|
|
3847
5634
|
* passing exactly one window per call minimises heap allocation.
|
|
3848
5635
|
*/
|
|
3849
5636
|
numFramesRequired(): number;
|
|
5637
|
+
/** Run VAD on a PCM16 chunk; returns a transition event or null if no change. */
|
|
3850
5638
|
processFrame(pcmChunk: Buffer, sampleRate: number): Promise<VADEvent | null>;
|
|
3851
5639
|
private advanceState;
|
|
5640
|
+
/** Mark the VAD as closed; subsequent processFrame calls throw. */
|
|
3852
5641
|
close(): Promise<void>;
|
|
3853
5642
|
}
|
|
3854
5643
|
|
|
@@ -3924,6 +5713,8 @@ interface StatefulResamplerOptions {
|
|
|
3924
5713
|
* - 16 000 → 8 000 Hz (2:1 decimation with 5-tap FIR anti-alias)
|
|
3925
5714
|
* - 8 000 → 16 000 Hz (1:2 linear interpolation)
|
|
3926
5715
|
* - 24 000 → 16 000 Hz (3:2 linear interpolation)
|
|
5716
|
+
* - 24 000 → 8 000 Hz (3:1 decimation with linear interpolation;
|
|
5717
|
+
* collapses 24k→16k→8k chain — fix #46)
|
|
3927
5718
|
*
|
|
3928
5719
|
* All methods accept and return Buffer (PCM16-LE, mono by default).
|
|
3929
5720
|
*/
|
|
@@ -3998,6 +5789,10 @@ declare class StatefulResampler {
|
|
|
3998
5789
|
* handled using `resample24Last`.
|
|
3999
5790
|
*/
|
|
4000
5791
|
private _resample24kTo16k;
|
|
5792
|
+
/** 3:1 decimation — collapses the 24k→16k→8k chain into a single step. */
|
|
5793
|
+
private _resample24kTo8k;
|
|
5794
|
+
/** Shared phase-stepping resampler used by 24→16 (step 1.5) and 24→8 (step 3). */
|
|
5795
|
+
private _resample24kStep;
|
|
4001
5796
|
}
|
|
4002
5797
|
/** Create a stateful 16 kHz → 8 kHz downsampling resampler. */
|
|
4003
5798
|
declare function createResampler16kTo8k(): StatefulResampler;
|
|
@@ -4005,6 +5800,8 @@ declare function createResampler16kTo8k(): StatefulResampler;
|
|
|
4005
5800
|
declare function createResampler8kTo16k(): StatefulResampler;
|
|
4006
5801
|
/** Create a stateful 24 kHz → 16 kHz resampler (3:2 linear interpolation). */
|
|
4007
5802
|
declare function createResampler24kTo16k(): StatefulResampler;
|
|
5803
|
+
/** Create a stateful 24 kHz → 8 kHz resampler (3:1 decimation, fix #46). */
|
|
5804
|
+
declare function createResampler24kTo8k(): StatefulResampler;
|
|
4008
5805
|
/**
|
|
4009
5806
|
* Upsample 8 kHz PCM16 to 16 kHz using linear interpolation.
|
|
4010
5807
|
*
|
|
@@ -4051,6 +5848,7 @@ declare function resample24kTo16k(pcm24k: Buffer): Buffer;
|
|
|
4051
5848
|
*
|
|
4052
5849
|
* Install: npm install cloudflared
|
|
4053
5850
|
*/
|
|
5851
|
+
/** Handle returned by `startTunnel` exposing the public hostname and a stopper. */
|
|
4054
5852
|
interface TunnelHandle {
|
|
4055
5853
|
/** Public hostname (no protocol), e.g. "random-name.trycloudflare.com" */
|
|
4056
5854
|
hostname: string;
|
|
@@ -4073,7 +5871,9 @@ declare function startTunnel(port: number, timeoutMs?: number): Promise<TunnelHa
|
|
|
4073
5871
|
* that provides immutable messages, automatic ID generation, truncation
|
|
4074
5872
|
* preserving system prompts, and format conversion for OpenAI / Anthropic.
|
|
4075
5873
|
*/
|
|
5874
|
+
/** Role tag attached to every `ChatMessage`. */
|
|
4076
5875
|
type ChatRole = "system" | "user" | "assistant" | "tool";
|
|
5876
|
+
/** Single immutable entry in a `ChatContext` history. */
|
|
4077
5877
|
interface ChatMessage {
|
|
4078
5878
|
readonly id: string;
|
|
4079
5879
|
readonly role: ChatRole;
|
|
@@ -4082,16 +5882,19 @@ interface ChatMessage {
|
|
|
4082
5882
|
readonly name?: string;
|
|
4083
5883
|
readonly toolCallId?: string;
|
|
4084
5884
|
}
|
|
5885
|
+
/** Wire shape produced by `ChatContext.toOpenAI()` (matches OpenAI Chat Completions). */
|
|
4085
5886
|
interface OpenAIMessage {
|
|
4086
5887
|
role: string;
|
|
4087
5888
|
content: string;
|
|
4088
5889
|
name?: string;
|
|
4089
5890
|
tool_call_id?: string;
|
|
4090
5891
|
}
|
|
5892
|
+
/** Single message in `AnthropicConversion.messages`. */
|
|
4091
5893
|
interface AnthropicMessage {
|
|
4092
5894
|
role: string;
|
|
4093
5895
|
content: string;
|
|
4094
5896
|
}
|
|
5897
|
+
/** Result of `ChatContext.toAnthropic()` — system prompt extracted from the message list. */
|
|
4095
5898
|
interface AnthropicConversion {
|
|
4096
5899
|
system: string | undefined;
|
|
4097
5900
|
messages: ReadonlyArray<AnthropicMessage>;
|
|
@@ -4099,15 +5902,23 @@ interface AnthropicConversion {
|
|
|
4099
5902
|
interface ChatContextJSON {
|
|
4100
5903
|
messages: ReadonlyArray<ChatMessage>;
|
|
4101
5904
|
}
|
|
5905
|
+
/** Mutable conversation history with system-prompt-aware truncation and provider conversion helpers. */
|
|
4102
5906
|
declare class ChatContext {
|
|
4103
5907
|
private items;
|
|
4104
5908
|
constructor(systemPrompt?: string);
|
|
5909
|
+
/** Append a user message and return the created `ChatMessage`. */
|
|
4105
5910
|
addUser(content: string): ChatMessage;
|
|
5911
|
+
/** Append an assistant message and return the created `ChatMessage`. */
|
|
4106
5912
|
addAssistant(content: string): ChatMessage;
|
|
5913
|
+
/** Append a system message and return the created `ChatMessage`. */
|
|
4107
5914
|
addSystem(content: string): ChatMessage;
|
|
5915
|
+
/** Append a tool-result message tied to a tool-call id. */
|
|
4108
5916
|
addToolResult(content: string, toolCallId: string): ChatMessage;
|
|
5917
|
+
/** Return a snapshot of all messages currently in the context. */
|
|
4109
5918
|
getMessages(): ReadonlyArray<ChatMessage>;
|
|
5919
|
+
/** Return the last `n` messages (or `[]` when `n <= 0`). */
|
|
4110
5920
|
getLastN(n: number): ReadonlyArray<ChatMessage>;
|
|
5921
|
+
/** Number of messages currently in the context. */
|
|
4111
5922
|
get length(): number;
|
|
4112
5923
|
/**
|
|
4113
5924
|
* Keep the first system message (if any) plus the last `maxMessages`
|
|
@@ -4115,6 +5926,7 @@ declare class ChatContext {
|
|
|
4115
5926
|
* simply keeps the last `maxMessages` messages.
|
|
4116
5927
|
*/
|
|
4117
5928
|
truncate(maxMessages: number): void;
|
|
5929
|
+
/** Convert the conversation to the OpenAI Chat Completions message format. */
|
|
4118
5930
|
toOpenAI(): OpenAIMessage[];
|
|
4119
5931
|
/**
|
|
4120
5932
|
* Convert to Anthropic format. The first system message (if present)
|
|
@@ -4122,8 +5934,11 @@ declare class ChatContext {
|
|
|
4122
5934
|
* messages are included in the messages array.
|
|
4123
5935
|
*/
|
|
4124
5936
|
toAnthropic(): AnthropicConversion;
|
|
5937
|
+
/** Return a new `ChatContext` with the same messages (independent storage). */
|
|
4125
5938
|
copy(): ChatContext;
|
|
5939
|
+
/** Serialize the context to a JSON-safe object. */
|
|
4126
5940
|
toJSON(): ChatContextJSON;
|
|
5941
|
+
/** Reconstruct a `ChatContext` from the result of `toJSON()`. */
|
|
4127
5942
|
static fromJSON(data: ChatContextJSON): ChatContext;
|
|
4128
5943
|
}
|
|
4129
5944
|
|
|
@@ -4145,21 +5960,15 @@ declare class ChatContext {
|
|
|
4145
5960
|
* equivalent battle-tested package in the std library, so we ship a
|
|
4146
5961
|
* minimal in-house bag-of-words + cosine-similarity implementation.
|
|
4147
5962
|
* It is intentionally simple — enough to match repeated IVR prompts.
|
|
4148
|
-
*
|
|
4149
|
-
* Algorithm adapted from LiveKit Agents (Apache 2.0):
|
|
4150
|
-
* https://github.com/livekit/agents
|
|
4151
|
-
*
|
|
4152
|
-
* Source:
|
|
4153
|
-
* - livekit-agents/livekit/agents/voice/ivr/ivr_activity.py
|
|
4154
|
-
* - livekit-agents/livekit/agents/beta/tools/send_dtmf.py
|
|
4155
|
-
* LiveKit SHA at port time: 78a66bcf79c5cea82989401c408f1dff4b961a5b
|
|
4156
5963
|
*/
|
|
4157
5964
|
|
|
4158
5965
|
/** Valid DTMF tone values (keypad characters). */
|
|
4159
5966
|
declare const DTMF_EVENTS: readonly ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "*", "#", "A", "B", "C", "D"];
|
|
5967
|
+
/** Single DTMF tone value (a member of `DTMF_EVENTS`). */
|
|
4160
5968
|
type DtmfEvent = (typeof DTMF_EVENTS)[number];
|
|
4161
5969
|
/** Join DTMF events into a space-separated debug string. */
|
|
4162
5970
|
declare function formatDtmf(events: DtmfEvent[]): string;
|
|
5971
|
+
/** Constructor options for `TfidfLoopDetector`. */
|
|
4163
5972
|
interface TfidfLoopDetectorOptions {
|
|
4164
5973
|
/** Number of recent chunks to keep in the comparison window. */
|
|
4165
5974
|
windowSize?: number;
|
|
@@ -4180,14 +5989,18 @@ declare class TfidfLoopDetector {
|
|
|
4180
5989
|
private chunks;
|
|
4181
5990
|
private consecutiveSimilar;
|
|
4182
5991
|
constructor(opts?: TfidfLoopDetectorOptions);
|
|
5992
|
+
/** Forget all previously observed chunks and reset the consecutive-hit counter. */
|
|
4183
5993
|
reset(): void;
|
|
5994
|
+
/** Record a new transcript chunk in the rolling window. */
|
|
4184
5995
|
addChunk(text: string): void;
|
|
5996
|
+
/** Returns true once the most recent chunks look like a repeated IVR prompt. */
|
|
4185
5997
|
checkLoopDetection(): boolean;
|
|
4186
5998
|
}
|
|
4187
5999
|
/** Async callback fired when the TF-IDF detector trips. */
|
|
4188
6000
|
type LoopCallback = () => Promise<void> | void;
|
|
4189
6001
|
/** Async callback fired after sustained silence. */
|
|
4190
6002
|
type SilenceCallback = () => Promise<void> | void;
|
|
6003
|
+
/** Constructor options for `IVRActivity`. */
|
|
4191
6004
|
interface IVRActivityOptions {
|
|
4192
6005
|
/** Seconds of combined silence before firing `onSilence`. Default `5.0`. */
|
|
4193
6006
|
maxSilenceDuration?: number;
|
|
@@ -4237,11 +6050,17 @@ declare class IVRActivity {
|
|
|
4237
6050
|
private lastShouldSchedule;
|
|
4238
6051
|
private started;
|
|
4239
6052
|
constructor(callControl: CallControl, opts?: IVRActivityOptions);
|
|
6053
|
+
/** Begin tracking transcripts and silence; call once per call. */
|
|
4240
6054
|
start(): Promise<void>;
|
|
6055
|
+
/** Stop tracking and cancel any pending silence timer. */
|
|
4241
6056
|
stop(): Promise<void>;
|
|
6057
|
+
/** Feed a final user-side transcript chunk into the loop detector. */
|
|
4242
6058
|
onUserTranscribed(text: string): Promise<void>;
|
|
6059
|
+
/** Record the current user-turn state (e.g. `"listening"`, `"away"`). */
|
|
4243
6060
|
noteUserState(state: string): void;
|
|
6061
|
+
/** Record the current agent-turn state (e.g. `"idle"`, `"listening"`). */
|
|
4244
6062
|
noteAgentState(state: string): void;
|
|
6063
|
+
/** Tool definitions to expose to the LLM (currently only `send_dtmf_events`). */
|
|
4245
6064
|
get tools(): IVRToolDefinition[];
|
|
4246
6065
|
private scheduleSilenceCheck;
|
|
4247
6066
|
private shouldScheduleCheck;
|
|
@@ -4249,6 +6068,30 @@ declare class IVRActivity {
|
|
|
4249
6068
|
private buildSendDtmfTool;
|
|
4250
6069
|
}
|
|
4251
6070
|
|
|
6071
|
+
/**
|
|
6072
|
+
* Background-audio mixer for the Patter TypeScript SDK. Patter routes
|
|
6073
|
+
* outbound PCM through the pipeline stream handler, so this module exposes
|
|
6074
|
+
* a ``start / mix / stop`` API that does no I/O of its own. See
|
|
6075
|
+
* {@link BackgroundAudioPlayer} for the public class.
|
|
6076
|
+
*
|
|
6077
|
+
* Notes:
|
|
6078
|
+
*
|
|
6079
|
+
* - PCM mixing is a ~40-line pure-JavaScript routine operating on
|
|
6080
|
+
* ``Buffer`` (see {@link mixPcm} below). Clipping is done against the
|
|
6081
|
+
* int16 range.
|
|
6082
|
+
* - ``.ogg`` decoding is not done in this module. Node does not bundle a
|
|
6083
|
+
* Vorbis decoder and shipping a native one would triple the SDK size.
|
|
6084
|
+
* Instead, callers supply a {@link RawPcmSource} (pre-decoded int16
|
|
6085
|
+
* mono LE PCM at a known sample rate) OR a ``DecodedSource`` via a
|
|
6086
|
+
* user-supplied decoder. The Python SDK ships the bundled ``.ogg``
|
|
6087
|
+
* clips and their decoder; the TS package exposes the raw files next to
|
|
6088
|
+
* this module for users who wire up their own decoder.
|
|
6089
|
+
*
|
|
6090
|
+
* Attribution for the bundled audio clips themselves is preserved in
|
|
6091
|
+
* ``src/resources/audio/NOTICE``.
|
|
6092
|
+
*/
|
|
6093
|
+
|
|
6094
|
+
/** Names of the .ogg clips bundled with the SDK under ``resources/audio/``. */
|
|
4252
6095
|
declare const BuiltinAudioClip: {
|
|
4253
6096
|
readonly CITY_AMBIENCE: "city-ambience.ogg";
|
|
4254
6097
|
readonly FOREST_AMBIENCE: "forest-ambience.ogg";
|
|
@@ -4258,6 +6101,7 @@ declare const BuiltinAudioClip: {
|
|
|
4258
6101
|
readonly KEYBOARD_TYPING2: "keyboard-typing2.ogg";
|
|
4259
6102
|
readonly HOLD_MUSIC: "hold_music.ogg";
|
|
4260
6103
|
};
|
|
6104
|
+
/** Filename of one of the bundled clips (e.g. ``"city-ambience.ogg"``). */
|
|
4261
6105
|
type BuiltinAudioClipName = (typeof BuiltinAudioClip)[keyof typeof BuiltinAudioClip];
|
|
4262
6106
|
/** Resolve a bundled clip name to its absolute path on disk. */
|
|
4263
6107
|
declare function builtinClipPath(clip: BuiltinAudioClipName): string;
|
|
@@ -4291,7 +6135,9 @@ interface BuiltinPcmSource {
|
|
|
4291
6135
|
readonly volume?: number;
|
|
4292
6136
|
readonly probability?: number;
|
|
4293
6137
|
}
|
|
6138
|
+
/** Tagged union of every input shape accepted by the player. */
|
|
4294
6139
|
type AudioSource = RawPcmSource | FilePcmSource | BuiltinPcmSource;
|
|
6140
|
+
/** A source plus optional probability weight + volume for list-style players. */
|
|
4295
6141
|
interface AudioConfig {
|
|
4296
6142
|
readonly source: AudioSource;
|
|
4297
6143
|
/** Probability weight used when ``BackgroundAudioPlayer`` receives a list. */
|
|
@@ -4299,8 +6145,9 @@ interface AudioConfig {
|
|
|
4299
6145
|
/** Master volume [0, 1] applied on top of the per-source ``volume``. */
|
|
4300
6146
|
readonly volume?: number;
|
|
4301
6147
|
}
|
|
6148
|
+
/** Constructor options for {@link BackgroundAudioPlayer}. */
|
|
4302
6149
|
interface BackgroundAudioOptions {
|
|
4303
|
-
/** Overall mix ratio [0, 1]. Defaults to 0.1 (
|
|
6150
|
+
/** Overall mix ratio [0, 1]. Defaults to 0.1 (typical hold-music ratio). */
|
|
4304
6151
|
readonly volume?: number;
|
|
4305
6152
|
/** When true the source restarts on exhaustion. */
|
|
4306
6153
|
readonly loop?: boolean;
|
|
@@ -4317,6 +6164,7 @@ declare function mixPcm(agent: Buffer, bg: Buffer, ratio: number): Buffer;
|
|
|
4317
6164
|
* program audio.
|
|
4318
6165
|
*/
|
|
4319
6166
|
declare function resamplePcm(src: Buffer, srcSr: number, dstSr: number): Buffer;
|
|
6167
|
+
/** Probability-weighted random pick from a list of {@link AudioConfig}. */
|
|
4320
6168
|
declare function selectSoundFromList(sounds: readonly AudioConfig[]): AudioConfig | null;
|
|
4321
6169
|
/**
|
|
4322
6170
|
* Mix a background audio clip into an outbound PCM stream.
|
|
@@ -4356,26 +6204,31 @@ declare class BackgroundAudioPlayer implements BackgroundAudioPlayer$1 {
|
|
|
4356
6204
|
private resampleTo;
|
|
4357
6205
|
}
|
|
4358
6206
|
|
|
6207
|
+
/** Constructor options for {@link TwilioAdapter}. */
|
|
4359
6208
|
interface TwilioAdapterOptions {
|
|
4360
6209
|
/** Optional Twilio edge region (e.g. ``ie1`` for Ireland). */
|
|
4361
6210
|
region?: string;
|
|
4362
6211
|
}
|
|
6212
|
+
/** Options accepted by {@link TwilioAdapter.provisionNumber}. */
|
|
4363
6213
|
interface ProvisionNumberOptions$1 {
|
|
4364
6214
|
/** ISO-3166-1 alpha-2 country code, e.g. ``"US"``. */
|
|
4365
6215
|
countryCode: string;
|
|
4366
6216
|
/** Optional North-American area code (e.g. ``"415"``). */
|
|
4367
6217
|
areaCode?: string;
|
|
4368
6218
|
}
|
|
6219
|
+
/** Result returned by {@link TwilioAdapter.provisionNumber}. */
|
|
4369
6220
|
interface ProvisionNumberResult$1 {
|
|
4370
6221
|
readonly phoneNumber: string;
|
|
4371
6222
|
readonly sid: string;
|
|
4372
6223
|
}
|
|
6224
|
+
/** Options accepted by {@link TwilioAdapter.configureNumber}. */
|
|
4373
6225
|
interface ConfigureNumberOptions$1 {
|
|
4374
6226
|
/** URL Twilio should hit when the number receives a call. */
|
|
4375
6227
|
voiceUrl: string;
|
|
4376
6228
|
/** Optional status callback URL for call lifecycle events. */
|
|
4377
6229
|
statusCallback?: string;
|
|
4378
6230
|
}
|
|
6231
|
+
/** Options accepted by {@link TwilioAdapter.initiateCall}. */
|
|
4379
6232
|
interface InitiateCallOptions$1 {
|
|
4380
6233
|
from: string;
|
|
4381
6234
|
to: string;
|
|
@@ -4397,9 +6250,11 @@ interface InitiateCallOptions$1 {
|
|
|
4397
6250
|
/** Raw extra form parameters forwarded to the Calls endpoint. */
|
|
4398
6251
|
extraParams?: Record<string, string>;
|
|
4399
6252
|
}
|
|
6253
|
+
/** Result returned by {@link TwilioAdapter.initiateCall}. */
|
|
4400
6254
|
interface InitiateCallResult$1 {
|
|
4401
6255
|
readonly callSid: string;
|
|
4402
6256
|
}
|
|
6257
|
+
/** Direct REST adapter for Twilio Programmable Voice & Numbers API. */
|
|
4403
6258
|
declare class TwilioAdapter {
|
|
4404
6259
|
readonly accountSid: string;
|
|
4405
6260
|
readonly region: string | undefined;
|
|
@@ -4426,18 +6281,22 @@ declare class TwilioAdapter {
|
|
|
4426
6281
|
endCall(callSid: string): Promise<void>;
|
|
4427
6282
|
}
|
|
4428
6283
|
|
|
6284
|
+
/** Options accepted by {@link TelnyxAdapter.provisionNumber}. */
|
|
4429
6285
|
interface ProvisionNumberOptions {
|
|
4430
6286
|
/** ISO-3166-1 alpha-2 country code (e.g. ``"US"``). */
|
|
4431
6287
|
countryCode: string;
|
|
4432
6288
|
}
|
|
6289
|
+
/** Result returned by {@link TelnyxAdapter.provisionNumber}. */
|
|
4433
6290
|
interface ProvisionNumberResult {
|
|
4434
6291
|
readonly phoneNumber: string;
|
|
4435
6292
|
readonly orderId: string;
|
|
4436
6293
|
}
|
|
6294
|
+
/** Options accepted by {@link TelnyxAdapter.configureNumber}. */
|
|
4437
6295
|
interface ConfigureNumberOptions {
|
|
4438
6296
|
/** Telnyx Call Control Application / Connection ID. */
|
|
4439
6297
|
connectionId: string;
|
|
4440
6298
|
}
|
|
6299
|
+
/** Options accepted by {@link TelnyxAdapter.initiateCall}. */
|
|
4441
6300
|
interface InitiateCallOptions {
|
|
4442
6301
|
from: string;
|
|
4443
6302
|
to: string;
|
|
@@ -4446,13 +6305,16 @@ interface InitiateCallOptions {
|
|
|
4446
6305
|
/** Opaque state string that Telnyx echoes back on webhooks. Base64-encoded on wire. */
|
|
4447
6306
|
clientState?: string;
|
|
4448
6307
|
}
|
|
6308
|
+
/** Result returned by {@link TelnyxAdapter.initiateCall}. */
|
|
4449
6309
|
interface InitiateCallResult {
|
|
4450
6310
|
readonly callControlId: string;
|
|
4451
6311
|
}
|
|
6312
|
+
/** Options accepted by {@link TelnyxAdapter.endCall}. */
|
|
4452
6313
|
interface EndCallOptions {
|
|
4453
6314
|
/** Idempotency key for the hangup command. */
|
|
4454
6315
|
commandId?: string;
|
|
4455
6316
|
}
|
|
6317
|
+
/** Direct REST adapter for Telnyx Call Control & Numbers API. */
|
|
4456
6318
|
declare class TelnyxAdapter {
|
|
4457
6319
|
private readonly apiKey;
|
|
4458
6320
|
readonly connectionId: string | undefined;
|
|
@@ -4479,6 +6341,102 @@ declare class TelnyxAdapter {
|
|
|
4479
6341
|
endCall(callControlId: string, opts?: EndCallOptions): Promise<void>;
|
|
4480
6342
|
}
|
|
4481
6343
|
|
|
6344
|
+
/**
|
|
6345
|
+
* Telnyx Speech-to-Text adapter (WebSocket streaming).
|
|
6346
|
+
*
|
|
6347
|
+
* Bridges the Telnyx `/v2/speech-to-text/transcription` WebSocket API to the
|
|
6348
|
+
* Patter SDK pipeline-mode STT interface. Implemented in TypeScript
|
|
6349
|
+
* (`ws` + `Buffer`) with a callback-based interface matching the other
|
|
6350
|
+
* Patter STT providers (Deepgram, Whisper).
|
|
6351
|
+
*/
|
|
6352
|
+
/** Patter-normalised transcript event emitted by {@link TelnyxSTT}. */
|
|
6353
|
+
interface Transcript {
|
|
6354
|
+
readonly text: string;
|
|
6355
|
+
readonly isFinal: boolean;
|
|
6356
|
+
readonly confidence: number;
|
|
6357
|
+
}
|
|
6358
|
+
type TranscriptCallback = (transcript: Transcript) => void;
|
|
6359
|
+
/** Backing transcription engine accepted by Telnyx STT. */
|
|
6360
|
+
type TelnyxTranscriptionEngine = 'telnyx' | 'google' | 'deepgram' | 'azure';
|
|
6361
|
+
/** Common PCM sample rates accepted by Telnyx STT. */
|
|
6362
|
+
declare const TelnyxSTTSampleRate: {
|
|
6363
|
+
readonly HZ_8000: 8000;
|
|
6364
|
+
readonly HZ_16000: 16000;
|
|
6365
|
+
readonly HZ_24000: 24000;
|
|
6366
|
+
};
|
|
6367
|
+
/** Union of {@link TelnyxSTTSampleRate} integer values. */
|
|
6368
|
+
type TelnyxSTTSampleRate = (typeof TelnyxSTTSampleRate)[keyof typeof TelnyxSTTSampleRate];
|
|
6369
|
+
/** Input audio formats accepted by Telnyx STT. */
|
|
6370
|
+
declare const TelnyxSTTInputFormat: {
|
|
6371
|
+
readonly WAV: "wav";
|
|
6372
|
+
};
|
|
6373
|
+
/** Union of {@link TelnyxSTTInputFormat} string values. */
|
|
6374
|
+
type TelnyxSTTInputFormat = (typeof TelnyxSTTInputFormat)[keyof typeof TelnyxSTTInputFormat];
|
|
6375
|
+
/** Streaming STT adapter for Telnyx's `/v2/speech-to-text/transcription` WebSocket. */
|
|
6376
|
+
declare class TelnyxSTT {
|
|
6377
|
+
private readonly apiKey;
|
|
6378
|
+
private readonly language;
|
|
6379
|
+
private readonly transcriptionEngine;
|
|
6380
|
+
private readonly sampleRate;
|
|
6381
|
+
private readonly baseUrl;
|
|
6382
|
+
private ws;
|
|
6383
|
+
private callbacks;
|
|
6384
|
+
private headerSent;
|
|
6385
|
+
constructor(apiKey: string, language?: string, transcriptionEngine?: TelnyxTranscriptionEngine, sampleRate?: number, baseUrl?: string);
|
|
6386
|
+
/** Open the streaming WebSocket and arm message handlers. */
|
|
6387
|
+
connect(): Promise<void>;
|
|
6388
|
+
/** Send a binary PCM16 audio chunk; emits the WAV header on the first call. */
|
|
6389
|
+
sendAudio(audio: Buffer): void;
|
|
6390
|
+
/** Register a transcript listener (max 10 concurrent listeners). */
|
|
6391
|
+
onTranscript(callback: TranscriptCallback): void;
|
|
6392
|
+
/** Close the streaming WebSocket. */
|
|
6393
|
+
close(): void;
|
|
6394
|
+
}
|
|
6395
|
+
|
|
6396
|
+
/**
|
|
6397
|
+
* Telnyx Text-to-Speech adapter (WebSocket streaming).
|
|
6398
|
+
*
|
|
6399
|
+
* Bridges the Telnyx `/v2/text-to-speech/speech` WebSocket API to the
|
|
6400
|
+
* Patter SDK pipeline-mode TTS interface. Implemented in TypeScript
|
|
6401
|
+
* (`ws` + `Buffer`) with the same `synthesize` / `synthesizeStream`
|
|
6402
|
+
* method shape used by the other Patter TTS providers (ElevenLabs,
|
|
6403
|
+
* OpenAI). The stream yields raw MP3 bytes.
|
|
6404
|
+
*/
|
|
6405
|
+
/** Common Telnyx NaturalHD voices accepted by the TTS endpoint. */
|
|
6406
|
+
declare const TelnyxTTSVoice: {
|
|
6407
|
+
readonly NATURAL_HD_ASTRA: "Telnyx.NaturalHD.astra";
|
|
6408
|
+
readonly NATURAL_HD_LUNA: "Telnyx.NaturalHD.luna";
|
|
6409
|
+
readonly NATURAL_HD_ATLAS: "Telnyx.NaturalHD.atlas";
|
|
6410
|
+
readonly NATURAL_HD_HERA: "Telnyx.NaturalHD.hera";
|
|
6411
|
+
readonly NATURAL_HD_ZEUS: "Telnyx.NaturalHD.zeus";
|
|
6412
|
+
};
|
|
6413
|
+
/** Union of {@link TelnyxTTSVoice} string values. */
|
|
6414
|
+
type TelnyxTTSVoice = (typeof TelnyxTTSVoice)[keyof typeof TelnyxTTSVoice];
|
|
6415
|
+
/** Sample rates supported by the Telnyx TTS WebSocket endpoint. */
|
|
6416
|
+
declare const TelnyxTTSSampleRate: {
|
|
6417
|
+
readonly HZ_8000: 8000;
|
|
6418
|
+
readonly HZ_16000: 16000;
|
|
6419
|
+
readonly HZ_24000: 24000;
|
|
6420
|
+
};
|
|
6421
|
+
/** Union of {@link TelnyxTTSSampleRate} integer values. */
|
|
6422
|
+
type TelnyxTTSSampleRate = (typeof TelnyxTTSSampleRate)[keyof typeof TelnyxTTSSampleRate];
|
|
6423
|
+
/** Streaming TTS adapter for Telnyx's `/v2/text-to-speech/speech` WebSocket. */
|
|
6424
|
+
declare class TelnyxTTS {
|
|
6425
|
+
private readonly apiKey;
|
|
6426
|
+
private readonly voice;
|
|
6427
|
+
private readonly baseUrl;
|
|
6428
|
+
constructor(apiKey: string, voice?: string, baseUrl?: string);
|
|
6429
|
+
/** Collect every audio chunk into a single Buffer. */
|
|
6430
|
+
synthesize(text: string): Promise<Buffer>;
|
|
6431
|
+
/**
|
|
6432
|
+
* Stream MP3-encoded audio chunks as they arrive from Telnyx.
|
|
6433
|
+
*
|
|
6434
|
+
* The server sends JSON frames of the shape `{"audio": "<base64-mp3>"}`.
|
|
6435
|
+
* Callers that need PCM must decode the MP3 bytes (e.g. via `ffmpeg`).
|
|
6436
|
+
*/
|
|
6437
|
+
synthesizeStream(text: string): AsyncGenerator<Buffer>;
|
|
6438
|
+
}
|
|
6439
|
+
|
|
4482
6440
|
declare const SPAN_CALL = "getpatter.call";
|
|
4483
6441
|
declare const SPAN_STT = "getpatter.stt";
|
|
4484
6442
|
declare const SPAN_LLM = "getpatter.llm";
|
|
@@ -4495,6 +6453,7 @@ interface Span {
|
|
|
4495
6453
|
recordException(exception: unknown): void;
|
|
4496
6454
|
end(): void;
|
|
4497
6455
|
}
|
|
6456
|
+
/** Options for `initTracing()`. */
|
|
4498
6457
|
interface InitTracingOptions {
|
|
4499
6458
|
serviceName?: string;
|
|
4500
6459
|
otlpEndpoint?: string;
|
|
@@ -4545,4 +6504,4 @@ interface CallEvent {
|
|
|
4545
6504
|
readonly direction?: string;
|
|
4546
6505
|
}
|
|
4547
6506
|
|
|
4548
|
-
export { type AgentOptions, AllProvidersFailedError, type AnthropicConversion, LLM$3 as AnthropicLLM, type AnthropicLLMOptions, type AnthropicMessage,
|
|
6507
|
+
export { type AgentOptions, type AgentState, AllProvidersFailedError, type AnthropicConversion, LLM$3 as AnthropicLLM, type AnthropicLLMOptions, type AnthropicMessage, AssemblyAIEncoding, AssemblyAIModel, STT$1 as AssemblyAISTT, type AssemblyAISTTOptions, type AudioConfig, type AudioSource, AuthenticationError, type BackgroundAudioOptions, BackgroundAudioPlayer, BuiltinAudioClip, type BuiltinAudioClipName, type BuiltinPcmSource, type CallControl, type CallEvent, type CallEventHandler, type CallMetrics, CallMetricsAccumulator, type CallRecord, type CartesiaEncoding, STT$3 as CartesiaSTT, type CartesiaSTTOptions, TTS$3 as CartesiaTTS, type CartesiaTTSOptions, LLM$1 as CerebrasLLM, type CerebrasLLMOptions, ChatContext, type ChatMessage, type ChatRole, CloudflareTunnel, type ConversationStateSnapshot, type CostBreakdown, DEFAULT_MIN_SENTENCE_LEN, DEFAULT_PRICING, DTMF_EVENTS, STT$6 as DeepgramSTT, type DeepgramSTTOptions, DefaultToolExecutor, type DefaultToolExecutorOptions, type DefineToolInput, type DtmfEvent, ConvAI as ElevenLabsConvAI, ElevenLabsConvAIAdapter, type ConvAIOptions as ElevenLabsConvAIOptions, TTS$6 as ElevenLabsTTS, type ElevenLabsTTSOptions, type ElevenLabsWebSocketOptions, TTS$5 as ElevenLabsWebSocketTTS, type EouTrigger, ErrorCode, EventBus, FallbackLLMProvider, type FallbackLLMProviderOptions, type FilePcmSource, GEMINI_DEFAULT_INPUT_SR, GEMINI_DEFAULT_OUTPUT_SR, GeminiLiveAdapter, type GeminiLiveEventHandler, LLM as GoogleLLM, type GoogleLLMOptions, LLM$2 as GroqLLM, type GroqLLMOptions, Guardrail$1 as Guardrail, type GuardrailOptions, type HookContext, IVRActivity, type IVRActivityOptions, type IVRToolDefinition, type IncomingMessage, type InitTracingOptions, TTS as InworldTTS, type InworldTTSOptions, type JobCallback, type LLMChunk, LLMLoop, type LLMProvider, LMNTAudioFormat, LMNTModel, LMNTSampleRate, TTS$1 as LMNTTTS, type LMNTTTSOptions, type LatencyBreakdown, type LocalCallOptions, type LocalConfig, type LocalOptions, type Logger, type 
LoopCallback, type MessageHandler, MetricsStore, Ngrok, LLM$4 as OpenAILLM, type OpenAILLMOptions, OpenAILLMProvider, type OpenAIMessage, Realtime as OpenAIRealtime, OpenAIRealtimeAdapter, type RealtimeOptions as OpenAIRealtimeOptions, TTS$4 as OpenAITTS, type OpenAITTSOptions, STT$4 as OpenAITranscribeSTT, type OpenAITranscribeSTTOptions, type ParamSpec, PartialStreamError, Patter, PatterConnectionError, PatterError, type PatterEventType, PatterTool, type PatterToolExecuteArgs, type PatterToolOptions, type PatterToolResult, PcmCarry, PipelineHookExecutor, type PipelineHooks, type PipelineMessageHandler, type ProviderPricing, ProvisionError, RateLimitError, type RawPcmSource, type RealtimeConfig, RemoteMessageHandler, TTS$2 as RimeTTS, type RimeTTSOptions, SPAN_BARGEIN, SPAN_CALL, SPAN_ENDPOINT, SPAN_LLM, SPAN_STT, SPAN_TOOL, SPAN_TTS, type SSEEvent, type STTConfig, type ScheduleHandle, SentenceChunker, type ServeOptions, type SilenceCallback, type SileroSampleRate, SileroVAD, type SileroVADOptions, STT$2 as SonioxSTT, type SonioxSTTOptions$1 as SonioxSTTOptions, type Span, type SpeechEventCallback, SpeechEvents, SpeechmaticsAudioEncoding, SpeechmaticsOperatingPoint, STT as SpeechmaticsSTT, type SpeechmaticsSTTOptions, SpeechmaticsSampleRate, SpeechmaticsServerMessage, TurnDetectionMode as SpeechmaticsTurnDetectionMode, StatefulResampler, type StatefulResamplerOptions, Static as StaticTunnel, type TTSConfig, Carrier as Telnyx, TelnyxAdapter, type TelnyxCarrierOptions, type ConfigureNumberOptions as TelnyxConfigureNumberOptions, type EndCallOptions as TelnyxEndCallOptions, type InitiateCallOptions as TelnyxInitiateCallOptions, type InitiateCallResult as TelnyxInitiateCallResult, type ProvisionNumberOptions as TelnyxProvisionNumberOptions, type ProvisionNumberResult as TelnyxProvisionNumberResult, TelnyxSTT, TelnyxSTTInputFormat, TelnyxSTTSampleRate, type Transcript as TelnyxSTTTranscript, TelnyxTTS, TelnyxTTSSampleRate, TelnyxTTSVoice, type 
TelnyxTranscriptionEngine, TestSession, TfidfLoopDetector, type TfidfLoopDetectorOptions, Tool, type ToolDefinition, type ToolExecutor, type ToolHandler, type ToolOptions, type TunnelHandle, type TurnMetrics, Carrier$1 as Twilio, TwilioAdapter, type TwilioAdapterOptions, type TwilioCarrierOptions, type ConfigureNumberOptions$1 as TwilioConfigureNumberOptions, type InitiateCallOptions$1 as TwilioInitiateCallOptions, type InitiateCallResult$1 as TwilioInitiateCallResult, type ProvisionNumberOptions$1 as TwilioProvisionNumberOptions, type ProvisionNumberResult$1 as TwilioProvisionNumberResult, ULTRAVOX_DEFAULT_API_BASE, ULTRAVOX_DEFAULT_SR, type UltravoxEventHandler, UltravoxRealtimeAdapter, type UserState, STT$5 as WhisperSTT, type WhisperSTTOptions, assemblyai, builtinClipPath, calculateRealtimeCost, calculateSttCost, calculateTelephonyCost, calculateTtsCost, callsToCsv, callsToJson, cartesia, createResampler16kTo8k, createResampler24kTo16k, createResampler24kTo8k, createResampler8kTo16k, deepgram, defineTool, elevenlabs, filterEmoji, filterForTTS, filterMarkdown, formatDtmf, geminiLive, getLogger, guardrail, initTracing, isRemoteUrl, isTracingEnabled, isWebSocketUrl, lmnt, makeAuthMiddleware, mergePricing, mixPcm, mountApi, mountDashboard, mulawToPcm16, notifyDashboard, openaiTts, pcm16ToMulaw, resample16kTo8k, resample24kTo16k, resample8kTo16k, resamplePcm, rime, scheduleCron, scheduleInterval, scheduleOnce, selectSoundFromList, setLogger, soniox, speechmatics, startSpan, startTunnel, tool, ultravox, whisper };
|