getpatter 0.5.1 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/dist/banner-3GNZ6VQK.mjs +19 -0
- package/dist/{carrier-config-CPG5CROM.mjs → carrier-config-33HQ2W4V.mjs} +2 -2
- package/dist/{chunk-B6C3KIBG.mjs → chunk-FIFIWBL7.mjs} +3226 -569
- package/dist/chunk-QHHBUCMT.mjs +25 -0
- package/dist/{chunk-AKQFOFLG.mjs → chunk-SEMKNPCD.mjs} +7 -2
- package/dist/{chunk-FMNRCP5X.mjs → chunk-VJVDG4V5.mjs} +1 -1
- package/dist/cli.js +133 -15
- package/dist/dist-YRCCJQ26.mjs +1631 -0
- package/dist/index.d.mts +2000 -289
- package/dist/index.d.ts +2000 -289
- package/dist/index.js +8019 -1984
- package/dist/index.mjs +1885 -618
- package/dist/node-cron-6PRPSBG5.mjs +1348 -0
- package/dist/onnxruntime_binding-4Q2WV26X.node +0 -0
- package/dist/onnxruntime_binding-5PVQ7RFC.node +0 -0
- package/dist/onnxruntime_binding-FNOPH2XG.node +0 -0
- package/dist/onnxruntime_binding-HSGOY4IT.node +0 -0
- package/dist/onnxruntime_binding-OY2N3XIT.node +0 -0
- package/dist/onnxruntime_binding-ZPEJPBCV.node +0 -0
- package/dist/{persistence-CYIGNHSU.mjs → persistence-LQBYQPQQ.mjs} +1 -1
- package/dist/test-mode-MVJ3SKG4.mjs +8 -0
- package/dist/tunnel-UVR3PPAU.mjs +8 -0
- package/package.json +10 -3
- package/dist/chunk-OOIUSZB4.mjs +0 -37
- package/dist/node-cron-373UVDIO.mjs +0 -935
- package/dist/test-mode-JZMYE5HY.mjs +0 -8
- package/dist/tunnel-O7ICMSTP.mjs +0 -8
package/dist/index.d.ts
CHANGED
|
@@ -135,6 +135,35 @@ declare class Static {
|
|
|
135
135
|
hostname: string;
|
|
136
136
|
});
|
|
137
137
|
}
|
|
138
|
+
/**
|
|
139
|
+
* Ngrok tunnel marker — parity with the Python ``getpatter.tunnels.Ngrok``.
|
|
140
|
+
*
|
|
141
|
+
* Patter does not bundle the ngrok binary or auto-provision tunnels. This
|
|
142
|
+
* marker exists so applications can pass an existing ngrok hostname through
|
|
143
|
+
* the same code path as ``Static`` / ``CloudflareTunnel``. Constructing one
|
|
144
|
+
* without a hostname is allowed (mirrors the Python type), but ``start()``
|
|
145
|
+
* will throw — the user is expected to either pass a hostname or run the
|
|
146
|
+
* tunnel themselves and feed the resulting URL via ``Static``.
|
|
147
|
+
*
|
|
148
|
+
* @example
|
|
149
|
+
* ```ts
|
|
150
|
+
* import { Ngrok } from "getpatter/tunnels";
|
|
151
|
+
* const tunnel = new Ngrok({ hostname: "abc.ngrok.io" });
|
|
152
|
+
* ```
|
|
153
|
+
*/
|
|
154
|
+
declare class Ngrok {
|
|
155
|
+
readonly kind: "ngrok";
|
|
156
|
+
readonly hostname: string;
|
|
157
|
+
constructor(opts?: {
|
|
158
|
+
hostname?: string;
|
|
159
|
+
});
|
|
160
|
+
/**
|
|
161
|
+
* Returns the configured hostname or throws if the marker was constructed
|
|
162
|
+
* without one. Patter does not start ngrok itself — the user is expected
|
|
163
|
+
* to either supply a hostname or run ngrok out-of-band.
|
|
164
|
+
*/
|
|
165
|
+
start(): string;
|
|
166
|
+
}
|
|
138
167
|
|
|
139
168
|
/**
|
|
140
169
|
* Public API primitives — `Tool` and `Guardrail` classes, plus the
|
|
@@ -224,9 +253,35 @@ declare function tool(opts: ToolOptions): Tool;
|
|
|
224
253
|
* functions so the Twilio/Telnyx bridges have a single dispatch point.
|
|
225
254
|
*/
|
|
226
255
|
|
|
256
|
+
/** Per-word timings / metadata (Deepgram-shaped). Optional on every adapter. */
|
|
257
|
+
interface STTWord {
|
|
258
|
+
readonly word?: string;
|
|
259
|
+
readonly start?: number;
|
|
260
|
+
readonly end?: number;
|
|
261
|
+
readonly confidence?: number;
|
|
262
|
+
readonly punctuated_word?: string;
|
|
263
|
+
readonly speaker?: number;
|
|
264
|
+
}
|
|
265
|
+
/**
|
|
266
|
+
* Facade transcript shape — widened to surface richer provider fields
|
|
267
|
+
* (Deepgram emits all of them) without forcing adapters that only know
|
|
268
|
+
* ``text``/``isFinal`` to change. All non-text fields are optional.
|
|
269
|
+
*/
|
|
227
270
|
interface STTTranscript {
|
|
228
271
|
text: string;
|
|
229
272
|
isFinal?: boolean;
|
|
273
|
+
/** Overall transcript confidence in [0, 1]. */
|
|
274
|
+
confidence?: number;
|
|
275
|
+
/** Provider-side end-of-utterance hint (faster than ``isFinal``). */
|
|
276
|
+
speechFinal?: boolean;
|
|
277
|
+
/** True when the result was produced in response to a Finalize command. */
|
|
278
|
+
fromFinalize?: boolean;
|
|
279
|
+
/** Provider request id (Deepgram populates this from the Metadata frame). */
|
|
280
|
+
requestId?: string;
|
|
281
|
+
/** Per-word timings / metadata when the provider emits them. */
|
|
282
|
+
words?: ReadonlyArray<STTWord>;
|
|
283
|
+
/** Which provider event this transcript represents (e.g. ``Results``). */
|
|
284
|
+
eventType?: string;
|
|
230
285
|
}
|
|
231
286
|
type STTTranscriptCallback = (t: STTTranscript) => Promise<void> | void;
|
|
232
287
|
/** Shape shared by every STT adapter in the SDK. */
|
|
@@ -240,6 +295,81 @@ interface TTSAdapter {
|
|
|
240
295
|
synthesizeStream(text: string): AsyncIterable<Buffer>;
|
|
241
296
|
}
|
|
242
297
|
|
|
298
|
+
/**
|
|
299
|
+
* Pipeline hook executor for pipeline mode.
|
|
300
|
+
*
|
|
301
|
+
* Runs user-defined hooks at each stage of the STT → LLM → TTS pipeline.
|
|
302
|
+
* Fail-open: if a hook throws, the error is logged and the original value
|
|
303
|
+
* passes through unchanged.
|
|
304
|
+
*/
|
|
305
|
+
|
|
306
|
+
declare class PipelineHookExecutor {
|
|
307
|
+
private readonly hooks;
|
|
308
|
+
constructor(hooks: PipelineHooks | undefined);
|
|
309
|
+
/**
|
|
310
|
+
* Run beforeSendToStt hook. Returns null to drop the audio chunk.
|
|
311
|
+
* If no hook is defined, returns the audio unchanged.
|
|
312
|
+
* Fail-open: on exception, the original audio passes through.
|
|
313
|
+
*/
|
|
314
|
+
runBeforeSendToStt(audio: Buffer, ctx: HookContext): Promise<Buffer | null>;
|
|
315
|
+
/**
|
|
316
|
+
* Run afterTranscribe hook. Returns null if hook vetoes the turn.
|
|
317
|
+
* If no hook is defined, returns the transcript unchanged.
|
|
318
|
+
*/
|
|
319
|
+
runAfterTranscribe(transcript: string, ctx: HookContext): Promise<string | null>;
|
|
320
|
+
/**
|
|
321
|
+
* Run beforeLlm hook. Returns a possibly-modified messages list.
|
|
322
|
+
* Returning ``null`` from the hook means "keep the original" — the LLM
|
|
323
|
+
* call is too important to be silently vetoed.
|
|
324
|
+
* Fail-open: on exception, the original messages pass through.
|
|
325
|
+
*/
|
|
326
|
+
runBeforeLlm(messages: Array<Record<string, unknown>>, ctx: HookContext): Promise<Array<Record<string, unknown>>>;
|
|
327
|
+
/**
|
|
328
|
+
* Run afterLlm hook. Returns a possibly-modified assistant text.
|
|
329
|
+
* Returning ``null`` from the hook means "keep the original".
|
|
330
|
+
* Fail-open: on exception, the original text passes through.
|
|
331
|
+
*/
|
|
332
|
+
runAfterLlm(text: string, ctx: HookContext): Promise<string>;
|
|
333
|
+
/**
|
|
334
|
+
* Whether ``afterLlm`` is configured. Used by the LLM loop to decide
|
|
335
|
+
* whether to buffer streaming tokens before yielding them.
|
|
336
|
+
*/
|
|
337
|
+
hasAfterLlm(): boolean;
|
|
338
|
+
/**
|
|
339
|
+
* Run beforeSynthesize hook. Returns null if hook vetoes TTS for this sentence.
|
|
340
|
+
* If no hook is defined, returns the text unchanged.
|
|
341
|
+
*/
|
|
342
|
+
runBeforeSynthesize(text: string, ctx: HookContext): Promise<string | null>;
|
|
343
|
+
/**
|
|
344
|
+
* Run afterSynthesize hook. Returns null if hook vetoes this audio chunk.
|
|
345
|
+
* If no hook is defined, returns the audio unchanged.
|
|
346
|
+
*/
|
|
347
|
+
runAfterSynthesize(audio: Buffer, text: string, ctx: HookContext): Promise<Buffer | null>;
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
/**
|
|
351
|
+
* Lightweight in-process event bus for Patter call lifecycle events.
|
|
352
|
+
*
|
|
353
|
+
* Mirrors the Python ``PatterEventBus`` (sdk-py/getpatter/observability/event_bus.py).
|
|
354
|
+
* Consumers subscribe with ``on()`` and receive typed payloads. ``emit()`` is
|
|
355
|
+
* synchronous but handles async listeners: rejections are surfaced via the
|
|
356
|
+
* Patter logger rather than being swallowed or crashing the call.
|
|
357
|
+
*/
|
|
358
|
+
type PatterEventType = 'turn_started' | 'turn_ended' | 'eou_metrics' | 'interruption' | 'llm_metrics' | 'tts_metrics' | 'stt_metrics' | 'metrics_collected' | 'call_ended' | 'transcript_partial' | 'transcript_final' | 'llm_chunk' | 'tts_chunk' | 'tool_call_started';
|
|
359
|
+
type Listener<T = unknown> = (payload: T) => void | Promise<void>;
|
|
360
|
+
declare class EventBus {
|
|
361
|
+
private readonly listeners;
|
|
362
|
+
/**
|
|
363
|
+
* Subscribe to an event type. Returns an unsubscribe function.
|
|
364
|
+
*/
|
|
365
|
+
on<T = unknown>(event: PatterEventType, cb: Listener<T>): () => void;
|
|
366
|
+
/**
|
|
367
|
+
* Emit an event synchronously. Async listeners are fire-and-forget with
|
|
368
|
+
* rejection logging so a badly-behaved observer never stalls the call path.
|
|
369
|
+
*/
|
|
370
|
+
emit<T = unknown>(event: PatterEventType, payload: T): void;
|
|
371
|
+
}
|
|
372
|
+
|
|
243
373
|
/**
|
|
244
374
|
* Built-in LLM loop for pipeline mode when no onMessage handler is provided.
|
|
245
375
|
*
|
|
@@ -248,14 +378,58 @@ interface TTSAdapter {
|
|
|
248
378
|
* ``OpenAILLMProvider`` which preserves full backward compatibility.
|
|
249
379
|
*/
|
|
250
380
|
|
|
381
|
+
/**
|
|
382
|
+
* Minimal interface for recording LLM usage chunks.
|
|
383
|
+
* Avoids a circular import from metrics.ts.
|
|
384
|
+
*/
|
|
385
|
+
interface LlmUsageRecorder {
|
|
386
|
+
recordLlmUsage(provider: string, model: string, inputTokens: number, outputTokens: number, cacheReadTokens?: number, cacheCreationTokens?: number): void;
|
|
387
|
+
}
|
|
388
|
+
/**
|
|
389
|
+
* Pluggable tool executor — mirrors the Python ``ToolExecutor`` in
|
|
390
|
+
* ``sdk-py/getpatter/services/tool_executor.py``.
|
|
391
|
+
*
|
|
392
|
+
* Implementors receive a fully-resolved ``ToolDefinition`` (handler and/or webhook
|
|
393
|
+
* URL already validated by the SDK) and MUST return a JSON-stringifiable
|
|
394
|
+
* result. Errors should be returned as JSON like
|
|
395
|
+
* ``{ error: "...", fallback: true }`` rather than thrown.
|
|
396
|
+
*/
|
|
397
|
+
interface ToolExecutor {
|
|
398
|
+
execute(toolDef: ToolDefinition, args: Record<string, unknown>, callContext: Record<string, unknown>): Promise<string>;
|
|
399
|
+
}
|
|
400
|
+
interface DefaultToolExecutorOptions {
|
|
401
|
+
/** Total attempts = maxRetries + 1. Default: 2 (i.e. 3 attempts). */
|
|
402
|
+
maxRetries?: number;
|
|
403
|
+
/** Delay between attempts, in ms. */
|
|
404
|
+
retryDelayMs?: number;
|
|
405
|
+
/** Per-request timeout for webhook calls, in ms. */
|
|
406
|
+
requestTimeoutMs?: number;
|
|
407
|
+
}
|
|
408
|
+
/**
|
|
409
|
+
* Default executor — webhook with retry/fallback and local handler preference.
|
|
410
|
+
*
|
|
411
|
+
* This is the out-of-the-box behavior and is 1:1 equivalent to the previous
|
|
412
|
+
* inline logic in ``LLMLoop.executeTool``.
|
|
413
|
+
*/
|
|
414
|
+
declare class DefaultToolExecutor implements ToolExecutor {
|
|
415
|
+
private readonly maxRetries;
|
|
416
|
+
private readonly retryDelayMs;
|
|
417
|
+
private readonly requestTimeoutMs;
|
|
418
|
+
constructor(opts?: DefaultToolExecutorOptions);
|
|
419
|
+
execute(toolDef: ToolDefinition, args: Record<string, unknown>, callContext: Record<string, unknown>): Promise<string>;
|
|
420
|
+
}
|
|
251
421
|
/** A single streaming chunk yielded by an LLM provider. */
|
|
252
422
|
interface LLMChunk {
|
|
253
|
-
type: 'text' | 'tool_call' | 'done';
|
|
423
|
+
type: 'text' | 'tool_call' | 'done' | 'usage';
|
|
254
424
|
content?: string;
|
|
255
425
|
index?: number;
|
|
256
426
|
id?: string;
|
|
257
427
|
name?: string;
|
|
258
428
|
arguments?: string;
|
|
429
|
+
inputTokens?: number;
|
|
430
|
+
outputTokens?: number;
|
|
431
|
+
cacheReadInputTokens?: number;
|
|
432
|
+
cacheCreationInputTokens?: number;
|
|
259
433
|
}
|
|
260
434
|
/**
|
|
261
435
|
* Interface that any LLM provider must satisfy.
|
|
@@ -269,11 +443,44 @@ interface LLMChunk {
|
|
|
269
443
|
interface LLMProvider {
|
|
270
444
|
stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
|
|
271
445
|
}
|
|
446
|
+
/** Optional sampling kwargs forwarded into the OpenAI Chat Completions body. */
|
|
447
|
+
interface OpenAILLMSamplingOptions {
|
|
448
|
+
/** Sampling temperature [0, 2]. */
|
|
449
|
+
temperature?: number;
|
|
450
|
+
/** Max tokens in the assistant response (sent as ``max_completion_tokens``). */
|
|
451
|
+
maxTokens?: number;
|
|
452
|
+
/** OpenAI-style ``response_format`` for JSON mode / structured outputs. */
|
|
453
|
+
responseFormat?: Record<string, unknown>;
|
|
454
|
+
/** Whether to allow parallel tool calls. */
|
|
455
|
+
parallelToolCalls?: boolean;
|
|
456
|
+
/** ``"auto" | "none" | "required"`` or a specific tool object. */
|
|
457
|
+
toolChoice?: string | Record<string, unknown>;
|
|
458
|
+
/** Sampling seed for reproducible outputs. */
|
|
459
|
+
seed?: number;
|
|
460
|
+
/** Nucleus sampling cutoff in [0, 1]. */
|
|
461
|
+
topP?: number;
|
|
462
|
+
/** Penalty in [-2, 2] applied to repeated tokens. */
|
|
463
|
+
frequencyPenalty?: number;
|
|
464
|
+
/** Penalty in [-2, 2] applied to seen tokens. */
|
|
465
|
+
presencePenalty?: number;
|
|
466
|
+
/** Stop sequence(s). */
|
|
467
|
+
stop?: string | string[];
|
|
468
|
+
}
|
|
272
469
|
/** LLM provider backed by OpenAI Chat Completions (streaming). */
|
|
273
470
|
declare class OpenAILLMProvider implements LLMProvider {
|
|
274
471
|
private readonly apiKey;
|
|
275
|
-
|
|
276
|
-
|
|
472
|
+
readonly model: string;
|
|
473
|
+
private readonly temperature?;
|
|
474
|
+
private readonly maxTokens?;
|
|
475
|
+
private readonly responseFormat?;
|
|
476
|
+
private readonly parallelToolCalls?;
|
|
477
|
+
private readonly toolChoice?;
|
|
478
|
+
private readonly seed?;
|
|
479
|
+
private readonly topP?;
|
|
480
|
+
private readonly frequencyPenalty?;
|
|
481
|
+
private readonly presencePenalty?;
|
|
482
|
+
private readonly stop?;
|
|
483
|
+
constructor(apiKey: string, model: string, sampling?: OpenAILLMSamplingOptions);
|
|
277
484
|
stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
|
|
278
485
|
}
|
|
279
486
|
declare class LLMLoop {
|
|
@@ -282,15 +489,34 @@ declare class LLMLoop {
|
|
|
282
489
|
private readonly tools;
|
|
283
490
|
private readonly openaiTools;
|
|
284
491
|
private readonly toolMap;
|
|
492
|
+
private toolExecutor;
|
|
493
|
+
private eventBus?;
|
|
494
|
+
private readonly _providerName;
|
|
495
|
+
private readonly _modelName;
|
|
285
496
|
constructor(apiKey: string, model: string, systemPrompt: string, tools?: ToolDefinition[] | null, llmProvider?: LLMProvider);
|
|
497
|
+
/**
|
|
498
|
+
* Swap in a custom tool executor (e.g. different retry policy, metrics
|
|
499
|
+
* wrapping, tenant-aware fan-out). The default is ``DefaultToolExecutor``.
|
|
500
|
+
*/
|
|
501
|
+
setToolExecutor(executor: ToolExecutor): void;
|
|
502
|
+
/**
|
|
503
|
+
* Wire an {@link EventBus} so the loop emits ``llm_chunk`` per text
|
|
504
|
+
* token and ``tool_call_started`` the first time each tool-call index
|
|
505
|
+
* appears. Set to ``undefined`` to disable.
|
|
506
|
+
*/
|
|
507
|
+
setEventBus(bus: EventBus | undefined): void;
|
|
286
508
|
/**
|
|
287
509
|
* Stream LLM response tokens, handling tool calls automatically.
|
|
288
510
|
* Yields text tokens as they arrive from the LLM.
|
|
511
|
+
*
|
|
512
|
+
* @param metrics Optional usage recorder — when provided, usage chunks
|
|
513
|
+
* from the provider are forwarded to {@link LlmUsageRecorder.recordLlmUsage}
|
|
514
|
+
* so token costs are included in the call cost breakdown (fix 10).
|
|
289
515
|
*/
|
|
290
516
|
run(userText: string, history: Array<{
|
|
291
517
|
role: string;
|
|
292
518
|
text: string;
|
|
293
|
-
}>, callContext: Record<string, unknown
|
|
519
|
+
}>, callContext: Record<string, unknown>, metrics?: LlmUsageRecorder, hookExecutor?: PipelineHookExecutor, hookCtx?: HookContext): AsyncGenerator<string, void, unknown>;
|
|
294
520
|
private executeTool;
|
|
295
521
|
private buildMessages;
|
|
296
522
|
}
|
|
@@ -305,11 +531,11 @@ interface STTConfig {
|
|
|
305
531
|
readonly apiKey: string;
|
|
306
532
|
readonly language: string;
|
|
307
533
|
/**
|
|
308
|
-
*
|
|
309
|
-
*
|
|
310
|
-
*
|
|
534
|
+
* Serialise the config into a JSON-compatible dict for the wire protocol.
|
|
535
|
+
* Mandatory — matches Python's ``STTConfig.to_dict()``. Concrete classes
|
|
536
|
+
* returned by ``stt(...)``/``deepgram(...)`` etc. all implement it.
|
|
311
537
|
*/
|
|
312
|
-
toDict
|
|
538
|
+
toDict(): Record<string, string | Record<string, unknown>>;
|
|
313
539
|
/** Provider-specific knobs (e.g. Deepgram endpointing). */
|
|
314
540
|
options?: Record<string, unknown>;
|
|
315
541
|
}
|
|
@@ -317,36 +543,15 @@ interface TTSConfig {
|
|
|
317
543
|
readonly provider: string;
|
|
318
544
|
readonly apiKey: string;
|
|
319
545
|
readonly voice: string;
|
|
320
|
-
|
|
546
|
+
/**
|
|
547
|
+
* Serialise the config into a JSON-compatible dict for the wire protocol.
|
|
548
|
+
* Mandatory — matches Python's ``TTSConfig.to_dict()``.
|
|
549
|
+
*/
|
|
550
|
+
toDict(): Record<string, string | Record<string, unknown>>;
|
|
321
551
|
options?: Record<string, unknown>;
|
|
322
552
|
}
|
|
323
553
|
type MessageHandler = (msg: IncomingMessage) => Promise<string>;
|
|
324
554
|
type CallEventHandler = (data: Record<string, unknown>) => Promise<void>;
|
|
325
|
-
interface PatterOptions {
|
|
326
|
-
apiKey: string;
|
|
327
|
-
backendUrl?: string;
|
|
328
|
-
restUrl?: string;
|
|
329
|
-
}
|
|
330
|
-
interface ConnectOptions {
|
|
331
|
-
onMessage: MessageHandler;
|
|
332
|
-
onCallStart?: CallEventHandler;
|
|
333
|
-
onCallEnd?: CallEventHandler;
|
|
334
|
-
provider?: string;
|
|
335
|
-
providerKey?: string;
|
|
336
|
-
providerSecret?: string;
|
|
337
|
-
number?: string;
|
|
338
|
-
country?: string;
|
|
339
|
-
stt?: STTConfig;
|
|
340
|
-
tts?: TTSConfig;
|
|
341
|
-
}
|
|
342
|
-
interface CallOptions {
|
|
343
|
-
to: string;
|
|
344
|
-
onMessage?: MessageHandler;
|
|
345
|
-
firstMessage?: string;
|
|
346
|
-
fromNumber?: string;
|
|
347
|
-
agentId?: string;
|
|
348
|
-
machineDetection?: boolean;
|
|
349
|
-
}
|
|
350
555
|
interface ToolDefinition {
|
|
351
556
|
name: string;
|
|
352
557
|
description: string;
|
|
@@ -356,58 +561,9 @@ interface ToolDefinition {
|
|
|
356
561
|
/** Local handler function — when provided, called instead of webhookUrl. */
|
|
357
562
|
handler?: (args: Record<string, unknown>, context: Record<string, unknown>) => Promise<string>;
|
|
358
563
|
}
|
|
359
|
-
interface CreateAgentOptions {
|
|
360
|
-
name: string;
|
|
361
|
-
systemPrompt: string;
|
|
362
|
-
model?: string;
|
|
363
|
-
voice?: string;
|
|
364
|
-
voiceProvider?: string;
|
|
365
|
-
language?: string;
|
|
366
|
-
firstMessage?: string;
|
|
367
|
-
tools?: ToolDefinition[];
|
|
368
|
-
}
|
|
369
|
-
interface Agent {
|
|
370
|
-
id: string;
|
|
371
|
-
name: string;
|
|
372
|
-
systemPrompt: string;
|
|
373
|
-
model: string;
|
|
374
|
-
voice: string;
|
|
375
|
-
voiceProvider: string;
|
|
376
|
-
language: string;
|
|
377
|
-
firstMessage: string | null;
|
|
378
|
-
tools: ToolDefinition[] | null;
|
|
379
|
-
}
|
|
380
|
-
interface PhoneNumber {
|
|
381
|
-
id: string;
|
|
382
|
-
number: string;
|
|
383
|
-
provider: string;
|
|
384
|
-
country: string;
|
|
385
|
-
status: string;
|
|
386
|
-
agentId: string | null;
|
|
387
|
-
}
|
|
388
|
-
interface Call {
|
|
389
|
-
id: string;
|
|
390
|
-
direction: string;
|
|
391
|
-
caller: string;
|
|
392
|
-
callee: string;
|
|
393
|
-
startedAt: string;
|
|
394
|
-
endedAt: string | null;
|
|
395
|
-
durationSeconds: number | null;
|
|
396
|
-
status: string;
|
|
397
|
-
transcript: Array<{
|
|
398
|
-
role: string;
|
|
399
|
-
text: string;
|
|
400
|
-
timestamp: string;
|
|
401
|
-
}> | null;
|
|
402
|
-
}
|
|
403
564
|
interface LocalOptions {
|
|
404
565
|
/**
|
|
405
|
-
*
|
|
406
|
-
* ``mode: 'local'`` to force local mode explicitly.
|
|
407
|
-
*/
|
|
408
|
-
mode?: 'local';
|
|
409
|
-
/**
|
|
410
|
-
* Telephony carrier instance. Required for local mode.
|
|
566
|
+
* Telephony carrier instance. Required.
|
|
411
567
|
*
|
|
412
568
|
* @example
|
|
413
569
|
* ```ts
|
|
@@ -456,6 +612,14 @@ interface PipelineHooks {
|
|
|
456
612
|
beforeSendToStt?: (audio: Buffer, ctx: HookContext) => Buffer | null | Promise<Buffer | null>;
|
|
457
613
|
/** Called after STT produces a transcript, before LLM. Return null to skip this turn. */
|
|
458
614
|
afterTranscribe?: (transcript: string, ctx: HookContext) => string | null | Promise<string | null>;
|
|
615
|
+
/** Called with the messages list before the LLM call.
|
|
616
|
+
* Return null to keep them, or return a new list to replace
|
|
617
|
+
* (useful for prompt injection, message filtering, RAG augmentation). */
|
|
618
|
+
beforeLlm?: (messages: Array<Record<string, unknown>>, ctx: HookContext) => Array<Record<string, unknown>> | null | Promise<Array<Record<string, unknown>> | null>;
|
|
619
|
+
/** Called with the final assistant text after the LLM stream completes.
|
|
620
|
+
* Return null to keep, or return a new string to replace
|
|
621
|
+
* (useful for output validation, redaction, post-processing). */
|
|
622
|
+
afterLlm?: (text: string, ctx: HookContext) => string | null | Promise<string | null>;
|
|
459
623
|
/** Called before TTS, per-sentence in streaming mode. Return null to skip TTS for this sentence. */
|
|
460
624
|
beforeSynthesize?: (text: string, ctx: HookContext) => string | null | Promise<string | null>;
|
|
461
625
|
/** Called after TTS produces an audio chunk. Return null to discard this chunk. */
|
|
@@ -585,38 +749,120 @@ interface LocalCallOptions {
|
|
|
585
749
|
variables?: Record<string, string>;
|
|
586
750
|
/**
|
|
587
751
|
* Ring timeout in seconds. Forwarded to Twilio as `Timeout` and to Telnyx
|
|
588
|
-
* as `timeout_secs`. Defaults to
|
|
589
|
-
*
|
|
590
|
-
*
|
|
752
|
+
* as `timeout_secs`. Defaults to **25 s** — the production-recommended
|
|
753
|
+
* value that limits phantom calls. Pass `60` for legacy carrier-default
|
|
754
|
+
* parity, or `null` to omit the parameter entirely (carrier picks its
|
|
755
|
+
* own default).
|
|
591
756
|
*/
|
|
592
|
-
ringTimeout?: number;
|
|
757
|
+
ringTimeout?: number | null;
|
|
758
|
+
}
|
|
759
|
+
|
|
760
|
+
/**
|
|
761
|
+
* In-memory metrics store for the local dashboard.
|
|
762
|
+
*
|
|
763
|
+
* Keeps the last `maxCalls` completed calls and tracks active calls.
|
|
764
|
+
* Supports SSE event subscribers for real-time updates.
|
|
765
|
+
*
|
|
766
|
+
* Optional disk hydration: when `CallLogger` writes per-call records under
|
|
767
|
+
* `<root>/calls/YYYY/MM/DD/<call_id>/metadata.json`, calling
|
|
768
|
+
* `hydrate(logRoot)` on a fresh store rebuilds the in-memory list from those
|
|
769
|
+
* files so the dashboard survives process restarts (the persistence is in
|
|
770
|
+
* the JSONL/JSON files, the store is just a cache on top).
|
|
771
|
+
*/
|
|
772
|
+
|
|
773
|
+
interface CallRecord {
|
|
774
|
+
call_id: string;
|
|
775
|
+
caller: string;
|
|
776
|
+
callee: string;
|
|
777
|
+
direction: string;
|
|
778
|
+
started_at: number;
|
|
779
|
+
ended_at?: number;
|
|
780
|
+
/**
|
|
781
|
+
* Current lifecycle state: ``initiated`` (pre-registered), ``ringing``,
|
|
782
|
+
* ``in-progress``, ``completed``, ``no-answer``, ``busy``, ``failed``,
|
|
783
|
+
* ``canceled``, or ``webhook_error``.
|
|
784
|
+
*/
|
|
785
|
+
status?: string;
|
|
786
|
+
transcript?: Array<{
|
|
787
|
+
role: string;
|
|
788
|
+
text: string;
|
|
789
|
+
timestamp: number;
|
|
790
|
+
}>;
|
|
791
|
+
turns?: unknown[];
|
|
792
|
+
metrics?: Record<string, unknown> | null;
|
|
793
|
+
[key: string]: unknown;
|
|
794
|
+
}
|
|
795
|
+
interface SSEEvent {
|
|
796
|
+
type: string;
|
|
797
|
+
data: Record<string, unknown>;
|
|
798
|
+
}
|
|
799
|
+
declare class MetricsStore extends EventEmitter {
|
|
800
|
+
private readonly maxCalls;
|
|
801
|
+
private calls;
|
|
802
|
+
private activeCalls;
|
|
803
|
+
/**
|
|
804
|
+
* Accepts either a numeric ``maxCalls`` (legacy positional — matches the
|
|
805
|
+
* original TS API) or an options object ``{ maxCalls }`` to align with the
|
|
806
|
+
* Python SDK's keyword-argument style. Plain literals also work:
|
|
807
|
+
* ``new MetricsStore()`` / ``new MetricsStore(100)`` / ``new MetricsStore({ maxCalls: 100 })``.
|
|
808
|
+
*/
|
|
809
|
+
constructor(maxCallsOrOpts?: number | {
|
|
810
|
+
maxCalls?: number;
|
|
811
|
+
});
|
|
812
|
+
private publish;
|
|
813
|
+
recordCallStart(data: Record<string, unknown>): void;
|
|
814
|
+
/**
|
|
815
|
+
* Pre-register an outbound call before any webhook fires. Lets the
|
|
816
|
+
* dashboard surface attempts that never reach media (no-answer, busy,
|
|
817
|
+
* carrier-rejected). Mirrors the Python ``record_call_initiated``.
|
|
818
|
+
*/
|
|
819
|
+
recordCallInitiated(data: Record<string, unknown>): void;
|
|
820
|
+
/**
|
|
821
|
+
* Update the status of an active or completed call. Terminal states
|
|
822
|
+
* (completed, no-answer, busy, failed, canceled, webhook_error) move the
|
|
823
|
+
* row from active to completed so the UI freezes the live duration timer.
|
|
824
|
+
*/
|
|
825
|
+
updateCallStatus(callId: string, status: string, extra?: Record<string, unknown>): void;
|
|
826
|
+
recordTurn(data: Record<string, unknown>): void;
|
|
827
|
+
recordCallEnd(data: Record<string, unknown>, metrics?: Record<string, unknown> | null): void;
|
|
828
|
+
getCalls(limit?: number, offset?: number): CallRecord[];
|
|
829
|
+
getCall(callId: string): CallRecord | null;
|
|
830
|
+
/** Look up an active call by id (returns undefined if not active or unknown). */
|
|
831
|
+
getActive(callId: string): CallRecord | undefined;
|
|
832
|
+
getActiveCalls(): CallRecord[];
|
|
833
|
+
getAggregates(): Record<string, unknown>;
|
|
834
|
+
getCallsInRange(fromTs?: number, toTs?: number): CallRecord[];
|
|
835
|
+
get callCount(): number;
|
|
836
|
+
/**
|
|
837
|
+
* Rebuild the in-memory call list from `metadata.json` files written by
|
|
838
|
+
* `CallLogger` under `<logRoot>/calls/YYYY/MM/DD/<call_id>/`. Idempotent:
|
|
839
|
+
* call_ids already in the store are skipped. Errors per file are logged
|
|
840
|
+
* and swallowed so a single corrupt entry doesn't block hydration.
|
|
841
|
+
*
|
|
842
|
+
* Returns the number of calls newly added to the store.
|
|
843
|
+
*
|
|
844
|
+
* Safe to call before any traffic; intended to run once at server startup.
|
|
845
|
+
*/
|
|
846
|
+
hydrate(logRoot: string | null | undefined): number;
|
|
593
847
|
}
|
|
594
848
|
|
|
595
849
|
declare class Patter {
|
|
596
|
-
readonly apiKey: string;
|
|
597
|
-
private readonly backendUrl;
|
|
598
|
-
private readonly restUrl;
|
|
599
|
-
private readonly connection;
|
|
600
|
-
private readonly mode;
|
|
601
850
|
private localConfig;
|
|
602
851
|
private embeddedServer;
|
|
603
852
|
private tunnelHandle;
|
|
604
|
-
|
|
853
|
+
/**
|
|
854
|
+
* Live `MetricsStore` for the embedded server. Returns `null` before
|
|
855
|
+
* `serve()` is called. Exposed so integrations like `PatterTool` can
|
|
856
|
+
* subscribe to per-call lifecycle events (`call_initiated`,
|
|
857
|
+
* `call_start`, `call_end`).
|
|
858
|
+
*/
|
|
859
|
+
get metricsStore(): MetricsStore | null;
|
|
860
|
+
constructor(options: LocalOptions);
|
|
605
861
|
agent(opts: AgentOptions): AgentOptions;
|
|
606
862
|
serve(opts: ServeOptions): Promise<void>;
|
|
607
863
|
test(opts: ServeOptions): Promise<void>;
|
|
608
|
-
|
|
609
|
-
call(options: CallOptions | LocalCallOptions): Promise<void>;
|
|
864
|
+
call(options: LocalCallOptions): Promise<void>;
|
|
610
865
|
disconnect(): Promise<void>;
|
|
611
|
-
createAgent(opts: CreateAgentOptions): Promise<Agent>;
|
|
612
|
-
listAgents(): Promise<Agent[]>;
|
|
613
|
-
buyNumber(opts?: {
|
|
614
|
-
country?: string;
|
|
615
|
-
provider?: string;
|
|
616
|
-
}): Promise<PhoneNumber>;
|
|
617
|
-
assignAgent(numberId: string, agentId: string): Promise<void>;
|
|
618
|
-
listCalls(limit?: number): Promise<Call[]>;
|
|
619
|
-
private registerNumber;
|
|
620
866
|
}
|
|
621
867
|
|
|
622
868
|
/**
|
|
@@ -704,51 +950,49 @@ declare const DEFAULT_MIN_SENTENCE_LEN = 20;
|
|
|
704
950
|
declare class SentenceChunker {
|
|
705
951
|
private buffer;
|
|
706
952
|
private readonly minSentenceLen;
|
|
953
|
+
private readonly minWordsForShortFlush;
|
|
707
954
|
constructor(options?: {
|
|
708
955
|
minSentenceLen?: number;
|
|
956
|
+
minWordsForShortFlush?: number;
|
|
709
957
|
});
|
|
710
|
-
/**
|
|
958
|
+
/**
|
|
959
|
+
* Feed a token. Returns zero or more complete sentences.
|
|
960
|
+
*
|
|
961
|
+
* Two emission paths:
|
|
962
|
+
* - **Standard path** — when the buffer is at least `minSentenceLen`
|
|
963
|
+
* characters long and the regex tokenizer reports more than one
|
|
964
|
+
* sentence, all but the last (potentially incomplete) are emitted.
|
|
965
|
+
* - **Short-flush path** — when the buffer is shorter than `minSentenceLen`
|
|
966
|
+
* but ends with a sentence terminator AND has at least
|
|
967
|
+
* `minWordsForShortFlush` whitespace-separated words, emit it
|
|
968
|
+
* immediately. This drops TTS TTFB on short greetings like `"Hi there!"`
|
|
969
|
+
* while keeping single-word utterances (`"Sì."`) buffered until
|
|
970
|
+
* `flush()`.
|
|
971
|
+
*/
|
|
711
972
|
push(token: string): string[];
|
|
973
|
+
/**
|
|
974
|
+
* Emit the buffer when it's a short, complete single-sentence utterance.
|
|
975
|
+
*
|
|
976
|
+
* A buffer qualifies when **all** of these hold:
|
|
977
|
+
* 1. Last non-whitespace char is a sentence terminator.
|
|
978
|
+
* 2. Word count is at least `minWordsForShortFlush` (default 2 — keeps
|
|
979
|
+
* single-word "Sì." / "Yes." buffered until `flush()`).
|
|
980
|
+
* 3. The buffer contains exactly one terminator (the trailing one).
|
|
981
|
+
* Multiple terminators mean we may be mid-stream of a longer merged
|
|
982
|
+
* utterance like `"Hey! Hi! Hello! This is a sentence."` — let the
|
|
983
|
+
* standard path keep merging.
|
|
984
|
+
* 4. The char immediately before the terminator is NOT a digit (avoids
|
|
985
|
+
* flushing mid-decimal, e.g. emitting `"f(x) = x * 2."` before the `54` of `2.54` arrives).
|
|
986
|
+
* 5. The char immediately before the terminator is NOT an uppercase
|
|
987
|
+
* ASCII letter (avoids acronym patterns like `"U.S."` / `"U."`).
|
|
988
|
+
*/
|
|
989
|
+
private maybeShortFlush;
|
|
712
990
|
/** Flush remaining buffer as final sentence(s). Call at end of stream. */
|
|
713
991
|
flush(): string[];
|
|
714
992
|
/** Discard buffered text. Call on interrupt. */
|
|
715
993
|
reset(): void;
|
|
716
994
|
}
|
|
717
995
|
|
|
718
|
-
/**
|
|
719
|
-
* Pipeline hook executor for pipeline mode.
|
|
720
|
-
*
|
|
721
|
-
* Runs user-defined hooks at each stage of the STT → LLM → TTS pipeline.
|
|
722
|
-
* Fail-open: if a hook throws, the error is logged and the original value
|
|
723
|
-
* passes through unchanged.
|
|
724
|
-
*/
|
|
725
|
-
|
|
726
|
-
declare class PipelineHookExecutor {
|
|
727
|
-
private readonly hooks;
|
|
728
|
-
constructor(hooks: PipelineHooks | undefined);
|
|
729
|
-
/**
|
|
730
|
-
* Run beforeSendToStt hook. Returns null to drop the audio chunk.
|
|
731
|
-
* If no hook is defined, returns the audio unchanged.
|
|
732
|
-
* Fail-open: on exception, the original audio passes through.
|
|
733
|
-
*/
|
|
734
|
-
runBeforeSendToStt(audio: Buffer, ctx: HookContext): Promise<Buffer | null>;
|
|
735
|
-
/**
|
|
736
|
-
* Run afterTranscribe hook. Returns null if hook vetoes the turn.
|
|
737
|
-
* If no hook is defined, returns the transcript unchanged.
|
|
738
|
-
*/
|
|
739
|
-
runAfterTranscribe(transcript: string, ctx: HookContext): Promise<string | null>;
|
|
740
|
-
/**
|
|
741
|
-
* Run beforeSynthesize hook. Returns null if hook vetoes TTS for this sentence.
|
|
742
|
-
* If no hook is defined, returns the text unchanged.
|
|
743
|
-
*/
|
|
744
|
-
runBeforeSynthesize(text: string, ctx: HookContext): Promise<string | null>;
|
|
745
|
-
/**
|
|
746
|
-
* Run afterSynthesize hook. Returns null if hook vetoes this audio chunk.
|
|
747
|
-
* If no hook is defined, returns the audio unchanged.
|
|
748
|
-
*/
|
|
749
|
-
runAfterSynthesize(audio: Buffer, text: string, ctx: HookContext): Promise<Buffer | null>;
|
|
750
|
-
}
|
|
751
|
-
|
|
752
996
|
/**
|
|
753
997
|
* Built-in text transforms for cleaning LLM output before TTS synthesis.
|
|
754
998
|
*
|
|
@@ -787,7 +1031,23 @@ declare class AuthenticationError extends PatterError {
|
|
|
787
1031
|
declare class ProvisionError extends PatterError {
|
|
788
1032
|
constructor(message: string);
|
|
789
1033
|
}
|
|
1034
|
+
/** Thrown when a provider returns HTTP 429 on connect/upgrade. */
|
|
1035
|
+
declare class RateLimitError extends PatterConnectionError {
|
|
1036
|
+
constructor(message: string);
|
|
1037
|
+
}
|
|
790
1038
|
|
|
1039
|
+
/**
|
|
1040
|
+
* Config envelope for realtime / ConvAI pipelines — mirrors the wire-level
|
|
1041
|
+
* shape consumed by the backend. Kept narrow on purpose so callers can pass a
|
|
1042
|
+
* plain object literal if they prefer.
|
|
1043
|
+
*/
|
|
1044
|
+
interface RealtimeConfig {
|
|
1045
|
+
readonly provider: string;
|
|
1046
|
+
readonly apiKey: string;
|
|
1047
|
+
readonly model?: string;
|
|
1048
|
+
readonly voice?: string;
|
|
1049
|
+
readonly options?: Record<string, unknown>;
|
|
1050
|
+
}
|
|
791
1051
|
/**
|
|
792
1052
|
* Deepgram STT config builder. Tune latency via ``endpointingMs`` /
|
|
793
1053
|
* ``utteranceEndMs``. Internal only — public code should use ``DeepgramSTT``
|
|
@@ -815,13 +1075,64 @@ declare function openaiTts(opts: {
|
|
|
815
1075
|
apiKey: string;
|
|
816
1076
|
voice?: string;
|
|
817
1077
|
}): TTSConfig;
|
|
818
|
-
|
|
1078
|
+
/** Soniox real-time STT config helper. */
|
|
1079
|
+
declare function soniox(opts: {
|
|
1080
|
+
apiKey: string;
|
|
1081
|
+
language?: string;
|
|
1082
|
+
}): STTConfig;
|
|
1083
|
+
/**
|
|
1084
|
+
* Speechmatics STT config helper.
|
|
1085
|
+
*
|
|
1086
|
+
* NOTE: the Speechmatics adapter is currently Python-only. Calling this helper
|
|
1087
|
+
* throws a clear error so callers can switch providers or use the Python SDK
|
|
1088
|
+
* until the TS adapter ships.
|
|
1089
|
+
*/
|
|
1090
|
+
declare function speechmatics(_opts: {
|
|
1091
|
+
apiKey: string;
|
|
1092
|
+
language?: string;
|
|
1093
|
+
}): STTConfig;
|
|
1094
|
+
/** AssemblyAI real-time STT config helper. */
|
|
1095
|
+
declare function assemblyai(opts: {
|
|
1096
|
+
apiKey: string;
|
|
1097
|
+
language?: string;
|
|
1098
|
+
}): STTConfig;
|
|
1099
|
+
/** Cartesia TTS config helper. Default voice matches Python SDK. */
|
|
1100
|
+
declare function cartesia(opts: {
|
|
1101
|
+
apiKey: string;
|
|
1102
|
+
voice?: string;
|
|
1103
|
+
}): TTSConfig;
|
|
1104
|
+
/** Rime TTS config helper. */
|
|
1105
|
+
declare function rime(opts: {
|
|
1106
|
+
apiKey: string;
|
|
1107
|
+
voice?: string;
|
|
1108
|
+
}): TTSConfig;
|
|
1109
|
+
/** LMNT TTS config helper. */
|
|
1110
|
+
declare function lmnt(opts: {
|
|
1111
|
+
apiKey: string;
|
|
1112
|
+
voice?: string;
|
|
1113
|
+
}): TTSConfig;
|
|
819
1114
|
/**
|
|
820
|
-
*
|
|
1115
|
+
* Ultravox realtime engine config helper.
|
|
821
1116
|
*
|
|
822
|
-
*
|
|
823
|
-
*
|
|
1117
|
+
* Returns a ``RealtimeConfig`` envelope that the backend can dispatch. For
|
|
1118
|
+
* programmatic control over a live session use ``UltravoxRealtimeAdapter``
|
|
1119
|
+
* directly.
|
|
824
1120
|
*/
|
|
1121
|
+
declare function ultravox(opts: {
|
|
1122
|
+
apiKey: string;
|
|
1123
|
+
model?: string;
|
|
1124
|
+
voice?: string;
|
|
1125
|
+
}): RealtimeConfig;
|
|
1126
|
+
/**
|
|
1127
|
+
* Google Gemini Live realtime engine config helper. See
|
|
1128
|
+
* ``GeminiLiveAdapter`` for direct session control.
|
|
1129
|
+
*/
|
|
1130
|
+
declare function geminiLive(opts: {
|
|
1131
|
+
apiKey: string;
|
|
1132
|
+
model?: string;
|
|
1133
|
+
voice?: string;
|
|
1134
|
+
}): RealtimeConfig;
|
|
1135
|
+
|
|
825
1136
|
interface ProviderPricing {
|
|
826
1137
|
unit: string;
|
|
827
1138
|
price?: number;
|
|
@@ -829,6 +1140,8 @@ interface ProviderPricing {
|
|
|
829
1140
|
audio_output_per_token?: number;
|
|
830
1141
|
text_input_per_token?: number;
|
|
831
1142
|
text_output_per_token?: number;
|
|
1143
|
+
cached_audio_input_per_token?: number;
|
|
1144
|
+
cached_text_input_per_token?: number;
|
|
832
1145
|
}
|
|
833
1146
|
declare const DEFAULT_PRICING: Record<string, ProviderPricing>;
|
|
834
1147
|
/**
|
|
@@ -840,18 +1153,37 @@ declare function mergePricing(overrides?: Record<string, Partial<ProviderPricing
|
|
|
840
1153
|
declare function calculateSttCost(provider: string, audioSeconds: number, pricing: Record<string, ProviderPricing>): number;
|
|
841
1154
|
/** Calculate TTS cost from character count. */
|
|
842
1155
|
declare function calculateTtsCost(provider: string, characterCount: number, pricing: Record<string, ProviderPricing>): number;
|
|
843
|
-
/**
|
|
1156
|
+
/**
|
|
1157
|
+
* Calculate OpenAI Realtime cost from token usage.
|
|
1158
|
+
*
|
|
1159
|
+
* OpenAI bills the cached portion of ``input_token_details.audio_tokens`` and
|
|
1160
|
+
* ``.text_tokens`` at the reduced cached rate (typically ~3% of full for audio,
|
|
1161
|
+
* ~10% of full for text on the mini model). ``cached_tokens_details`` is a
|
|
1162
|
+
* nested breakdown of the same ``input_token_details`` totals — the cached
|
|
1163
|
+
* counts are already INCLUDED in the top-level totals, so we subtract them
|
|
1164
|
+
* out before applying the full rate and add them back at the cached rate.
|
|
1165
|
+
*/
|
|
844
1166
|
declare function calculateRealtimeCost(usage: {
|
|
845
1167
|
input_token_details?: {
|
|
846
1168
|
audio_tokens?: number;
|
|
847
1169
|
text_tokens?: number;
|
|
1170
|
+
cached_tokens_details?: {
|
|
1171
|
+
audio_tokens?: number;
|
|
1172
|
+
text_tokens?: number;
|
|
1173
|
+
};
|
|
848
1174
|
};
|
|
849
1175
|
output_token_details?: {
|
|
850
1176
|
audio_tokens?: number;
|
|
851
1177
|
text_tokens?: number;
|
|
852
1178
|
};
|
|
853
1179
|
}, pricing: Record<string, ProviderPricing>): number;
|
|
854
|
-
/**
|
|
1180
|
+
/**
|
|
1181
|
+
* Calculate telephony cost from call duration.
|
|
1182
|
+
*
|
|
1183
|
+
* Twilio bills in whole-minute increments (any partial minute is rounded up
|
|
1184
|
+
* to the next full minute per twilio.com/help/223132307). Telnyx bills
|
|
1185
|
+
* per-second. We detect Twilio by provider name and apply the round-up.
|
|
1186
|
+
*/
|
|
855
1187
|
declare function calculateTelephonyCost(provider: string, durationSeconds: number, pricing: Record<string, ProviderPricing>): number;
|
|
856
1188
|
|
|
857
1189
|
/**
|
|
@@ -862,9 +1194,40 @@ declare function calculateTelephonyCost(provider: string, durationSeconds: numbe
|
|
|
862
1194
|
|
|
863
1195
|
interface LatencyBreakdown {
|
|
864
1196
|
stt_ms: number;
|
|
1197
|
+
/**
|
|
1198
|
+
* Backwards-compatible LLM bucket. With the split below, this now reflects
|
|
1199
|
+
* the user-perceived first-token latency (TTFT) when streaming is available
|
|
1200
|
+
* and the full generation time otherwise. Prefer ``llm_ttft_ms`` /
|
|
1201
|
+
* ``llm_total_ms`` in new code.
|
|
1202
|
+
*/
|
|
865
1203
|
llm_ms: number;
|
|
1204
|
+
/** Time-to-first-token (UX-facing latency): stt_complete → first LLM token. */
|
|
1205
|
+
llm_ttft_ms?: number;
|
|
1206
|
+
/**
|
|
1207
|
+
* Total LLM generation time: stt_complete → last LLM token. Distinct from
|
|
1208
|
+
* ``llm_ms`` so cost/throughput analysis and TTFT can be tracked separately.
|
|
1209
|
+
*/
|
|
1210
|
+
llm_total_ms?: number;
|
|
866
1211
|
tts_ms: number;
|
|
867
1212
|
total_ms: number;
|
|
1213
|
+
/**
|
|
1214
|
+
* Endpoint latency: time from end-of-user-speech (VAD stop or STT
|
|
1215
|
+
* ``speech_final``) to LLM dispatch. Captures the silence-detection +
|
|
1216
|
+
* transcript-finalization gap. Optional — undefined when the source signal
|
|
1217
|
+
* is missing.
|
|
1218
|
+
*/
|
|
1219
|
+
endpoint_ms?: number;
|
|
1220
|
+
/**
|
|
1221
|
+
* Barge-in latency: time from user-interrupt detection to TTS playback
|
|
1222
|
+
* actually halting (i.e. after ``sendClear`` returned). Optional — only
|
|
1223
|
+
* populated on interrupted turns.
|
|
1224
|
+
*/
|
|
1225
|
+
bargein_ms?: number;
|
|
1226
|
+
/**
|
|
1227
|
+
* Total TTS time: LLM-first-token (or first-sentence boundary) to last
|
|
1228
|
+
* TTS audio byte sent. Optional — undefined when TTS never completed.
|
|
1229
|
+
*/
|
|
1230
|
+
tts_total_ms?: number;
|
|
868
1231
|
}
|
|
869
1232
|
interface CostBreakdown {
|
|
870
1233
|
stt: number;
|
|
@@ -872,6 +1235,12 @@ interface CostBreakdown {
|
|
|
872
1235
|
llm: number;
|
|
873
1236
|
telephony: number;
|
|
874
1237
|
total: number;
|
|
1238
|
+
/**
|
|
1239
|
+
* Amount saved on LLM cost thanks to OpenAI Realtime prompt caching.
|
|
1240
|
+
* ``llm`` above is the net cost AFTER this discount. Dashboards can
|
|
1241
|
+
* render ``saved $X (pct%)`` next to the LLM line when > 0.
|
|
1242
|
+
*/
|
|
1243
|
+
llm_cached_savings?: number;
|
|
875
1244
|
}
|
|
876
1245
|
interface TurnMetrics {
|
|
877
1246
|
turn_index: number;
|
|
@@ -889,6 +1258,9 @@ interface CallMetrics {
|
|
|
889
1258
|
cost: CostBreakdown;
|
|
890
1259
|
latency_avg: LatencyBreakdown;
|
|
891
1260
|
latency_p95: LatencyBreakdown;
|
|
1261
|
+
latency_p50?: LatencyBreakdown;
|
|
1262
|
+
latency_p90?: LatencyBreakdown;
|
|
1263
|
+
latency_p99?: LatencyBreakdown;
|
|
892
1264
|
provider_mode: string;
|
|
893
1265
|
stt_provider: string;
|
|
894
1266
|
tts_provider: string;
|
|
@@ -928,18 +1300,48 @@ declare class CallMetricsAccumulator {
|
|
|
928
1300
|
private readonly _turns;
|
|
929
1301
|
private _turnStart;
|
|
930
1302
|
private _sttComplete;
|
|
1303
|
+
private _llmFirstToken;
|
|
1304
|
+
private _llmFirstSentenceComplete;
|
|
931
1305
|
private _llmComplete;
|
|
932
1306
|
private _ttsFirstByte;
|
|
1307
|
+
/** Last TTS audio byte sent (hrTimeMs). Stamped by ``recordTtsComplete`` /
|
|
1308
|
+
* ``recordTtsCompleteTs``. Used to compute ``tts_total_ms``. */
|
|
1309
|
+
private _ttsLastByte;
|
|
1310
|
+
/** Endpoint signal (hrTimeMs) — VAD stop or STT speech_final, whichever
|
|
1311
|
+
* fires first. Used to compute ``endpoint_ms``. */
|
|
1312
|
+
private _endpointSignalAt;
|
|
1313
|
+
/** Monotonic stamp of LLM dispatch (paired with ``_endpointSignalAt``). */
|
|
1314
|
+
private _turnCommittedMono;
|
|
1315
|
+
/** Barge-in detected timestamp (hrTimeMs). */
|
|
1316
|
+
private _bargeinDetectedAt;
|
|
1317
|
+
/** TTS-stopped timestamp after barge-in (hrTimeMs). */
|
|
1318
|
+
private _bargeinStoppedAt;
|
|
933
1319
|
private _turnUserText;
|
|
934
1320
|
private _turnSttAudioSeconds;
|
|
935
1321
|
private _totalSttAudioSeconds;
|
|
936
1322
|
private _totalTtsCharacters;
|
|
937
1323
|
private _totalRealtimeCost;
|
|
1324
|
+
private _totalRealtimeCachedSavings;
|
|
938
1325
|
private _sttByteCount;
|
|
939
1326
|
private _sttSampleRate;
|
|
940
1327
|
private _sttBytesPerSample;
|
|
941
1328
|
private _actualTelephonyCost;
|
|
942
1329
|
private _actualSttCost;
|
|
1330
|
+
private _totalLlmCost;
|
|
1331
|
+
private _eventBus;
|
|
1332
|
+
/** Timestamp (hrTimeMs) when VAD emitted speech_end. */
|
|
1333
|
+
private _vadStoppedAt;
|
|
1334
|
+
/** Timestamp (hrTimeMs) when STT emitted its final transcript. */
|
|
1335
|
+
private _sttFinalAt;
|
|
1336
|
+
/** Timestamp (hrTimeMs) when the transcript was committed to the LLM. */
|
|
1337
|
+
private _turnCommittedAt;
|
|
1338
|
+
/** Delta (ms) from turn-committed to on_user_turn_completed hook done. */
|
|
1339
|
+
private _onUserTurnCompletedDelayMs;
|
|
1340
|
+
private _numInterruptions;
|
|
1341
|
+
private _numBackchannels;
|
|
1342
|
+
private _overlapStartedAt;
|
|
1343
|
+
private _reportOnlyInitialTtfb;
|
|
1344
|
+
private _initialTtfbEmitted;
|
|
943
1345
|
constructor(opts: {
|
|
944
1346
|
callId: string;
|
|
945
1347
|
providerMode: string;
|
|
@@ -948,23 +1350,116 @@ declare class CallMetricsAccumulator {
|
|
|
948
1350
|
ttsProvider?: string;
|
|
949
1351
|
llmProvider?: string;
|
|
950
1352
|
pricing?: Record<string, Partial<ProviderPricing>> | null;
|
|
1353
|
+
eventBus?: EventBus;
|
|
1354
|
+
/** When true, only the first TTFB emission per call is forwarded to the event bus. */
|
|
1355
|
+
reportOnlyInitialTtfb?: boolean;
|
|
951
1356
|
});
|
|
1357
|
+
/**
|
|
1358
|
+
* Attach (or replace) an EventBus after construction.
|
|
1359
|
+
* Useful when the bus is created after the accumulator (e.g. in tests).
|
|
1360
|
+
*/
|
|
1361
|
+
attachEventBus(bus: EventBus): void;
|
|
952
1362
|
/** Configure audio format for STT byte-to-seconds conversion. */
|
|
953
1363
|
configureSttFormat(sampleRate?: number, bytesPerSample?: number): void;
|
|
954
1364
|
/** Whether a turn is currently being measured (startTurn called, not yet completed). */
|
|
955
1365
|
get turnActive(): boolean;
|
|
956
1366
|
startTurn(): void;
|
|
1367
|
+
/**
|
|
1368
|
+
* Start a new turn only if no turn is currently open.
|
|
1369
|
+
* Use this at inbound-audio ingestion points so the turn timer begins
|
|
1370
|
+
* on the first audio byte rather than just before recordSttComplete().
|
|
1371
|
+
*/
|
|
1372
|
+
startTurnIfIdle(): void;
|
|
957
1373
|
recordSttComplete(text: string, audioSeconds?: number): void;
|
|
1374
|
+
/** Record the timestamp of the first LLM token (TTFT). No-op after first call. */
|
|
1375
|
+
recordLlmFirstToken(): void;
|
|
1376
|
+
/**
|
|
1377
|
+
* Record when the sentence chunker emits the first complete sentence.
|
|
1378
|
+
* Used as the TTS span start so tts_ms reflects true TTS-provider latency
|
|
1379
|
+
* rather than the gap from llm_complete (which fires after the full response).
|
|
1380
|
+
* No-op after first call.
|
|
1381
|
+
*/
|
|
1382
|
+
recordLlmFirstSentenceComplete(): void;
|
|
958
1383
|
recordLlmComplete(): void;
|
|
959
1384
|
recordTtsFirstByte(): void;
|
|
960
1385
|
recordTtsComplete(text: string): void;
|
|
1386
|
+
/**
|
|
1387
|
+
* Capture the timestamp when the last TTS audio byte was sent on the wire.
|
|
1388
|
+
* Useful when the caller wants to record the timing without bumping the
|
|
1389
|
+
* character counter (e.g. interrupted turns where audio actually went out
|
|
1390
|
+
* but synthesis was truncated).
|
|
1391
|
+
*/
|
|
1392
|
+
recordTtsCompleteTs(ts?: number): void;
|
|
1393
|
+
/**
|
|
1394
|
+
* Mark the moment a user interrupt (barge-in) was detected. Pairs with
|
|
1395
|
+
* ``recordTtsStopped`` to compute ``bargein_ms``.
|
|
1396
|
+
*/
|
|
1397
|
+
recordBargeinDetected(ts?: number): void;
|
|
1398
|
+
/**
|
|
1399
|
+
* Mark the moment TTS playback was actually halted after a barge-in. Call
|
|
1400
|
+
* this *after* ``sendClear`` returns. Pairs with ``recordBargeinDetected``
|
|
1401
|
+
* to compute ``bargein_ms``.
|
|
1402
|
+
*/
|
|
1403
|
+
recordTtsStopped(ts?: number): void;
|
|
961
1404
|
recordTurnComplete(agentText: string): TurnMetrics;
|
|
962
1405
|
recordTurnInterrupted(): TurnMetrics | null;
|
|
1406
|
+
/**
|
|
1407
|
+
* Record the moment VAD emitted speech_end for the current utterance.
|
|
1408
|
+
* @param ts Optional override timestamp in hrTimeMs units (defaults to now).
|
|
1409
|
+
*/
|
|
1410
|
+
recordVadStop(ts?: number): void;
|
|
1411
|
+
/**
|
|
1412
|
+
* Record the moment the STT provider delivered its final transcript.
|
|
1413
|
+
* Aliased to the same instant as recordSttComplete() when called from
|
|
1414
|
+
* the standard pipeline; can be called independently for custom pipelines.
|
|
1415
|
+
* @param ts Optional override timestamp in hrTimeMs units.
|
|
1416
|
+
*/
|
|
1417
|
+
recordSttFinalTimestamp(ts?: number): void;
|
|
1418
|
+
/**
|
|
1419
|
+
* Record the moment the transcript was committed to the LLM (turn start).
|
|
1420
|
+
* After this call, ``emitEouMetrics()`` can produce a complete EOUMetrics payload.
|
|
1421
|
+
* @param ts Optional override timestamp in hrTimeMs units.
|
|
1422
|
+
*/
|
|
1423
|
+
recordTurnCommitted(ts?: number): void;
|
|
1424
|
+
/**
|
|
1425
|
+
* Record the delta (ms) between turn-committed and the moment the on_user_turn_completed
|
|
1426
|
+
* pipeline hook finished. Stored for inclusion in the next ``emitEouMetrics``
|
|
1427
|
+
* call (or an explicit re-emit if desired).
|
|
1428
|
+
*/
|
|
1429
|
+
recordOnUserTurnCompletedDelay(delayMs: number): void;
|
|
1430
|
+
/**
|
|
1431
|
+
* Compute and emit EOUMetrics when all three prerequisite timestamps are
|
|
1432
|
+
* available (VAD stop, STT final, turn committed).
|
|
1433
|
+
*
|
|
1434
|
+
* ``endOfUtteranceDelay`` = sttFinal − vadStopped (ms)
|
|
1435
|
+
* ``transcriptionDelay`` = turnCommitted − vadStopped (ms)
|
|
1436
|
+
* ``onUserTurnCompletedDelay`` = caller-supplied delta (ms) or 0
|
|
1437
|
+
*/
|
|
1438
|
+
emitEouMetrics(): void;
|
|
1439
|
+
/**
|
|
1440
|
+
* Record that a caller utterance started overlapping with agent speech.
|
|
1441
|
+
* Call this when VAD detects speech_start during TTS playback.
|
|
1442
|
+
* @param ts Optional override timestamp in hrTimeMs units.
|
|
1443
|
+
*/
|
|
1444
|
+
recordOverlapStart(ts?: number): void;
|
|
1445
|
+
/**
|
|
1446
|
+
* Record that the overlap ended. Emits ``InterruptionMetrics`` via the
|
|
1447
|
+
* event bus.
|
|
1448
|
+
*
|
|
1449
|
+
* @param wasInterruption true → barge-in (increments ``numInterruptions``),
|
|
1450
|
+
* false → backchannel (increments ``numBackchannels``).
|
|
1451
|
+
* @param ts Optional override timestamp in hrTimeMs units.
|
|
1452
|
+
*/
|
|
1453
|
+
recordOverlapEnd(wasInterruption: boolean, ts?: number): void;
|
|
963
1454
|
addSttAudioBytes(byteCount: number): void;
|
|
964
1455
|
recordRealtimeUsage(usage: {
|
|
965
1456
|
input_token_details?: {
|
|
966
1457
|
audio_tokens?: number;
|
|
967
1458
|
text_tokens?: number;
|
|
1459
|
+
cached_tokens_details?: {
|
|
1460
|
+
audio_tokens?: number;
|
|
1461
|
+
text_tokens?: number;
|
|
1462
|
+
};
|
|
968
1463
|
};
|
|
969
1464
|
output_token_details?: {
|
|
970
1465
|
audio_tokens?: number;
|
|
@@ -973,118 +1468,161 @@ declare class CallMetricsAccumulator {
|
|
|
973
1468
|
}): void;
|
|
974
1469
|
setActualTelephonyCost(cost: number): void;
|
|
975
1470
|
setActualSttCost(cost: number): void;
|
|
1471
|
+
/**
|
|
1472
|
+
* Accumulate LLM token cost for pipeline mode (non-Realtime).
|
|
1473
|
+
*
|
|
1474
|
+
* Called by LLMLoop.run() when a usage chunk arrives from the provider.
|
|
1475
|
+
* Mirrors Python's CallMetricsAccumulator.record_llm_usage().
|
|
1476
|
+
*
|
|
1477
|
+
* @param provider LLM provider key (e.g. 'openai', 'anthropic')
|
|
1478
|
+
* @param model Model name (e.g. 'gpt-4o-mini')
|
|
1479
|
+
* @param inputTokens Total input tokens (includes cached)
|
|
1480
|
+
* @param outputTokens Total output tokens
|
|
1481
|
+
* @param cacheReadTokens Cached input tokens (subtracted from input before billing full rate)
|
|
1482
|
+
* @param cacheWriteTokens Cache write tokens (billed at cache_write rate if present)
|
|
1483
|
+
*/
|
|
1484
|
+
recordLlmUsage(provider: string, model: string, inputTokens: number, outputTokens: number, cacheReadTokens?: number, cacheWriteTokens?: number): void;
|
|
976
1485
|
endCall(): CallMetrics;
|
|
977
1486
|
getCostSoFar(): CostBreakdown;
|
|
978
1487
|
private _resetTurnState;
|
|
979
1488
|
private _computeTurnLatency;
|
|
980
1489
|
private _computeCost;
|
|
1490
|
+
/**
|
|
1491
|
+
* Turns eligible for latency statistics.
|
|
1492
|
+
*
|
|
1493
|
+
* Excludes turns marked ``[interrupted]`` (barge-in, cancelled replacements)
|
|
1494
|
+
* because their recorded latency either reflects partial state or zero —
|
|
1495
|
+
* including them would drag every p95/avg bucket toward meaningless numbers.
|
|
1496
|
+
*/
|
|
1497
|
+
private _completedTurns;
|
|
981
1498
|
private _computeAverageLatency;
|
|
982
|
-
private
|
|
1499
|
+
private _computePercentileLatency;
|
|
983
1500
|
}
|
|
984
1501
|
|
|
1502
|
+
/**
|
|
1503
|
+
* Supported OpenAI Realtime wire audio formats. See
|
|
1504
|
+
* https://platform.openai.com/docs/guides/realtime for the full list.
|
|
1505
|
+
* ``g711_ulaw`` matches what Twilio/Telnyx emit natively on the phone leg,
|
|
1506
|
+
* so no transcoding is needed. ``pcm16`` is used in the terminal test-mode
|
|
1507
|
+
* path and when the telephony provider negotiates L16/16000.
|
|
1508
|
+
*/
|
|
1509
|
+
type OpenAIRealtimeAudioFormat = 'g711_ulaw' | 'g711_alaw' | 'pcm16';
|
|
1510
|
+
type RealtimeEventCallback = (type: string, data: unknown) => void | Promise<void>;
|
|
1511
|
+
interface OpenAIRealtimeOptions {
|
|
1512
|
+
temperature?: number;
|
|
1513
|
+
maxResponseOutputTokens?: number | 'inf';
|
|
1514
|
+
modalities?: string[];
|
|
1515
|
+
toolChoice?: string | Record<string, unknown>;
|
|
1516
|
+
inputAudioTranscriptionModel?: string;
|
|
1517
|
+
vadType?: 'server_vad' | 'semantic_vad';
|
|
1518
|
+
/**
|
|
1519
|
+
* Trailing silence (ms) the server VAD waits for before treating the user's
|
|
1520
|
+
* turn as complete. Defaults to 300 — OpenAI's documented sweet-spot for
|
|
1521
|
+
* snappier turn-taking, ~200 ms faster than the previous 500 default.
|
|
1522
|
+
* Increase for dictation-style flows where the user pauses mid-sentence.
|
|
1523
|
+
*/
|
|
1524
|
+
silenceDurationMs?: number;
|
|
1525
|
+
}
|
|
985
1526
|
declare class OpenAIRealtimeAdapter {
|
|
986
1527
|
private readonly apiKey;
|
|
987
1528
|
private readonly model;
|
|
988
1529
|
private readonly voice;
|
|
989
1530
|
private readonly instructions;
|
|
990
1531
|
private readonly tools?;
|
|
1532
|
+
private readonly audioFormat;
|
|
991
1533
|
private ws;
|
|
1534
|
+
private readonly eventCallbacks;
|
|
1535
|
+
private messageListenerAttached;
|
|
1536
|
+
private heartbeat;
|
|
1537
|
+
private currentResponseItemId;
|
|
1538
|
+
private currentResponseAudioMs;
|
|
1539
|
+
private readonly options;
|
|
992
1540
|
constructor(apiKey: string, model?: string, voice?: string, instructions?: string, tools?: Array<{
|
|
993
1541
|
name: string;
|
|
994
1542
|
description: string;
|
|
995
1543
|
parameters: Record<string, unknown>;
|
|
996
|
-
}> | undefined);
|
|
1544
|
+
}> | undefined, audioFormat?: OpenAIRealtimeAudioFormat, options?: OpenAIRealtimeOptions);
|
|
997
1545
|
connect(): Promise<void>;
|
|
998
1546
|
sendAudio(mulawAudio: Buffer): void;
|
|
999
|
-
|
|
1547
|
+
/**
|
|
1548
|
+
* Register a listener for parsed realtime events.
|
|
1549
|
+
*
|
|
1550
|
+
* Previously every call attached a new ``ws.on('message')`` handler,
|
|
1551
|
+
* which leaked listeners across retries and multi-consumer hooks. We now
|
|
1552
|
+
* route all traffic through a single persistent handler that fans out to
|
|
1553
|
+
* a Set of callbacks. Use {@link offEvent} to remove one.
|
|
1554
|
+
*/
|
|
1555
|
+
onEvent(callback: RealtimeEventCallback): void;
|
|
1556
|
+
offEvent(callback: RealtimeEventCallback): void;
|
|
1557
|
+
private ensureMessageListener;
|
|
1000
1558
|
cancelResponse(): void;
|
|
1001
1559
|
sendText(text: string): Promise<void>;
|
|
1002
1560
|
sendFunctionResult(callId: string, result: string): Promise<void>;
|
|
1003
1561
|
close(): void;
|
|
1004
1562
|
}
|
|
1005
1563
|
|
|
1564
|
+
interface ElevenLabsConvAIOptions {
|
|
1565
|
+
apiKey: string;
|
|
1566
|
+
agentId?: string;
|
|
1567
|
+
voiceId?: string;
|
|
1568
|
+
modelId?: string;
|
|
1569
|
+
language?: string;
|
|
1570
|
+
firstMessage?: string;
|
|
1571
|
+
outputAudioFormat?: string;
|
|
1572
|
+
inputAudioFormat?: string;
|
|
1573
|
+
useSignedUrl?: boolean;
|
|
1574
|
+
}
|
|
1575
|
+
type EventCallback = (type: string, data: unknown) => void | Promise<void>;
|
|
1006
1576
|
declare class ElevenLabsConvAIAdapter {
|
|
1577
|
+
private ws;
|
|
1578
|
+
private eventCallback;
|
|
1007
1579
|
private readonly apiKey;
|
|
1008
1580
|
private readonly agentId;
|
|
1009
1581
|
private readonly voiceId;
|
|
1582
|
+
readonly modelId: string;
|
|
1583
|
+
private readonly language;
|
|
1010
1584
|
private readonly firstMessage;
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
*
|
|
1023
|
-
* Keeps the last `maxCalls` completed calls and tracks active calls.
|
|
1024
|
-
* Supports SSE event subscribers for real-time updates.
|
|
1025
|
-
*/
|
|
1026
|
-
|
|
1027
|
-
interface CallRecord {
|
|
1028
|
-
call_id: string;
|
|
1029
|
-
caller: string;
|
|
1030
|
-
callee: string;
|
|
1031
|
-
direction: string;
|
|
1032
|
-
started_at: number;
|
|
1033
|
-
ended_at?: number;
|
|
1034
|
-
/**
|
|
1035
|
-
* Current lifecycle state: ``initiated`` (pre-registered), ``ringing``,
|
|
1036
|
-
* ``in-progress``, ``completed``, ``no-answer``, ``busy``, ``failed``,
|
|
1037
|
-
* ``canceled``, or ``webhook_error``.
|
|
1038
|
-
*/
|
|
1039
|
-
status?: string;
|
|
1040
|
-
transcript?: Array<{
|
|
1041
|
-
role: string;
|
|
1042
|
-
text: string;
|
|
1043
|
-
timestamp: number;
|
|
1044
|
-
}>;
|
|
1045
|
-
turns?: unknown[];
|
|
1046
|
-
metrics?: Record<string, unknown> | null;
|
|
1047
|
-
[key: string]: unknown;
|
|
1048
|
-
}
|
|
1049
|
-
interface SSEEvent {
|
|
1050
|
-
type: string;
|
|
1051
|
-
data: Record<string, unknown>;
|
|
1052
|
-
}
|
|
1053
|
-
declare class MetricsStore extends EventEmitter {
|
|
1054
|
-
private readonly maxCalls;
|
|
1055
|
-
private calls;
|
|
1056
|
-
private activeCalls;
|
|
1057
|
-
/**
|
|
1058
|
-
* Accepts either a numeric ``maxCalls`` (legacy positional — matches the
|
|
1059
|
-
* original TS API) or an options object ``{ maxCalls }`` to align with the
|
|
1060
|
-
* Python SDK's keyword-argument style. Plain literals also work:
|
|
1061
|
-
* ``new MetricsStore()`` / ``new MetricsStore(100)`` / ``new MetricsStore({ maxCalls: 100 })``.
|
|
1062
|
-
*/
|
|
1063
|
-
constructor(maxCallsOrOpts?: number | {
|
|
1064
|
-
maxCalls?: number;
|
|
1065
|
-
});
|
|
1066
|
-
private publish;
|
|
1067
|
-
recordCallStart(data: Record<string, unknown>): void;
|
|
1585
|
+
readonly outputAudioFormat: string | undefined;
|
|
1586
|
+
readonly inputAudioFormat: string | undefined;
|
|
1587
|
+
private readonly useSignedUrl;
|
|
1588
|
+
conversationId: string | null;
|
|
1589
|
+
agentOutputAudioFormat: string | null;
|
|
1590
|
+
userInputAudioFormat: string | null;
|
|
1591
|
+
private agentSpeaking;
|
|
1592
|
+
private silenceTimer;
|
|
1593
|
+
private closePromise;
|
|
1594
|
+
constructor(apiKey: string, agentId?: string, voiceId?: string, firstMessage?: string);
|
|
1595
|
+
constructor(options: ElevenLabsConvAIOptions);
|
|
1068
1596
|
/**
|
|
1069
|
-
*
|
|
1070
|
-
*
|
|
1071
|
-
*
|
|
1597
|
+
* Build an adapter pre-configured for Twilio Media Streams.
|
|
1598
|
+
*
|
|
1599
|
+
* Negotiates `ulaw_8000` for both `outputAudioFormat` and
|
|
1600
|
+
* `inputAudioFormat`, matching Twilio's μ-law @ 8 kHz wire format. The
|
|
1601
|
+
* SDK's stream handler detects this and skips the 8 kHz → 16 kHz inbound
|
|
1602
|
+
* resample and the 16 kHz → 8 kHz / PCM → μ-law outbound transcode.
|
|
1603
|
+
* Saves ~30–80 ms first-byte plus per-frame CPU on every turn.
|
|
1072
1604
|
*/
|
|
1073
|
-
|
|
1605
|
+
static forTwilio(apiKey: string, agentId: string, options?: Omit<ElevenLabsConvAIOptions, 'apiKey' | 'agentId' | 'outputAudioFormat' | 'inputAudioFormat'>): ElevenLabsConvAIAdapter;
|
|
1074
1606
|
/**
|
|
1075
|
-
*
|
|
1076
|
-
*
|
|
1077
|
-
*
|
|
1607
|
+
* Build an adapter pre-configured for Telnyx bidirectional media.
|
|
1608
|
+
*
|
|
1609
|
+
* Telnyx negotiates PCMU @ 8 kHz when `streaming_start` sets
|
|
1610
|
+
* `stream_bidirectional_codec=PCMU` (the SDK default). Picking
|
|
1611
|
+
* `ulaw_8000` on both ConvAI directions removes every transcode on the
|
|
1612
|
+
* audio path — same optimization as `forTwilio`.
|
|
1078
1613
|
*/
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1614
|
+
static forTelnyx(apiKey: string, agentId: string, options?: Omit<ElevenLabsConvAIOptions, 'apiKey' | 'agentId' | 'outputAudioFormat' | 'inputAudioFormat'>): ElevenLabsConvAIAdapter;
|
|
1615
|
+
private fetchSignedUrl;
|
|
1616
|
+
connect(): Promise<void>;
|
|
1617
|
+
private safeInvoke;
|
|
1618
|
+
private respondToPing;
|
|
1619
|
+
private clearSilenceTimer;
|
|
1620
|
+
private finalizeAgentTurn;
|
|
1621
|
+
private scheduleSilenceDone;
|
|
1622
|
+
private handleMessage;
|
|
1623
|
+
sendAudio(audioBytes: Buffer): void;
|
|
1624
|
+
onEvent(callback: EventCallback): void;
|
|
1625
|
+
close(): Promise<void>;
|
|
1088
1626
|
}
|
|
1089
1627
|
|
|
1090
1628
|
/**
|
|
@@ -1153,6 +1691,14 @@ interface LocalConfig {
|
|
|
1153
1691
|
* are rejected with HTTP 403.
|
|
1154
1692
|
*/
|
|
1155
1693
|
telnyxPublicKey?: string;
|
|
1694
|
+
/**
|
|
1695
|
+
* SECURITY: require valid webhook signatures on both Twilio and Telnyx
|
|
1696
|
+
* inbound webhooks. When true (the default), a missing credential
|
|
1697
|
+
* (twilioToken / telnyxPublicKey) causes the webhook to return
|
|
1698
|
+
* 503 Service Unavailable instead of silently accepting the request.
|
|
1699
|
+
* Set to false only for local development against mock providers.
|
|
1700
|
+
*/
|
|
1701
|
+
requireSignature?: boolean;
|
|
1156
1702
|
}
|
|
1157
1703
|
|
|
1158
1704
|
/**
|
|
@@ -1210,6 +1756,12 @@ declare function mountApi(app: Express, store: MetricsStore, token?: string): vo
|
|
|
1210
1756
|
* When the SDK completes a call, it fires a POST to the standalone dashboard
|
|
1211
1757
|
* (if running) so calls appear in real time. Data lives only in memory —
|
|
1212
1758
|
* nothing is written to disk.
|
|
1759
|
+
*
|
|
1760
|
+
* TODO(parity): Python's `notify_dashboard` is now an async fire-and-forget
|
|
1761
|
+
* coroutine (see sdk-py/getpatter/dashboard/persistence.py). This TS version
|
|
1762
|
+
* uses `http.request` which is already non-blocking, but for parity consider
|
|
1763
|
+
* exposing this as `async function notifyDashboard(...): Promise<void>` so
|
|
1764
|
+
* call sites can `await` or `void` it explicitly, matching the Python API.
|
|
1213
1765
|
*/
|
|
1214
1766
|
declare function notifyDashboard(callData: Record<string, unknown>, port?: number): void;
|
|
1215
1767
|
|
|
@@ -1275,6 +1827,215 @@ declare class FallbackLLMProvider implements LLMProvider {
|
|
|
1275
1827
|
private stopRecovery;
|
|
1276
1828
|
}
|
|
1277
1829
|
|
|
1830
|
+
/**
|
|
1831
|
+
* PatterTool — wrap a live Patter instance as a tool callable from external
|
|
1832
|
+
* agent frameworks (OpenAI Assistants, Anthropic Claude tool-use, LangChain,
|
|
1833
|
+
* Hermes Agent, MCP, generic OpenAI-compatible endpoints).
|
|
1834
|
+
*
|
|
1835
|
+
* Pattern this enables: a customer already runs an agent in their existing
|
|
1836
|
+
* stack (LangChain, OpenAI Assistant, Hermes Agent, …) and wants the agent
|
|
1837
|
+
* to *make phone calls* during a conversation. With this tool, the customer
|
|
1838
|
+
* registers `make_phone_call` and the agent's tool-call loop can dial out
|
|
1839
|
+
* via Patter, get a transcript + cost back, and continue reasoning.
|
|
1840
|
+
*
|
|
1841
|
+
* ## Design
|
|
1842
|
+
*
|
|
1843
|
+
* Each `PatterTool` wraps one `Patter` instance (carrier + agent + serve).
|
|
1844
|
+
* The tool exposes:
|
|
1845
|
+
*
|
|
1846
|
+
* - `openaiSchema()` — OpenAI / chat-completions tool spec
|
|
1847
|
+
* - `anthropicSchema()` — Anthropic Claude tool spec
|
|
1848
|
+
* - `hermesSchema()` — Hermes Agent / Nous registry schema (same JSON Schema as
|
|
1849
|
+
* anthropicSchema, keyed `parameters` instead of `input_schema`)
|
|
1850
|
+
* - `execute(args)` — dial outbound, await call end, return summary
|
|
1851
|
+
* - `hermesHandler()` — `(args) => Promise<string>` wrapper that
|
|
1852
|
+
* returns a JSON string and `{"error": "..."}` on
|
|
1853
|
+
* failure (matches Hermes' tool contract)
|
|
1854
|
+
*
|
|
1855
|
+
* ## Usage (OpenAI / Anthropic)
|
|
1856
|
+
*
|
|
1857
|
+
* ```ts
|
|
1858
|
+
* import { Patter, Twilio, DeepgramSTT, GroqLLM, ElevenLabsTTS } from 'getpatter';
|
|
1859
|
+
* import { PatterTool } from 'getpatter/integrations';
|
|
1860
|
+
*
|
|
1861
|
+
* const phone = new Patter({
|
|
1862
|
+
* carrier: new Twilio(),
|
|
1863
|
+
* phoneNumber: process.env.TWILIO_PHONE_NUMBER!,
|
|
1864
|
+
* webhookUrl: 'agent.example.com',
|
|
1865
|
+
* });
|
|
1866
|
+
*
|
|
1867
|
+
* const tool = new PatterTool({
|
|
1868
|
+
* phone,
|
|
1869
|
+
* agent: { stt: new DeepgramSTT(), llm: new GroqLLM(), tts: new ElevenLabsTTS() },
|
|
1870
|
+
* });
|
|
1871
|
+
*
|
|
1872
|
+
* await tool.start(); // boots phone.serve() once
|
|
1873
|
+
*
|
|
1874
|
+
* // Register with your LLM
|
|
1875
|
+
* const tools = [tool.openaiSchema()];
|
|
1876
|
+
*
|
|
1877
|
+
* // When the LLM emits a tool_call:
|
|
1878
|
+
* const result = await tool.execute({
|
|
1879
|
+
* to: '+15551234567',
|
|
1880
|
+
* goal: 'Book a dentist appointment for next Tuesday afternoon.',
|
|
1881
|
+
* });
|
|
1882
|
+
* // → { call_id, status, duration_seconds, cost_usd, transcript, … }
|
|
1883
|
+
* ```
|
|
1884
|
+
*
|
|
1885
|
+
* ## Usage (Hermes Agent)
|
|
1886
|
+
*
|
|
1887
|
+
* Hermes' contract: handler takes `args: dict` + kwargs, returns a JSON
|
|
1888
|
+
* string. The TS SDK is meant to be invoked from Python via your own bridge
|
|
1889
|
+
* (HTTP, MCP, subprocess); this `hermesSchema()` + `hermesHandler()` pair
|
|
1890
|
+
* matches the Python adapter shipped under `getpatter.integrations` so the
|
|
1891
|
+
* two SDKs stay in lockstep.
|
|
1892
|
+
*
|
|
1893
|
+
* For pure-Python Hermes setups, use `PatterTool` from `getpatter.integrations`
|
|
1894
|
+
* directly inside a `tools/patter.py` module:
|
|
1895
|
+
*
|
|
1896
|
+
* ```python
|
|
1897
|
+
* from tools.registry import registry
|
|
1898
|
+
* from getpatter.integrations import PatterTool
|
|
1899
|
+
*
|
|
1900
|
+
* tool = PatterTool(phone=...)
|
|
1901
|
+
* tool.register_hermes(registry)
|
|
1902
|
+
* ```
|
|
1903
|
+
*/
|
|
1904
|
+
|
|
1905
|
+
/** JSON-Schema of the call args. Identical wire shape across openai/anthropic/hermes. */
|
|
1906
|
+
declare const PARAMETERS_SCHEMA: {
|
|
1907
|
+
readonly type: "object";
|
|
1908
|
+
readonly properties: {
|
|
1909
|
+
readonly to: {
|
|
1910
|
+
readonly type: "string";
|
|
1911
|
+
readonly description: "Destination phone number in E.164 format (e.g. \"+15551234567\"). Required.";
|
|
1912
|
+
};
|
|
1913
|
+
readonly goal: {
|
|
1914
|
+
readonly type: "string";
|
|
1915
|
+
readonly description: "What the agent should accomplish on the call. Becomes the in-call agent's system prompt for this single call.";
|
|
1916
|
+
};
|
|
1917
|
+
readonly first_message: {
|
|
1918
|
+
readonly type: "string";
|
|
1919
|
+
readonly description: "Optional first message the agent speaks when the callee answers. Defaults to a generic greeting.";
|
|
1920
|
+
};
|
|
1921
|
+
readonly max_duration_sec: {
|
|
1922
|
+
readonly type: "integer";
|
|
1923
|
+
readonly description: "Hard timeout for the call in seconds. Default 180. The call is force-ended at this deadline whether or not it has resolved.";
|
|
1924
|
+
readonly minimum: 5;
|
|
1925
|
+
readonly maximum: 1800;
|
|
1926
|
+
};
|
|
1927
|
+
};
|
|
1928
|
+
readonly required: readonly ["to"];
|
|
1929
|
+
};
|
|
1930
|
+
interface PatterToolOptions {
|
|
1931
|
+
/**
|
|
1932
|
+
* Patter instance to dial through. Must be in local mode (have a `carrier`).
|
|
1933
|
+
* The tool boots `phone.serve()` on `start()`; do not call `serve()` yourself.
|
|
1934
|
+
*/
|
|
1935
|
+
phone: Patter;
|
|
1936
|
+
/**
|
|
1937
|
+
* Default agent config used for outbound calls. Per-call overrides come from
|
|
1938
|
+
* `execute({ goal, first_message })`.
|
|
1939
|
+
*/
|
|
1940
|
+
agent?: AgentOptions;
|
|
1941
|
+
/** Tool name shown to the LLM. Default `'make_phone_call'`. */
|
|
1942
|
+
name?: string;
|
|
1943
|
+
/** Tool description for the LLM. Default tuned for English assistants. */
|
|
1944
|
+
description?: string;
|
|
1945
|
+
/** Default per-call timeout in seconds. Default 180. */
|
|
1946
|
+
maxDurationSec?: number;
|
|
1947
|
+
/**
|
|
1948
|
+
* Optional pass-through for `phone.serve()`'s `recording` flag — record all
|
|
1949
|
+
* outbound calls placed via this tool.
|
|
1950
|
+
*/
|
|
1951
|
+
recording?: boolean;
|
|
1952
|
+
}
|
|
1953
|
+
interface PatterToolExecuteArgs {
|
|
1954
|
+
to: string;
|
|
1955
|
+
goal?: string;
|
|
1956
|
+
first_message?: string;
|
|
1957
|
+
max_duration_sec?: number;
|
|
1958
|
+
}
|
|
1959
|
+
interface PatterToolResult {
|
|
1960
|
+
call_id: string;
|
|
1961
|
+
status: string;
|
|
1962
|
+
duration_seconds: number;
|
|
1963
|
+
cost_usd?: number;
|
|
1964
|
+
transcript: Array<{
|
|
1965
|
+
role: string;
|
|
1966
|
+
text: string;
|
|
1967
|
+
timestamp?: number;
|
|
1968
|
+
}>;
|
|
1969
|
+
metrics?: Record<string, unknown> | null;
|
|
1970
|
+
}
|
|
1971
|
+
declare class PatterTool {
|
|
1972
|
+
readonly name: string;
|
|
1973
|
+
readonly description: string;
|
|
1974
|
+
private readonly phone;
|
|
1975
|
+
private readonly agent;
|
|
1976
|
+
private readonly maxDurationSec;
|
|
1977
|
+
private readonly recording;
|
|
1978
|
+
private started;
|
|
1979
|
+
/** Resolver for the next `call_initiated` SSE event. Only set inside the
|
|
1980
|
+
* dial mutex (`dialQueue`), so two parallel `execute()` calls never share
|
|
1981
|
+
* it and never lose a dispatch. */
|
|
1982
|
+
private pendingDial;
|
|
1983
|
+
/** Mutex that serializes the dial → call_id capture critical section.
|
|
1984
|
+
* Each `execute()` chains a continuation onto this promise so the
|
|
1985
|
+
* `pendingDial` slot is owned by exactly one caller at a time. */
|
|
1986
|
+
private dialQueue;
|
|
1987
|
+
/** Captured SSE listener so `stop()` can detach it (prevents leaks when
|
|
1988
|
+
* the underlying Patter instance outlives this tool). */
|
|
1989
|
+
private sseListener;
|
|
1990
|
+
/** Captured Patter metrics store, for cleanup in `stop()`. */
|
|
1991
|
+
private metricsStoreRef;
|
|
1992
|
+
/** call_id → pending promise machinery. */
|
|
1993
|
+
private readonly pending;
|
|
1994
|
+
private readonly bus;
|
|
1995
|
+
/** How long to wait for the `call_initiated` SSE before failing the dial. */
|
|
1996
|
+
private static readonly DIAL_CAPTURE_TIMEOUT_MS;
|
|
1997
|
+
constructor(opts: PatterToolOptions);
|
|
1998
|
+
/** OpenAI Chat Completions / Assistants tool spec. */
|
|
1999
|
+
openaiSchema(): {
|
|
2000
|
+
type: 'function';
|
|
2001
|
+
function: {
|
|
2002
|
+
name: string;
|
|
2003
|
+
description: string;
|
|
2004
|
+
parameters: typeof PARAMETERS_SCHEMA;
|
|
2005
|
+
};
|
|
2006
|
+
};
|
|
2007
|
+
/** Anthropic Messages API tool spec. */
|
|
2008
|
+
anthropicSchema(): {
|
|
2009
|
+
name: string;
|
|
2010
|
+
description: string;
|
|
2011
|
+
input_schema: typeof PARAMETERS_SCHEMA;
|
|
2012
|
+
};
|
|
2013
|
+
/**
|
|
2014
|
+
* Hermes Agent (Nous Research) registry schema. Same JSON-Schema shape as
|
|
2015
|
+
* Anthropic's; Hermes consumes it via `registry.register({ schema: ... })`.
|
|
2016
|
+
*/
|
|
2017
|
+
hermesSchema(): {
|
|
2018
|
+
name: string;
|
|
2019
|
+
description: string;
|
|
2020
|
+
parameters: typeof PARAMETERS_SCHEMA;
|
|
2021
|
+
};
|
|
2022
|
+
/** Start the underlying Patter server. Idempotent. */
|
|
2023
|
+
start(): Promise<void>;
|
|
2024
|
+
/** Stop the underlying Patter server (and reject any pending calls). */
|
|
2025
|
+
stop(): Promise<void>;
|
|
2026
|
+
execute(args: PatterToolExecuteArgs): Promise<PatterToolResult>;
|
|
2027
|
+
/** Issue the outbound dial under the mutex and return its assigned call_id. */
|
|
2028
|
+
private acquireCallId;
|
|
2029
|
+
/**
|
|
2030
|
+
* Hermes-style handler: `(args) => Promise<string>` returning a JSON
|
|
2031
|
+
* string with either the result envelope or an `{"error": "..."}` payload.
|
|
2032
|
+
* Mirrors the Python `PatterTool.hermes_handler` so cross-SDK adapters share
|
|
2033
|
+
* the same wire contract.
|
|
2034
|
+
*/
|
|
2035
|
+
hermesHandler(): (args: PatterToolExecuteArgs) => Promise<string>;
|
|
2036
|
+
private onCallEndHandler;
|
|
2037
|
+
}
|
|
2038
|
+
|
|
1278
2039
|
/**
|
|
1279
2040
|
* Interactive terminal test mode for voice agents.
|
|
1280
2041
|
*
|
|
@@ -1303,6 +2064,12 @@ declare class TestSession {
|
|
|
1303
2064
|
* not use Gemini Live do not pay the load cost. Install with:
|
|
1304
2065
|
*
|
|
1305
2066
|
* npm install @google/genai
|
|
2067
|
+
*
|
|
2068
|
+
* NOTE: Native-audio Gemini Live models are **v1alpha-only**. We pass
|
|
2069
|
+
* `httpOptions: { apiVersion: 'v1alpha' }` when constructing the client.
|
|
2070
|
+
* When Google promotes native audio to GA, switch to `v1beta` / `v1` and
|
|
2071
|
+
* update the default model below.
|
|
2072
|
+
* See: https://ai.google.dev/gemini-api/docs/live
|
|
1306
2073
|
*/
|
|
1307
2074
|
declare const GEMINI_DEFAULT_INPUT_SR = 16000;
|
|
1308
2075
|
declare const GEMINI_DEFAULT_OUTPUT_SR = 24000;
|
|
@@ -1337,6 +2104,12 @@ declare class GeminiLiveAdapter {
|
|
|
1337
2104
|
private receiveLoop;
|
|
1338
2105
|
private handlers;
|
|
1339
2106
|
private running;
|
|
2107
|
+
/**
|
|
2108
|
+
* Tracks call_id -> function name so tool responses can be sent back with
|
|
2109
|
+
* the correct `name` field (Gemini expects the original function name,
|
|
2110
|
+
* not the call_id).
|
|
2111
|
+
*/
|
|
2112
|
+
private pendingToolCalls;
|
|
1340
2113
|
constructor(apiKey: string, options?: GeminiLiveOptions);
|
|
1341
2114
|
connect(): Promise<void>;
|
|
1342
2115
|
sendAudio(pcm: Buffer): void;
|
|
@@ -1514,22 +2287,19 @@ declare class SonioxSTT {
|
|
|
1514
2287
|
/**
|
|
1515
2288
|
* AssemblyAI Universal Streaming STT adapter for the Patter SDK pipeline mode.
|
|
1516
2289
|
*
|
|
1517
|
-
*
|
|
1518
|
-
* WebSocket API. Pure `ws` transport — does NOT depend on the vendor SDK.
|
|
1519
|
-
*
|
|
1520
|
-
* Algorithm adapted from LiveKit Agents (Apache 2.0):
|
|
1521
|
-
* https://github.com/livekit/agents
|
|
1522
|
-
* Source: livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py
|
|
1523
|
-
* Upstream ref SHA: 78a66bcf79c5cea82989401c408f1dff4b961a5b
|
|
2290
|
+
* Pure `ws` transport — does NOT depend on the vendor SDK.
|
|
1524
2291
|
*/
|
|
1525
2292
|
interface Transcript$3 {
|
|
1526
2293
|
readonly text: string;
|
|
1527
2294
|
readonly isFinal: boolean;
|
|
1528
2295
|
readonly confidence: number;
|
|
2296
|
+
/** Optional event hint, e.g. `"SpeechStarted"` for barge-in signals. */
|
|
2297
|
+
readonly eventType?: string;
|
|
1529
2298
|
}
|
|
1530
2299
|
type TranscriptCallback$3 = (transcript: Transcript$3) => void;
|
|
1531
2300
|
type AssemblyAIEncoding = 'pcm_s16le' | 'pcm_mulaw';
|
|
1532
|
-
type AssemblyAIModel = 'universal-streaming-english' | 'universal-streaming-multilingual' | 'u3-rt-pro';
|
|
2301
|
+
type AssemblyAIModel = 'universal-streaming-english' | 'universal-streaming-multilingual' | 'u3-rt-pro' | 'whisper-rt';
|
|
2302
|
+
type AssemblyAIDomain = 'general' | 'medical-v1';
|
|
1533
2303
|
interface AssemblyAISTTOptions$1 {
|
|
1534
2304
|
/** One of the AssemblyAI speech models. */
|
|
1535
2305
|
readonly model?: AssemblyAIModel;
|
|
@@ -1539,6 +2309,11 @@ interface AssemblyAISTTOptions$1 {
|
|
|
1539
2309
|
readonly sampleRate?: number;
|
|
1540
2310
|
/** Override the streaming base URL (e.g. EU: `wss://streaming.eu.assemblyai.com`). */
|
|
1541
2311
|
readonly baseUrl?: string;
|
|
2312
|
+
/**
|
|
2313
|
+
* Authenticate via `?token=<apiKey>` in the URL instead of the
|
|
2314
|
+
* `Authorization` header. Default `false`.
|
|
2315
|
+
*/
|
|
2316
|
+
readonly useQueryToken?: boolean;
|
|
1542
2317
|
/** Enable automatic language detection (defaults: true for multilingual/u3-rt-pro). */
|
|
1543
2318
|
readonly languageDetection?: boolean;
|
|
1544
2319
|
/** 0..1 confidence required before end-of-turn is finalized. */
|
|
@@ -1553,34 +2328,54 @@ interface AssemblyAISTTOptions$1 {
|
|
|
1553
2328
|
readonly keytermsPrompt?: readonly string[];
|
|
1554
2329
|
/** Text prompt (u3-rt-pro only). */
|
|
1555
2330
|
readonly prompt?: string;
|
|
1556
|
-
/**
|
|
2331
|
+
/** Accepted for backward compatibility but NOT sent — not a valid v3 param. */
|
|
1557
2332
|
readonly vadThreshold?: number;
|
|
1558
2333
|
/** Enable diarization / speaker labels. */
|
|
1559
2334
|
readonly speakerLabels?: boolean;
|
|
1560
2335
|
/** Max speakers for diarization. */
|
|
1561
2336
|
readonly maxSpeakers?: number;
|
|
1562
|
-
/** Domain hint
|
|
1563
|
-
readonly domain?:
|
|
2337
|
+
/** Domain hint — must be `"general"` or `"medical-v1"`. */
|
|
2338
|
+
readonly domain?: AssemblyAIDomain;
|
|
1564
2339
|
}
|
|
1565
2340
|
declare class AssemblyAISTT {
|
|
1566
2341
|
private readonly apiKey;
|
|
1567
2342
|
private readonly options;
|
|
1568
2343
|
private ws;
|
|
1569
|
-
private callbacks;
|
|
2344
|
+
private readonly callbacks;
|
|
2345
|
+
private closing;
|
|
2346
|
+
private reconnectAttempts;
|
|
2347
|
+
private terminationResolve;
|
|
1570
2348
|
/** AssemblyAI session id — set when the `Begin` message arrives. */
|
|
1571
|
-
sessionId: string;
|
|
2349
|
+
sessionId: string | null;
|
|
1572
2350
|
/** Unix timestamp when the AssemblyAI session expires. */
|
|
1573
|
-
expiresAt: number;
|
|
2351
|
+
expiresAt: number | null;
|
|
1574
2352
|
constructor(apiKey: string, options?: AssemblyAISTTOptions$1);
|
|
1575
2353
|
/** Factory for Twilio calls — mulaw 8 kHz. */
|
|
1576
2354
|
static forTwilio(apiKey: string, model?: AssemblyAIModel): AssemblyAISTT;
|
|
1577
2355
|
private buildUrl;
|
|
2356
|
+
private buildHeaders;
|
|
1578
2357
|
connect(): Promise<void>;
|
|
2358
|
+
private awaitOpen;
|
|
2359
|
+
private attachHandlers;
|
|
2360
|
+
private reconnect;
|
|
1579
2361
|
private handleEvent;
|
|
1580
2362
|
private emit;
|
|
1581
2363
|
sendAudio(audio: Buffer): void;
|
|
1582
|
-
|
|
1583
|
-
|
|
2364
|
+
private estimateChunkDurationMs;
|
|
2365
|
+
/**
|
|
2366
|
+
* Send an `UpdateConfiguration` frame to change settings mid-stream.
|
|
2367
|
+
* Only defined fields are included.
|
|
2368
|
+
*/
|
|
2369
|
+
updateConfiguration(params: {
|
|
2370
|
+
keytermsPrompt?: readonly string[];
|
|
2371
|
+
prompt?: string;
|
|
2372
|
+
minTurnSilence?: number;
|
|
2373
|
+
maxTurnSilence?: number;
|
|
2374
|
+
}): void;
|
|
2375
|
+
/** Force the server to finalize the current turn (for barge-in). */
|
|
2376
|
+
forceEndpoint(): void;
|
|
2377
|
+
onTranscript(callback: TranscriptCallback$3): () => void;
|
|
2378
|
+
close(): Promise<void>;
|
|
1584
2379
|
}
|
|
1585
2380
|
|
|
1586
2381
|
/**
|
|
@@ -1620,8 +2415,11 @@ declare class CartesiaSTT {
|
|
|
1620
2415
|
private ws;
|
|
1621
2416
|
private callbacks;
|
|
1622
2417
|
private keepaliveTimer;
|
|
1623
|
-
/**
|
|
1624
|
-
|
|
2418
|
+
/**
|
|
2419
|
+
* Cartesia request id — set from the server transcript events.
|
|
2420
|
+
* `null` until the first transcript event arrives (matches Python's `None`).
|
|
2421
|
+
*/
|
|
2422
|
+
requestId: string | null;
|
|
1625
2423
|
constructor(apiKey: string, options?: CartesiaSTTOptions$1);
|
|
1626
2424
|
private buildWsUrl;
|
|
1627
2425
|
connect(): Promise<void>;
|
|
@@ -1629,7 +2427,23 @@ declare class CartesiaSTT {
|
|
|
1629
2427
|
private emit;
|
|
1630
2428
|
sendAudio(audio: Buffer): void;
|
|
1631
2429
|
onTranscript(callback: TranscriptCallback$2): void;
|
|
2430
|
+
/** Remove a previously registered transcript callback. */
|
|
2431
|
+
offTranscript(callback: TranscriptCallback$2): void;
|
|
2432
|
+
/**
|
|
2433
|
+
* Synchronous best-effort close. Sends `finalize` and closes the socket
|
|
2434
|
+
* without waiting for the server to flush any remaining transcripts.
|
|
2435
|
+
*
|
|
2436
|
+
* Limitation: any transcript events produced between the `finalize` send
|
|
2437
|
+
* and the socket close may be dropped. Callers that need to guarantee all
|
|
2438
|
+
* transcripts are delivered should await {@link closeAsync} instead.
|
|
2439
|
+
*/
|
|
1632
2440
|
close(): void;
|
|
2441
|
+
/**
|
|
2442
|
+
* Graceful close that awaits the `finalize` send and the socket closing
|
|
2443
|
+
* handshake, matching the Python adapter's behavior. Use this when you
|
|
2444
|
+
* need any in-flight transcripts to be flushed before teardown.
|
|
2445
|
+
*/
|
|
2446
|
+
closeAsync(): Promise<void>;
|
|
1633
2447
|
}
|
|
1634
2448
|
|
|
1635
2449
|
type LMNTAudioFormat = 'aac' | 'mp3' | 'mulaw' | 'raw' | 'wav';
|
|
@@ -1662,12 +2476,32 @@ declare class LMNTTTS {
|
|
|
1662
2476
|
synthesizeStream(text: string): AsyncGenerator<Buffer>;
|
|
1663
2477
|
}
|
|
1664
2478
|
|
|
2479
|
+
type TranscriptEventType = 'Results' | 'UtteranceEnd' | 'SpeechStarted';
|
|
2480
|
+
interface DeepgramWord {
|
|
2481
|
+
readonly word?: string;
|
|
2482
|
+
readonly start?: number;
|
|
2483
|
+
readonly end?: number;
|
|
2484
|
+
readonly confidence?: number;
|
|
2485
|
+
readonly punctuated_word?: string;
|
|
2486
|
+
readonly speaker?: number;
|
|
2487
|
+
}
|
|
1665
2488
|
interface Transcript$1 {
|
|
1666
2489
|
readonly text: string;
|
|
1667
2490
|
readonly isFinal: boolean;
|
|
1668
2491
|
readonly confidence: number;
|
|
2492
|
+
/** Deepgram VAD hint — faster end-of-utterance than ``isFinal``. */
|
|
2493
|
+
readonly speechFinal?: boolean;
|
|
2494
|
+
/** True when this Results frame was produced in response to a Finalize. */
|
|
2495
|
+
readonly fromFinalize?: boolean;
|
|
2496
|
+
/** Deepgram request id, populated from the initial Metadata frame. */
|
|
2497
|
+
readonly requestId?: string;
|
|
2498
|
+
/** Per-word timings/metadata when Deepgram emits them. */
|
|
2499
|
+
readonly words?: ReadonlyArray<DeepgramWord>;
|
|
2500
|
+
/** Which provider event this Transcript represents. Default ``Results``. */
|
|
2501
|
+
readonly eventType?: TranscriptEventType;
|
|
1669
2502
|
}
|
|
1670
2503
|
type TranscriptCallback$1 = (transcript: Transcript$1) => void;
|
|
2504
|
+
type ErrorCallback = (error: Error) => void;
|
|
1671
2505
|
/**
|
|
1672
2506
|
* Optional tuning knobs for Deepgram live transcription.
|
|
1673
2507
|
*
|
|
@@ -1692,7 +2526,13 @@ interface DeepgramSTTOptions$1 {
|
|
|
1692
2526
|
* hard minimum of 1000 ms. Set to ``null`` to disable. Default ``1000``.
|
|
1693
2527
|
*/
|
|
1694
2528
|
readonly utteranceEndMs?: number | null;
|
|
1695
|
-
/**
|
|
2529
|
+
/**
|
|
2530
|
+
* Enable smart formatting (punctuation + numerals). Default ``false`` —
|
|
2531
|
+
* smart formatting adds roughly 50–150 ms to TTFT on each final transcript
|
|
2532
|
+
* and is rarely useful for telephony pipelines that pass the text straight
|
|
2533
|
+
* to an LLM. Set to ``true`` for use cases (dashboards, raw transcripts)
|
|
2534
|
+
* where the formatted text is surfaced directly to humans.
|
|
2535
|
+
*/
|
|
1696
2536
|
readonly smartFormat?: boolean;
|
|
1697
2537
|
/** Emit interim (non-final) transcripts. Default ``true``. */
|
|
1698
2538
|
readonly interimResults?: boolean;
|
|
@@ -1701,7 +2541,11 @@ interface DeepgramSTTOptions$1 {
|
|
|
1701
2541
|
}
|
|
1702
2542
|
declare class DeepgramSTT {
|
|
1703
2543
|
private ws;
|
|
1704
|
-
private
|
|
2544
|
+
private readonly transcriptCallbacks;
|
|
2545
|
+
private readonly errorCallbacks;
|
|
2546
|
+
private keepaliveTimer;
|
|
2547
|
+
private running;
|
|
2548
|
+
private reconnectAttempted;
|
|
1705
2549
|
/** Request ID from Deepgram — used to query actual cost post-call. */
|
|
1706
2550
|
requestId: string;
|
|
1707
2551
|
private readonly apiKey;
|
|
@@ -1727,27 +2571,30 @@ declare class DeepgramSTT {
|
|
|
1727
2571
|
});
|
|
1728
2572
|
/** Factory for Twilio calls — mulaw 8 kHz. Forwards tuning options through. */
|
|
1729
2573
|
static forTwilio(apiKey: string, language?: string, model?: string, options?: DeepgramSTTOptions$1): DeepgramSTT;
|
|
2574
|
+
private buildUrl;
|
|
1730
2575
|
connect(): Promise<void>;
|
|
2576
|
+
private openSocket;
|
|
2577
|
+
private clearKeepalive;
|
|
2578
|
+
private handleMessage;
|
|
2579
|
+
private emitTranscript;
|
|
2580
|
+
private emitError;
|
|
2581
|
+
private handleError;
|
|
2582
|
+
private handleClose;
|
|
1731
2583
|
sendAudio(audio: Buffer): void;
|
|
1732
2584
|
onTranscript(callback: TranscriptCallback$1): void;
|
|
2585
|
+
offTranscript(callback: TranscriptCallback$1): void;
|
|
2586
|
+
onError(callback: ErrorCallback): void;
|
|
2587
|
+
offError(callback: ErrorCallback): void;
|
|
1733
2588
|
close(): void;
|
|
1734
2589
|
}
|
|
1735
2590
|
|
|
1736
2591
|
/** Deepgram streaming STT for Patter pipeline mode. */
|
|
1737
2592
|
|
|
1738
|
-
|
|
2593
|
+
type DeepgramSTTOptions = DeepgramSTTOptions$1 & {
|
|
1739
2594
|
/** API key. Falls back to DEEPGRAM_API_KEY env var when omitted. */
|
|
1740
2595
|
apiKey?: string;
|
|
1741
2596
|
language?: string;
|
|
1742
|
-
|
|
1743
|
-
encoding?: string;
|
|
1744
|
-
sampleRate?: number;
|
|
1745
|
-
endpointingMs?: number;
|
|
1746
|
-
utteranceEndMs?: number | null;
|
|
1747
|
-
smartFormat?: boolean;
|
|
1748
|
-
interimResults?: boolean;
|
|
1749
|
-
vadEvents?: boolean;
|
|
1750
|
-
}
|
|
2597
|
+
};
|
|
1751
2598
|
/**
|
|
1752
2599
|
* Deepgram streaming STT.
|
|
1753
2600
|
*
|
|
@@ -1758,7 +2605,8 @@ interface DeepgramSTTOptions {
|
|
|
1758
2605
|
* const stt = new deepgram.STT({ apiKey: "dg_...", endpointingMs: 80 });
|
|
1759
2606
|
* ```
|
|
1760
2607
|
*/
|
|
1761
|
-
declare class STT$
|
|
2608
|
+
declare class STT$5 extends DeepgramSTT {
|
|
2609
|
+
static readonly providerKey = "deepgram";
|
|
1762
2610
|
constructor(opts?: DeepgramSTTOptions);
|
|
1763
2611
|
}
|
|
1764
2612
|
|
|
@@ -1774,22 +2622,44 @@ interface Transcript {
|
|
|
1774
2622
|
readonly confidence: number;
|
|
1775
2623
|
}
|
|
1776
2624
|
type TranscriptCallback = (transcript: Transcript) => void;
|
|
2625
|
+
type WhisperResponseFormat = 'json' | 'verbose_json';
|
|
1777
2626
|
declare class WhisperSTT {
|
|
1778
2627
|
private readonly apiKey;
|
|
1779
2628
|
private readonly model;
|
|
1780
2629
|
private readonly language;
|
|
1781
2630
|
private readonly bufferSize;
|
|
1782
|
-
private
|
|
2631
|
+
private readonly responseFormat;
|
|
2632
|
+
private chunks;
|
|
2633
|
+
private bufferedBytes;
|
|
1783
2634
|
private callbacks;
|
|
1784
2635
|
private running;
|
|
1785
2636
|
private pendingTranscriptions;
|
|
1786
|
-
|
|
2637
|
+
/**
|
|
2638
|
+
* @param apiKey OpenAI API key.
|
|
2639
|
+
* @param language ISO-639-1 language code (e.g. ``"en"``, ``"it"``). Optional.
|
|
2640
|
+
* @param model One of ``whisper-1``, ``gpt-4o-transcribe``, ``gpt-4o-mini-transcribe``.
|
|
2641
|
+
* @param bufferSize Bytes of PCM16 to buffer before each transcription request.
|
|
2642
|
+
* @param responseFormat ``"json"`` (default) or ``"verbose_json"``.
|
|
2643
|
+
*
|
|
2644
|
+
* Argument order matches the Python SDK's ``WhisperSTT(api_key, language, model, response_format)``
|
|
2645
|
+
* for cross-language parity. Pre-0.5.3 the TS positional order was
|
|
2646
|
+
* ``(apiKey, model, language, bufferSize, responseFormat)`` — callers using
|
|
2647
|
+
* the old order will need to swap ``language`` and ``model``.
|
|
2648
|
+
*/
|
|
2649
|
+
constructor(apiKey: string, language?: string, model?: string, bufferSize?: number, responseFormat?: WhisperResponseFormat);
|
|
1787
2650
|
/** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
|
|
1788
2651
|
static forTwilio(apiKey: string, language?: string, model?: string): WhisperSTT;
|
|
1789
2652
|
connect(): Promise<void>;
|
|
1790
2653
|
sendAudio(audio: Buffer): void;
|
|
2654
|
+
private flushChunks;
|
|
1791
2655
|
private trackTranscription;
|
|
2656
|
+
/**
|
|
2657
|
+
* Register a transcript listener. Unlike the previous implementation
|
|
2658
|
+
* which capped at 10 and silently replaced the last one, we now keep all
|
|
2659
|
+
* registered callbacks in a Set; use {@link offTranscript} to remove one.
|
|
2660
|
+
*/
|
|
1792
2661
|
onTranscript(callback: TranscriptCallback): void;
|
|
2662
|
+
offTranscript(callback: TranscriptCallback): void;
|
|
1793
2663
|
close(): Promise<void>;
|
|
1794
2664
|
private transcribeBuffer;
|
|
1795
2665
|
}
|
|
@@ -1802,6 +2672,8 @@ interface WhisperSTTOptions {
|
|
|
1802
2672
|
model?: string;
|
|
1803
2673
|
language?: string;
|
|
1804
2674
|
bufferSize?: number;
|
|
2675
|
+
/** ``"verbose_json"`` exposes segment-level confidence / timestamps. */
|
|
2676
|
+
responseFormat?: WhisperResponseFormat;
|
|
1805
2677
|
}
|
|
1806
2678
|
/**
|
|
1807
2679
|
* OpenAI Whisper STT.
|
|
@@ -1813,10 +2685,68 @@ interface WhisperSTTOptions {
|
|
|
1813
2685
|
* const stt = new whisper.STT({ apiKey: "sk-...", language: "en" });
|
|
1814
2686
|
* ```
|
|
1815
2687
|
*/
|
|
1816
|
-
declare class STT$
|
|
2688
|
+
declare class STT$4 extends WhisperSTT {
|
|
2689
|
+
static readonly providerKey = "whisper";
|
|
1817
2690
|
constructor(opts?: WhisperSTTOptions);
|
|
1818
2691
|
}
|
|
1819
2692
|
|
|
2693
|
+
/**
|
|
2694
|
+
* OpenAI GPT-4o Transcribe STT adapter for the Patter SDK pipeline mode.
|
|
2695
|
+
*
|
|
2696
|
+
* First-class wrapper around OpenAI's ``gpt-4o-transcribe`` /
|
|
2697
|
+
* ``gpt-4o-mini-transcribe`` models. They share the
|
|
2698
|
+
* ``POST /v1/audio/transcriptions`` endpoint with Whisper-1 but offer ~10x
|
|
2699
|
+
* lower latency and stronger multilingual quality, making them a drop-in
|
|
2700
|
+
* replacement for ``WhisperSTT`` whenever speed matters.
|
|
2701
|
+
*
|
|
2702
|
+
* Use this class instead of ``WhisperSTT`` when you specifically want the
|
|
2703
|
+
* GPT-4o Transcribe family — it restricts the accepted models so
|
|
2704
|
+
* misconfigured calls fail fast instead of silently dropping back to
|
|
2705
|
+
* ``whisper-1``.
|
|
2706
|
+
*/
|
|
2707
|
+
|
|
2708
|
+
declare class OpenAITranscribeSTT extends WhisperSTT {
|
|
2709
|
+
/**
|
|
2710
|
+
* @param apiKey OpenAI API key.
|
|
2711
|
+
* @param language ISO-639-1 language code (e.g. ``"en"``, ``"it"``). Optional.
|
|
2712
|
+
* @param model One of ``gpt-4o-transcribe`` (default), ``gpt-4o-mini-transcribe``.
|
|
2713
|
+
* ``"whisper-1"`` is intentionally rejected here — use ``WhisperSTT`` for that.
|
|
2714
|
+
* @param bufferSize Bytes of PCM16 to buffer before each transcription request.
|
|
2715
|
+
* @param responseFormat ``"json"`` (default) or ``"verbose_json"``.
|
|
2716
|
+
*/
|
|
2717
|
+
constructor(apiKey: string, language?: string, model?: string, bufferSize?: number, responseFormat?: WhisperResponseFormat);
|
|
2718
|
+
}
|
|
2719
|
+
|
|
2720
|
+
/** OpenAI GPT-4o Transcribe STT for Patter pipeline mode. */
|
|
2721
|
+
|
|
2722
|
+
interface OpenAITranscribeSTTOptions {
|
|
2723
|
+
/** API key. Falls back to OPENAI_API_KEY env var when omitted. */
|
|
2724
|
+
apiKey?: string;
|
|
2725
|
+
/** ``gpt-4o-transcribe`` (default) or ``gpt-4o-mini-transcribe``. */
|
|
2726
|
+
model?: string;
|
|
2727
|
+
language?: string;
|
|
2728
|
+
bufferSize?: number;
|
|
2729
|
+
/** ``"verbose_json"`` exposes segment-level confidence / timestamps. */
|
|
2730
|
+
responseFormat?: WhisperResponseFormat;
|
|
2731
|
+
}
|
|
2732
|
+
/**
|
|
2733
|
+
* OpenAI GPT-4o Transcribe STT — ~10x faster than Whisper-1.
|
|
2734
|
+
*
|
|
2735
|
+
* Drop-in replacement for ``whisper.STT`` with stronger multilingual
|
|
2736
|
+
* quality and significantly lower latency.
|
|
2737
|
+
*
|
|
2738
|
+
* @example
|
|
2739
|
+
* ```ts
|
|
2740
|
+
* import * as openaiTranscribe from "getpatter/stt/openai-transcribe";
|
|
2741
|
+
* const stt = new openaiTranscribe.STT(); // reads OPENAI_API_KEY
|
|
2742
|
+
* const stt = new openaiTranscribe.STT({ apiKey: "sk-...", language: "en" });
|
|
2743
|
+
* ```
|
|
2744
|
+
*/
|
|
2745
|
+
declare class STT$3 extends OpenAITranscribeSTT {
|
|
2746
|
+
static readonly providerKey = "openai_transcribe";
|
|
2747
|
+
constructor(opts?: OpenAITranscribeSTTOptions);
|
|
2748
|
+
}
|
|
2749
|
+
|
|
1820
2750
|
/** Cartesia streaming STT for Patter pipeline mode. */
|
|
1821
2751
|
|
|
1822
2752
|
interface CartesiaSTTOptions {
|
|
@@ -1839,6 +2769,7 @@ interface CartesiaSTTOptions {
|
|
|
1839
2769
|
* ```
|
|
1840
2770
|
*/
|
|
1841
2771
|
declare class STT$2 extends CartesiaSTT {
|
|
2772
|
+
static readonly providerKey = "cartesia_stt";
|
|
1842
2773
|
constructor(opts?: CartesiaSTTOptions);
|
|
1843
2774
|
}
|
|
1844
2775
|
|
|
@@ -1869,6 +2800,7 @@ interface SonioxSTTOptions {
|
|
|
1869
2800
|
* ```
|
|
1870
2801
|
*/
|
|
1871
2802
|
declare class STT$1 extends SonioxSTT {
|
|
2803
|
+
static readonly providerKey = "soniox";
|
|
1872
2804
|
constructor(opts?: SonioxSTTOptions);
|
|
1873
2805
|
}
|
|
1874
2806
|
|
|
@@ -1891,7 +2823,7 @@ interface AssemblyAISTTOptions {
|
|
|
1891
2823
|
vadThreshold?: number;
|
|
1892
2824
|
speakerLabels?: boolean;
|
|
1893
2825
|
maxSpeakers?: number;
|
|
1894
|
-
domain?:
|
|
2826
|
+
domain?: AssemblyAIDomain;
|
|
1895
2827
|
}
|
|
1896
2828
|
/**
|
|
1897
2829
|
* AssemblyAI Universal Streaming STT.
|
|
@@ -1904,15 +2836,103 @@ interface AssemblyAISTTOptions {
|
|
|
1904
2836
|
* ```
|
|
1905
2837
|
*/
|
|
1906
2838
|
declare class STT extends AssemblyAISTT {
|
|
2839
|
+
static readonly providerKey = "assemblyai";
|
|
1907
2840
|
constructor(opts?: AssemblyAISTTOptions);
|
|
1908
2841
|
}
|
|
1909
2842
|
|
|
2843
|
+
/**
|
|
2844
|
+
* Known stable ElevenLabs voice models (from the official ElevenLabs API
|
|
2845
|
+
* reference). Provided as a string-literal union for autocomplete + type
|
|
2846
|
+
* narrowing; the public ``modelId`` option also accepts ``string`` so
|
|
2847
|
+
* users can pass forward-compat IDs we haven't enumerated yet.
|
|
2848
|
+
*
|
|
2849
|
+
* - ``eleven_v3`` — newest, highest quality (slower TTFT than Flash).
|
|
2850
|
+
* - ``eleven_flash_v2_5`` — current default, fastest (~75 ms TTFT).
|
|
2851
|
+
* - ``eleven_turbo_v2_5`` — balanced quality/speed.
|
|
2852
|
+
* - ``eleven_multilingual_v2`` — best multilingual support.
|
|
2853
|
+
* - ``eleven_monolingual_v1`` — legacy English-only.
|
|
2854
|
+
*/
|
|
2855
|
+
type ElevenLabsModel = 'eleven_v3' | 'eleven_flash_v2_5' | 'eleven_turbo_v2_5' | 'eleven_multilingual_v2' | 'eleven_monolingual_v1';
|
|
2856
|
+
type ElevenLabsOutputFormat = 'mp3_22050_32' | 'mp3_44100_32' | 'mp3_44100_64' | 'mp3_44100_96' | 'mp3_44100_128' | 'mp3_44100_192' | 'pcm_8000' | 'pcm_16000' | 'pcm_22050' | 'pcm_24000' | 'pcm_44100' | 'ulaw_8000';
|
|
2857
|
+
interface ElevenLabsVoiceSettings {
|
|
2858
|
+
stability?: number;
|
|
2859
|
+
similarity_boost?: number;
|
|
2860
|
+
style?: number;
|
|
2861
|
+
use_speaker_boost?: boolean;
|
|
2862
|
+
}
|
|
2863
|
+
interface ElevenLabsTTSOptions$1 {
|
|
2864
|
+
voiceId?: string;
|
|
2865
|
+
/**
|
|
2866
|
+
* ElevenLabs voice model ID. The default ``eleven_flash_v2_5`` has the
|
|
2867
|
+
* lowest TTFT (~75 ms). Pass ``eleven_v3`` for highest quality, or any
|
|
2868
|
+
* arbitrary string for forward-compat with future models.
|
|
2869
|
+
*/
|
|
2870
|
+
modelId?: ElevenLabsModel | string;
|
|
2871
|
+
outputFormat?: ElevenLabsOutputFormat;
|
|
2872
|
+
voiceSettings?: ElevenLabsVoiceSettings;
|
|
2873
|
+
languageCode?: string;
|
|
2874
|
+
chunkSize?: number;
|
|
2875
|
+
}
|
|
2876
|
+
/**
|
|
2877
|
+
* ElevenLabs streaming TTS adapter.
|
|
2878
|
+
*
|
|
2879
|
+
* Supported `modelId` values are autocompleted via {@link ElevenLabsModel}.
|
|
2880
|
+
* Default is `eleven_flash_v2_5` (lowest TTFT, ~75 ms).
|
|
2881
|
+
*
|
|
2882
|
+
* **Telephony optimization** — the constructor default
|
|
2883
|
+
* `outputFormat='pcm_16000'` is correct for web playback, dashboard
|
|
2884
|
+
* previews, and 16 kHz pipelines. For real phone calls, use the
|
|
2885
|
+
* carrier-specific factories instead:
|
|
2886
|
+
*
|
|
2887
|
+
* - {@link ElevenLabsTTS.forTwilio} emits `ulaw_8000` natively. Twilio's
|
|
2888
|
+
* media-stream WebSocket expects μ-law @ 8 kHz, so the SDK normally
|
|
2889
|
+
* resamples 16 kHz → 8 kHz and PCM → μ-law before sending. Asking
|
|
2890
|
+
* ElevenLabs to produce μ-law directly skips that step (saves
|
|
2891
|
+
* ~30–80 ms first-byte plus per-frame CPU and avoids any resampling
|
|
2892
|
+
* aliasing).
|
|
2893
|
+
* - {@link ElevenLabsTTS.forTelnyx} emits `pcm_16000`. Telnyx negotiates
|
|
2894
|
+
* L16/16000 on its bidirectional media WebSocket, so 16 kHz PCM is
|
|
2895
|
+
* already the format used end-to-end and no transcoding happens.
|
|
2896
|
+
* ElevenLabs *also* supports `ulaw_8000` if your Telnyx profile is
|
|
2897
|
+
* pinned to PCMU/8000 — pass `outputFormat: 'ulaw_8000'` explicitly
|
|
2898
|
+
* in that case.
|
|
2899
|
+
*/
|
|
1910
2900
|
declare class ElevenLabsTTS {
|
|
1911
2901
|
private readonly apiKey;
|
|
2902
|
+
private readonly voiceId;
|
|
1912
2903
|
private readonly modelId;
|
|
1913
2904
|
private readonly outputFormat;
|
|
1914
|
-
private readonly
|
|
1915
|
-
|
|
2905
|
+
private readonly voiceSettings;
|
|
2906
|
+
private readonly languageCode;
|
|
2907
|
+
private readonly chunkSize;
|
|
2908
|
+
constructor(apiKey: string, voiceId?: string, modelId?: string, outputFormat?: ElevenLabsOutputFormat | string);
|
|
2909
|
+
constructor(apiKey: string, options: ElevenLabsTTSOptions$1);
|
|
2910
|
+
/**
|
|
2911
|
+
* Construct an instance pre-configured for Twilio Media Streams.
|
|
2912
|
+
*
|
|
2913
|
+
* Sets `outputFormat='ulaw_8000'` so ElevenLabs emits μ-law @ 8 kHz
|
|
2914
|
+
* directly — the exact wire format Twilio's media stream uses — letting
|
|
2915
|
+
* the SDK skip the 16 kHz→8 kHz resample and PCM→μ-law conversion in
|
|
2916
|
+
* `TwilioAudioSender`. Saves ~30–80 ms first-byte and per-frame CPU,
|
|
2917
|
+
* and removes a potential aliasing source.
|
|
2918
|
+
*
|
|
2919
|
+
* `voiceSettings` defaults to a low-bandwidth-friendly profile
|
|
2920
|
+
* (speaker boost off, modest stability) which sounds cleaner at 8 kHz
|
|
2921
|
+
* μ-law than the studio default. Pass an explicit object to override.
|
|
2922
|
+
*/
|
|
2923
|
+
static forTwilio(apiKey: string, options?: Omit<ElevenLabsTTSOptions$1, 'outputFormat'>): ElevenLabsTTS;
|
|
2924
|
+
/**
|
|
2925
|
+
* Construct an instance pre-configured for Telnyx bidirectional media.
|
|
2926
|
+
*
|
|
2927
|
+
* Telnyx's default media-streaming codec is L16 PCM @ 16 kHz, which
|
|
2928
|
+
* matches our default Telnyx handler. We pick `pcm_16000` so the audio
|
|
2929
|
+
* flows end-to-end with zero resampling or transcoding.
|
|
2930
|
+
*
|
|
2931
|
+
* Trade-off: if your Telnyx profile is pinned to PCMU/8000 (μ-law),
|
|
2932
|
+
* construct `ElevenLabsTTS` directly with `outputFormat: 'ulaw_8000'`
|
|
2933
|
+
* — Telnyx supports that natively too.
|
|
2934
|
+
*/
|
|
2935
|
+
static forTelnyx(apiKey: string, options?: Omit<ElevenLabsTTSOptions$1, 'outputFormat'>): ElevenLabsTTS;
|
|
1916
2936
|
/**
|
|
1917
2937
|
* Synthesise text to speech and return the full audio as a single Buffer.
|
|
1918
2938
|
*
|
|
@@ -1923,7 +2943,8 @@ declare class ElevenLabsTTS {
|
|
|
1923
2943
|
* Synthesise text and yield audio chunks as they arrive (streaming).
|
|
1924
2944
|
*
|
|
1925
2945
|
* The yielded buffers are raw PCM at 16 kHz (or whatever `outputFormat` is
|
|
1926
|
-
* configured to).
|
|
2946
|
+
* configured to). `chunkSize` controls the maximum yield size — 512 is a
|
|
2947
|
+
* good choice for low-latency telephony.
|
|
1927
2948
|
*/
|
|
1928
2949
|
synthesizeStream(text: string): AsyncGenerator<Buffer>;
|
|
1929
2950
|
}
|
|
@@ -1934,9 +2955,15 @@ interface ElevenLabsTTSOptions {
|
|
|
1934
2955
|
/** API key. Falls back to ELEVENLABS_API_KEY env var when omitted. */
|
|
1935
2956
|
apiKey?: string;
|
|
1936
2957
|
voiceId?: string;
|
|
1937
|
-
|
|
2958
|
+
/**
|
|
2959
|
+
* ElevenLabs voice model ID. Default is ``eleven_flash_v2_5`` (lowest TTFT).
|
|
2960
|
+
* Pass ``eleven_v3`` for highest quality, or any string for forward-compat.
|
|
2961
|
+
*/
|
|
2962
|
+
modelId?: ElevenLabsModel | string;
|
|
1938
2963
|
outputFormat?: string;
|
|
1939
2964
|
}
|
|
2965
|
+
/** Options for the carrier-specific factories — same as the constructor minus `outputFormat`. */
|
|
2966
|
+
type ElevenLabsCarrierOptions = Omit<ElevenLabsTTSOptions, "outputFormat">;
|
|
1940
2967
|
/**
|
|
1941
2968
|
* ElevenLabs TTS.
|
|
1942
2969
|
*
|
|
@@ -1946,16 +2973,31 @@ interface ElevenLabsTTSOptions {
|
|
|
1946
2973
|
* const tts = new elevenlabs.TTS(); // reads ELEVENLABS_API_KEY
|
|
1947
2974
|
* const tts = new elevenlabs.TTS({ apiKey: "...", voiceId: "rachel" });
|
|
1948
2975
|
* ```
|
|
2976
|
+
*
|
|
2977
|
+
* **Telephony optimization** — use {@link TTS.forTwilio} (μ-law @ 8 kHz,
|
|
2978
|
+
* native Twilio Media Streams format) or {@link TTS.forTelnyx} (PCM @
|
|
2979
|
+
* 16 kHz, native Telnyx default) on phone calls to skip the SDK-side
|
|
2980
|
+
* resampling / transcoding step.
|
|
1949
2981
|
*/
|
|
1950
2982
|
declare class TTS$4 extends ElevenLabsTTS {
|
|
2983
|
+
static readonly providerKey = "elevenlabs";
|
|
1951
2984
|
constructor(opts?: ElevenLabsTTSOptions);
|
|
2985
|
+
/** Pipeline TTS pre-configured for Twilio Media Streams (`ulaw_8000`). */
|
|
2986
|
+
static forTwilio(opts?: ElevenLabsCarrierOptions): TTS$4;
|
|
2987
|
+
static forTwilio(apiKey: string, options?: Omit<ElevenLabsTTSOptions, "outputFormat">): TTS$4;
|
|
2988
|
+
/** Pipeline TTS pre-configured for Telnyx (`pcm_16000`). */
|
|
2989
|
+
static forTelnyx(opts?: ElevenLabsCarrierOptions): TTS$4;
|
|
2990
|
+
static forTelnyx(apiKey: string, options?: Omit<ElevenLabsTTSOptions, "outputFormat">): TTS$4;
|
|
1952
2991
|
}
|
|
1953
2992
|
|
|
1954
2993
|
declare class OpenAITTS {
|
|
1955
2994
|
private readonly apiKey;
|
|
1956
2995
|
private readonly voice;
|
|
1957
2996
|
private readonly model;
|
|
1958
|
-
|
|
2997
|
+
private readonly instructions;
|
|
2998
|
+
private readonly speed;
|
|
2999
|
+
private readonly antiAlias;
|
|
3000
|
+
constructor(apiKey: string, voice?: string, model?: string, instructions?: string | null, speed?: number | null, antiAlias?: boolean);
|
|
1959
3001
|
/**
|
|
1960
3002
|
* Synthesise text to speech and return the full audio as a single Buffer.
|
|
1961
3003
|
*
|
|
@@ -1965,26 +3007,36 @@ declare class OpenAITTS {
|
|
|
1965
3007
|
/**
|
|
1966
3008
|
* Synthesise text and yield audio chunks as they arrive (streaming).
|
|
1967
3009
|
*
|
|
1968
|
-
* OpenAI returns 24 kHz PCM16; each chunk is
|
|
1969
|
-
* yielding so the output is ready for
|
|
3010
|
+
* OpenAI returns 24 kHz PCM16; each chunk is lowpass-filtered then
|
|
3011
|
+
* decimated 3:2 to 16 kHz before yielding so the output is ready for
|
|
3012
|
+
* telephony pipelines.
|
|
1970
3013
|
*
|
|
1971
|
-
* The resampler carries state (buffered samples + odd
|
|
1972
|
-
* between chunks
|
|
1973
|
-
*
|
|
1974
|
-
* Python `audioop.ratecv` fix).
|
|
3014
|
+
* The resampler carries state (filter memory + buffered samples + odd
|
|
3015
|
+
* trailing byte) between chunks so cross-chunk sample alignment and
|
|
3016
|
+
* filter phase don't reset on every network read.
|
|
1975
3017
|
*/
|
|
1976
3018
|
synthesizeStream(text: string): AsyncGenerator<Buffer>;
|
|
1977
3019
|
/**
|
|
1978
|
-
* Streaming 24 kHz → 16 kHz resampler (PCM16-LE).
|
|
1979
|
-
*
|
|
3020
|
+
* Streaming 24 kHz → 16 kHz resampler (PCM16-LE). Applies a single-pole
|
|
3021
|
+
* lowpass ahead of the 3:2 decimation and carries filter + sample state
|
|
3022
|
+
* across chunks so the cadence doesn't reset at every network read.
|
|
3023
|
+
*
|
|
3024
|
+
* ``ctx.lpfEnabled`` (default true on the streaming path, false for the
|
|
3025
|
+
* legacy static helper) controls whether the LPF is engaged — we keep
|
|
3026
|
+
* the helper bit-exact for the downsample-only tests while the real
|
|
3027
|
+
* streaming path gets anti-alias filtering.
|
|
1980
3028
|
*/
|
|
1981
|
-
static resampleStreaming(audio: Buffer, ctx:
|
|
1982
|
-
carryByte: number | null;
|
|
1983
|
-
leftover: number[];
|
|
1984
|
-
}): Buffer;
|
|
3029
|
+
static resampleStreaming(audio: Buffer, ctx: ResampleCtx): Buffer;
|
|
1985
3030
|
/** @deprecated use {@link resampleStreaming} with persistent state. */
|
|
1986
3031
|
static resample24kTo16k(audio: Buffer): Buffer;
|
|
1987
3032
|
}
|
|
3033
|
+
interface ResampleCtx {
|
|
3034
|
+
carryByte: number | null;
|
|
3035
|
+
leftover: number[];
|
|
3036
|
+
lpfPrev: number;
|
|
3037
|
+
/** Enable the single-pole lowpass ahead of decimation. Default true. */
|
|
3038
|
+
lpfEnabled?: boolean;
|
|
3039
|
+
}
|
|
1988
3040
|
|
|
1989
3041
|
/** OpenAI TTS for Patter pipeline mode. */
|
|
1990
3042
|
|
|
@@ -1993,6 +3045,16 @@ interface OpenAITTSOptions {
|
|
|
1993
3045
|
apiKey?: string;
|
|
1994
3046
|
voice?: string;
|
|
1995
3047
|
model?: string;
|
|
3048
|
+
/** Voice-direction prompt (only honoured for gpt-4o-mini-tts and newer). */
|
|
3049
|
+
instructions?: string;
|
|
3050
|
+
/** Speech speed multiplier, must be in [0.25, 4.0] when set. */
|
|
3051
|
+
speed?: number;
|
|
3052
|
+
/**
|
|
3053
|
+
* Enable anti-aliasing LPF ahead of the 3:2 decimation. Defaults to
|
|
3054
|
+
* ``false`` for backwards-compatibility; set to ``true`` for cleaner
|
|
3055
|
+
* audio on sibilants / fricatives.
|
|
3056
|
+
*/
|
|
3057
|
+
antiAlias?: boolean;
|
|
1996
3058
|
}
|
|
1997
3059
|
/**
|
|
1998
3060
|
* OpenAI TTS.
|
|
@@ -2005,6 +3067,7 @@ interface OpenAITTSOptions {
|
|
|
2005
3067
|
* ```
|
|
2006
3068
|
*/
|
|
2007
3069
|
declare class TTS$3 extends OpenAITTS {
|
|
3070
|
+
static readonly providerKey = "openai_tts";
|
|
2008
3071
|
constructor(opts?: OpenAITTSOptions);
|
|
2009
3072
|
}
|
|
2010
3073
|
|
|
@@ -2031,6 +3094,25 @@ declare class CartesiaTTS {
|
|
|
2031
3094
|
private readonly baseUrl;
|
|
2032
3095
|
private readonly apiVersion;
|
|
2033
3096
|
constructor(apiKey: string, opts?: CartesiaTTSOptions$1);
|
|
3097
|
+
/**
|
|
3098
|
+
* Construct an instance pre-configured for Twilio Media Streams.
|
|
3099
|
+
*
|
|
3100
|
+
* Sets `sampleRate=8000` so Cartesia emits PCM_S16LE @ 8 kHz directly.
|
|
3101
|
+
* Twilio's media stream uses μ-law @ 8 kHz so the SDK still does the
|
|
3102
|
+
* PCM → μ-law transcode client-side, but the 16 kHz → 8 kHz resample
|
|
3103
|
+
* step is skipped. Saves ~10–30 ms first-byte plus per-frame CPU and
|
|
3104
|
+
* removes a potential aliasing source.
|
|
3105
|
+
*/
|
|
3106
|
+
static forTwilio(apiKey: string, options?: Omit<CartesiaTTSOptions$1, 'sampleRate'>): CartesiaTTS;
|
|
3107
|
+
/**
|
|
3108
|
+
* Construct an instance pre-configured for Telnyx bidirectional media.
|
|
3109
|
+
*
|
|
3110
|
+
* Sets `sampleRate=16000` to match Telnyx's L16/16000 default codec —
|
|
3111
|
+
* audio flows end-to-end with zero resampling or transcoding. Same as
|
|
3112
|
+
* the bare-constructor default; exists for API symmetry with
|
|
3113
|
+
* {@link CartesiaTTS.forTwilio}.
|
|
3114
|
+
*/
|
|
3115
|
+
static forTelnyx(apiKey: string, options?: Omit<CartesiaTTSOptions$1, 'sampleRate'>): CartesiaTTS;
|
|
2034
3116
|
/** Build the JSON payload for the Cartesia bytes endpoint. */
|
|
2035
3117
|
private buildPayload;
|
|
2036
3118
|
/** Synthesize text and return the concatenated audio buffer. */
|
|
@@ -2057,8 +3139,14 @@ interface CartesiaTTSOptions {
|
|
|
2057
3139
|
baseUrl?: string;
|
|
2058
3140
|
apiVersion?: string;
|
|
2059
3141
|
}
|
|
3142
|
+
/** Options for the carrier-specific factories — same as the constructor minus `sampleRate`. */
|
|
3143
|
+
type CartesiaCarrierOptions = Omit<CartesiaTTSOptions, "sampleRate">;
|
|
2060
3144
|
/**
|
|
2061
|
-
* Cartesia TTS (sonic-
|
|
3145
|
+
* Cartesia TTS (sonic-3 GA, ~90 ms TTFB).
|
|
3146
|
+
*
|
|
3147
|
+
* The default model is `sonic-3` — Cartesia's current GA model. Voice IDs
|
|
3148
|
+
* from the previous `sonic-2` family (including the default Katie voice)
|
|
3149
|
+
* remain compatible.
|
|
2062
3150
|
*
|
|
2063
3151
|
* @example
|
|
2064
3152
|
* ```ts
|
|
@@ -2066,9 +3154,21 @@ interface CartesiaTTSOptions {
|
|
|
2066
3154
|
* const tts = new cartesia.TTS(); // reads CARTESIA_API_KEY
|
|
2067
3155
|
* const tts = new cartesia.TTS({ apiKey: "..." });
|
|
2068
3156
|
* ```
|
|
3157
|
+
*
|
|
3158
|
+
* **Telephony optimization** — use {@link TTS.forTwilio} (PCM @ 8 kHz,
|
|
3159
|
+
* skipping the SDK-side 16 kHz → 8 kHz resample before μ-law transcoding)
|
|
3160
|
+
* or {@link TTS.forTelnyx} (PCM @ 16 kHz, native Telnyx default) on
|
|
3161
|
+
* phone calls.
|
|
2069
3162
|
*/
|
|
2070
3163
|
declare class TTS$2 extends CartesiaTTS {
|
|
3164
|
+
static readonly providerKey = "cartesia_tts";
|
|
2071
3165
|
constructor(opts?: CartesiaTTSOptions);
|
|
3166
|
+
/** Pipeline TTS pre-configured for Twilio Media Streams (PCM @ 8 kHz). */
|
|
3167
|
+
static forTwilio(opts?: CartesiaCarrierOptions): TTS$2;
|
|
3168
|
+
static forTwilio(apiKey: string, options?: Omit<CartesiaTTSOptions, "sampleRate">): TTS$2;
|
|
3169
|
+
/** Pipeline TTS pre-configured for Telnyx (PCM @ 16 kHz). */
|
|
3170
|
+
static forTelnyx(opts?: CartesiaCarrierOptions): TTS$2;
|
|
3171
|
+
static forTelnyx(apiKey: string, options?: Omit<CartesiaTTSOptions, "sampleRate">): TTS$2;
|
|
2072
3172
|
}
|
|
2073
3173
|
|
|
2074
3174
|
interface RimeTTSOptions$1 {
|
|
@@ -2142,6 +3242,7 @@ interface RimeTTSOptions {
|
|
|
2142
3242
|
* ```
|
|
2143
3243
|
*/
|
|
2144
3244
|
declare class TTS$1 extends RimeTTS {
|
|
3245
|
+
static readonly providerKey = "rime";
|
|
2145
3246
|
constructor(opts?: RimeTTSOptions);
|
|
2146
3247
|
}
|
|
2147
3248
|
|
|
@@ -2170,6 +3271,7 @@ interface LMNTTTSOptions {
|
|
|
2170
3271
|
* ```
|
|
2171
3272
|
*/
|
|
2172
3273
|
declare class TTS extends LMNTTTS {
|
|
3274
|
+
static readonly providerKey = "lmnt";
|
|
2173
3275
|
constructor(opts?: LMNTTTSOptions);
|
|
2174
3276
|
}
|
|
2175
3277
|
|
|
@@ -2180,6 +3282,26 @@ interface OpenAILLMOptions {
|
|
|
2180
3282
|
apiKey?: string;
|
|
2181
3283
|
/** Chat Completions model id. Defaults to ``"gpt-4o-mini"``. */
|
|
2182
3284
|
model?: string;
|
|
3285
|
+
/** Sampling temperature [0, 2]. */
|
|
3286
|
+
temperature?: number;
|
|
3287
|
+
/** Max tokens in the assistant response (sent as ``max_completion_tokens``). */
|
|
3288
|
+
maxTokens?: number;
|
|
3289
|
+
/** OpenAI-style ``response_format`` for JSON mode / structured outputs. */
|
|
3290
|
+
responseFormat?: Record<string, unknown>;
|
|
3291
|
+
/** Whether to allow parallel tool calls. */
|
|
3292
|
+
parallelToolCalls?: boolean;
|
|
3293
|
+
/** ``"auto" | "none" | "required"`` or a specific tool object. */
|
|
3294
|
+
toolChoice?: string | Record<string, unknown>;
|
|
3295
|
+
/** Sampling seed for reproducible outputs. */
|
|
3296
|
+
seed?: number;
|
|
3297
|
+
/** Nucleus sampling cutoff in [0, 1]. */
|
|
3298
|
+
topP?: number;
|
|
3299
|
+
/** Penalty in [-2, 2] applied to repeated tokens. */
|
|
3300
|
+
frequencyPenalty?: number;
|
|
3301
|
+
/** Penalty in [-2, 2] applied to seen tokens. */
|
|
3302
|
+
presencePenalty?: number;
|
|
3303
|
+
/** Stop sequence(s). */
|
|
3304
|
+
stop?: string | string[];
|
|
2183
3305
|
}
|
|
2184
3306
|
/**
|
|
2185
3307
|
* OpenAI Chat Completions LLM provider.
|
|
@@ -2188,10 +3310,11 @@ interface OpenAILLMOptions {
|
|
|
2188
3310
|
* ```ts
|
|
2189
3311
|
* import * as openai from "getpatter/llm/openai";
|
|
2190
3312
|
* const llm = new openai.LLM(); // reads OPENAI_API_KEY
|
|
2191
|
-
* const llm = new openai.LLM({ apiKey: "sk-...", model: "gpt-4o-mini" });
|
|
3313
|
+
* const llm = new openai.LLM({ apiKey: "sk-...", model: "gpt-4o-mini", temperature: 0.4 });
|
|
2192
3314
|
* ```
|
|
2193
3315
|
*/
|
|
2194
3316
|
declare class LLM$4 extends OpenAILLMProvider {
|
|
3317
|
+
static readonly providerKey = "openai";
|
|
2195
3318
|
constructor(opts?: OpenAILLMOptions);
|
|
2196
3319
|
}
|
|
2197
3320
|
|
|
@@ -2230,6 +3353,19 @@ interface AnthropicLLMOptions$1 {
|
|
|
2230
3353
|
temperature?: number;
|
|
2231
3354
|
baseUrl?: string;
|
|
2232
3355
|
anthropicVersion?: string;
|
|
3356
|
+
/**
|
|
3357
|
+
* Enable Anthropic prompt caching for the system prompt and tools.
|
|
3358
|
+
* Defaults to ``true`` — for voice agents with long instruction-dense
|
|
3359
|
+
* system prompts, the cache saves ~100-400 ms TTFT and ~90% of input-
|
|
3360
|
+
* token cost on every cached turn. The cache lives ~5 minutes; the
|
|
3361
|
+
* first request writes it, subsequent requests within that window
|
|
3362
|
+
* hit it.
|
|
3363
|
+
*
|
|
3364
|
+
* Disable when the system prompt + tools combined are smaller than
|
|
3365
|
+
* Anthropic's minimum cacheable size (~1024 tokens for Sonnet/Opus,
|
|
3366
|
+
* ~2048 for Haiku) — caching has no effect below that threshold.
|
|
3367
|
+
*/
|
|
3368
|
+
promptCaching?: boolean;
|
|
2233
3369
|
}
|
|
2234
3370
|
/** LLM provider backed by Anthropic's Messages API (streaming). */
|
|
2235
3371
|
declare class AnthropicLLMProvider implements LLMProvider {
|
|
@@ -2239,6 +3375,7 @@ declare class AnthropicLLMProvider implements LLMProvider {
|
|
|
2239
3375
|
private readonly temperature?;
|
|
2240
3376
|
private readonly url;
|
|
2241
3377
|
private readonly anthropicVersion;
|
|
3378
|
+
private readonly promptCaching;
|
|
2242
3379
|
constructor(options: AnthropicLLMOptions$1);
|
|
2243
3380
|
stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
|
|
2244
3381
|
}
|
|
@@ -2248,7 +3385,7 @@ declare class AnthropicLLMProvider implements LLMProvider {
|
|
|
2248
3385
|
interface AnthropicLLMOptions {
|
|
2249
3386
|
/** API key. Falls back to ANTHROPIC_API_KEY env var when omitted. */
|
|
2250
3387
|
apiKey?: string;
|
|
2251
|
-
/** Anthropic Messages API model id (e.g. ``"claude-
|
|
3388
|
+
/** Anthropic Messages API model id (e.g. ``"claude-haiku-4-5-20251001"``). */
|
|
2252
3389
|
model?: string;
|
|
2253
3390
|
/** Maximum number of tokens to sample. Defaults to the adapter default. */
|
|
2254
3391
|
maxTokens?: number;
|
|
@@ -2258,18 +3395,33 @@ interface AnthropicLLMOptions {
|
|
|
2258
3395
|
baseUrl?: string;
|
|
2259
3396
|
/** ``anthropic-version`` header override. */
|
|
2260
3397
|
anthropicVersion?: string;
|
|
3398
|
+
/**
|
|
3399
|
+
* Enable Anthropic prompt caching (default: ``true``). For voice
|
|
3400
|
+
* agents with long instruction-dense system prompts, the cache saves
|
|
3401
|
+
* ~100-400 ms TTFT and ~90% input-token cost per cached turn. Disable
|
|
3402
|
+
* if your system prompt + tools are below Anthropic's minimum
|
|
3403
|
+
* cacheable size (~1024 tokens for Sonnet/Opus, ~2048 for Haiku) —
|
|
3404
|
+
* caching has no effect below that threshold.
|
|
3405
|
+
*/
|
|
3406
|
+
promptCaching?: boolean;
|
|
2261
3407
|
}
|
|
2262
3408
|
/**
|
|
2263
3409
|
* Anthropic Claude LLM provider (Messages API, streaming).
|
|
2264
3410
|
*
|
|
3411
|
+
* Prompt caching is **enabled by default**. The first request writes
|
|
3412
|
+
* the cache; subsequent requests within ~5 minutes hit it. Pass
|
|
3413
|
+
* ``{ promptCaching: false }`` to opt out.
|
|
3414
|
+
*
|
|
2265
3415
|
* @example
|
|
2266
3416
|
* ```ts
|
|
2267
3417
|
* import * as anthropic from "getpatter/llm/anthropic";
|
|
2268
3418
|
* const llm = new anthropic.LLM(); // reads ANTHROPIC_API_KEY
|
|
2269
|
-
* const llm = new anthropic.LLM({ apiKey: "sk-ant-...", model: "claude-
|
|
3419
|
+
* const llm = new anthropic.LLM({ apiKey: "sk-ant-...", model: "claude-haiku-4-5-20251001" });
|
|
3420
|
+
* const llm = new anthropic.LLM({ promptCaching: false }); // opt out of caching
|
|
2270
3421
|
* ```
|
|
2271
3422
|
*/
|
|
2272
3423
|
declare class LLM$3 extends AnthropicLLMProvider {
|
|
3424
|
+
static readonly providerKey = "anthropic";
|
|
2273
3425
|
constructor(opts?: AnthropicLLMOptions);
|
|
2274
3426
|
}
|
|
2275
3427
|
|
|
@@ -2296,12 +3448,42 @@ interface GroqLLMOptions$1 {
|
|
|
2296
3448
|
apiKey: string;
|
|
2297
3449
|
model?: string;
|
|
2298
3450
|
baseUrl?: string;
|
|
3451
|
+
/** Sampling temperature [0, 2]. */
|
|
3452
|
+
temperature?: number;
|
|
3453
|
+
/** Max tokens in the assistant response (sent as ``max_completion_tokens``). */
|
|
3454
|
+
maxTokens?: number;
|
|
3455
|
+
/** OpenAI-style ``response_format`` for JSON mode / structured outputs. */
|
|
3456
|
+
responseFormat?: Record<string, unknown>;
|
|
3457
|
+
/** Whether to allow parallel tool calls. */
|
|
3458
|
+
parallelToolCalls?: boolean;
|
|
3459
|
+
/** ``"auto" | "none" | "required"`` or a specific tool object. */
|
|
3460
|
+
toolChoice?: string | Record<string, unknown>;
|
|
3461
|
+
/** Sampling seed. */
|
|
3462
|
+
seed?: number;
|
|
3463
|
+
/** Nucleus sampling cutoff in [0, 1]. */
|
|
3464
|
+
topP?: number;
|
|
3465
|
+
/** Penalty in [-2, 2] applied to repeated tokens. */
|
|
3466
|
+
frequencyPenalty?: number;
|
|
3467
|
+
/** Penalty in [-2, 2] applied to seen tokens. */
|
|
3468
|
+
presencePenalty?: number;
|
|
3469
|
+
/** Stop sequence(s). */
|
|
3470
|
+
stop?: string | string[];
|
|
2299
3471
|
}
|
|
2300
3472
|
/** LLM provider backed by Groq's OpenAI-compatible Chat Completions API. */
|
|
2301
3473
|
declare class GroqLLMProvider implements LLMProvider {
|
|
2302
3474
|
private readonly apiKey;
|
|
2303
|
-
|
|
3475
|
+
readonly model: string;
|
|
2304
3476
|
private readonly baseUrl;
|
|
3477
|
+
private readonly temperature?;
|
|
3478
|
+
private readonly maxTokens?;
|
|
3479
|
+
private readonly responseFormat?;
|
|
3480
|
+
private readonly parallelToolCalls?;
|
|
3481
|
+
private readonly toolChoice?;
|
|
3482
|
+
private readonly seed?;
|
|
3483
|
+
private readonly topP?;
|
|
3484
|
+
private readonly frequencyPenalty?;
|
|
3485
|
+
private readonly presencePenalty?;
|
|
3486
|
+
private readonly stop?;
|
|
2305
3487
|
constructor(options: GroqLLMOptions$1);
|
|
2306
3488
|
stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
|
|
2307
3489
|
}
|
|
@@ -2315,6 +3497,26 @@ interface GroqLLMOptions {
|
|
|
2315
3497
|
model?: string;
|
|
2316
3498
|
/** Override the OpenAI-compatible base URL (rarely needed). */
|
|
2317
3499
|
baseUrl?: string;
|
|
3500
|
+
/** Sampling temperature [0, 2]. */
|
|
3501
|
+
temperature?: number;
|
|
3502
|
+
/** Max tokens in the assistant response (sent as ``max_completion_tokens``). */
|
|
3503
|
+
maxTokens?: number;
|
|
3504
|
+
/** OpenAI-style ``response_format`` for JSON mode / structured outputs. */
|
|
3505
|
+
responseFormat?: Record<string, unknown>;
|
|
3506
|
+
/** Whether to allow parallel tool calls. */
|
|
3507
|
+
parallelToolCalls?: boolean;
|
|
3508
|
+
/** ``"auto" | "none" | "required"`` or a specific tool object. */
|
|
3509
|
+
toolChoice?: string | Record<string, unknown>;
|
|
3510
|
+
/** Sampling seed. */
|
|
3511
|
+
seed?: number;
|
|
3512
|
+
/** Nucleus sampling cutoff in [0, 1]. */
|
|
3513
|
+
topP?: number;
|
|
3514
|
+
/** Penalty in [-2, 2] applied to repeated tokens. */
|
|
3515
|
+
frequencyPenalty?: number;
|
|
3516
|
+
/** Penalty in [-2, 2] applied to seen tokens. */
|
|
3517
|
+
presencePenalty?: number;
|
|
3518
|
+
/** Stop sequence(s). */
|
|
3519
|
+
stop?: string | string[];
|
|
2318
3520
|
}
|
|
2319
3521
|
/**
|
|
2320
3522
|
* Groq LLM provider (OpenAI-compatible Chat Completions, streaming).
|
|
@@ -2327,6 +3529,7 @@ interface GroqLLMOptions {
|
|
|
2327
3529
|
* ```
|
|
2328
3530
|
*/
|
|
2329
3531
|
declare class LLM$2 extends GroqLLMProvider {
|
|
3532
|
+
static readonly providerKey = "groq";
|
|
2330
3533
|
constructor(opts?: GroqLLMOptions);
|
|
2331
3534
|
}
|
|
2332
3535
|
|
|
@@ -2358,15 +3561,68 @@ interface CerebrasLLMOptions$1 {
|
|
|
2358
3561
|
apiKey: string;
|
|
2359
3562
|
model?: string;
|
|
2360
3563
|
baseUrl?: string;
|
|
2361
|
-
/**
|
|
3564
|
+
/**
|
|
3565
|
+
* Gzip request payloads for faster TTFT on large prompts. Defaults to
|
|
3566
|
+
* ``true`` (parity with Python SDK) — set ``false`` to disable.
|
|
3567
|
+
*
|
|
3568
|
+
* msgpack encoding is Python-only; TS uses gzip alone, which captures
|
|
3569
|
+
* ~85% of the TTFT win.
|
|
3570
|
+
*/
|
|
2362
3571
|
gzipCompression?: boolean;
|
|
3572
|
+
/** Sampling temperature [0, 2]. */
|
|
3573
|
+
temperature?: number;
|
|
3574
|
+
/** Max tokens in the assistant response (sent as ``max_completion_tokens``). */
|
|
3575
|
+
maxTokens?: number;
|
|
3576
|
+
/**
|
|
3577
|
+
* Optional OpenAI-style ``response_format`` for JSON mode / structured
|
|
3578
|
+
* outputs, e.g. ``{ type: 'json_schema', json_schema: { ... } }``.
|
|
3579
|
+
* See https://inference-docs.cerebras.ai/capabilities/structured-outputs.
|
|
3580
|
+
*/
|
|
3581
|
+
responseFormat?: Record<string, unknown>;
|
|
3582
|
+
/** Whether to allow parallel tool calls. */
|
|
3583
|
+
parallelToolCalls?: boolean;
|
|
3584
|
+
/** ``"auto" | "none" | "required"`` or a specific tool object. */
|
|
3585
|
+
toolChoice?: string | Record<string, unknown>;
|
|
3586
|
+
/** Sampling seed for reproducible outputs. */
|
|
3587
|
+
seed?: number;
|
|
3588
|
+
/** Nucleus sampling cutoff in [0, 1]. */
|
|
3589
|
+
topP?: number;
|
|
3590
|
+
/** Penalty in [-2, 2] applied to repeated tokens. */
|
|
3591
|
+
frequencyPenalty?: number;
|
|
3592
|
+
/** Penalty in [-2, 2] applied to seen tokens. */
|
|
3593
|
+
presencePenalty?: number;
|
|
3594
|
+
/** Stop sequence(s). */
|
|
3595
|
+
stop?: string | string[];
|
|
2363
3596
|
}
|
|
2364
|
-
/**
|
|
3597
|
+
/**
|
|
3598
|
+
* LLM provider backed by Cerebras's OpenAI-compatible Inference API.
|
|
3599
|
+
*
|
|
3600
|
+
* Available models on Cerebras (verified against
|
|
3601
|
+
* https://inference-docs.cerebras.ai/models/overview):
|
|
3602
|
+
*
|
|
3603
|
+
* Production:
|
|
3604
|
+
* - gpt-oss-120b (default — highest throughput on Cerebras, no deprecation)
|
|
3605
|
+
* - llama3.1-8b (smaller context alternative; deprecating 2026-05-27)
|
|
3606
|
+
*
|
|
3607
|
+
* Preview (opt-in):
|
|
3608
|
+
* - qwen-3-235b-a22b-instruct-2507 (multilingual, strong on European languages)
|
|
3609
|
+
* - zai-glm-4.7
|
|
3610
|
+
*/
|
|
2365
3611
|
declare class CerebrasLLMProvider implements LLMProvider {
|
|
2366
3612
|
private readonly apiKey;
|
|
2367
|
-
|
|
3613
|
+
readonly model: string;
|
|
2368
3614
|
private readonly baseUrl;
|
|
2369
3615
|
private readonly gzipCompression;
|
|
3616
|
+
private readonly temperature?;
|
|
3617
|
+
private readonly maxTokens?;
|
|
3618
|
+
private readonly responseFormat?;
|
|
3619
|
+
private readonly parallelToolCalls?;
|
|
3620
|
+
private readonly toolChoice?;
|
|
3621
|
+
private readonly seed?;
|
|
3622
|
+
private readonly topP?;
|
|
3623
|
+
private readonly frequencyPenalty?;
|
|
3624
|
+
private readonly presencePenalty?;
|
|
3625
|
+
private readonly stop?;
|
|
2370
3626
|
constructor(options: CerebrasLLMOptions$1);
|
|
2371
3627
|
stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
|
|
2372
3628
|
}
|
|
@@ -2376,12 +3632,32 @@ declare class CerebrasLLMProvider implements LLMProvider {
|
|
|
2376
3632
|
interface CerebrasLLMOptions {
|
|
2377
3633
|
/** API key. Falls back to CEREBRAS_API_KEY env var when omitted. */
|
|
2378
3634
|
apiKey?: string;
|
|
2379
|
-
/** Model id (e.g. ``"
|
|
3635
|
+
/** Model id (e.g. ``"gpt-oss-120b"``). */
|
|
2380
3636
|
model?: string;
|
|
2381
3637
|
/** Override the OpenAI-compatible base URL (rarely needed). */
|
|
2382
3638
|
baseUrl?: string;
|
|
2383
3639
|
/** Gzip request payloads for faster TTFT on large prompts. */
|
|
2384
3640
|
gzipCompression?: boolean;
|
|
3641
|
+
/** Sampling temperature [0, 2]. */
|
|
3642
|
+
temperature?: number;
|
|
3643
|
+
/** Max tokens in the assistant response (sent as ``max_completion_tokens``). */
|
|
3644
|
+
maxTokens?: number;
|
|
3645
|
+
/** OpenAI-style ``response_format`` for JSON mode / structured outputs. */
|
|
3646
|
+
responseFormat?: Record<string, unknown>;
|
|
3647
|
+
/** Whether to allow parallel tool calls. */
|
|
3648
|
+
parallelToolCalls?: boolean;
|
|
3649
|
+
/** ``"auto" | "none" | "required"`` or a specific tool object. */
|
|
3650
|
+
toolChoice?: string | Record<string, unknown>;
|
|
3651
|
+
/** Sampling seed for reproducible outputs. */
|
|
3652
|
+
seed?: number;
|
|
3653
|
+
/** Nucleus sampling cutoff in [0, 1]. */
|
|
3654
|
+
topP?: number;
|
|
3655
|
+
/** Penalty in [-2, 2] applied to repeated tokens. */
|
|
3656
|
+
frequencyPenalty?: number;
|
|
3657
|
+
/** Penalty in [-2, 2] applied to seen tokens. */
|
|
3658
|
+
presencePenalty?: number;
|
|
3659
|
+
/** Stop sequence(s). */
|
|
3660
|
+
stop?: string | string[];
|
|
2385
3661
|
}
|
|
2386
3662
|
/**
|
|
2387
3663
|
* Cerebras LLM provider (OpenAI-compatible Inference API, streaming).
|
|
@@ -2390,10 +3666,13 @@ interface CerebrasLLMOptions {
|
|
|
2390
3666
|
* ```ts
|
|
2391
3667
|
* import * as cerebras from "getpatter/llm/cerebras";
|
|
2392
3668
|
* const llm = new cerebras.LLM(); // reads CEREBRAS_API_KEY
|
|
3669
|
+
* const llm = new cerebras.LLM({ apiKey: "csk-...", model: "gpt-oss-120b" });
|
|
3670
|
+
* // smaller-context alternative:
|
|
2393
3671
|
* const llm = new cerebras.LLM({ apiKey: "csk-...", model: "llama3.1-8b" });
|
|
2394
3672
|
* ```
|
|
2395
3673
|
*/
|
|
2396
3674
|
declare class LLM$1 extends CerebrasLLMProvider {
|
|
3675
|
+
static readonly providerKey = "cerebras";
|
|
2397
3676
|
constructor(opts?: CerebrasLLMOptions);
|
|
2398
3677
|
}
|
|
2399
3678
|
|
|
@@ -2433,7 +3712,7 @@ interface GoogleLLMOptions$1 {
|
|
|
2433
3712
|
/** LLM provider backed by Google Gemini (Developer API, streaming SSE). */
|
|
2434
3713
|
declare class GoogleLLMProvider implements LLMProvider {
|
|
2435
3714
|
private readonly apiKey;
|
|
2436
|
-
|
|
3715
|
+
readonly model: string;
|
|
2437
3716
|
private readonly baseUrl;
|
|
2438
3717
|
private readonly temperature?;
|
|
2439
3718
|
private readonly maxOutputTokens?;
|
|
@@ -2470,9 +3749,109 @@ interface GoogleLLMOptions {
|
|
|
2470
3749
|
* ```
|
|
2471
3750
|
*/
|
|
2472
3751
|
declare class LLM extends GoogleLLMProvider {
|
|
3752
|
+
static readonly providerKey = "google";
|
|
2473
3753
|
constructor(opts?: GoogleLLMOptions);
|
|
2474
3754
|
}
|
|
2475
3755
|
|
|
3756
|
+
/**
|
|
3757
|
+
* Silero VAD provider (TypeScript port).
|
|
3758
|
+
*
|
|
3759
|
+
* Acoustic voice activity detection backed by the Silero ONNX model. Buffers
|
|
3760
|
+
* incoming int16 LE PCM frames, runs inference on fixed-size windows
|
|
3761
|
+
* (256 samples at 8 kHz, 512 at 16 kHz), applies an exponential probability
|
|
3762
|
+
* filter, and emits VADEvent transitions (speech_start / speech_end).
|
|
3763
|
+
*
|
|
3764
|
+
* Ported from LiveKit Agents (Apache 2.0):
|
|
3765
|
+
* https://github.com/livekit/agents
|
|
3766
|
+
* Sources:
|
|
3767
|
+
* - livekit-plugins/livekit-plugins-silero/livekit/plugins/silero/vad.py
|
|
3768
|
+
* - livekit-plugins/livekit-plugins-silero/livekit/plugins/silero/onnx_model.py
|
|
3769
|
+
*
|
|
3770
|
+
* Adaptations for Patter:
|
|
3771
|
+
* - Input is raw PCM `Buffer` (int16 LE, mono) via
|
|
3772
|
+
* `processFrame(pcmChunk, sampleRate)`, not `livekit.rtc.AudioFrame`.
|
|
3773
|
+
* - onnxruntime-node is loaded lazily as an optional dependency.
|
|
3774
|
+
* - Emits `VADEvent` (Patter protocol) instead of LiveKit event types.
|
|
3775
|
+
*/
|
|
3776
|
+
|
|
3777
|
+
declare const SUPPORTED_SAMPLE_RATES: readonly [8000, 16000];
|
|
3778
|
+
type SileroSampleRate = (typeof SUPPORTED_SAMPLE_RATES)[number];
|
|
3779
|
+
interface SileroVADOptions {
|
|
3780
|
+
minSpeechDuration?: number;
|
|
3781
|
+
minSilenceDuration?: number;
|
|
3782
|
+
prefixPaddingDuration?: number;
|
|
3783
|
+
activationThreshold?: number;
|
|
3784
|
+
deactivationThreshold?: number;
|
|
3785
|
+
sampleRate?: SileroSampleRate;
|
|
3786
|
+
forceCpu?: boolean;
|
|
3787
|
+
onnxFilePath?: string;
|
|
3788
|
+
}
|
|
3789
|
+
/**
|
|
3790
|
+
* Minimal structural type for the subset of `onnxruntime-node` we depend on.
|
|
3791
|
+
* Declared locally so consumers don't need the package installed at build time.
|
|
3792
|
+
*/
|
|
3793
|
+
interface OnnxInferenceSession {
|
|
3794
|
+
run(feeds: Record<string, OnnxTensor>): Promise<Record<string, OnnxTensor>>;
|
|
3795
|
+
}
|
|
3796
|
+
interface OnnxTensor {
|
|
3797
|
+
readonly data: Float32Array | BigInt64Array;
|
|
3798
|
+
readonly dims: readonly number[];
|
|
3799
|
+
}
|
|
3800
|
+
interface OnnxRuntime {
|
|
3801
|
+
InferenceSession: {
|
|
3802
|
+
create(pathOrBuffer: string | Uint8Array, options?: Record<string, unknown>): Promise<OnnxInferenceSession>;
|
|
3803
|
+
};
|
|
3804
|
+
Tensor: new (type: 'float32' | 'int64', data: Float32Array | BigInt64Array, dims: readonly number[]) => OnnxTensor;
|
|
3805
|
+
}
|
|
3806
|
+
/**
|
|
3807
|
+
* Silero-based `VADProvider`. Load via `SileroVAD.load()`:
|
|
3808
|
+
*
|
|
3809
|
+
* const vad = await SileroVAD.load({ sampleRate: 16000 });
|
|
3810
|
+
* const evt = await vad.processFrame(pcm, 16000);
|
|
3811
|
+
* if (evt && evt.type === 'speech_start') { ... }
|
|
3812
|
+
* await vad.close();
|
|
3813
|
+
*/
|
|
3814
|
+
declare class SileroVAD implements VADProvider {
|
|
3815
|
+
private readonly model;
|
|
3816
|
+
private readonly opts;
|
|
3817
|
+
private pending;
|
|
3818
|
+
private expFilter;
|
|
3819
|
+
private pubSpeaking;
|
|
3820
|
+
private speechThresholdDuration;
|
|
3821
|
+
private silenceThresholdDuration;
|
|
3822
|
+
private closed;
|
|
3823
|
+
private constructor();
|
|
3824
|
+
/**
|
|
3825
|
+
* Load the Silero VAD model. Defaults match the LiveKit Silero plugin.
|
|
3826
|
+
* Throws if `onnxruntime-node` is not installed.
|
|
3827
|
+
*/
|
|
3828
|
+
static load(options?: SileroVADOptions): Promise<SileroVAD>;
|
|
3829
|
+
/**
|
|
3830
|
+
* Internal factory used by tests — bypasses onnxruntime-node loading.
|
|
3831
|
+
* @internal
|
|
3832
|
+
*/
|
|
3833
|
+
static fromOnnxModel(runtime: OnnxRuntime, session: OnnxInferenceSession, options: Required<Omit<SileroVADOptions, 'onnxFilePath' | 'forceCpu'>>): SileroVAD;
|
|
3834
|
+
get sampleRate(): SileroSampleRate;
|
|
3835
|
+
/**
|
|
3836
|
+
* Number of int16 PCM samples that must be provided per call to
|
|
3837
|
+
* processFrame for the model to run one inference window.
|
|
3838
|
+
*
|
|
3839
|
+
* Constraint (ported from LiveKit Agents / Silero ONNX spec):
|
|
3840
|
+
* - 16 000 Hz → 512 samples (32 ms)
|
|
3841
|
+
* - 8 000 Hz → 256 samples (32 ms)
|
|
3842
|
+
*
|
|
3843
|
+
* Callers that feed raw audio in fixed-size chunks (e.g. WebSocket frames)
|
|
3844
|
+
* should buffer incoming audio until at least numFramesRequired() int16
|
|
3845
|
+
* samples are available before calling processFrame. The provider
|
|
3846
|
+
* internally buffers partial windows so smaller chunks are also safe, but
|
|
3847
|
+
* passing exactly one window per call minimises heap allocation.
|
|
3848
|
+
*/
|
|
3849
|
+
numFramesRequired(): number;
|
|
3850
|
+
processFrame(pcmChunk: Buffer, sampleRate: number): Promise<VADEvent | null>;
|
|
3851
|
+
private advanceState;
|
|
3852
|
+
close(): Promise<void>;
|
|
3853
|
+
}
|
|
3854
|
+
|
|
2476
3855
|
/**
|
|
2477
3856
|
* Audio transcoding utilities for Patter TypeScript SDK.
|
|
2478
3857
|
*
|
|
@@ -2495,6 +3874,137 @@ declare function mulawToPcm16(mulawData: Buffer): Buffer;
|
|
|
2495
3874
|
* If the input length is odd, the trailing byte is ignored.
|
|
2496
3875
|
*/
|
|
2497
3876
|
declare function pcm16ToMulaw(pcmData: Buffer): Buffer;
|
|
3877
|
+
/**
|
|
3878
|
+
* Buffers a trailing odd byte across chunk boundaries so that downstream
|
|
3879
|
+
* consumers (resamplers, encoders) always receive even-length (2-byte-aligned)
|
|
3880
|
+
* PCM16 buffers.
|
|
3881
|
+
*
|
|
3882
|
+
* Mirror of the Python-side PcmCarry helper. Typical usage:
|
|
3883
|
+
*
|
|
3884
|
+
* ```ts
|
|
3885
|
+
* const carry = new PcmCarry();
|
|
3886
|
+
* for (const raw of stream) {
|
|
3887
|
+
* const aligned = carry.push(raw);
|
|
3888
|
+
* if (aligned.length > 0) process(aligned);
|
|
3889
|
+
* }
|
|
3890
|
+
* const tail = carry.flush();
|
|
3891
|
+
* if (tail.length > 0) process(tail);
|
|
3892
|
+
* ```
|
|
3893
|
+
*/
|
|
3894
|
+
declare class PcmCarry {
|
|
3895
|
+
private pending;
|
|
3896
|
+
/**
|
|
3897
|
+
* Prepend any carried odd byte, return the even-length prefix, and stash
|
|
3898
|
+
* any new trailing odd byte for the next call.
|
|
3899
|
+
*
|
|
3900
|
+
* Returns a zero-length buffer when no complete sample is yet available.
|
|
3901
|
+
*/
|
|
3902
|
+
push(chunk: Buffer): Buffer;
|
|
3903
|
+
/**
|
|
3904
|
+
* Return any pending byte as a 1-byte buffer (rare in practice — only if
|
|
3905
|
+
* the entire stream had an odd byte count), then reset internal state.
|
|
3906
|
+
*/
|
|
3907
|
+
flush(): Buffer;
|
|
3908
|
+
/** Reset carry state without flushing. */
|
|
3909
|
+
reset(): void;
|
|
3910
|
+
}
|
|
3911
|
+
/** Options for constructing a {@link StatefulResampler}. */
|
|
3912
|
+
interface StatefulResamplerOptions {
|
|
3913
|
+
srcRate: number;
|
|
3914
|
+
dstRate: number;
|
|
3915
|
+
/** Number of channels (default 1 / mono). */
|
|
3916
|
+
channels?: number;
|
|
3917
|
+
}
|
|
3918
|
+
/**
|
|
3919
|
+
* Stateful PCM16 resampler that carries tail state across chunk boundaries,
|
|
3920
|
+
* eliminating the boundary discontinuities present in the legacy one-shot
|
|
3921
|
+
* helpers.
|
|
3922
|
+
*
|
|
3923
|
+
* Supported conversions:
|
|
3924
|
+
* - 16 000 → 8 000 Hz (2:1 decimation with 5-tap FIR anti-alias)
|
|
3925
|
+
* - 8 000 → 16 000 Hz (1:2 linear interpolation)
|
|
3926
|
+
* - 24 000 → 16 000 Hz (3:2 linear interpolation)
|
|
3927
|
+
*
|
|
3928
|
+
* All methods accept and return Buffer (PCM16-LE, mono by default).
|
|
3929
|
+
*/
|
|
3930
|
+
declare class StatefulResampler {
|
|
3931
|
+
private readonly srcRate;
|
|
3932
|
+
private readonly dstRate;
|
|
3933
|
+
private firHistory;
|
|
3934
|
+
private firHistoryValid;
|
|
3935
|
+
private firPendingSample;
|
|
3936
|
+
private upsampleLast;
|
|
3937
|
+
private upsampleHasHistory;
|
|
3938
|
+
private resample24Last;
|
|
3939
|
+
private resample24Phase;
|
|
3940
|
+
private resample24HasHistory;
|
|
3941
|
+
private readonly carry;
|
|
3942
|
+
constructor(opts: StatefulResamplerOptions);
|
|
3943
|
+
/**
|
|
3944
|
+
* Process a chunk of PCM16-LE samples.
|
|
3945
|
+
*
|
|
3946
|
+
* Handles odd-byte inputs via an internal carry buffer. Returns an even-byte-
|
|
3947
|
+
* aligned output buffer; may return a zero-length buffer if not enough
|
|
3948
|
+
* aligned input is available yet.
|
|
3949
|
+
*/
|
|
3950
|
+
process(pcm: Buffer): Buffer;
|
|
3951
|
+
/**
|
|
3952
|
+
* Flush internal state and return any remaining output samples.
|
|
3953
|
+
*
|
|
3954
|
+
* For 8k→16k: the deferred last sample is emitted duplicated (matching
|
|
3955
|
+
* the stateless helper's end-of-stream behaviour).
|
|
3956
|
+
* For 16k→8k: any pending odd sample is processed with edge-replication.
|
|
3957
|
+
* Resets all state after flushing.
|
|
3958
|
+
*/
|
|
3959
|
+
flush(): Buffer;
|
|
3960
|
+
/** Reset all carried state (e.g. at call boundaries). */
|
|
3961
|
+
reset(): void;
|
|
3962
|
+
/**
|
|
3963
|
+
* 2:1 decimation with a 5-tap binomial FIR anti-alias filter.
|
|
3964
|
+
*
|
|
3965
|
+
* FIR coefficients: [1, 4, 6, 4, 1] / 16 (cutoff ~Fs/4 = 4 kHz).
|
|
3966
|
+
*
|
|
3967
|
+
* Cross-chunk state:
|
|
3968
|
+
* - `firHistory[0]` = s_{-2}, `firHistory[1]` = s_{-1} relative to the
|
|
3969
|
+
* virtual stream (seeded to first-sample on the very first call).
|
|
3970
|
+
* - `firPendingSample` = a lone input sample carried from a chunk whose
|
|
3971
|
+
* sample count was odd; it will become the first input of the next chunk.
|
|
3972
|
+
*
|
|
3973
|
+
* Decimation: outputs are at even positions (0, 2, 4 …) in the virtual
|
|
3974
|
+
* extended stream, so every 2 input samples yield 1 output. An odd-sample-
|
|
3975
|
+
* count chunk leaves 1 sample in `firPendingSample`; the next chunk
|
|
3976
|
+
* prepends it so the output cadence is unbroken.
|
|
3977
|
+
*/
|
|
3978
|
+
private _downsample16kTo8k;
|
|
3979
|
+
/**
|
|
3980
|
+
* 1:2 linear-interpolation upsampler.
|
|
3981
|
+
*
|
|
3982
|
+
* For the first chunk (no history): emits 2*(N-1) samples and defers the
|
|
3983
|
+
* last sample. For subsequent chunks (with history): emits the deferred
|
|
3984
|
+
* sample + its interpolated midpoint THEN 2*(N-1) samples from the new
|
|
3985
|
+
* chunk, deferring the new last sample. Total across K chunks + flush =
|
|
3986
|
+
* 2*total_input_samples (correct output length).
|
|
3987
|
+
*
|
|
3988
|
+
* Call flush() after the final chunk to emit the last deferred sample
|
|
3989
|
+
* pair (self-duplicate at end of stream).
|
|
3990
|
+
*/
|
|
3991
|
+
private _upsample8kTo16k;
|
|
3992
|
+
/**
|
|
3993
|
+
* 3:2 linear-interpolation decimator (ratio srcRate/dstRate = 1.5).
|
|
3994
|
+
*
|
|
3995
|
+
* `resample24Phase` tracks the fractional input position of the next output
|
|
3996
|
+
* sample relative to the START of the next chunk. Negative phase means the
|
|
3997
|
+
* next output straddles the previous/current chunk boundary; those are
|
|
3998
|
+
* handled using `resample24Last`.
|
|
3999
|
+
*/
|
|
4000
|
+
private _resample24kTo16k;
|
|
4001
|
+
}
|
|
4002
|
+
/** Create a stateful 16 kHz → 8 kHz downsampling resampler. */
|
|
4003
|
+
declare function createResampler16kTo8k(): StatefulResampler;
|
|
4004
|
+
/** Create a stateful 8 kHz → 16 kHz upsampling resampler. */
|
|
4005
|
+
declare function createResampler8kTo16k(): StatefulResampler;
|
|
4006
|
+
/** Create a stateful 24 kHz → 16 kHz resampler (3:2 linear interpolation). */
|
|
4007
|
+
declare function createResampler24kTo16k(): StatefulResampler;
|
|
2498
4008
|
/**
|
|
2499
4009
|
* Upsample 8 kHz PCM16 to 16 kHz using linear interpolation.
|
|
2500
4010
|
*
|
|
@@ -2503,21 +4013,33 @@ declare function pcm16ToMulaw(pcmData: Buffer): Buffer;
|
|
|
2503
4013
|
* is duplicated to fill the final position.
|
|
2504
4014
|
*
|
|
2505
4015
|
* Output length = input length * 2.
|
|
4016
|
+
*
|
|
4017
|
+
* @deprecated Use {@link StatefulResampler} or {@link createResampler8kTo16k}
|
|
4018
|
+
* for streaming pipelines where chunk-boundary continuity matters.
|
|
2506
4019
|
*/
|
|
2507
4020
|
declare function resample8kTo16k(pcm8k: Buffer): Buffer;
|
|
2508
4021
|
/**
|
|
2509
|
-
* Downsample 16 kHz PCM16 to 8 kHz
|
|
4022
|
+
* Downsample 16 kHz PCM16 to 8 kHz with anti-aliasing.
|
|
4023
|
+
*
|
|
4024
|
+
* Uses a 5-tap binomial low-pass FIR filter ([1, 4, 6, 4, 1] / 16) applied
|
|
4025
|
+
* to every pair of input samples before decimating by 2.
|
|
2510
4026
|
*
|
|
2511
4027
|
* Output length = input length / 2.
|
|
4028
|
+
*
|
|
4029
|
+
* @deprecated Use {@link StatefulResampler} or {@link createResampler16kTo8k}
|
|
4030
|
+
* for streaming pipelines where chunk-boundary continuity matters.
|
|
2512
4031
|
*/
|
|
2513
4032
|
declare function resample16kTo8k(pcm16k: Buffer): Buffer;
|
|
2514
4033
|
/**
|
|
2515
|
-
* Downsample 24 kHz PCM16 to 16 kHz
|
|
4034
|
+
* Downsample 24 kHz PCM16 to 16 kHz with linear interpolation.
|
|
2516
4035
|
*
|
|
2517
|
-
*
|
|
2518
|
-
*
|
|
4036
|
+
* For a 3:2 ratio, each output sample is a weighted blend of the two
|
|
4037
|
+
* neighbouring input samples rather than a raw pick-every-third.
|
|
2519
4038
|
*
|
|
2520
4039
|
* Output length = floor(inputSamples * 2 / 3) * 2 bytes.
|
|
4040
|
+
*
|
|
4041
|
+
* @deprecated Use {@link StatefulResampler} or {@link OpenAITTS.resampleStreaming}
|
|
4042
|
+
* for anti-aliased resampling.
|
|
2521
4043
|
*/
|
|
2522
4044
|
declare function resample24kTo16k(pcm24k: Buffer): Buffer;
|
|
2523
4045
|
|
|
@@ -2834,4 +4356,193 @@ declare class BackgroundAudioPlayer implements BackgroundAudioPlayer$1 {
|
|
|
2834
4356
|
private resampleTo;
|
|
2835
4357
|
}
|
|
2836
4358
|
|
|
2837
|
-
|
|
4359
|
+
interface TwilioAdapterOptions {
|
|
4360
|
+
/** Optional Twilio edge region (e.g. ``ie1`` for Ireland). */
|
|
4361
|
+
region?: string;
|
|
4362
|
+
}
|
|
4363
|
+
interface ProvisionNumberOptions$1 {
|
|
4364
|
+
/** ISO-3166-1 alpha-2 country code, e.g. ``"US"``. */
|
|
4365
|
+
countryCode: string;
|
|
4366
|
+
/** Optional North-American area code (e.g. ``"415"``). */
|
|
4367
|
+
areaCode?: string;
|
|
4368
|
+
}
|
|
4369
|
+
interface ProvisionNumberResult$1 {
|
|
4370
|
+
readonly phoneNumber: string;
|
|
4371
|
+
readonly sid: string;
|
|
4372
|
+
}
|
|
4373
|
+
interface ConfigureNumberOptions$1 {
|
|
4374
|
+
/** URL Twilio should hit when the number receives a call. */
|
|
4375
|
+
voiceUrl: string;
|
|
4376
|
+
/** Optional status callback URL for call lifecycle events. */
|
|
4377
|
+
statusCallback?: string;
|
|
4378
|
+
}
|
|
4379
|
+
interface InitiateCallOptions$1 {
|
|
4380
|
+
from: string;
|
|
4381
|
+
to: string;
|
|
4382
|
+
/**
|
|
4383
|
+
* TwiML or absolute URL Twilio should request when the call connects.
|
|
4384
|
+
* Mutually exclusive with ``streamUrl`` — provide exactly one.
|
|
4385
|
+
*/
|
|
4386
|
+
url?: string;
|
|
4387
|
+
/**
|
|
4388
|
+
* Optional WebSocket stream URL. When provided (and ``url`` is not), the
|
|
4389
|
+
* adapter auto-builds a ``<Response><Connect><Stream>`` TwiML document
|
|
4390
|
+
* via :meth:`generateStreamTwiml` and sends it as the ``Twiml`` form
|
|
4391
|
+
* parameter. Mirrors the Python adapter's ``stream_url`` convenience path.
|
|
4392
|
+
*/
|
|
4393
|
+
streamUrl?: string;
|
|
4394
|
+
statusCallback?: string;
|
|
4395
|
+
/** Value accepted by Twilio's ``MachineDetection`` parameter. */
|
|
4396
|
+
machineDetection?: 'Enable' | 'DetectMessageEnd' | 'false';
|
|
4397
|
+
/** Raw extra form parameters forwarded to the Calls endpoint. */
|
|
4398
|
+
extraParams?: Record<string, string>;
|
|
4399
|
+
}
|
|
4400
|
+
interface InitiateCallResult$1 {
|
|
4401
|
+
readonly callSid: string;
|
|
4402
|
+
}
|
|
4403
|
+
declare class TwilioAdapter {
|
|
4404
|
+
readonly accountSid: string;
|
|
4405
|
+
readonly region: string | undefined;
|
|
4406
|
+
private readonly baseUrl;
|
|
4407
|
+
private readonly authHeader;
|
|
4408
|
+
constructor(accountSid: string, authToken: string, opts?: TwilioAdapterOptions);
|
|
4409
|
+
private request;
|
|
4410
|
+
/**
|
|
4411
|
+
* Provision a local phone number in the given country.
|
|
4412
|
+
*
|
|
4413
|
+
* Lists available local numbers, then purchases the first match.
|
|
4414
|
+
*/
|
|
4415
|
+
provisionNumber(opts: ProvisionNumberOptions$1): Promise<ProvisionNumberResult$1>;
|
|
4416
|
+
/** Update an already-purchased number to point at our voice webhook. */
|
|
4417
|
+
configureNumber(phoneNumberSid: string, opts: ConfigureNumberOptions$1): Promise<void>;
|
|
4418
|
+
/** Place an outbound call. Returns the Twilio call SID. */
|
|
4419
|
+
initiateCall(opts: InitiateCallOptions$1): Promise<InitiateCallResult$1>;
|
|
4420
|
+
/**
|
|
4421
|
+
* Build a minimal ``<Response><Connect><Stream url="..."/></Connect></Response>``
|
|
4422
|
+
* TwiML document. Mirrors the Python adapter's ``generate_stream_twiml``.
|
|
4423
|
+
*/
|
|
4424
|
+
static generateStreamTwiml(streamUrl: string): string;
|
|
4425
|
+
/** Force-complete an in-progress call. */
|
|
4426
|
+
endCall(callSid: string): Promise<void>;
|
|
4427
|
+
}
|
|
4428
|
+
|
|
4429
|
+
interface ProvisionNumberOptions {
|
|
4430
|
+
/** ISO-3166-1 alpha-2 country code (e.g. ``"US"``). */
|
|
4431
|
+
countryCode: string;
|
|
4432
|
+
}
|
|
4433
|
+
interface ProvisionNumberResult {
|
|
4434
|
+
readonly phoneNumber: string;
|
|
4435
|
+
readonly orderId: string;
|
|
4436
|
+
}
|
|
4437
|
+
interface ConfigureNumberOptions {
|
|
4438
|
+
/** Telnyx Call Control Application / Connection ID. */
|
|
4439
|
+
connectionId: string;
|
|
4440
|
+
}
|
|
4441
|
+
interface InitiateCallOptions {
|
|
4442
|
+
from: string;
|
|
4443
|
+
to: string;
|
|
4444
|
+
/** Override ``connectionId`` at dial time. Falls back to the adapter default. */
|
|
4445
|
+
connectionId?: string;
|
|
4446
|
+
/** Opaque state string that Telnyx echoes back on webhooks. Base64-encoded on wire. */
|
|
4447
|
+
clientState?: string;
|
|
4448
|
+
}
|
|
4449
|
+
interface InitiateCallResult {
|
|
4450
|
+
readonly callControlId: string;
|
|
4451
|
+
}
|
|
4452
|
+
interface EndCallOptions {
|
|
4453
|
+
/** Idempotency key for the hangup command. */
|
|
4454
|
+
commandId?: string;
|
|
4455
|
+
}
|
|
4456
|
+
declare class TelnyxAdapter {
|
|
4457
|
+
private readonly apiKey;
|
|
4458
|
+
readonly connectionId: string | undefined;
|
|
4459
|
+
private readonly baseUrl;
|
|
4460
|
+
constructor(apiKey: string, connectionId?: string);
|
|
4461
|
+
private request;
|
|
4462
|
+
/**
|
|
4463
|
+
* Search available numbers for ``countryCode`` and place an order for the
|
|
4464
|
+
* first match. Returns both the reserved E.164 number and the order ID.
|
|
4465
|
+
*/
|
|
4466
|
+
provisionNumber(opts: ProvisionNumberOptions): Promise<ProvisionNumberResult>;
|
|
4467
|
+
/** Attach a number to a Call Control Application. */
|
|
4468
|
+
configureNumber(phoneNumber: string, opts: ConfigureNumberOptions): Promise<void>;
|
|
4469
|
+
/**
|
|
4470
|
+
* Place an outbound call on the Call Control Application.
|
|
4471
|
+
*
|
|
4472
|
+
* Note: we intentionally do NOT pass ``stream_url`` here — audio streaming
|
|
4473
|
+
* is configured on the Application itself (or started explicitly via a
|
|
4474
|
+
* ``streaming_start`` command). Passing ``stream_url`` on dial is a
|
|
4475
|
+
* deprecated code path that Telnyx rejects in newer API versions.
|
|
4476
|
+
*/
|
|
4477
|
+
initiateCall(opts: InitiateCallOptions): Promise<InitiateCallResult>;
|
|
4478
|
+
/** Hang up an in-progress call. */
|
|
4479
|
+
endCall(callControlId: string, opts?: EndCallOptions): Promise<void>;
|
|
4480
|
+
}
|
|
4481
|
+
|
|
4482
|
+
declare const SPAN_CALL = "getpatter.call";
|
|
4483
|
+
declare const SPAN_STT = "getpatter.stt";
|
|
4484
|
+
declare const SPAN_LLM = "getpatter.llm";
|
|
4485
|
+
declare const SPAN_TTS = "getpatter.tts";
|
|
4486
|
+
declare const SPAN_TOOL = "getpatter.tool";
|
|
4487
|
+
declare const SPAN_ENDPOINT = "getpatter.endpoint";
|
|
4488
|
+
declare const SPAN_BARGEIN = "getpatter.bargein";
|
|
4489
|
+
/**
|
|
4490
|
+
* Minimal span surface area — subset of the OTel ``Span`` API the Patter SDK
|
|
4491
|
+
* relies on. We keep this narrow so the no-op fallback stays trivial.
|
|
4492
|
+
*/
|
|
4493
|
+
interface Span {
|
|
4494
|
+
setAttribute(key: string, value: unknown): void;
|
|
4495
|
+
recordException(exception: unknown): void;
|
|
4496
|
+
end(): void;
|
|
4497
|
+
}
|
|
4498
|
+
interface InitTracingOptions {
|
|
4499
|
+
serviceName?: string;
|
|
4500
|
+
otlpEndpoint?: string;
|
|
4501
|
+
resourceAttributes?: Record<string, string>;
|
|
4502
|
+
}
|
|
4503
|
+
/**
|
|
4504
|
+
* Initialize tracing. Returns ``true`` when OTel is wired, ``false`` otherwise
|
|
4505
|
+
* (which covers both "env flag off" and "peer dep missing").
|
|
4506
|
+
*
|
|
4507
|
+
* If the optional SDK packages (``@opentelemetry/sdk-trace-node``,
|
|
4508
|
+
* ``@opentelemetry/sdk-trace-base``, ``@opentelemetry/exporter-trace-otlp-http``)
|
|
4509
|
+
* are installed, a ``NodeTracerProvider`` with OTLP/HTTP exporter is wired up
|
|
4510
|
+
* automatically. Otherwise, spans produced via ``startSpan`` are still created
|
|
4511
|
+
* against whatever global provider ``@opentelemetry/api`` resolves to (which
|
|
4512
|
+
* may be a no-op if the host hasn't registered one).
|
|
4513
|
+
*/
|
|
4514
|
+
declare function initTracing(options?: InitTracingOptions): boolean;
|
|
4515
|
+
/** True only if the env flag is set AND the tracer initialized cleanly. */
|
|
4516
|
+
declare function isTracingEnabled(): boolean;
|
|
4517
|
+
/**
|
|
4518
|
+
* Start a span. Callers must ``end()`` the returned span — use try/finally:
|
|
4519
|
+
*
|
|
4520
|
+
* ```ts
|
|
4521
|
+
* const span = startSpan(SPAN_LLM, { 'llm.model': 'gpt-4o' });
|
|
4522
|
+
* try { ... } finally { span.end(); }
|
|
4523
|
+
* ```
|
|
4524
|
+
*
|
|
4525
|
+
* Returns a no-op span when tracing is disabled or unavailable.
|
|
4526
|
+
*/
|
|
4527
|
+
declare function startSpan(name: string, attrs?: Record<string, unknown>): Span;
|
|
4528
|
+
|
|
4529
|
+
/**
|
|
4530
|
+
* Observability entrypoint — re-exports the tracing API.
|
|
4531
|
+
*
|
|
4532
|
+
* See ``./tracing.ts`` for the implementation.
|
|
4533
|
+
*/
|
|
4534
|
+
|
|
4535
|
+
/**
|
|
4536
|
+
* Call lifecycle event — TS mirror of ``getpatter.models.CallEvent``.
|
|
4537
|
+
*
|
|
4538
|
+
* Kept in the observability namespace because the primary consumers are
|
|
4539
|
+
* metrics/tracing sinks (e.g. dashboard ingestion).
|
|
4540
|
+
*/
|
|
4541
|
+
interface CallEvent {
|
|
4542
|
+
readonly callId: string;
|
|
4543
|
+
readonly caller?: string;
|
|
4544
|
+
readonly callee?: string;
|
|
4545
|
+
readonly direction?: string;
|
|
4546
|
+
}
|
|
4547
|
+
|
|
4548
|
+
export { type AgentOptions, AllProvidersFailedError, type AnthropicConversion, LLM$3 as AnthropicLLM, type AnthropicLLMOptions, type AnthropicMessage, type AssemblyAIEncoding, type AssemblyAIModel, STT as AssemblyAISTT, type AssemblyAISTTOptions, type AudioConfig, type AudioSource, AuthenticationError, type BackgroundAudioOptions, BackgroundAudioPlayer, BuiltinAudioClip, type BuiltinAudioClipName, type BuiltinPcmSource, type CallControl, type CallEvent, type CallEventHandler, type CallMetrics, CallMetricsAccumulator, type CallRecord, type CartesiaEncoding, STT$2 as CartesiaSTT, type CartesiaSTTOptions, TTS$2 as CartesiaTTS, type CartesiaTTSOptions, LLM$1 as CerebrasLLM, type CerebrasLLMOptions, ChatContext, type ChatMessage, type ChatRole, CloudflareTunnel, type CostBreakdown, DEFAULT_MIN_SENTENCE_LEN, DEFAULT_PRICING, DTMF_EVENTS, STT$5 as DeepgramSTT, type DeepgramSTTOptions, DefaultToolExecutor, type DefaultToolExecutorOptions, type DefineToolInput, type DtmfEvent, ConvAI as ElevenLabsConvAI, ElevenLabsConvAIAdapter, type ConvAIOptions as ElevenLabsConvAIOptions, TTS$4 as ElevenLabsTTS, type ElevenLabsTTSOptions, EventBus, FallbackLLMProvider, type FallbackLLMProviderOptions, type FilePcmSource, GEMINI_DEFAULT_INPUT_SR, GEMINI_DEFAULT_OUTPUT_SR, GeminiLiveAdapter, type GeminiLiveEventHandler, LLM as GoogleLLM, type GoogleLLMOptions, LLM$2 as GroqLLM, type GroqLLMOptions, Guardrail$1 as Guardrail, type GuardrailOptions, type HookContext, IVRActivity, type IVRActivityOptions, type IVRToolDefinition, type IncomingMessage, type InitTracingOptions, type JobCallback, type LLMChunk, LLMLoop, type LLMProvider, type LMNTAudioFormat, type LMNTModel, type LMNTSampleRate, TTS as LMNTTTS, type LMNTTTSOptions, type LatencyBreakdown, type LocalCallOptions, type LocalConfig, type LocalOptions, type Logger, type LoopCallback, type MessageHandler, MetricsStore, Ngrok, LLM$4 as OpenAILLM, type OpenAILLMOptions, OpenAILLMProvider, type OpenAIMessage, Realtime as OpenAIRealtime, 
OpenAIRealtimeAdapter, type RealtimeOptions as OpenAIRealtimeOptions, TTS$3 as OpenAITTS, type OpenAITTSOptions, STT$3 as OpenAITranscribeSTT, type OpenAITranscribeSTTOptions, type ParamSpec, PartialStreamError, Patter, PatterConnectionError, PatterError, type PatterEventType, PatterTool, type PatterToolExecuteArgs, type PatterToolOptions, type PatterToolResult, PcmCarry, PipelineHookExecutor, type PipelineHooks, type PipelineMessageHandler, type ProviderPricing, ProvisionError, RateLimitError, type RawPcmSource, type RealtimeConfig, RemoteMessageHandler, TTS$1 as RimeTTS, type RimeTTSOptions, SPAN_BARGEIN, SPAN_CALL, SPAN_ENDPOINT, SPAN_LLM, SPAN_STT, SPAN_TOOL, SPAN_TTS, type SSEEvent, type STTConfig, type ScheduleHandle, SentenceChunker, type ServeOptions, type SilenceCallback, type SileroSampleRate, SileroVAD, type SileroVADOptions, STT$1 as SonioxSTT, type SonioxSTTOptions$1 as SonioxSTTOptions, type Span, StatefulResampler, type StatefulResamplerOptions, Static as StaticTunnel, type TTSConfig, Carrier as Telnyx, TelnyxAdapter, type TelnyxCarrierOptions, type ConfigureNumberOptions as TelnyxConfigureNumberOptions, type EndCallOptions as TelnyxEndCallOptions, type InitiateCallOptions as TelnyxInitiateCallOptions, type InitiateCallResult as TelnyxInitiateCallResult, type ProvisionNumberOptions as TelnyxProvisionNumberOptions, type ProvisionNumberResult as TelnyxProvisionNumberResult, TestSession, TfidfLoopDetector, type TfidfLoopDetectorOptions, Tool, type ToolDefinition, type ToolExecutor, type ToolHandler, type ToolOptions, type TunnelHandle, type TurnMetrics, Carrier$1 as Twilio, TwilioAdapter, type TwilioAdapterOptions, type TwilioCarrierOptions, type ConfigureNumberOptions$1 as TwilioConfigureNumberOptions, type InitiateCallOptions$1 as TwilioInitiateCallOptions, type InitiateCallResult$1 as TwilioInitiateCallResult, type ProvisionNumberOptions$1 as TwilioProvisionNumberOptions, type ProvisionNumberResult$1 as TwilioProvisionNumberResult, 
ULTRAVOX_DEFAULT_API_BASE, ULTRAVOX_DEFAULT_SR, type UltravoxEventHandler, UltravoxRealtimeAdapter, STT$4 as WhisperSTT, type WhisperSTTOptions, assemblyai, builtinClipPath, calculateRealtimeCost, calculateSttCost, calculateTelephonyCost, calculateTtsCost, callsToCsv, callsToJson, cartesia, createResampler16kTo8k, createResampler24kTo16k, createResampler8kTo16k, deepgram, defineTool, elevenlabs, filterEmoji, filterForTTS, filterMarkdown, formatDtmf, geminiLive, getLogger, guardrail, initTracing, isRemoteUrl, isTracingEnabled, isWebSocketUrl, lmnt, makeAuthMiddleware, mergePricing, mixPcm, mountApi, mountDashboard, mulawToPcm16, notifyDashboard, openaiTts, pcm16ToMulaw, resample16kTo8k, resample24kTo16k, resample8kTo16k, resamplePcm, rime, scheduleCron, scheduleInterval, scheduleOnce, selectSoundFromList, setLogger, soniox, speechmatics, startSpan, startTunnel, tool, ultravox, whisper };
|