getpatter 0.4.4 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +156 -158
- package/dist/carrier-config-CPG5CROM.mjs +84 -0
- package/dist/{chunk-JO5C35FM.mjs → chunk-AKQFOFLG.mjs} +1 -1
- package/dist/{chunk-O3RQG3NL.mjs → chunk-B6C3KIBG.mjs} +177 -567
- package/dist/index.d.mts +1163 -377
- package/dist/index.d.ts +1163 -377
- package/dist/index.js +2028 -1835
- package/dist/index.mjs +1644 -329
- package/dist/{test-mode-ASSLSQU2.mjs → test-mode-JZMYE5HY.mjs} +1 -1
- package/dist/{tunnel-BL7A7GXW.mjs → tunnel-O7ICMSTP.mjs} +1 -1
- package/package.json +1 -1
- package/dist/lib-4WCAS54J.mjs +0 -830
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,300 @@
|
|
|
1
1
|
import { EventEmitter } from 'events';
|
|
2
2
|
import { Request, Response, NextFunction, Express } from 'express';
|
|
3
3
|
|
|
4
|
+
/** Twilio carrier credentials holder for Patter. */
|
|
5
|
+
interface TwilioCarrierOptions {
|
|
6
|
+
/** Twilio Account SID. Falls back to TWILIO_ACCOUNT_SID env var. */
|
|
7
|
+
accountSid?: string;
|
|
8
|
+
/** Twilio Auth Token. Falls back to TWILIO_AUTH_TOKEN env var. */
|
|
9
|
+
authToken?: string;
|
|
10
|
+
}
|
|
11
|
+
/**
|
|
12
|
+
* Twilio telephony carrier — holds Account SID + Auth Token.
|
|
13
|
+
*
|
|
14
|
+
* @example
|
|
15
|
+
* ```ts
|
|
16
|
+
* import * as twilio from "getpatter/carriers/twilio";
|
|
17
|
+
* const carrier = new twilio.Carrier(); // reads env
|
|
18
|
+
* const carrier = new twilio.Carrier({ accountSid: "AC...", authToken: "..." });
|
|
19
|
+
* ```
|
|
20
|
+
*/
|
|
21
|
+
declare class Carrier$1 {
|
|
22
|
+
readonly kind: "twilio";
|
|
23
|
+
readonly accountSid: string;
|
|
24
|
+
readonly authToken: string;
|
|
25
|
+
constructor(opts?: TwilioCarrierOptions);
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
/** Telnyx carrier credentials holder for Patter. */
|
|
29
|
+
interface TelnyxCarrierOptions {
|
|
30
|
+
/** Telnyx API key. Falls back to TELNYX_API_KEY env var. */
|
|
31
|
+
apiKey?: string;
|
|
32
|
+
/** Telnyx connection ID. Falls back to TELNYX_CONNECTION_ID env var. */
|
|
33
|
+
connectionId?: string;
|
|
34
|
+
/** Optional Ed25519 public key for webhook signature verification. Falls back to TELNYX_PUBLIC_KEY env var. */
|
|
35
|
+
publicKey?: string;
|
|
36
|
+
}
|
|
37
|
+
/**
|
|
38
|
+
* Telnyx telephony carrier — holds API key, connection ID, and optional webhook public key.
|
|
39
|
+
*
|
|
40
|
+
* @example
|
|
41
|
+
* ```ts
|
|
42
|
+
* import * as telnyx from "getpatter/carriers/telnyx";
|
|
43
|
+
* const carrier = new telnyx.Carrier(); // reads env
|
|
44
|
+
* const carrier = new telnyx.Carrier({ apiKey: "KEY...", connectionId: "123" });
|
|
45
|
+
* ```
|
|
46
|
+
*/
|
|
47
|
+
declare class Carrier {
|
|
48
|
+
readonly kind: "telnyx";
|
|
49
|
+
readonly apiKey: string;
|
|
50
|
+
readonly connectionId: string;
|
|
51
|
+
readonly publicKey: string | undefined;
|
|
52
|
+
constructor(opts?: TelnyxCarrierOptions);
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
/** OpenAI Realtime engine — marker class for Patter client dispatch. */
|
|
56
|
+
interface RealtimeOptions {
|
|
57
|
+
/** API key. Falls back to OPENAI_API_KEY env var when omitted. */
|
|
58
|
+
apiKey?: string;
|
|
59
|
+
/** Realtime model. Defaults to gpt-4o-mini-realtime-preview. */
|
|
60
|
+
model?: string;
|
|
61
|
+
/** Voice preset. Defaults to alloy. */
|
|
62
|
+
voice?: string;
|
|
63
|
+
}
|
|
64
|
+
/**
|
|
65
|
+
* OpenAI Realtime engine marker.
|
|
66
|
+
*
|
|
67
|
+
* @example
|
|
68
|
+
* ```ts
|
|
69
|
+
* import * as openai from "getpatter/engines/openai";
|
|
70
|
+
* const engine = new openai.Realtime(); // reads OPENAI_API_KEY
|
|
71
|
+
* const engine = new openai.Realtime({ voice: "alloy" });
|
|
72
|
+
* ```
|
|
73
|
+
*/
|
|
74
|
+
declare class Realtime {
|
|
75
|
+
readonly kind: "openai_realtime";
|
|
76
|
+
readonly apiKey: string;
|
|
77
|
+
readonly model: string;
|
|
78
|
+
readonly voice: string;
|
|
79
|
+
constructor(opts?: RealtimeOptions);
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
/** ElevenLabs ConvAI engine — marker class for Patter client dispatch. */
|
|
83
|
+
interface ConvAIOptions {
|
|
84
|
+
/** API key. Falls back to ELEVENLABS_API_KEY env var when omitted. */
|
|
85
|
+
apiKey?: string;
|
|
86
|
+
/** ElevenLabs Agent ID. Falls back to ELEVENLABS_AGENT_ID env var when omitted. */
|
|
87
|
+
agentId?: string;
|
|
88
|
+
/** Voice ID to override the agent's default voice. */
|
|
89
|
+
voice?: string;
|
|
90
|
+
}
|
|
91
|
+
/**
|
|
92
|
+
* ElevenLabs ConvAI engine marker.
|
|
93
|
+
*
|
|
94
|
+
* @example
|
|
95
|
+
* ```ts
|
|
96
|
+
* import * as elevenlabs from "getpatter/engines/elevenlabs";
|
|
97
|
+
* const engine = new elevenlabs.ConvAI(); // reads env vars
|
|
98
|
+
* const engine = new elevenlabs.ConvAI({ agentId: "agent_..." });
|
|
99
|
+
* ```
|
|
100
|
+
*/
|
|
101
|
+
declare class ConvAI {
|
|
102
|
+
readonly kind: "elevenlabs_convai";
|
|
103
|
+
readonly apiKey: string;
|
|
104
|
+
readonly agentId: string;
|
|
105
|
+
readonly voice: string | undefined;
|
|
106
|
+
constructor(opts?: ConvAIOptions);
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
/** Tunnel marker classes for Patter. Dispatched by the client to decide how to expose local servers. */
|
|
110
|
+
/**
|
|
111
|
+
* Cloudflare Quick Tunnel marker — ask Patter to start a cloudflared tunnel.
|
|
112
|
+
*
|
|
113
|
+
* @example
|
|
114
|
+
* ```ts
|
|
115
|
+
* import { CloudflareTunnel } from "getpatter/tunnels";
|
|
116
|
+
* const tunnel = new CloudflareTunnel();
|
|
117
|
+
* ```
|
|
118
|
+
*/
|
|
119
|
+
declare class CloudflareTunnel {
|
|
120
|
+
readonly kind: "cloudflare";
|
|
121
|
+
}
|
|
122
|
+
/**
|
|
123
|
+
* Static hostname marker — use a pre-existing public hostname (no tunnel).
|
|
124
|
+
*
|
|
125
|
+
* @example
|
|
126
|
+
* ```ts
|
|
127
|
+
* import { Static } from "getpatter/tunnels";
|
|
128
|
+
* const tunnel = new Static({ hostname: "agent.example.com" });
|
|
129
|
+
* ```
|
|
130
|
+
*/
|
|
131
|
+
declare class Static {
|
|
132
|
+
readonly kind: "static";
|
|
133
|
+
readonly hostname: string;
|
|
134
|
+
constructor(opts: {
|
|
135
|
+
hostname: string;
|
|
136
|
+
});
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
/**
|
|
140
|
+
* Public API primitives — `Tool` and `Guardrail` classes, plus the
|
|
141
|
+
* `tool()` / `guardrail()` factory functions.
|
|
142
|
+
*
|
|
143
|
+
* These mirror the Python SDK's `patter.Tool` / `patter.Guardrail`. The
|
|
144
|
+
* classes are structurally compatible with the existing `Guardrail`
|
|
145
|
+
* interface and `ToolDefinition` shape used internally, so code that
|
|
146
|
+
* consumed either form keeps working.
|
|
147
|
+
*/
|
|
148
|
+
|
|
149
|
+
interface GuardrailOptions {
|
|
150
|
+
/** Name for logging when triggered. */
|
|
151
|
+
name: string;
|
|
152
|
+
/** List of terms that trigger the guardrail (case-insensitive). */
|
|
153
|
+
blockedTerms?: string[];
|
|
154
|
+
/** Custom check function — return true to block the response. */
|
|
155
|
+
check?: (text: string) => boolean;
|
|
156
|
+
/** Replacement text spoken when guardrail triggers. */
|
|
157
|
+
replacement?: string;
|
|
158
|
+
}
|
|
159
|
+
/**
|
|
160
|
+
* Guardrail definition. Structurally matches the internal `Guardrail`
|
|
161
|
+
* interface so existing code consuming plain objects keeps working.
|
|
162
|
+
*
|
|
163
|
+
* @example
|
|
164
|
+
* ```ts
|
|
165
|
+
* import { Guardrail } from "getpatter";
|
|
166
|
+
* const rail = new Guardrail({ name: "profanity", blockedTerms: ["badword"] });
|
|
167
|
+
* ```
|
|
168
|
+
*/
|
|
169
|
+
declare class Guardrail$1 {
|
|
170
|
+
readonly name: string;
|
|
171
|
+
readonly blockedTerms?: string[];
|
|
172
|
+
readonly check?: (text: string) => boolean;
|
|
173
|
+
readonly replacement: string;
|
|
174
|
+
constructor(opts: GuardrailOptions);
|
|
175
|
+
}
|
|
176
|
+
/** Factory helper mirroring Python's `guardrail(...)` function. */
|
|
177
|
+
declare function guardrail(opts: GuardrailOptions): Guardrail$1;
|
|
178
|
+
type ToolHandler = (args: Record<string, unknown>, context: Record<string, unknown>) => Promise<string>;
|
|
179
|
+
interface ToolOptions {
|
|
180
|
+
/** Tool name (visible to the LLM). */
|
|
181
|
+
name: string;
|
|
182
|
+
/** What the tool does (visible to the LLM). */
|
|
183
|
+
description?: string;
|
|
184
|
+
/** JSON Schema for tool arguments. */
|
|
185
|
+
parameters?: Record<string, unknown>;
|
|
186
|
+
/** Async function called in-process when the LLM invokes the tool. */
|
|
187
|
+
handler?: ToolHandler;
|
|
188
|
+
/** URL to POST to when the LLM invokes the tool. */
|
|
189
|
+
webhookUrl?: string;
|
|
190
|
+
}
|
|
191
|
+
/**
|
|
192
|
+
* Tool definition. Structurally matches `ToolDefinition` so it drops
|
|
193
|
+
* directly into `agent({ tools: [...] })`.
|
|
194
|
+
*
|
|
195
|
+
* Exactly one of `handler` or `webhookUrl` must be provided.
|
|
196
|
+
*
|
|
197
|
+
* @example
|
|
198
|
+
* ```ts
|
|
199
|
+
* import { Tool } from "getpatter";
|
|
200
|
+
* const t = new Tool({
|
|
201
|
+
* name: "check_menu",
|
|
202
|
+
* description: "Check available menu items",
|
|
203
|
+
* handler: async () => JSON.stringify({ items: ["margherita"] }),
|
|
204
|
+
* });
|
|
205
|
+
* ```
|
|
206
|
+
*/
|
|
207
|
+
declare class Tool implements ToolDefinition {
|
|
208
|
+
readonly name: string;
|
|
209
|
+
readonly description: string;
|
|
210
|
+
readonly parameters: Record<string, unknown>;
|
|
211
|
+
readonly handler?: ToolHandler;
|
|
212
|
+
readonly webhookUrl?: string;
|
|
213
|
+
constructor(opts: ToolOptions);
|
|
214
|
+
}
|
|
215
|
+
/** Factory helper mirroring Python's `tool(...)` function. */
|
|
216
|
+
declare function tool(opts: ToolOptions): Tool;
|
|
217
|
+
|
|
218
|
+
/**
|
|
219
|
+
* Shared STT / TTS adapter dispatch.
|
|
220
|
+
*
|
|
221
|
+
* In v0.5.0+ callers always pass pre-instantiated adapters (``agent.stt`` /
|
|
222
|
+
* ``agent.tts`` are ``STTAdapter`` / ``TTSAdapter`` instances), so these
|
|
223
|
+
* helpers are thin pass-throughs that return the instance or null. Kept as
|
|
224
|
+
* functions so the Twilio/Telnyx bridges have a single dispatch point.
|
|
225
|
+
*/
|
|
226
|
+
|
|
227
|
+
interface STTTranscript {
|
|
228
|
+
text: string;
|
|
229
|
+
isFinal?: boolean;
|
|
230
|
+
}
|
|
231
|
+
type STTTranscriptCallback = (t: STTTranscript) => Promise<void> | void;
|
|
232
|
+
/** Shape shared by every STT adapter in the SDK. */
|
|
233
|
+
interface STTAdapter {
|
|
234
|
+
connect(): Promise<void>;
|
|
235
|
+
sendAudio(pcm: Buffer): void | Promise<void>;
|
|
236
|
+
onTranscript(cb: STTTranscriptCallback): void;
|
|
237
|
+
close(): void | Promise<void>;
|
|
238
|
+
}
|
|
239
|
+
interface TTSAdapter {
|
|
240
|
+
synthesizeStream(text: string): AsyncIterable<Buffer>;
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
/**
|
|
244
|
+
* Built-in LLM loop for pipeline mode when no onMessage handler is provided.
|
|
245
|
+
*
|
|
246
|
+
* Uses a pluggable ``LLMProvider`` interface so callers can supply OpenAI,
|
|
247
|
+
* Anthropic, Gemini, or any custom provider. The default provider is
|
|
248
|
+
* ``OpenAILLMProvider`` which preserves full backward compatibility.
|
|
249
|
+
*/
|
|
250
|
+
|
|
251
|
+
/** A single streaming chunk yielded by an LLM provider. */
|
|
252
|
+
interface LLMChunk {
|
|
253
|
+
type: 'text' | 'tool_call' | 'done';
|
|
254
|
+
content?: string;
|
|
255
|
+
index?: number;
|
|
256
|
+
id?: string;
|
|
257
|
+
name?: string;
|
|
258
|
+
arguments?: string;
|
|
259
|
+
}
|
|
260
|
+
/**
|
|
261
|
+
* Interface that any LLM provider must satisfy.
|
|
262
|
+
*
|
|
263
|
+
* Implementors yield streaming ``LLMChunk`` objects:
|
|
264
|
+
* - ``{ type: "text", content: "..." }`` — a text token.
|
|
265
|
+
* - ``{ type: "tool_call", index, id?, name?, arguments? }`` — a (partial) tool
|
|
266
|
+
* invocation. Chunks with the same ``index`` are concatenated.
|
|
267
|
+
* - ``{ type: "done" }`` — signals the end of the stream (optional).
|
|
268
|
+
*/
|
|
269
|
+
interface LLMProvider {
|
|
270
|
+
stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
|
|
271
|
+
}
|
|
272
|
+
/** LLM provider backed by OpenAI Chat Completions (streaming). */
|
|
273
|
+
declare class OpenAILLMProvider implements LLMProvider {
|
|
274
|
+
private readonly apiKey;
|
|
275
|
+
private readonly model;
|
|
276
|
+
constructor(apiKey: string, model: string);
|
|
277
|
+
stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
|
|
278
|
+
}
|
|
279
|
+
declare class LLMLoop {
|
|
280
|
+
private readonly provider;
|
|
281
|
+
private readonly systemPrompt;
|
|
282
|
+
private readonly tools;
|
|
283
|
+
private readonly openaiTools;
|
|
284
|
+
private readonly toolMap;
|
|
285
|
+
constructor(apiKey: string, model: string, systemPrompt: string, tools?: ToolDefinition[] | null, llmProvider?: LLMProvider);
|
|
286
|
+
/**
|
|
287
|
+
* Stream LLM response tokens, handling tool calls automatically.
|
|
288
|
+
* Yields text tokens as they arrive from the LLM.
|
|
289
|
+
*/
|
|
290
|
+
run(userText: string, history: Array<{
|
|
291
|
+
role: string;
|
|
292
|
+
text: string;
|
|
293
|
+
}>, callContext: Record<string, unknown>): AsyncGenerator<string, void, unknown>;
|
|
294
|
+
private executeTool;
|
|
295
|
+
private buildMessages;
|
|
296
|
+
}
|
|
297
|
+
|
|
4
298
|
interface IncomingMessage {
|
|
5
299
|
readonly text: string;
|
|
6
300
|
readonly callId: string;
|
|
@@ -108,32 +402,34 @@ interface Call {
|
|
|
108
402
|
}
|
|
109
403
|
interface LocalOptions {
|
|
110
404
|
/**
|
|
111
|
-
*
|
|
112
|
-
* ``
|
|
113
|
-
* ``Patter(twilio_sid=...)`` as local mode by default).
|
|
405
|
+
* Local mode is auto-detected when a ``carrier`` is passed. Pass
|
|
406
|
+
* ``mode: 'local'`` to force local mode explicitly.
|
|
114
407
|
*/
|
|
115
408
|
mode?: 'local';
|
|
116
|
-
twilioSid?: string;
|
|
117
|
-
twilioToken?: string;
|
|
118
|
-
openaiKey?: string;
|
|
119
|
-
phoneNumber: string;
|
|
120
|
-
webhookUrl?: string;
|
|
121
|
-
telephonyProvider?: 'twilio' | 'telnyx';
|
|
122
|
-
telnyxKey?: string;
|
|
123
|
-
telnyxConnectionId?: string;
|
|
124
409
|
/**
|
|
125
|
-
*
|
|
126
|
-
*
|
|
410
|
+
* Telephony carrier instance. Required for local mode.
|
|
411
|
+
*
|
|
412
|
+
* @example
|
|
413
|
+
* ```ts
|
|
414
|
+
* import { Patter, Twilio } from "getpatter";
|
|
415
|
+
* const phone = new Patter({ carrier: new Twilio(), phoneNumber: "+1..." });
|
|
416
|
+
* ```
|
|
127
417
|
*/
|
|
128
|
-
|
|
418
|
+
carrier: Carrier$1 | Carrier;
|
|
419
|
+
/**
|
|
420
|
+
* Tunnel configuration. Accepts a tunnel instance, ``true`` (alias for
|
|
421
|
+
* ``new CloudflareTunnel()``), or ``false`` / omitted (no tunnel).
|
|
422
|
+
*/
|
|
423
|
+
tunnel?: CloudflareTunnel | Static | boolean;
|
|
424
|
+
phoneNumber: string;
|
|
425
|
+
webhookUrl?: string;
|
|
129
426
|
/**
|
|
130
|
-
*
|
|
131
|
-
* ``
|
|
132
|
-
*
|
|
427
|
+
* @internal — allows ``StreamHandler`` to build the default OpenAI
|
|
428
|
+
* ``LLMLoop`` when no ``onMessage`` handler is supplied. The
|
|
429
|
+
* ``OpenAIRealtime`` engine instance carries its own key when one is
|
|
430
|
+
* used via ``phone.agent({ engine: new OpenAIRealtime({ apiKey }) })``.
|
|
133
431
|
*/
|
|
134
|
-
|
|
135
|
-
/** Provider-level ElevenLabs API key (same semantics as ``deepgramKey``). */
|
|
136
|
-
elevenlabsKey?: string;
|
|
432
|
+
openaiKey?: string;
|
|
137
433
|
}
|
|
138
434
|
interface Guardrail {
|
|
139
435
|
/** Name for logging when triggered */
|
|
@@ -189,23 +485,47 @@ interface BackgroundAudioPlayer$1 {
|
|
|
189
485
|
}
|
|
190
486
|
interface AgentOptions {
|
|
191
487
|
systemPrompt: string;
|
|
488
|
+
/**
|
|
489
|
+
* Voice preset. When ``engine`` is provided, its ``voice`` is used unless
|
|
490
|
+
* explicitly overridden here.
|
|
491
|
+
*/
|
|
192
492
|
voice?: string;
|
|
493
|
+
/**
|
|
494
|
+
* LLM / Realtime model. When ``engine`` is provided, its ``model`` is used
|
|
495
|
+
* unless explicitly overridden here.
|
|
496
|
+
*/
|
|
193
497
|
model?: string;
|
|
194
498
|
language?: string;
|
|
195
499
|
firstMessage?: string;
|
|
196
|
-
|
|
500
|
+
/** Tool definitions — ``Tool`` class instances from ``getpatter``. */
|
|
501
|
+
tools?: Array<Tool>;
|
|
502
|
+
/**
|
|
503
|
+
* Realtime / ConvAI engine instance. When present, the agent runs in the
|
|
504
|
+
* matching mode (``openai_realtime`` or ``elevenlabs_convai``). When absent,
|
|
505
|
+
* pipeline mode is selected if ``stt`` and ``tts`` are provided.
|
|
506
|
+
*/
|
|
507
|
+
engine?: Realtime | ConvAI;
|
|
508
|
+
/**
|
|
509
|
+
* Provider mode. Normally derived from ``engine`` / ``stt`` + ``tts``. Pass
|
|
510
|
+
* ``'pipeline'`` explicitly when building a pipeline-mode agent without
|
|
511
|
+
* an engine instance.
|
|
512
|
+
*/
|
|
197
513
|
provider?: 'openai_realtime' | 'elevenlabs_convai' | 'pipeline';
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
514
|
+
/** Pre-instantiated STT adapter (e.g. ``new DeepgramSTT({ apiKey })``). */
|
|
515
|
+
stt?: STTAdapter;
|
|
516
|
+
/** Pre-instantiated TTS adapter (e.g. ``new ElevenLabsTTS({ apiKey })``). */
|
|
517
|
+
tts?: TTSAdapter;
|
|
518
|
+
/**
|
|
519
|
+
* Pipeline-mode LLM provider (e.g. ``new AnthropicLLM()``). When set, the
|
|
520
|
+
* built-in LLM loop uses this provider instead of the OpenAI default.
|
|
521
|
+
* Mutually exclusive with ``onMessage`` passed to ``serve()``. Ignored
|
|
522
|
+
* when ``engine`` is set (realtime mode bypasses the pipeline LLM).
|
|
523
|
+
*/
|
|
524
|
+
llm?: LLMProvider;
|
|
205
525
|
/** Dynamic variables for ``{placeholder}`` substitution in systemPrompt at call time. */
|
|
206
526
|
variables?: Record<string, string>;
|
|
207
|
-
/** Output guardrails —
|
|
208
|
-
guardrails?: Guardrail
|
|
527
|
+
/** Output guardrails — ``Guardrail`` class instances from ``getpatter``. */
|
|
528
|
+
guardrails?: Array<Guardrail>;
|
|
209
529
|
/** Pipeline hooks — intercept and transform data at each pipeline stage (pipeline mode only). */
|
|
210
530
|
hooks?: PipelineHooks;
|
|
211
531
|
/** Text transforms applied to LLM output before TTS (pipeline mode only).
|
|
@@ -272,52 +592,13 @@ interface LocalCallOptions {
|
|
|
272
592
|
ringTimeout?: number;
|
|
273
593
|
}
|
|
274
594
|
|
|
275
|
-
/**
|
|
276
|
-
* Deepgram STT config. Tune latency via ``endpointingMs`` / ``utteranceEndMs``
|
|
277
|
-
* — mirrors Python's ``Patter.deepgram(endpointing_ms=..., utterance_end_ms=...)``.
|
|
278
|
-
*/
|
|
279
|
-
declare function deepgram(opts: {
|
|
280
|
-
apiKey: string;
|
|
281
|
-
language?: string;
|
|
282
|
-
model?: string;
|
|
283
|
-
endpointingMs?: number;
|
|
284
|
-
utteranceEndMs?: number | null;
|
|
285
|
-
smartFormat?: boolean;
|
|
286
|
-
interimResults?: boolean;
|
|
287
|
-
vadEvents?: boolean;
|
|
288
|
-
}): STTConfig;
|
|
289
|
-
declare function whisper(opts: {
|
|
290
|
-
apiKey: string;
|
|
291
|
-
language?: string;
|
|
292
|
-
}): STTConfig;
|
|
293
|
-
declare function elevenlabs(opts: {
|
|
294
|
-
apiKey: string;
|
|
295
|
-
voice?: string;
|
|
296
|
-
}): TTSConfig;
|
|
297
|
-
declare function openaiTts(opts: {
|
|
298
|
-
apiKey: string;
|
|
299
|
-
voice?: string;
|
|
300
|
-
}): TTSConfig;
|
|
301
|
-
declare function cartesia(opts: {
|
|
302
|
-
apiKey: string;
|
|
303
|
-
voice?: string;
|
|
304
|
-
}): TTSConfig;
|
|
305
|
-
declare function rime(opts: {
|
|
306
|
-
apiKey: string;
|
|
307
|
-
voice?: string;
|
|
308
|
-
}): TTSConfig;
|
|
309
|
-
declare function lmnt(opts: {
|
|
310
|
-
apiKey: string;
|
|
311
|
-
voice?: string;
|
|
312
|
-
}): TTSConfig;
|
|
313
|
-
|
|
314
595
|
declare class Patter {
|
|
315
596
|
readonly apiKey: string;
|
|
316
597
|
private readonly backendUrl;
|
|
317
598
|
private readonly restUrl;
|
|
318
599
|
private readonly connection;
|
|
319
600
|
private readonly mode;
|
|
320
|
-
private
|
|
601
|
+
private localConfig;
|
|
321
602
|
private embeddedServer;
|
|
322
603
|
private tunnelHandle;
|
|
323
604
|
constructor(options: PatterOptions | LocalOptions);
|
|
@@ -335,51 +616,6 @@ declare class Patter {
|
|
|
335
616
|
}): Promise<PhoneNumber>;
|
|
336
617
|
assignAgent(numberId: string, agentId: string): Promise<void>;
|
|
337
618
|
listCalls(limit?: number): Promise<Call[]>;
|
|
338
|
-
static deepgram: typeof deepgram;
|
|
339
|
-
static whisper: typeof whisper;
|
|
340
|
-
static elevenlabs: typeof elevenlabs;
|
|
341
|
-
static openaiTts: typeof openaiTts;
|
|
342
|
-
static cartesia: typeof cartesia;
|
|
343
|
-
static rime: typeof rime;
|
|
344
|
-
static lmnt: typeof lmnt;
|
|
345
|
-
static guardrail(opts: {
|
|
346
|
-
name: string;
|
|
347
|
-
blockedTerms?: string[];
|
|
348
|
-
check?: (text: string) => boolean;
|
|
349
|
-
replacement?: string;
|
|
350
|
-
}): Guardrail;
|
|
351
|
-
/**
|
|
352
|
-
* Create a tool definition for use with `agent({ tools: [...] })`.
|
|
353
|
-
*
|
|
354
|
-
* Either `handler` (a function) or `webhookUrl` must be provided.
|
|
355
|
-
*
|
|
356
|
-
* @param opts.name - Tool name (visible to the LLM).
|
|
357
|
-
* @param opts.description - What the tool does (visible to the LLM).
|
|
358
|
-
* @param opts.parameters - JSON Schema for tool arguments.
|
|
359
|
-
* @param opts.handler - Async function called in-process when the LLM invokes the tool.
|
|
360
|
-
* @param opts.webhookUrl - URL to POST to when the LLM invokes the tool.
|
|
361
|
-
*
|
|
362
|
-
* @example
|
|
363
|
-
* ```ts
|
|
364
|
-
* phone.agent({
|
|
365
|
-
* systemPrompt: 'You are a pizza bot.',
|
|
366
|
-
* tools: [
|
|
367
|
-
* Patter.tool({
|
|
368
|
-
* name: 'check_menu',
|
|
369
|
-
* description: 'Check available menu items',
|
|
370
|
-
* handler: async (args) => JSON.stringify({ items: ['margherita'] }),
|
|
371
|
-
* }),
|
|
372
|
-
* ],
|
|
373
|
-
* });
|
|
374
|
-
* ```
|
|
375
|
-
*/
|
|
376
|
-
static tool(opts: {
|
|
377
|
-
name: string;
|
|
378
|
-
description?: string;
|
|
379
|
-
parameters?: Record<string, unknown>;
|
|
380
|
-
handler?: (args: Record<string, unknown>, context: Record<string, unknown>) => Promise<string>;
|
|
381
|
-
webhookUrl?: string;
|
|
382
|
-
}): ToolDefinition;
|
|
383
619
|
private registerNumber;
|
|
384
620
|
}
|
|
385
621
|
|
|
@@ -552,6 +788,34 @@ declare class ProvisionError extends PatterError {
|
|
|
552
788
|
constructor(message: string);
|
|
553
789
|
}
|
|
554
790
|
|
|
791
|
+
/**
|
|
792
|
+
* Deepgram STT config builder. Tune latency via ``endpointingMs`` /
|
|
793
|
+
* ``utteranceEndMs``. Internal only — public code should use ``DeepgramSTT``
|
|
794
|
+
* from ``getpatter/stt/deepgram``.
|
|
795
|
+
*/
|
|
796
|
+
declare function deepgram(opts: {
|
|
797
|
+
apiKey: string;
|
|
798
|
+
language?: string;
|
|
799
|
+
model?: string;
|
|
800
|
+
endpointingMs?: number;
|
|
801
|
+
utteranceEndMs?: number | null;
|
|
802
|
+
smartFormat?: boolean;
|
|
803
|
+
interimResults?: boolean;
|
|
804
|
+
vadEvents?: boolean;
|
|
805
|
+
}): STTConfig;
|
|
806
|
+
declare function whisper(opts: {
|
|
807
|
+
apiKey: string;
|
|
808
|
+
language?: string;
|
|
809
|
+
}): STTConfig;
|
|
810
|
+
declare function elevenlabs(opts: {
|
|
811
|
+
apiKey: string;
|
|
812
|
+
voice?: string;
|
|
813
|
+
}): TTSConfig;
|
|
814
|
+
declare function openaiTts(opts: {
|
|
815
|
+
apiKey: string;
|
|
816
|
+
voice?: string;
|
|
817
|
+
}): TTSConfig;
|
|
818
|
+
|
|
555
819
|
/**
|
|
556
820
|
* Default provider pricing and merge utilities.
|
|
557
821
|
*
|
|
@@ -753,109 +1017,6 @@ declare class ElevenLabsConvAIAdapter {
|
|
|
753
1017
|
close(): void;
|
|
754
1018
|
}
|
|
755
1019
|
|
|
756
|
-
interface Transcript$4 {
|
|
757
|
-
readonly text: string;
|
|
758
|
-
readonly isFinal: boolean;
|
|
759
|
-
readonly confidence: number;
|
|
760
|
-
}
|
|
761
|
-
type TranscriptCallback$4 = (transcript: Transcript$4) => void;
|
|
762
|
-
/**
|
|
763
|
-
* Optional tuning knobs for Deepgram live transcription.
|
|
764
|
-
*
|
|
765
|
-
* Mirrors Python's ``DeepgramSTT`` kwargs so callers can lower turn latency
|
|
766
|
-
* without monkey-patching (BUG #13).
|
|
767
|
-
*/
|
|
768
|
-
interface DeepgramSTTOptions {
|
|
769
|
-
/** Model name. Default ``nova-3``. */
|
|
770
|
-
readonly model?: string;
|
|
771
|
-
/** Audio encoding (``linear16`` | ``mulaw`` | etc). Default ``linear16``. */
|
|
772
|
-
readonly encoding?: string;
|
|
773
|
-
/** Sample rate in Hz. Default ``16000``. */
|
|
774
|
-
readonly sampleRate?: number;
|
|
775
|
-
/**
|
|
776
|
-
* Voice-activity endpointing threshold in milliseconds.
|
|
777
|
-
* Lower values reduce turn latency at the cost of more false-start cuts.
|
|
778
|
-
* Default ``150``.
|
|
779
|
-
*/
|
|
780
|
-
readonly endpointingMs?: number;
|
|
781
|
-
/**
|
|
782
|
-
* End-of-utterance silence window in milliseconds. Deepgram enforces a
|
|
783
|
-
* hard minimum of 1000 ms. Set to ``null`` to disable. Default ``1000``.
|
|
784
|
-
*/
|
|
785
|
-
readonly utteranceEndMs?: number | null;
|
|
786
|
-
/** Enable smart formatting (punctuation + numerals). Default ``true``. */
|
|
787
|
-
readonly smartFormat?: boolean;
|
|
788
|
-
/** Emit interim (non-final) transcripts. Default ``true``. */
|
|
789
|
-
readonly interimResults?: boolean;
|
|
790
|
-
/** Emit VAD events (``SpeechStarted`` / ``UtteranceEnd``). Default ``true``. */
|
|
791
|
-
readonly vadEvents?: boolean;
|
|
792
|
-
}
|
|
793
|
-
declare class DeepgramSTT {
|
|
794
|
-
private ws;
|
|
795
|
-
private callbacks;
|
|
796
|
-
/** Request ID from Deepgram — used to query actual cost post-call. */
|
|
797
|
-
requestId: string;
|
|
798
|
-
private readonly apiKey;
|
|
799
|
-
private readonly language;
|
|
800
|
-
private readonly model;
|
|
801
|
-
private readonly encoding;
|
|
802
|
-
private readonly sampleRate;
|
|
803
|
-
private readonly endpointingMs;
|
|
804
|
-
private readonly utteranceEndMs;
|
|
805
|
-
private readonly smartFormat;
|
|
806
|
-
private readonly interimResults;
|
|
807
|
-
private readonly vadEvents;
|
|
808
|
-
/**
|
|
809
|
-
* New ergonomic constructor accepting an options object (mirrors Python kwargs).
|
|
810
|
-
*
|
|
811
|
-
* Also accepts the legacy positional form
|
|
812
|
-
* ``(apiKey, language?, model?, encoding?, sampleRate?)`` for backward
|
|
813
|
-
* compatibility with code that predated BUG #13.
|
|
814
|
-
*/
|
|
815
|
-
constructor(apiKey: string, language?: string, model?: string, encoding?: string, sampleRate?: number, options?: DeepgramSTTOptions);
|
|
816
|
-
constructor(apiKey: string, options: DeepgramSTTOptions & {
|
|
817
|
-
language?: string;
|
|
818
|
-
});
|
|
819
|
-
/** Factory for Twilio calls — mulaw 8 kHz. Forwards tuning options through. */
|
|
820
|
-
static forTwilio(apiKey: string, language?: string, model?: string, options?: DeepgramSTTOptions): DeepgramSTT;
|
|
821
|
-
connect(): Promise<void>;
|
|
822
|
-
sendAudio(audio: Buffer): void;
|
|
823
|
-
onTranscript(callback: TranscriptCallback$4): void;
|
|
824
|
-
close(): void;
|
|
825
|
-
}
|
|
826
|
-
|
|
827
|
-
/**
|
|
828
|
-
* OpenAI Whisper STT adapter for the Patter SDK pipeline mode.
|
|
829
|
-
*
|
|
830
|
-
* Buffers incoming PCM16 audio and periodically sends it to the
|
|
831
|
-
* OpenAI Whisper transcription API as a WAV file.
|
|
832
|
-
*/
|
|
833
|
-
interface Transcript$3 {
|
|
834
|
-
readonly text: string;
|
|
835
|
-
readonly isFinal: boolean;
|
|
836
|
-
readonly confidence: number;
|
|
837
|
-
}
|
|
838
|
-
type TranscriptCallback$3 = (transcript: Transcript$3) => void;
|
|
839
|
-
declare class WhisperSTT {
|
|
840
|
-
private readonly apiKey;
|
|
841
|
-
private readonly model;
|
|
842
|
-
private readonly language;
|
|
843
|
-
private readonly bufferSize;
|
|
844
|
-
private buffer;
|
|
845
|
-
private callbacks;
|
|
846
|
-
private running;
|
|
847
|
-
private pendingTranscriptions;
|
|
848
|
-
constructor(apiKey: string, model?: string, language?: string, bufferSize?: number);
|
|
849
|
-
/** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
|
|
850
|
-
static forTwilio(apiKey: string, language?: string, model?: string): WhisperSTT;
|
|
851
|
-
connect(): Promise<void>;
|
|
852
|
-
sendAudio(audio: Buffer): void;
|
|
853
|
-
private trackTranscription;
|
|
854
|
-
onTranscript(callback: TranscriptCallback$3): void;
|
|
855
|
-
close(): Promise<void>;
|
|
856
|
-
private transcribeBuffer;
|
|
857
|
-
}
|
|
858
|
-
|
|
859
1020
|
/**
|
|
860
1021
|
* In-memory metrics store for the local dashboard.
|
|
861
1022
|
*
|
|
@@ -1052,61 +1213,6 @@ declare function mountApi(app: Express, store: MetricsStore, token?: string): vo
|
|
|
1052
1213
|
*/
|
|
1053
1214
|
declare function notifyDashboard(callData: Record<string, unknown>, port?: number): void;
|
|
1054
1215
|
|
|
1055
|
-
/**
|
|
1056
|
-
* Built-in LLM loop for pipeline mode when no onMessage handler is provided.
|
|
1057
|
-
*
|
|
1058
|
-
* Uses a pluggable ``LLMProvider`` interface so callers can supply OpenAI,
|
|
1059
|
-
* Anthropic, Gemini, or any custom provider. The default provider is
|
|
1060
|
-
* ``OpenAILLMProvider`` which preserves full backward compatibility.
|
|
1061
|
-
*/
|
|
1062
|
-
|
|
1063
|
-
/** A single streaming chunk yielded by an LLM provider. */
|
|
1064
|
-
interface LLMChunk {
|
|
1065
|
-
type: 'text' | 'tool_call' | 'done';
|
|
1066
|
-
content?: string;
|
|
1067
|
-
index?: number;
|
|
1068
|
-
id?: string;
|
|
1069
|
-
name?: string;
|
|
1070
|
-
arguments?: string;
|
|
1071
|
-
}
|
|
1072
|
-
/**
|
|
1073
|
-
* Interface that any LLM provider must satisfy.
|
|
1074
|
-
*
|
|
1075
|
-
* Implementors yield streaming ``LLMChunk`` objects:
|
|
1076
|
-
* - ``{ type: "text", content: "..." }`` — a text token.
|
|
1077
|
-
* - ``{ type: "tool_call", index, id?, name?, arguments? }`` — a (partial) tool
|
|
1078
|
-
* invocation. Chunks with the same ``index`` are concatenated.
|
|
1079
|
-
* - ``{ type: "done" }`` — signals the end of the stream (optional).
|
|
1080
|
-
*/
|
|
1081
|
-
interface LLMProvider {
|
|
1082
|
-
stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
|
|
1083
|
-
}
|
|
1084
|
-
/** LLM provider backed by OpenAI Chat Completions (streaming). */
|
|
1085
|
-
declare class OpenAILLMProvider implements LLMProvider {
|
|
1086
|
-
private readonly apiKey;
|
|
1087
|
-
private readonly model;
|
|
1088
|
-
constructor(apiKey: string, model: string);
|
|
1089
|
-
stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
|
|
1090
|
-
}
|
|
1091
|
-
declare class LLMLoop {
|
|
1092
|
-
private readonly provider;
|
|
1093
|
-
private readonly systemPrompt;
|
|
1094
|
-
private readonly tools;
|
|
1095
|
-
private readonly openaiTools;
|
|
1096
|
-
private readonly toolMap;
|
|
1097
|
-
constructor(apiKey: string, model: string, systemPrompt: string, tools?: ToolDefinition[] | null, llmProvider?: LLMProvider);
|
|
1098
|
-
/**
|
|
1099
|
-
* Stream LLM response tokens, handling tool calls automatically.
|
|
1100
|
-
* Yields text tokens as they arrive from the LLM.
|
|
1101
|
-
*/
|
|
1102
|
-
run(userText: string, history: Array<{
|
|
1103
|
-
role: string;
|
|
1104
|
-
text: string;
|
|
1105
|
-
}>, callContext: Record<string, unknown>): AsyncGenerator<string, void, unknown>;
|
|
1106
|
-
private executeTool;
|
|
1107
|
-
private buildMessages;
|
|
1108
|
-
}
|
|
1109
|
-
|
|
1110
1216
|
/**
|
|
1111
1217
|
* Fallback LLM provider that tries multiple providers in sequence.
|
|
1112
1218
|
*
|
|
@@ -1358,13 +1464,13 @@ declare function scheduleInterval(intervalOrOpts: number | {
|
|
|
1358
1464
|
* `speechmatics` extra; TypeScript users need to wait for an official
|
|
1359
1465
|
* upstream SDK before this adapter can land without a WS-handshake reimpl.
|
|
1360
1466
|
*/
|
|
1361
|
-
interface Transcript$2 {
|
|
1467
|
+
interface Transcript$4 {
|
|
1362
1468
|
readonly text: string;
|
|
1363
1469
|
readonly isFinal: boolean;
|
|
1364
1470
|
readonly confidence: number;
|
|
1365
1471
|
}
|
|
1366
|
-
type TranscriptCallback$2 = (transcript: Transcript$2) => void;
|
|
1367
|
-
interface SonioxSTTOptions {
|
|
1472
|
+
type TranscriptCallback$4 = (transcript: Transcript$4) => void;
|
|
1473
|
+
interface SonioxSTTOptions$1 {
|
|
1368
1474
|
model?: string;
|
|
1369
1475
|
languageHints?: string[];
|
|
1370
1476
|
languageHintsStrict?: boolean;
|
|
@@ -1392,7 +1498,7 @@ declare class SonioxSTT {
|
|
|
1392
1498
|
private readonly maxEndpointDelayMs;
|
|
1393
1499
|
private readonly clientReferenceId?;
|
|
1394
1500
|
private readonly baseUrl;
|
|
1395
|
-
constructor(apiKey: string, options?: SonioxSTTOptions);
|
|
1501
|
+
constructor(apiKey: string, options?: SonioxSTTOptions$1);
|
|
1396
1502
|
/** Factory for Twilio-style 8 kHz linear PCM. */
|
|
1397
1503
|
static forTwilio(apiKey: string, languageHints?: string[]): SonioxSTT;
|
|
1398
1504
|
private buildConfig;
|
|
@@ -1401,7 +1507,7 @@ declare class SonioxSTT {
|
|
|
1401
1507
|
private handleMessage;
|
|
1402
1508
|
private emit;
|
|
1403
1509
|
sendAudio(audio: Buffer): void;
|
|
1404
|
-
onTranscript(callback: TranscriptCallback$2): void;
|
|
1510
|
+
onTranscript(callback: TranscriptCallback$4): void;
|
|
1405
1511
|
close(): void;
|
|
1406
1512
|
}
|
|
1407
1513
|
|
|
@@ -1416,15 +1522,15 @@ declare class SonioxSTT {
|
|
|
1416
1522
|
* Source: livekit-plugins/livekit-plugins-assemblyai/livekit/plugins/assemblyai/stt.py
|
|
1417
1523
|
* Upstream ref SHA: 78a66bcf79c5cea82989401c408f1dff4b961a5b
|
|
1418
1524
|
*/
|
|
1419
|
-
interface Transcript$1 {
|
|
1525
|
+
interface Transcript$3 {
|
|
1420
1526
|
readonly text: string;
|
|
1421
1527
|
readonly isFinal: boolean;
|
|
1422
1528
|
readonly confidence: number;
|
|
1423
1529
|
}
|
|
1424
|
-
type TranscriptCallback$1 = (transcript: Transcript$1) => void;
|
|
1530
|
+
type TranscriptCallback$3 = (transcript: Transcript$3) => void;
|
|
1425
1531
|
type AssemblyAIEncoding = 'pcm_s16le' | 'pcm_mulaw';
|
|
1426
1532
|
type AssemblyAIModel = 'universal-streaming-english' | 'universal-streaming-multilingual' | 'u3-rt-pro';
|
|
1427
|
-
interface AssemblyAISTTOptions {
|
|
1533
|
+
interface AssemblyAISTTOptions$1 {
|
|
1428
1534
|
/** One of the AssemblyAI speech models. */
|
|
1429
1535
|
readonly model?: AssemblyAIModel;
|
|
1430
1536
|
/** PCM encoding: 16-bit little-endian (default) or G.711 mu-law for telephony. */
|
|
@@ -1465,7 +1571,7 @@ declare class AssemblyAISTT {
|
|
|
1465
1571
|
sessionId: string;
|
|
1466
1572
|
/** Unix timestamp when the AssemblyAI session expires. */
|
|
1467
1573
|
expiresAt: number;
|
|
1468
|
-
constructor(apiKey: string, options?: AssemblyAISTTOptions);
|
|
1574
|
+
constructor(apiKey: string, options?: AssemblyAISTTOptions$1);
|
|
1469
1575
|
/** Factory for Twilio calls — mulaw 8 kHz. */
|
|
1470
1576
|
static forTwilio(apiKey: string, model?: AssemblyAIModel): AssemblyAISTT;
|
|
1471
1577
|
private buildUrl;
|
|
@@ -1473,7 +1579,7 @@ declare class AssemblyAISTT {
|
|
|
1473
1579
|
private handleEvent;
|
|
1474
1580
|
private emit;
|
|
1475
1581
|
sendAudio(audio: Buffer): void;
|
|
1476
|
-
onTranscript(callback: TranscriptCallback$1): void;
|
|
1582
|
+
onTranscript(callback: TranscriptCallback$3): void;
|
|
1477
1583
|
close(): void;
|
|
1478
1584
|
}
|
|
1479
1585
|
|
|
@@ -1488,15 +1594,15 @@ declare class AssemblyAISTT {
|
|
|
1488
1594
|
* Source: livekit-plugins/livekit-plugins-cartesia/livekit/plugins/cartesia/stt.py
|
|
1489
1595
|
* Upstream ref SHA: 78a66bcf79c5cea82989401c408f1dff4b961a5b
|
|
1490
1596
|
*/
|
|
1491
|
-
interface Transcript {
|
|
1597
|
+
interface Transcript$2 {
|
|
1492
1598
|
readonly text: string;
|
|
1493
1599
|
readonly isFinal: boolean;
|
|
1494
1600
|
readonly confidence: number;
|
|
1495
1601
|
}
|
|
1496
|
-
type TranscriptCallback = (transcript: Transcript) => void;
|
|
1602
|
+
type TranscriptCallback$2 = (transcript: Transcript$2) => void;
|
|
1497
1603
|
/** Cartesia STT currently only accepts 16-bit PCM little-endian. */
|
|
1498
1604
|
type CartesiaEncoding = 'pcm_s16le';
|
|
1499
|
-
interface CartesiaSTTOptions {
|
|
1605
|
+
interface CartesiaSTTOptions$1 {
|
|
1500
1606
|
/** Cartesia STT model. Currently only `"ink-whisper"`. */
|
|
1501
1607
|
readonly model?: string;
|
|
1502
1608
|
/** BCP-47 language code. */
|
|
@@ -1516,98 +1622,418 @@ declare class CartesiaSTT {
|
|
|
1516
1622
|
private keepaliveTimer;
|
|
1517
1623
|
/** Cartesia request id — set from the server transcript events. */
|
|
1518
1624
|
requestId: string;
|
|
1519
|
-
constructor(apiKey: string, options?: CartesiaSTTOptions);
|
|
1625
|
+
constructor(apiKey: string, options?: CartesiaSTTOptions$1);
|
|
1520
1626
|
private buildWsUrl;
|
|
1521
1627
|
connect(): Promise<void>;
|
|
1522
1628
|
private handleEvent;
|
|
1523
1629
|
private emit;
|
|
1524
1630
|
sendAudio(audio: Buffer): void;
|
|
1525
|
-
onTranscript(callback: TranscriptCallback): void;
|
|
1631
|
+
onTranscript(callback: TranscriptCallback$2): void;
|
|
1526
1632
|
close(): void;
|
|
1527
1633
|
}
|
|
1528
1634
|
|
|
1529
|
-
|
|
1530
|
-
|
|
1531
|
-
|
|
1532
|
-
|
|
1533
|
-
|
|
1534
|
-
constructor(apiKey: string, voiceId?: string, modelId?: string, outputFormat?: string);
|
|
1535
|
-
/**
|
|
1536
|
-
* Synthesise text to speech and return the full audio as a single Buffer.
|
|
1537
|
-
*
|
|
1538
|
-
* For large chunks (or when latency matters) call `synthesizeStream` instead.
|
|
1539
|
-
*/
|
|
1540
|
-
synthesize(text: string): Promise<Buffer>;
|
|
1541
|
-
/**
|
|
1542
|
-
* Synthesise text and yield audio chunks as they arrive (streaming).
|
|
1543
|
-
*
|
|
1544
|
-
* The yielded buffers are raw PCM at 16 kHz (or whatever `outputFormat` is
|
|
1545
|
-
* configured to).
|
|
1546
|
-
*/
|
|
1547
|
-
synthesizeStream(text: string): AsyncGenerator<Buffer>;
|
|
1548
|
-
}
|
|
1549
|
-
|
|
1550
|
-
declare class OpenAITTS {
|
|
1551
|
-
private readonly apiKey;
|
|
1552
|
-
private readonly voice;
|
|
1553
|
-
private readonly model;
|
|
1554
|
-
constructor(apiKey: string, voice?: string, model?: string);
|
|
1555
|
-
/**
|
|
1556
|
-
* Synthesise text to speech and return the full audio as a single Buffer.
|
|
1557
|
-
*
|
|
1558
|
-
* For large chunks (or when latency matters) call `synthesizeStream` instead.
|
|
1559
|
-
*/
|
|
1560
|
-
synthesize(text: string): Promise<Buffer>;
|
|
1561
|
-
/**
|
|
1562
|
-
* Synthesise text and yield audio chunks as they arrive (streaming).
|
|
1563
|
-
*
|
|
1564
|
-
* OpenAI returns 24 kHz PCM16; each chunk is resampled to 16 kHz before
|
|
1565
|
-
* yielding so the output is ready for telephony pipelines.
|
|
1566
|
-
*
|
|
1567
|
-
* The resampler carries state (buffered samples + odd trailing byte)
|
|
1568
|
-
* between chunks — without that state cross-chunk sample alignment drifts
|
|
1569
|
-
* and the caller hears pops / dropped audio (BUG #23, mirror of the
|
|
1570
|
-
* Python `audioop.ratecv` fix).
|
|
1571
|
-
*/
|
|
1572
|
-
synthesizeStream(text: string): AsyncGenerator<Buffer>;
|
|
1573
|
-
/**
|
|
1574
|
-
* Streaming 24 kHz → 16 kHz resampler (PCM16-LE). Maintains cross-chunk
|
|
1575
|
-
* state so the 3:2 pattern doesn't reset at every network read.
|
|
1576
|
-
*/
|
|
1577
|
-
static resampleStreaming(audio: Buffer, ctx: {
|
|
1578
|
-
carryByte: number | null;
|
|
1579
|
-
leftover: number[];
|
|
1580
|
-
}): Buffer;
|
|
1581
|
-
/** @deprecated use {@link resampleStreaming} with persistent state. */
|
|
1582
|
-
static resample24kTo16k(audio: Buffer): Buffer;
|
|
1583
|
-
}
|
|
1584
|
-
|
|
1585
|
-
interface CartesiaTTSOptions {
|
|
1586
|
-
model?: string;
|
|
1635
|
+
type LMNTAudioFormat = 'aac' | 'mp3' | 'mulaw' | 'raw' | 'wav';
|
|
1636
|
+
type LMNTModel = 'blizzard' | 'aurora';
|
|
1637
|
+
type LMNTSampleRate = 8000 | 16000 | 24000;
|
|
1638
|
+
interface LMNTTTSOptions$1 {
|
|
1639
|
+
model?: LMNTModel;
|
|
1587
1640
|
voice?: string;
|
|
1588
1641
|
language?: string;
|
|
1589
|
-
|
|
1590
|
-
|
|
1591
|
-
|
|
1592
|
-
|
|
1642
|
+
format?: LMNTAudioFormat;
|
|
1643
|
+
sampleRate?: LMNTSampleRate;
|
|
1644
|
+
temperature?: number;
|
|
1645
|
+
topP?: number;
|
|
1593
1646
|
baseUrl?: string;
|
|
1594
|
-
apiVersion?: string;
|
|
1595
1647
|
}
|
|
1596
|
-
declare class CartesiaTTS {
|
|
1648
|
+
declare class LMNTTTS {
|
|
1597
1649
|
private readonly apiKey;
|
|
1598
1650
|
private readonly model;
|
|
1599
1651
|
private readonly voice;
|
|
1600
1652
|
private readonly language;
|
|
1653
|
+
private readonly format;
|
|
1601
1654
|
private readonly sampleRate;
|
|
1602
|
-
private readonly speed?;
|
|
1603
|
-
private readonly emotion?;
|
|
1604
|
-
private readonly volume?;
|
|
1655
|
+
private readonly temperature;
|
|
1656
|
+
private readonly topP;
|
|
1605
1657
|
private readonly baseUrl;
|
|
1606
|
-
|
|
1607
|
-
constructor(apiKey: string, opts?: CartesiaTTSOptions);
|
|
1608
|
-
/** Build the JSON payload for the Cartesia bytes endpoint. */
|
|
1658
|
+
constructor(apiKey: string, opts?: LMNTTTSOptions$1);
|
|
1609
1659
|
private buildPayload;
|
|
1610
|
-
|
|
1660
|
+
synthesize(text: string): Promise<Buffer>;
|
|
1661
|
+
/** Yield audio chunks as they arrive — raw PCM_S16LE by default. */
|
|
1662
|
+
synthesizeStream(text: string): AsyncGenerator<Buffer>;
|
|
1663
|
+
}
|
|
1664
|
+
|
|
1665
|
+
interface Transcript$1 {
|
|
1666
|
+
readonly text: string;
|
|
1667
|
+
readonly isFinal: boolean;
|
|
1668
|
+
readonly confidence: number;
|
|
1669
|
+
}
|
|
1670
|
+
type TranscriptCallback$1 = (transcript: Transcript$1) => void;
|
|
1671
|
+
/**
|
|
1672
|
+
* Optional tuning knobs for Deepgram live transcription.
|
|
1673
|
+
*
|
|
1674
|
+
* Mirrors Python's ``DeepgramSTT`` kwargs so callers can lower turn latency
|
|
1675
|
+
* without monkey-patching (BUG #13).
|
|
1676
|
+
*/
|
|
1677
|
+
interface DeepgramSTTOptions$1 {
|
|
1678
|
+
/** Model name. Default ``nova-3``. */
|
|
1679
|
+
readonly model?: string;
|
|
1680
|
+
/** Audio encoding (``linear16`` | ``mulaw`` | etc). Default ``linear16``. */
|
|
1681
|
+
readonly encoding?: string;
|
|
1682
|
+
/** Sample rate in Hz. Default ``16000``. */
|
|
1683
|
+
readonly sampleRate?: number;
|
|
1684
|
+
/**
|
|
1685
|
+
* Voice-activity endpointing threshold in milliseconds.
|
|
1686
|
+
* Lower values reduce turn latency at the cost of more false-start cuts.
|
|
1687
|
+
* Default ``150``.
|
|
1688
|
+
*/
|
|
1689
|
+
readonly endpointingMs?: number;
|
|
1690
|
+
/**
|
|
1691
|
+
* End-of-utterance silence window in milliseconds. Deepgram enforces a
|
|
1692
|
+
* hard minimum of 1000 ms. Set to ``null`` to disable. Default ``1000``.
|
|
1693
|
+
*/
|
|
1694
|
+
readonly utteranceEndMs?: number | null;
|
|
1695
|
+
/** Enable smart formatting (punctuation + numerals). Default ``true``. */
|
|
1696
|
+
readonly smartFormat?: boolean;
|
|
1697
|
+
/** Emit interim (non-final) transcripts. Default ``true``. */
|
|
1698
|
+
readonly interimResults?: boolean;
|
|
1699
|
+
/** Emit VAD events (``SpeechStarted`` / ``UtteranceEnd``). Default ``true``. */
|
|
1700
|
+
readonly vadEvents?: boolean;
|
|
1701
|
+
}
|
|
1702
|
+
declare class DeepgramSTT {
|
|
1703
|
+
private ws;
|
|
1704
|
+
private callbacks;
|
|
1705
|
+
/** Request ID from Deepgram — used to query actual cost post-call. */
|
|
1706
|
+
requestId: string;
|
|
1707
|
+
private readonly apiKey;
|
|
1708
|
+
private readonly language;
|
|
1709
|
+
private readonly model;
|
|
1710
|
+
private readonly encoding;
|
|
1711
|
+
private readonly sampleRate;
|
|
1712
|
+
private readonly endpointingMs;
|
|
1713
|
+
private readonly utteranceEndMs;
|
|
1714
|
+
private readonly smartFormat;
|
|
1715
|
+
private readonly interimResults;
|
|
1716
|
+
private readonly vadEvents;
|
|
1717
|
+
/**
|
|
1718
|
+
* New ergonomic constructor accepting an options object (mirrors Python kwargs).
|
|
1719
|
+
*
|
|
1720
|
+
* Also accepts the legacy positional form
|
|
1721
|
+
* ``(apiKey, language?, model?, encoding?, sampleRate?)`` for backward
|
|
1722
|
+
* compatibility with code that predated BUG #13.
|
|
1723
|
+
*/
|
|
1724
|
+
constructor(apiKey: string, language?: string, model?: string, encoding?: string, sampleRate?: number, options?: DeepgramSTTOptions$1);
|
|
1725
|
+
constructor(apiKey: string, options: DeepgramSTTOptions$1 & {
|
|
1726
|
+
language?: string;
|
|
1727
|
+
});
|
|
1728
|
+
/** Factory for Twilio calls — mulaw 8 kHz. Forwards tuning options through. */
|
|
1729
|
+
static forTwilio(apiKey: string, language?: string, model?: string, options?: DeepgramSTTOptions$1): DeepgramSTT;
|
|
1730
|
+
connect(): Promise<void>;
|
|
1731
|
+
sendAudio(audio: Buffer): void;
|
|
1732
|
+
onTranscript(callback: TranscriptCallback$1): void;
|
|
1733
|
+
close(): void;
|
|
1734
|
+
}
|
|
1735
|
+
|
|
1736
|
+
/** Deepgram streaming STT for Patter pipeline mode. */
|
|
1737
|
+
|
|
1738
|
+
interface DeepgramSTTOptions {
|
|
1739
|
+
/** API key. Falls back to DEEPGRAM_API_KEY env var when omitted. */
|
|
1740
|
+
apiKey?: string;
|
|
1741
|
+
language?: string;
|
|
1742
|
+
model?: string;
|
|
1743
|
+
encoding?: string;
|
|
1744
|
+
sampleRate?: number;
|
|
1745
|
+
endpointingMs?: number;
|
|
1746
|
+
utteranceEndMs?: number | null;
|
|
1747
|
+
smartFormat?: boolean;
|
|
1748
|
+
interimResults?: boolean;
|
|
1749
|
+
vadEvents?: boolean;
|
|
1750
|
+
}
|
|
1751
|
+
/**
|
|
1752
|
+
* Deepgram streaming STT.
|
|
1753
|
+
*
|
|
1754
|
+
* @example
|
|
1755
|
+
* ```ts
|
|
1756
|
+
* import * as deepgram from "getpatter/stt/deepgram";
|
|
1757
|
+
* const stt = new deepgram.STT(); // reads DEEPGRAM_API_KEY
|
|
1758
|
+
* const stt = new deepgram.STT({ apiKey: "dg_...", endpointingMs: 80 });
|
|
1759
|
+
* ```
|
|
1760
|
+
*/
|
|
1761
|
+
declare class STT$4 extends DeepgramSTT {
|
|
1762
|
+
constructor(opts?: DeepgramSTTOptions);
|
|
1763
|
+
}
|
|
1764
|
+
|
|
1765
|
+
/**
|
|
1766
|
+
* OpenAI Whisper STT adapter for the Patter SDK pipeline mode.
|
|
1767
|
+
*
|
|
1768
|
+
* Buffers incoming PCM16 audio and periodically sends it to the
|
|
1769
|
+
* OpenAI Whisper transcription API as a WAV file.
|
|
1770
|
+
*/
|
|
1771
|
+
interface Transcript {
|
|
1772
|
+
readonly text: string;
|
|
1773
|
+
readonly isFinal: boolean;
|
|
1774
|
+
readonly confidence: number;
|
|
1775
|
+
}
|
|
1776
|
+
type TranscriptCallback = (transcript: Transcript) => void;
|
|
1777
|
+
declare class WhisperSTT {
|
|
1778
|
+
private readonly apiKey;
|
|
1779
|
+
private readonly model;
|
|
1780
|
+
private readonly language;
|
|
1781
|
+
private readonly bufferSize;
|
|
1782
|
+
private buffer;
|
|
1783
|
+
private callbacks;
|
|
1784
|
+
private running;
|
|
1785
|
+
private pendingTranscriptions;
|
|
1786
|
+
constructor(apiKey: string, model?: string, language?: string, bufferSize?: number);
|
|
1787
|
+
/** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
|
|
1788
|
+
static forTwilio(apiKey: string, language?: string, model?: string): WhisperSTT;
|
|
1789
|
+
connect(): Promise<void>;
|
|
1790
|
+
sendAudio(audio: Buffer): void;
|
|
1791
|
+
private trackTranscription;
|
|
1792
|
+
onTranscript(callback: TranscriptCallback): void;
|
|
1793
|
+
close(): Promise<void>;
|
|
1794
|
+
private transcribeBuffer;
|
|
1795
|
+
}
|
|
1796
|
+
|
|
1797
|
+
/** OpenAI Whisper STT for Patter pipeline mode. */
|
|
1798
|
+
|
|
1799
|
+
interface WhisperSTTOptions {
|
|
1800
|
+
/** API key. Falls back to OPENAI_API_KEY env var when omitted. */
|
|
1801
|
+
apiKey?: string;
|
|
1802
|
+
model?: string;
|
|
1803
|
+
language?: string;
|
|
1804
|
+
bufferSize?: number;
|
|
1805
|
+
}
|
|
1806
|
+
/**
|
|
1807
|
+
* OpenAI Whisper STT.
|
|
1808
|
+
*
|
|
1809
|
+
* @example
|
|
1810
|
+
* ```ts
|
|
1811
|
+
* import * as whisper from "getpatter/stt/whisper";
|
|
1812
|
+
* const stt = new whisper.STT(); // reads OPENAI_API_KEY
|
|
1813
|
+
* const stt = new whisper.STT({ apiKey: "sk-...", language: "en" });
|
|
1814
|
+
* ```
|
|
1815
|
+
*/
|
|
1816
|
+
declare class STT$3 extends WhisperSTT {
|
|
1817
|
+
constructor(opts?: WhisperSTTOptions);
|
|
1818
|
+
}
|
|
1819
|
+
|
|
1820
|
+
/** Cartesia streaming STT for Patter pipeline mode. */
|
|
1821
|
+
|
|
1822
|
+
interface CartesiaSTTOptions {
|
|
1823
|
+
/** API key. Falls back to CARTESIA_API_KEY env var when omitted. */
|
|
1824
|
+
apiKey?: string;
|
|
1825
|
+
model?: string;
|
|
1826
|
+
language?: string;
|
|
1827
|
+
encoding?: CartesiaEncoding;
|
|
1828
|
+
sampleRate?: number;
|
|
1829
|
+
baseUrl?: string;
|
|
1830
|
+
}
|
|
1831
|
+
/**
|
|
1832
|
+
* Cartesia streaming STT (ink-whisper).
|
|
1833
|
+
*
|
|
1834
|
+
* @example
|
|
1835
|
+
* ```ts
|
|
1836
|
+
* import * as cartesia from "getpatter/stt/cartesia";
|
|
1837
|
+
* const stt = new cartesia.STT(); // reads CARTESIA_API_KEY
|
|
1838
|
+
* const stt = new cartesia.STT({ apiKey: "..." });
|
|
1839
|
+
* ```
|
|
1840
|
+
*/
|
|
1841
|
+
declare class STT$2 extends CartesiaSTT {
|
|
1842
|
+
constructor(opts?: CartesiaSTTOptions);
|
|
1843
|
+
}
|
|
1844
|
+
|
|
1845
|
+
/** Soniox streaming STT for Patter pipeline mode. */
|
|
1846
|
+
|
|
1847
|
+
interface SonioxSTTOptions {
|
|
1848
|
+
/** API key. Falls back to SONIOX_API_KEY env var when omitted. */
|
|
1849
|
+
apiKey?: string;
|
|
1850
|
+
model?: string;
|
|
1851
|
+
languageHints?: string[];
|
|
1852
|
+
languageHintsStrict?: boolean;
|
|
1853
|
+
sampleRate?: number;
|
|
1854
|
+
numChannels?: number;
|
|
1855
|
+
enableSpeakerDiarization?: boolean;
|
|
1856
|
+
enableLanguageIdentification?: boolean;
|
|
1857
|
+
maxEndpointDelayMs?: number;
|
|
1858
|
+
clientReferenceId?: string;
|
|
1859
|
+
baseUrl?: string;
|
|
1860
|
+
}
|
|
1861
|
+
/**
|
|
1862
|
+
* Soniox streaming STT.
|
|
1863
|
+
*
|
|
1864
|
+
* @example
|
|
1865
|
+
* ```ts
|
|
1866
|
+
* import * as soniox from "getpatter/stt/soniox";
|
|
1867
|
+
* const stt = new soniox.STT(); // reads SONIOX_API_KEY
|
|
1868
|
+
* const stt = new soniox.STT({ apiKey: "..." });
|
|
1869
|
+
* ```
|
|
1870
|
+
*/
|
|
1871
|
+
declare class STT$1 extends SonioxSTT {
|
|
1872
|
+
constructor(opts?: SonioxSTTOptions);
|
|
1873
|
+
}
|
|
1874
|
+
|
|
1875
|
+
/** AssemblyAI Universal Streaming STT for Patter pipeline mode. */
|
|
1876
|
+
|
|
1877
|
+
interface AssemblyAISTTOptions {
|
|
1878
|
+
/** API key. Falls back to ASSEMBLYAI_API_KEY env var when omitted. */
|
|
1879
|
+
apiKey?: string;
|
|
1880
|
+
model?: AssemblyAIModel;
|
|
1881
|
+
encoding?: AssemblyAIEncoding;
|
|
1882
|
+
sampleRate?: number;
|
|
1883
|
+
baseUrl?: string;
|
|
1884
|
+
languageDetection?: boolean;
|
|
1885
|
+
endOfTurnConfidenceThreshold?: number;
|
|
1886
|
+
minTurnSilence?: number;
|
|
1887
|
+
maxTurnSilence?: number;
|
|
1888
|
+
formatTurns?: boolean;
|
|
1889
|
+
keytermsPrompt?: readonly string[];
|
|
1890
|
+
prompt?: string;
|
|
1891
|
+
vadThreshold?: number;
|
|
1892
|
+
speakerLabels?: boolean;
|
|
1893
|
+
maxSpeakers?: number;
|
|
1894
|
+
domain?: string;
|
|
1895
|
+
}
|
|
1896
|
+
/**
|
|
1897
|
+
* AssemblyAI Universal Streaming STT.
|
|
1898
|
+
*
|
|
1899
|
+
* @example
|
|
1900
|
+
* ```ts
|
|
1901
|
+
* import * as assemblyai from "getpatter/stt/assemblyai";
|
|
1902
|
+
* const stt = new assemblyai.STT(); // reads ASSEMBLYAI_API_KEY
|
|
1903
|
+
* const stt = new assemblyai.STT({ apiKey: "..." });
|
|
1904
|
+
* ```
|
|
1905
|
+
*/
|
|
1906
|
+
declare class STT extends AssemblyAISTT {
|
|
1907
|
+
constructor(opts?: AssemblyAISTTOptions);
|
|
1908
|
+
}
|
|
1909
|
+
|
|
1910
|
+
declare class ElevenLabsTTS {
|
|
1911
|
+
private readonly apiKey;
|
|
1912
|
+
private readonly modelId;
|
|
1913
|
+
private readonly outputFormat;
|
|
1914
|
+
private readonly voiceId;
|
|
1915
|
+
constructor(apiKey: string, voiceId?: string, modelId?: string, outputFormat?: string);
|
|
1916
|
+
/**
|
|
1917
|
+
* Synthesise text to speech and return the full audio as a single Buffer.
|
|
1918
|
+
*
|
|
1919
|
+
* For large chunks (or when latency matters) call `synthesizeStream` instead.
|
|
1920
|
+
*/
|
|
1921
|
+
synthesize(text: string): Promise<Buffer>;
|
|
1922
|
+
/**
|
|
1923
|
+
* Synthesise text and yield audio chunks as they arrive (streaming).
|
|
1924
|
+
*
|
|
1925
|
+
* The yielded buffers are raw PCM at 16 kHz (or whatever `outputFormat` is
|
|
1926
|
+
* configured to).
|
|
1927
|
+
*/
|
|
1928
|
+
synthesizeStream(text: string): AsyncGenerator<Buffer>;
|
|
1929
|
+
}
|
|
1930
|
+
|
|
1931
|
+
/** ElevenLabs TTS for Patter pipeline mode. */
|
|
1932
|
+
|
|
1933
|
+
interface ElevenLabsTTSOptions {
|
|
1934
|
+
/** API key. Falls back to ELEVENLABS_API_KEY env var when omitted. */
|
|
1935
|
+
apiKey?: string;
|
|
1936
|
+
voiceId?: string;
|
|
1937
|
+
modelId?: string;
|
|
1938
|
+
outputFormat?: string;
|
|
1939
|
+
}
|
|
1940
|
+
/**
|
|
1941
|
+
* ElevenLabs TTS.
|
|
1942
|
+
*
|
|
1943
|
+
* @example
|
|
1944
|
+
* ```ts
|
|
1945
|
+
* import * as elevenlabs from "getpatter/tts/elevenlabs";
|
|
1946
|
+
* const tts = new elevenlabs.TTS(); // reads ELEVENLABS_API_KEY
|
|
1947
|
+
* const tts = new elevenlabs.TTS({ apiKey: "...", voiceId: "rachel" });
|
|
1948
|
+
* ```
|
|
1949
|
+
*/
|
|
1950
|
+
declare class TTS$4 extends ElevenLabsTTS {
|
|
1951
|
+
constructor(opts?: ElevenLabsTTSOptions);
|
|
1952
|
+
}
|
|
1953
|
+
|
|
1954
|
+
declare class OpenAITTS {
|
|
1955
|
+
private readonly apiKey;
|
|
1956
|
+
private readonly voice;
|
|
1957
|
+
private readonly model;
|
|
1958
|
+
constructor(apiKey: string, voice?: string, model?: string);
|
|
1959
|
+
/**
|
|
1960
|
+
* Synthesise text to speech and return the full audio as a single Buffer.
|
|
1961
|
+
*
|
|
1962
|
+
* For large chunks (or when latency matters) call `synthesizeStream` instead.
|
|
1963
|
+
*/
|
|
1964
|
+
synthesize(text: string): Promise<Buffer>;
|
|
1965
|
+
/**
|
|
1966
|
+
* Synthesise text and yield audio chunks as they arrive (streaming).
|
|
1967
|
+
*
|
|
1968
|
+
* OpenAI returns 24 kHz PCM16; each chunk is resampled to 16 kHz before
|
|
1969
|
+
* yielding so the output is ready for telephony pipelines.
|
|
1970
|
+
*
|
|
1971
|
+
* The resampler carries state (buffered samples + odd trailing byte)
|
|
1972
|
+
* between chunks — without that state cross-chunk sample alignment drifts
|
|
1973
|
+
* and the caller hears pops / dropped audio (BUG #23, mirror of the
|
|
1974
|
+
* Python `audioop.ratecv` fix).
|
|
1975
|
+
*/
|
|
1976
|
+
synthesizeStream(text: string): AsyncGenerator<Buffer>;
|
|
1977
|
+
/**
|
|
1978
|
+
* Streaming 24 kHz → 16 kHz resampler (PCM16-LE). Maintains cross-chunk
|
|
1979
|
+
* state so the 3:2 pattern doesn't reset at every network read.
|
|
1980
|
+
*/
|
|
1981
|
+
static resampleStreaming(audio: Buffer, ctx: {
|
|
1982
|
+
carryByte: number | null;
|
|
1983
|
+
leftover: number[];
|
|
1984
|
+
}): Buffer;
|
|
1985
|
+
/** @deprecated use {@link resampleStreaming} with persistent state. */
|
|
1986
|
+
static resample24kTo16k(audio: Buffer): Buffer;
|
|
1987
|
+
}
|
|
1988
|
+
|
|
1989
|
+
/** OpenAI TTS for Patter pipeline mode. */
|
|
1990
|
+
|
|
1991
|
+
interface OpenAITTSOptions {
|
|
1992
|
+
/** API key. Falls back to OPENAI_API_KEY env var when omitted. */
|
|
1993
|
+
apiKey?: string;
|
|
1994
|
+
voice?: string;
|
|
1995
|
+
model?: string;
|
|
1996
|
+
}
|
|
1997
|
+
/**
|
|
1998
|
+
* OpenAI TTS.
|
|
1999
|
+
*
|
|
2000
|
+
* @example
|
|
2001
|
+
* ```ts
|
|
2002
|
+
* import * as openai from "getpatter/tts/openai";
|
|
2003
|
+
* const tts = new openai.TTS(); // reads OPENAI_API_KEY
|
|
2004
|
+
* const tts = new openai.TTS({ apiKey: "sk-...", voice: "alloy" });
|
|
2005
|
+
* ```
|
|
2006
|
+
*/
|
|
2007
|
+
declare class TTS$3 extends OpenAITTS {
|
|
2008
|
+
constructor(opts?: OpenAITTSOptions);
|
|
2009
|
+
}
|
|
2010
|
+
|
|
2011
|
+
interface CartesiaTTSOptions$1 {
|
|
2012
|
+
model?: string;
|
|
2013
|
+
voice?: string;
|
|
2014
|
+
language?: string;
|
|
2015
|
+
sampleRate?: number;
|
|
2016
|
+
speed?: string | number;
|
|
2017
|
+
emotion?: string | string[];
|
|
2018
|
+
volume?: number;
|
|
2019
|
+
baseUrl?: string;
|
|
2020
|
+
apiVersion?: string;
|
|
2021
|
+
}
|
|
2022
|
+
declare class CartesiaTTS {
|
|
2023
|
+
private readonly apiKey;
|
|
2024
|
+
private readonly model;
|
|
2025
|
+
private readonly voice;
|
|
2026
|
+
private readonly language;
|
|
2027
|
+
private readonly sampleRate;
|
|
2028
|
+
private readonly speed?;
|
|
2029
|
+
private readonly emotion?;
|
|
2030
|
+
private readonly volume?;
|
|
2031
|
+
private readonly baseUrl;
|
|
2032
|
+
private readonly apiVersion;
|
|
2033
|
+
constructor(apiKey: string, opts?: CartesiaTTSOptions$1);
|
|
2034
|
+
/** Build the JSON payload for the Cartesia bytes endpoint. */
|
|
2035
|
+
private buildPayload;
|
|
2036
|
+
/** Synthesize text and return the concatenated audio buffer. */
|
|
1611
2037
|
synthesize(text: string): Promise<Buffer>;
|
|
1612
2038
|
/**
|
|
1613
2039
|
* Synthesize text and yield raw PCM_S16LE chunks at the configured
|
|
@@ -1616,7 +2042,36 @@ declare class CartesiaTTS {
|
|
|
1616
2042
|
synthesizeStream(text: string): AsyncGenerator<Buffer>;
|
|
1617
2043
|
}
|
|
1618
2044
|
|
|
1619
|
-
|
|
2045
|
+
/** Cartesia TTS for Patter pipeline mode. */
|
|
2046
|
+
|
|
2047
|
+
interface CartesiaTTSOptions {
|
|
2048
|
+
/** API key. Falls back to CARTESIA_API_KEY env var when omitted. */
|
|
2049
|
+
apiKey?: string;
|
|
2050
|
+
model?: string;
|
|
2051
|
+
voice?: string;
|
|
2052
|
+
language?: string;
|
|
2053
|
+
sampleRate?: number;
|
|
2054
|
+
speed?: string | number;
|
|
2055
|
+
emotion?: string | string[];
|
|
2056
|
+
volume?: number;
|
|
2057
|
+
baseUrl?: string;
|
|
2058
|
+
apiVersion?: string;
|
|
2059
|
+
}
|
|
2060
|
+
/**
|
|
2061
|
+
* Cartesia TTS (sonic-2).
|
|
2062
|
+
*
|
|
2063
|
+
* @example
|
|
2064
|
+
* ```ts
|
|
2065
|
+
* import * as cartesia from "getpatter/tts/cartesia";
|
|
2066
|
+
* const tts = new cartesia.TTS(); // reads CARTESIA_API_KEY
|
|
2067
|
+
* const tts = new cartesia.TTS({ apiKey: "..." });
|
|
2068
|
+
* ```
|
|
2069
|
+
*/
|
|
2070
|
+
declare class TTS$2 extends CartesiaTTS {
|
|
2071
|
+
constructor(opts?: CartesiaTTSOptions);
|
|
2072
|
+
}
|
|
2073
|
+
|
|
2074
|
+
interface RimeTTSOptions$1 {
|
|
1620
2075
|
model?: string;
|
|
1621
2076
|
speaker?: string;
|
|
1622
2077
|
lang?: string;
|
|
@@ -1647,7 +2102,7 @@ declare class RimeTTS {
|
|
|
1647
2102
|
private readonly phonemizeBetweenBrackets?;
|
|
1648
2103
|
private readonly baseUrl;
|
|
1649
2104
|
private readonly totalTimeoutMs;
|
|
1650
|
-
constructor(apiKey: string, opts?: RimeTTSOptions);
|
|
2105
|
+
constructor(apiKey: string, opts?: RimeTTSOptions$1);
|
|
1651
2106
|
private buildPayload;
|
|
1652
2107
|
synthesize(text: string): Promise<Buffer>;
|
|
1653
2108
|
/**
|
|
@@ -1657,10 +2112,44 @@ declare class RimeTTS {
|
|
|
1657
2112
|
synthesizeStream(text: string): AsyncGenerator<Buffer>;
|
|
1658
2113
|
}
|
|
1659
2114
|
|
|
1660
|
-
|
|
1661
|
-
|
|
1662
|
-
|
|
2115
|
+
/** Rime TTS for Patter pipeline mode. */
|
|
2116
|
+
|
|
2117
|
+
interface RimeTTSOptions {
|
|
2118
|
+
/** API key. Falls back to RIME_API_KEY env var when omitted. */
|
|
2119
|
+
apiKey?: string;
|
|
2120
|
+
model?: string;
|
|
2121
|
+
speaker?: string;
|
|
2122
|
+
lang?: string;
|
|
2123
|
+
sampleRate?: number;
|
|
2124
|
+
repetitionPenalty?: number;
|
|
2125
|
+
temperature?: number;
|
|
2126
|
+
topP?: number;
|
|
2127
|
+
maxTokens?: number;
|
|
2128
|
+
speedAlpha?: number;
|
|
2129
|
+
reduceLatency?: boolean;
|
|
2130
|
+
pauseBetweenBrackets?: boolean;
|
|
2131
|
+
phonemizeBetweenBrackets?: boolean;
|
|
2132
|
+
baseUrl?: string;
|
|
2133
|
+
}
|
|
2134
|
+
/**
|
|
2135
|
+
* Rime TTS (Arcana or Mist models).
|
|
2136
|
+
*
|
|
2137
|
+
* @example
|
|
2138
|
+
* ```ts
|
|
2139
|
+
* import * as rime from "getpatter/tts/rime";
|
|
2140
|
+
* const tts = new rime.TTS(); // reads RIME_API_KEY
|
|
2141
|
+
* const tts = new rime.TTS({ apiKey: "...", speaker: "astra" });
|
|
2142
|
+
* ```
|
|
2143
|
+
*/
|
|
2144
|
+
declare class TTS$1 extends RimeTTS {
|
|
2145
|
+
constructor(opts?: RimeTTSOptions);
|
|
2146
|
+
}
|
|
2147
|
+
|
|
2148
|
+
/** LMNT TTS for Patter pipeline mode. */
|
|
2149
|
+
|
|
1663
2150
|
interface LMNTTTSOptions {
|
|
2151
|
+
/** API key. Falls back to LMNT_API_KEY env var when omitted. */
|
|
2152
|
+
apiKey?: string;
|
|
1664
2153
|
model?: LMNTModel;
|
|
1665
2154
|
voice?: string;
|
|
1666
2155
|
language?: string;
|
|
@@ -1670,21 +2159,318 @@ interface LMNTTTSOptions {
|
|
|
1670
2159
|
topP?: number;
|
|
1671
2160
|
baseUrl?: string;
|
|
1672
2161
|
}
|
|
1673
|
-
|
|
2162
|
+
/**
|
|
2163
|
+
* LMNT TTS (blizzard/aurora).
|
|
2164
|
+
*
|
|
2165
|
+
* @example
|
|
2166
|
+
* ```ts
|
|
2167
|
+
* import * as lmnt from "getpatter/tts/lmnt";
|
|
2168
|
+
* const tts = new lmnt.TTS(); // reads LMNT_API_KEY
|
|
2169
|
+
* const tts = new lmnt.TTS({ apiKey: "...", voice: "leah" });
|
|
2170
|
+
* ```
|
|
2171
|
+
*/
|
|
2172
|
+
declare class TTS extends LMNTTTS {
|
|
2173
|
+
constructor(opts?: LMNTTTSOptions);
|
|
2174
|
+
}
|
|
2175
|
+
|
|
2176
|
+
/** OpenAI LLM for Patter pipeline mode. */
|
|
2177
|
+
|
|
2178
|
+
interface OpenAILLMOptions {
|
|
2179
|
+
/** API key. Falls back to OPENAI_API_KEY env var when omitted. */
|
|
2180
|
+
apiKey?: string;
|
|
2181
|
+
/** Chat Completions model id. Defaults to ``"gpt-4o-mini"``. */
|
|
2182
|
+
model?: string;
|
|
2183
|
+
}
|
|
2184
|
+
/**
|
|
2185
|
+
* OpenAI Chat Completions LLM provider.
|
|
2186
|
+
*
|
|
2187
|
+
* @example
|
|
2188
|
+
* ```ts
|
|
2189
|
+
* import * as openai from "getpatter/llm/openai";
|
|
2190
|
+
* const llm = new openai.LLM(); // reads OPENAI_API_KEY
|
|
2191
|
+
* const llm = new openai.LLM({ apiKey: "sk-...", model: "gpt-4o-mini" });
|
|
2192
|
+
* ```
|
|
2193
|
+
*/
|
|
2194
|
+
declare class LLM$4 extends OpenAILLMProvider {
|
|
2195
|
+
constructor(opts?: OpenAILLMOptions);
|
|
2196
|
+
}
|
|
2197
|
+
|
|
2198
|
+
/**
|
|
2199
|
+
* Anthropic Claude LLM provider for Patter's pipeline mode.
|
|
2200
|
+
*
|
|
2201
|
+
* Implements the ``LLMProvider`` interface from ``../llm-loop`` on top
|
|
2202
|
+
* of Anthropic's Messages API with streaming via Server-Sent Events.
|
|
2203
|
+
* OpenAI-style ``messages`` / ``tools`` inputs are translated into the
|
|
2204
|
+
* Anthropic shape and the vendor event stream is normalised back into
|
|
2205
|
+
* Patter's ``{ type: 'text' | 'tool_call' | 'done' }`` chunk protocol.
|
|
2206
|
+
*
|
|
2207
|
+
* Portions adapted from LiveKit Agents
|
|
2208
|
+
* (https://github.com/livekit/agents, commit
|
|
2209
|
+
* 78a66bcf79c5cea82989401c408f1dff4b961a5b,
|
|
2210
|
+
* file livekit-plugins/livekit-plugins-anthropic/livekit/plugins/anthropic/llm.py),
|
|
2211
|
+
* licensed under Apache License 2.0. Copyright 2023 LiveKit, Inc.
|
|
2212
|
+
*
|
|
2213
|
+
* Adaptations from the LiveKit source:
|
|
2214
|
+
* * Ported the Python async class pair (``llm.LLM`` /
|
|
2215
|
+
* ``llm.LLMStream``) into a single TypeScript class that satisfies
|
|
2216
|
+
* Patter's ``LLMProvider`` interface.
|
|
2217
|
+
* * Uses native ``fetch`` + SSE parsing instead of the official
|
|
2218
|
+
* ``@anthropic-ai/sdk`` to keep Patter's runtime dependencies lean
|
|
2219
|
+
* (mirrors how ``OpenAILLMProvider`` is implemented in
|
|
2220
|
+
* ``llm-loop.ts``).
|
|
2221
|
+
* * Maps Anthropic event types (``content_block_start``,
|
|
2222
|
+
* ``content_block_delta``, ``content_block_stop``) to the Patter
|
|
2223
|
+
* chunk protocol.
|
|
2224
|
+
*/
|
|
2225
|
+
|
|
2226
|
+
interface AnthropicLLMOptions$1 {
|
|
2227
|
+
apiKey: string;
|
|
2228
|
+
model?: string;
|
|
2229
|
+
maxTokens?: number;
|
|
2230
|
+
temperature?: number;
|
|
2231
|
+
baseUrl?: string;
|
|
2232
|
+
anthropicVersion?: string;
|
|
2233
|
+
}
|
|
2234
|
+
/** LLM provider backed by Anthropic's Messages API (streaming). */
|
|
2235
|
+
declare class AnthropicLLMProvider implements LLMProvider {
|
|
2236
|
+
private readonly apiKey;
|
|
2237
|
+
private readonly model;
|
|
2238
|
+
private readonly maxTokens;
|
|
2239
|
+
private readonly temperature?;
|
|
2240
|
+
private readonly url;
|
|
2241
|
+
private readonly anthropicVersion;
|
|
2242
|
+
constructor(options: AnthropicLLMOptions$1);
|
|
2243
|
+
stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
|
|
2244
|
+
}
|
|
2245
|
+
|
|
2246
|
+
/** Anthropic Claude LLM for Patter pipeline mode. */
|
|
2247
|
+
|
|
2248
|
+
interface AnthropicLLMOptions {
|
|
2249
|
+
/** API key. Falls back to ANTHROPIC_API_KEY env var when omitted. */
|
|
2250
|
+
apiKey?: string;
|
|
2251
|
+
/** Anthropic Messages API model id (e.g. ``"claude-3-5-sonnet-20241022"``). */
|
|
2252
|
+
model?: string;
|
|
2253
|
+
/** Maximum number of tokens to sample. Defaults to the adapter default. */
|
|
2254
|
+
maxTokens?: number;
|
|
2255
|
+
/** Sampling temperature. */
|
|
2256
|
+
temperature?: number;
|
|
2257
|
+
/** Override the Messages API base URL (rarely needed). */
|
|
2258
|
+
baseUrl?: string;
|
|
2259
|
+
/** ``anthropic-version`` header override. */
|
|
2260
|
+
anthropicVersion?: string;
|
|
2261
|
+
}
|
|
2262
|
+
/**
|
|
2263
|
+
* Anthropic Claude LLM provider (Messages API, streaming).
|
|
2264
|
+
*
|
|
2265
|
+
* @example
|
|
2266
|
+
* ```ts
|
|
2267
|
+
* import * as anthropic from "getpatter/llm/anthropic";
|
|
2268
|
+
* const llm = new anthropic.LLM(); // reads ANTHROPIC_API_KEY
|
|
2269
|
+
* const llm = new anthropic.LLM({ apiKey: "sk-ant-...", model: "claude-3-5-sonnet-20241022" });
|
|
2270
|
+
* ```
|
|
2271
|
+
*/
|
|
2272
|
+
declare class LLM$3 extends AnthropicLLMProvider {
|
|
2273
|
+
constructor(opts?: AnthropicLLMOptions);
|
|
2274
|
+
}
|
|
2275
|
+
|
|
2276
|
+
/**
|
|
2277
|
+
* Groq LLM provider for Patter's pipeline mode.
|
|
2278
|
+
*
|
|
2279
|
+
* Groq exposes an OpenAI-compatible Chat Completions API. We reuse the
|
|
2280
|
+
* streaming code path by implementing the same SSE parser as
|
|
2281
|
+
* ``OpenAILLMProvider`` but pointed at ``api.groq.com``.
|
|
2282
|
+
*
|
|
2283
|
+
* Portions adapted from LiveKit Agents
|
|
2284
|
+
* (https://github.com/livekit/agents, commit
|
|
2285
|
+
* 78a66bcf79c5cea82989401c408f1dff4b961a5b,
|
|
2286
|
+
* file livekit-plugins/livekit-plugins-groq/livekit/plugins/groq/services.py),
|
|
2287
|
+
* licensed under Apache License 2.0. Copyright LiveKit, Inc.
|
|
2288
|
+
*
|
|
2289
|
+
* Adaptations from the LiveKit source:
|
|
2290
|
+
* * Ported the Python ``groq.LLM`` subclass (which subclasses the
|
|
2291
|
+
* LiveKit OpenAI plugin) into a tiny TypeScript wrapper that swaps
|
|
2292
|
+
* the base URL and defaults to ``llama-3.3-70b-versatile``.
|
|
2293
|
+
*/
|
|
2294
|
+
|
|
2295
|
+
interface GroqLLMOptions$1 {
|
|
2296
|
+
apiKey: string;
|
|
2297
|
+
model?: string;
|
|
2298
|
+
baseUrl?: string;
|
|
2299
|
+
}
|
|
2300
|
+
/** LLM provider backed by Groq's OpenAI-compatible Chat Completions API. */
|
|
2301
|
+
declare class GroqLLMProvider implements LLMProvider {
|
|
1674
2302
|
private readonly apiKey;
|
|
1675
2303
|
private readonly model;
|
|
1676
|
-
private readonly voice;
|
|
1677
|
-
private readonly language;
|
|
1678
|
-
private readonly format;
|
|
1679
|
-
private readonly sampleRate;
|
|
1680
|
-
private readonly temperature;
|
|
1681
|
-
private readonly topP;
|
|
1682
2304
|
private readonly baseUrl;
|
|
1683
|
-
constructor(
|
|
1684
|
-
|
|
1685
|
-
|
|
1686
|
-
|
|
1687
|
-
|
|
2305
|
+
constructor(options: GroqLLMOptions$1);
|
|
2306
|
+
stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
|
|
2307
|
+
}
|
|
2308
|
+
|
|
2309
|
+
/** Groq LLM for Patter pipeline mode. */
|
|
2310
|
+
|
|
2311
|
+
interface GroqLLMOptions {
|
|
2312
|
+
/** API key. Falls back to GROQ_API_KEY env var when omitted. */
|
|
2313
|
+
apiKey?: string;
|
|
2314
|
+
/** Model id (e.g. ``"llama-3.3-70b-versatile"``). */
|
|
2315
|
+
model?: string;
|
|
2316
|
+
/** Override the OpenAI-compatible base URL (rarely needed). */
|
|
2317
|
+
baseUrl?: string;
|
|
2318
|
+
}
|
|
2319
|
+
/**
|
|
2320
|
+
* Groq LLM provider (OpenAI-compatible Chat Completions, streaming).
|
|
2321
|
+
*
|
|
2322
|
+
* @example
|
|
2323
|
+
* ```ts
|
|
2324
|
+
* import * as groq from "getpatter/llm/groq";
|
|
2325
|
+
* const llm = new groq.LLM(); // reads GROQ_API_KEY
|
|
2326
|
+
* const llm = new groq.LLM({ apiKey: "gsk_...", model: "llama-3.3-70b-versatile" });
|
|
2327
|
+
* ```
|
|
2328
|
+
*/
|
|
2329
|
+
declare class LLM$2 extends GroqLLMProvider {
|
|
2330
|
+
constructor(opts?: GroqLLMOptions);
|
|
2331
|
+
}
|
|
2332
|
+
|
|
2333
|
+
/**
|
|
2334
|
+
* Cerebras LLM provider for Patter's pipeline mode.
|
|
2335
|
+
*
|
|
2336
|
+
* Cerebras exposes an OpenAI-compatible Chat Completions API at
|
|
2337
|
+
* ``https://api.cerebras.ai/v1``. This provider reuses the OpenAI SSE
|
|
2338
|
+
* parser from ``groq-llm.ts`` and optionally enables gzip request-body
|
|
2339
|
+
* compression to reduce TTFT for requests with large prompts
|
|
2340
|
+
* (see https://inference-docs.cerebras.ai/payload-optimization).
|
|
2341
|
+
*
|
|
2342
|
+
* Portions adapted from LiveKit Agents
|
|
2343
|
+
* (https://github.com/livekit/agents, commit
|
|
2344
|
+
* 78a66bcf79c5cea82989401c408f1dff4b961a5b,
|
|
2345
|
+
* file livekit-plugins/livekit-plugins-cerebras/livekit/plugins/cerebras/llm.py),
|
|
2346
|
+
* licensed under Apache License 2.0. Copyright 2026 LiveKit, Inc.
|
|
2347
|
+
*
|
|
2348
|
+
* Adaptations from the LiveKit source:
|
|
2349
|
+
* * LiveKit's ``cerebras.LLM`` subclasses the LiveKit OpenAI plugin.
|
|
2350
|
+
* Patter's analogue is a tiny wrapper around ``fetch`` that swaps
|
|
2351
|
+
* the base URL and default model.
|
|
2352
|
+
* * The msgpack payload optimisation from LiveKit is Python-only
|
|
2353
|
+
* (msgpack in Node land isn't as standard); only gzip compression
|
|
2354
|
+
* is ported. Enable with ``gzipCompression: true``.
|
|
2355
|
+
*/
|
|
2356
|
+
|
|
2357
|
+
interface CerebrasLLMOptions$1 {
|
|
2358
|
+
apiKey: string;
|
|
2359
|
+
model?: string;
|
|
2360
|
+
baseUrl?: string;
|
|
2361
|
+
/** Gzip request payloads for faster TTFT on large prompts. */
|
|
2362
|
+
gzipCompression?: boolean;
|
|
2363
|
+
}
|
|
2364
|
+
/** LLM provider backed by Cerebras's OpenAI-compatible Inference API. */
|
|
2365
|
+
declare class CerebrasLLMProvider implements LLMProvider {
|
|
2366
|
+
private readonly apiKey;
|
|
2367
|
+
private readonly model;
|
|
2368
|
+
private readonly baseUrl;
|
|
2369
|
+
private readonly gzipCompression;
|
|
2370
|
+
constructor(options: CerebrasLLMOptions$1);
|
|
2371
|
+
stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
|
|
2372
|
+
}
|
|
2373
|
+
|
|
2374
|
+
/** Cerebras LLM for Patter pipeline mode. */
|
|
2375
|
+
|
|
2376
|
+
interface CerebrasLLMOptions {
|
|
2377
|
+
/** API key. Falls back to CEREBRAS_API_KEY env var when omitted. */
|
|
2378
|
+
apiKey?: string;
|
|
2379
|
+
/** Model id (e.g. ``"llama3.1-8b"``). */
|
|
2380
|
+
model?: string;
|
|
2381
|
+
/** Override the OpenAI-compatible base URL (rarely needed). */
|
|
2382
|
+
baseUrl?: string;
|
|
2383
|
+
/** Gzip request payloads for faster TTFT on large prompts. */
|
|
2384
|
+
gzipCompression?: boolean;
|
|
2385
|
+
}
|
|
2386
|
+
/**
|
|
2387
|
+
* Cerebras LLM provider (OpenAI-compatible Inference API, streaming).
|
|
2388
|
+
*
|
|
2389
|
+
* @example
|
|
2390
|
+
* ```ts
|
|
2391
|
+
* import * as cerebras from "getpatter/llm/cerebras";
|
|
2392
|
+
* const llm = new cerebras.LLM(); // reads CEREBRAS_API_KEY
|
|
2393
|
+
* const llm = new cerebras.LLM({ apiKey: "csk-...", model: "llama3.1-8b" });
|
|
2394
|
+
* ```
|
|
2395
|
+
*/
|
|
2396
|
+
declare class LLM$1 extends CerebrasLLMProvider {
|
|
2397
|
+
constructor(opts?: CerebrasLLMOptions);
|
|
2398
|
+
}
|
|
2399
|
+
|
|
2400
|
+
/**
|
|
2401
|
+
* Google Gemini LLM provider for Patter's pipeline mode.
|
|
2402
|
+
*
|
|
2403
|
+
* Implements the ``LLMProvider`` interface against the Gemini Developer
|
|
2404
|
+
* API's streaming endpoint (``:streamGenerateContent?alt=sse``).
|
|
2405
|
+
* OpenAI-style messages/tools are translated into Gemini's ``contents``
|
|
2406
|
+
* and ``tools`` shapes, and streamed response parts are normalised to
|
|
2407
|
+
* Patter's ``{ type: 'text' | 'tool_call' | 'done' }`` chunks.
|
|
2408
|
+
*
|
|
2409
|
+
* Portions adapted from LiveKit Agents
|
|
2410
|
+
* (https://github.com/livekit/agents, commit
|
|
2411
|
+
* 78a66bcf79c5cea82989401c408f1dff4b961a5b,
|
|
2412
|
+
* file livekit-plugins/livekit-plugins-google/livekit/plugins/google/llm.py),
|
|
2413
|
+
* licensed under Apache License 2.0. Copyright 2023 LiveKit, Inc.
|
|
2414
|
+
*
|
|
2415
|
+
* Adaptations from the LiveKit source:
|
|
2416
|
+
* * LiveKit uses the ``google-genai`` Python SDK. The TypeScript port
|
|
2417
|
+
* uses native ``fetch`` against the REST SSE endpoint so we don't
|
|
2418
|
+
* pull in a large SDK dependency.
|
|
2419
|
+
* * Collapsed the Python ``llm.LLM`` / ``llm.LLMStream`` pair into a
|
|
2420
|
+
* single class that satisfies Patter's ``LLMProvider`` interface.
|
|
2421
|
+
* * Dropped Vertex AI support (which requires GCP auth) — only the
|
|
2422
|
+
* Developer API (API key) path is ported. Vertex can be added by a
|
|
2423
|
+
* follow-up PR once credential plumbing is in place.
|
|
2424
|
+
*/
|
|
2425
|
+
|
|
2426
|
+
interface GoogleLLMOptions$1 {
|
|
2427
|
+
apiKey: string;
|
|
2428
|
+
model?: string;
|
|
2429
|
+
baseUrl?: string;
|
|
2430
|
+
temperature?: number;
|
|
2431
|
+
maxOutputTokens?: number;
|
|
2432
|
+
}
|
|
2433
|
+
/** LLM provider backed by Google Gemini (Developer API, streaming SSE). */
|
|
2434
|
+
declare class GoogleLLMProvider implements LLMProvider {
|
|
2435
|
+
private readonly apiKey;
|
|
2436
|
+
private readonly model;
|
|
2437
|
+
private readonly baseUrl;
|
|
2438
|
+
private readonly temperature?;
|
|
2439
|
+
private readonly maxOutputTokens?;
|
|
2440
|
+
constructor(options: GoogleLLMOptions$1);
|
|
2441
|
+
stream(messages: Array<Record<string, unknown>>, tools?: Array<Record<string, unknown>> | null): AsyncGenerator<LLMChunk, void, unknown>;
|
|
2442
|
+
}
|
|
2443
|
+
|
|
2444
|
+
/** Google Gemini LLM for Patter pipeline mode. */
|
|
2445
|
+
|
|
2446
|
+
interface GoogleLLMOptions {
|
|
2447
|
+
/**
|
|
2448
|
+
* API key. Falls back to ``GEMINI_API_KEY`` first, then ``GOOGLE_API_KEY``.
|
|
2449
|
+
* (Google's CLI tooling uses ``GEMINI_API_KEY``; ``GOOGLE_API_KEY`` is the
|
|
2450
|
+
* legacy/alt name accepted for parity with other SDKs.)
|
|
2451
|
+
*/
|
|
2452
|
+
apiKey?: string;
|
|
2453
|
+
/** Model id (e.g. ``"gemini-2.5-flash"``). */
|
|
2454
|
+
model?: string;
|
|
2455
|
+
/** Override the Generative Language API base URL (rarely needed). */
|
|
2456
|
+
baseUrl?: string;
|
|
2457
|
+
/** Sampling temperature. */
|
|
2458
|
+
temperature?: number;
|
|
2459
|
+
/** Maximum output tokens. */
|
|
2460
|
+
maxOutputTokens?: number;
|
|
2461
|
+
}
|
|
2462
|
+
/**
|
|
2463
|
+
* Google Gemini LLM provider (Developer API, streaming SSE).
|
|
2464
|
+
*
|
|
2465
|
+
* @example
|
|
2466
|
+
* ```ts
|
|
2467
|
+
* import * as google from "getpatter/llm/google";
|
|
2468
|
+
* const llm = new google.LLM(); // reads GEMINI_API_KEY or GOOGLE_API_KEY
|
|
2469
|
+
* const llm = new google.LLM({ apiKey: "AIza...", model: "gemini-2.5-flash" });
|
|
2470
|
+
* ```
|
|
2471
|
+
*/
|
|
2472
|
+
declare class LLM extends GoogleLLMProvider {
|
|
2473
|
+
constructor(opts?: GoogleLLMOptions);
|
|
1688
2474
|
}
|
|
1689
2475
|
|
|
1690
2476
|
/**
|
|
@@ -2048,4 +2834,4 @@ declare class BackgroundAudioPlayer implements BackgroundAudioPlayer$1 {
|
|
|
2048
2834
|
private resampleTo;
|
|
2049
2835
|
}
|
|
2050
2836
|
|
|
2051
|
-
export { type Agent, type AgentOptions, AllProvidersFailedError, type AnthropicConversion, type AnthropicMessage, type AssemblyAIEncoding, type AssemblyAIModel, AssemblyAISTT, type AssemblyAISTTOptions, type AudioConfig, type AudioSource, AuthenticationError, type BackgroundAudioOptions, BackgroundAudioPlayer, BuiltinAudioClip, type BuiltinAudioClipName, type BuiltinPcmSource, type Call, type CallControl, type CallEventHandler, type CallMetrics, CallMetricsAccumulator, type CallOptions, type CallRecord, type CartesiaEncoding, CartesiaSTT, type CartesiaSTTOptions, CartesiaTTS, type CartesiaTTSOptions, ChatContext, type ChatMessage, type ChatRole, type ConnectOptions, type CostBreakdown, type CreateAgentOptions, DEFAULT_MIN_SENTENCE_LEN, DEFAULT_PRICING, DTMF_EVENTS, DeepgramSTT, type DefineToolInput, type DtmfEvent, ElevenLabsConvAIAdapter, ElevenLabsTTS, FallbackLLMProvider, type FallbackLLMProviderOptions, type FilePcmSource, GEMINI_DEFAULT_INPUT_SR, GEMINI_DEFAULT_OUTPUT_SR, GeminiLiveAdapter, type GeminiLiveEventHandler, type Guardrail, type HookContext, IVRActivity, type IVRActivityOptions, type IVRToolDefinition, type IncomingMessage, type JobCallback, type LLMChunk, LLMLoop, type LLMProvider, type LMNTAudioFormat, type LMNTModel, type LMNTSampleRate, LMNTTTS, type LMNTTTSOptions, type LatencyBreakdown, type LocalCallOptions, type LocalConfig, type LocalOptions, type Logger, type LoopCallback, type MessageHandler, MetricsStore, OpenAILLMProvider, type OpenAIMessage, OpenAIRealtimeAdapter, OpenAITTS, type ParamSpec, PartialStreamError, Patter, PatterConnectionError, PatterError, type PatterOptions, type PhoneNumber, PipelineHookExecutor, type PipelineHooks, type PipelineMessageHandler, type ProviderPricing, ProvisionError, type RawPcmSource, RemoteMessageHandler, RimeTTS, type RimeTTSOptions, type SSEEvent, type STTConfig, type ScheduleHandle, SentenceChunker, type ServeOptions, type SilenceCallback, SonioxSTT, type SonioxSTTOptions, type TTSConfig, 
TestSession, TfidfLoopDetector, type TfidfLoopDetectorOptions, type ToolDefinition, type TunnelHandle, type TurnMetrics, ULTRAVOX_DEFAULT_API_BASE, ULTRAVOX_DEFAULT_SR, type UltravoxEventHandler, UltravoxRealtimeAdapter, WhisperSTT, builtinClipPath, calculateRealtimeCost, calculateSttCost, calculateTelephonyCost, calculateTtsCost, callsToCsv, callsToJson, deepgram, defineTool, elevenlabs, filterEmoji, filterForTTS, filterMarkdown, formatDtmf, getLogger, isRemoteUrl, isWebSocketUrl, makeAuthMiddleware, mergePricing, mixPcm, mountApi, mountDashboard, mulawToPcm16, notifyDashboard, openaiTts, pcm16ToMulaw, resample16kTo8k, resample24kTo16k, resample8kTo16k, resamplePcm, scheduleCron, scheduleInterval, scheduleOnce, selectSoundFromList, setLogger, startTunnel, whisper };
|
|
2837
|
+
export { type Agent, type AgentOptions, AllProvidersFailedError, type AnthropicConversion, LLM$3 as AnthropicLLM, type AnthropicLLMOptions, type AnthropicMessage, type AssemblyAIEncoding, type AssemblyAIModel, STT as AssemblyAISTT, type AssemblyAISTTOptions, type AudioConfig, type AudioSource, AuthenticationError, type BackgroundAudioOptions, BackgroundAudioPlayer, BuiltinAudioClip, type BuiltinAudioClipName, type BuiltinPcmSource, type Call, type CallControl, type CallEventHandler, type CallMetrics, CallMetricsAccumulator, type CallOptions, type CallRecord, type CartesiaEncoding, STT$2 as CartesiaSTT, type CartesiaSTTOptions, TTS$2 as CartesiaTTS, type CartesiaTTSOptions, LLM$1 as CerebrasLLM, type CerebrasLLMOptions, ChatContext, type ChatMessage, type ChatRole, CloudflareTunnel, type ConnectOptions, type CostBreakdown, type CreateAgentOptions, DEFAULT_MIN_SENTENCE_LEN, DEFAULT_PRICING, DTMF_EVENTS, STT$4 as DeepgramSTT, type DeepgramSTTOptions, type DefineToolInput, type DtmfEvent, ConvAI as ElevenLabsConvAI, ElevenLabsConvAIAdapter, type ConvAIOptions as ElevenLabsConvAIOptions, TTS$4 as ElevenLabsTTS, type ElevenLabsTTSOptions, FallbackLLMProvider, type FallbackLLMProviderOptions, type FilePcmSource, GEMINI_DEFAULT_INPUT_SR, GEMINI_DEFAULT_OUTPUT_SR, GeminiLiveAdapter, type GeminiLiveEventHandler, LLM as GoogleLLM, type GoogleLLMOptions, LLM$2 as GroqLLM, type GroqLLMOptions, Guardrail$1 as Guardrail, type GuardrailOptions, type HookContext, IVRActivity, type IVRActivityOptions, type IVRToolDefinition, type IncomingMessage, type JobCallback, type LLMChunk, LLMLoop, type LLMProvider, type LMNTAudioFormat, type LMNTModel, type LMNTSampleRate, TTS as LMNTTTS, type LMNTTTSOptions, type LatencyBreakdown, type LocalCallOptions, type LocalConfig, type LocalOptions, type Logger, type LoopCallback, type MessageHandler, MetricsStore, LLM$4 as OpenAILLM, type OpenAILLMOptions, OpenAILLMProvider, type OpenAIMessage, Realtime as OpenAIRealtime, OpenAIRealtimeAdapter, type 
RealtimeOptions as OpenAIRealtimeOptions, TTS$3 as OpenAITTS, type OpenAITTSOptions, type ParamSpec, PartialStreamError, Patter, PatterConnectionError, PatterError, type PatterOptions, type PhoneNumber, PipelineHookExecutor, type PipelineHooks, type PipelineMessageHandler, type ProviderPricing, ProvisionError, type RawPcmSource, RemoteMessageHandler, TTS$1 as RimeTTS, type RimeTTSOptions, type SSEEvent, type STTConfig, type ScheduleHandle, SentenceChunker, type ServeOptions, type SilenceCallback, STT$1 as SonioxSTT, type SonioxSTTOptions$1 as SonioxSTTOptions, Static as StaticTunnel, type TTSConfig, Carrier as Telnyx, type TelnyxCarrierOptions, TestSession, TfidfLoopDetector, type TfidfLoopDetectorOptions, Tool, type ToolDefinition, type ToolHandler, type ToolOptions, type TunnelHandle, type TurnMetrics, Carrier$1 as Twilio, type TwilioCarrierOptions, ULTRAVOX_DEFAULT_API_BASE, ULTRAVOX_DEFAULT_SR, type UltravoxEventHandler, UltravoxRealtimeAdapter, STT$3 as WhisperSTT, type WhisperSTTOptions, builtinClipPath, calculateRealtimeCost, calculateSttCost, calculateTelephonyCost, calculateTtsCost, callsToCsv, callsToJson, deepgram, defineTool, elevenlabs, filterEmoji, filterForTTS, filterMarkdown, formatDtmf, getLogger, guardrail, isRemoteUrl, isWebSocketUrl, makeAuthMiddleware, mergePricing, mixPcm, mountApi, mountDashboard, mulawToPcm16, notifyDashboard, openaiTts, pcm16ToMulaw, resample16kTo8k, resample24kTo16k, resample8kTo16k, resamplePcm, scheduleCron, scheduleInterval, scheduleOnce, selectSoundFromList, setLogger, startTunnel, tool, whisper };
|