getpatter 0.4.4 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +136 -162
- package/dist/carrier-config-CPG5CROM.mjs +84 -0
- package/dist/{chunk-O3RQG3NL.mjs → chunk-757NVN4L.mjs} +129 -544
- package/dist/index.d.mts +771 -292
- package/dist/index.d.ts +771 -292
- package/dist/index.js +1414 -1061
- package/dist/index.mjs +1141 -456
- package/dist/{test-mode-ASSLSQU2.mjs → test-mode-YFOL2HYH.mjs} +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -10,18 +10,15 @@ import {
|
|
|
10
10
|
DEFAULT_PRICING,
|
|
11
11
|
DeepgramSTT,
|
|
12
12
|
ElevenLabsConvAIAdapter,
|
|
13
|
-
ElevenLabsTTS,
|
|
14
13
|
EmbeddedServer,
|
|
15
14
|
LLMLoop,
|
|
16
15
|
MetricsStore,
|
|
17
16
|
OpenAILLMProvider,
|
|
18
17
|
OpenAIRealtimeAdapter,
|
|
19
|
-
OpenAITTS,
|
|
20
18
|
PipelineHookExecutor,
|
|
21
19
|
RemoteMessageHandler,
|
|
22
20
|
SentenceChunker,
|
|
23
21
|
TestSession,
|
|
24
|
-
WhisperSTT,
|
|
25
22
|
calculateRealtimeCost,
|
|
26
23
|
calculateSttCost,
|
|
27
24
|
calculateTelephonyCost,
|
|
@@ -39,7 +36,7 @@ import {
|
|
|
39
36
|
resample16kTo8k,
|
|
40
37
|
resample24kTo16k,
|
|
41
38
|
resample8kTo16k
|
|
42
|
-
} from "./chunk-
|
|
39
|
+
} from "./chunk-757NVN4L.mjs";
|
|
43
40
|
import {
|
|
44
41
|
getLogger,
|
|
45
42
|
setLogger
|
|
@@ -186,74 +183,64 @@ var PatterConnection = class {
|
|
|
186
183
|
}
|
|
187
184
|
};
|
|
188
185
|
|
|
189
|
-
// src/
|
|
190
|
-
var
|
|
191
|
-
|
|
186
|
+
// src/engines/openai.ts
|
|
187
|
+
var Realtime = class {
|
|
188
|
+
kind = "openai_realtime";
|
|
192
189
|
apiKey;
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
constructor(
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
language: this.language
|
|
206
|
-
};
|
|
207
|
-
if (this.options) out.options = { ...this.options };
|
|
208
|
-
return out;
|
|
190
|
+
model;
|
|
191
|
+
voice;
|
|
192
|
+
constructor(opts = {}) {
|
|
193
|
+
const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
|
|
194
|
+
if (!key) {
|
|
195
|
+
throw new Error(
|
|
196
|
+
"OpenAI Realtime requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
|
|
197
|
+
);
|
|
198
|
+
}
|
|
199
|
+
this.apiKey = key;
|
|
200
|
+
this.model = opts.model ?? "gpt-4o-mini-realtime-preview";
|
|
201
|
+
this.voice = opts.voice ?? "alloy";
|
|
209
202
|
}
|
|
210
203
|
};
|
|
211
|
-
|
|
212
|
-
|
|
204
|
+
|
|
205
|
+
// src/engines/elevenlabs.ts
|
|
206
|
+
var ConvAI = class {
|
|
207
|
+
kind = "elevenlabs_convai";
|
|
213
208
|
apiKey;
|
|
209
|
+
agentId;
|
|
214
210
|
voice;
|
|
215
|
-
constructor(
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
211
|
+
constructor(opts = {}) {
|
|
212
|
+
const key = opts.apiKey ?? process.env.ELEVENLABS_API_KEY;
|
|
213
|
+
const agent = opts.agentId ?? process.env.ELEVENLABS_AGENT_ID;
|
|
214
|
+
if (!key) {
|
|
215
|
+
throw new Error(
|
|
216
|
+
"ElevenLabs ConvAI requires an apiKey. Pass { apiKey: '...' } or set ELEVENLABS_API_KEY in the environment."
|
|
217
|
+
);
|
|
218
|
+
}
|
|
219
|
+
if (!agent) {
|
|
220
|
+
throw new Error(
|
|
221
|
+
"ElevenLabs ConvAI requires an agentId. Pass { agentId: 'agent_...' } or set ELEVENLABS_AGENT_ID in the environment."
|
|
222
|
+
);
|
|
223
|
+
}
|
|
224
|
+
this.apiKey = key;
|
|
225
|
+
this.agentId = agent;
|
|
226
|
+
this.voice = opts.voice;
|
|
219
227
|
}
|
|
220
|
-
|
|
221
|
-
|
|
228
|
+
};
|
|
229
|
+
|
|
230
|
+
// src/tunnels/index.ts
|
|
231
|
+
var CloudflareTunnel = class {
|
|
232
|
+
kind = "cloudflare";
|
|
233
|
+
};
|
|
234
|
+
var Static = class {
|
|
235
|
+
kind = "static";
|
|
236
|
+
hostname;
|
|
237
|
+
constructor(opts) {
|
|
238
|
+
if (!opts.hostname) {
|
|
239
|
+
throw new Error("Static tunnel requires a non-empty hostname.");
|
|
240
|
+
}
|
|
241
|
+
this.hostname = opts.hostname;
|
|
222
242
|
}
|
|
223
243
|
};
|
|
224
|
-
function deepgram(opts) {
|
|
225
|
-
const options = {
|
|
226
|
-
model: opts.model ?? "nova-3",
|
|
227
|
-
endpointing_ms: opts.endpointingMs ?? 150,
|
|
228
|
-
utterance_end_ms: opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3,
|
|
229
|
-
smart_format: opts.smartFormat ?? true,
|
|
230
|
-
interim_results: opts.interimResults ?? true
|
|
231
|
-
};
|
|
232
|
-
if (opts.vadEvents !== void 0) options.vad_events = opts.vadEvents;
|
|
233
|
-
return new STTConfigImpl("deepgram", opts.apiKey, opts.language ?? "en", options);
|
|
234
|
-
}
|
|
235
|
-
function whisper(opts) {
|
|
236
|
-
return new STTConfigImpl("whisper", opts.apiKey, opts.language ?? "en");
|
|
237
|
-
}
|
|
238
|
-
function elevenlabs(opts) {
|
|
239
|
-
return new TTSConfigImpl("elevenlabs", opts.apiKey, opts.voice ?? "rachel");
|
|
240
|
-
}
|
|
241
|
-
function openaiTts(opts) {
|
|
242
|
-
return new TTSConfigImpl("openai", opts.apiKey, opts.voice ?? "alloy");
|
|
243
|
-
}
|
|
244
|
-
function cartesia(opts) {
|
|
245
|
-
return new TTSConfigImpl(
|
|
246
|
-
"cartesia",
|
|
247
|
-
opts.apiKey,
|
|
248
|
-
opts.voice ?? "f786b574-daa5-4673-aa0c-cbe3e8534c02"
|
|
249
|
-
);
|
|
250
|
-
}
|
|
251
|
-
function rime(opts) {
|
|
252
|
-
return new TTSConfigImpl("rime", opts.apiKey, opts.voice ?? "astra");
|
|
253
|
-
}
|
|
254
|
-
function lmnt(opts) {
|
|
255
|
-
return new TTSConfigImpl("lmnt", opts.apiKey, opts.voice ?? "leah");
|
|
256
|
-
}
|
|
257
244
|
|
|
258
245
|
// src/client.ts
|
|
259
246
|
var DEFAULT_BACKEND_URL2 = "wss://api.getpatter.com";
|
|
@@ -286,21 +273,39 @@ var Patter = class {
|
|
|
286
273
|
embeddedServer = null;
|
|
287
274
|
tunnelHandle = null;
|
|
288
275
|
constructor(options) {
|
|
289
|
-
const
|
|
276
|
+
const hasCarrier = "carrier" in options && options.carrier !== void 0;
|
|
277
|
+
const isLocal = "mode" in options && options.mode === "local" || hasCarrier;
|
|
290
278
|
if (isLocal) {
|
|
291
279
|
const local = options;
|
|
292
280
|
if (!local.phoneNumber) {
|
|
293
281
|
throw new Error("Local mode requires phoneNumber");
|
|
294
282
|
}
|
|
295
|
-
if (!local.
|
|
296
|
-
throw new Error(
|
|
283
|
+
if (!local.carrier) {
|
|
284
|
+
throw new Error(
|
|
285
|
+
"Local mode requires a `carrier` instance. Pass `carrier: new Twilio({...})` or `carrier: new Telnyx({...})`."
|
|
286
|
+
);
|
|
297
287
|
}
|
|
298
|
-
|
|
299
|
-
|
|
288
|
+
const carrier = local.carrier;
|
|
289
|
+
const tunnel = local.tunnel;
|
|
290
|
+
let tunnelWebhookUrl;
|
|
291
|
+
if (tunnel instanceof Static) {
|
|
292
|
+
if (local.webhookUrl) {
|
|
293
|
+
throw new Error(
|
|
294
|
+
"Cannot use both `tunnel: new StaticTunnel(...)` and `webhookUrl`. Pick one."
|
|
295
|
+
);
|
|
296
|
+
}
|
|
297
|
+
tunnelWebhookUrl = tunnel.hostname;
|
|
300
298
|
}
|
|
301
299
|
this.mode = "local";
|
|
302
|
-
const
|
|
303
|
-
|
|
300
|
+
const rawWebhook = tunnelWebhookUrl ?? local.webhookUrl;
|
|
301
|
+
const normalizedWebhook = rawWebhook ? rawWebhook.replace(/^https?:\/\//, "").replace(/\/$/, "") : void 0;
|
|
302
|
+
this.localConfig = {
|
|
303
|
+
carrier,
|
|
304
|
+
phoneNumber: local.phoneNumber,
|
|
305
|
+
webhookUrl: normalizedWebhook,
|
|
306
|
+
tunnel: local.tunnel,
|
|
307
|
+
openaiKey: local.openaiKey
|
|
308
|
+
};
|
|
304
309
|
this.apiKey = "";
|
|
305
310
|
this.backendUrl = DEFAULT_BACKEND_URL2;
|
|
306
311
|
this.restUrl = DEFAULT_REST_URL;
|
|
@@ -317,25 +322,55 @@ var Patter = class {
|
|
|
317
322
|
}
|
|
318
323
|
// === Local mode ===
|
|
319
324
|
agent(opts) {
|
|
320
|
-
|
|
325
|
+
let working = { ...opts };
|
|
326
|
+
if (opts.engine) {
|
|
327
|
+
if (opts.provider) {
|
|
328
|
+
throw new Error(
|
|
329
|
+
"Cannot pass both `engine:` and `provider:`. Use one (engine is preferred)."
|
|
330
|
+
);
|
|
331
|
+
}
|
|
332
|
+
const engine = opts.engine;
|
|
333
|
+
if (engine instanceof Realtime) {
|
|
334
|
+
working = {
|
|
335
|
+
...working,
|
|
336
|
+
provider: "openai_realtime",
|
|
337
|
+
model: working.model ?? engine.model,
|
|
338
|
+
voice: working.voice ?? engine.voice
|
|
339
|
+
};
|
|
340
|
+
if (this.localConfig && !this.localConfig.openaiKey) {
|
|
341
|
+
this.localConfig = { ...this.localConfig, openaiKey: engine.apiKey };
|
|
342
|
+
}
|
|
343
|
+
} else if (engine instanceof ConvAI) {
|
|
344
|
+
working = {
|
|
345
|
+
...working,
|
|
346
|
+
provider: "elevenlabs_convai",
|
|
347
|
+
voice: working.voice ?? engine.voice
|
|
348
|
+
};
|
|
349
|
+
} else {
|
|
350
|
+
throw new Error(
|
|
351
|
+
"Unknown engine. Expected OpenAIRealtime or ElevenLabsConvAI instance."
|
|
352
|
+
);
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
if (working.provider) {
|
|
321
356
|
const valid = ["openai_realtime", "elevenlabs_convai", "pipeline"];
|
|
322
|
-
if (!valid.includes(
|
|
323
|
-
throw new Error(`provider must be one of: ${valid.join(", ")}. Got: '${
|
|
357
|
+
if (!valid.includes(working.provider)) {
|
|
358
|
+
throw new Error(`provider must be one of: ${valid.join(", ")}. Got: '${working.provider}'`);
|
|
324
359
|
}
|
|
325
360
|
}
|
|
326
|
-
if (
|
|
327
|
-
if (!Array.isArray(
|
|
361
|
+
if (working.tools) {
|
|
362
|
+
if (!Array.isArray(working.tools)) {
|
|
328
363
|
throw new TypeError("tools must be an array");
|
|
329
364
|
}
|
|
330
|
-
|
|
331
|
-
if (!
|
|
332
|
-
if (!
|
|
365
|
+
working.tools.forEach((tool2, i) => {
|
|
366
|
+
if (!tool2.name) throw new Error(`tools[${i}] missing required 'name' field`);
|
|
367
|
+
if (!tool2.webhookUrl && !tool2.handler) throw new Error(`tools[${i}] requires either 'webhookUrl' or 'handler'`);
|
|
333
368
|
});
|
|
334
369
|
}
|
|
335
|
-
if (
|
|
370
|
+
if (working.variables !== void 0 && (typeof working.variables !== "object" || Array.isArray(working.variables))) {
|
|
336
371
|
throw new TypeError("variables must be an object");
|
|
337
372
|
}
|
|
338
|
-
return
|
|
373
|
+
return working;
|
|
339
374
|
}
|
|
340
375
|
async serve(opts) {
|
|
341
376
|
if (this.mode !== "local" || !this.localConfig) {
|
|
@@ -358,10 +393,14 @@ var Patter = class {
|
|
|
358
393
|
}
|
|
359
394
|
let webhookUrl = this.localConfig.webhookUrl ?? "";
|
|
360
395
|
const port = opts.port ?? 8e3;
|
|
361
|
-
|
|
396
|
+
const ctorTunnel = this.localConfig.tunnel;
|
|
397
|
+
const wantsCloudflaredFromServe = opts.tunnel === true;
|
|
398
|
+
const wantsCloudflaredFromCtor = ctorTunnel === true || ctorTunnel instanceof CloudflareTunnel;
|
|
399
|
+
const wantsCloudflared = wantsCloudflaredFromServe || wantsCloudflaredFromCtor;
|
|
400
|
+
if (wantsCloudflared && webhookUrl) {
|
|
362
401
|
throw new Error("Cannot use both tunnel: true and webhookUrl. Pick one.");
|
|
363
402
|
}
|
|
364
|
-
if (
|
|
403
|
+
if (wantsCloudflared) {
|
|
365
404
|
const { startTunnel: startTunnel2 } = await import("./tunnel-BL7A7GXW.mjs");
|
|
366
405
|
this.tunnelHandle = await startTunnel2(port);
|
|
367
406
|
webhookUrl = this.tunnelHandle.hostname;
|
|
@@ -371,17 +410,29 @@ var Patter = class {
|
|
|
371
410
|
"No webhookUrl configured. Either:\n - Pass webhookUrl in the Patter constructor\n - Use tunnel: true in serve() to auto-create a tunnel"
|
|
372
411
|
);
|
|
373
412
|
}
|
|
413
|
+
const carrier = this.localConfig.carrier;
|
|
414
|
+
const telephonyProvider = carrier.kind === "twilio" ? "twilio" : "telnyx";
|
|
415
|
+
const { autoConfigureCarrier } = await import("./carrier-config-CPG5CROM.mjs");
|
|
416
|
+
await autoConfigureCarrier({
|
|
417
|
+
telephonyProvider,
|
|
418
|
+
twilioSid: carrier.kind === "twilio" ? carrier.accountSid : void 0,
|
|
419
|
+
twilioToken: carrier.kind === "twilio" ? carrier.authToken : void 0,
|
|
420
|
+
telnyxKey: carrier.kind === "telnyx" ? carrier.apiKey : void 0,
|
|
421
|
+
telnyxConnectionId: carrier.kind === "telnyx" ? carrier.connectionId : void 0,
|
|
422
|
+
phoneNumber: this.localConfig.phoneNumber,
|
|
423
|
+
webhookHost: webhookUrl
|
|
424
|
+
});
|
|
374
425
|
this.embeddedServer = new EmbeddedServer(
|
|
375
426
|
{
|
|
376
|
-
twilioSid:
|
|
377
|
-
twilioToken:
|
|
427
|
+
twilioSid: carrier.kind === "twilio" ? carrier.accountSid : void 0,
|
|
428
|
+
twilioToken: carrier.kind === "twilio" ? carrier.authToken : void 0,
|
|
378
429
|
openaiKey: this.localConfig.openaiKey,
|
|
379
430
|
phoneNumber: this.localConfig.phoneNumber,
|
|
380
431
|
webhookUrl,
|
|
381
|
-
telephonyProvider
|
|
382
|
-
telnyxKey:
|
|
383
|
-
telnyxConnectionId:
|
|
384
|
-
telnyxPublicKey:
|
|
432
|
+
telephonyProvider,
|
|
433
|
+
telnyxKey: carrier.kind === "telnyx" ? carrier.apiKey : void 0,
|
|
434
|
+
telnyxConnectionId: carrier.kind === "telnyx" ? carrier.connectionId : void 0,
|
|
435
|
+
telnyxPublicKey: carrier.kind === "telnyx" ? carrier.publicKey : void 0
|
|
385
436
|
},
|
|
386
437
|
opts.agent,
|
|
387
438
|
opts.onCallStart,
|
|
@@ -401,7 +452,7 @@ var Patter = class {
|
|
|
401
452
|
if (this.mode !== "local") {
|
|
402
453
|
throw new Error("test() is only available in local mode");
|
|
403
454
|
}
|
|
404
|
-
const { TestSession: TestSession2 } = await import("./test-mode-
|
|
455
|
+
const { TestSession: TestSession2 } = await import("./test-mode-YFOL2HYH.mjs");
|
|
405
456
|
const session = new TestSession2();
|
|
406
457
|
await session.run({
|
|
407
458
|
agent: opts.agent,
|
|
@@ -442,10 +493,10 @@ var Patter = class {
|
|
|
442
493
|
if (!this.localConfig) {
|
|
443
494
|
throw new Error("local config missing");
|
|
444
495
|
}
|
|
445
|
-
const { phoneNumber, webhookUrl,
|
|
446
|
-
if (
|
|
447
|
-
const telnyxKey =
|
|
448
|
-
const connectionId =
|
|
496
|
+
const { phoneNumber, webhookUrl, carrier } = this.localConfig;
|
|
497
|
+
if (carrier.kind === "telnyx") {
|
|
498
|
+
const telnyxKey = carrier.apiKey;
|
|
499
|
+
const connectionId = carrier.connectionId;
|
|
449
500
|
const streamUrl = `wss://${webhookUrl}/ws/stream/${encodeURIComponent(localOpts.to)}?caller=${encodeURIComponent(phoneNumber)}&callee=${encodeURIComponent(localOpts.to)}`;
|
|
450
501
|
const telnyxPayload = {
|
|
451
502
|
connection_id: connectionId,
|
|
@@ -485,8 +536,8 @@ var Patter = class {
|
|
|
485
536
|
}
|
|
486
537
|
return;
|
|
487
538
|
}
|
|
488
|
-
const twilioSid =
|
|
489
|
-
const twilioToken =
|
|
539
|
+
const twilioSid = carrier.accountSid;
|
|
540
|
+
const twilioToken = carrier.authToken;
|
|
490
541
|
const statusCallbackUrl = `https://${webhookUrl}/webhooks/twilio/status`;
|
|
491
542
|
const url = `https://api.twilio.com/2010-04-01/Accounts/${twilioSid}/Calls.json`;
|
|
492
543
|
const params = new URLSearchParams({
|
|
@@ -618,65 +669,6 @@ var Patter = class {
|
|
|
618
669
|
const data = await response.json();
|
|
619
670
|
return data.map((c) => ({ id: c.id, direction: c.direction, caller: c.caller, callee: c.callee, startedAt: c.started_at, endedAt: c.ended_at, durationSeconds: c.duration_seconds, status: c.status, transcript: c.transcript }));
|
|
620
671
|
}
|
|
621
|
-
// Provider helpers — mirror the Python classmethod factories so callers can
|
|
622
|
-
// write ``Patter.deepgram({ apiKey })`` without importing the top-level.
|
|
623
|
-
static deepgram = deepgram;
|
|
624
|
-
static whisper = whisper;
|
|
625
|
-
static elevenlabs = elevenlabs;
|
|
626
|
-
static openaiTts = openaiTts;
|
|
627
|
-
static cartesia = cartesia;
|
|
628
|
-
static rime = rime;
|
|
629
|
-
static lmnt = lmnt;
|
|
630
|
-
static guardrail(opts) {
|
|
631
|
-
return {
|
|
632
|
-
name: opts.name,
|
|
633
|
-
blockedTerms: opts.blockedTerms,
|
|
634
|
-
check: opts.check,
|
|
635
|
-
replacement: opts.replacement ?? "I'm sorry, I can't respond to that."
|
|
636
|
-
};
|
|
637
|
-
}
|
|
638
|
-
/**
|
|
639
|
-
* Create a tool definition for use with `agent({ tools: [...] })`.
|
|
640
|
-
*
|
|
641
|
-
* Either `handler` (a function) or `webhookUrl` must be provided.
|
|
642
|
-
*
|
|
643
|
-
* @param opts.name - Tool name (visible to the LLM).
|
|
644
|
-
* @param opts.description - What the tool does (visible to the LLM).
|
|
645
|
-
* @param opts.parameters - JSON Schema for tool arguments.
|
|
646
|
-
* @param opts.handler - Async function called in-process when the LLM invokes the tool.
|
|
647
|
-
* @param opts.webhookUrl - URL to POST to when the LLM invokes the tool.
|
|
648
|
-
*
|
|
649
|
-
* @example
|
|
650
|
-
* ```ts
|
|
651
|
-
* phone.agent({
|
|
652
|
-
* systemPrompt: 'You are a pizza bot.',
|
|
653
|
-
* tools: [
|
|
654
|
-
* Patter.tool({
|
|
655
|
-
* name: 'check_menu',
|
|
656
|
-
* description: 'Check available menu items',
|
|
657
|
-
* handler: async (args) => JSON.stringify({ items: ['margherita'] }),
|
|
658
|
-
* }),
|
|
659
|
-
* ],
|
|
660
|
-
* });
|
|
661
|
-
* ```
|
|
662
|
-
*/
|
|
663
|
-
static tool(opts) {
|
|
664
|
-
if (!opts.handler && !opts.webhookUrl) {
|
|
665
|
-
throw new Error("tool() requires either handler or webhookUrl");
|
|
666
|
-
}
|
|
667
|
-
const t = {
|
|
668
|
-
name: opts.name,
|
|
669
|
-
description: opts.description ?? "",
|
|
670
|
-
parameters: opts.parameters ?? { type: "object", properties: {} }
|
|
671
|
-
};
|
|
672
|
-
if (opts.handler) {
|
|
673
|
-
t.handler = opts.handler;
|
|
674
|
-
}
|
|
675
|
-
if (opts.webhookUrl) {
|
|
676
|
-
t.webhookUrl = opts.webhookUrl;
|
|
677
|
-
}
|
|
678
|
-
return t;
|
|
679
|
-
}
|
|
680
672
|
// Internal
|
|
681
673
|
async registerNumber(provider, providerKey, number, providerSecret, country = "US", stt, tts) {
|
|
682
674
|
const credentials = { api_key: providerKey };
|
|
@@ -766,6 +758,62 @@ function filterForTTS(text) {
|
|
|
766
758
|
return filterEmoji(filterMarkdown(text));
|
|
767
759
|
}
|
|
768
760
|
|
|
761
|
+
// src/providers.ts
|
|
762
|
+
var STTConfigImpl = class {
|
|
763
|
+
provider;
|
|
764
|
+
apiKey;
|
|
765
|
+
language;
|
|
766
|
+
options;
|
|
767
|
+
constructor(provider, apiKey, language = "en", options) {
|
|
768
|
+
this.provider = provider;
|
|
769
|
+
this.apiKey = apiKey;
|
|
770
|
+
this.language = language;
|
|
771
|
+
if (options) this.options = options;
|
|
772
|
+
}
|
|
773
|
+
toDict() {
|
|
774
|
+
const out = {
|
|
775
|
+
provider: this.provider,
|
|
776
|
+
api_key: this.apiKey,
|
|
777
|
+
language: this.language
|
|
778
|
+
};
|
|
779
|
+
if (this.options) out.options = { ...this.options };
|
|
780
|
+
return out;
|
|
781
|
+
}
|
|
782
|
+
};
|
|
783
|
+
var TTSConfigImpl = class {
|
|
784
|
+
provider;
|
|
785
|
+
apiKey;
|
|
786
|
+
voice;
|
|
787
|
+
constructor(provider, apiKey, voice = "alloy") {
|
|
788
|
+
this.provider = provider;
|
|
789
|
+
this.apiKey = apiKey;
|
|
790
|
+
this.voice = voice;
|
|
791
|
+
}
|
|
792
|
+
toDict() {
|
|
793
|
+
return { provider: this.provider, api_key: this.apiKey, voice: this.voice };
|
|
794
|
+
}
|
|
795
|
+
};
|
|
796
|
+
function deepgram(opts) {
|
|
797
|
+
const options = {
|
|
798
|
+
model: opts.model ?? "nova-3",
|
|
799
|
+
endpointing_ms: opts.endpointingMs ?? 150,
|
|
800
|
+
utterance_end_ms: opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3,
|
|
801
|
+
smart_format: opts.smartFormat ?? true,
|
|
802
|
+
interim_results: opts.interimResults ?? true
|
|
803
|
+
};
|
|
804
|
+
if (opts.vadEvents !== void 0) options.vad_events = opts.vadEvents;
|
|
805
|
+
return new STTConfigImpl("deepgram", opts.apiKey, opts.language ?? "en", options);
|
|
806
|
+
}
|
|
807
|
+
function whisper(opts) {
|
|
808
|
+
return new STTConfigImpl("whisper", opts.apiKey, opts.language ?? "en");
|
|
809
|
+
}
|
|
810
|
+
function elevenlabs(opts) {
|
|
811
|
+
return new TTSConfigImpl("elevenlabs", opts.apiKey, opts.voice ?? "rachel");
|
|
812
|
+
}
|
|
813
|
+
function openaiTts(opts) {
|
|
814
|
+
return new TTSConfigImpl("openai", opts.apiKey, opts.voice ?? "alloy");
|
|
815
|
+
}
|
|
816
|
+
|
|
769
817
|
// src/fallback-provider.ts
|
|
770
818
|
var AllProvidersFailedError = class extends Error {
|
|
771
819
|
constructor(message) {
|
|
@@ -1439,110 +1487,424 @@ function scheduleInterval(intervalOrOpts, callback) {
|
|
|
1439
1487
|
};
|
|
1440
1488
|
}
|
|
1441
1489
|
|
|
1442
|
-
// src/
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
|
|
1448
|
-
|
|
1449
|
-
|
|
1450
|
-
return token.text === END_TOKEN || token.text === FINALIZED_TOKEN;
|
|
1451
|
-
}
|
|
1452
|
-
var TokenAccumulator = class {
|
|
1453
|
-
text = "";
|
|
1454
|
-
confSum = 0;
|
|
1455
|
-
confCount = 0;
|
|
1456
|
-
update(token) {
|
|
1457
|
-
if (token.text) {
|
|
1458
|
-
this.text += token.text;
|
|
1459
|
-
}
|
|
1460
|
-
if (typeof token.confidence === "number") {
|
|
1461
|
-
this.confSum += token.confidence;
|
|
1462
|
-
this.confCount += 1;
|
|
1490
|
+
// src/stt/deepgram.ts
|
|
1491
|
+
var STT = class extends DeepgramSTT {
|
|
1492
|
+
constructor(opts = {}) {
|
|
1493
|
+
const key = opts.apiKey ?? process.env.DEEPGRAM_API_KEY;
|
|
1494
|
+
if (!key) {
|
|
1495
|
+
throw new Error(
|
|
1496
|
+
"Deepgram STT requires an apiKey. Pass { apiKey: 'dg_...' } or set DEEPGRAM_API_KEY in the environment."
|
|
1497
|
+
);
|
|
1463
1498
|
}
|
|
1464
|
-
|
|
1465
|
-
|
|
1466
|
-
|
|
1467
|
-
|
|
1468
|
-
|
|
1469
|
-
|
|
1470
|
-
|
|
1471
|
-
|
|
1472
|
-
|
|
1473
|
-
|
|
1474
|
-
|
|
1499
|
+
super(
|
|
1500
|
+
key,
|
|
1501
|
+
opts.language ?? "en",
|
|
1502
|
+
opts.model ?? "nova-3",
|
|
1503
|
+
opts.encoding ?? "linear16",
|
|
1504
|
+
opts.sampleRate ?? 16e3,
|
|
1505
|
+
{
|
|
1506
|
+
endpointingMs: opts.endpointingMs ?? 150,
|
|
1507
|
+
utteranceEndMs: opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3,
|
|
1508
|
+
smartFormat: opts.smartFormat ?? true,
|
|
1509
|
+
interimResults: opts.interimResults ?? true,
|
|
1510
|
+
...opts.vadEvents !== void 0 ? { vadEvents: opts.vadEvents } : {}
|
|
1511
|
+
}
|
|
1512
|
+
);
|
|
1475
1513
|
}
|
|
1476
1514
|
};
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
|
|
1481
|
-
|
|
1515
|
+
|
|
1516
|
+
// src/providers/whisper-stt.ts
|
|
1517
|
+
var OPENAI_TRANSCRIPTION_URL = "https://api.openai.com/v1/audio/transcriptions";
|
|
1518
|
+
var DEFAULT_BUFFER_SIZE = 16e3 * 2;
|
|
1519
|
+
function wrapPcmInWav(pcm, sampleRate = 16e3, channels = 1, bitsPerSample = 16) {
|
|
1520
|
+
const dataSize = pcm.length;
|
|
1521
|
+
const header = Buffer.alloc(44);
|
|
1522
|
+
header.write("RIFF", 0);
|
|
1523
|
+
header.writeUInt32LE(36 + dataSize, 4);
|
|
1524
|
+
header.write("WAVE", 8);
|
|
1525
|
+
header.write("fmt ", 12);
|
|
1526
|
+
header.writeUInt32LE(16, 16);
|
|
1527
|
+
header.writeUInt16LE(1, 20);
|
|
1528
|
+
header.writeUInt16LE(channels, 22);
|
|
1529
|
+
header.writeUInt32LE(sampleRate, 24);
|
|
1530
|
+
header.writeUInt32LE(sampleRate * channels * (bitsPerSample / 8), 28);
|
|
1531
|
+
header.writeUInt16LE(channels * (bitsPerSample / 8), 32);
|
|
1532
|
+
header.writeUInt16LE(bitsPerSample, 34);
|
|
1533
|
+
header.write("data", 36);
|
|
1534
|
+
header.writeUInt32LE(dataSize, 40);
|
|
1535
|
+
return Buffer.concat([header, pcm]);
|
|
1536
|
+
}
|
|
1537
|
+
var WhisperSTT = class _WhisperSTT {
|
|
1482
1538
|
apiKey;
|
|
1483
1539
|
model;
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
clientReferenceId;
|
|
1492
|
-
baseUrl;
|
|
1493
|
-
constructor(apiKey, options = {}) {
|
|
1494
|
-
if (!apiKey) {
|
|
1495
|
-
throw new Error("Soniox apiKey is required");
|
|
1496
|
-
}
|
|
1497
|
-
const maxEndpointDelayMs = options.maxEndpointDelayMs ?? 500;
|
|
1498
|
-
if (maxEndpointDelayMs < 500 || maxEndpointDelayMs > 3e3) {
|
|
1499
|
-
throw new Error("maxEndpointDelayMs must be between 500 and 3000");
|
|
1500
|
-
}
|
|
1540
|
+
language;
|
|
1541
|
+
bufferSize;
|
|
1542
|
+
buffer = Buffer.alloc(0);
|
|
1543
|
+
callbacks = [];
|
|
1544
|
+
running = false;
|
|
1545
|
+
pendingTranscriptions = [];
|
|
1546
|
+
constructor(apiKey, model = "whisper-1", language, bufferSize = DEFAULT_BUFFER_SIZE) {
|
|
1501
1547
|
this.apiKey = apiKey;
|
|
1502
|
-
this.model =
|
|
1503
|
-
this.
|
|
1504
|
-
this.
|
|
1505
|
-
this.sampleRate = options.sampleRate ?? 16e3;
|
|
1506
|
-
this.numChannels = options.numChannels ?? 1;
|
|
1507
|
-
this.enableSpeakerDiarization = options.enableSpeakerDiarization ?? false;
|
|
1508
|
-
this.enableLanguageIdentification = options.enableLanguageIdentification ?? true;
|
|
1509
|
-
this.maxEndpointDelayMs = maxEndpointDelayMs;
|
|
1510
|
-
this.clientReferenceId = options.clientReferenceId;
|
|
1511
|
-
this.baseUrl = options.baseUrl ?? SONIOX_WS_URL;
|
|
1512
|
-
}
|
|
1513
|
-
/** Factory for Twilio-style 8 kHz linear PCM. */
|
|
1514
|
-
static forTwilio(apiKey, languageHints) {
|
|
1515
|
-
return new _SonioxSTT(apiKey, { sampleRate: 8e3, languageHints });
|
|
1548
|
+
this.model = model;
|
|
1549
|
+
this.language = language;
|
|
1550
|
+
this.bufferSize = bufferSize;
|
|
1516
1551
|
}
|
|
1517
|
-
|
|
1518
|
-
|
|
1519
|
-
|
|
1520
|
-
model: this.model,
|
|
1521
|
-
audio_format: "pcm_s16le",
|
|
1522
|
-
num_channels: this.numChannels,
|
|
1523
|
-
sample_rate: this.sampleRate,
|
|
1524
|
-
enable_endpoint_detection: true,
|
|
1525
|
-
enable_speaker_diarization: this.enableSpeakerDiarization,
|
|
1526
|
-
enable_language_identification: this.enableLanguageIdentification,
|
|
1527
|
-
max_endpoint_delay_ms: this.maxEndpointDelayMs
|
|
1528
|
-
};
|
|
1529
|
-
if (this.languageHints) {
|
|
1530
|
-
config.language_hints = this.languageHints;
|
|
1531
|
-
config.language_hints_strict = this.languageHintsStrict;
|
|
1532
|
-
}
|
|
1533
|
-
if (this.clientReferenceId) {
|
|
1534
|
-
config.client_reference_id = this.clientReferenceId;
|
|
1535
|
-
}
|
|
1536
|
-
return config;
|
|
1552
|
+
/** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
|
|
1553
|
+
static forTwilio(apiKey, language = "en", model = "whisper-1") {
|
|
1554
|
+
return new _WhisperSTT(apiKey, model, language);
|
|
1537
1555
|
}
|
|
1538
1556
|
async connect() {
|
|
1539
|
-
this.
|
|
1540
|
-
|
|
1541
|
-
|
|
1542
|
-
|
|
1543
|
-
|
|
1544
|
-
|
|
1545
|
-
|
|
1557
|
+
this.running = true;
|
|
1558
|
+
this.buffer = Buffer.alloc(0);
|
|
1559
|
+
}
|
|
1560
|
+
sendAudio(audio) {
|
|
1561
|
+
if (!this.running) return;
|
|
1562
|
+
this.buffer = Buffer.concat([this.buffer, audio]);
|
|
1563
|
+
if (this.buffer.length >= this.bufferSize) {
|
|
1564
|
+
const pcm = this.buffer;
|
|
1565
|
+
this.buffer = Buffer.alloc(0);
|
|
1566
|
+
this.trackTranscription(this.transcribeBuffer(pcm));
|
|
1567
|
+
}
|
|
1568
|
+
}
|
|
1569
|
+
trackTranscription(promise) {
|
|
1570
|
+
const wrapped = promise.finally(() => {
|
|
1571
|
+
const idx = this.pendingTranscriptions.indexOf(wrapped);
|
|
1572
|
+
if (idx !== -1) this.pendingTranscriptions.splice(idx, 1);
|
|
1573
|
+
});
|
|
1574
|
+
this.pendingTranscriptions.push(wrapped);
|
|
1575
|
+
}
|
|
1576
|
+
onTranscript(callback) {
|
|
1577
|
+
if (this.callbacks.length >= 10) {
|
|
1578
|
+
getLogger().warn("WhisperSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
|
|
1579
|
+
this.callbacks[this.callbacks.length - 1] = callback;
|
|
1580
|
+
return;
|
|
1581
|
+
}
|
|
1582
|
+
this.callbacks.push(callback);
|
|
1583
|
+
}
|
|
1584
|
+
async close() {
|
|
1585
|
+
this.running = false;
|
|
1586
|
+
if (this.buffer.length >= this.bufferSize / 4) {
|
|
1587
|
+
const pcm = this.buffer;
|
|
1588
|
+
this.buffer = Buffer.alloc(0);
|
|
1589
|
+
this.trackTranscription(this.transcribeBuffer(pcm));
|
|
1590
|
+
} else {
|
|
1591
|
+
this.buffer = Buffer.alloc(0);
|
|
1592
|
+
}
|
|
1593
|
+
await Promise.allSettled(this.pendingTranscriptions);
|
|
1594
|
+
this.callbacks = [];
|
|
1595
|
+
}
|
|
1596
|
+
// ------------------------------------------------------------------
|
|
1597
|
+
// Private
|
|
1598
|
+
// ------------------------------------------------------------------
|
|
1599
|
+
async transcribeBuffer(pcm) {
|
|
1600
|
+
const wav = wrapPcmInWav(pcm);
|
|
1601
|
+
const formData = new FormData();
|
|
1602
|
+
formData.append("file", new Blob([wav.buffer.slice(wav.byteOffset, wav.byteOffset + wav.byteLength)], { type: "audio/wav" }), "audio.wav");
|
|
1603
|
+
formData.append("model", this.model);
|
|
1604
|
+
if (this.language) {
|
|
1605
|
+
formData.append("language", this.language);
|
|
1606
|
+
}
|
|
1607
|
+
try {
|
|
1608
|
+
const resp = await fetch(OPENAI_TRANSCRIPTION_URL, {
|
|
1609
|
+
method: "POST",
|
|
1610
|
+
headers: { Authorization: `Bearer ${this.apiKey}` },
|
|
1611
|
+
body: formData,
|
|
1612
|
+
signal: AbortSignal.timeout(15e3)
|
|
1613
|
+
});
|
|
1614
|
+
if (!resp.ok) {
|
|
1615
|
+
const body = await resp.text();
|
|
1616
|
+
getLogger().error(`WhisperSTT transcription error: ${resp.status} ${body}`);
|
|
1617
|
+
return;
|
|
1618
|
+
}
|
|
1619
|
+
const json = await resp.json();
|
|
1620
|
+
const text = (json.text ?? "").trim();
|
|
1621
|
+
if (!text) return;
|
|
1622
|
+
const transcript = {
|
|
1623
|
+
text,
|
|
1624
|
+
isFinal: true,
|
|
1625
|
+
confidence: 1
|
|
1626
|
+
};
|
|
1627
|
+
for (const cb of this.callbacks) {
|
|
1628
|
+
cb(transcript);
|
|
1629
|
+
}
|
|
1630
|
+
} catch (err) {
|
|
1631
|
+
getLogger().error(`WhisperSTT transcription error: ${String(err)}`);
|
|
1632
|
+
}
|
|
1633
|
+
}
|
|
1634
|
+
};
|
|
1635
|
+
|
|
1636
|
+
// src/stt/whisper.ts
|
|
1637
|
+
var STT2 = class extends WhisperSTT {
|
|
1638
|
+
constructor(opts = {}) {
|
|
1639
|
+
const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
|
|
1640
|
+
if (!key) {
|
|
1641
|
+
throw new Error(
|
|
1642
|
+
"Whisper STT requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
|
|
1643
|
+
);
|
|
1644
|
+
}
|
|
1645
|
+
super(key, opts.model ?? "whisper-1", opts.language, opts.bufferSize);
|
|
1646
|
+
}
|
|
1647
|
+
};
|
|
1648
|
+
|
|
1649
|
+
// src/providers/cartesia-stt.ts
|
|
1650
|
+
import WebSocket3 from "ws";
|
|
1651
|
+
var DEFAULT_BASE_URL = "https://api.cartesia.ai";
|
|
1652
|
+
var API_VERSION = "2025-04-16";
|
|
1653
|
+
var USER_AGENT = "Patter/1.0 (integration=LiveKit-port; provider=Cartesia)";
|
|
1654
|
+
var KEEPALIVE_INTERVAL_MS = 3e4;
|
|
1655
|
+
var CONNECT_TIMEOUT_MS = 1e4;
|
|
1656
|
+
var MAX_CALLBACKS = 10;
|
|
1657
|
+
var CartesiaSTT = class {
|
|
1658
|
+
constructor(apiKey, options = {}) {
|
|
1659
|
+
this.apiKey = apiKey;
|
|
1660
|
+
this.options = options;
|
|
1661
|
+
if (!apiKey) {
|
|
1662
|
+
throw new Error("CartesiaSTT requires a non-empty apiKey");
|
|
1663
|
+
}
|
|
1664
|
+
}
|
|
1665
|
+
ws = null;
|
|
1666
|
+
callbacks = [];
|
|
1667
|
+
keepaliveTimer = null;
|
|
1668
|
+
/** Cartesia request id — set from the server transcript events. */
|
|
1669
|
+
requestId = "";
|
|
1670
|
+
buildWsUrl() {
|
|
1671
|
+
const opts = this.options;
|
|
1672
|
+
const rawBase = opts.baseUrl ?? DEFAULT_BASE_URL;
|
|
1673
|
+
let base;
|
|
1674
|
+
if (rawBase.startsWith("http://")) {
|
|
1675
|
+
base = `ws://${rawBase.slice("http://".length)}`;
|
|
1676
|
+
} else if (rawBase.startsWith("https://")) {
|
|
1677
|
+
base = `wss://${rawBase.slice("https://".length)}`;
|
|
1678
|
+
} else if (rawBase.startsWith("ws://") || rawBase.startsWith("wss://")) {
|
|
1679
|
+
base = rawBase;
|
|
1680
|
+
} else {
|
|
1681
|
+
base = `wss://${rawBase}`;
|
|
1682
|
+
}
|
|
1683
|
+
const language = opts.language ?? "en";
|
|
1684
|
+
const params = new URLSearchParams({
|
|
1685
|
+
model: opts.model ?? "ink-whisper",
|
|
1686
|
+
sample_rate: String(opts.sampleRate ?? 16e3),
|
|
1687
|
+
encoding: opts.encoding ?? "pcm_s16le",
|
|
1688
|
+
cartesia_version: API_VERSION,
|
|
1689
|
+
api_key: this.apiKey,
|
|
1690
|
+
language
|
|
1691
|
+
});
|
|
1692
|
+
return `${base}/stt/websocket?${params.toString()}`;
|
|
1693
|
+
}
|
|
1694
|
+
async connect() {
|
|
1695
|
+
const url = this.buildWsUrl();
|
|
1696
|
+
this.ws = new WebSocket3(url, {
|
|
1697
|
+
headers: { "User-Agent": USER_AGENT }
|
|
1698
|
+
});
|
|
1699
|
+
await new Promise((resolve, reject) => {
|
|
1700
|
+
const timer = setTimeout(
|
|
1701
|
+
() => reject(new Error("Cartesia STT connect timeout")),
|
|
1702
|
+
CONNECT_TIMEOUT_MS
|
|
1703
|
+
);
|
|
1704
|
+
this.ws.once("open", () => {
|
|
1705
|
+
clearTimeout(timer);
|
|
1706
|
+
resolve();
|
|
1707
|
+
});
|
|
1708
|
+
this.ws.once("error", (err) => {
|
|
1709
|
+
clearTimeout(timer);
|
|
1710
|
+
reject(err);
|
|
1711
|
+
});
|
|
1712
|
+
});
|
|
1713
|
+
this.ws.on("message", (raw) => {
|
|
1714
|
+
let event;
|
|
1715
|
+
try {
|
|
1716
|
+
event = JSON.parse(raw.toString());
|
|
1717
|
+
} catch {
|
|
1718
|
+
return;
|
|
1719
|
+
}
|
|
1720
|
+
this.handleEvent(event);
|
|
1721
|
+
});
|
|
1722
|
+
this.keepaliveTimer = setInterval(() => {
|
|
1723
|
+
if (this.ws && this.ws.readyState === WebSocket3.OPEN) {
|
|
1724
|
+
try {
|
|
1725
|
+
this.ws.ping();
|
|
1726
|
+
} catch {
|
|
1727
|
+
}
|
|
1728
|
+
}
|
|
1729
|
+
}, KEEPALIVE_INTERVAL_MS);
|
|
1730
|
+
}
|
|
1731
|
+
handleEvent(event) {
|
|
1732
|
+
const type = event.type;
|
|
1733
|
+
if (type === "transcript") {
|
|
1734
|
+
const text = (event.text ?? "").trim();
|
|
1735
|
+
const isFinal = Boolean(event.is_final);
|
|
1736
|
+
if (!text && !isFinal) return;
|
|
1737
|
+
if (event.request_id) {
|
|
1738
|
+
this.requestId = event.request_id;
|
|
1739
|
+
}
|
|
1740
|
+
if (!text) return;
|
|
1741
|
+
const confidence = Number(event.probability ?? 1);
|
|
1742
|
+
this.emit({ text, isFinal, confidence });
|
|
1743
|
+
return;
|
|
1744
|
+
}
|
|
1745
|
+
if (type === "error") {
|
|
1746
|
+
getLogger().error(`Cartesia STT error: ${event.message ?? "unknown"}`);
|
|
1747
|
+
return;
|
|
1748
|
+
}
|
|
1749
|
+
}
|
|
1750
|
+
emit(transcript) {
|
|
1751
|
+
for (const cb of this.callbacks) {
|
|
1752
|
+
cb(transcript);
|
|
1753
|
+
}
|
|
1754
|
+
}
|
|
1755
|
+
sendAudio(audio) {
|
|
1756
|
+
if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
|
|
1757
|
+
this.ws.send(audio);
|
|
1758
|
+
}
|
|
1759
|
+
onTranscript(callback) {
|
|
1760
|
+
if (this.callbacks.length >= MAX_CALLBACKS) {
|
|
1761
|
+
getLogger().warn(
|
|
1762
|
+
"CartesiaSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
|
|
1763
|
+
);
|
|
1764
|
+
this.callbacks[this.callbacks.length - 1] = callback;
|
|
1765
|
+
return;
|
|
1766
|
+
}
|
|
1767
|
+
this.callbacks.push(callback);
|
|
1768
|
+
}
|
|
1769
|
+
close() {
|
|
1770
|
+
if (this.keepaliveTimer) {
|
|
1771
|
+
clearInterval(this.keepaliveTimer);
|
|
1772
|
+
this.keepaliveTimer = null;
|
|
1773
|
+
}
|
|
1774
|
+
if (this.ws) {
|
|
1775
|
+
try {
|
|
1776
|
+
this.ws.send("finalize");
|
|
1777
|
+
} catch {
|
|
1778
|
+
}
|
|
1779
|
+
this.ws.close();
|
|
1780
|
+
this.ws = null;
|
|
1781
|
+
}
|
|
1782
|
+
}
|
|
1783
|
+
};
|
|
1784
|
+
|
|
1785
|
+
// src/stt/cartesia.ts
|
|
1786
|
+
var STT3 = class extends CartesiaSTT {
|
|
1787
|
+
constructor(opts = {}) {
|
|
1788
|
+
const key = opts.apiKey ?? process.env.CARTESIA_API_KEY;
|
|
1789
|
+
if (!key) {
|
|
1790
|
+
throw new Error(
|
|
1791
|
+
"Cartesia STT requires an apiKey. Pass { apiKey: '...' } or set CARTESIA_API_KEY in the environment."
|
|
1792
|
+
);
|
|
1793
|
+
}
|
|
1794
|
+
super(key, {
|
|
1795
|
+
model: opts.model,
|
|
1796
|
+
language: opts.language,
|
|
1797
|
+
encoding: opts.encoding,
|
|
1798
|
+
sampleRate: opts.sampleRate,
|
|
1799
|
+
baseUrl: opts.baseUrl
|
|
1800
|
+
});
|
|
1801
|
+
}
|
|
1802
|
+
};
|
|
1803
|
+
|
|
1804
|
+
// src/providers/soniox-stt.ts
|
|
1805
|
+
import WebSocket4 from "ws";
|
|
1806
|
+
var SONIOX_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket";
|
|
1807
|
+
var KEEPALIVE_MESSAGE = '{"type": "keepalive"}';
|
|
1808
|
+
var END_TOKEN = "<end>";
|
|
1809
|
+
var FINALIZED_TOKEN = "<fin>";
|
|
1810
|
+
var KEEPALIVE_INTERVAL_MS2 = 5e3;
|
|
1811
|
+
function isEndToken(token) {
|
|
1812
|
+
return token.text === END_TOKEN || token.text === FINALIZED_TOKEN;
|
|
1813
|
+
}
|
|
1814
|
+
var TokenAccumulator = class {
|
|
1815
|
+
text = "";
|
|
1816
|
+
confSum = 0;
|
|
1817
|
+
confCount = 0;
|
|
1818
|
+
update(token) {
|
|
1819
|
+
if (token.text) {
|
|
1820
|
+
this.text += token.text;
|
|
1821
|
+
}
|
|
1822
|
+
if (typeof token.confidence === "number") {
|
|
1823
|
+
this.confSum += token.confidence;
|
|
1824
|
+
this.confCount += 1;
|
|
1825
|
+
}
|
|
1826
|
+
}
|
|
1827
|
+
get confidence() {
|
|
1828
|
+
return this.confCount === 0 ? 0 : this.confSum / this.confCount;
|
|
1829
|
+
}
|
|
1830
|
+
reset() {
|
|
1831
|
+
this.text = "";
|
|
1832
|
+
this.confSum = 0;
|
|
1833
|
+
this.confCount = 0;
|
|
1834
|
+
}
|
|
1835
|
+
get raw() {
|
|
1836
|
+
return { sum: this.confSum, count: this.confCount };
|
|
1837
|
+
}
|
|
1838
|
+
};
|
|
1839
|
+
var SonioxSTT = class _SonioxSTT {
|
|
1840
|
+
ws = null;
|
|
1841
|
+
callbacks = [];
|
|
1842
|
+
final = new TokenAccumulator();
|
|
1843
|
+
keepaliveTimer = null;
|
|
1844
|
+
apiKey;
|
|
1845
|
+
model;
|
|
1846
|
+
languageHints;
|
|
1847
|
+
languageHintsStrict;
|
|
1848
|
+
sampleRate;
|
|
1849
|
+
numChannels;
|
|
1850
|
+
enableSpeakerDiarization;
|
|
1851
|
+
enableLanguageIdentification;
|
|
1852
|
+
maxEndpointDelayMs;
|
|
1853
|
+
clientReferenceId;
|
|
1854
|
+
baseUrl;
|
|
1855
|
+
constructor(apiKey, options = {}) {
|
|
1856
|
+
if (!apiKey) {
|
|
1857
|
+
throw new Error("Soniox apiKey is required");
|
|
1858
|
+
}
|
|
1859
|
+
const maxEndpointDelayMs = options.maxEndpointDelayMs ?? 500;
|
|
1860
|
+
if (maxEndpointDelayMs < 500 || maxEndpointDelayMs > 3e3) {
|
|
1861
|
+
throw new Error("maxEndpointDelayMs must be between 500 and 3000");
|
|
1862
|
+
}
|
|
1863
|
+
this.apiKey = apiKey;
|
|
1864
|
+
this.model = options.model ?? "stt-rt-v4";
|
|
1865
|
+
this.languageHints = options.languageHints;
|
|
1866
|
+
this.languageHintsStrict = options.languageHintsStrict ?? false;
|
|
1867
|
+
this.sampleRate = options.sampleRate ?? 16e3;
|
|
1868
|
+
this.numChannels = options.numChannels ?? 1;
|
|
1869
|
+
this.enableSpeakerDiarization = options.enableSpeakerDiarization ?? false;
|
|
1870
|
+
this.enableLanguageIdentification = options.enableLanguageIdentification ?? true;
|
|
1871
|
+
this.maxEndpointDelayMs = maxEndpointDelayMs;
|
|
1872
|
+
this.clientReferenceId = options.clientReferenceId;
|
|
1873
|
+
this.baseUrl = options.baseUrl ?? SONIOX_WS_URL;
|
|
1874
|
+
}
|
|
1875
|
+
/** Factory for Twilio-style 8 kHz linear PCM. */
|
|
1876
|
+
static forTwilio(apiKey, languageHints) {
|
|
1877
|
+
return new _SonioxSTT(apiKey, { sampleRate: 8e3, languageHints });
|
|
1878
|
+
}
|
|
1879
|
+
buildConfig() {
|
|
1880
|
+
const config = {
|
|
1881
|
+
api_key: this.apiKey,
|
|
1882
|
+
model: this.model,
|
|
1883
|
+
audio_format: "pcm_s16le",
|
|
1884
|
+
num_channels: this.numChannels,
|
|
1885
|
+
sample_rate: this.sampleRate,
|
|
1886
|
+
enable_endpoint_detection: true,
|
|
1887
|
+
enable_speaker_diarization: this.enableSpeakerDiarization,
|
|
1888
|
+
enable_language_identification: this.enableLanguageIdentification,
|
|
1889
|
+
max_endpoint_delay_ms: this.maxEndpointDelayMs
|
|
1890
|
+
};
|
|
1891
|
+
if (this.languageHints) {
|
|
1892
|
+
config.language_hints = this.languageHints;
|
|
1893
|
+
config.language_hints_strict = this.languageHintsStrict;
|
|
1894
|
+
}
|
|
1895
|
+
if (this.clientReferenceId) {
|
|
1896
|
+
config.client_reference_id = this.clientReferenceId;
|
|
1897
|
+
}
|
|
1898
|
+
return config;
|
|
1899
|
+
}
|
|
1900
|
+
async connect() {
|
|
1901
|
+
this.ws = new WebSocket4(this.baseUrl);
|
|
1902
|
+
await new Promise((resolve, reject) => {
|
|
1903
|
+
const timer = setTimeout(() => reject(new Error("Soniox connect timeout")), 1e4);
|
|
1904
|
+
this.ws.once("open", () => {
|
|
1905
|
+
clearTimeout(timer);
|
|
1906
|
+
resolve();
|
|
1907
|
+
});
|
|
1546
1908
|
this.ws.once("error", (err) => {
|
|
1547
1909
|
clearTimeout(timer);
|
|
1548
1910
|
reject(err);
|
|
@@ -1555,13 +1917,13 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
1555
1917
|
getLogger().error(`SonioxSTT WebSocket error: ${String(err)}`);
|
|
1556
1918
|
});
|
|
1557
1919
|
this.keepaliveTimer = setInterval(() => {
|
|
1558
|
-
if (this.ws && this.ws.readyState ===
|
|
1920
|
+
if (this.ws && this.ws.readyState === WebSocket4.OPEN) {
|
|
1559
1921
|
try {
|
|
1560
1922
|
this.ws.send(KEEPALIVE_MESSAGE);
|
|
1561
1923
|
} catch {
|
|
1562
1924
|
}
|
|
1563
1925
|
}
|
|
1564
|
-
},
|
|
1926
|
+
}, KEEPALIVE_INTERVAL_MS2);
|
|
1565
1927
|
}
|
|
1566
1928
|
clearKeepalive() {
|
|
1567
1929
|
if (this.keepaliveTimer) {
|
|
@@ -1628,7 +1990,7 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
1628
1990
|
}
|
|
1629
1991
|
}
|
|
1630
1992
|
sendAudio(audio) {
|
|
1631
|
-
if (!this.ws || this.ws.readyState !==
|
|
1993
|
+
if (!this.ws || this.ws.readyState !== WebSocket4.OPEN) return;
|
|
1632
1994
|
if (audio.length === 0) return;
|
|
1633
1995
|
this.ws.send(audio);
|
|
1634
1996
|
}
|
|
@@ -1658,12 +2020,27 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
1658
2020
|
}
|
|
1659
2021
|
};
|
|
1660
2022
|
|
|
2023
|
+
// src/stt/soniox.ts
|
|
2024
|
+
var STT4 = class extends SonioxSTT {
|
|
2025
|
+
constructor(opts = {}) {
|
|
2026
|
+
const key = opts.apiKey ?? process.env.SONIOX_API_KEY;
|
|
2027
|
+
if (!key) {
|
|
2028
|
+
throw new Error(
|
|
2029
|
+
"Soniox STT requires an apiKey. Pass { apiKey: '...' } or set SONIOX_API_KEY in the environment."
|
|
2030
|
+
);
|
|
2031
|
+
}
|
|
2032
|
+
const { apiKey: _ignored, ...rest } = opts;
|
|
2033
|
+
void _ignored;
|
|
2034
|
+
super(key, rest);
|
|
2035
|
+
}
|
|
2036
|
+
};
|
|
2037
|
+
|
|
1661
2038
|
// src/providers/assemblyai-stt.ts
|
|
1662
|
-
import
|
|
1663
|
-
var
|
|
2039
|
+
import WebSocket5 from "ws";
|
|
2040
|
+
var DEFAULT_BASE_URL2 = "wss://streaming.assemblyai.com";
|
|
1664
2041
|
var DEFAULT_MIN_TURN_SILENCE_MS = 100;
|
|
1665
|
-
var
|
|
1666
|
-
var
|
|
2042
|
+
var CONNECT_TIMEOUT_MS2 = 1e4;
|
|
2043
|
+
var MAX_CALLBACKS2 = 10;
|
|
1667
2044
|
var AssemblyAISTT = class _AssemblyAISTT {
|
|
1668
2045
|
constructor(apiKey, options = {}) {
|
|
1669
2046
|
this.apiKey = apiKey;
|
|
@@ -1718,175 +2095,29 @@ var AssemblyAISTT = class _AssemblyAISTT {
|
|
|
1718
2095
|
domain: opts.domain
|
|
1719
2096
|
};
|
|
1720
2097
|
const params = new URLSearchParams();
|
|
1721
|
-
for (const [key, value] of Object.entries(raw)) {
|
|
1722
|
-
if (value === void 0 || value === null) continue;
|
|
1723
|
-
if (typeof value === "boolean") {
|
|
1724
|
-
params.set(key, value ? "true" : "false");
|
|
1725
|
-
} else {
|
|
1726
|
-
params.set(key, String(value));
|
|
1727
|
-
}
|
|
1728
|
-
}
|
|
1729
|
-
const base = opts.baseUrl ??
|
|
1730
|
-
return `${base}/v3/ws?${params.toString()}`;
|
|
1731
|
-
}
|
|
1732
|
-
async connect() {
|
|
1733
|
-
const url = this.buildUrl();
|
|
1734
|
-
this.ws = new WebSocket4(url, {
|
|
1735
|
-
headers: {
|
|
1736
|
-
Authorization: this.apiKey,
|
|
1737
|
-
"Content-Type": "application/json",
|
|
1738
|
-
"User-Agent": "Patter/1.0 (integration=LiveKit-port)"
|
|
1739
|
-
}
|
|
1740
|
-
});
|
|
1741
|
-
await new Promise((resolve, reject) => {
|
|
1742
|
-
const timer = setTimeout(
|
|
1743
|
-
() => reject(new Error("AssemblyAI connect timeout")),
|
|
1744
|
-
CONNECT_TIMEOUT_MS
|
|
1745
|
-
);
|
|
1746
|
-
this.ws.once("open", () => {
|
|
1747
|
-
clearTimeout(timer);
|
|
1748
|
-
resolve();
|
|
1749
|
-
});
|
|
1750
|
-
this.ws.once("error", (err) => {
|
|
1751
|
-
clearTimeout(timer);
|
|
1752
|
-
reject(err);
|
|
1753
|
-
});
|
|
1754
|
-
});
|
|
1755
|
-
this.ws.on("message", (raw) => {
|
|
1756
|
-
let event;
|
|
1757
|
-
try {
|
|
1758
|
-
event = JSON.parse(raw.toString());
|
|
1759
|
-
} catch {
|
|
1760
|
-
return;
|
|
1761
|
-
}
|
|
1762
|
-
this.handleEvent(event);
|
|
1763
|
-
});
|
|
1764
|
-
}
|
|
1765
|
-
handleEvent(event) {
|
|
1766
|
-
const type = event.type;
|
|
1767
|
-
if (type === "Begin") {
|
|
1768
|
-
this.sessionId = event.id ?? "";
|
|
1769
|
-
this.expiresAt = event.expires_at ?? 0;
|
|
1770
|
-
return;
|
|
1771
|
-
}
|
|
1772
|
-
if (type !== "Turn") {
|
|
1773
|
-
return;
|
|
1774
|
-
}
|
|
1775
|
-
const endOfTurn = Boolean(event.end_of_turn);
|
|
1776
|
-
const turnIsFormatted = Boolean(event.turn_is_formatted);
|
|
1777
|
-
const words = event.words ?? [];
|
|
1778
|
-
const transcriptText = (event.transcript ?? "").trim();
|
|
1779
|
-
if (endOfTurn) {
|
|
1780
|
-
if (this.options.formatTurns && !turnIsFormatted) return;
|
|
1781
|
-
if (!transcriptText) return;
|
|
1782
|
-
this.emit({
|
|
1783
|
-
text: transcriptText,
|
|
1784
|
-
isFinal: true,
|
|
1785
|
-
confidence: averageConfidence(words)
|
|
1786
|
-
});
|
|
1787
|
-
return;
|
|
1788
|
-
}
|
|
1789
|
-
if (!words.length) return;
|
|
1790
|
-
const interim = words.map((w) => (w.text ?? "").trim()).filter(Boolean).join(" ");
|
|
1791
|
-
if (!interim) return;
|
|
1792
|
-
this.emit({
|
|
1793
|
-
text: interim,
|
|
1794
|
-
isFinal: false,
|
|
1795
|
-
confidence: averageConfidence(words)
|
|
1796
|
-
});
|
|
1797
|
-
}
|
|
1798
|
-
emit(transcript) {
|
|
1799
|
-
for (const cb of this.callbacks) {
|
|
1800
|
-
cb(transcript);
|
|
1801
|
-
}
|
|
1802
|
-
}
|
|
1803
|
-
sendAudio(audio) {
|
|
1804
|
-
if (!this.ws || this.ws.readyState !== WebSocket4.OPEN) return;
|
|
1805
|
-
this.ws.send(audio);
|
|
1806
|
-
}
|
|
1807
|
-
onTranscript(callback) {
|
|
1808
|
-
if (this.callbacks.length >= MAX_CALLBACKS) {
|
|
1809
|
-
getLogger().warn(
|
|
1810
|
-
"AssemblyAISTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
|
|
1811
|
-
);
|
|
1812
|
-
this.callbacks[this.callbacks.length - 1] = callback;
|
|
1813
|
-
return;
|
|
1814
|
-
}
|
|
1815
|
-
this.callbacks.push(callback);
|
|
1816
|
-
}
|
|
1817
|
-
close() {
|
|
1818
|
-
if (this.ws) {
|
|
1819
|
-
try {
|
|
1820
|
-
this.ws.send(JSON.stringify({ type: "Terminate" }));
|
|
1821
|
-
} catch {
|
|
1822
|
-
}
|
|
1823
|
-
this.ws.close();
|
|
1824
|
-
this.ws = null;
|
|
1825
|
-
}
|
|
1826
|
-
}
|
|
1827
|
-
};
|
|
1828
|
-
function averageConfidence(words) {
|
|
1829
|
-
if (!words.length) return 0;
|
|
1830
|
-
let total = 0;
|
|
1831
|
-
for (const w of words) {
|
|
1832
|
-
total += Number(w.confidence ?? 0);
|
|
1833
|
-
}
|
|
1834
|
-
return total / words.length;
|
|
1835
|
-
}
|
|
1836
|
-
|
|
1837
|
-
// src/providers/cartesia-stt.ts
|
|
1838
|
-
import WebSocket5 from "ws";
|
|
1839
|
-
var DEFAULT_BASE_URL2 = "https://api.cartesia.ai";
|
|
1840
|
-
var API_VERSION = "2025-04-16";
|
|
1841
|
-
var USER_AGENT = "Patter/1.0 (integration=LiveKit-port; provider=Cartesia)";
|
|
1842
|
-
var KEEPALIVE_INTERVAL_MS2 = 3e4;
|
|
1843
|
-
var CONNECT_TIMEOUT_MS2 = 1e4;
|
|
1844
|
-
var MAX_CALLBACKS2 = 10;
|
|
1845
|
-
var CartesiaSTT = class {
|
|
1846
|
-
constructor(apiKey, options = {}) {
|
|
1847
|
-
this.apiKey = apiKey;
|
|
1848
|
-
this.options = options;
|
|
1849
|
-
if (!apiKey) {
|
|
1850
|
-
throw new Error("CartesiaSTT requires a non-empty apiKey");
|
|
1851
|
-
}
|
|
1852
|
-
}
|
|
1853
|
-
ws = null;
|
|
1854
|
-
callbacks = [];
|
|
1855
|
-
keepaliveTimer = null;
|
|
1856
|
-
/** Cartesia request id — set from the server transcript events. */
|
|
1857
|
-
requestId = "";
|
|
1858
|
-
buildWsUrl() {
|
|
1859
|
-
const opts = this.options;
|
|
1860
|
-
const rawBase = opts.baseUrl ?? DEFAULT_BASE_URL2;
|
|
1861
|
-
let base;
|
|
1862
|
-
if (rawBase.startsWith("http://")) {
|
|
1863
|
-
base = `ws://${rawBase.slice("http://".length)}`;
|
|
1864
|
-
} else if (rawBase.startsWith("https://")) {
|
|
1865
|
-
base = `wss://${rawBase.slice("https://".length)}`;
|
|
1866
|
-
} else if (rawBase.startsWith("ws://") || rawBase.startsWith("wss://")) {
|
|
1867
|
-
base = rawBase;
|
|
1868
|
-
} else {
|
|
1869
|
-
base = `wss://${rawBase}`;
|
|
1870
|
-
}
|
|
1871
|
-
const language = opts.language ?? "en";
|
|
1872
|
-
const params = new URLSearchParams({
|
|
1873
|
-
model: opts.model ?? "ink-whisper",
|
|
1874
|
-
sample_rate: String(opts.sampleRate ?? 16e3),
|
|
1875
|
-
encoding: opts.encoding ?? "pcm_s16le",
|
|
1876
|
-
cartesia_version: API_VERSION,
|
|
1877
|
-
api_key: this.apiKey,
|
|
1878
|
-
language
|
|
1879
|
-
});
|
|
1880
|
-
return `${base}/stt/websocket?${params.toString()}`;
|
|
2098
|
+
for (const [key, value] of Object.entries(raw)) {
|
|
2099
|
+
if (value === void 0 || value === null) continue;
|
|
2100
|
+
if (typeof value === "boolean") {
|
|
2101
|
+
params.set(key, value ? "true" : "false");
|
|
2102
|
+
} else {
|
|
2103
|
+
params.set(key, String(value));
|
|
2104
|
+
}
|
|
2105
|
+
}
|
|
2106
|
+
const base = opts.baseUrl ?? DEFAULT_BASE_URL2;
|
|
2107
|
+
return `${base}/v3/ws?${params.toString()}`;
|
|
1881
2108
|
}
|
|
1882
2109
|
async connect() {
|
|
1883
|
-
const url = this.
|
|
2110
|
+
const url = this.buildUrl();
|
|
1884
2111
|
this.ws = new WebSocket5(url, {
|
|
1885
|
-
headers: {
|
|
2112
|
+
headers: {
|
|
2113
|
+
Authorization: this.apiKey,
|
|
2114
|
+
"Content-Type": "application/json",
|
|
2115
|
+
"User-Agent": "Patter/1.0 (integration=LiveKit-port)"
|
|
2116
|
+
}
|
|
1886
2117
|
});
|
|
1887
2118
|
await new Promise((resolve, reject) => {
|
|
1888
2119
|
const timer = setTimeout(
|
|
1889
|
-
() => reject(new Error("
|
|
2120
|
+
() => reject(new Error("AssemblyAI connect timeout")),
|
|
1890
2121
|
CONNECT_TIMEOUT_MS2
|
|
1891
2122
|
);
|
|
1892
2123
|
this.ws.once("open", () => {
|
|
@@ -1907,33 +2138,39 @@ var CartesiaSTT = class {
|
|
|
1907
2138
|
}
|
|
1908
2139
|
this.handleEvent(event);
|
|
1909
2140
|
});
|
|
1910
|
-
this.keepaliveTimer = setInterval(() => {
|
|
1911
|
-
if (this.ws && this.ws.readyState === WebSocket5.OPEN) {
|
|
1912
|
-
try {
|
|
1913
|
-
this.ws.ping();
|
|
1914
|
-
} catch {
|
|
1915
|
-
}
|
|
1916
|
-
}
|
|
1917
|
-
}, KEEPALIVE_INTERVAL_MS2);
|
|
1918
2141
|
}
|
|
1919
2142
|
handleEvent(event) {
|
|
1920
2143
|
const type = event.type;
|
|
1921
|
-
if (type === "
|
|
1922
|
-
|
|
1923
|
-
|
|
1924
|
-
if (!text && !isFinal) return;
|
|
1925
|
-
if (event.request_id) {
|
|
1926
|
-
this.requestId = event.request_id;
|
|
1927
|
-
}
|
|
1928
|
-
if (!text) return;
|
|
1929
|
-
const confidence = Number(event.probability ?? 1);
|
|
1930
|
-
this.emit({ text, isFinal, confidence });
|
|
2144
|
+
if (type === "Begin") {
|
|
2145
|
+
this.sessionId = event.id ?? "";
|
|
2146
|
+
this.expiresAt = event.expires_at ?? 0;
|
|
1931
2147
|
return;
|
|
1932
2148
|
}
|
|
1933
|
-
if (type
|
|
1934
|
-
|
|
2149
|
+
if (type !== "Turn") {
|
|
2150
|
+
return;
|
|
2151
|
+
}
|
|
2152
|
+
const endOfTurn = Boolean(event.end_of_turn);
|
|
2153
|
+
const turnIsFormatted = Boolean(event.turn_is_formatted);
|
|
2154
|
+
const words = event.words ?? [];
|
|
2155
|
+
const transcriptText = (event.transcript ?? "").trim();
|
|
2156
|
+
if (endOfTurn) {
|
|
2157
|
+
if (this.options.formatTurns && !turnIsFormatted) return;
|
|
2158
|
+
if (!transcriptText) return;
|
|
2159
|
+
this.emit({
|
|
2160
|
+
text: transcriptText,
|
|
2161
|
+
isFinal: true,
|
|
2162
|
+
confidence: averageConfidence(words)
|
|
2163
|
+
});
|
|
1935
2164
|
return;
|
|
1936
2165
|
}
|
|
2166
|
+
if (!words.length) return;
|
|
2167
|
+
const interim = words.map((w) => (w.text ?? "").trim()).filter(Boolean).join(" ");
|
|
2168
|
+
if (!interim) return;
|
|
2169
|
+
this.emit({
|
|
2170
|
+
text: interim,
|
|
2171
|
+
isFinal: false,
|
|
2172
|
+
confidence: averageConfidence(words)
|
|
2173
|
+
});
|
|
1937
2174
|
}
|
|
1938
2175
|
emit(transcript) {
|
|
1939
2176
|
for (const cb of this.callbacks) {
|
|
@@ -1947,7 +2184,7 @@ var CartesiaSTT = class {
|
|
|
1947
2184
|
onTranscript(callback) {
|
|
1948
2185
|
if (this.callbacks.length >= MAX_CALLBACKS2) {
|
|
1949
2186
|
getLogger().warn(
|
|
1950
|
-
"
|
|
2187
|
+
"AssemblyAISTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
|
|
1951
2188
|
);
|
|
1952
2189
|
this.callbacks[this.callbacks.length - 1] = callback;
|
|
1953
2190
|
return;
|
|
@@ -1955,13 +2192,9 @@ var CartesiaSTT = class {
|
|
|
1955
2192
|
this.callbacks.push(callback);
|
|
1956
2193
|
}
|
|
1957
2194
|
close() {
|
|
1958
|
-
if (this.keepaliveTimer) {
|
|
1959
|
-
clearInterval(this.keepaliveTimer);
|
|
1960
|
-
this.keepaliveTimer = null;
|
|
1961
|
-
}
|
|
1962
2195
|
if (this.ws) {
|
|
1963
2196
|
try {
|
|
1964
|
-
this.ws.send("
|
|
2197
|
+
this.ws.send(JSON.stringify({ type: "Terminate" }));
|
|
1965
2198
|
} catch {
|
|
1966
2199
|
}
|
|
1967
2200
|
this.ws.close();
|
|
@@ -1969,6 +2202,305 @@ var CartesiaSTT = class {
|
|
|
1969
2202
|
}
|
|
1970
2203
|
}
|
|
1971
2204
|
};
|
|
2205
|
+
function averageConfidence(words) {
|
|
2206
|
+
if (!words.length) return 0;
|
|
2207
|
+
let total = 0;
|
|
2208
|
+
for (const w of words) {
|
|
2209
|
+
total += Number(w.confidence ?? 0);
|
|
2210
|
+
}
|
|
2211
|
+
return total / words.length;
|
|
2212
|
+
}
|
|
2213
|
+
|
|
2214
|
+
// src/stt/assemblyai.ts
|
|
2215
|
+
var STT5 = class extends AssemblyAISTT {
|
|
2216
|
+
constructor(opts = {}) {
|
|
2217
|
+
const key = opts.apiKey ?? process.env.ASSEMBLYAI_API_KEY;
|
|
2218
|
+
if (!key) {
|
|
2219
|
+
throw new Error(
|
|
2220
|
+
"AssemblyAI STT requires an apiKey. Pass { apiKey: '...' } or set ASSEMBLYAI_API_KEY in the environment."
|
|
2221
|
+
);
|
|
2222
|
+
}
|
|
2223
|
+
const { apiKey: _ignored, ...rest } = opts;
|
|
2224
|
+
void _ignored;
|
|
2225
|
+
super(key, rest);
|
|
2226
|
+
}
|
|
2227
|
+
};
|
|
2228
|
+
|
|
2229
|
+
// src/providers/elevenlabs-tts.ts
|
|
2230
|
+
var ELEVENLABS_BASE_URL = "https://api.elevenlabs.io/v1";
|
|
2231
|
+
var ELEVENLABS_VOICE_ID_BY_NAME = {
|
|
2232
|
+
rachel: "21m00Tcm4TlvDq8ikWAM",
|
|
2233
|
+
drew: "29vD33N1CtxCmqQRPOHJ",
|
|
2234
|
+
clyde: "2EiwWnXFnvU5JabPnv8n",
|
|
2235
|
+
paul: "5Q0t7uMcjvnagumLfvZi",
|
|
2236
|
+
domi: "AZnzlk1XvdvUeBnXmlld",
|
|
2237
|
+
dave: "CYw3kZ02Hs0563khs1Fj",
|
|
2238
|
+
fin: "D38z5RcWu1voky8WS1ja",
|
|
2239
|
+
bella: "EXAVITQu4vr4xnSDxMaL",
|
|
2240
|
+
antoni: "ErXwobaYiN019PkySvjV",
|
|
2241
|
+
thomas: "GBv7mTt0atIp3Br8iCZE",
|
|
2242
|
+
charlie: "IKne3meq5aSn9XLyUdCD",
|
|
2243
|
+
george: "JBFqnCBsd6RMkjVDRZzb",
|
|
2244
|
+
emily: "LcfcDJNUP1GQjkzn1xUU",
|
|
2245
|
+
elli: "MF3mGyEYCl7XYWbV9V6O",
|
|
2246
|
+
callum: "N2lVS1w4EtoT3dr4eOWO",
|
|
2247
|
+
patrick: "ODq5zmih8GrVes37Dizd",
|
|
2248
|
+
harry: "SOYHLrjzK2X1ezoPC6cr",
|
|
2249
|
+
liam: "TX3LPaxmHKxFdv7VOQHJ",
|
|
2250
|
+
dorothy: "ThT5KcBeYPX3keUQqHPh",
|
|
2251
|
+
josh: "TxGEqnHWrfWFTfGW9XjX",
|
|
2252
|
+
arnold: "VR6AewLTigWG4xSOukaG",
|
|
2253
|
+
charlotte: "XB0fDUnXU5powFXDhCwa",
|
|
2254
|
+
matilda: "XrExE9yKIg1WjnnlVkGX",
|
|
2255
|
+
matthew: "Yko7PKHZNXotIFUBG7I9",
|
|
2256
|
+
james: "ZQe5CZNOzWyzPSCn5a3c",
|
|
2257
|
+
joseph: "Zlb1dXrM653N07WRdFW3",
|
|
2258
|
+
jeremy: "bVMeCyTHy58xNoL34h3p",
|
|
2259
|
+
michael: "flq6f7yk4E4fJM5XTYuZ",
|
|
2260
|
+
ethan: "g5CIjZEefAph4nQFvHAz",
|
|
2261
|
+
gigi: "jBpfuIE2acCO8z3wKNLl",
|
|
2262
|
+
freya: "jsCqWAovK2LkecY7zXl4",
|
|
2263
|
+
brian: "nPczCjzI2devNBz1zQrb",
|
|
2264
|
+
grace: "oWAxZDx7w5VEj9dCyTzz",
|
|
2265
|
+
daniel: "onwK4e9ZLuTAKqWW03F9",
|
|
2266
|
+
lily: "pFZP5JQG7iQjIQuC4Bku",
|
|
2267
|
+
serena: "pMsXgVXv3BLzUgSXRplE",
|
|
2268
|
+
adam: "pNInz6obpgDQGcFmaJgB",
|
|
2269
|
+
nicole: "piTKgcLEGmPE4e6mEKli",
|
|
2270
|
+
bill: "pqHfZKP75CvOlQylNhV4",
|
|
2271
|
+
jessie: "t0jbNlBVZ17f02VDIeMI",
|
|
2272
|
+
ryan: "wViXBPUzp2ZZixB1xQuM",
|
|
2273
|
+
sam: "yoZ06aMxZJJ28mfd3POQ",
|
|
2274
|
+
glinda: "z9fAnlkpzviPz146aGWa",
|
|
2275
|
+
giovanni: "zcAOhNBS3c14rBihAFp1",
|
|
2276
|
+
mimi: "zrHiDhphv9ZnVXBqCLjz",
|
|
2277
|
+
alloy: "21m00Tcm4TlvDq8ikWAM"
|
|
2278
|
+
};
|
|
2279
|
+
var VOICE_ID_PATTERN = /^[A-Za-z0-9]{20}$/;
|
|
2280
|
+
function resolveVoiceId(voice) {
|
|
2281
|
+
if (!voice) return voice;
|
|
2282
|
+
if (VOICE_ID_PATTERN.test(voice)) return voice;
|
|
2283
|
+
return ELEVENLABS_VOICE_ID_BY_NAME[voice.toLowerCase()] ?? voice;
|
|
2284
|
+
}
|
|
2285
|
+
var ElevenLabsTTS = class {
|
|
2286
|
+
constructor(apiKey, voiceId = "21m00Tcm4TlvDq8ikWAM", modelId = "eleven_turbo_v2_5", outputFormat = "pcm_16000") {
|
|
2287
|
+
this.apiKey = apiKey;
|
|
2288
|
+
this.modelId = modelId;
|
|
2289
|
+
this.outputFormat = outputFormat;
|
|
2290
|
+
this.voiceId = resolveVoiceId(voiceId);
|
|
2291
|
+
}
|
|
2292
|
+
voiceId;
|
|
2293
|
+
/**
|
|
2294
|
+
* Synthesise text to speech and return the full audio as a single Buffer.
|
|
2295
|
+
*
|
|
2296
|
+
* For large chunks (or when latency matters) call `synthesizeStream` instead.
|
|
2297
|
+
*/
|
|
2298
|
+
async synthesize(text) {
|
|
2299
|
+
const chunks = [];
|
|
2300
|
+
for await (const chunk of this.synthesizeStream(text)) {
|
|
2301
|
+
chunks.push(chunk);
|
|
2302
|
+
}
|
|
2303
|
+
return Buffer.concat(chunks);
|
|
2304
|
+
}
|
|
2305
|
+
/**
|
|
2306
|
+
* Synthesise text and yield audio chunks as they arrive (streaming).
|
|
2307
|
+
*
|
|
2308
|
+
* The yielded buffers are raw PCM at 16 kHz (or whatever `outputFormat` is
|
|
2309
|
+
* configured to).
|
|
2310
|
+
*/
|
|
2311
|
+
async *synthesizeStream(text) {
|
|
2312
|
+
const url = `${ELEVENLABS_BASE_URL}/text-to-speech/${encodeURIComponent(this.voiceId)}/stream?output_format=${encodeURIComponent(this.outputFormat)}`;
|
|
2313
|
+
const response = await fetch(url, {
|
|
2314
|
+
method: "POST",
|
|
2315
|
+
headers: {
|
|
2316
|
+
"xi-api-key": this.apiKey,
|
|
2317
|
+
"Content-Type": "application/json"
|
|
2318
|
+
},
|
|
2319
|
+
body: JSON.stringify({ text, model_id: this.modelId }),
|
|
2320
|
+
signal: AbortSignal.timeout(3e4)
|
|
2321
|
+
});
|
|
2322
|
+
if (!response.ok) {
|
|
2323
|
+
const body = await response.text();
|
|
2324
|
+
throw new Error(`ElevenLabs TTS error ${response.status}: ${body}`);
|
|
2325
|
+
}
|
|
2326
|
+
if (!response.body) {
|
|
2327
|
+
throw new Error("ElevenLabs TTS: no response body");
|
|
2328
|
+
}
|
|
2329
|
+
const reader = response.body.getReader();
|
|
2330
|
+
try {
|
|
2331
|
+
while (true) {
|
|
2332
|
+
const { done, value } = await reader.read();
|
|
2333
|
+
if (done) break;
|
|
2334
|
+
if (value && value.length > 0) {
|
|
2335
|
+
yield Buffer.from(value);
|
|
2336
|
+
}
|
|
2337
|
+
}
|
|
2338
|
+
} finally {
|
|
2339
|
+
if (typeof reader.cancel === "function") await reader.cancel().catch(() => {
|
|
2340
|
+
});
|
|
2341
|
+
reader.releaseLock();
|
|
2342
|
+
}
|
|
2343
|
+
}
|
|
2344
|
+
};
|
|
2345
|
+
|
|
2346
|
+
// src/tts/elevenlabs.ts
|
|
2347
|
+
var TTS = class extends ElevenLabsTTS {
|
|
2348
|
+
constructor(opts = {}) {
|
|
2349
|
+
const key = opts.apiKey ?? process.env.ELEVENLABS_API_KEY;
|
|
2350
|
+
if (!key) {
|
|
2351
|
+
throw new Error(
|
|
2352
|
+
"ElevenLabs TTS requires an apiKey. Pass { apiKey: '...' } or set ELEVENLABS_API_KEY in the environment."
|
|
2353
|
+
);
|
|
2354
|
+
}
|
|
2355
|
+
super(
|
|
2356
|
+
key,
|
|
2357
|
+
opts.voiceId ?? "21m00Tcm4TlvDq8ikWAM",
|
|
2358
|
+
opts.modelId ?? "eleven_turbo_v2_5",
|
|
2359
|
+
opts.outputFormat ?? "pcm_16000"
|
|
2360
|
+
);
|
|
2361
|
+
}
|
|
2362
|
+
};
|
|
2363
|
+
|
|
2364
|
+
// src/providers/openai-tts.ts
|
|
2365
|
+
var OPENAI_TTS_URL = "https://api.openai.com/v1/audio/speech";
|
|
2366
|
+
var OpenAITTS = class _OpenAITTS {
|
|
2367
|
+
constructor(apiKey, voice = "alloy", model = "tts-1") {
|
|
2368
|
+
this.apiKey = apiKey;
|
|
2369
|
+
this.voice = voice;
|
|
2370
|
+
this.model = model;
|
|
2371
|
+
}
|
|
2372
|
+
/**
|
|
2373
|
+
* Synthesise text to speech and return the full audio as a single Buffer.
|
|
2374
|
+
*
|
|
2375
|
+
* For large chunks (or when latency matters) call `synthesizeStream` instead.
|
|
2376
|
+
*/
|
|
2377
|
+
async synthesize(text) {
|
|
2378
|
+
const chunks = [];
|
|
2379
|
+
for await (const chunk of this.synthesizeStream(text)) {
|
|
2380
|
+
chunks.push(chunk);
|
|
2381
|
+
}
|
|
2382
|
+
return Buffer.concat(chunks);
|
|
2383
|
+
}
|
|
2384
|
+
/**
|
|
2385
|
+
* Synthesise text and yield audio chunks as they arrive (streaming).
|
|
2386
|
+
*
|
|
2387
|
+
* OpenAI returns 24 kHz PCM16; each chunk is resampled to 16 kHz before
|
|
2388
|
+
* yielding so the output is ready for telephony pipelines.
|
|
2389
|
+
*
|
|
2390
|
+
* The resampler carries state (buffered samples + odd trailing byte)
|
|
2391
|
+
* between chunks — without that state cross-chunk sample alignment drifts
|
|
2392
|
+
* and the caller hears pops / dropped audio (BUG #23, mirror of the
|
|
2393
|
+
* Python `audioop.ratecv` fix).
|
|
2394
|
+
*/
|
|
2395
|
+
async *synthesizeStream(text) {
|
|
2396
|
+
const response = await fetch(OPENAI_TTS_URL, {
|
|
2397
|
+
method: "POST",
|
|
2398
|
+
headers: {
|
|
2399
|
+
"Authorization": `Bearer ${this.apiKey}`,
|
|
2400
|
+
"Content-Type": "application/json"
|
|
2401
|
+
},
|
|
2402
|
+
body: JSON.stringify({
|
|
2403
|
+
model: this.model,
|
|
2404
|
+
input: text,
|
|
2405
|
+
voice: this.voice,
|
|
2406
|
+
response_format: "pcm"
|
|
2407
|
+
}),
|
|
2408
|
+
signal: AbortSignal.timeout(3e4)
|
|
2409
|
+
});
|
|
2410
|
+
if (!response.ok) {
|
|
2411
|
+
const body = await response.text();
|
|
2412
|
+
throw new Error(`OpenAI TTS error ${response.status}: ${body}`);
|
|
2413
|
+
}
|
|
2414
|
+
if (!response.body) {
|
|
2415
|
+
throw new Error("OpenAI TTS: no response body");
|
|
2416
|
+
}
|
|
2417
|
+
const ctx = { carryByte: null, leftover: [] };
|
|
2418
|
+
const reader = response.body.getReader();
|
|
2419
|
+
try {
|
|
2420
|
+
while (true) {
|
|
2421
|
+
const { done, value } = await reader.read();
|
|
2422
|
+
if (done) break;
|
|
2423
|
+
if (value && value.length > 0) {
|
|
2424
|
+
const out = _OpenAITTS.resampleStreaming(Buffer.from(value), ctx);
|
|
2425
|
+
if (out.length > 0) yield out;
|
|
2426
|
+
}
|
|
2427
|
+
}
|
|
2428
|
+
if (ctx.leftover.length > 0) {
|
|
2429
|
+
const tail = Buffer.alloc(ctx.leftover.length * 2);
|
|
2430
|
+
for (let i = 0; i < ctx.leftover.length; i++) {
|
|
2431
|
+
tail.writeInt16LE(ctx.leftover[i], i * 2);
|
|
2432
|
+
}
|
|
2433
|
+
yield tail;
|
|
2434
|
+
}
|
|
2435
|
+
} finally {
|
|
2436
|
+
if (typeof reader.cancel === "function") await reader.cancel().catch(() => {
|
|
2437
|
+
});
|
|
2438
|
+
reader.releaseLock();
|
|
2439
|
+
}
|
|
2440
|
+
}
|
|
2441
|
+
/**
|
|
2442
|
+
* Streaming 24 kHz → 16 kHz resampler (PCM16-LE). Maintains cross-chunk
|
|
2443
|
+
* state so the 3:2 pattern doesn't reset at every network read.
|
|
2444
|
+
*/
|
|
2445
|
+
static resampleStreaming(audio, ctx) {
|
|
2446
|
+
let buf;
|
|
2447
|
+
if (ctx.carryByte !== null) {
|
|
2448
|
+
buf = Buffer.concat([Buffer.from([ctx.carryByte]), audio]);
|
|
2449
|
+
ctx.carryByte = null;
|
|
2450
|
+
} else {
|
|
2451
|
+
buf = audio;
|
|
2452
|
+
}
|
|
2453
|
+
if (buf.length % 2 === 1) {
|
|
2454
|
+
ctx.carryByte = buf[buf.length - 1];
|
|
2455
|
+
buf = buf.subarray(0, buf.length - 1);
|
|
2456
|
+
}
|
|
2457
|
+
if (buf.length === 0 && ctx.leftover.length === 0) {
|
|
2458
|
+
return Buffer.alloc(0);
|
|
2459
|
+
}
|
|
2460
|
+
const sampleCount = buf.length / 2;
|
|
2461
|
+
const samples = ctx.leftover.slice();
|
|
2462
|
+
for (let i2 = 0; i2 < sampleCount; i2++) {
|
|
2463
|
+
samples.push(buf.readInt16LE(i2 * 2));
|
|
2464
|
+
}
|
|
2465
|
+
const out = [];
|
|
2466
|
+
let i = 0;
|
|
2467
|
+
while (i + 2 < samples.length) {
|
|
2468
|
+
out.push(samples[i]);
|
|
2469
|
+
out.push(Math.trunc((samples[i + 1] + samples[i + 2]) / 2));
|
|
2470
|
+
i += 3;
|
|
2471
|
+
}
|
|
2472
|
+
ctx.leftover = samples.slice(i);
|
|
2473
|
+
const buffer = Buffer.alloc(out.length * 2);
|
|
2474
|
+
for (let j = 0; j < out.length; j++) {
|
|
2475
|
+
buffer.writeInt16LE(out[j], j * 2);
|
|
2476
|
+
}
|
|
2477
|
+
return buffer;
|
|
2478
|
+
}
|
|
2479
|
+
/** @deprecated use {@link resampleStreaming} with persistent state. */
|
|
2480
|
+
static resample24kTo16k(audio) {
|
|
2481
|
+
const ctx = { carryByte: null, leftover: [] };
|
|
2482
|
+
const out = _OpenAITTS.resampleStreaming(audio, ctx);
|
|
2483
|
+
if (ctx.leftover.length === 0) return out;
|
|
2484
|
+
const tail = Buffer.alloc(ctx.leftover.length * 2);
|
|
2485
|
+
for (let i = 0; i < ctx.leftover.length; i++) {
|
|
2486
|
+
tail.writeInt16LE(ctx.leftover[i], i * 2);
|
|
2487
|
+
}
|
|
2488
|
+
return Buffer.concat([out, tail]);
|
|
2489
|
+
}
|
|
2490
|
+
};
|
|
2491
|
+
|
|
2492
|
+
// src/tts/openai.ts
|
|
2493
|
+
var TTS2 = class extends OpenAITTS {
  /**
   * Options-object constructor for the OpenAI TTS provider.
   *
   * The API key comes from `opts.apiKey`, falling back to the
   * `OPENAI_API_KEY` environment variable. `voice` defaults to "alloy" and
   * `model` to "tts-1" when they are null or undefined.
   *
   * @param {{ apiKey?: string, voice?: string, model?: string }} [opts]
   * @throws {Error} When no API key is available from either source.
   */
  constructor(opts = {}) {
    const resolvedKey = opts.apiKey ?? process.env.OPENAI_API_KEY;
    if (resolvedKey) {
      const voice = opts.voice ?? "alloy";
      const model = opts.model ?? "tts-1";
      super(resolvedKey, voice, model);
    } else {
      throw new Error(
        "OpenAI TTS requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
      );
    }
  }
};
|
|
1972
2504
|
|
|
1973
2505
|
// src/providers/cartesia-tts.ts
|
|
1974
2506
|
var CARTESIA_BASE_URL = "https://api.cartesia.ai";
|
|
@@ -2068,6 +2600,21 @@ var CartesiaTTS = class {
|
|
|
2068
2600
|
}
|
|
2069
2601
|
};
|
|
2070
2602
|
|
|
2603
|
+
// src/tts/cartesia.ts
|
|
2604
|
+
var TTS3 = class extends CartesiaTTS {
  /**
   * Options-object constructor for the Cartesia TTS provider.
   *
   * The API key comes from `opts.apiKey`, falling back to the
   * `CARTESIA_API_KEY` environment variable. Every other option is passed
   * to the base constructor as its second argument, with `apiKey` removed.
   *
   * @param {object} [opts]
   * @throws {Error} When no API key is available from either source.
   */
  constructor(opts = {}) {
    // Separate the key from the remaining provider options in one step.
    const { apiKey, ...providerOptions } = opts;
    const resolvedKey = apiKey ?? process.env.CARTESIA_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "Cartesia TTS requires an apiKey. Pass { apiKey: '...' } or set CARTESIA_API_KEY in the environment."
      );
    }
    super(resolvedKey, providerOptions);
  }
};
|
|
2617
|
+
|
|
2071
2618
|
// src/providers/rime-tts.ts
|
|
2072
2619
|
var RIME_BASE_URL = "https://users.rime.ai/v1/rime-tts";
|
|
2073
2620
|
var ARCANA_MODEL_TIMEOUT_MS = 60 * 4 * 1e3;
|
|
@@ -2195,6 +2742,21 @@ var RimeTTS = class {
|
|
|
2195
2742
|
}
|
|
2196
2743
|
};
|
|
2197
2744
|
|
|
2745
|
+
// src/tts/rime.ts
|
|
2746
|
+
var TTS4 = class extends RimeTTS {
  /**
   * Options-object constructor for the Rime TTS provider.
   *
   * The API key comes from `opts.apiKey`, falling back to the
   * `RIME_API_KEY` environment variable. Every other option is passed to
   * the base constructor as its second argument, with `apiKey` removed.
   *
   * @param {object} [opts]
   * @throws {Error} When no API key is available from either source.
   */
  constructor(opts = {}) {
    // Separate the key from the remaining provider options in one step.
    const { apiKey, ...providerOptions } = opts;
    const resolvedKey = apiKey ?? process.env.RIME_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "Rime TTS requires an apiKey. Pass { apiKey: '...' } or set RIME_API_KEY in the environment."
      );
    }
    super(resolvedKey, providerOptions);
  }
};
|
|
2759
|
+
|
|
2198
2760
|
// src/providers/lmnt-tts.ts
|
|
2199
2761
|
var LMNT_BASE_URL = "https://api.lmnt.com/v1/ai/speech/bytes";
|
|
2200
2762
|
var LMNTTTS = class {
|
|
@@ -2273,6 +2835,119 @@ var LMNTTTS = class {
|
|
|
2273
2835
|
}
|
|
2274
2836
|
};
|
|
2275
2837
|
|
|
2838
|
+
// src/tts/lmnt.ts
|
|
2839
|
+
var TTS5 = class extends LMNTTTS {
  /**
   * Options-object constructor for the LMNT TTS provider.
   *
   * The API key comes from `opts.apiKey`, falling back to the
   * `LMNT_API_KEY` environment variable. Every other option is passed to
   * the base constructor as its second argument, with `apiKey` removed.
   *
   * @param {object} [opts]
   * @throws {Error} When no API key is available from either source.
   */
  constructor(opts = {}) {
    // Separate the key from the remaining provider options in one step.
    const { apiKey, ...providerOptions } = opts;
    const resolvedKey = apiKey ?? process.env.LMNT_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "LMNT TTS requires an apiKey. Pass { apiKey: '...' } or set LMNT_API_KEY in the environment."
      );
    }
    super(resolvedKey, providerOptions);
  }
};
|
|
2852
|
+
|
|
2853
|
+
// src/carriers/twilio.ts
|
|
2854
|
+
/**
 * Twilio carrier descriptor: holds the account SID and auth token and tags
 * itself with `kind = "twilio"`.
 */
var Carrier = class {
  kind = "twilio";
  accountSid;
  authToken;
  /**
   * Each credential is taken from `opts` first, then from the matching
   * environment variable (`TWILIO_ACCOUNT_SID`, `TWILIO_AUTH_TOKEN`).
   *
   * @param {{ accountSid?: string, authToken?: string }} [opts]
   * @throws {Error} When either credential is missing; the account SID is
   *   checked first.
   */
  constructor(opts = {}) {
    const env = process.env;
    const accountSid = opts.accountSid ?? env.TWILIO_ACCOUNT_SID;
    const authToken = opts.authToken ?? env.TWILIO_AUTH_TOKEN;
    if (!accountSid) {
      throw new Error(
        "Twilio carrier requires accountSid. Pass { accountSid: 'AC...' } or set TWILIO_ACCOUNT_SID in the environment."
      );
    }
    if (!authToken) {
      throw new Error(
        "Twilio carrier requires authToken. Pass { authToken: '...' } or set TWILIO_AUTH_TOKEN in the environment."
      );
    }
    Object.assign(this, { accountSid, authToken });
  }
};
|
|
2875
|
+
|
|
2876
|
+
// src/carriers/telnyx.ts
|
|
2877
|
+
/**
 * Telnyx carrier descriptor: holds the API key, connection id and an
 * optional public key, and tags itself with `kind = "telnyx"`.
 */
var Carrier2 = class {
  kind = "telnyx";
  apiKey;
  connectionId;
  publicKey;
  /**
   * Each value is taken from `opts` first, then from the matching
   * environment variable (`TELNYX_API_KEY`, `TELNYX_CONNECTION_ID`,
   * `TELNYX_PUBLIC_KEY`). `publicKey` is not validated and may end up
   * undefined.
   *
   * @param {{ apiKey?: string, connectionId?: string, publicKey?: string }} [opts]
   * @throws {Error} When the API key or connection id is missing; the API
   *   key is checked first.
   */
  constructor(opts = {}) {
    const env = process.env;
    const apiKey = opts.apiKey ?? env.TELNYX_API_KEY;
    const connectionId = opts.connectionId ?? env.TELNYX_CONNECTION_ID;
    const publicKey = opts.publicKey ?? env.TELNYX_PUBLIC_KEY;
    if (!apiKey) {
      throw new Error(
        "Telnyx carrier requires apiKey. Pass { apiKey: '...' } or set TELNYX_API_KEY in the environment."
      );
    }
    if (!connectionId) {
      throw new Error(
        "Telnyx carrier requires connectionId. Pass { connectionId: '...' } or set TELNYX_CONNECTION_ID in the environment."
      );
    }
    Object.assign(this, { apiKey, connectionId, publicKey });
  }
};
|
|
2901
|
+
|
|
2902
|
+
// src/public-api.ts
|
|
2903
|
+
// Replacement text used when a guardrail is created without its own.
var DEFAULT_GUARDRAIL_REPLACEMENT = "I'm sorry, I can't respond to that.";
/**
 * Named guardrail definition. Stores the name, an optional `blockedTerms`
 * value, an optional `check` value and the replacement text exactly as
 * given; how they are evaluated is decided by the code that consumes the
 * instance.
 */
var Guardrail = class {
  name;
  blockedTerms;
  check;
  replacement;
  /**
   * @param {object} opts
   * @param {string} opts.name - Required, must be non-empty.
   * @param {*} [opts.blockedTerms] - Stored by reference when truthy.
   * @param {*} [opts.check] - Stored by reference when truthy.
   * @param {string} [opts.replacement] - Defaults to
   *   DEFAULT_GUARDRAIL_REPLACEMENT when null or undefined.
   * @throws {Error} When `opts` is missing/null or `opts.name` is empty.
   */
  constructor(opts) {
    // A missing or null options object is treated as empty so the caller
    // gets the descriptive error below rather than a property-access
    // TypeError.
    const config = opts ?? {};
    if (!config.name) {
      throw new Error("Guardrail requires a non-empty name.");
    }
    this.name = config.name;
    if (config.blockedTerms) this.blockedTerms = config.blockedTerms;
    if (config.check) this.check = config.check;
    this.replacement = config.replacement ?? DEFAULT_GUARDRAIL_REPLACEMENT;
  }
};
|
|
2919
|
+
/**
 * Factory shorthand for `new Guardrail(opts)`.
 *
 * @param {object} opts - Passed unchanged to the Guardrail constructor.
 * @returns {Guardrail} The newly constructed instance.
 */
function guardrail(opts) {
  const instance = new Guardrail(opts);
  return instance;
}
|
|
2922
|
+
/**
 * Tool definition backed by exactly one of an in-process `handler`
 * function or a `webhookUrl` string.
 */
var Tool = class {
  name;
  description;
  parameters;
  handler;
  webhookUrl;
  /**
   * @param {object} opts
   * @param {string} opts.name - Required, must be non-empty.
   * @param {string} [opts.description] - Defaults to "".
   * @param {object} [opts.parameters] - Defaults to
   *   `{ type: "object", properties: {} }`.
   * @param {Function} [opts.handler] - Counts only when it is a function.
   * @param {string} [opts.webhookUrl] - Counts only when it is a non-empty
   *   string.
   * @throws {Error} When `opts` is missing/null, the name is empty, neither
   *   a handler nor a webhook URL is given, or both are given.
   */
  constructor(opts) {
    // A missing or null options object is treated as empty so the caller
    // gets the descriptive error below rather than a property-access
    // TypeError.
    const config = opts ?? {};
    if (!config.name) {
      throw new Error("Tool requires a non-empty name.");
    }
    const hasHandler = typeof config.handler === "function";
    const hasWebhook =
      typeof config.webhookUrl === "string" && config.webhookUrl.length > 0;
    if (!hasHandler && !hasWebhook) {
      throw new Error("Tool requires either handler or webhookUrl.");
    }
    if (hasHandler && hasWebhook) {
      throw new Error("Tool accepts handler OR webhookUrl, not both.");
    }
    this.name = config.name;
    this.description = config.description ?? "";
    this.parameters = config.parameters ?? { type: "object", properties: {} };
    // Only the backend that passed validation is stored; the other field
    // stays undefined.
    if (hasHandler) this.handler = config.handler;
    if (hasWebhook) this.webhookUrl = config.webhookUrl;
  }
};
|
|
2947
|
+
/**
 * Factory shorthand for `new Tool(opts)`.
 *
 * @param {object} opts - Passed unchanged to the Tool constructor.
 * @returns {Tool} The newly constructed instance.
 */
function tool(opts) {
  const instance = new Tool(opts);
  return instance;
}
|
|
2950
|
+
|
|
2276
2951
|
// src/chat-context.ts
|
|
2277
2952
|
import { randomUUID } from "crypto";
|
|
2278
2953
|
function generateId() {
|
|
@@ -2883,31 +3558,35 @@ function isAudioConfig(value) {
|
|
|
2883
3558
|
}
|
|
2884
3559
|
export {
|
|
2885
3560
|
AllProvidersFailedError,
|
|
2886
|
-
AssemblyAISTT,
|
|
3561
|
+
STT5 as AssemblyAISTT,
|
|
2887
3562
|
AuthenticationError,
|
|
2888
3563
|
BackgroundAudioPlayer,
|
|
2889
3564
|
BuiltinAudioClip,
|
|
2890
3565
|
CallMetricsAccumulator,
|
|
2891
|
-
CartesiaSTT,
|
|
2892
|
-
CartesiaTTS,
|
|
3566
|
+
STT3 as CartesiaSTT,
|
|
3567
|
+
TTS3 as CartesiaTTS,
|
|
2893
3568
|
ChatContext,
|
|
3569
|
+
CloudflareTunnel,
|
|
2894
3570
|
DEFAULT_MIN_SENTENCE_LEN,
|
|
2895
3571
|
DEFAULT_PRICING,
|
|
2896
3572
|
DTMF_EVENTS,
|
|
2897
|
-
DeepgramSTT,
|
|
3573
|
+
STT as DeepgramSTT,
|
|
3574
|
+
ConvAI as ElevenLabsConvAI,
|
|
2898
3575
|
ElevenLabsConvAIAdapter,
|
|
2899
|
-
ElevenLabsTTS,
|
|
3576
|
+
TTS as ElevenLabsTTS,
|
|
2900
3577
|
FallbackLLMProvider,
|
|
2901
3578
|
GEMINI_DEFAULT_INPUT_SR,
|
|
2902
3579
|
GEMINI_DEFAULT_OUTPUT_SR,
|
|
2903
3580
|
GeminiLiveAdapter,
|
|
3581
|
+
Guardrail,
|
|
2904
3582
|
IVRActivity,
|
|
2905
3583
|
LLMLoop,
|
|
2906
|
-
LMNTTTS,
|
|
3584
|
+
TTS5 as LMNTTTS,
|
|
2907
3585
|
MetricsStore,
|
|
2908
3586
|
OpenAILLMProvider,
|
|
3587
|
+
Realtime as OpenAIRealtime,
|
|
2909
3588
|
OpenAIRealtimeAdapter,
|
|
2910
|
-
OpenAITTS,
|
|
3589
|
+
TTS2 as OpenAITTS,
|
|
2911
3590
|
PartialStreamError,
|
|
2912
3591
|
Patter,
|
|
2913
3592
|
PatterConnectionError,
|
|
@@ -2915,15 +3594,19 @@ export {
|
|
|
2915
3594
|
PipelineHookExecutor,
|
|
2916
3595
|
ProvisionError,
|
|
2917
3596
|
RemoteMessageHandler,
|
|
2918
|
-
RimeTTS,
|
|
3597
|
+
TTS4 as RimeTTS,
|
|
2919
3598
|
SentenceChunker,
|
|
2920
|
-
SonioxSTT,
|
|
3599
|
+
STT4 as SonioxSTT,
|
|
3600
|
+
Static as StaticTunnel,
|
|
3601
|
+
Carrier2 as Telnyx,
|
|
2921
3602
|
TestSession,
|
|
2922
3603
|
TfidfLoopDetector,
|
|
3604
|
+
Tool,
|
|
3605
|
+
Carrier as Twilio,
|
|
2923
3606
|
ULTRAVOX_DEFAULT_API_BASE,
|
|
2924
3607
|
ULTRAVOX_DEFAULT_SR,
|
|
2925
3608
|
UltravoxRealtimeAdapter,
|
|
2926
|
-
WhisperSTT,
|
|
3609
|
+
STT2 as WhisperSTT,
|
|
2927
3610
|
builtinClipPath,
|
|
2928
3611
|
calculateRealtimeCost,
|
|
2929
3612
|
calculateSttCost,
|
|
@@ -2939,6 +3622,7 @@ export {
|
|
|
2939
3622
|
filterMarkdown,
|
|
2940
3623
|
formatDtmf,
|
|
2941
3624
|
getLogger,
|
|
3625
|
+
guardrail,
|
|
2942
3626
|
isRemoteUrl,
|
|
2943
3627
|
isWebSocketUrl,
|
|
2944
3628
|
makeAuthMiddleware,
|
|
@@ -2960,5 +3644,6 @@ export {
|
|
|
2960
3644
|
selectSoundFromList,
|
|
2961
3645
|
setLogger,
|
|
2962
3646
|
startTunnel,
|
|
3647
|
+
tool,
|
|
2963
3648
|
whisper
|
|
2964
3649
|
};
|