getpatter 0.4.4 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +136 -162
- package/dist/carrier-config-CPG5CROM.mjs +84 -0
- package/dist/{chunk-O3RQG3NL.mjs → chunk-757NVN4L.mjs} +129 -544
- package/dist/index.d.mts +771 -292
- package/dist/index.d.ts +771 -292
- package/dist/index.js +1414 -1061
- package/dist/index.mjs +1141 -456
- package/dist/{test-mode-ASSLSQU2.mjs → test-mode-YFOL2HYH.mjs} +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -305,254 +305,16 @@ var init_elevenlabs_convai = __esm({
|
|
|
305
305
|
}
|
|
306
306
|
});
|
|
307
307
|
|
|
308
|
-
// src/
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
"src/providers/deepgram-stt.ts"() {
|
|
312
|
-
"use strict";
|
|
313
|
-
import_ws4 = __toESM(require("ws"));
|
|
314
|
-
init_logger();
|
|
315
|
-
DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/listen";
|
|
316
|
-
DeepgramSTT = class _DeepgramSTT {
|
|
317
|
-
ws = null;
|
|
318
|
-
callbacks = [];
|
|
319
|
-
/** Request ID from Deepgram — used to query actual cost post-call. */
|
|
320
|
-
requestId = "";
|
|
321
|
-
apiKey;
|
|
322
|
-
language;
|
|
323
|
-
model;
|
|
324
|
-
encoding;
|
|
325
|
-
sampleRate;
|
|
326
|
-
endpointingMs;
|
|
327
|
-
utteranceEndMs;
|
|
328
|
-
smartFormat;
|
|
329
|
-
interimResults;
|
|
330
|
-
vadEvents;
|
|
331
|
-
constructor(apiKey, languageOrOptions, model, encoding, sampleRate, options) {
|
|
332
|
-
this.apiKey = apiKey;
|
|
333
|
-
const opts = typeof languageOrOptions === "object" && languageOrOptions !== null ? languageOrOptions : options ?? {};
|
|
334
|
-
this.language = (typeof languageOrOptions === "string" ? languageOrOptions : opts.language) ?? "en";
|
|
335
|
-
this.model = model ?? opts.model ?? "nova-3";
|
|
336
|
-
this.encoding = encoding ?? opts.encoding ?? "linear16";
|
|
337
|
-
this.sampleRate = sampleRate ?? opts.sampleRate ?? 16e3;
|
|
338
|
-
this.endpointingMs = opts.endpointingMs ?? 150;
|
|
339
|
-
this.utteranceEndMs = opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3;
|
|
340
|
-
this.smartFormat = opts.smartFormat ?? true;
|
|
341
|
-
this.interimResults = opts.interimResults ?? true;
|
|
342
|
-
this.vadEvents = opts.vadEvents ?? true;
|
|
343
|
-
}
|
|
344
|
-
/** Factory for Twilio calls — mulaw 8 kHz. Forwards tuning options through. */
|
|
345
|
-
static forTwilio(apiKey, language = "en", model = "nova-3", options = {}) {
|
|
346
|
-
return new _DeepgramSTT(apiKey, language, model, "mulaw", 8e3, options);
|
|
347
|
-
}
|
|
348
|
-
async connect() {
|
|
349
|
-
const params = new URLSearchParams({
|
|
350
|
-
model: this.model,
|
|
351
|
-
language: this.language,
|
|
352
|
-
encoding: this.encoding,
|
|
353
|
-
sample_rate: String(this.sampleRate),
|
|
354
|
-
channels: "1",
|
|
355
|
-
interim_results: this.interimResults ? "true" : "false",
|
|
356
|
-
endpointing: String(this.endpointingMs),
|
|
357
|
-
smart_format: this.smartFormat ? "true" : "false",
|
|
358
|
-
vad_events: this.vadEvents ? "true" : "false",
|
|
359
|
-
no_delay: "true"
|
|
360
|
-
});
|
|
361
|
-
if (this.utteranceEndMs !== null) {
|
|
362
|
-
params.set("utterance_end_ms", String(Math.max(this.utteranceEndMs, 1e3)));
|
|
363
|
-
}
|
|
364
|
-
const url = `${DEEPGRAM_WS_URL}?${params.toString()}`;
|
|
365
|
-
this.ws = new import_ws4.default(url, {
|
|
366
|
-
headers: { Authorization: `Token ${this.apiKey}` }
|
|
367
|
-
});
|
|
368
|
-
await new Promise((resolve, reject) => {
|
|
369
|
-
const timer = setTimeout(() => reject(new Error("Deepgram connect timeout")), 1e4);
|
|
370
|
-
this.ws.once("open", () => {
|
|
371
|
-
clearTimeout(timer);
|
|
372
|
-
resolve();
|
|
373
|
-
});
|
|
374
|
-
this.ws.once("error", (err) => {
|
|
375
|
-
clearTimeout(timer);
|
|
376
|
-
reject(err);
|
|
377
|
-
});
|
|
378
|
-
});
|
|
379
|
-
this.ws.on("message", (raw) => {
|
|
380
|
-
let data;
|
|
381
|
-
try {
|
|
382
|
-
data = JSON.parse(raw.toString());
|
|
383
|
-
} catch {
|
|
384
|
-
return;
|
|
385
|
-
}
|
|
386
|
-
if (data.type === "Metadata" && data.request_id) {
|
|
387
|
-
this.requestId = data.request_id;
|
|
388
|
-
return;
|
|
389
|
-
}
|
|
390
|
-
if (data.type !== "Results") return;
|
|
391
|
-
const alternatives = data.channel?.alternatives ?? [];
|
|
392
|
-
if (!alternatives.length) return;
|
|
393
|
-
const best = alternatives[0];
|
|
394
|
-
const text = (best.transcript ?? "").trim();
|
|
395
|
-
if (!text) return;
|
|
396
|
-
const transcript = {
|
|
397
|
-
text,
|
|
398
|
-
isFinal: Boolean(data.is_final) || Boolean(data.speech_final),
|
|
399
|
-
confidence: best.confidence ?? 0
|
|
400
|
-
};
|
|
401
|
-
for (const cb of this.callbacks) {
|
|
402
|
-
cb(transcript);
|
|
403
|
-
}
|
|
404
|
-
});
|
|
405
|
-
}
|
|
406
|
-
sendAudio(audio) {
|
|
407
|
-
if (!this.ws || this.ws.readyState !== import_ws4.default.OPEN) return;
|
|
408
|
-
this.ws.send(audio);
|
|
409
|
-
}
|
|
410
|
-
onTranscript(callback) {
|
|
411
|
-
if (this.callbacks.length >= 10) {
|
|
412
|
-
getLogger().warn("DeepgramSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
|
|
413
|
-
this.callbacks[this.callbacks.length - 1] = callback;
|
|
414
|
-
return;
|
|
415
|
-
}
|
|
416
|
-
this.callbacks.push(callback);
|
|
417
|
-
}
|
|
418
|
-
close() {
|
|
419
|
-
if (this.ws) {
|
|
420
|
-
try {
|
|
421
|
-
this.ws.send(JSON.stringify({ type: "CloseStream" }));
|
|
422
|
-
} catch {
|
|
423
|
-
}
|
|
424
|
-
this.ws.close();
|
|
425
|
-
this.ws = null;
|
|
426
|
-
}
|
|
427
|
-
}
|
|
428
|
-
};
|
|
429
|
-
}
|
|
430
|
-
});
|
|
431
|
-
|
|
432
|
-
// src/providers/whisper-stt.ts
|
|
433
|
-
function wrapPcmInWav(pcm, sampleRate = 16e3, channels = 1, bitsPerSample = 16) {
|
|
434
|
-
const dataSize = pcm.length;
|
|
435
|
-
const header = Buffer.alloc(44);
|
|
436
|
-
header.write("RIFF", 0);
|
|
437
|
-
header.writeUInt32LE(36 + dataSize, 4);
|
|
438
|
-
header.write("WAVE", 8);
|
|
439
|
-
header.write("fmt ", 12);
|
|
440
|
-
header.writeUInt32LE(16, 16);
|
|
441
|
-
header.writeUInt16LE(1, 20);
|
|
442
|
-
header.writeUInt16LE(channels, 22);
|
|
443
|
-
header.writeUInt32LE(sampleRate, 24);
|
|
444
|
-
header.writeUInt32LE(sampleRate * channels * (bitsPerSample / 8), 28);
|
|
445
|
-
header.writeUInt16LE(channels * (bitsPerSample / 8), 32);
|
|
446
|
-
header.writeUInt16LE(bitsPerSample, 34);
|
|
447
|
-
header.write("data", 36);
|
|
448
|
-
header.writeUInt32LE(dataSize, 40);
|
|
449
|
-
return Buffer.concat([header, pcm]);
|
|
308
|
+
// src/provider-factory.ts
|
|
309
|
+
async function createSTT(agent) {
|
|
310
|
+
return agent.stt ?? null;
|
|
450
311
|
}
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
312
|
+
async function createTTS(agent) {
|
|
313
|
+
return agent.tts ?? null;
|
|
314
|
+
}
|
|
315
|
+
var init_provider_factory = __esm({
|
|
316
|
+
"src/provider-factory.ts"() {
|
|
454
317
|
"use strict";
|
|
455
|
-
init_logger();
|
|
456
|
-
OPENAI_TRANSCRIPTION_URL = "https://api.openai.com/v1/audio/transcriptions";
|
|
457
|
-
DEFAULT_BUFFER_SIZE = 16e3 * 2;
|
|
458
|
-
WhisperSTT = class _WhisperSTT {
|
|
459
|
-
apiKey;
|
|
460
|
-
model;
|
|
461
|
-
language;
|
|
462
|
-
bufferSize;
|
|
463
|
-
buffer = Buffer.alloc(0);
|
|
464
|
-
callbacks = [];
|
|
465
|
-
running = false;
|
|
466
|
-
pendingTranscriptions = [];
|
|
467
|
-
constructor(apiKey, model = "whisper-1", language, bufferSize = DEFAULT_BUFFER_SIZE) {
|
|
468
|
-
this.apiKey = apiKey;
|
|
469
|
-
this.model = model;
|
|
470
|
-
this.language = language;
|
|
471
|
-
this.bufferSize = bufferSize;
|
|
472
|
-
}
|
|
473
|
-
/** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
|
|
474
|
-
static forTwilio(apiKey, language = "en", model = "whisper-1") {
|
|
475
|
-
return new _WhisperSTT(apiKey, model, language);
|
|
476
|
-
}
|
|
477
|
-
async connect() {
|
|
478
|
-
this.running = true;
|
|
479
|
-
this.buffer = Buffer.alloc(0);
|
|
480
|
-
}
|
|
481
|
-
sendAudio(audio) {
|
|
482
|
-
if (!this.running) return;
|
|
483
|
-
this.buffer = Buffer.concat([this.buffer, audio]);
|
|
484
|
-
if (this.buffer.length >= this.bufferSize) {
|
|
485
|
-
const pcm = this.buffer;
|
|
486
|
-
this.buffer = Buffer.alloc(0);
|
|
487
|
-
this.trackTranscription(this.transcribeBuffer(pcm));
|
|
488
|
-
}
|
|
489
|
-
}
|
|
490
|
-
trackTranscription(promise) {
|
|
491
|
-
const wrapped = promise.finally(() => {
|
|
492
|
-
const idx = this.pendingTranscriptions.indexOf(wrapped);
|
|
493
|
-
if (idx !== -1) this.pendingTranscriptions.splice(idx, 1);
|
|
494
|
-
});
|
|
495
|
-
this.pendingTranscriptions.push(wrapped);
|
|
496
|
-
}
|
|
497
|
-
onTranscript(callback) {
|
|
498
|
-
if (this.callbacks.length >= 10) {
|
|
499
|
-
getLogger().warn("WhisperSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
|
|
500
|
-
this.callbacks[this.callbacks.length - 1] = callback;
|
|
501
|
-
return;
|
|
502
|
-
}
|
|
503
|
-
this.callbacks.push(callback);
|
|
504
|
-
}
|
|
505
|
-
async close() {
|
|
506
|
-
this.running = false;
|
|
507
|
-
if (this.buffer.length >= this.bufferSize / 4) {
|
|
508
|
-
const pcm = this.buffer;
|
|
509
|
-
this.buffer = Buffer.alloc(0);
|
|
510
|
-
this.trackTranscription(this.transcribeBuffer(pcm));
|
|
511
|
-
} else {
|
|
512
|
-
this.buffer = Buffer.alloc(0);
|
|
513
|
-
}
|
|
514
|
-
await Promise.allSettled(this.pendingTranscriptions);
|
|
515
|
-
this.callbacks = [];
|
|
516
|
-
}
|
|
517
|
-
// ------------------------------------------------------------------
|
|
518
|
-
// Private
|
|
519
|
-
// ------------------------------------------------------------------
|
|
520
|
-
async transcribeBuffer(pcm) {
|
|
521
|
-
const wav = wrapPcmInWav(pcm);
|
|
522
|
-
const formData = new FormData();
|
|
523
|
-
formData.append("file", new Blob([wav.buffer.slice(wav.byteOffset, wav.byteOffset + wav.byteLength)], { type: "audio/wav" }), "audio.wav");
|
|
524
|
-
formData.append("model", this.model);
|
|
525
|
-
if (this.language) {
|
|
526
|
-
formData.append("language", this.language);
|
|
527
|
-
}
|
|
528
|
-
try {
|
|
529
|
-
const resp = await fetch(OPENAI_TRANSCRIPTION_URL, {
|
|
530
|
-
method: "POST",
|
|
531
|
-
headers: { Authorization: `Bearer ${this.apiKey}` },
|
|
532
|
-
body: formData,
|
|
533
|
-
signal: AbortSignal.timeout(15e3)
|
|
534
|
-
});
|
|
535
|
-
if (!resp.ok) {
|
|
536
|
-
const body = await resp.text();
|
|
537
|
-
getLogger().error(`WhisperSTT transcription error: ${resp.status} ${body}`);
|
|
538
|
-
return;
|
|
539
|
-
}
|
|
540
|
-
const json = await resp.json();
|
|
541
|
-
const text = (json.text ?? "").trim();
|
|
542
|
-
if (!text) return;
|
|
543
|
-
const transcript = {
|
|
544
|
-
text,
|
|
545
|
-
isFinal: true,
|
|
546
|
-
confidence: 1
|
|
547
|
-
};
|
|
548
|
-
for (const cb of this.callbacks) {
|
|
549
|
-
cb(transcript);
|
|
550
|
-
}
|
|
551
|
-
} catch (err) {
|
|
552
|
-
getLogger().error(`WhisperSTT transcription error: ${String(err)}`);
|
|
553
|
-
}
|
|
554
|
-
}
|
|
555
|
-
};
|
|
556
318
|
}
|
|
557
319
|
});
|
|
558
320
|
|
|
@@ -1972,258 +1734,125 @@ var init_remote_message = __esm({
|
|
|
1972
1734
|
}
|
|
1973
1735
|
});
|
|
1974
1736
|
|
|
1975
|
-
// src/providers/
|
|
1976
|
-
|
|
1977
|
-
|
|
1978
|
-
|
|
1979
|
-
return ELEVENLABS_VOICE_ID_BY_NAME[voice.toLowerCase()] ?? voice;
|
|
1980
|
-
}
|
|
1981
|
-
var ELEVENLABS_BASE_URL, ELEVENLABS_VOICE_ID_BY_NAME, VOICE_ID_PATTERN, ElevenLabsTTS;
|
|
1982
|
-
var init_elevenlabs_tts = __esm({
|
|
1983
|
-
"src/providers/elevenlabs-tts.ts"() {
|
|
1737
|
+
// src/providers/deepgram-stt.ts
|
|
1738
|
+
var import_ws4, DEEPGRAM_WS_URL, DeepgramSTT;
|
|
1739
|
+
var init_deepgram_stt = __esm({
|
|
1740
|
+
"src/providers/deepgram-stt.ts"() {
|
|
1984
1741
|
"use strict";
|
|
1985
|
-
|
|
1986
|
-
|
|
1987
|
-
|
|
1988
|
-
|
|
1989
|
-
|
|
1990
|
-
|
|
1991
|
-
|
|
1992
|
-
|
|
1993
|
-
|
|
1994
|
-
|
|
1995
|
-
|
|
1996
|
-
|
|
1997
|
-
|
|
1998
|
-
|
|
1999
|
-
|
|
2000
|
-
|
|
2001
|
-
|
|
2002
|
-
|
|
2003
|
-
|
|
2004
|
-
liam: "TX3LPaxmHKxFdv7VOQHJ",
|
|
2005
|
-
dorothy: "ThT5KcBeYPX3keUQqHPh",
|
|
2006
|
-
josh: "TxGEqnHWrfWFTfGW9XjX",
|
|
2007
|
-
arnold: "VR6AewLTigWG4xSOukaG",
|
|
2008
|
-
charlotte: "XB0fDUnXU5powFXDhCwa",
|
|
2009
|
-
matilda: "XrExE9yKIg1WjnnlVkGX",
|
|
2010
|
-
matthew: "Yko7PKHZNXotIFUBG7I9",
|
|
2011
|
-
james: "ZQe5CZNOzWyzPSCn5a3c",
|
|
2012
|
-
joseph: "Zlb1dXrM653N07WRdFW3",
|
|
2013
|
-
jeremy: "bVMeCyTHy58xNoL34h3p",
|
|
2014
|
-
michael: "flq6f7yk4E4fJM5XTYuZ",
|
|
2015
|
-
ethan: "g5CIjZEefAph4nQFvHAz",
|
|
2016
|
-
gigi: "jBpfuIE2acCO8z3wKNLl",
|
|
2017
|
-
freya: "jsCqWAovK2LkecY7zXl4",
|
|
2018
|
-
brian: "nPczCjzI2devNBz1zQrb",
|
|
2019
|
-
grace: "oWAxZDx7w5VEj9dCyTzz",
|
|
2020
|
-
daniel: "onwK4e9ZLuTAKqWW03F9",
|
|
2021
|
-
lily: "pFZP5JQG7iQjIQuC4Bku",
|
|
2022
|
-
serena: "pMsXgVXv3BLzUgSXRplE",
|
|
2023
|
-
adam: "pNInz6obpgDQGcFmaJgB",
|
|
2024
|
-
nicole: "piTKgcLEGmPE4e6mEKli",
|
|
2025
|
-
bill: "pqHfZKP75CvOlQylNhV4",
|
|
2026
|
-
jessie: "t0jbNlBVZ17f02VDIeMI",
|
|
2027
|
-
ryan: "wViXBPUzp2ZZixB1xQuM",
|
|
2028
|
-
sam: "yoZ06aMxZJJ28mfd3POQ",
|
|
2029
|
-
glinda: "z9fAnlkpzviPz146aGWa",
|
|
2030
|
-
giovanni: "zcAOhNBS3c14rBihAFp1",
|
|
2031
|
-
mimi: "zrHiDhphv9ZnVXBqCLjz",
|
|
2032
|
-
alloy: "21m00Tcm4TlvDq8ikWAM"
|
|
2033
|
-
};
|
|
2034
|
-
VOICE_ID_PATTERN = /^[A-Za-z0-9]{20}$/;
|
|
2035
|
-
ElevenLabsTTS = class {
|
|
2036
|
-
constructor(apiKey, voiceId = "21m00Tcm4TlvDq8ikWAM", modelId = "eleven_turbo_v2_5", outputFormat = "pcm_16000") {
|
|
1742
|
+
import_ws4 = __toESM(require("ws"));
|
|
1743
|
+
init_logger();
|
|
1744
|
+
DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/listen";
|
|
1745
|
+
DeepgramSTT = class _DeepgramSTT {
|
|
1746
|
+
ws = null;
|
|
1747
|
+
callbacks = [];
|
|
1748
|
+
/** Request ID from Deepgram — used to query actual cost post-call. */
|
|
1749
|
+
requestId = "";
|
|
1750
|
+
apiKey;
|
|
1751
|
+
language;
|
|
1752
|
+
model;
|
|
1753
|
+
encoding;
|
|
1754
|
+
sampleRate;
|
|
1755
|
+
endpointingMs;
|
|
1756
|
+
utteranceEndMs;
|
|
1757
|
+
smartFormat;
|
|
1758
|
+
interimResults;
|
|
1759
|
+
vadEvents;
|
|
1760
|
+
constructor(apiKey, languageOrOptions, model, encoding, sampleRate, options) {
|
|
2037
1761
|
this.apiKey = apiKey;
|
|
2038
|
-
|
|
2039
|
-
this.
|
|
2040
|
-
this.
|
|
1762
|
+
const opts = typeof languageOrOptions === "object" && languageOrOptions !== null ? languageOrOptions : options ?? {};
|
|
1763
|
+
this.language = (typeof languageOrOptions === "string" ? languageOrOptions : opts.language) ?? "en";
|
|
1764
|
+
this.model = model ?? opts.model ?? "nova-3";
|
|
1765
|
+
this.encoding = encoding ?? opts.encoding ?? "linear16";
|
|
1766
|
+
this.sampleRate = sampleRate ?? opts.sampleRate ?? 16e3;
|
|
1767
|
+
this.endpointingMs = opts.endpointingMs ?? 150;
|
|
1768
|
+
this.utteranceEndMs = opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3;
|
|
1769
|
+
this.smartFormat = opts.smartFormat ?? true;
|
|
1770
|
+
this.interimResults = opts.interimResults ?? true;
|
|
1771
|
+
this.vadEvents = opts.vadEvents ?? true;
|
|
2041
1772
|
}
|
|
2042
|
-
|
|
2043
|
-
|
|
2044
|
-
|
|
2045
|
-
*
|
|
2046
|
-
* For large chunks (or when latency matters) call `synthesizeStream` instead.
|
|
2047
|
-
*/
|
|
2048
|
-
async synthesize(text) {
|
|
2049
|
-
const chunks = [];
|
|
2050
|
-
for await (const chunk of this.synthesizeStream(text)) {
|
|
2051
|
-
chunks.push(chunk);
|
|
2052
|
-
}
|
|
2053
|
-
return Buffer.concat(chunks);
|
|
1773
|
+
/** Factory for Twilio calls — mulaw 8 kHz. Forwards tuning options through. */
|
|
1774
|
+
static forTwilio(apiKey, language = "en", model = "nova-3", options = {}) {
|
|
1775
|
+
return new _DeepgramSTT(apiKey, language, model, "mulaw", 8e3, options);
|
|
2054
1776
|
}
|
|
2055
|
-
|
|
2056
|
-
|
|
2057
|
-
|
|
2058
|
-
|
|
2059
|
-
|
|
2060
|
-
|
|
2061
|
-
|
|
2062
|
-
|
|
2063
|
-
|
|
2064
|
-
|
|
2065
|
-
|
|
2066
|
-
|
|
2067
|
-
"Content-Type": "application/json"
|
|
2068
|
-
},
|
|
2069
|
-
body: JSON.stringify({ text, model_id: this.modelId }),
|
|
2070
|
-
signal: AbortSignal.timeout(3e4)
|
|
1777
|
+
async connect() {
|
|
1778
|
+
const params = new URLSearchParams({
|
|
1779
|
+
model: this.model,
|
|
1780
|
+
language: this.language,
|
|
1781
|
+
encoding: this.encoding,
|
|
1782
|
+
sample_rate: String(this.sampleRate),
|
|
1783
|
+
channels: "1",
|
|
1784
|
+
interim_results: this.interimResults ? "true" : "false",
|
|
1785
|
+
endpointing: String(this.endpointingMs),
|
|
1786
|
+
smart_format: this.smartFormat ? "true" : "false",
|
|
1787
|
+
vad_events: this.vadEvents ? "true" : "false",
|
|
1788
|
+
no_delay: "true"
|
|
2071
1789
|
});
|
|
2072
|
-
if (
|
|
2073
|
-
|
|
2074
|
-
throw new Error(`ElevenLabs TTS error ${response.status}: ${body}`);
|
|
2075
|
-
}
|
|
2076
|
-
if (!response.body) {
|
|
2077
|
-
throw new Error("ElevenLabs TTS: no response body");
|
|
1790
|
+
if (this.utteranceEndMs !== null) {
|
|
1791
|
+
params.set("utterance_end_ms", String(Math.max(this.utteranceEndMs, 1e3)));
|
|
2078
1792
|
}
|
|
2079
|
-
const
|
|
2080
|
-
|
|
2081
|
-
|
|
2082
|
-
|
|
2083
|
-
|
|
2084
|
-
|
|
2085
|
-
|
|
2086
|
-
|
|
2087
|
-
|
|
2088
|
-
|
|
2089
|
-
|
|
1793
|
+
const url = `${DEEPGRAM_WS_URL}?${params.toString()}`;
|
|
1794
|
+
this.ws = new import_ws4.default(url, {
|
|
1795
|
+
headers: { Authorization: `Token ${this.apiKey}` }
|
|
1796
|
+
});
|
|
1797
|
+
await new Promise((resolve, reject) => {
|
|
1798
|
+
const timer = setTimeout(() => reject(new Error("Deepgram connect timeout")), 1e4);
|
|
1799
|
+
this.ws.once("open", () => {
|
|
1800
|
+
clearTimeout(timer);
|
|
1801
|
+
resolve();
|
|
1802
|
+
});
|
|
1803
|
+
this.ws.once("error", (err) => {
|
|
1804
|
+
clearTimeout(timer);
|
|
1805
|
+
reject(err);
|
|
2090
1806
|
});
|
|
2091
|
-
reader.releaseLock();
|
|
2092
|
-
}
|
|
2093
|
-
}
|
|
2094
|
-
};
|
|
2095
|
-
}
|
|
2096
|
-
});
|
|
2097
|
-
|
|
2098
|
-
// src/providers/openai-tts.ts
|
|
2099
|
-
var OPENAI_TTS_URL, OpenAITTS;
|
|
2100
|
-
var init_openai_tts = __esm({
|
|
2101
|
-
"src/providers/openai-tts.ts"() {
|
|
2102
|
-
"use strict";
|
|
2103
|
-
OPENAI_TTS_URL = "https://api.openai.com/v1/audio/speech";
|
|
2104
|
-
OpenAITTS = class _OpenAITTS {
|
|
2105
|
-
constructor(apiKey, voice = "alloy", model = "tts-1") {
|
|
2106
|
-
this.apiKey = apiKey;
|
|
2107
|
-
this.voice = voice;
|
|
2108
|
-
this.model = model;
|
|
2109
|
-
}
|
|
2110
|
-
/**
|
|
2111
|
-
* Synthesise text to speech and return the full audio as a single Buffer.
|
|
2112
|
-
*
|
|
2113
|
-
* For large chunks (or when latency matters) call `synthesizeStream` instead.
|
|
2114
|
-
*/
|
|
2115
|
-
async synthesize(text) {
|
|
2116
|
-
const chunks = [];
|
|
2117
|
-
for await (const chunk of this.synthesizeStream(text)) {
|
|
2118
|
-
chunks.push(chunk);
|
|
2119
|
-
}
|
|
2120
|
-
return Buffer.concat(chunks);
|
|
2121
|
-
}
|
|
2122
|
-
/**
|
|
2123
|
-
* Synthesise text and yield audio chunks as they arrive (streaming).
|
|
2124
|
-
*
|
|
2125
|
-
* OpenAI returns 24 kHz PCM16; each chunk is resampled to 16 kHz before
|
|
2126
|
-
* yielding so the output is ready for telephony pipelines.
|
|
2127
|
-
*
|
|
2128
|
-
* The resampler carries state (buffered samples + odd trailing byte)
|
|
2129
|
-
* between chunks — without that state cross-chunk sample alignment drifts
|
|
2130
|
-
* and the caller hears pops / dropped audio (BUG #23, mirror of the
|
|
2131
|
-
* Python `audioop.ratecv` fix).
|
|
2132
|
-
*/
|
|
2133
|
-
async *synthesizeStream(text) {
|
|
2134
|
-
const response = await fetch(OPENAI_TTS_URL, {
|
|
2135
|
-
method: "POST",
|
|
2136
|
-
headers: {
|
|
2137
|
-
"Authorization": `Bearer ${this.apiKey}`,
|
|
2138
|
-
"Content-Type": "application/json"
|
|
2139
|
-
},
|
|
2140
|
-
body: JSON.stringify({
|
|
2141
|
-
model: this.model,
|
|
2142
|
-
input: text,
|
|
2143
|
-
voice: this.voice,
|
|
2144
|
-
response_format: "pcm"
|
|
2145
|
-
}),
|
|
2146
|
-
signal: AbortSignal.timeout(3e4)
|
|
2147
1807
|
});
|
|
2148
|
-
|
|
2149
|
-
|
|
2150
|
-
|
|
2151
|
-
|
|
2152
|
-
|
|
2153
|
-
|
|
2154
|
-
}
|
|
2155
|
-
const ctx = { carryByte: null, leftover: [] };
|
|
2156
|
-
const reader = response.body.getReader();
|
|
2157
|
-
try {
|
|
2158
|
-
while (true) {
|
|
2159
|
-
const { done, value } = await reader.read();
|
|
2160
|
-
if (done) break;
|
|
2161
|
-
if (value && value.length > 0) {
|
|
2162
|
-
const out = _OpenAITTS.resampleStreaming(Buffer.from(value), ctx);
|
|
2163
|
-
if (out.length > 0) yield out;
|
|
2164
|
-
}
|
|
1808
|
+
this.ws.on("message", (raw) => {
|
|
1809
|
+
let data;
|
|
1810
|
+
try {
|
|
1811
|
+
data = JSON.parse(raw.toString());
|
|
1812
|
+
} catch {
|
|
1813
|
+
return;
|
|
2165
1814
|
}
|
|
2166
|
-
if (
|
|
2167
|
-
|
|
2168
|
-
|
|
2169
|
-
tail.writeInt16LE(ctx.leftover[i], i * 2);
|
|
2170
|
-
}
|
|
2171
|
-
yield tail;
|
|
1815
|
+
if (data.type === "Metadata" && data.request_id) {
|
|
1816
|
+
this.requestId = data.request_id;
|
|
1817
|
+
return;
|
|
2172
1818
|
}
|
|
2173
|
-
|
|
2174
|
-
|
|
2175
|
-
|
|
2176
|
-
|
|
1819
|
+
if (data.type !== "Results") return;
|
|
1820
|
+
const alternatives = data.channel?.alternatives ?? [];
|
|
1821
|
+
if (!alternatives.length) return;
|
|
1822
|
+
const best = alternatives[0];
|
|
1823
|
+
const text = (best.transcript ?? "").trim();
|
|
1824
|
+
if (!text) return;
|
|
1825
|
+
const transcript = {
|
|
1826
|
+
text,
|
|
1827
|
+
isFinal: Boolean(data.is_final) || Boolean(data.speech_final),
|
|
1828
|
+
confidence: best.confidence ?? 0
|
|
1829
|
+
};
|
|
1830
|
+
for (const cb of this.callbacks) {
|
|
1831
|
+
cb(transcript);
|
|
1832
|
+
}
|
|
1833
|
+
});
|
|
1834
|
+
}
|
|
1835
|
+
sendAudio(audio) {
|
|
1836
|
+
if (!this.ws || this.ws.readyState !== import_ws4.default.OPEN) return;
|
|
1837
|
+
this.ws.send(audio);
|
|
1838
|
+
}
|
|
1839
|
+
onTranscript(callback) {
|
|
1840
|
+
if (this.callbacks.length >= 10) {
|
|
1841
|
+
getLogger().warn("DeepgramSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
|
|
1842
|
+
this.callbacks[this.callbacks.length - 1] = callback;
|
|
1843
|
+
return;
|
|
2177
1844
|
}
|
|
1845
|
+
this.callbacks.push(callback);
|
|
2178
1846
|
}
|
|
2179
|
-
|
|
2180
|
-
|
|
2181
|
-
|
|
2182
|
-
|
|
2183
|
-
|
|
2184
|
-
|
|
2185
|
-
|
|
2186
|
-
|
|
2187
|
-
ctx.carryByte = null;
|
|
2188
|
-
} else {
|
|
2189
|
-
buf = audio;
|
|
2190
|
-
}
|
|
2191
|
-
if (buf.length % 2 === 1) {
|
|
2192
|
-
ctx.carryByte = buf[buf.length - 1];
|
|
2193
|
-
buf = buf.subarray(0, buf.length - 1);
|
|
2194
|
-
}
|
|
2195
|
-
if (buf.length === 0 && ctx.leftover.length === 0) {
|
|
2196
|
-
return Buffer.alloc(0);
|
|
2197
|
-
}
|
|
2198
|
-
const sampleCount = buf.length / 2;
|
|
2199
|
-
const samples = ctx.leftover.slice();
|
|
2200
|
-
for (let i2 = 0; i2 < sampleCount; i2++) {
|
|
2201
|
-
samples.push(buf.readInt16LE(i2 * 2));
|
|
2202
|
-
}
|
|
2203
|
-
const out = [];
|
|
2204
|
-
let i = 0;
|
|
2205
|
-
while (i + 2 < samples.length) {
|
|
2206
|
-
out.push(samples[i]);
|
|
2207
|
-
out.push(Math.trunc((samples[i + 1] + samples[i + 2]) / 2));
|
|
2208
|
-
i += 3;
|
|
2209
|
-
}
|
|
2210
|
-
ctx.leftover = samples.slice(i);
|
|
2211
|
-
const buffer = Buffer.alloc(out.length * 2);
|
|
2212
|
-
for (let j = 0; j < out.length; j++) {
|
|
2213
|
-
buffer.writeInt16LE(out[j], j * 2);
|
|
2214
|
-
}
|
|
2215
|
-
return buffer;
|
|
2216
|
-
}
|
|
2217
|
-
/** @deprecated use {@link resampleStreaming} with persistent state. */
|
|
2218
|
-
static resample24kTo16k(audio) {
|
|
2219
|
-
const ctx = { carryByte: null, leftover: [] };
|
|
2220
|
-
const out = _OpenAITTS.resampleStreaming(audio, ctx);
|
|
2221
|
-
if (ctx.leftover.length === 0) return out;
|
|
2222
|
-
const tail = Buffer.alloc(ctx.leftover.length * 2);
|
|
2223
|
-
for (let i = 0; i < ctx.leftover.length; i++) {
|
|
2224
|
-
tail.writeInt16LE(ctx.leftover[i], i * 2);
|
|
1847
|
+
close() {
|
|
1848
|
+
if (this.ws) {
|
|
1849
|
+
try {
|
|
1850
|
+
this.ws.send(JSON.stringify({ type: "CloseStream" }));
|
|
1851
|
+
} catch {
|
|
1852
|
+
}
|
|
1853
|
+
this.ws.close();
|
|
1854
|
+
this.ws = null;
|
|
2225
1855
|
}
|
|
2226
|
-
return Buffer.concat([out, tail]);
|
|
2227
1856
|
}
|
|
2228
1857
|
};
|
|
2229
1858
|
}
|
|
@@ -3133,8 +2762,8 @@ var init_stream_handler = __esm({
|
|
|
3133
2762
|
"use strict";
|
|
3134
2763
|
init_openai_realtime();
|
|
3135
2764
|
init_elevenlabs_convai();
|
|
3136
|
-
|
|
3137
|
-
|
|
2765
|
+
init_deepgram_stt();
|
|
2766
|
+
init_provider_factory();
|
|
3138
2767
|
init_metrics();
|
|
3139
2768
|
init_transcoding();
|
|
3140
2769
|
init_llm_loop();
|
|
@@ -3176,8 +2805,8 @@ var init_stream_handler = __esm({
|
|
|
3176
2805
|
this.caller = caller;
|
|
3177
2806
|
this.callee = callee;
|
|
3178
2807
|
this.history = createHistoryManager(200);
|
|
3179
|
-
const sttProviderName = deps.agent.stt
|
|
3180
|
-
const ttsProviderName = deps.agent.tts
|
|
2808
|
+
const sttProviderName = deps.agent.stt ? deps.agent.stt.constructor?.name ?? "custom" : void 0;
|
|
2809
|
+
const ttsProviderName = deps.agent.tts ? deps.agent.tts.constructor?.name ?? "custom" : void 0;
|
|
3181
2810
|
const providerMode = deps.agent.provider ?? "openai_realtime";
|
|
3182
2811
|
this.metricsAcc = new CallMetricsAccumulator({
|
|
3183
2812
|
callId: "",
|
|
@@ -3367,17 +2996,8 @@ var init_stream_handler = __esm({
|
|
|
3367
2996
|
// ---------------------------------------------------------------------------
|
|
3368
2997
|
async initPipeline(resolvedPrompt) {
|
|
3369
2998
|
const label = this.deps.bridge.label;
|
|
3370
|
-
this.stt = this.deps.bridge.createStt(this.deps.agent);
|
|
3371
|
-
|
|
3372
|
-
if (this.deps.agent.tts.provider === "elevenlabs") {
|
|
3373
|
-
this.tts = new ElevenLabsTTS(this.deps.agent.tts.apiKey, this.deps.agent.tts.voice ?? "21m00Tcm4TlvDq8ikWAM");
|
|
3374
|
-
}
|
|
3375
|
-
if (this.deps.agent.tts.provider === "openai") {
|
|
3376
|
-
this.tts = new OpenAITTS(this.deps.agent.tts.apiKey, this.deps.agent.tts.voice ?? "alloy");
|
|
3377
|
-
}
|
|
3378
|
-
} else if (this.deps.agent.elevenlabsKey) {
|
|
3379
|
-
this.tts = new ElevenLabsTTS(this.deps.agent.elevenlabsKey, this.deps.agent.voice || "rachel");
|
|
3380
|
-
}
|
|
2999
|
+
this.stt = await this.deps.bridge.createStt(this.deps.agent);
|
|
3000
|
+
this.tts = await createTTS(this.deps.agent);
|
|
3381
3001
|
if (!this.stt) {
|
|
3382
3002
|
getLogger().info(`Pipeline mode (${label}): no STT configured`);
|
|
3383
3003
|
}
|
|
@@ -3895,10 +3515,11 @@ var init_stream_handler = __esm({
|
|
|
3895
3515
|
this.maxDurationTimer = null;
|
|
3896
3516
|
}
|
|
3897
3517
|
await this.deps.bridge.queryTelephonyCost(this.metricsAcc, this.callId);
|
|
3898
|
-
|
|
3899
|
-
|
|
3900
|
-
|
|
3901
|
-
|
|
3518
|
+
if (this.stt instanceof DeepgramSTT && this.stt.requestId) {
|
|
3519
|
+
const dgKey = this.stt.apiKey;
|
|
3520
|
+
if (dgKey) {
|
|
3521
|
+
await queryDeepgramCost(this.metricsAcc, dgKey, this.stt.requestId);
|
|
3522
|
+
}
|
|
3902
3523
|
}
|
|
3903
3524
|
const finalMetrics = this.metricsAcc.endCall();
|
|
3904
3525
|
const callEndData = {
|
|
@@ -4000,11 +3621,16 @@ function resolveVariables(template, variables) {
|
|
|
4000
3621
|
return result;
|
|
4001
3622
|
}
|
|
4002
3623
|
function buildAIAdapter(config, agent, resolvedPrompt) {
|
|
3624
|
+
const engine = agent.engine;
|
|
4003
3625
|
if (agent.provider === "elevenlabs_convai") {
|
|
4004
|
-
|
|
3626
|
+
if (!engine || engine.kind !== "elevenlabs_convai") {
|
|
3627
|
+
throw new Error(
|
|
3628
|
+
"ElevenLabs ConvAI mode requires `agent.engine = new ElevenLabsConvAI({...})`."
|
|
3629
|
+
);
|
|
3630
|
+
}
|
|
4005
3631
|
return new ElevenLabsConvAIAdapter(
|
|
4006
|
-
|
|
4007
|
-
|
|
3632
|
+
engine.apiKey,
|
|
3633
|
+
engine.agentId,
|
|
4008
3634
|
agent.voice ?? "21m00Tcm4TlvDq8ikWAM",
|
|
4009
3635
|
"eleven_turbo_v2_5",
|
|
4010
3636
|
agent.language ?? "en",
|
|
@@ -4017,33 +3643,15 @@ function buildAIAdapter(config, agent, resolvedPrompt) {
|
|
|
4017
3643
|
parameters: t.parameters
|
|
4018
3644
|
})) ?? [];
|
|
4019
3645
|
const tools = [...agentTools, TRANSFER_CALL_TOOL, END_CALL_TOOL];
|
|
3646
|
+
const openaiKey = engine && engine.kind === "openai_realtime" ? engine.apiKey : config.openaiKey ?? "";
|
|
4020
3647
|
return new OpenAIRealtimeAdapter(
|
|
4021
|
-
|
|
3648
|
+
openaiKey,
|
|
4022
3649
|
agent.model,
|
|
4023
3650
|
agent.voice,
|
|
4024
3651
|
resolvedPrompt ?? agent.systemPrompt,
|
|
4025
3652
|
tools
|
|
4026
3653
|
);
|
|
4027
3654
|
}
|
|
4028
|
-
function extractDeepgramOptions(options) {
|
|
4029
|
-
if (!options) return {};
|
|
4030
|
-
const get = (snake, camel) => options[snake] ?? options[camel];
|
|
4031
|
-
const out = {};
|
|
4032
|
-
const model = get("model", "model");
|
|
4033
|
-
if (typeof model === "string") out.model = model;
|
|
4034
|
-
const endpointing = get("endpointing_ms", "endpointingMs");
|
|
4035
|
-
if (typeof endpointing === "number") out.endpointingMs = endpointing;
|
|
4036
|
-
const utteranceEnd = get("utterance_end_ms", "utteranceEndMs");
|
|
4037
|
-
if (utteranceEnd === null) out.utteranceEndMs = null;
|
|
4038
|
-
else if (typeof utteranceEnd === "number") out.utteranceEndMs = utteranceEnd;
|
|
4039
|
-
const smart = get("smart_format", "smartFormat");
|
|
4040
|
-
if (typeof smart === "boolean") out.smartFormat = smart;
|
|
4041
|
-
const interim = get("interim_results", "interimResults");
|
|
4042
|
-
if (typeof interim === "boolean") out.interimResults = interim;
|
|
4043
|
-
const vad = get("vad_events", "vadEvents");
|
|
4044
|
-
if (typeof vad === "boolean") out.vadEvents = vad;
|
|
4045
|
-
return out;
|
|
4046
|
-
}
|
|
4047
3655
|
function isValidTelnyxTransferTarget(target) {
|
|
4048
3656
|
if (typeof target !== "string" || !target) return false;
|
|
4049
3657
|
if (/^\+[1-9]\d{6,14}$/.test(target)) return true;
|
|
@@ -4063,8 +3671,7 @@ var init_server = __esm({
|
|
|
4063
3671
|
import_ws5 = require("ws");
|
|
4064
3672
|
init_openai_realtime();
|
|
4065
3673
|
init_elevenlabs_convai();
|
|
4066
|
-
|
|
4067
|
-
init_whisper_stt();
|
|
3674
|
+
init_provider_factory();
|
|
4068
3675
|
init_pricing();
|
|
4069
3676
|
init_store();
|
|
4070
3677
|
init_routes();
|
|
@@ -4149,24 +3756,7 @@ var init_server = __esm({
|
|
|
4149
3756
|
}
|
|
4150
3757
|
}
|
|
4151
3758
|
createStt(agent) {
|
|
4152
|
-
|
|
4153
|
-
if (agent.stt) {
|
|
4154
|
-
if (agent.stt.provider === "deepgram") {
|
|
4155
|
-
const dgOptions = extractDeepgramOptions(agent.stt.options);
|
|
4156
|
-
if (isPipeline) {
|
|
4157
|
-
return new DeepgramSTT(agent.stt.apiKey, agent.stt.language ?? "en", dgOptions.model, "linear16", 16e3, dgOptions);
|
|
4158
|
-
}
|
|
4159
|
-
return DeepgramSTT.forTwilio(agent.stt.apiKey, agent.stt.language ?? "en", dgOptions.model, dgOptions);
|
|
4160
|
-
} else if (agent.stt.provider === "whisper") {
|
|
4161
|
-
return isPipeline ? new WhisperSTT(agent.stt.apiKey, "whisper-1", agent.stt.language ?? "en") : WhisperSTT.forTwilio(agent.stt.apiKey, agent.stt.language ?? "en");
|
|
4162
|
-
}
|
|
4163
|
-
} else if (agent.deepgramKey) {
|
|
4164
|
-
if (isPipeline) {
|
|
4165
|
-
return new DeepgramSTT(agent.deepgramKey, agent.language ?? "en", "nova-3", "linear16", 16e3);
|
|
4166
|
-
}
|
|
4167
|
-
return DeepgramSTT.forTwilio(agent.deepgramKey, agent.language ?? "en");
|
|
4168
|
-
}
|
|
4169
|
-
return null;
|
|
3759
|
+
return createSTT(agent);
|
|
4170
3760
|
}
|
|
4171
3761
|
async queryTelephonyCost(metricsAcc, callId) {
|
|
4172
3762
|
if (this.config.twilioSid && this.config.twilioToken && callId) {
|
|
@@ -4304,24 +3894,7 @@ var init_server = __esm({
|
|
|
4304
3894
|
ws.close();
|
|
4305
3895
|
}
|
|
4306
3896
|
createStt(agent) {
|
|
4307
|
-
|
|
4308
|
-
if (agent.stt.provider === "deepgram") {
|
|
4309
|
-
const dgOptions = extractDeepgramOptions(agent.stt.options);
|
|
4310
|
-
return new DeepgramSTT(
|
|
4311
|
-
agent.stt.apiKey,
|
|
4312
|
-
agent.stt.language ?? "en",
|
|
4313
|
-
dgOptions.model ?? "nova-3",
|
|
4314
|
-
"linear16",
|
|
4315
|
-
16e3,
|
|
4316
|
-
dgOptions
|
|
4317
|
-
);
|
|
4318
|
-
} else if (agent.stt.provider === "whisper") {
|
|
4319
|
-
return new WhisperSTT(agent.stt.apiKey, "whisper-1", agent.stt.language ?? "en");
|
|
4320
|
-
}
|
|
4321
|
-
} else if (agent.deepgramKey) {
|
|
4322
|
-
return new DeepgramSTT(agent.deepgramKey, agent.language ?? "en", "nova-3", "linear16", 16e3);
|
|
4323
|
-
}
|
|
4324
|
-
return null;
|
|
3897
|
+
return createSTT(agent);
|
|
4325
3898
|
}
|
|
4326
3899
|
async queryTelephonyCost(metricsAcc, callId) {
|
|
4327
3900
|
if (this.config.telnyxKey && callId) {
|
|
@@ -5760,6 +5333,94 @@ var init_tunnel = __esm({
|
|
|
5760
5333
|
}
|
|
5761
5334
|
});
|
|
5762
5335
|
|
|
5336
|
+
// src/carrier-config.ts
|
|
5337
|
+
// Export table for src/carrier-config.ts. Each entry is a thunk returning the
// function of the same name defined below in this file.
// NOTE(review): presumably __export installs these thunks as getters on the
// namespace object (bundler helper defined elsewhere) — confirm.
var carrier_config_exports = {};
__export(carrier_config_exports, {
  autoConfigureCarrier: () => autoConfigureCarrier,
  configureTelnyxNumber: () => configureTelnyxNumber,
  configureTwilioNumber: () => configureTwilioNumber
});
|
|
5343
|
+
/**
 * Point a Twilio phone number's voice webhook at `voiceUrl`.
 *
 * Performs two requests with HTTP Basic auth (`accountSid:authToken`):
 *   1. GET  IncomingPhoneNumbers.json?PhoneNumber=... to find the number's resource.
 *   2. POST IncomingPhoneNumbers/{sid}.json with `VoiceUrl` and `VoiceMethod=POST`.
 *
 * @param {string} accountSid - Twilio account SID (also used as the Basic-auth user).
 * @param {string} authToken - Twilio auth token (Basic-auth password).
 * @param {string} phoneNumber - Number to look up on the account.
 * @param {string} voiceUrl - Webhook URL stored as the number's VoiceUrl.
 * @returns {Promise<void>}
 * @throws {Error} If either request returns a non-OK status, or the lookup
 *   yields no matching number.
 */
async function configureTwilioNumber(accountSid, authToken, phoneNumber, voiceUrl) {
  const auth = `Basic ${Buffer.from(`${accountSid}:${authToken}`).toString("base64")}`;
  // Encode path segments as well as the query value so an unexpected character
  // in a SID cannot change which resource the request addresses.
  const accountUrl = `${TWILIO_API_BASE}/Accounts/${encodeURIComponent(accountSid)}`;
  const listUrl = `${accountUrl}/IncomingPhoneNumbers.json?PhoneNumber=${encodeURIComponent(phoneNumber)}`;
  const listResp = await fetch(listUrl, {
    method: "GET",
    headers: { Authorization: auth }
  });
  if (!listResp.ok) {
    throw new Error(
      `Twilio IncomingPhoneNumbers.list failed: ${listResp.status} ${await listResp.text()}`
    );
  }
  const body = await listResp.json();
  // `body` may legitimately parse to null; treat that the same as "no match"
  // instead of crashing on the property access.
  const match = body?.incoming_phone_numbers?.[0];
  if (!match) {
    throw new Error(`Twilio number ${phoneNumber} not found on account ${accountSid}`);
  }
  const updateUrl = `${accountUrl}/IncomingPhoneNumbers/${encodeURIComponent(match.sid)}.json`;
  const form = new URLSearchParams({ VoiceUrl: voiceUrl, VoiceMethod: "POST" });
  const updateResp = await fetch(updateUrl, {
    method: "POST",
    headers: {
      Authorization: auth,
      "Content-Type": "application/x-www-form-urlencoded"
    },
    body: form.toString()
  });
  if (!updateResp.ok) {
    throw new Error(
      `Twilio IncomingPhoneNumbers.update failed: ${updateResp.status} ${await updateResp.text()}`
    );
  }
}
|
|
5376
|
+
/**
 * Attach a Telnyx phone number to a connection.
 *
 * Sends `PATCH {TELNYX_API_BASE}/phone_numbers/{phoneNumber}` with a JSON body
 * of `{ connection_id }` and a Bearer token.
 *
 * @param {string} apiKey - Telnyx API key used as the Bearer token.
 * @param {string} connectionId - Connection to associate the number with.
 * @param {string} phoneNumber - Number to update (URL-encoded into the path).
 * @returns {Promise<void>}
 * @throws {Error} If the response status is not OK; the message carries the
 *   status code and response text.
 */
async function configureTelnyxNumber(apiKey, connectionId, phoneNumber) {
  const endpoint = `${TELNYX_API_BASE}/phone_numbers/${encodeURIComponent(phoneNumber)}`;
  const request = {
    method: "PATCH",
    headers: {
      Authorization: `Bearer ${apiKey}`,
      "Content-Type": "application/json"
    },
    body: JSON.stringify({ connection_id: connectionId })
  };
  const reply = await fetch(endpoint, request);
  if (reply.ok) {
    return;
  }
  const status = reply.status;
  const detail = await reply.text();
  throw new Error(
    `Telnyx PATCH /phone_numbers/${phoneNumber} failed: ${status} ${detail}`
  );
}
|
|
5391
|
+
/**
 * Best-effort carrier setup: never rejects because of a carrier API failure.
 *
 * The carrier is `params.telephonyProvider` when given; otherwise "twilio" if
 * `params.twilioSid` is set, else "telnyx".
 *   - twilio (needs twilioSid + twilioToken): sets the number's voice webhook to
 *     `https://{webhookHost}/webhooks/twilio/voice`. On failure a warning is
 *     logged together with the URL to configure by hand.
 *   - telnyx (needs telnyxKey + telnyxConnectionId): associates the number with
 *     the connection. On failure a warning is logged.
 * If the selected carrier's credentials are incomplete nothing is done.
 *
 * @param {object} params - telephonyProvider, twilioSid, twilioToken, telnyxKey,
 *   telnyxConnectionId, phoneNumber, webhookHost.
 * @returns {Promise<void>}
 */
async function autoConfigureCarrier(params) {
  const logger = getLogger();
  const reasonOf = (err) => err instanceof Error ? err.message : String(err);
  const carrierName = params.telephonyProvider ?? (params.twilioSid ? "twilio" : "telnyx");

  const twilioReady = carrierName === "twilio" && params.twilioSid && params.twilioToken;
  if (twilioReady) {
    const voiceUrl = `https://${params.webhookHost}/webhooks/twilio/voice`;
    try {
      await configureTwilioNumber(params.twilioSid, params.twilioToken, params.phoneNumber, voiceUrl);
      logger.info("Twilio webhook set to %s", voiceUrl);
    } catch (err) {
      logger.warn("Could not auto-configure Twilio webhook: %s", reasonOf(err));
      logger.info("Set webhook manually to: %s", voiceUrl);
    }
    // A Twilio attempt never falls through to the Telnyx path.
    return;
  }

  const telnyxReady = carrierName === "telnyx" && params.telnyxKey && params.telnyxConnectionId;
  if (!telnyxReady) {
    return;
  }
  try {
    await configureTelnyxNumber(params.telnyxKey, params.telnyxConnectionId, params.phoneNumber);
    logger.info("Telnyx number %s associated with connection %s", params.phoneNumber, params.telnyxConnectionId);
  } catch (err) {
    logger.warn("Could not auto-configure Telnyx number: %s", reasonOf(err));
  }
}
|
|
5414
|
+
// API roots for src/carrier-config.ts. Declared with `var` at module level and
// assigned inside the module body below, so they stay undefined until that
// body has run.
var TWILIO_API_BASE, TELNYX_API_BASE;
// Module initialiser for src/carrier-config.ts.
// NOTE(review): presumably __esm makes this a run-once lazy initialiser
// (bundler helper defined elsewhere) — confirm.
var init_carrier_config = __esm({
  "src/carrier-config.ts"() {
    "use strict";
    init_logger();
    // Base URLs read by configureTwilioNumber and configureTelnyxNumber.
    TWILIO_API_BASE = "https://api.twilio.com/2010-04-01";
    TELNYX_API_BASE = "https://api.telnyx.com/v2";
  }
});
|
|
5423
|
+
|
|
5763
5424
|
// src/test-mode.ts
|
|
5764
5425
|
var test_mode_exports = {};
|
|
5765
5426
|
__export(test_mode_exports, {
|
|
@@ -6874,31 +6535,35 @@ var require_node_cron = __commonJS({
|
|
|
6874
6535
|
var index_exports = {};
|
|
6875
6536
|
__export(index_exports, {
|
|
6876
6537
|
AllProvidersFailedError: () => AllProvidersFailedError,
|
|
6877
|
-
AssemblyAISTT: () =>
|
|
6538
|
+
AssemblyAISTT: () => STT5,
|
|
6878
6539
|
AuthenticationError: () => AuthenticationError,
|
|
6879
6540
|
BackgroundAudioPlayer: () => BackgroundAudioPlayer,
|
|
6880
6541
|
BuiltinAudioClip: () => BuiltinAudioClip,
|
|
6881
6542
|
CallMetricsAccumulator: () => CallMetricsAccumulator,
|
|
6882
|
-
CartesiaSTT: () =>
|
|
6883
|
-
CartesiaTTS: () =>
|
|
6543
|
+
CartesiaSTT: () => STT3,
|
|
6544
|
+
CartesiaTTS: () => TTS3,
|
|
6884
6545
|
ChatContext: () => ChatContext,
|
|
6546
|
+
CloudflareTunnel: () => CloudflareTunnel,
|
|
6885
6547
|
DEFAULT_MIN_SENTENCE_LEN: () => DEFAULT_MIN_SENTENCE_LEN,
|
|
6886
6548
|
DEFAULT_PRICING: () => DEFAULT_PRICING,
|
|
6887
6549
|
DTMF_EVENTS: () => DTMF_EVENTS,
|
|
6888
|
-
DeepgramSTT: () =>
|
|
6550
|
+
DeepgramSTT: () => STT,
|
|
6551
|
+
ElevenLabsConvAI: () => ConvAI,
|
|
6889
6552
|
ElevenLabsConvAIAdapter: () => ElevenLabsConvAIAdapter,
|
|
6890
|
-
ElevenLabsTTS: () =>
|
|
6553
|
+
ElevenLabsTTS: () => TTS,
|
|
6891
6554
|
FallbackLLMProvider: () => FallbackLLMProvider,
|
|
6892
6555
|
GEMINI_DEFAULT_INPUT_SR: () => GEMINI_DEFAULT_INPUT_SR,
|
|
6893
6556
|
GEMINI_DEFAULT_OUTPUT_SR: () => GEMINI_DEFAULT_OUTPUT_SR,
|
|
6894
6557
|
GeminiLiveAdapter: () => GeminiLiveAdapter,
|
|
6558
|
+
Guardrail: () => Guardrail,
|
|
6895
6559
|
IVRActivity: () => IVRActivity,
|
|
6896
6560
|
LLMLoop: () => LLMLoop,
|
|
6897
|
-
LMNTTTS: () =>
|
|
6561
|
+
LMNTTTS: () => TTS5,
|
|
6898
6562
|
MetricsStore: () => MetricsStore,
|
|
6899
6563
|
OpenAILLMProvider: () => OpenAILLMProvider,
|
|
6564
|
+
OpenAIRealtime: () => Realtime,
|
|
6900
6565
|
OpenAIRealtimeAdapter: () => OpenAIRealtimeAdapter,
|
|
6901
|
-
OpenAITTS: () =>
|
|
6566
|
+
OpenAITTS: () => TTS2,
|
|
6902
6567
|
PartialStreamError: () => PartialStreamError,
|
|
6903
6568
|
Patter: () => Patter,
|
|
6904
6569
|
PatterConnectionError: () => PatterConnectionError,
|
|
@@ -6906,15 +6571,19 @@ __export(index_exports, {
|
|
|
6906
6571
|
PipelineHookExecutor: () => PipelineHookExecutor,
|
|
6907
6572
|
ProvisionError: () => ProvisionError,
|
|
6908
6573
|
RemoteMessageHandler: () => RemoteMessageHandler,
|
|
6909
|
-
RimeTTS: () =>
|
|
6574
|
+
RimeTTS: () => TTS4,
|
|
6910
6575
|
SentenceChunker: () => SentenceChunker,
|
|
6911
|
-
SonioxSTT: () =>
|
|
6576
|
+
SonioxSTT: () => STT4,
|
|
6577
|
+
StaticTunnel: () => Static,
|
|
6578
|
+
Telnyx: () => Carrier2,
|
|
6912
6579
|
TestSession: () => TestSession,
|
|
6913
6580
|
TfidfLoopDetector: () => TfidfLoopDetector,
|
|
6581
|
+
Tool: () => Tool,
|
|
6582
|
+
Twilio: () => Carrier,
|
|
6914
6583
|
ULTRAVOX_DEFAULT_API_BASE: () => ULTRAVOX_DEFAULT_API_BASE,
|
|
6915
6584
|
ULTRAVOX_DEFAULT_SR: () => ULTRAVOX_DEFAULT_SR,
|
|
6916
6585
|
UltravoxRealtimeAdapter: () => UltravoxRealtimeAdapter,
|
|
6917
|
-
WhisperSTT: () =>
|
|
6586
|
+
WhisperSTT: () => STT2,
|
|
6918
6587
|
builtinClipPath: () => builtinClipPath,
|
|
6919
6588
|
calculateRealtimeCost: () => calculateRealtimeCost,
|
|
6920
6589
|
calculateSttCost: () => calculateSttCost,
|
|
@@ -6930,6 +6599,7 @@ __export(index_exports, {
|
|
|
6930
6599
|
filterMarkdown: () => filterMarkdown,
|
|
6931
6600
|
formatDtmf: () => formatDtmf,
|
|
6932
6601
|
getLogger: () => getLogger,
|
|
6602
|
+
guardrail: () => guardrail,
|
|
6933
6603
|
isRemoteUrl: () => isRemoteUrl,
|
|
6934
6604
|
isWebSocketUrl: () => isWebSocketUrl,
|
|
6935
6605
|
makeAuthMiddleware: () => makeAuthMiddleware,
|
|
@@ -6951,6 +6621,7 @@ __export(index_exports, {
|
|
|
6951
6621
|
selectSoundFromList: () => selectSoundFromList,
|
|
6952
6622
|
setLogger: () => setLogger,
|
|
6953
6623
|
startTunnel: () => startTunnel,
|
|
6624
|
+
tool: () => tool,
|
|
6954
6625
|
whisper: () => whisper
|
|
6955
6626
|
});
|
|
6956
6627
|
module.exports = __toCommonJS(index_exports);
|
|
@@ -7096,77 +6767,69 @@ var PatterConnection = class {
|
|
|
7096
6767
|
}
|
|
7097
6768
|
};
|
|
7098
6769
|
|
|
7099
|
-
// src/
|
|
7100
|
-
|
|
7101
|
-
|
|
6770
|
+
// src/client.ts
|
|
6771
|
+
init_server();
|
|
6772
|
+
|
|
6773
|
+
// src/engines/openai.ts
|
|
6774
|
+
/**
 * Engine marker for OpenAI Realtime (`kind === "openai_realtime"`).
 *
 * Holds the resolved API key plus the model and voice to use. The key comes
 * from `opts.apiKey`, falling back to the OPENAI_API_KEY environment variable.
 *
 * @throws {Error} From the constructor when no API key can be resolved.
 */
var Realtime = class {
  kind = "openai_realtime";
  apiKey;
  model;
  voice;
  /**
   * @param {{ apiKey?: string, model?: string, voice?: string }} [opts]
   */
  constructor(opts = {}) {
    const resolvedKey = opts.apiKey ?? process.env.OPENAI_API_KEY;
    if (resolvedKey) {
      this.apiKey = resolvedKey;
      this.model = opts.model ?? "gpt-4o-mini-realtime-preview";
      this.voice = opts.voice ?? "alloy";
      return;
    }
    throw new Error(
      "OpenAI Realtime requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
    );
  }
};
|
|
7121
|
-
|
|
7122
|
-
|
|
6791
|
+
|
|
6792
|
+
// src/engines/elevenlabs.ts
|
|
6793
|
+
/**
 * Engine marker for ElevenLabs Conversational AI (`kind === "elevenlabs_convai"`).
 *
 * Resolves the API key from `opts.apiKey` or ELEVENLABS_API_KEY, and the agent
 * id from `opts.agentId` or ELEVENLABS_AGENT_ID. `voice` is stored as given
 * (it may be undefined).
 *
 * @throws {Error} From the constructor when the API key is missing, or — checked
 *   second — when the agent id is missing.
 */
var ConvAI = class {
  kind = "elevenlabs_convai";
  apiKey;
  agentId;
  voice;
  /**
   * @param {{ apiKey?: string, agentId?: string, voice?: string }} [opts]
   */
  constructor(opts = {}) {
    const env = process.env;
    const resolvedKey = opts.apiKey ?? env.ELEVENLABS_API_KEY;
    const resolvedAgent = opts.agentId ?? env.ELEVENLABS_AGENT_ID;
    if (!resolvedKey) {
      throw new Error(
        "ElevenLabs ConvAI requires an apiKey. Pass { apiKey: '...' } or set ELEVENLABS_API_KEY in the environment."
      );
    }
    if (!resolvedAgent) {
      throw new Error(
        "ElevenLabs ConvAI requires an agentId. Pass { agentId: 'agent_...' } or set ELEVENLABS_AGENT_ID in the environment."
      );
    }
    this.apiKey = resolvedKey;
    this.agentId = resolvedAgent;
    this.voice = opts.voice;
  }
};
|
|
6816
|
+
|
|
6817
|
+
// src/tunnels/index.ts
|
|
6818
|
+
/**
 * Tunnel marker with no configuration: instances only carry
 * `kind === "cloudflare"`.
 */
var CloudflareTunnel = class {
  constructor() {
    this.kind = "cloudflare";
  }
};
|
|
6821
|
+
/**
 * Tunnel marker for a pre-existing public hostname (`kind === "static"`).
 *
 * @throws {Error} From the constructor when `opts.hostname` is missing, empty,
 *   or not a string.
 */
var Static = class {
  kind = "static";
  hostname;
  /**
   * @param {{ hostname: string }} opts - Must carry a non-empty string hostname.
   */
  constructor(opts = {}) {
    // `opts?.` keeps a missing or null argument on the validation path instead
    // of failing with an opaque "cannot read properties of undefined".
    const hostname = opts?.hostname;
    // The hostname is later normalised with String.prototype.replace, so a
    // non-string value is rejected here rather than crashing there.
    if (typeof hostname !== "string" || !hostname) {
      throw new Error("Static tunnel requires a non-empty hostname.");
    }
    this.hostname = hostname;
  }
};
|
|
7134
|
-
function deepgram(opts) {
|
|
7135
|
-
const options = {
|
|
7136
|
-
model: opts.model ?? "nova-3",
|
|
7137
|
-
endpointing_ms: opts.endpointingMs ?? 150,
|
|
7138
|
-
utterance_end_ms: opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3,
|
|
7139
|
-
smart_format: opts.smartFormat ?? true,
|
|
7140
|
-
interim_results: opts.interimResults ?? true
|
|
7141
|
-
};
|
|
7142
|
-
if (opts.vadEvents !== void 0) options.vad_events = opts.vadEvents;
|
|
7143
|
-
return new STTConfigImpl("deepgram", opts.apiKey, opts.language ?? "en", options);
|
|
7144
|
-
}
|
|
7145
|
-
function whisper(opts) {
|
|
7146
|
-
return new STTConfigImpl("whisper", opts.apiKey, opts.language ?? "en");
|
|
7147
|
-
}
|
|
7148
|
-
function elevenlabs(opts) {
|
|
7149
|
-
return new TTSConfigImpl("elevenlabs", opts.apiKey, opts.voice ?? "rachel");
|
|
7150
|
-
}
|
|
7151
|
-
function openaiTts(opts) {
|
|
7152
|
-
return new TTSConfigImpl("openai", opts.apiKey, opts.voice ?? "alloy");
|
|
7153
|
-
}
|
|
7154
|
-
function cartesia(opts) {
|
|
7155
|
-
return new TTSConfigImpl(
|
|
7156
|
-
"cartesia",
|
|
7157
|
-
opts.apiKey,
|
|
7158
|
-
opts.voice ?? "f786b574-daa5-4673-aa0c-cbe3e8534c02"
|
|
7159
|
-
);
|
|
7160
|
-
}
|
|
7161
|
-
function rime(opts) {
|
|
7162
|
-
return new TTSConfigImpl("rime", opts.apiKey, opts.voice ?? "astra");
|
|
7163
|
-
}
|
|
7164
|
-
function lmnt(opts) {
|
|
7165
|
-
return new TTSConfigImpl("lmnt", opts.apiKey, opts.voice ?? "leah");
|
|
7166
|
-
}
|
|
7167
6831
|
|
|
7168
6832
|
// src/client.ts
|
|
7169
|
-
init_server();
|
|
7170
6833
|
var DEFAULT_BACKEND_URL2 = "wss://api.getpatter.com";
|
|
7171
6834
|
var DEFAULT_REST_URL = "https://api.getpatter.com";
|
|
7172
6835
|
function sttConfigToDict(cfg) {
|
|
@@ -7197,21 +6860,39 @@ var Patter = class {
|
|
|
7197
6860
|
embeddedServer = null;
|
|
7198
6861
|
tunnelHandle = null;
|
|
7199
6862
|
constructor(options) {
|
|
7200
|
-
const
|
|
6863
|
+
const hasCarrier = "carrier" in options && options.carrier !== void 0;
|
|
6864
|
+
const isLocal = "mode" in options && options.mode === "local" || hasCarrier;
|
|
7201
6865
|
if (isLocal) {
|
|
7202
6866
|
const local = options;
|
|
7203
6867
|
if (!local.phoneNumber) {
|
|
7204
6868
|
throw new Error("Local mode requires phoneNumber");
|
|
7205
6869
|
}
|
|
7206
|
-
if (!local.
|
|
7207
|
-
throw new Error(
|
|
6870
|
+
if (!local.carrier) {
|
|
6871
|
+
throw new Error(
|
|
6872
|
+
"Local mode requires a `carrier` instance. Pass `carrier: new Twilio({...})` or `carrier: new Telnyx({...})`."
|
|
6873
|
+
);
|
|
7208
6874
|
}
|
|
7209
|
-
|
|
7210
|
-
|
|
6875
|
+
const carrier = local.carrier;
|
|
6876
|
+
const tunnel = local.tunnel;
|
|
6877
|
+
let tunnelWebhookUrl;
|
|
6878
|
+
if (tunnel instanceof Static) {
|
|
6879
|
+
if (local.webhookUrl) {
|
|
6880
|
+
throw new Error(
|
|
6881
|
+
"Cannot use both `tunnel: new StaticTunnel(...)` and `webhookUrl`. Pick one."
|
|
6882
|
+
);
|
|
6883
|
+
}
|
|
6884
|
+
tunnelWebhookUrl = tunnel.hostname;
|
|
7211
6885
|
}
|
|
7212
6886
|
this.mode = "local";
|
|
7213
|
-
const
|
|
7214
|
-
|
|
6887
|
+
const rawWebhook = tunnelWebhookUrl ?? local.webhookUrl;
|
|
6888
|
+
const normalizedWebhook = rawWebhook ? rawWebhook.replace(/^https?:\/\//, "").replace(/\/$/, "") : void 0;
|
|
6889
|
+
this.localConfig = {
|
|
6890
|
+
carrier,
|
|
6891
|
+
phoneNumber: local.phoneNumber,
|
|
6892
|
+
webhookUrl: normalizedWebhook,
|
|
6893
|
+
tunnel: local.tunnel,
|
|
6894
|
+
openaiKey: local.openaiKey
|
|
6895
|
+
};
|
|
7215
6896
|
this.apiKey = "";
|
|
7216
6897
|
this.backendUrl = DEFAULT_BACKEND_URL2;
|
|
7217
6898
|
this.restUrl = DEFAULT_REST_URL;
|
|
@@ -7228,25 +6909,55 @@ var Patter = class {
|
|
|
7228
6909
|
}
|
|
7229
6910
|
// === Local mode ===
|
|
7230
6911
|
agent(opts) {
|
|
7231
|
-
|
|
6912
|
+
let working = { ...opts };
|
|
6913
|
+
if (opts.engine) {
|
|
6914
|
+
if (opts.provider) {
|
|
6915
|
+
throw new Error(
|
|
6916
|
+
"Cannot pass both `engine:` and `provider:`. Use one (engine is preferred)."
|
|
6917
|
+
);
|
|
6918
|
+
}
|
|
6919
|
+
const engine = opts.engine;
|
|
6920
|
+
if (engine instanceof Realtime) {
|
|
6921
|
+
working = {
|
|
6922
|
+
...working,
|
|
6923
|
+
provider: "openai_realtime",
|
|
6924
|
+
model: working.model ?? engine.model,
|
|
6925
|
+
voice: working.voice ?? engine.voice
|
|
6926
|
+
};
|
|
6927
|
+
if (this.localConfig && !this.localConfig.openaiKey) {
|
|
6928
|
+
this.localConfig = { ...this.localConfig, openaiKey: engine.apiKey };
|
|
6929
|
+
}
|
|
6930
|
+
} else if (engine instanceof ConvAI) {
|
|
6931
|
+
working = {
|
|
6932
|
+
...working,
|
|
6933
|
+
provider: "elevenlabs_convai",
|
|
6934
|
+
voice: working.voice ?? engine.voice
|
|
6935
|
+
};
|
|
6936
|
+
} else {
|
|
6937
|
+
throw new Error(
|
|
6938
|
+
"Unknown engine. Expected OpenAIRealtime or ElevenLabsConvAI instance."
|
|
6939
|
+
);
|
|
6940
|
+
}
|
|
6941
|
+
}
|
|
6942
|
+
if (working.provider) {
|
|
7232
6943
|
const valid = ["openai_realtime", "elevenlabs_convai", "pipeline"];
|
|
7233
|
-
if (!valid.includes(
|
|
7234
|
-
throw new Error(`provider must be one of: ${valid.join(", ")}. Got: '${
|
|
6944
|
+
if (!valid.includes(working.provider)) {
|
|
6945
|
+
throw new Error(`provider must be one of: ${valid.join(", ")}. Got: '${working.provider}'`);
|
|
7235
6946
|
}
|
|
7236
6947
|
}
|
|
7237
|
-
if (
|
|
7238
|
-
if (!Array.isArray(
|
|
6948
|
+
if (working.tools) {
|
|
6949
|
+
if (!Array.isArray(working.tools)) {
|
|
7239
6950
|
throw new TypeError("tools must be an array");
|
|
7240
6951
|
}
|
|
7241
|
-
|
|
7242
|
-
if (!
|
|
7243
|
-
if (!
|
|
6952
|
+
working.tools.forEach((tool2, i) => {
|
|
6953
|
+
if (!tool2.name) throw new Error(`tools[${i}] missing required 'name' field`);
|
|
6954
|
+
if (!tool2.webhookUrl && !tool2.handler) throw new Error(`tools[${i}] requires either 'webhookUrl' or 'handler'`);
|
|
7244
6955
|
});
|
|
7245
6956
|
}
|
|
7246
|
-
if (
|
|
6957
|
+
if (working.variables !== void 0 && (typeof working.variables !== "object" || Array.isArray(working.variables))) {
|
|
7247
6958
|
throw new TypeError("variables must be an object");
|
|
7248
6959
|
}
|
|
7249
|
-
return
|
|
6960
|
+
return working;
|
|
7250
6961
|
}
|
|
7251
6962
|
async serve(opts) {
|
|
7252
6963
|
if (this.mode !== "local" || !this.localConfig) {
|
|
@@ -7269,10 +6980,14 @@ var Patter = class {
|
|
|
7269
6980
|
}
|
|
7270
6981
|
let webhookUrl = this.localConfig.webhookUrl ?? "";
|
|
7271
6982
|
const port = opts.port ?? 8e3;
|
|
7272
|
-
|
|
6983
|
+
const ctorTunnel = this.localConfig.tunnel;
|
|
6984
|
+
const wantsCloudflaredFromServe = opts.tunnel === true;
|
|
6985
|
+
const wantsCloudflaredFromCtor = ctorTunnel === true || ctorTunnel instanceof CloudflareTunnel;
|
|
6986
|
+
const wantsCloudflared = wantsCloudflaredFromServe || wantsCloudflaredFromCtor;
|
|
6987
|
+
if (wantsCloudflared && webhookUrl) {
|
|
7273
6988
|
throw new Error("Cannot use both tunnel: true and webhookUrl. Pick one.");
|
|
7274
6989
|
}
|
|
7275
|
-
if (
|
|
6990
|
+
if (wantsCloudflared) {
|
|
7276
6991
|
const { startTunnel: startTunnel2 } = await Promise.resolve().then(() => (init_tunnel(), tunnel_exports));
|
|
7277
6992
|
this.tunnelHandle = await startTunnel2(port);
|
|
7278
6993
|
webhookUrl = this.tunnelHandle.hostname;
|
|
@@ -7282,17 +6997,29 @@ var Patter = class {
|
|
|
7282
6997
|
"No webhookUrl configured. Either:\n - Pass webhookUrl in the Patter constructor\n - Use tunnel: true in serve() to auto-create a tunnel"
|
|
7283
6998
|
);
|
|
7284
6999
|
}
|
|
7000
|
+
const carrier = this.localConfig.carrier;
|
|
7001
|
+
const telephonyProvider = carrier.kind === "twilio" ? "twilio" : "telnyx";
|
|
7002
|
+
const { autoConfigureCarrier: autoConfigureCarrier2 } = await Promise.resolve().then(() => (init_carrier_config(), carrier_config_exports));
|
|
7003
|
+
await autoConfigureCarrier2({
|
|
7004
|
+
telephonyProvider,
|
|
7005
|
+
twilioSid: carrier.kind === "twilio" ? carrier.accountSid : void 0,
|
|
7006
|
+
twilioToken: carrier.kind === "twilio" ? carrier.authToken : void 0,
|
|
7007
|
+
telnyxKey: carrier.kind === "telnyx" ? carrier.apiKey : void 0,
|
|
7008
|
+
telnyxConnectionId: carrier.kind === "telnyx" ? carrier.connectionId : void 0,
|
|
7009
|
+
phoneNumber: this.localConfig.phoneNumber,
|
|
7010
|
+
webhookHost: webhookUrl
|
|
7011
|
+
});
|
|
7285
7012
|
this.embeddedServer = new EmbeddedServer(
|
|
7286
7013
|
{
|
|
7287
|
-
twilioSid:
|
|
7288
|
-
twilioToken:
|
|
7014
|
+
twilioSid: carrier.kind === "twilio" ? carrier.accountSid : void 0,
|
|
7015
|
+
twilioToken: carrier.kind === "twilio" ? carrier.authToken : void 0,
|
|
7289
7016
|
openaiKey: this.localConfig.openaiKey,
|
|
7290
7017
|
phoneNumber: this.localConfig.phoneNumber,
|
|
7291
7018
|
webhookUrl,
|
|
7292
|
-
telephonyProvider
|
|
7293
|
-
telnyxKey:
|
|
7294
|
-
telnyxConnectionId:
|
|
7295
|
-
telnyxPublicKey:
|
|
7019
|
+
telephonyProvider,
|
|
7020
|
+
telnyxKey: carrier.kind === "telnyx" ? carrier.apiKey : void 0,
|
|
7021
|
+
telnyxConnectionId: carrier.kind === "telnyx" ? carrier.connectionId : void 0,
|
|
7022
|
+
telnyxPublicKey: carrier.kind === "telnyx" ? carrier.publicKey : void 0
|
|
7296
7023
|
},
|
|
7297
7024
|
opts.agent,
|
|
7298
7025
|
opts.onCallStart,
|
|
@@ -7353,10 +7080,10 @@ var Patter = class {
|
|
|
7353
7080
|
if (!this.localConfig) {
|
|
7354
7081
|
throw new Error("local config missing");
|
|
7355
7082
|
}
|
|
7356
|
-
const { phoneNumber, webhookUrl,
|
|
7357
|
-
if (
|
|
7358
|
-
const telnyxKey =
|
|
7359
|
-
const connectionId =
|
|
7083
|
+
const { phoneNumber, webhookUrl, carrier } = this.localConfig;
|
|
7084
|
+
if (carrier.kind === "telnyx") {
|
|
7085
|
+
const telnyxKey = carrier.apiKey;
|
|
7086
|
+
const connectionId = carrier.connectionId;
|
|
7360
7087
|
const streamUrl = `wss://${webhookUrl}/ws/stream/${encodeURIComponent(localOpts.to)}?caller=${encodeURIComponent(phoneNumber)}&callee=${encodeURIComponent(localOpts.to)}`;
|
|
7361
7088
|
const telnyxPayload = {
|
|
7362
7089
|
connection_id: connectionId,
|
|
@@ -7396,8 +7123,8 @@ var Patter = class {
|
|
|
7396
7123
|
}
|
|
7397
7124
|
return;
|
|
7398
7125
|
}
|
|
7399
|
-
const twilioSid =
|
|
7400
|
-
const twilioToken =
|
|
7126
|
+
const twilioSid = carrier.accountSid;
|
|
7127
|
+
const twilioToken = carrier.authToken;
|
|
7401
7128
|
const statusCallbackUrl = `https://${webhookUrl}/webhooks/twilio/status`;
|
|
7402
7129
|
const url = `https://api.twilio.com/2010-04-01/Accounts/${twilioSid}/Calls.json`;
|
|
7403
7130
|
const params = new URLSearchParams({
|
|
@@ -7529,65 +7256,6 @@ var Patter = class {
|
|
|
7529
7256
|
const data = await response.json();
|
|
7530
7257
|
return data.map((c) => ({ id: c.id, direction: c.direction, caller: c.caller, callee: c.callee, startedAt: c.started_at, endedAt: c.ended_at, durationSeconds: c.duration_seconds, status: c.status, transcript: c.transcript }));
|
|
7531
7258
|
}
|
|
7532
|
-
// Provider helpers — mirror the Python classmethod factories so callers can
|
|
7533
|
-
// write ``Patter.deepgram({ apiKey })`` without importing the top-level.
|
|
7534
|
-
static deepgram = deepgram;
|
|
7535
|
-
static whisper = whisper;
|
|
7536
|
-
static elevenlabs = elevenlabs;
|
|
7537
|
-
static openaiTts = openaiTts;
|
|
7538
|
-
static cartesia = cartesia;
|
|
7539
|
-
static rime = rime;
|
|
7540
|
-
static lmnt = lmnt;
|
|
7541
|
-
static guardrail(opts) {
|
|
7542
|
-
return {
|
|
7543
|
-
name: opts.name,
|
|
7544
|
-
blockedTerms: opts.blockedTerms,
|
|
7545
|
-
check: opts.check,
|
|
7546
|
-
replacement: opts.replacement ?? "I'm sorry, I can't respond to that."
|
|
7547
|
-
};
|
|
7548
|
-
}
|
|
7549
|
-
/**
|
|
7550
|
-
* Create a tool definition for use with `agent({ tools: [...] })`.
|
|
7551
|
-
*
|
|
7552
|
-
* Either `handler` (a function) or `webhookUrl` must be provided.
|
|
7553
|
-
*
|
|
7554
|
-
* @param opts.name - Tool name (visible to the LLM).
|
|
7555
|
-
* @param opts.description - What the tool does (visible to the LLM).
|
|
7556
|
-
* @param opts.parameters - JSON Schema for tool arguments.
|
|
7557
|
-
* @param opts.handler - Async function called in-process when the LLM invokes the tool.
|
|
7558
|
-
* @param opts.webhookUrl - URL to POST to when the LLM invokes the tool.
|
|
7559
|
-
*
|
|
7560
|
-
* @example
|
|
7561
|
-
* ```ts
|
|
7562
|
-
* phone.agent({
|
|
7563
|
-
* systemPrompt: 'You are a pizza bot.',
|
|
7564
|
-
* tools: [
|
|
7565
|
-
* Patter.tool({
|
|
7566
|
-
* name: 'check_menu',
|
|
7567
|
-
* description: 'Check available menu items',
|
|
7568
|
-
* handler: async (args) => JSON.stringify({ items: ['margherita'] }),
|
|
7569
|
-
* }),
|
|
7570
|
-
* ],
|
|
7571
|
-
* });
|
|
7572
|
-
* ```
|
|
7573
|
-
*/
|
|
7574
|
-
static tool(opts) {
|
|
7575
|
-
if (!opts.handler && !opts.webhookUrl) {
|
|
7576
|
-
throw new Error("tool() requires either handler or webhookUrl");
|
|
7577
|
-
}
|
|
7578
|
-
const t = {
|
|
7579
|
-
name: opts.name,
|
|
7580
|
-
description: opts.description ?? "",
|
|
7581
|
-
parameters: opts.parameters ?? { type: "object", properties: {} }
|
|
7582
|
-
};
|
|
7583
|
-
if (opts.handler) {
|
|
7584
|
-
t.handler = opts.handler;
|
|
7585
|
-
}
|
|
7586
|
-
if (opts.webhookUrl) {
|
|
7587
|
-
t.webhookUrl = opts.webhookUrl;
|
|
7588
|
-
}
|
|
7589
|
-
return t;
|
|
7590
|
-
}
|
|
7591
7259
|
// Internal
|
|
7592
7260
|
async registerNumber(provider, providerKey, number, providerSecret, country = "US", stt, tts) {
|
|
7593
7261
|
const credentials = { api_key: providerKey };
|
|
@@ -7682,8 +7350,64 @@ function filterForTTS(text) {
|
|
|
7682
7350
|
return filterEmoji(filterMarkdown(text));
|
|
7683
7351
|
}
|
|
7684
7352
|
|
|
7685
|
-
// src/
|
|
7686
|
-
|
|
7353
|
+
// src/providers.ts
|
|
7354
|
+
/**
 * Plain STT configuration record: provider name, API key, language and an
 * optional provider-specific options object.
 */
var STTConfigImpl = class {
  provider;
  apiKey;
  language;
  options;
  /**
   * @param {string} provider - Provider identifier.
   * @param {string} apiKey - Provider API key.
   * @param {string} [language="en"] - Language code.
   * @param {object} [options] - Provider options; stored by reference only when truthy.
   */
  constructor(provider, apiKey, language = "en", options) {
    Object.assign(this, { provider, apiKey, language });
    if (options) {
      this.options = options;
    }
  }
  /**
   * Serialise to the snake_case wire shape.
   * @returns {object} `{ provider, api_key, language }` plus a shallow copy of
   *   `options` when options are set.
   */
  toDict() {
    const { provider, apiKey, language, options } = this;
    const dict = { provider, api_key: apiKey, language };
    return options ? { ...dict, options: { ...options } } : dict;
  }
};
|
|
7375
|
+
/**
 * Plain TTS configuration record: provider name, API key and voice.
 */
var TTSConfigImpl = class {
  provider;
  apiKey;
  voice;
  /**
   * @param {string} provider - Provider identifier.
   * @param {string} apiKey - Provider API key.
   * @param {string} [voice="alloy"] - Voice name or id.
   */
  constructor(provider, apiKey, voice = "alloy") {
    Object.assign(this, { provider, apiKey, voice });
  }
  /**
   * Serialise to the snake_case wire shape.
   * @returns {{ provider: string, api_key: string, voice: string }}
   */
  toDict() {
    const { provider, apiKey, voice } = this;
    return { provider, api_key: apiKey, voice };
  }
};
|
|
7388
|
+
/**
 * Build a Deepgram STT config.
 *
 * Defaults: model "nova-3", endpointing 150 ms, utterance end 1000 ms,
 * smart formatting on, interim results on, language "en". An explicit
 * `utteranceEndMs: null` is passed through as null; `vad_events` is only
 * included when `opts.vadEvents` is not undefined.
 *
 * @param {object} opts - apiKey plus optional language, model, endpointingMs,
 *   utteranceEndMs, smartFormat, interimResults, vadEvents.
 * @returns {STTConfigImpl}
 */
function deepgram(opts) {
  const utteranceEnd = opts.utteranceEndMs;
  const sttOptions = {
    model: opts.model ?? "nova-3",
    endpointing_ms: opts.endpointingMs ?? 150,
    // Only `undefined` takes the default; null is a deliberate caller value.
    utterance_end_ms: utteranceEnd === void 0 ? 1e3 : utteranceEnd,
    smart_format: opts.smartFormat ?? true,
    interim_results: opts.interimResults ?? true
  };
  const vad = opts.vadEvents;
  if (vad !== void 0) {
    sttOptions.vad_events = vad;
  }
  const language = opts.language ?? "en";
  return new STTConfigImpl("deepgram", opts.apiKey, language, sttOptions);
}
|
|
7399
|
+
/**
 * Build a Whisper STT config; language defaults to "en".
 * @param {{ apiKey: string, language?: string }} opts
 * @returns {STTConfigImpl}
 */
function whisper(opts) {
  const language = opts.language ?? "en";
  const config = new STTConfigImpl("whisper", opts.apiKey, language);
  return config;
}
|
|
7402
|
+
/**
 * Build an ElevenLabs TTS config; voice defaults to "rachel".
 * @param {{ apiKey: string, voice?: string }} opts
 * @returns {TTSConfigImpl}
 */
function elevenlabs(opts) {
  const voice = opts.voice ?? "rachel";
  const config = new TTSConfigImpl("elevenlabs", opts.apiKey, voice);
  return config;
}
|
|
7405
|
+
/**
 * Build an OpenAI TTS config; voice defaults to "alloy".
 * @param {{ apiKey: string, voice?: string }} opts
 * @returns {TTSConfigImpl}
 */
function openaiTts(opts) {
  const voice = opts.voice ?? "alloy";
  const config = new TTSConfigImpl("openai", opts.apiKey, voice);
  return config;
}
|
|
7408
|
+
|
|
7409
|
+
// src/index.ts
|
|
7410
|
+
init_pricing();
|
|
7687
7411
|
init_metrics();
|
|
7688
7412
|
init_store();
|
|
7689
7413
|
init_auth();
|
|
@@ -8372,166 +8096,480 @@ function scheduleInterval(intervalOrOpts, callback) {
|
|
|
8372
8096
|
};
|
|
8373
8097
|
}
|
|
8374
8098
|
|
|
8375
|
-
// src/
|
|
8099
|
+
// src/stt/deepgram.ts
|
|
8376
8100
|
init_deepgram_stt();
|
|
8377
|
-
|
|
8378
|
-
|
|
8379
|
-
|
|
8380
|
-
|
|
8381
|
-
|
|
8382
|
-
|
|
8383
|
-
|
|
8384
|
-
var FINALIZED_TOKEN = "<fin>";
|
|
8385
|
-
var KEEPALIVE_INTERVAL_MS = 5e3;
|
|
8386
|
-
function isEndToken(token) {
|
|
8387
|
-
return token.text === END_TOKEN || token.text === FINALIZED_TOKEN;
|
|
8388
|
-
}
|
|
8389
|
-
var TokenAccumulator = class {
|
|
8390
|
-
text = "";
|
|
8391
|
-
confSum = 0;
|
|
8392
|
-
confCount = 0;
|
|
8393
|
-
update(token) {
|
|
8394
|
-
if (token.text) {
|
|
8395
|
-
this.text += token.text;
|
|
8396
|
-
}
|
|
8397
|
-
if (typeof token.confidence === "number") {
|
|
8398
|
-
this.confSum += token.confidence;
|
|
8399
|
-
this.confCount += 1;
|
|
8101
|
+
/**
 * Options-object front end for DeepgramSTT.
 *
 * Resolves the API key from `opts.apiKey` or DEEPGRAM_API_KEY, then forwards
 * positional arguments to the base constructor with these defaults:
 * language "en", model "nova-3", encoding "linear16", sample rate 16000,
 * endpointing 150 ms, utterance end 1000 ms (an explicit null is passed
 * through), smart formatting on, interim results on. `vadEvents` is forwarded
 * only when it is not undefined.
 *
 * @throws {Error} From the constructor when no API key can be resolved.
 */
var STT = class extends DeepgramSTT {
  /**
   * @param {object} [opts] - apiKey, language, model, encoding, sampleRate,
   *   endpointingMs, utteranceEndMs, smartFormat, interimResults, vadEvents.
   */
  constructor(opts = {}) {
    const resolvedKey = opts.apiKey ?? process.env.DEEPGRAM_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "Deepgram STT requires an apiKey. Pass { apiKey: 'dg_...' } or set DEEPGRAM_API_KEY in the environment."
      );
    }
    const utteranceEnd = opts.utteranceEndMs;
    const tuning = {
      endpointingMs: opts.endpointingMs ?? 150,
      // Only `undefined` takes the default; null is a deliberate caller value.
      utteranceEndMs: utteranceEnd === void 0 ? 1e3 : utteranceEnd,
      smartFormat: opts.smartFormat ?? true,
      interimResults: opts.interimResults ?? true
    };
    if (opts.vadEvents !== void 0) {
      tuning.vadEvents = opts.vadEvents;
    }
    const language = opts.language ?? "en";
    const model = opts.model ?? "nova-3";
    const encoding = opts.encoding ?? "linear16";
    const sampleRate = opts.sampleRate ?? 16e3;
    super(resolvedKey, language, model, encoding, sampleRate, tuning);
  }
};
|
|
8414
|
-
|
|
8415
|
-
|
|
8416
|
-
|
|
8417
|
-
|
|
8418
|
-
|
|
8125
|
+
|
|
8126
|
+
// src/providers/whisper-stt.ts
|
|
8127
|
+
init_logger();
|
|
8128
|
+
var OPENAI_TRANSCRIPTION_URL = "https://api.openai.com/v1/audio/transcriptions";
|
|
8129
|
+
var DEFAULT_BUFFER_SIZE = 16e3 * 2;
|
|
8130
|
+
/**
 * Prefix raw PCM audio with a 44-byte RIFF/WAVE header (format tag 1).
 *
 * @param {Buffer} pcm - Raw sample bytes; copied into the result unchanged.
 * @param {number} [sampleRate=16000] - Samples per second.
 * @param {number} [channels=1] - Channel count.
 * @param {number} [bitsPerSample=16] - Bits per sample.
 * @returns {Buffer} A new buffer: header followed by `pcm`.
 */
function wrapPcmInWav(pcm, sampleRate = 16e3, channels = 1, bitsPerSample = 16) {
  const payloadBytes = pcm.length;
  const bytesPerSample = bitsPerSample / 8;
  // Header layout in file order; every integer is little-endian.
  const layout = [
    ["tag", "RIFF"],
    ["u32", 36 + payloadBytes], // RIFF chunk size = remaining header + data
    ["tag", "WAVE"],
    ["tag", "fmt "],
    ["u32", 16], // fmt chunk size
    ["u16", 1], // audio format tag
    ["u16", channels],
    ["u32", sampleRate],
    ["u32", sampleRate * channels * bytesPerSample], // byte rate
    ["u16", channels * bytesPerSample], // block align
    ["u16", bitsPerSample],
    ["tag", "data"],
    ["u32", payloadBytes]
  ];
  const header = Buffer.alloc(44);
  let cursor = 0;
  for (const [type, value] of layout) {
    if (type === "tag") {
      header.write(value, cursor);
      cursor += 4;
    } else if (type === "u32") {
      header.writeUInt32LE(value, cursor);
      cursor += 4;
    } else {
      header.writeUInt16LE(value, cursor);
      cursor += 2;
    }
  }
  return Buffer.concat([header, pcm]);
}
|
|
8148
|
+
/**
 * Buffered speech-to-text adapter backed by the OpenAI transcription HTTP endpoint.
 *
 * Audio passed to `sendAudio` is accumulated in memory; each time the buffer reaches
 * `bufferSize` bytes it is wrapped in a WAV header and POSTed for transcription.
 * Every non-empty result is delivered to the registered `onTranscript` callbacks as a
 * final transcript.
 */
var WhisperSTT = class _WhisperSTT {
  /** API key sent as a Bearer token on every transcription request. */
  apiKey;
  /** Model name sent in the `model` form field. */
  model;
  /** Optional language code; the `language` form field is only sent when this is truthy. */
  language;
  /** Number of buffered bytes that triggers a transcription request. */
  bufferSize;
  /** PCM bytes received since the last flush. */
  buffer = Buffer.alloc(0);
  /** Registered transcript listeners (at most 10). */
  callbacks = [];
  /** True between `connect()` and `close()`; audio is ignored otherwise. */
  running = false;
  /** In-flight transcription promises, awaited by `close()`. */
  pendingTranscriptions = [];

  /**
   * @param {string} apiKey - OpenAI API key.
   * @param {string} [model="whisper-1"] - Transcription model name.
   * @param {string} [language] - Optional language hint.
   * @param {number} [bufferSize=DEFAULT_BUFFER_SIZE] - Flush threshold in bytes.
   */
  constructor(apiKey, model = "whisper-1", language, bufferSize = DEFAULT_BUFFER_SIZE) {
    this.apiKey = apiKey;
    this.model = model;
    this.language = language;
    this.bufferSize = bufferSize;
  }

  /** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
  static forTwilio(apiKey, language = "en", model = "whisper-1") {
    return new _WhisperSTT(apiKey, model, language);
  }

  /** Start accepting audio. No network connection is opened; this only resets local state. */
  async connect() {
    this.running = true;
    this.buffer = Buffer.alloc(0);
  }

  /**
   * Append an audio chunk and flush to the transcription endpoint once enough has accumulated.
   * @param {Buffer} audio - Raw PCM bytes.
   */
  sendAudio(audio) {
    if (!this.running) return;
    this.buffer = Buffer.concat([this.buffer, audio]);
    if (this.buffer.length >= this.bufferSize) {
      const pcm = this.buffer;
      this.buffer = Buffer.alloc(0);
      this.trackTranscription(this.transcribeBuffer(pcm));
    }
  }

  /**
   * Remember an in-flight transcription so `close()` can wait for it, and forget it once settled.
   * @param {Promise<void>} promise
   */
  trackTranscription(promise) {
    const wrapped = promise
      // Nothing awaits this promise until close(); make sure a rejection can never
      // surface as an unhandled rejection in the meantime.
      .catch((err) => {
        getLogger().error(`WhisperSTT transcription error: ${String(err)}`);
      })
      .finally(() => {
        const idx = this.pendingTranscriptions.indexOf(wrapped);
        if (idx !== -1) this.pendingTranscriptions.splice(idx, 1);
      });
    this.pendingTranscriptions.push(wrapped);
  }

  /**
   * Register a transcript listener. Once 10 listeners exist, the newest one replaces the last slot.
   * @param {(transcript: {text: string, isFinal: boolean, confidence: number}) => void} callback
   */
  onTranscript(callback) {
    if (this.callbacks.length >= 10) {
      getLogger().warn("WhisperSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
      this.callbacks[this.callbacks.length - 1] = callback;
      return;
    }
    this.callbacks.push(callback);
  }

  /**
   * Stop accepting audio, flush a sufficiently large remainder, wait for all in-flight
   * requests to settle, then drop the listeners.
   */
  async close() {
    this.running = false;
    // Only transcribe the tail if it holds at least a quarter of a full buffer;
    // shorter remainders are discarded.
    if (this.buffer.length >= this.bufferSize / 4) {
      const pcm = this.buffer;
      this.buffer = Buffer.alloc(0);
      this.trackTranscription(this.transcribeBuffer(pcm));
    } else {
      this.buffer = Buffer.alloc(0);
    }
    await Promise.allSettled(this.pendingTranscriptions);
    this.callbacks = [];
  }

  // ------------------------------------------------------------------
  // Private
  // ------------------------------------------------------------------

  /**
   * Upload one PCM buffer as a WAV file and dispatch the resulting text.
   * Failures are logged and never thrown.
   * @param {Buffer} pcm - Raw PCM bytes (wrapped with the default WAV header parameters).
   */
  async transcribeBuffer(pcm) {
    let text;
    try {
      const wav = wrapPcmInWav(pcm);
      const formData = new FormData();
      // Slice the underlying ArrayBuffer so only this buffer's bytes are uploaded,
      // even when `wav` is a view into a larger shared allocation.
      formData.append("file", new Blob([wav.buffer.slice(wav.byteOffset, wav.byteOffset + wav.byteLength)], { type: "audio/wav" }), "audio.wav");
      formData.append("model", this.model);
      if (this.language) {
        formData.append("language", this.language);
      }
      const resp = await fetch(OPENAI_TRANSCRIPTION_URL, {
        method: "POST",
        headers: { Authorization: `Bearer ${this.apiKey}` },
        body: formData,
        signal: AbortSignal.timeout(15e3)
      });
      if (!resp.ok) {
        const body = await resp.text();
        getLogger().error(`WhisperSTT transcription error: ${resp.status} ${body}`);
        return;
      }
      const json = await resp.json();
      text = (json.text ?? "").trim();
    } catch (err) {
      getLogger().error(`WhisperSTT transcription error: ${String(err)}`);
      return;
    }
    if (!text) return;
    // Only `text` is read from the response, so confidence is reported as a constant 1.
    const transcript = {
      text,
      isFinal: true,
      confidence: 1
    };
    for (const cb of this.callbacks) {
      // Isolate listeners: one throwing callback must not starve the others, and it
      // must not be reported as a transcription failure.
      try {
        cb(transcript);
      } catch (err) {
        getLogger().error(`WhisperSTT onTranscript callback error: ${String(err)}`);
      }
    }
  }
};
|
|
8246
|
+
|
|
8247
|
+
// src/stt/whisper.ts
|
|
8248
|
+
/**
 * Options-object front end for `WhisperSTT`.
 * The API key comes from `opts.apiKey`, falling back to the `OPENAI_API_KEY` environment variable.
 */
var STT2 = class extends WhisperSTT {
  /**
   * @param {{apiKey?: string, model?: string, language?: string, bufferSize?: number}} [opts]
   * @throws {Error} When no API key is supplied and `OPENAI_API_KEY` is unset.
   */
  constructor(opts = {}) {
    const { apiKey, model, language, bufferSize } = opts;
    const resolvedKey = apiKey ?? process.env.OPENAI_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "Whisper STT requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
      );
    }
    super(resolvedKey, model ?? "whisper-1", language, bufferSize);
  }
};
|
|
8259
|
+
|
|
8260
|
+
// src/providers/cartesia-stt.ts
|
|
8261
|
+
var import_ws7 = __toESM(require("ws"));
|
|
8262
|
+
init_logger();
|
|
8263
|
+
var DEFAULT_BASE_URL = "https://api.cartesia.ai";
|
|
8264
|
+
var API_VERSION = "2025-04-16";
|
|
8265
|
+
var USER_AGENT = "Patter/1.0 (integration=LiveKit-port; provider=Cartesia)";
|
|
8266
|
+
var KEEPALIVE_INTERVAL_MS = 3e4;
|
|
8267
|
+
var CONNECT_TIMEOUT_MS = 1e4;
|
|
8268
|
+
var MAX_CALLBACKS = 10;
|
|
8269
|
+
/**
 * Streaming speech-to-text adapter for the Cartesia `/stt/websocket` endpoint.
 *
 * `connect()` opens the socket, `sendAudio()` forwards raw audio frames, and every
 * non-empty `transcript` event from the server is delivered to the registered
 * `onTranscript` callbacks.
 */
var CartesiaSTT = class {
  /** Active WebSocket, or null when disconnected. */
  ws = null;
  /** Registered transcript listeners (at most MAX_CALLBACKS). */
  callbacks = [];
  /** Interval handle for the periodic WebSocket ping, or null. */
  keepaliveTimer = null;
  /** Cartesia request id — set from the server transcript events. */
  requestId = "";

  /**
   * @param {string} apiKey - Cartesia API key (must be non-empty).
   * @param {{model?: string, language?: string, encoding?: string, sampleRate?: number, baseUrl?: string}} [options]
   * @throws {Error} When `apiKey` is empty.
   */
  constructor(apiKey, options = {}) {
    this.apiKey = apiKey;
    this.options = options;
    if (!apiKey) {
      throw new Error("CartesiaSTT requires a non-empty apiKey");
    }
  }

  /**
   * Build the WebSocket URL, mapping http(s) base URLs to ws(s) and defaulting
   * scheme-less hosts to `wss://`.
   * @returns {string}
   */
  buildWsUrl() {
    const opts = this.options;
    const rawBase = opts.baseUrl ?? DEFAULT_BASE_URL;
    let base;
    if (rawBase.startsWith("http://")) {
      base = `ws://${rawBase.slice("http://".length)}`;
    } else if (rawBase.startsWith("https://")) {
      base = `wss://${rawBase.slice("https://".length)}`;
    } else if (rawBase.startsWith("ws://") || rawBase.startsWith("wss://")) {
      base = rawBase;
    } else {
      base = `wss://${rawBase}`;
    }
    // A base URL configured with a trailing slash would otherwise produce "//stt/websocket".
    base = base.replace(/\/+$/, "");
    const language = opts.language ?? "en";
    // NOTE(review): the API key travels in the query string here; confirm this is the
    // intended auth mechanism, since URLs are more likely to end up in logs than headers.
    const params = new URLSearchParams({
      model: opts.model ?? "ink-whisper",
      sample_rate: String(opts.sampleRate ?? 16e3),
      encoding: opts.encoding ?? "pcm_s16le",
      cartesia_version: API_VERSION,
      api_key: this.apiKey,
      language
    });
    return `${base}/stt/websocket?${params.toString()}`;
  }

  /**
   * Open the WebSocket and start the keepalive ping.
   * @returns {Promise<void>} Resolves once the socket is open.
   * @throws {Error} On socket error or if the socket does not open within CONNECT_TIMEOUT_MS.
   */
  async connect() {
    // A repeated connect() must not leave the previous interval running.
    this.clearKeepalive();
    const ws = new import_ws7.default(this.buildWsUrl(), {
      headers: { "User-Agent": USER_AGENT }
    });
    this.ws = ws;
    // An EventEmitter 'error' with no listener is thrown as an uncaught exception,
    // so keep a listener attached for the whole lifetime of the socket.
    ws.on("error", (err) => {
      getLogger().error(`Cartesia STT WebSocket error: ${String(err)}`);
    });
    // Stop pinging once the socket is gone; ignore close events from a superseded socket.
    ws.on("close", () => {
      if (this.ws === ws) this.clearKeepalive();
    });
    try {
      await new Promise((resolve, reject) => {
        const timer = setTimeout(
          () => reject(new Error("Cartesia STT connect timeout")),
          CONNECT_TIMEOUT_MS
        );
        ws.once("open", () => {
          clearTimeout(timer);
          resolve();
        });
        ws.once("error", (err) => {
          clearTimeout(timer);
          reject(err);
        });
      });
    } catch (err) {
      // Do not leak a half-open socket or leave a dead one reachable via this.ws.
      try {
        ws.terminate();
      } catch {
        // Best-effort teardown; the original connect error is what the caller needs.
      }
      if (this.ws === ws) this.ws = null;
      throw err;
    }
    ws.on("message", (raw) => {
      let event;
      try {
        event = JSON.parse(raw.toString());
      } catch {
        // Frames that are not valid JSON are ignored.
        return;
      }
      this.handleEvent(event);
    });
    this.keepaliveTimer = setInterval(() => {
      if (this.ws && this.ws.readyState === import_ws7.default.OPEN) {
        try {
          this.ws.ping();
        } catch {
          // Best-effort keepalive; a failed ping is not fatal.
        }
      }
    }, KEEPALIVE_INTERVAL_MS);
  }

  /** Cancel the keepalive interval if one is running. */
  clearKeepalive() {
    if (this.keepaliveTimer) {
      clearInterval(this.keepaliveTimer);
      this.keepaliveTimer = null;
    }
  }

  /**
   * Handle one decoded server event: forward `transcript` events with text, log `error` events.
   * @param {object} event - Parsed JSON message from the server.
   */
  handleEvent(event) {
    const type = event.type;
    if (type === "transcript") {
      const text = (event.text ?? "").trim();
      const isFinal = Boolean(event.is_final);
      if (!text && !isFinal) return;
      // Record the request id even for empty final events, before bailing out below.
      if (event.request_id) {
        this.requestId = event.request_id;
      }
      if (!text) return;
      const confidence = Number(event.probability ?? 1);
      this.emit({ text, isFinal, confidence });
      return;
    }
    if (type === "error") {
      getLogger().error(`Cartesia STT error: ${event.message ?? "unknown"}`);
      return;
    }
  }

  /**
   * Deliver a transcript to every listener, isolating listeners from each other.
   * @param {{text: string, isFinal: boolean, confidence: number}} transcript
   */
  emit(transcript) {
    for (const cb of this.callbacks) {
      // This runs inside the socket's message handler: a throwing listener must not
      // starve the remaining listeners or escape into the WebSocket event loop.
      try {
        cb(transcript);
      } catch (err) {
        getLogger().error(`Cartesia STT onTranscript callback error: ${String(err)}`);
      }
    }
  }

  /**
   * Forward an audio frame; silently dropped when the socket is not open.
   * @param {Buffer} audio
   */
  sendAudio(audio) {
    if (!this.ws || this.ws.readyState !== import_ws7.default.OPEN) return;
    this.ws.send(audio);
  }

  /**
   * Register a transcript listener. Once MAX_CALLBACKS listeners exist, the newest one
   * replaces the last slot.
   * @param {(transcript: {text: string, isFinal: boolean, confidence: number}) => void} callback
   */
  onTranscript(callback) {
    if (this.callbacks.length >= MAX_CALLBACKS) {
      getLogger().warn(
        "CartesiaSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
      );
      this.callbacks[this.callbacks.length - 1] = callback;
      return;
    }
    this.callbacks.push(callback);
  }

  /** Stop the keepalive, send a final "finalize" message (best-effort) and close the socket. */
  close() {
    this.clearKeepalive();
    const ws = this.ws;
    if (!ws) return;
    this.ws = null;
    try {
      ws.send("finalize");
    } catch {
      // The socket may not be open; closing below is still required.
    }
    ws.close();
  }
};
|
|
8396
|
+
|
|
8397
|
+
// src/stt/cartesia.ts
|
|
8398
|
+
/**
 * Options-object front end for `CartesiaSTT`.
 * The API key comes from `opts.apiKey`, falling back to the `CARTESIA_API_KEY` environment variable.
 */
var STT3 = class extends CartesiaSTT {
  /**
   * @param {{apiKey?: string, model?: string, language?: string, encoding?: string, sampleRate?: number, baseUrl?: string}} [opts]
   * @throws {Error} When no API key is supplied and `CARTESIA_API_KEY` is unset.
   */
  constructor(opts = {}) {
    const { apiKey, model, language, encoding, sampleRate, baseUrl } = opts;
    const resolvedKey = apiKey ?? process.env.CARTESIA_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "Cartesia STT requires an apiKey. Pass { apiKey: '...' } or set CARTESIA_API_KEY in the environment."
      );
    }
    // Only the provider-level options are forwarded; any other keys on `opts` are dropped.
    super(resolvedKey, { model, language, encoding, sampleRate, baseUrl });
  }
};
|
|
8415
|
+
|
|
8416
|
+
// src/providers/soniox-stt.ts
|
|
8417
|
+
var import_ws8 = __toESM(require("ws"));
|
|
8418
|
+
init_logger();
|
|
8419
|
+
var SONIOX_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket";
|
|
8420
|
+
var KEEPALIVE_MESSAGE = '{"type": "keepalive"}';
|
|
8421
|
+
var END_TOKEN = "<end>";
|
|
8422
|
+
var FINALIZED_TOKEN = "<fin>";
|
|
8423
|
+
var KEEPALIVE_INTERVAL_MS2 = 5e3;
|
|
8424
|
+
/**
 * Tell whether a token is one of the two marker tokens (END_TOKEN or FINALIZED_TOKEN)
 * rather than transcript text.
 * @param {{text?: string}} token
 * @returns {boolean}
 */
function isEndToken(token) {
  const { text } = token;
  if (text === END_TOKEN) return true;
  return text === FINALIZED_TOKEN;
}
|
|
8427
|
+
/**
 * Accumulates token text and a running mean of per-token confidence.
 */
var TokenAccumulator = class {
  /** Concatenated text of all tokens seen since the last reset. */
  text = "";
  /** Sum of the confidences counted so far. */
  confSum = 0;
  /** Number of confidences counted so far. */
  confCount = 0;

  /**
   * Fold one token into the accumulator.
   * @param {{text?: string, confidence?: number}} token
   */
  update(token) {
    if (token.text) {
      this.text += token.text;
    }
    // Only finite numbers take part in the mean. A bare `typeof === "number"` check
    // would let NaN/Infinity through and poison every later `confidence` read.
    if (Number.isFinite(token.confidence)) {
      this.confSum += token.confidence;
      this.confCount += 1;
    }
  }

  /** Mean confidence of the counted tokens, or 0 when none have been counted. */
  get confidence() {
    return this.confCount === 0 ? 0 : this.confSum / this.confCount;
  }

  /** Clear the accumulated text and confidence statistics. */
  reset() {
    this.text = "";
    this.confSum = 0;
    this.confCount = 0;
  }

  /** Raw confidence statistics as `{ sum, count }`. */
  get raw() {
    return { sum: this.confSum, count: this.confCount };
  }
};
|
|
8452
|
+
var SonioxSTT = class _SonioxSTT {
|
|
8453
|
+
ws = null;
|
|
8454
|
+
callbacks = [];
|
|
8455
|
+
final = new TokenAccumulator();
|
|
8456
|
+
keepaliveTimer = null;
|
|
8457
|
+
apiKey;
|
|
8458
|
+
model;
|
|
8459
|
+
languageHints;
|
|
8460
|
+
languageHintsStrict;
|
|
8461
|
+
sampleRate;
|
|
8462
|
+
numChannels;
|
|
8463
|
+
enableSpeakerDiarization;
|
|
8464
|
+
enableLanguageIdentification;
|
|
8465
|
+
maxEndpointDelayMs;
|
|
8466
|
+
clientReferenceId;
|
|
8467
|
+
baseUrl;
|
|
8468
|
+
constructor(apiKey, options = {}) {
|
|
8469
|
+
if (!apiKey) {
|
|
8470
|
+
throw new Error("Soniox apiKey is required");
|
|
8471
|
+
}
|
|
8472
|
+
const maxEndpointDelayMs = options.maxEndpointDelayMs ?? 500;
|
|
8473
|
+
if (maxEndpointDelayMs < 500 || maxEndpointDelayMs > 3e3) {
|
|
8474
|
+
throw new Error("maxEndpointDelayMs must be between 500 and 3000");
|
|
8475
|
+
}
|
|
8476
|
+
this.apiKey = apiKey;
|
|
8477
|
+
this.model = options.model ?? "stt-rt-v4";
|
|
8478
|
+
this.languageHints = options.languageHints;
|
|
8479
|
+
this.languageHintsStrict = options.languageHintsStrict ?? false;
|
|
8480
|
+
this.sampleRate = options.sampleRate ?? 16e3;
|
|
8481
|
+
this.numChannels = options.numChannels ?? 1;
|
|
8482
|
+
this.enableSpeakerDiarization = options.enableSpeakerDiarization ?? false;
|
|
8483
|
+
this.enableLanguageIdentification = options.enableLanguageIdentification ?? true;
|
|
8484
|
+
this.maxEndpointDelayMs = maxEndpointDelayMs;
|
|
8485
|
+
this.clientReferenceId = options.clientReferenceId;
|
|
8486
|
+
this.baseUrl = options.baseUrl ?? SONIOX_WS_URL;
|
|
8487
|
+
}
|
|
8488
|
+
/** Factory for Twilio-style 8 kHz linear PCM. */
|
|
8489
|
+
static forTwilio(apiKey, languageHints) {
|
|
8490
|
+
return new _SonioxSTT(apiKey, { sampleRate: 8e3, languageHints });
|
|
8491
|
+
}
|
|
8492
|
+
buildConfig() {
|
|
8493
|
+
const config = {
|
|
8494
|
+
api_key: this.apiKey,
|
|
8495
|
+
model: this.model,
|
|
8496
|
+
audio_format: "pcm_s16le",
|
|
8497
|
+
num_channels: this.numChannels,
|
|
8498
|
+
sample_rate: this.sampleRate,
|
|
8499
|
+
enable_endpoint_detection: true,
|
|
8500
|
+
enable_speaker_diarization: this.enableSpeakerDiarization,
|
|
8501
|
+
enable_language_identification: this.enableLanguageIdentification,
|
|
8502
|
+
max_endpoint_delay_ms: this.maxEndpointDelayMs
|
|
8503
|
+
};
|
|
8504
|
+
if (this.languageHints) {
|
|
8505
|
+
config.language_hints = this.languageHints;
|
|
8506
|
+
config.language_hints_strict = this.languageHintsStrict;
|
|
8507
|
+
}
|
|
8508
|
+
if (this.clientReferenceId) {
|
|
8509
|
+
config.client_reference_id = this.clientReferenceId;
|
|
8510
|
+
}
|
|
8511
|
+
return config;
|
|
8512
|
+
}
|
|
8513
|
+
async connect() {
|
|
8514
|
+
this.ws = new import_ws8.default(this.baseUrl);
|
|
8515
|
+
await new Promise((resolve, reject) => {
|
|
8516
|
+
const timer = setTimeout(() => reject(new Error("Soniox connect timeout")), 1e4);
|
|
8517
|
+
this.ws.once("open", () => {
|
|
8518
|
+
clearTimeout(timer);
|
|
8519
|
+
resolve();
|
|
8520
|
+
});
|
|
8521
|
+
this.ws.once("error", (err) => {
|
|
8522
|
+
clearTimeout(timer);
|
|
8523
|
+
reject(err);
|
|
8524
|
+
});
|
|
8525
|
+
});
|
|
8526
|
+
this.ws.send(JSON.stringify(this.buildConfig()));
|
|
8527
|
+
this.ws.on("message", (raw) => this.handleMessage(raw.toString()));
|
|
8528
|
+
this.ws.on("close", () => this.clearKeepalive());
|
|
8529
|
+
this.ws.on("error", (err) => {
|
|
8530
|
+
getLogger().error(`SonioxSTT WebSocket error: ${String(err)}`);
|
|
8531
|
+
});
|
|
8532
|
+
this.keepaliveTimer = setInterval(() => {
|
|
8533
|
+
if (this.ws && this.ws.readyState === import_ws8.default.OPEN) {
|
|
8534
|
+
try {
|
|
8535
|
+
this.ws.send(KEEPALIVE_MESSAGE);
|
|
8536
|
+
} catch {
|
|
8537
|
+
}
|
|
8538
|
+
}
|
|
8539
|
+
}, KEEPALIVE_INTERVAL_MS2);
|
|
8540
|
+
}
|
|
8541
|
+
clearKeepalive() {
|
|
8542
|
+
if (this.keepaliveTimer) {
|
|
8543
|
+
clearInterval(this.keepaliveTimer);
|
|
8544
|
+
this.keepaliveTimer = null;
|
|
8545
|
+
}
|
|
8546
|
+
}
|
|
8547
|
+
handleMessage(raw) {
|
|
8548
|
+
let content;
|
|
8549
|
+
try {
|
|
8550
|
+
content = JSON.parse(raw);
|
|
8551
|
+
} catch {
|
|
8552
|
+
return;
|
|
8553
|
+
}
|
|
8554
|
+
if (content.error_code || content.error_message) {
|
|
8555
|
+
getLogger().error(
|
|
8556
|
+
`SonioxSTT error ${String(content.error_code)}: ${String(content.error_message)}`
|
|
8557
|
+
);
|
|
8558
|
+
}
|
|
8559
|
+
const tokens = content.tokens ?? [];
|
|
8560
|
+
const nonFinal = new TokenAccumulator();
|
|
8561
|
+
let emittedFinalThisMsg = false;
|
|
8562
|
+
for (const token of tokens) {
|
|
8563
|
+
if (token.is_final) {
|
|
8564
|
+
if (isEndToken(token)) {
|
|
8565
|
+
if (this.final.text) {
|
|
8566
|
+
this.emit({
|
|
8567
|
+
text: this.final.text.trim(),
|
|
8568
|
+
isFinal: true,
|
|
8569
|
+
confidence: this.final.confidence
|
|
8570
|
+
});
|
|
8571
|
+
this.final.reset();
|
|
8572
|
+
emittedFinalThisMsg = true;
|
|
8535
8573
|
}
|
|
8536
8574
|
} else {
|
|
8537
8575
|
this.final.update(token);
|
|
@@ -8565,7 +8603,7 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
8565
8603
|
}
|
|
8566
8604
|
}
|
|
8567
8605
|
sendAudio(audio) {
|
|
8568
|
-
if (!this.ws || this.ws.readyState !==
|
|
8606
|
+
if (!this.ws || this.ws.readyState !== import_ws8.default.OPEN) return;
|
|
8569
8607
|
if (audio.length === 0) return;
|
|
8570
8608
|
this.ws.send(audio);
|
|
8571
8609
|
}
|
|
@@ -8595,16 +8633,28 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
8595
8633
|
}
|
|
8596
8634
|
};
|
|
8597
8635
|
|
|
8598
|
-
// src/
|
|
8599
|
-
|
|
8636
|
+
// src/stt/soniox.ts
|
|
8637
|
+
/**
 * Options-object front end for `SonioxSTT`.
 * The API key comes from `opts.apiKey`, falling back to the `SONIOX_API_KEY` environment variable;
 * every other option is forwarded unchanged.
 */
var STT4 = class extends SonioxSTT {
  /**
   * @param {object} [opts] - `apiKey` plus any options accepted by the base class.
   * @throws {Error} When no API key is supplied and `SONIOX_API_KEY` is unset.
   */
  constructor(opts = {}) {
    // Split the key off so it is not forwarded a second time inside the options bag.
    const { apiKey, ...providerOptions } = opts;
    const resolvedKey = apiKey ?? process.env.SONIOX_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "Soniox STT requires an apiKey. Pass { apiKey: '...' } or set SONIOX_API_KEY in the environment."
      );
    }
    super(resolvedKey, providerOptions);
  }
};
|
|
8600
8650
|
|
|
8601
8651
|
// src/providers/assemblyai-stt.ts
|
|
8602
|
-
var
|
|
8652
|
+
var import_ws9 = __toESM(require("ws"));
|
|
8603
8653
|
init_logger();
|
|
8604
|
-
var
|
|
8654
|
+
var DEFAULT_BASE_URL2 = "wss://streaming.assemblyai.com";
|
|
8605
8655
|
var DEFAULT_MIN_TURN_SILENCE_MS = 100;
|
|
8606
|
-
var
|
|
8607
|
-
var
|
|
8656
|
+
var CONNECT_TIMEOUT_MS2 = 1e4;
|
|
8657
|
+
var MAX_CALLBACKS2 = 10;
|
|
8608
8658
|
var AssemblyAISTT = class _AssemblyAISTT {
|
|
8609
8659
|
constructor(apiKey, options = {}) {
|
|
8610
8660
|
this.apiKey = apiKey;
|
|
@@ -8667,168 +8717,21 @@ var AssemblyAISTT = class _AssemblyAISTT {
|
|
|
8667
8717
|
params.set(key, String(value));
|
|
8668
8718
|
}
|
|
8669
8719
|
}
|
|
8670
|
-
const base = opts.baseUrl ??
|
|
8720
|
+
const base = opts.baseUrl ?? DEFAULT_BASE_URL2;
|
|
8671
8721
|
return `${base}/v3/ws?${params.toString()}`;
|
|
8672
8722
|
}
|
|
8673
8723
|
async connect() {
|
|
8674
|
-
const url = this.buildUrl();
|
|
8675
|
-
this.ws = new import_ws8.default(url, {
|
|
8676
|
-
headers: {
|
|
8677
|
-
Authorization: this.apiKey,
|
|
8678
|
-
"Content-Type": "application/json",
|
|
8679
|
-
"User-Agent": "Patter/1.0 (integration=LiveKit-port)"
|
|
8680
|
-
}
|
|
8681
|
-
});
|
|
8682
|
-
await new Promise((resolve, reject) => {
|
|
8683
|
-
const timer = setTimeout(
|
|
8684
|
-
() => reject(new Error("AssemblyAI connect timeout")),
|
|
8685
|
-
CONNECT_TIMEOUT_MS
|
|
8686
|
-
);
|
|
8687
|
-
this.ws.once("open", () => {
|
|
8688
|
-
clearTimeout(timer);
|
|
8689
|
-
resolve();
|
|
8690
|
-
});
|
|
8691
|
-
this.ws.once("error", (err) => {
|
|
8692
|
-
clearTimeout(timer);
|
|
8693
|
-
reject(err);
|
|
8694
|
-
});
|
|
8695
|
-
});
|
|
8696
|
-
this.ws.on("message", (raw) => {
|
|
8697
|
-
let event;
|
|
8698
|
-
try {
|
|
8699
|
-
event = JSON.parse(raw.toString());
|
|
8700
|
-
} catch {
|
|
8701
|
-
return;
|
|
8702
|
-
}
|
|
8703
|
-
this.handleEvent(event);
|
|
8704
|
-
});
|
|
8705
|
-
}
|
|
8706
|
-
handleEvent(event) {
|
|
8707
|
-
const type = event.type;
|
|
8708
|
-
if (type === "Begin") {
|
|
8709
|
-
this.sessionId = event.id ?? "";
|
|
8710
|
-
this.expiresAt = event.expires_at ?? 0;
|
|
8711
|
-
return;
|
|
8712
|
-
}
|
|
8713
|
-
if (type !== "Turn") {
|
|
8714
|
-
return;
|
|
8715
|
-
}
|
|
8716
|
-
const endOfTurn = Boolean(event.end_of_turn);
|
|
8717
|
-
const turnIsFormatted = Boolean(event.turn_is_formatted);
|
|
8718
|
-
const words = event.words ?? [];
|
|
8719
|
-
const transcriptText = (event.transcript ?? "").trim();
|
|
8720
|
-
if (endOfTurn) {
|
|
8721
|
-
if (this.options.formatTurns && !turnIsFormatted) return;
|
|
8722
|
-
if (!transcriptText) return;
|
|
8723
|
-
this.emit({
|
|
8724
|
-
text: transcriptText,
|
|
8725
|
-
isFinal: true,
|
|
8726
|
-
confidence: averageConfidence(words)
|
|
8727
|
-
});
|
|
8728
|
-
return;
|
|
8729
|
-
}
|
|
8730
|
-
if (!words.length) return;
|
|
8731
|
-
const interim = words.map((w) => (w.text ?? "").trim()).filter(Boolean).join(" ");
|
|
8732
|
-
if (!interim) return;
|
|
8733
|
-
this.emit({
|
|
8734
|
-
text: interim,
|
|
8735
|
-
isFinal: false,
|
|
8736
|
-
confidence: averageConfidence(words)
|
|
8737
|
-
});
|
|
8738
|
-
}
|
|
8739
|
-
emit(transcript) {
|
|
8740
|
-
for (const cb of this.callbacks) {
|
|
8741
|
-
cb(transcript);
|
|
8742
|
-
}
|
|
8743
|
-
}
|
|
8744
|
-
sendAudio(audio) {
|
|
8745
|
-
if (!this.ws || this.ws.readyState !== import_ws8.default.OPEN) return;
|
|
8746
|
-
this.ws.send(audio);
|
|
8747
|
-
}
|
|
8748
|
-
onTranscript(callback) {
|
|
8749
|
-
if (this.callbacks.length >= MAX_CALLBACKS) {
|
|
8750
|
-
getLogger().warn(
|
|
8751
|
-
"AssemblyAISTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
|
|
8752
|
-
);
|
|
8753
|
-
this.callbacks[this.callbacks.length - 1] = callback;
|
|
8754
|
-
return;
|
|
8755
|
-
}
|
|
8756
|
-
this.callbacks.push(callback);
|
|
8757
|
-
}
|
|
8758
|
-
close() {
|
|
8759
|
-
if (this.ws) {
|
|
8760
|
-
try {
|
|
8761
|
-
this.ws.send(JSON.stringify({ type: "Terminate" }));
|
|
8762
|
-
} catch {
|
|
8763
|
-
}
|
|
8764
|
-
this.ws.close();
|
|
8765
|
-
this.ws = null;
|
|
8766
|
-
}
|
|
8767
|
-
}
|
|
8768
|
-
};
|
|
8769
|
-
function averageConfidence(words) {
|
|
8770
|
-
if (!words.length) return 0;
|
|
8771
|
-
let total = 0;
|
|
8772
|
-
for (const w of words) {
|
|
8773
|
-
total += Number(w.confidence ?? 0);
|
|
8774
|
-
}
|
|
8775
|
-
return total / words.length;
|
|
8776
|
-
}
|
|
8777
|
-
|
|
8778
|
-
// src/providers/cartesia-stt.ts
|
|
8779
|
-
var import_ws9 = __toESM(require("ws"));
|
|
8780
|
-
init_logger();
|
|
8781
|
-
var DEFAULT_BASE_URL2 = "https://api.cartesia.ai";
|
|
8782
|
-
var API_VERSION = "2025-04-16";
|
|
8783
|
-
var USER_AGENT = "Patter/1.0 (integration=LiveKit-port; provider=Cartesia)";
|
|
8784
|
-
var KEEPALIVE_INTERVAL_MS2 = 3e4;
|
|
8785
|
-
var CONNECT_TIMEOUT_MS2 = 1e4;
|
|
8786
|
-
var MAX_CALLBACKS2 = 10;
|
|
8787
|
-
var CartesiaSTT = class {
|
|
8788
|
-
constructor(apiKey, options = {}) {
|
|
8789
|
-
this.apiKey = apiKey;
|
|
8790
|
-
this.options = options;
|
|
8791
|
-
if (!apiKey) {
|
|
8792
|
-
throw new Error("CartesiaSTT requires a non-empty apiKey");
|
|
8793
|
-
}
|
|
8794
|
-
}
|
|
8795
|
-
ws = null;
|
|
8796
|
-
callbacks = [];
|
|
8797
|
-
keepaliveTimer = null;
|
|
8798
|
-
/** Cartesia request id — set from the server transcript events. */
|
|
8799
|
-
requestId = "";
|
|
8800
|
-
buildWsUrl() {
|
|
8801
|
-
const opts = this.options;
|
|
8802
|
-
const rawBase = opts.baseUrl ?? DEFAULT_BASE_URL2;
|
|
8803
|
-
let base;
|
|
8804
|
-
if (rawBase.startsWith("http://")) {
|
|
8805
|
-
base = `ws://${rawBase.slice("http://".length)}`;
|
|
8806
|
-
} else if (rawBase.startsWith("https://")) {
|
|
8807
|
-
base = `wss://${rawBase.slice("https://".length)}`;
|
|
8808
|
-
} else if (rawBase.startsWith("ws://") || rawBase.startsWith("wss://")) {
|
|
8809
|
-
base = rawBase;
|
|
8810
|
-
} else {
|
|
8811
|
-
base = `wss://${rawBase}`;
|
|
8812
|
-
}
|
|
8813
|
-
const language = opts.language ?? "en";
|
|
8814
|
-
const params = new URLSearchParams({
|
|
8815
|
-
model: opts.model ?? "ink-whisper",
|
|
8816
|
-
sample_rate: String(opts.sampleRate ?? 16e3),
|
|
8817
|
-
encoding: opts.encoding ?? "pcm_s16le",
|
|
8818
|
-
cartesia_version: API_VERSION,
|
|
8819
|
-
api_key: this.apiKey,
|
|
8820
|
-
language
|
|
8821
|
-
});
|
|
8822
|
-
return `${base}/stt/websocket?${params.toString()}`;
|
|
8823
|
-
}
|
|
8824
|
-
async connect() {
|
|
8825
|
-
const url = this.buildWsUrl();
|
|
8724
|
+
const url = this.buildUrl();
|
|
8826
8725
|
this.ws = new import_ws9.default(url, {
|
|
8827
|
-
headers: {
|
|
8726
|
+
headers: {
|
|
8727
|
+
Authorization: this.apiKey,
|
|
8728
|
+
"Content-Type": "application/json",
|
|
8729
|
+
"User-Agent": "Patter/1.0 (integration=LiveKit-port)"
|
|
8730
|
+
}
|
|
8828
8731
|
});
|
|
8829
8732
|
await new Promise((resolve, reject) => {
|
|
8830
8733
|
const timer = setTimeout(
|
|
8831
|
-
() => reject(new Error("
|
|
8734
|
+
() => reject(new Error("AssemblyAI connect timeout")),
|
|
8832
8735
|
CONNECT_TIMEOUT_MS2
|
|
8833
8736
|
);
|
|
8834
8737
|
this.ws.once("open", () => {
|
|
@@ -8849,33 +8752,39 @@ var CartesiaSTT = class {
|
|
|
8849
8752
|
}
|
|
8850
8753
|
this.handleEvent(event);
|
|
8851
8754
|
});
|
|
8852
|
-
this.keepaliveTimer = setInterval(() => {
|
|
8853
|
-
if (this.ws && this.ws.readyState === import_ws9.default.OPEN) {
|
|
8854
|
-
try {
|
|
8855
|
-
this.ws.ping();
|
|
8856
|
-
} catch {
|
|
8857
|
-
}
|
|
8858
|
-
}
|
|
8859
|
-
}, KEEPALIVE_INTERVAL_MS2);
|
|
8860
8755
|
}
|
|
8861
8756
|
handleEvent(event) {
|
|
8862
8757
|
const type = event.type;
|
|
8863
|
-
if (type === "
|
|
8864
|
-
|
|
8865
|
-
|
|
8866
|
-
if (!text && !isFinal) return;
|
|
8867
|
-
if (event.request_id) {
|
|
8868
|
-
this.requestId = event.request_id;
|
|
8869
|
-
}
|
|
8870
|
-
if (!text) return;
|
|
8871
|
-
const confidence = Number(event.probability ?? 1);
|
|
8872
|
-
this.emit({ text, isFinal, confidence });
|
|
8758
|
+
if (type === "Begin") {
|
|
8759
|
+
this.sessionId = event.id ?? "";
|
|
8760
|
+
this.expiresAt = event.expires_at ?? 0;
|
|
8873
8761
|
return;
|
|
8874
8762
|
}
|
|
8875
|
-
if (type
|
|
8876
|
-
|
|
8763
|
+
if (type !== "Turn") {
|
|
8764
|
+
return;
|
|
8765
|
+
}
|
|
8766
|
+
const endOfTurn = Boolean(event.end_of_turn);
|
|
8767
|
+
const turnIsFormatted = Boolean(event.turn_is_formatted);
|
|
8768
|
+
const words = event.words ?? [];
|
|
8769
|
+
const transcriptText = (event.transcript ?? "").trim();
|
|
8770
|
+
if (endOfTurn) {
|
|
8771
|
+
if (this.options.formatTurns && !turnIsFormatted) return;
|
|
8772
|
+
if (!transcriptText) return;
|
|
8773
|
+
this.emit({
|
|
8774
|
+
text: transcriptText,
|
|
8775
|
+
isFinal: true,
|
|
8776
|
+
confidence: averageConfidence(words)
|
|
8777
|
+
});
|
|
8877
8778
|
return;
|
|
8878
8779
|
}
|
|
8780
|
+
if (!words.length) return;
|
|
8781
|
+
const interim = words.map((w) => (w.text ?? "").trim()).filter(Boolean).join(" ");
|
|
8782
|
+
if (!interim) return;
|
|
8783
|
+
this.emit({
|
|
8784
|
+
text: interim,
|
|
8785
|
+
isFinal: false,
|
|
8786
|
+
confidence: averageConfidence(words)
|
|
8787
|
+
});
|
|
8879
8788
|
}
|
|
8880
8789
|
emit(transcript) {
|
|
8881
8790
|
for (const cb of this.callbacks) {
|
|
@@ -8889,7 +8798,7 @@ var CartesiaSTT = class {
|
|
|
8889
8798
|
onTranscript(callback) {
|
|
8890
8799
|
if (this.callbacks.length >= MAX_CALLBACKS2) {
|
|
8891
8800
|
getLogger().warn(
|
|
8892
|
-
"
|
|
8801
|
+
"AssemblyAISTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
|
|
8893
8802
|
);
|
|
8894
8803
|
this.callbacks[this.callbacks.length - 1] = callback;
|
|
8895
8804
|
return;
|
|
@@ -8897,13 +8806,9 @@ var CartesiaSTT = class {
|
|
|
8897
8806
|
this.callbacks.push(callback);
|
|
8898
8807
|
}
|
|
8899
8808
|
close() {
|
|
8900
|
-
if (this.keepaliveTimer) {
|
|
8901
|
-
clearInterval(this.keepaliveTimer);
|
|
8902
|
-
this.keepaliveTimer = null;
|
|
8903
|
-
}
|
|
8904
8809
|
if (this.ws) {
|
|
8905
8810
|
try {
|
|
8906
|
-
this.ws.send("
|
|
8811
|
+
this.ws.send(JSON.stringify({ type: "Terminate" }));
|
|
8907
8812
|
} catch {
|
|
8908
8813
|
}
|
|
8909
8814
|
this.ws.close();
|
|
@@ -8911,10 +8816,305 @@ var CartesiaSTT = class {
|
|
|
8911
8816
|
}
|
|
8912
8817
|
}
|
|
8913
8818
|
};
|
|
8819
|
+
/**
 * Mean of the `confidence` values across a list of words.
 *
 * Words with a missing, non-numeric or non-finite confidence contribute 0, so the
 * result is always a finite number (a single bad entry can no longer turn the whole
 * average into NaN).
 *
 * @param {Array<{confidence?: number}>} words
 * @returns {number} The mean confidence, or 0 for an empty or missing list.
 */
function averageConfidence(words) {
  if (!words?.length) return 0;
  let total = 0;
  for (const word of words) {
    const value = Number(word?.confidence ?? 0);
    if (Number.isFinite(value)) {
      total += value;
    }
  }
  return total / words.length;
}
|
|
8827
|
+
|
|
8828
|
+
// src/stt/assemblyai.ts
|
|
8829
|
+
/**
 * Options-object front end for `AssemblyAISTT`.
 * The API key comes from `opts.apiKey`, falling back to the `ASSEMBLYAI_API_KEY` environment
 * variable; every other option is forwarded unchanged.
 */
var STT5 = class extends AssemblyAISTT {
  /**
   * @param {object} [opts] - `apiKey` plus any options accepted by the base class.
   * @throws {Error} When no API key is supplied and `ASSEMBLYAI_API_KEY` is unset.
   */
  constructor(opts = {}) {
    // Split the key off so it is not forwarded a second time inside the options bag.
    const { apiKey, ...providerOptions } = opts;
    const resolvedKey = apiKey ?? process.env.ASSEMBLYAI_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "AssemblyAI STT requires an apiKey. Pass { apiKey: '...' } or set ASSEMBLYAI_API_KEY in the environment."
      );
    }
    super(resolvedKey, providerOptions);
  }
};
|
|
8914
8842
|
|
|
8915
|
-
// src/
|
|
8916
|
-
|
|
8917
|
-
|
|
8843
|
+
// src/providers/elevenlabs-tts.ts
|
|
8844
|
+
// Base URL that ElevenLabs REST paths are appended to.
var ELEVENLABS_BASE_URL = "https://api.elevenlabs.io/v1";
// Lower-case voice name -> ElevenLabs voice ID. Looked up by `resolveVoiceId`.
// Note that "alloy" maps to the same ID as "rachel".
var ELEVENLABS_VOICE_ID_BY_NAME = {
  rachel: "21m00Tcm4TlvDq8ikWAM",
  drew: "29vD33N1CtxCmqQRPOHJ",
  clyde: "2EiwWnXFnvU5JabPnv8n",
  paul: "5Q0t7uMcjvnagumLfvZi",
  domi: "AZnzlk1XvdvUeBnXmlld",
  dave: "CYw3kZ02Hs0563khs1Fj",
  fin: "D38z5RcWu1voky8WS1ja",
  bella: "EXAVITQu4vr4xnSDxMaL",
  antoni: "ErXwobaYiN019PkySvjV",
  thomas: "GBv7mTt0atIp3Br8iCZE",
  charlie: "IKne3meq5aSn9XLyUdCD",
  george: "JBFqnCBsd6RMkjVDRZzb",
  emily: "LcfcDJNUP1GQjkzn1xUU",
  elli: "MF3mGyEYCl7XYWbV9V6O",
  callum: "N2lVS1w4EtoT3dr4eOWO",
  patrick: "ODq5zmih8GrVes37Dizd",
  harry: "SOYHLrjzK2X1ezoPC6cr",
  liam: "TX3LPaxmHKxFdv7VOQHJ",
  dorothy: "ThT5KcBeYPX3keUQqHPh",
  josh: "TxGEqnHWrfWFTfGW9XjX",
  arnold: "VR6AewLTigWG4xSOukaG",
  charlotte: "XB0fDUnXU5powFXDhCwa",
  matilda: "XrExE9yKIg1WjnnlVkGX",
  matthew: "Yko7PKHZNXotIFUBG7I9",
  james: "ZQe5CZNOzWyzPSCn5a3c",
  joseph: "Zlb1dXrM653N07WRdFW3",
  jeremy: "bVMeCyTHy58xNoL34h3p",
  michael: "flq6f7yk4E4fJM5XTYuZ",
  ethan: "g5CIjZEefAph4nQFvHAz",
  gigi: "jBpfuIE2acCO8z3wKNLl",
  freya: "jsCqWAovK2LkecY7zXl4",
  brian: "nPczCjzI2devNBz1zQrb",
  grace: "oWAxZDx7w5VEj9dCyTzz",
  daniel: "onwK4e9ZLuTAKqWW03F9",
  lily: "pFZP5JQG7iQjIQuC4Bku",
  serena: "pMsXgVXv3BLzUgSXRplE",
  adam: "pNInz6obpgDQGcFmaJgB",
  nicole: "piTKgcLEGmPE4e6mEKli",
  bill: "pqHfZKP75CvOlQylNhV4",
  jessie: "t0jbNlBVZ17f02VDIeMI",
  ryan: "wViXBPUzp2ZZixB1xQuM",
  sam: "yoZ06aMxZJJ28mfd3POQ",
  glinda: "z9fAnlkpzviPz146aGWa",
  giovanni: "zcAOhNBS3c14rBihAFp1",
  mimi: "zrHiDhphv9ZnVXBqCLjz",
  alloy: "21m00Tcm4TlvDq8ikWAM"
};
// A value made of exactly 20 ASCII alphanumerics is treated as a voice ID already.
var VOICE_ID_PATTERN = /^[A-Za-z0-9]{20}$/;
/**
 * Translate a voice name into its ElevenLabs voice ID.
 *
 * - Falsy or non-string input is returned unchanged.
 * - A string matching `VOICE_ID_PATTERN` is returned unchanged.
 * - Otherwise the lower-cased string is looked up in
 *   `ELEVENLABS_VOICE_ID_BY_NAME`; unknown names are returned unchanged.
 *
 * @param {string} voice - Voice name or voice ID.
 * @returns {string} The mapped voice ID, or `voice` itself when nothing maps.
 */
function resolveVoiceId(voice) {
  if (!voice || typeof voice !== "string") return voice;
  if (VOICE_ID_PATTERN.test(voice)) return voice;
  const alias = voice.toLowerCase();
  // Own-property check: a bare `table[alias]` would also find inherited
  // members such as "constructor" or "__proto__" and return a non-string.
  return Object.hasOwn(ELEVENLABS_VOICE_ID_BY_NAME, alias) ? ELEVENLABS_VOICE_ID_BY_NAME[alias] : voice;
}
|
|
8899
|
+
/**
 * Text-to-speech client that POSTs to the ElevenLabs streaming endpoint.
 */
var ElevenLabsTTS = class {
  /**
   * @param {string} apiKey - Sent in the `xi-api-key` request header.
   * @param {string} [voiceId] - Voice ID or name; normalised through `resolveVoiceId`.
   * @param {string} [modelId] - Sent as `model_id` in the request body.
   * @param {string} [outputFormat] - Sent as the `output_format` query parameter.
   */
  constructor(apiKey, voiceId = "21m00Tcm4TlvDq8ikWAM", modelId = "eleven_turbo_v2_5", outputFormat = "pcm_16000") {
    this.apiKey = apiKey;
    this.modelId = modelId;
    this.outputFormat = outputFormat;
    this.voiceId = resolveVoiceId(voiceId);
  }
  voiceId;
  /**
   * Synthesise `text` and return all of the audio as one Buffer.
   *
   * This drains `synthesizeStream`, so nothing is returned until the whole
   * response has arrived; prefer `synthesizeStream` when latency matters.
   */
  async synthesize(text) {
    const pieces = [];
    for await (const piece of this.synthesizeStream(text)) pieces.push(piece);
    return Buffer.concat(pieces);
  }
  /**
   * Synthesise `text` and yield audio Buffers as the response body streams in.
   *
   * The audio encoding is whatever `outputFormat` requested (raw 16 kHz PCM
   * with the default `pcm_16000`).
   *
   * @throws {Error} On a non-OK HTTP status or a response without a body.
   */
  async *synthesizeStream(text) {
    const voiceSegment = encodeURIComponent(this.voiceId);
    const formatParam = encodeURIComponent(this.outputFormat);
    const endpoint = `${ELEVENLABS_BASE_URL}/text-to-speech/${voiceSegment}/stream?output_format=${formatParam}`;
    const res = await fetch(endpoint, {
      method: "POST",
      headers: {
        "xi-api-key": this.apiKey,
        "Content-Type": "application/json"
      },
      body: JSON.stringify({ text, model_id: this.modelId }),
      // The request is aborted after 30 seconds.
      signal: AbortSignal.timeout(30000)
    });
    if (!res.ok) {
      const detail = await res.text();
      throw new Error(`ElevenLabs TTS error ${res.status}: ${detail}`);
    }
    if (!res.body) {
      throw new Error("ElevenLabs TTS: no response body");
    }
    const reader = res.body.getReader();
    try {
      for (let step = await reader.read(); !step.done; step = await reader.read()) {
        const bytes = step.value;
        // Empty reads carry no audio; skip them instead of yielding empty Buffers.
        if (bytes && bytes.length > 0) {
          yield Buffer.from(bytes);
        }
      }
    } finally {
      // Runs on completion, on error, and when the consumer stops iterating early.
      if (typeof reader.cancel === "function") {
        await reader.cancel().catch(() => {
        });
      }
      reader.releaseLock();
    }
  }
};
|
|
8959
|
+
|
|
8960
|
+
// src/tts/elevenlabs.ts
|
|
8961
|
+
/**
 * Options-object front end for `ElevenLabsTTS`.
 *
 * The API key is taken from `opts.apiKey`, falling back to the
 * `ELEVENLABS_API_KEY` environment variable. `voiceId`, `modelId` and
 * `outputFormat` fall back to fixed defaults when not supplied.
 *
 * @throws {Error} When no API key can be resolved.
 */
var TTS = class extends ElevenLabsTTS {
  constructor(opts = {}) {
    const resolvedKey = opts.apiKey ?? process.env.ELEVENLABS_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "ElevenLabs TTS requires an apiKey. Pass { apiKey: '...' } or set ELEVENLABS_API_KEY in the environment."
      );
    }
    const voiceId = opts.voiceId ?? "21m00Tcm4TlvDq8ikWAM";
    const modelId = opts.modelId ?? "eleven_turbo_v2_5";
    const outputFormat = opts.outputFormat ?? "pcm_16000";
    super(resolvedKey, voiceId, modelId, outputFormat);
  }
};
|
|
8977
|
+
|
|
8978
|
+
// src/providers/openai-tts.ts
|
|
8979
|
+
// Endpoint that `OpenAITTS.synthesizeStream` POSTs to.
var OPENAI_TTS_URL = "https://api.openai.com/v1/audio/speech";
/**
 * Text-to-speech client for the OpenAI speech endpoint, with a built-in
 * 24 kHz -> 16 kHz PCM16 resampler.
 */
var OpenAITTS = class _OpenAITTS {
  /**
   * @param {string} apiKey - Sent as a Bearer token.
   * @param {string} [voice] - Sent as `voice` in the request body.
   * @param {string} [model] - Sent as `model` in the request body.
   */
  constructor(apiKey, voice = "alloy", model = "tts-1") {
    this.apiKey = apiKey;
    this.voice = voice;
    this.model = model;
  }
  /**
   * Synthesise `text` and return all of the audio as one Buffer.
   *
   * This drains `synthesizeStream`; prefer that method when latency matters.
   */
  async synthesize(text) {
    const pieces = [];
    for await (const piece of this.synthesizeStream(text)) pieces.push(piece);
    return Buffer.concat(pieces);
  }
  /**
   * Synthesise `text` and yield audio Buffers as the response streams in.
   *
   * OpenAI returns 24 kHz PCM16; every network chunk goes through
   * `resampleStreaming` so the yielded audio is 16 kHz. One resampler state
   * object is shared by all chunks of a request, otherwise sample alignment
   * would drift across chunk boundaries (BUG #23, mirror of the Python
   * `audioop.ratecv` fix). Samples still buffered when the stream ends are
   * emitted unchanged as a final chunk.
   *
   * @throws {Error} On a non-OK HTTP status or a response without a body.
   */
  async *synthesizeStream(text) {
    const payload = {
      model: this.model,
      input: text,
      voice: this.voice,
      response_format: "pcm"
    };
    const res = await fetch(OPENAI_TTS_URL, {
      method: "POST",
      headers: {
        "Authorization": `Bearer ${this.apiKey}`,
        "Content-Type": "application/json"
      },
      body: JSON.stringify(payload),
      // The request is aborted after 30 seconds.
      signal: AbortSignal.timeout(30000)
    });
    if (!res.ok) {
      const detail = await res.text();
      throw new Error(`OpenAI TTS error ${res.status}: ${detail}`);
    }
    if (!res.body) {
      throw new Error("OpenAI TTS: no response body");
    }
    const state = { carryByte: null, leftover: [] };
    const reader = res.body.getReader();
    try {
      for (let step = await reader.read(); !step.done; step = await reader.read()) {
        const bytes = step.value;
        if (!bytes || bytes.length === 0) continue;
        const resampled = _OpenAITTS.resampleStreaming(Buffer.from(bytes), state);
        if (resampled.length > 0) yield resampled;
      }
      // Flush the one or two samples that never completed a group of three.
      if (state.leftover.length > 0) {
        yield _OpenAITTS.#packSamples(state.leftover);
      }
    } finally {
      // Runs on completion, on error, and when the consumer stops iterating early.
      if (typeof reader.cancel === "function") {
        await reader.cancel().catch(() => {
        });
      }
      reader.releaseLock();
    }
  }
  /** Serialise an array of 16-bit sample values into a little-endian Buffer. */
  static #packSamples(samples) {
    const packed = Buffer.alloc(samples.length * 2);
    samples.forEach((sample, index) => {
      packed.writeInt16LE(sample, index * 2);
    });
    return packed;
  }
  /**
   * Streaming 24 kHz -> 16 kHz resampler for PCM16-LE audio.
   *
   * Every complete group of three input samples becomes two output samples:
   * the first sample as-is, then the truncated mean of the second and third.
   * `ctx` is mutated to carry state into the next call: `carryByte` holds a
   * dangling odd byte and `leftover` holds samples that did not fill a group.
   *
   * @param {Buffer} audio - Next chunk of input bytes.
   * @param {{carryByte: number|null, leftover: number[]}} ctx - Cross-chunk state.
   * @returns {Buffer} Resampled PCM16-LE bytes (possibly empty).
   */
  static resampleStreaming(audio, ctx) {
    let pcm = audio;
    if (ctx.carryByte !== null) {
      // Re-attach the half sample left over from the previous chunk.
      pcm = Buffer.concat([Buffer.from([ctx.carryByte]), audio]);
      ctx.carryByte = null;
    }
    const hasDanglingByte = pcm.length % 2 === 1;
    if (hasDanglingByte) {
      ctx.carryByte = pcm[pcm.length - 1];
      pcm = pcm.subarray(0, pcm.length - 1);
    }
    if (pcm.length === 0 && ctx.leftover.length === 0) {
      return Buffer.alloc(0);
    }
    const pending = ctx.leftover.slice();
    for (let offset = 0; offset < pcm.length; offset += 2) {
      pending.push(pcm.readInt16LE(offset));
    }
    const groupCount = Math.floor(pending.length / 3);
    const resampled = Buffer.alloc(groupCount * 4);
    for (let group = 0; group < groupCount; group++) {
      const first = group * 3;
      const blended = Math.trunc((pending[first + 1] + pending[first + 2]) / 2);
      resampled.writeInt16LE(pending[first], group * 4);
      resampled.writeInt16LE(blended, group * 4 + 2);
    }
    ctx.leftover = pending.slice(groupCount * 3);
    return resampled;
  }
  /** @deprecated use {@link resampleStreaming} with persistent state. */
  static resample24kTo16k(audio) {
    const state = { carryByte: null, leftover: [] };
    const head = _OpenAITTS.resampleStreaming(audio, state);
    return state.leftover.length === 0 ? head : Buffer.concat([head, _OpenAITTS.#packSamples(state.leftover)]);
  }
};
|
|
9105
|
+
|
|
9106
|
+
// src/tts/openai.ts
|
|
9107
|
+
/**
 * Options-object front end for `OpenAITTS`.
 *
 * The API key is taken from `opts.apiKey`, falling back to the
 * `OPENAI_API_KEY` environment variable. `voice` and `model` fall back to
 * fixed defaults when not supplied.
 *
 * @throws {Error} When no API key can be resolved.
 */
var TTS2 = class extends OpenAITTS {
  constructor(opts = {}) {
    const resolvedKey = opts.apiKey ?? process.env.OPENAI_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "OpenAI TTS requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
      );
    }
    const voice = opts.voice ?? "alloy";
    const model = opts.model ?? "tts-1";
    super(resolvedKey, voice, model);
  }
};
|
|
8918
9118
|
|
|
8919
9119
|
// src/providers/cartesia-tts.ts
|
|
8920
9120
|
var CARTESIA_BASE_URL = "https://api.cartesia.ai";
|
|
@@ -9014,6 +9214,21 @@ var CartesiaTTS = class {
|
|
|
9014
9214
|
}
|
|
9015
9215
|
};
|
|
9016
9216
|
|
|
9217
|
+
// src/tts/cartesia.ts
|
|
9218
|
+
/**
 * Options-object front end for `CartesiaTTS`.
 *
 * The API key is taken from `opts.apiKey`, falling back to the
 * `CARTESIA_API_KEY` environment variable. Every other option is forwarded
 * untouched as the second argument of the base constructor.
 *
 * @throws {Error} When no API key can be resolved.
 */
var TTS3 = class extends CartesiaTTS {
  constructor(opts = {}) {
    const resolvedKey = opts.apiKey ?? process.env.CARTESIA_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "Cartesia TTS requires an apiKey. Pass { apiKey: '...' } or set CARTESIA_API_KEY in the environment."
      );
    }
    // Strip `apiKey` so it is not passed twice (positionally and in the options).
    const { apiKey: _discardedKey, ...providerOptions } = opts;
    super(resolvedKey, providerOptions);
  }
};
|
|
9231
|
+
|
|
9017
9232
|
// src/providers/rime-tts.ts
|
|
9018
9233
|
var RIME_BASE_URL = "https://users.rime.ai/v1/rime-tts";
|
|
9019
9234
|
var ARCANA_MODEL_TIMEOUT_MS = 60 * 4 * 1e3;
|
|
@@ -9141,6 +9356,21 @@ var RimeTTS = class {
|
|
|
9141
9356
|
}
|
|
9142
9357
|
};
|
|
9143
9358
|
|
|
9359
|
+
// src/tts/rime.ts
|
|
9360
|
+
/**
 * Options-object front end for `RimeTTS`.
 *
 * The API key is taken from `opts.apiKey`, falling back to the
 * `RIME_API_KEY` environment variable. Every other option is forwarded
 * untouched as the second argument of the base constructor.
 *
 * @throws {Error} When no API key can be resolved.
 */
var TTS4 = class extends RimeTTS {
  constructor(opts = {}) {
    const resolvedKey = opts.apiKey ?? process.env.RIME_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "Rime TTS requires an apiKey. Pass { apiKey: '...' } or set RIME_API_KEY in the environment."
      );
    }
    // Strip `apiKey` so it is not passed twice (positionally and in the options).
    const { apiKey: _discardedKey, ...providerOptions } = opts;
    super(resolvedKey, providerOptions);
  }
};
|
|
9373
|
+
|
|
9144
9374
|
// src/providers/lmnt-tts.ts
|
|
9145
9375
|
var LMNT_BASE_URL = "https://api.lmnt.com/v1/ai/speech/bytes";
|
|
9146
9376
|
var LMNTTTS = class {
|
|
@@ -9219,6 +9449,119 @@ var LMNTTTS = class {
|
|
|
9219
9449
|
}
|
|
9220
9450
|
};
|
|
9221
9451
|
|
|
9452
|
+
// src/tts/lmnt.ts
|
|
9453
|
+
/**
 * Options-object front end for `LMNTTTS`.
 *
 * The API key is taken from `opts.apiKey`, falling back to the
 * `LMNT_API_KEY` environment variable. Every other option is forwarded
 * untouched as the second argument of the base constructor.
 *
 * @throws {Error} When no API key can be resolved.
 */
var TTS5 = class extends LMNTTTS {
  constructor(opts = {}) {
    const resolvedKey = opts.apiKey ?? process.env.LMNT_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "LMNT TTS requires an apiKey. Pass { apiKey: '...' } or set LMNT_API_KEY in the environment."
      );
    }
    // Strip `apiKey` so it is not passed twice (positionally and in the options).
    const { apiKey: _discardedKey, ...providerOptions } = opts;
    super(resolvedKey, providerOptions);
  }
};
|
|
9466
|
+
|
|
9467
|
+
// src/carriers/twilio.ts
|
|
9468
|
+
/**
 * Twilio carrier credentials.
 *
 * Each credential is read from the options object first and from the
 * matching environment variable (`TWILIO_ACCOUNT_SID`, `TWILIO_AUTH_TOKEN`)
 * when the option is null or undefined.
 */
var Carrier = class {
  kind = "twilio";
  accountSid;
  authToken;
  /**
   * @param {{accountSid?: string, authToken?: string}} [opts]
   * @throws {Error} When the account SID or the auth token cannot be resolved.
   */
  constructor(opts = {}) {
    const fromOptionOrEnv = (optionValue, envName) => optionValue ?? process.env[envName];
    const resolvedSid = fromOptionOrEnv(opts.accountSid, "TWILIO_ACCOUNT_SID");
    const resolvedToken = fromOptionOrEnv(opts.authToken, "TWILIO_AUTH_TOKEN");
    // The SID is validated before the token, so a missing SID is reported first.
    if (!resolvedSid) {
      throw new Error(
        "Twilio carrier requires accountSid. Pass { accountSid: 'AC...' } or set TWILIO_ACCOUNT_SID in the environment."
      );
    }
    if (!resolvedToken) {
      throw new Error(
        "Twilio carrier requires authToken. Pass { authToken: '...' } or set TWILIO_AUTH_TOKEN in the environment."
      );
    }
    this.accountSid = resolvedSid;
    this.authToken = resolvedToken;
  }
};
|
|
9489
|
+
|
|
9490
|
+
// src/carriers/telnyx.ts
|
|
9491
|
+
/**
 * Telnyx carrier credentials.
 *
 * Each value is read from the options object first and from the matching
 * environment variable (`TELNYX_API_KEY`, `TELNYX_CONNECTION_ID`,
 * `TELNYX_PUBLIC_KEY`) when the option is null or undefined. `apiKey` and
 * `connectionId` are mandatory; `publicKey` may stay undefined.
 */
var Carrier2 = class {
  kind = "telnyx";
  apiKey;
  connectionId;
  publicKey;
  /**
   * @param {{apiKey?: string, connectionId?: string, publicKey?: string}} [opts]
   * @throws {Error} When the API key or the connection ID cannot be resolved.
   */
  constructor(opts = {}) {
    const fromOptionOrEnv = (optionValue, envName) => optionValue ?? process.env[envName];
    const resolvedKey = fromOptionOrEnv(opts.apiKey, "TELNYX_API_KEY");
    const resolvedConnection = fromOptionOrEnv(opts.connectionId, "TELNYX_CONNECTION_ID");
    const resolvedPublicKey = fromOptionOrEnv(opts.publicKey, "TELNYX_PUBLIC_KEY");
    // The API key is validated before the connection ID, so it is reported first.
    if (!resolvedKey) {
      throw new Error(
        "Telnyx carrier requires apiKey. Pass { apiKey: '...' } or set TELNYX_API_KEY in the environment."
      );
    }
    if (!resolvedConnection) {
      throw new Error(
        "Telnyx carrier requires connectionId. Pass { connectionId: '...' } or set TELNYX_CONNECTION_ID in the environment."
      );
    }
    this.apiKey = resolvedKey;
    this.connectionId = resolvedConnection;
    this.publicKey = resolvedPublicKey;
  }
};
|
|
9515
|
+
|
|
9516
|
+
// src/public-api.ts
|
|
9517
|
+
// Replacement text used when a guardrail is created without its own `replacement`.
var DEFAULT_GUARDRAIL_REPLACEMENT = "I'm sorry, I can't respond to that.";
/**
 * Value object describing a guardrail: a name, optional `blockedTerms`,
 * an optional `check`, and the replacement text.
 */
var Guardrail = class {
  name;
  blockedTerms;
  check;
  replacement;
  /**
   * @param {{name: string, blockedTerms?: *, check?: *, replacement?: string}} opts
   * @throws {Error} When `opts.name` is missing or empty.
   */
  constructor(opts) {
    const nameMissing = !opts.name;
    if (nameMissing) {
      throw new Error("Guardrail requires a non-empty name.");
    }
    this.name = opts.name;
    // Optional members are only assigned when truthy; otherwise they stay undefined.
    if (opts.blockedTerms) {
      this.blockedTerms = opts.blockedTerms;
    }
    if (opts.check) {
      this.check = opts.check;
    }
    // `??` keeps an explicit empty-string replacement.
    this.replacement = opts.replacement ?? DEFAULT_GUARDRAIL_REPLACEMENT;
  }
};
/**
 * Factory shorthand for `new Guardrail(opts)`.
 *
 * @param {object} opts - Same options as the `Guardrail` constructor.
 * @returns {Guardrail} The new guardrail.
 */
function guardrail(opts) {
  const instance = new Guardrail(opts);
  return instance;
}
|
|
9536
|
+
/**
 * Value object describing a tool that is backed either by a local `handler`
 * function or by a `webhookUrl` — exactly one of the two.
 */
var Tool = class {
  name;
  description;
  parameters;
  handler;
  webhookUrl;
  /**
   * @param {{name: string, description?: string, parameters?: object, handler?: Function, webhookUrl?: string}} opts
   * @throws {Error} When the name is missing, or when `handler` and
   *   `webhookUrl` are both absent or both present.
   */
  constructor(opts) {
    if (!opts.name) {
      throw new Error("Tool requires a non-empty name.");
    }
    const handlerGiven = typeof opts.handler === "function";
    const webhookGiven = typeof opts.webhookUrl === "string" && opts.webhookUrl.length > 0;
    // Equal flags mean neither or both were supplied; both cases are rejected.
    if (handlerGiven === webhookGiven) {
      throw new Error(
        handlerGiven ? "Tool accepts handler OR webhookUrl, not both." : "Tool requires either handler or webhookUrl."
      );
    }
    this.name = opts.name;
    this.description = opts.description ?? "";
    this.parameters = opts.parameters ?? { type: "object", properties: {} };
    // Exactly one flag is true here, so exactly one backing field is assigned.
    if (handlerGiven) {
      this.handler = opts.handler;
    } else {
      this.webhookUrl = opts.webhookUrl;
    }
  }
};
/**
 * Factory shorthand for `new Tool(opts)`.
 *
 * @param {object} opts - Same options as the `Tool` constructor.
 * @returns {Tool} The new tool.
 */
function tool(opts) {
  const instance = new Tool(opts);
  return instance;
}
|
|
9564
|
+
|
|
9222
9565
|
// src/index.ts
|
|
9223
9566
|
init_transcoding();
|
|
9224
9567
|
init_tunnel();
|
|
@@ -9843,21 +10186,25 @@ function isAudioConfig(value) {
|
|
|
9843
10186
|
CartesiaSTT,
|
|
9844
10187
|
CartesiaTTS,
|
|
9845
10188
|
ChatContext,
|
|
10189
|
+
CloudflareTunnel,
|
|
9846
10190
|
DEFAULT_MIN_SENTENCE_LEN,
|
|
9847
10191
|
DEFAULT_PRICING,
|
|
9848
10192
|
DTMF_EVENTS,
|
|
9849
10193
|
DeepgramSTT,
|
|
10194
|
+
ElevenLabsConvAI,
|
|
9850
10195
|
ElevenLabsConvAIAdapter,
|
|
9851
10196
|
ElevenLabsTTS,
|
|
9852
10197
|
FallbackLLMProvider,
|
|
9853
10198
|
GEMINI_DEFAULT_INPUT_SR,
|
|
9854
10199
|
GEMINI_DEFAULT_OUTPUT_SR,
|
|
9855
10200
|
GeminiLiveAdapter,
|
|
10201
|
+
Guardrail,
|
|
9856
10202
|
IVRActivity,
|
|
9857
10203
|
LLMLoop,
|
|
9858
10204
|
LMNTTTS,
|
|
9859
10205
|
MetricsStore,
|
|
9860
10206
|
OpenAILLMProvider,
|
|
10207
|
+
OpenAIRealtime,
|
|
9861
10208
|
OpenAIRealtimeAdapter,
|
|
9862
10209
|
OpenAITTS,
|
|
9863
10210
|
PartialStreamError,
|
|
@@ -9870,8 +10217,12 @@ function isAudioConfig(value) {
|
|
|
9870
10217
|
RimeTTS,
|
|
9871
10218
|
SentenceChunker,
|
|
9872
10219
|
SonioxSTT,
|
|
10220
|
+
StaticTunnel,
|
|
10221
|
+
Telnyx,
|
|
9873
10222
|
TestSession,
|
|
9874
10223
|
TfidfLoopDetector,
|
|
10224
|
+
Tool,
|
|
10225
|
+
Twilio,
|
|
9875
10226
|
ULTRAVOX_DEFAULT_API_BASE,
|
|
9876
10227
|
ULTRAVOX_DEFAULT_SR,
|
|
9877
10228
|
UltravoxRealtimeAdapter,
|
|
@@ -9891,6 +10242,7 @@ function isAudioConfig(value) {
|
|
|
9891
10242
|
filterMarkdown,
|
|
9892
10243
|
formatDtmf,
|
|
9893
10244
|
getLogger,
|
|
10245
|
+
guardrail,
|
|
9894
10246
|
isRemoteUrl,
|
|
9895
10247
|
isWebSocketUrl,
|
|
9896
10248
|
makeAuthMiddleware,
|
|
@@ -9912,5 +10264,6 @@ function isAudioConfig(value) {
|
|
|
9912
10264
|
selectSoundFromList,
|
|
9913
10265
|
setLogger,
|
|
9914
10266
|
startTunnel,
|
|
10267
|
+
tool,
|
|
9915
10268
|
whisper
|
|
9916
10269
|
});
|