getpatter 0.4.4 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +136 -162
- package/dist/carrier-config-CPG5CROM.mjs +84 -0
- package/dist/{chunk-O3RQG3NL.mjs → chunk-757NVN4L.mjs} +129 -544
- package/dist/index.d.mts +771 -292
- package/dist/index.d.ts +771 -292
- package/dist/index.js +1414 -1061
- package/dist/index.mjs +1141 -456
- package/dist/{test-mode-ASSLSQU2.mjs → test-mode-YFOL2HYH.mjs} +1 -1
- package/package.json +1 -1
|
@@ -247,242 +247,13 @@ var ElevenLabsConvAIAdapter = class {
|
|
|
247
247
|
}
|
|
248
248
|
};
|
|
249
249
|
|
|
250
|
-
// src/
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
/** Request ID from Deepgram — used to query actual cost post-call. */
|
|
257
|
-
requestId = "";
|
|
258
|
-
apiKey;
|
|
259
|
-
language;
|
|
260
|
-
model;
|
|
261
|
-
encoding;
|
|
262
|
-
sampleRate;
|
|
263
|
-
endpointingMs;
|
|
264
|
-
utteranceEndMs;
|
|
265
|
-
smartFormat;
|
|
266
|
-
interimResults;
|
|
267
|
-
vadEvents;
|
|
268
|
-
constructor(apiKey, languageOrOptions, model, encoding, sampleRate, options) {
|
|
269
|
-
this.apiKey = apiKey;
|
|
270
|
-
const opts = typeof languageOrOptions === "object" && languageOrOptions !== null ? languageOrOptions : options ?? {};
|
|
271
|
-
this.language = (typeof languageOrOptions === "string" ? languageOrOptions : opts.language) ?? "en";
|
|
272
|
-
this.model = model ?? opts.model ?? "nova-3";
|
|
273
|
-
this.encoding = encoding ?? opts.encoding ?? "linear16";
|
|
274
|
-
this.sampleRate = sampleRate ?? opts.sampleRate ?? 16e3;
|
|
275
|
-
this.endpointingMs = opts.endpointingMs ?? 150;
|
|
276
|
-
this.utteranceEndMs = opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3;
|
|
277
|
-
this.smartFormat = opts.smartFormat ?? true;
|
|
278
|
-
this.interimResults = opts.interimResults ?? true;
|
|
279
|
-
this.vadEvents = opts.vadEvents ?? true;
|
|
280
|
-
}
|
|
281
|
-
/** Factory for Twilio calls — mulaw 8 kHz. Forwards tuning options through. */
|
|
282
|
-
static forTwilio(apiKey, language = "en", model = "nova-3", options = {}) {
|
|
283
|
-
return new _DeepgramSTT(apiKey, language, model, "mulaw", 8e3, options);
|
|
284
|
-
}
|
|
285
|
-
async connect() {
|
|
286
|
-
const params = new URLSearchParams({
|
|
287
|
-
model: this.model,
|
|
288
|
-
language: this.language,
|
|
289
|
-
encoding: this.encoding,
|
|
290
|
-
sample_rate: String(this.sampleRate),
|
|
291
|
-
channels: "1",
|
|
292
|
-
interim_results: this.interimResults ? "true" : "false",
|
|
293
|
-
endpointing: String(this.endpointingMs),
|
|
294
|
-
smart_format: this.smartFormat ? "true" : "false",
|
|
295
|
-
vad_events: this.vadEvents ? "true" : "false",
|
|
296
|
-
no_delay: "true"
|
|
297
|
-
});
|
|
298
|
-
if (this.utteranceEndMs !== null) {
|
|
299
|
-
params.set("utterance_end_ms", String(Math.max(this.utteranceEndMs, 1e3)));
|
|
300
|
-
}
|
|
301
|
-
const url = `${DEEPGRAM_WS_URL}?${params.toString()}`;
|
|
302
|
-
this.ws = new WebSocket3(url, {
|
|
303
|
-
headers: { Authorization: `Token ${this.apiKey}` }
|
|
304
|
-
});
|
|
305
|
-
await new Promise((resolve, reject) => {
|
|
306
|
-
const timer = setTimeout(() => reject(new Error("Deepgram connect timeout")), 1e4);
|
|
307
|
-
this.ws.once("open", () => {
|
|
308
|
-
clearTimeout(timer);
|
|
309
|
-
resolve();
|
|
310
|
-
});
|
|
311
|
-
this.ws.once("error", (err) => {
|
|
312
|
-
clearTimeout(timer);
|
|
313
|
-
reject(err);
|
|
314
|
-
});
|
|
315
|
-
});
|
|
316
|
-
this.ws.on("message", (raw) => {
|
|
317
|
-
let data;
|
|
318
|
-
try {
|
|
319
|
-
data = JSON.parse(raw.toString());
|
|
320
|
-
} catch {
|
|
321
|
-
return;
|
|
322
|
-
}
|
|
323
|
-
if (data.type === "Metadata" && data.request_id) {
|
|
324
|
-
this.requestId = data.request_id;
|
|
325
|
-
return;
|
|
326
|
-
}
|
|
327
|
-
if (data.type !== "Results") return;
|
|
328
|
-
const alternatives = data.channel?.alternatives ?? [];
|
|
329
|
-
if (!alternatives.length) return;
|
|
330
|
-
const best = alternatives[0];
|
|
331
|
-
const text = (best.transcript ?? "").trim();
|
|
332
|
-
if (!text) return;
|
|
333
|
-
const transcript = {
|
|
334
|
-
text,
|
|
335
|
-
isFinal: Boolean(data.is_final) || Boolean(data.speech_final),
|
|
336
|
-
confidence: best.confidence ?? 0
|
|
337
|
-
};
|
|
338
|
-
for (const cb of this.callbacks) {
|
|
339
|
-
cb(transcript);
|
|
340
|
-
}
|
|
341
|
-
});
|
|
342
|
-
}
|
|
343
|
-
sendAudio(audio) {
|
|
344
|
-
if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
|
|
345
|
-
this.ws.send(audio);
|
|
346
|
-
}
|
|
347
|
-
onTranscript(callback) {
|
|
348
|
-
if (this.callbacks.length >= 10) {
|
|
349
|
-
getLogger().warn("DeepgramSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
|
|
350
|
-
this.callbacks[this.callbacks.length - 1] = callback;
|
|
351
|
-
return;
|
|
352
|
-
}
|
|
353
|
-
this.callbacks.push(callback);
|
|
354
|
-
}
|
|
355
|
-
close() {
|
|
356
|
-
if (this.ws) {
|
|
357
|
-
try {
|
|
358
|
-
this.ws.send(JSON.stringify({ type: "CloseStream" }));
|
|
359
|
-
} catch {
|
|
360
|
-
}
|
|
361
|
-
this.ws.close();
|
|
362
|
-
this.ws = null;
|
|
363
|
-
}
|
|
364
|
-
}
|
|
365
|
-
};
|
|
366
|
-
|
|
367
|
-
// src/providers/whisper-stt.ts
|
|
368
|
-
var OPENAI_TRANSCRIPTION_URL = "https://api.openai.com/v1/audio/transcriptions";
|
|
369
|
-
var DEFAULT_BUFFER_SIZE = 16e3 * 2;
|
|
370
|
-
function wrapPcmInWav(pcm, sampleRate = 16e3, channels = 1, bitsPerSample = 16) {
|
|
371
|
-
const dataSize = pcm.length;
|
|
372
|
-
const header = Buffer.alloc(44);
|
|
373
|
-
header.write("RIFF", 0);
|
|
374
|
-
header.writeUInt32LE(36 + dataSize, 4);
|
|
375
|
-
header.write("WAVE", 8);
|
|
376
|
-
header.write("fmt ", 12);
|
|
377
|
-
header.writeUInt32LE(16, 16);
|
|
378
|
-
header.writeUInt16LE(1, 20);
|
|
379
|
-
header.writeUInt16LE(channels, 22);
|
|
380
|
-
header.writeUInt32LE(sampleRate, 24);
|
|
381
|
-
header.writeUInt32LE(sampleRate * channels * (bitsPerSample / 8), 28);
|
|
382
|
-
header.writeUInt16LE(channels * (bitsPerSample / 8), 32);
|
|
383
|
-
header.writeUInt16LE(bitsPerSample, 34);
|
|
384
|
-
header.write("data", 36);
|
|
385
|
-
header.writeUInt32LE(dataSize, 40);
|
|
386
|
-
return Buffer.concat([header, pcm]);
|
|
250
|
+
// src/provider-factory.ts
|
|
251
|
+
async function createSTT(agent) {
|
|
252
|
+
return agent.stt ?? null;
|
|
253
|
+
}
|
|
254
|
+
async function createTTS(agent) {
|
|
255
|
+
return agent.tts ?? null;
|
|
387
256
|
}
|
|
388
|
-
var WhisperSTT = class _WhisperSTT {
|
|
389
|
-
apiKey;
|
|
390
|
-
model;
|
|
391
|
-
language;
|
|
392
|
-
bufferSize;
|
|
393
|
-
buffer = Buffer.alloc(0);
|
|
394
|
-
callbacks = [];
|
|
395
|
-
running = false;
|
|
396
|
-
pendingTranscriptions = [];
|
|
397
|
-
constructor(apiKey, model = "whisper-1", language, bufferSize = DEFAULT_BUFFER_SIZE) {
|
|
398
|
-
this.apiKey = apiKey;
|
|
399
|
-
this.model = model;
|
|
400
|
-
this.language = language;
|
|
401
|
-
this.bufferSize = bufferSize;
|
|
402
|
-
}
|
|
403
|
-
/** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
|
|
404
|
-
static forTwilio(apiKey, language = "en", model = "whisper-1") {
|
|
405
|
-
return new _WhisperSTT(apiKey, model, language);
|
|
406
|
-
}
|
|
407
|
-
async connect() {
|
|
408
|
-
this.running = true;
|
|
409
|
-
this.buffer = Buffer.alloc(0);
|
|
410
|
-
}
|
|
411
|
-
sendAudio(audio) {
|
|
412
|
-
if (!this.running) return;
|
|
413
|
-
this.buffer = Buffer.concat([this.buffer, audio]);
|
|
414
|
-
if (this.buffer.length >= this.bufferSize) {
|
|
415
|
-
const pcm = this.buffer;
|
|
416
|
-
this.buffer = Buffer.alloc(0);
|
|
417
|
-
this.trackTranscription(this.transcribeBuffer(pcm));
|
|
418
|
-
}
|
|
419
|
-
}
|
|
420
|
-
trackTranscription(promise) {
|
|
421
|
-
const wrapped = promise.finally(() => {
|
|
422
|
-
const idx = this.pendingTranscriptions.indexOf(wrapped);
|
|
423
|
-
if (idx !== -1) this.pendingTranscriptions.splice(idx, 1);
|
|
424
|
-
});
|
|
425
|
-
this.pendingTranscriptions.push(wrapped);
|
|
426
|
-
}
|
|
427
|
-
onTranscript(callback) {
|
|
428
|
-
if (this.callbacks.length >= 10) {
|
|
429
|
-
getLogger().warn("WhisperSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
|
|
430
|
-
this.callbacks[this.callbacks.length - 1] = callback;
|
|
431
|
-
return;
|
|
432
|
-
}
|
|
433
|
-
this.callbacks.push(callback);
|
|
434
|
-
}
|
|
435
|
-
async close() {
|
|
436
|
-
this.running = false;
|
|
437
|
-
if (this.buffer.length >= this.bufferSize / 4) {
|
|
438
|
-
const pcm = this.buffer;
|
|
439
|
-
this.buffer = Buffer.alloc(0);
|
|
440
|
-
this.trackTranscription(this.transcribeBuffer(pcm));
|
|
441
|
-
} else {
|
|
442
|
-
this.buffer = Buffer.alloc(0);
|
|
443
|
-
}
|
|
444
|
-
await Promise.allSettled(this.pendingTranscriptions);
|
|
445
|
-
this.callbacks = [];
|
|
446
|
-
}
|
|
447
|
-
// ------------------------------------------------------------------
|
|
448
|
-
// Private
|
|
449
|
-
// ------------------------------------------------------------------
|
|
450
|
-
async transcribeBuffer(pcm) {
|
|
451
|
-
const wav = wrapPcmInWav(pcm);
|
|
452
|
-
const formData = new FormData();
|
|
453
|
-
formData.append("file", new Blob([wav.buffer.slice(wav.byteOffset, wav.byteOffset + wav.byteLength)], { type: "audio/wav" }), "audio.wav");
|
|
454
|
-
formData.append("model", this.model);
|
|
455
|
-
if (this.language) {
|
|
456
|
-
formData.append("language", this.language);
|
|
457
|
-
}
|
|
458
|
-
try {
|
|
459
|
-
const resp = await fetch(OPENAI_TRANSCRIPTION_URL, {
|
|
460
|
-
method: "POST",
|
|
461
|
-
headers: { Authorization: `Bearer ${this.apiKey}` },
|
|
462
|
-
body: formData,
|
|
463
|
-
signal: AbortSignal.timeout(15e3)
|
|
464
|
-
});
|
|
465
|
-
if (!resp.ok) {
|
|
466
|
-
const body = await resp.text();
|
|
467
|
-
getLogger().error(`WhisperSTT transcription error: ${resp.status} ${body}`);
|
|
468
|
-
return;
|
|
469
|
-
}
|
|
470
|
-
const json = await resp.json();
|
|
471
|
-
const text = (json.text ?? "").trim();
|
|
472
|
-
if (!text) return;
|
|
473
|
-
const transcript = {
|
|
474
|
-
text,
|
|
475
|
-
isFinal: true,
|
|
476
|
-
confidence: 1
|
|
477
|
-
};
|
|
478
|
-
for (const cb of this.callbacks) {
|
|
479
|
-
cb(transcript);
|
|
480
|
-
}
|
|
481
|
-
} catch (err) {
|
|
482
|
-
getLogger().error(`WhisperSTT transcription error: ${String(err)}`);
|
|
483
|
-
}
|
|
484
|
-
}
|
|
485
|
-
};
|
|
486
257
|
|
|
487
258
|
// src/pricing.ts
|
|
488
259
|
var DEFAULT_PRICING = {
|
|
@@ -1855,248 +1626,120 @@ function isWebSocketUrl(url) {
|
|
|
1855
1626
|
return url.startsWith("ws://") || url.startsWith("wss://");
|
|
1856
1627
|
}
|
|
1857
1628
|
|
|
1858
|
-
// src/providers/
|
|
1859
|
-
|
|
1860
|
-
var
|
|
1861
|
-
|
|
1862
|
-
|
|
1863
|
-
|
|
1864
|
-
|
|
1865
|
-
|
|
1866
|
-
|
|
1867
|
-
|
|
1868
|
-
|
|
1869
|
-
|
|
1870
|
-
|
|
1871
|
-
|
|
1872
|
-
|
|
1873
|
-
|
|
1874
|
-
|
|
1875
|
-
|
|
1876
|
-
|
|
1877
|
-
harry: "SOYHLrjzK2X1ezoPC6cr",
|
|
1878
|
-
liam: "TX3LPaxmHKxFdv7VOQHJ",
|
|
1879
|
-
dorothy: "ThT5KcBeYPX3keUQqHPh",
|
|
1880
|
-
josh: "TxGEqnHWrfWFTfGW9XjX",
|
|
1881
|
-
arnold: "VR6AewLTigWG4xSOukaG",
|
|
1882
|
-
charlotte: "XB0fDUnXU5powFXDhCwa",
|
|
1883
|
-
matilda: "XrExE9yKIg1WjnnlVkGX",
|
|
1884
|
-
matthew: "Yko7PKHZNXotIFUBG7I9",
|
|
1885
|
-
james: "ZQe5CZNOzWyzPSCn5a3c",
|
|
1886
|
-
joseph: "Zlb1dXrM653N07WRdFW3",
|
|
1887
|
-
jeremy: "bVMeCyTHy58xNoL34h3p",
|
|
1888
|
-
michael: "flq6f7yk4E4fJM5XTYuZ",
|
|
1889
|
-
ethan: "g5CIjZEefAph4nQFvHAz",
|
|
1890
|
-
gigi: "jBpfuIE2acCO8z3wKNLl",
|
|
1891
|
-
freya: "jsCqWAovK2LkecY7zXl4",
|
|
1892
|
-
brian: "nPczCjzI2devNBz1zQrb",
|
|
1893
|
-
grace: "oWAxZDx7w5VEj9dCyTzz",
|
|
1894
|
-
daniel: "onwK4e9ZLuTAKqWW03F9",
|
|
1895
|
-
lily: "pFZP5JQG7iQjIQuC4Bku",
|
|
1896
|
-
serena: "pMsXgVXv3BLzUgSXRplE",
|
|
1897
|
-
adam: "pNInz6obpgDQGcFmaJgB",
|
|
1898
|
-
nicole: "piTKgcLEGmPE4e6mEKli",
|
|
1899
|
-
bill: "pqHfZKP75CvOlQylNhV4",
|
|
1900
|
-
jessie: "t0jbNlBVZ17f02VDIeMI",
|
|
1901
|
-
ryan: "wViXBPUzp2ZZixB1xQuM",
|
|
1902
|
-
sam: "yoZ06aMxZJJ28mfd3POQ",
|
|
1903
|
-
glinda: "z9fAnlkpzviPz146aGWa",
|
|
1904
|
-
giovanni: "zcAOhNBS3c14rBihAFp1",
|
|
1905
|
-
mimi: "zrHiDhphv9ZnVXBqCLjz",
|
|
1906
|
-
alloy: "21m00Tcm4TlvDq8ikWAM"
|
|
1907
|
-
};
|
|
1908
|
-
var VOICE_ID_PATTERN = /^[A-Za-z0-9]{20}$/;
|
|
1909
|
-
function resolveVoiceId(voice) {
|
|
1910
|
-
if (!voice) return voice;
|
|
1911
|
-
if (VOICE_ID_PATTERN.test(voice)) return voice;
|
|
1912
|
-
return ELEVENLABS_VOICE_ID_BY_NAME[voice.toLowerCase()] ?? voice;
|
|
1913
|
-
}
|
|
1914
|
-
var ElevenLabsTTS = class {
|
|
1915
|
-
constructor(apiKey, voiceId = "21m00Tcm4TlvDq8ikWAM", modelId = "eleven_turbo_v2_5", outputFormat = "pcm_16000") {
|
|
1629
|
+
// src/providers/deepgram-stt.ts
|
|
1630
|
+
import WebSocket3 from "ws";
|
|
1631
|
+
var DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/listen";
|
|
1632
|
+
var DeepgramSTT = class _DeepgramSTT {
|
|
1633
|
+
ws = null;
|
|
1634
|
+
callbacks = [];
|
|
1635
|
+
/** Request ID from Deepgram — used to query actual cost post-call. */
|
|
1636
|
+
requestId = "";
|
|
1637
|
+
apiKey;
|
|
1638
|
+
language;
|
|
1639
|
+
model;
|
|
1640
|
+
encoding;
|
|
1641
|
+
sampleRate;
|
|
1642
|
+
endpointingMs;
|
|
1643
|
+
utteranceEndMs;
|
|
1644
|
+
smartFormat;
|
|
1645
|
+
interimResults;
|
|
1646
|
+
vadEvents;
|
|
1647
|
+
constructor(apiKey, languageOrOptions, model, encoding, sampleRate, options) {
|
|
1916
1648
|
this.apiKey = apiKey;
|
|
1917
|
-
|
|
1918
|
-
this.
|
|
1919
|
-
this.
|
|
1649
|
+
const opts = typeof languageOrOptions === "object" && languageOrOptions !== null ? languageOrOptions : options ?? {};
|
|
1650
|
+
this.language = (typeof languageOrOptions === "string" ? languageOrOptions : opts.language) ?? "en";
|
|
1651
|
+
this.model = model ?? opts.model ?? "nova-3";
|
|
1652
|
+
this.encoding = encoding ?? opts.encoding ?? "linear16";
|
|
1653
|
+
this.sampleRate = sampleRate ?? opts.sampleRate ?? 16e3;
|
|
1654
|
+
this.endpointingMs = opts.endpointingMs ?? 150;
|
|
1655
|
+
this.utteranceEndMs = opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3;
|
|
1656
|
+
this.smartFormat = opts.smartFormat ?? true;
|
|
1657
|
+
this.interimResults = opts.interimResults ?? true;
|
|
1658
|
+
this.vadEvents = opts.vadEvents ?? true;
|
|
1920
1659
|
}
|
|
1921
|
-
|
|
1922
|
-
|
|
1923
|
-
|
|
1924
|
-
*
|
|
1925
|
-
* For large chunks (or when latency matters) call `synthesizeStream` instead.
|
|
1926
|
-
*/
|
|
1927
|
-
async synthesize(text) {
|
|
1928
|
-
const chunks = [];
|
|
1929
|
-
for await (const chunk of this.synthesizeStream(text)) {
|
|
1930
|
-
chunks.push(chunk);
|
|
1931
|
-
}
|
|
1932
|
-
return Buffer.concat(chunks);
|
|
1660
|
+
/** Factory for Twilio calls — mulaw 8 kHz. Forwards tuning options through. */
|
|
1661
|
+
static forTwilio(apiKey, language = "en", model = "nova-3", options = {}) {
|
|
1662
|
+
return new _DeepgramSTT(apiKey, language, model, "mulaw", 8e3, options);
|
|
1933
1663
|
}
|
|
1934
|
-
|
|
1935
|
-
|
|
1936
|
-
|
|
1937
|
-
|
|
1938
|
-
|
|
1939
|
-
|
|
1940
|
-
|
|
1941
|
-
|
|
1942
|
-
|
|
1943
|
-
|
|
1944
|
-
|
|
1945
|
-
|
|
1946
|
-
"Content-Type": "application/json"
|
|
1947
|
-
},
|
|
1948
|
-
body: JSON.stringify({ text, model_id: this.modelId }),
|
|
1949
|
-
signal: AbortSignal.timeout(3e4)
|
|
1664
|
+
async connect() {
|
|
1665
|
+
const params = new URLSearchParams({
|
|
1666
|
+
model: this.model,
|
|
1667
|
+
language: this.language,
|
|
1668
|
+
encoding: this.encoding,
|
|
1669
|
+
sample_rate: String(this.sampleRate),
|
|
1670
|
+
channels: "1",
|
|
1671
|
+
interim_results: this.interimResults ? "true" : "false",
|
|
1672
|
+
endpointing: String(this.endpointingMs),
|
|
1673
|
+
smart_format: this.smartFormat ? "true" : "false",
|
|
1674
|
+
vad_events: this.vadEvents ? "true" : "false",
|
|
1675
|
+
no_delay: "true"
|
|
1950
1676
|
});
|
|
1951
|
-
if (
|
|
1952
|
-
|
|
1953
|
-
throw new Error(`ElevenLabs TTS error ${response.status}: ${body}`);
|
|
1954
|
-
}
|
|
1955
|
-
if (!response.body) {
|
|
1956
|
-
throw new Error("ElevenLabs TTS: no response body");
|
|
1677
|
+
if (this.utteranceEndMs !== null) {
|
|
1678
|
+
params.set("utterance_end_ms", String(Math.max(this.utteranceEndMs, 1e3)));
|
|
1957
1679
|
}
|
|
1958
|
-
const
|
|
1959
|
-
|
|
1960
|
-
|
|
1961
|
-
|
|
1962
|
-
|
|
1963
|
-
|
|
1964
|
-
|
|
1965
|
-
|
|
1966
|
-
|
|
1967
|
-
|
|
1968
|
-
|
|
1680
|
+
const url = `${DEEPGRAM_WS_URL}?${params.toString()}`;
|
|
1681
|
+
this.ws = new WebSocket3(url, {
|
|
1682
|
+
headers: { Authorization: `Token ${this.apiKey}` }
|
|
1683
|
+
});
|
|
1684
|
+
await new Promise((resolve, reject) => {
|
|
1685
|
+
const timer = setTimeout(() => reject(new Error("Deepgram connect timeout")), 1e4);
|
|
1686
|
+
this.ws.once("open", () => {
|
|
1687
|
+
clearTimeout(timer);
|
|
1688
|
+
resolve();
|
|
1689
|
+
});
|
|
1690
|
+
this.ws.once("error", (err) => {
|
|
1691
|
+
clearTimeout(timer);
|
|
1692
|
+
reject(err);
|
|
1969
1693
|
});
|
|
1970
|
-
reader.releaseLock();
|
|
1971
|
-
}
|
|
1972
|
-
}
|
|
1973
|
-
};
|
|
1974
|
-
|
|
1975
|
-
// src/providers/openai-tts.ts
|
|
1976
|
-
var OPENAI_TTS_URL = "https://api.openai.com/v1/audio/speech";
|
|
1977
|
-
var OpenAITTS = class _OpenAITTS {
|
|
1978
|
-
constructor(apiKey, voice = "alloy", model = "tts-1") {
|
|
1979
|
-
this.apiKey = apiKey;
|
|
1980
|
-
this.voice = voice;
|
|
1981
|
-
this.model = model;
|
|
1982
|
-
}
|
|
1983
|
-
/**
|
|
1984
|
-
* Synthesise text to speech and return the full audio as a single Buffer.
|
|
1985
|
-
*
|
|
1986
|
-
* For large chunks (or when latency matters) call `synthesizeStream` instead.
|
|
1987
|
-
*/
|
|
1988
|
-
async synthesize(text) {
|
|
1989
|
-
const chunks = [];
|
|
1990
|
-
for await (const chunk of this.synthesizeStream(text)) {
|
|
1991
|
-
chunks.push(chunk);
|
|
1992
|
-
}
|
|
1993
|
-
return Buffer.concat(chunks);
|
|
1994
|
-
}
|
|
1995
|
-
/**
|
|
1996
|
-
* Synthesise text and yield audio chunks as they arrive (streaming).
|
|
1997
|
-
*
|
|
1998
|
-
* OpenAI returns 24 kHz PCM16; each chunk is resampled to 16 kHz before
|
|
1999
|
-
* yielding so the output is ready for telephony pipelines.
|
|
2000
|
-
*
|
|
2001
|
-
* The resampler carries state (buffered samples + odd trailing byte)
|
|
2002
|
-
* between chunks — without that state cross-chunk sample alignment drifts
|
|
2003
|
-
* and the caller hears pops / dropped audio (BUG #23, mirror of the
|
|
2004
|
-
* Python `audioop.ratecv` fix).
|
|
2005
|
-
*/
|
|
2006
|
-
async *synthesizeStream(text) {
|
|
2007
|
-
const response = await fetch(OPENAI_TTS_URL, {
|
|
2008
|
-
method: "POST",
|
|
2009
|
-
headers: {
|
|
2010
|
-
"Authorization": `Bearer ${this.apiKey}`,
|
|
2011
|
-
"Content-Type": "application/json"
|
|
2012
|
-
},
|
|
2013
|
-
body: JSON.stringify({
|
|
2014
|
-
model: this.model,
|
|
2015
|
-
input: text,
|
|
2016
|
-
voice: this.voice,
|
|
2017
|
-
response_format: "pcm"
|
|
2018
|
-
}),
|
|
2019
|
-
signal: AbortSignal.timeout(3e4)
|
|
2020
1694
|
});
|
|
2021
|
-
|
|
2022
|
-
|
|
2023
|
-
|
|
2024
|
-
|
|
2025
|
-
|
|
2026
|
-
|
|
2027
|
-
}
|
|
2028
|
-
const ctx = { carryByte: null, leftover: [] };
|
|
2029
|
-
const reader = response.body.getReader();
|
|
2030
|
-
try {
|
|
2031
|
-
while (true) {
|
|
2032
|
-
const { done, value } = await reader.read();
|
|
2033
|
-
if (done) break;
|
|
2034
|
-
if (value && value.length > 0) {
|
|
2035
|
-
const out = _OpenAITTS.resampleStreaming(Buffer.from(value), ctx);
|
|
2036
|
-
if (out.length > 0) yield out;
|
|
2037
|
-
}
|
|
1695
|
+
this.ws.on("message", (raw) => {
|
|
1696
|
+
let data;
|
|
1697
|
+
try {
|
|
1698
|
+
data = JSON.parse(raw.toString());
|
|
1699
|
+
} catch {
|
|
1700
|
+
return;
|
|
2038
1701
|
}
|
|
2039
|
-
if (
|
|
2040
|
-
|
|
2041
|
-
|
|
2042
|
-
tail.writeInt16LE(ctx.leftover[i], i * 2);
|
|
2043
|
-
}
|
|
2044
|
-
yield tail;
|
|
1702
|
+
if (data.type === "Metadata" && data.request_id) {
|
|
1703
|
+
this.requestId = data.request_id;
|
|
1704
|
+
return;
|
|
2045
1705
|
}
|
|
2046
|
-
|
|
2047
|
-
|
|
2048
|
-
|
|
2049
|
-
|
|
2050
|
-
|
|
1706
|
+
if (data.type !== "Results") return;
|
|
1707
|
+
const alternatives = data.channel?.alternatives ?? [];
|
|
1708
|
+
if (!alternatives.length) return;
|
|
1709
|
+
const best = alternatives[0];
|
|
1710
|
+
const text = (best.transcript ?? "").trim();
|
|
1711
|
+
if (!text) return;
|
|
1712
|
+
const transcript = {
|
|
1713
|
+
text,
|
|
1714
|
+
isFinal: Boolean(data.is_final) || Boolean(data.speech_final),
|
|
1715
|
+
confidence: best.confidence ?? 0
|
|
1716
|
+
};
|
|
1717
|
+
for (const cb of this.callbacks) {
|
|
1718
|
+
cb(transcript);
|
|
1719
|
+
}
|
|
1720
|
+
});
|
|
2051
1721
|
}
|
|
2052
|
-
|
|
2053
|
-
|
|
2054
|
-
|
|
2055
|
-
|
|
2056
|
-
|
|
2057
|
-
|
|
2058
|
-
|
|
2059
|
-
|
|
2060
|
-
|
|
2061
|
-
} else {
|
|
2062
|
-
buf = audio;
|
|
2063
|
-
}
|
|
2064
|
-
if (buf.length % 2 === 1) {
|
|
2065
|
-
ctx.carryByte = buf[buf.length - 1];
|
|
2066
|
-
buf = buf.subarray(0, buf.length - 1);
|
|
2067
|
-
}
|
|
2068
|
-
if (buf.length === 0 && ctx.leftover.length === 0) {
|
|
2069
|
-
return Buffer.alloc(0);
|
|
2070
|
-
}
|
|
2071
|
-
const sampleCount = buf.length / 2;
|
|
2072
|
-
const samples = ctx.leftover.slice();
|
|
2073
|
-
for (let i2 = 0; i2 < sampleCount; i2++) {
|
|
2074
|
-
samples.push(buf.readInt16LE(i2 * 2));
|
|
2075
|
-
}
|
|
2076
|
-
const out = [];
|
|
2077
|
-
let i = 0;
|
|
2078
|
-
while (i + 2 < samples.length) {
|
|
2079
|
-
out.push(samples[i]);
|
|
2080
|
-
out.push(Math.trunc((samples[i + 1] + samples[i + 2]) / 2));
|
|
2081
|
-
i += 3;
|
|
2082
|
-
}
|
|
2083
|
-
ctx.leftover = samples.slice(i);
|
|
2084
|
-
const buffer = Buffer.alloc(out.length * 2);
|
|
2085
|
-
for (let j = 0; j < out.length; j++) {
|
|
2086
|
-
buffer.writeInt16LE(out[j], j * 2);
|
|
1722
|
+
sendAudio(audio) {
|
|
1723
|
+
if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
|
|
1724
|
+
this.ws.send(audio);
|
|
1725
|
+
}
|
|
1726
|
+
onTranscript(callback) {
|
|
1727
|
+
if (this.callbacks.length >= 10) {
|
|
1728
|
+
getLogger().warn("DeepgramSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
|
|
1729
|
+
this.callbacks[this.callbacks.length - 1] = callback;
|
|
1730
|
+
return;
|
|
2087
1731
|
}
|
|
2088
|
-
|
|
1732
|
+
this.callbacks.push(callback);
|
|
2089
1733
|
}
|
|
2090
|
-
|
|
2091
|
-
|
|
2092
|
-
|
|
2093
|
-
|
|
2094
|
-
|
|
2095
|
-
|
|
2096
|
-
|
|
2097
|
-
|
|
1734
|
+
close() {
|
|
1735
|
+
if (this.ws) {
|
|
1736
|
+
try {
|
|
1737
|
+
this.ws.send(JSON.stringify({ type: "CloseStream" }));
|
|
1738
|
+
} catch {
|
|
1739
|
+
}
|
|
1740
|
+
this.ws.close();
|
|
1741
|
+
this.ws = null;
|
|
2098
1742
|
}
|
|
2099
|
-
return Buffer.concat([out, tail]);
|
|
2100
1743
|
}
|
|
2101
1744
|
};
|
|
2102
1745
|
|
|
@@ -2697,8 +2340,8 @@ var StreamHandler = class {
|
|
|
2697
2340
|
this.caller = caller;
|
|
2698
2341
|
this.callee = callee;
|
|
2699
2342
|
this.history = createHistoryManager(200);
|
|
2700
|
-
const sttProviderName = deps.agent.stt
|
|
2701
|
-
const ttsProviderName = deps.agent.tts
|
|
2343
|
+
const sttProviderName = deps.agent.stt ? deps.agent.stt.constructor?.name ?? "custom" : void 0;
|
|
2344
|
+
const ttsProviderName = deps.agent.tts ? deps.agent.tts.constructor?.name ?? "custom" : void 0;
|
|
2702
2345
|
const providerMode = deps.agent.provider ?? "openai_realtime";
|
|
2703
2346
|
this.metricsAcc = new CallMetricsAccumulator({
|
|
2704
2347
|
callId: "",
|
|
@@ -2888,17 +2531,8 @@ var StreamHandler = class {
|
|
|
2888
2531
|
// ---------------------------------------------------------------------------
|
|
2889
2532
|
async initPipeline(resolvedPrompt) {
|
|
2890
2533
|
const label = this.deps.bridge.label;
|
|
2891
|
-
this.stt = this.deps.bridge.createStt(this.deps.agent);
|
|
2892
|
-
|
|
2893
|
-
if (this.deps.agent.tts.provider === "elevenlabs") {
|
|
2894
|
-
this.tts = new ElevenLabsTTS(this.deps.agent.tts.apiKey, this.deps.agent.tts.voice ?? "21m00Tcm4TlvDq8ikWAM");
|
|
2895
|
-
}
|
|
2896
|
-
if (this.deps.agent.tts.provider === "openai") {
|
|
2897
|
-
this.tts = new OpenAITTS(this.deps.agent.tts.apiKey, this.deps.agent.tts.voice ?? "alloy");
|
|
2898
|
-
}
|
|
2899
|
-
} else if (this.deps.agent.elevenlabsKey) {
|
|
2900
|
-
this.tts = new ElevenLabsTTS(this.deps.agent.elevenlabsKey, this.deps.agent.voice || "rachel");
|
|
2901
|
-
}
|
|
2534
|
+
this.stt = await this.deps.bridge.createStt(this.deps.agent);
|
|
2535
|
+
this.tts = await createTTS(this.deps.agent);
|
|
2902
2536
|
if (!this.stt) {
|
|
2903
2537
|
getLogger().info(`Pipeline mode (${label}): no STT configured`);
|
|
2904
2538
|
}
|
|
@@ -3416,10 +3050,11 @@ var StreamHandler = class {
|
|
|
3416
3050
|
this.maxDurationTimer = null;
|
|
3417
3051
|
}
|
|
3418
3052
|
await this.deps.bridge.queryTelephonyCost(this.metricsAcc, this.callId);
|
|
3419
|
-
|
|
3420
|
-
|
|
3421
|
-
|
|
3422
|
-
|
|
3053
|
+
if (this.stt instanceof DeepgramSTT && this.stt.requestId) {
|
|
3054
|
+
const dgKey = this.stt.apiKey;
|
|
3055
|
+
if (dgKey) {
|
|
3056
|
+
await queryDeepgramCost(this.metricsAcc, dgKey, this.stt.requestId);
|
|
3057
|
+
}
|
|
3423
3058
|
}
|
|
3424
3059
|
const finalMetrics = this.metricsAcc.endCall();
|
|
3425
3060
|
const callEndData = {
|
|
@@ -3576,11 +3211,16 @@ function resolveVariables(template, variables) {
|
|
|
3576
3211
|
return result;
|
|
3577
3212
|
}
|
|
3578
3213
|
function buildAIAdapter(config, agent, resolvedPrompt) {
|
|
3214
|
+
const engine = agent.engine;
|
|
3579
3215
|
if (agent.provider === "elevenlabs_convai") {
|
|
3580
|
-
|
|
3216
|
+
if (!engine || engine.kind !== "elevenlabs_convai") {
|
|
3217
|
+
throw new Error(
|
|
3218
|
+
"ElevenLabs ConvAI mode requires `agent.engine = new ElevenLabsConvAI({...})`."
|
|
3219
|
+
);
|
|
3220
|
+
}
|
|
3581
3221
|
return new ElevenLabsConvAIAdapter(
|
|
3582
|
-
|
|
3583
|
-
|
|
3222
|
+
engine.apiKey,
|
|
3223
|
+
engine.agentId,
|
|
3584
3224
|
agent.voice ?? "21m00Tcm4TlvDq8ikWAM",
|
|
3585
3225
|
"eleven_turbo_v2_5",
|
|
3586
3226
|
agent.language ?? "en",
|
|
@@ -3593,33 +3233,15 @@ function buildAIAdapter(config, agent, resolvedPrompt) {
|
|
|
3593
3233
|
parameters: t.parameters
|
|
3594
3234
|
})) ?? [];
|
|
3595
3235
|
const tools = [...agentTools, TRANSFER_CALL_TOOL, END_CALL_TOOL];
|
|
3236
|
+
const openaiKey = engine && engine.kind === "openai_realtime" ? engine.apiKey : config.openaiKey ?? "";
|
|
3596
3237
|
return new OpenAIRealtimeAdapter(
|
|
3597
|
-
|
|
3238
|
+
openaiKey,
|
|
3598
3239
|
agent.model,
|
|
3599
3240
|
agent.voice,
|
|
3600
3241
|
resolvedPrompt ?? agent.systemPrompt,
|
|
3601
3242
|
tools
|
|
3602
3243
|
);
|
|
3603
3244
|
}
|
|
3604
|
-
function extractDeepgramOptions(options) {
|
|
3605
|
-
if (!options) return {};
|
|
3606
|
-
const get = (snake, camel) => options[snake] ?? options[camel];
|
|
3607
|
-
const out = {};
|
|
3608
|
-
const model = get("model", "model");
|
|
3609
|
-
if (typeof model === "string") out.model = model;
|
|
3610
|
-
const endpointing = get("endpointing_ms", "endpointingMs");
|
|
3611
|
-
if (typeof endpointing === "number") out.endpointingMs = endpointing;
|
|
3612
|
-
const utteranceEnd = get("utterance_end_ms", "utteranceEndMs");
|
|
3613
|
-
if (utteranceEnd === null) out.utteranceEndMs = null;
|
|
3614
|
-
else if (typeof utteranceEnd === "number") out.utteranceEndMs = utteranceEnd;
|
|
3615
|
-
const smart = get("smart_format", "smartFormat");
|
|
3616
|
-
if (typeof smart === "boolean") out.smartFormat = smart;
|
|
3617
|
-
const interim = get("interim_results", "interimResults");
|
|
3618
|
-
if (typeof interim === "boolean") out.interimResults = interim;
|
|
3619
|
-
const vad = get("vad_events", "vadEvents");
|
|
3620
|
-
if (typeof vad === "boolean") out.vadEvents = vad;
|
|
3621
|
-
return out;
|
|
3622
|
-
}
|
|
3623
3245
|
var TwilioBridge = class {
|
|
3624
3246
|
constructor(config) {
|
|
3625
3247
|
this.config = config;
|
|
@@ -3671,24 +3293,7 @@ var TwilioBridge = class {
|
|
|
3671
3293
|
}
|
|
3672
3294
|
}
|
|
3673
3295
|
createStt(agent) {
|
|
3674
|
-
|
|
3675
|
-
if (agent.stt) {
|
|
3676
|
-
if (agent.stt.provider === "deepgram") {
|
|
3677
|
-
const dgOptions = extractDeepgramOptions(agent.stt.options);
|
|
3678
|
-
if (isPipeline) {
|
|
3679
|
-
return new DeepgramSTT(agent.stt.apiKey, agent.stt.language ?? "en", dgOptions.model, "linear16", 16e3, dgOptions);
|
|
3680
|
-
}
|
|
3681
|
-
return DeepgramSTT.forTwilio(agent.stt.apiKey, agent.stt.language ?? "en", dgOptions.model, dgOptions);
|
|
3682
|
-
} else if (agent.stt.provider === "whisper") {
|
|
3683
|
-
return isPipeline ? new WhisperSTT(agent.stt.apiKey, "whisper-1", agent.stt.language ?? "en") : WhisperSTT.forTwilio(agent.stt.apiKey, agent.stt.language ?? "en");
|
|
3684
|
-
}
|
|
3685
|
-
} else if (agent.deepgramKey) {
|
|
3686
|
-
if (isPipeline) {
|
|
3687
|
-
return new DeepgramSTT(agent.deepgramKey, agent.language ?? "en", "nova-3", "linear16", 16e3);
|
|
3688
|
-
}
|
|
3689
|
-
return DeepgramSTT.forTwilio(agent.deepgramKey, agent.language ?? "en");
|
|
3690
|
-
}
|
|
3691
|
-
return null;
|
|
3296
|
+
return createSTT(agent);
|
|
3692
3297
|
}
|
|
3693
3298
|
async queryTelephonyCost(metricsAcc, callId) {
|
|
3694
3299
|
if (this.config.twilioSid && this.config.twilioToken && callId) {
|
|
@@ -3835,24 +3440,7 @@ var TelnyxBridge = class {
|
|
|
3835
3440
|
ws.close();
|
|
3836
3441
|
}
|
|
3837
3442
|
createStt(agent) {
|
|
3838
|
-
|
|
3839
|
-
if (agent.stt.provider === "deepgram") {
|
|
3840
|
-
const dgOptions = extractDeepgramOptions(agent.stt.options);
|
|
3841
|
-
return new DeepgramSTT(
|
|
3842
|
-
agent.stt.apiKey,
|
|
3843
|
-
agent.stt.language ?? "en",
|
|
3844
|
-
dgOptions.model ?? "nova-3",
|
|
3845
|
-
"linear16",
|
|
3846
|
-
16e3,
|
|
3847
|
-
dgOptions
|
|
3848
|
-
);
|
|
3849
|
-
} else if (agent.stt.provider === "whisper") {
|
|
3850
|
-
return new WhisperSTT(agent.stt.apiKey, "whisper-1", agent.stt.language ?? "en");
|
|
3851
|
-
}
|
|
3852
|
-
} else if (agent.deepgramKey) {
|
|
3853
|
-
return new DeepgramSTT(agent.deepgramKey, agent.language ?? "en", "nova-3", "linear16", 16e3);
|
|
3854
|
-
}
|
|
3855
|
-
return null;
|
|
3443
|
+
return createSTT(agent);
|
|
3856
3444
|
}
|
|
3857
3445
|
async queryTelephonyCost(metricsAcc, callId) {
|
|
3858
3446
|
if (this.config.telnyxKey && callId) {
|
|
@@ -4790,8 +4378,6 @@ var TestSession = class {
|
|
|
4790
4378
|
export {
|
|
4791
4379
|
OpenAIRealtimeAdapter,
|
|
4792
4380
|
ElevenLabsConvAIAdapter,
|
|
4793
|
-
DeepgramSTT,
|
|
4794
|
-
WhisperSTT,
|
|
4795
4381
|
DEFAULT_PRICING,
|
|
4796
4382
|
mergePricing,
|
|
4797
4383
|
calculateSttCost,
|
|
@@ -4807,8 +4393,7 @@ export {
|
|
|
4807
4393
|
RemoteMessageHandler,
|
|
4808
4394
|
isRemoteUrl,
|
|
4809
4395
|
isWebSocketUrl,
|
|
4810
|
-
|
|
4811
|
-
OpenAITTS,
|
|
4396
|
+
DeepgramSTT,
|
|
4812
4397
|
CallMetricsAccumulator,
|
|
4813
4398
|
mulawToPcm16,
|
|
4814
4399
|
pcm16ToMulaw,
|