getpatter 0.4.4 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +156 -158
- package/dist/carrier-config-CPG5CROM.mjs +84 -0
- package/dist/{chunk-JO5C35FM.mjs → chunk-AKQFOFLG.mjs} +1 -1
- package/dist/{chunk-O3RQG3NL.mjs → chunk-B6C3KIBG.mjs} +177 -567
- package/dist/index.d.mts +1163 -377
- package/dist/index.d.ts +1163 -377
- package/dist/index.js +2028 -1835
- package/dist/index.mjs +1644 -329
- package/dist/{test-mode-ASSLSQU2.mjs → test-mode-JZMYE5HY.mjs} +1 -1
- package/dist/{tunnel-BL7A7GXW.mjs → tunnel-O7ICMSTP.mjs} +1 -1
- package/package.json +1 -1
- package/dist/lib-4WCAS54J.mjs +0 -830
|
@@ -247,242 +247,13 @@ var ElevenLabsConvAIAdapter = class {
|
|
|
247
247
|
}
|
|
248
248
|
};
|
|
249
249
|
|
|
250
|
-
// src/
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
/** Request ID from Deepgram — used to query actual cost post-call. */
|
|
257
|
-
requestId = "";
|
|
258
|
-
apiKey;
|
|
259
|
-
language;
|
|
260
|
-
model;
|
|
261
|
-
encoding;
|
|
262
|
-
sampleRate;
|
|
263
|
-
endpointingMs;
|
|
264
|
-
utteranceEndMs;
|
|
265
|
-
smartFormat;
|
|
266
|
-
interimResults;
|
|
267
|
-
vadEvents;
|
|
268
|
-
constructor(apiKey, languageOrOptions, model, encoding, sampleRate, options) {
|
|
269
|
-
this.apiKey = apiKey;
|
|
270
|
-
const opts = typeof languageOrOptions === "object" && languageOrOptions !== null ? languageOrOptions : options ?? {};
|
|
271
|
-
this.language = (typeof languageOrOptions === "string" ? languageOrOptions : opts.language) ?? "en";
|
|
272
|
-
this.model = model ?? opts.model ?? "nova-3";
|
|
273
|
-
this.encoding = encoding ?? opts.encoding ?? "linear16";
|
|
274
|
-
this.sampleRate = sampleRate ?? opts.sampleRate ?? 16e3;
|
|
275
|
-
this.endpointingMs = opts.endpointingMs ?? 150;
|
|
276
|
-
this.utteranceEndMs = opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3;
|
|
277
|
-
this.smartFormat = opts.smartFormat ?? true;
|
|
278
|
-
this.interimResults = opts.interimResults ?? true;
|
|
279
|
-
this.vadEvents = opts.vadEvents ?? true;
|
|
280
|
-
}
|
|
281
|
-
/** Factory for Twilio calls — mulaw 8 kHz. Forwards tuning options through. */
|
|
282
|
-
static forTwilio(apiKey, language = "en", model = "nova-3", options = {}) {
|
|
283
|
-
return new _DeepgramSTT(apiKey, language, model, "mulaw", 8e3, options);
|
|
284
|
-
}
|
|
285
|
-
async connect() {
|
|
286
|
-
const params = new URLSearchParams({
|
|
287
|
-
model: this.model,
|
|
288
|
-
language: this.language,
|
|
289
|
-
encoding: this.encoding,
|
|
290
|
-
sample_rate: String(this.sampleRate),
|
|
291
|
-
channels: "1",
|
|
292
|
-
interim_results: this.interimResults ? "true" : "false",
|
|
293
|
-
endpointing: String(this.endpointingMs),
|
|
294
|
-
smart_format: this.smartFormat ? "true" : "false",
|
|
295
|
-
vad_events: this.vadEvents ? "true" : "false",
|
|
296
|
-
no_delay: "true"
|
|
297
|
-
});
|
|
298
|
-
if (this.utteranceEndMs !== null) {
|
|
299
|
-
params.set("utterance_end_ms", String(Math.max(this.utteranceEndMs, 1e3)));
|
|
300
|
-
}
|
|
301
|
-
const url = `${DEEPGRAM_WS_URL}?${params.toString()}`;
|
|
302
|
-
this.ws = new WebSocket3(url, {
|
|
303
|
-
headers: { Authorization: `Token ${this.apiKey}` }
|
|
304
|
-
});
|
|
305
|
-
await new Promise((resolve, reject) => {
|
|
306
|
-
const timer = setTimeout(() => reject(new Error("Deepgram connect timeout")), 1e4);
|
|
307
|
-
this.ws.once("open", () => {
|
|
308
|
-
clearTimeout(timer);
|
|
309
|
-
resolve();
|
|
310
|
-
});
|
|
311
|
-
this.ws.once("error", (err) => {
|
|
312
|
-
clearTimeout(timer);
|
|
313
|
-
reject(err);
|
|
314
|
-
});
|
|
315
|
-
});
|
|
316
|
-
this.ws.on("message", (raw) => {
|
|
317
|
-
let data;
|
|
318
|
-
try {
|
|
319
|
-
data = JSON.parse(raw.toString());
|
|
320
|
-
} catch {
|
|
321
|
-
return;
|
|
322
|
-
}
|
|
323
|
-
if (data.type === "Metadata" && data.request_id) {
|
|
324
|
-
this.requestId = data.request_id;
|
|
325
|
-
return;
|
|
326
|
-
}
|
|
327
|
-
if (data.type !== "Results") return;
|
|
328
|
-
const alternatives = data.channel?.alternatives ?? [];
|
|
329
|
-
if (!alternatives.length) return;
|
|
330
|
-
const best = alternatives[0];
|
|
331
|
-
const text = (best.transcript ?? "").trim();
|
|
332
|
-
if (!text) return;
|
|
333
|
-
const transcript = {
|
|
334
|
-
text,
|
|
335
|
-
isFinal: Boolean(data.is_final) || Boolean(data.speech_final),
|
|
336
|
-
confidence: best.confidence ?? 0
|
|
337
|
-
};
|
|
338
|
-
for (const cb of this.callbacks) {
|
|
339
|
-
cb(transcript);
|
|
340
|
-
}
|
|
341
|
-
});
|
|
342
|
-
}
|
|
343
|
-
sendAudio(audio) {
|
|
344
|
-
if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
|
|
345
|
-
this.ws.send(audio);
|
|
346
|
-
}
|
|
347
|
-
onTranscript(callback) {
|
|
348
|
-
if (this.callbacks.length >= 10) {
|
|
349
|
-
getLogger().warn("DeepgramSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
|
|
350
|
-
this.callbacks[this.callbacks.length - 1] = callback;
|
|
351
|
-
return;
|
|
352
|
-
}
|
|
353
|
-
this.callbacks.push(callback);
|
|
354
|
-
}
|
|
355
|
-
close() {
|
|
356
|
-
if (this.ws) {
|
|
357
|
-
try {
|
|
358
|
-
this.ws.send(JSON.stringify({ type: "CloseStream" }));
|
|
359
|
-
} catch {
|
|
360
|
-
}
|
|
361
|
-
this.ws.close();
|
|
362
|
-
this.ws = null;
|
|
363
|
-
}
|
|
364
|
-
}
|
|
365
|
-
};
|
|
366
|
-
|
|
367
|
-
// src/providers/whisper-stt.ts
|
|
368
|
-
var OPENAI_TRANSCRIPTION_URL = "https://api.openai.com/v1/audio/transcriptions";
|
|
369
|
-
var DEFAULT_BUFFER_SIZE = 16e3 * 2;
|
|
370
|
-
function wrapPcmInWav(pcm, sampleRate = 16e3, channels = 1, bitsPerSample = 16) {
|
|
371
|
-
const dataSize = pcm.length;
|
|
372
|
-
const header = Buffer.alloc(44);
|
|
373
|
-
header.write("RIFF", 0);
|
|
374
|
-
header.writeUInt32LE(36 + dataSize, 4);
|
|
375
|
-
header.write("WAVE", 8);
|
|
376
|
-
header.write("fmt ", 12);
|
|
377
|
-
header.writeUInt32LE(16, 16);
|
|
378
|
-
header.writeUInt16LE(1, 20);
|
|
379
|
-
header.writeUInt16LE(channels, 22);
|
|
380
|
-
header.writeUInt32LE(sampleRate, 24);
|
|
381
|
-
header.writeUInt32LE(sampleRate * channels * (bitsPerSample / 8), 28);
|
|
382
|
-
header.writeUInt16LE(channels * (bitsPerSample / 8), 32);
|
|
383
|
-
header.writeUInt16LE(bitsPerSample, 34);
|
|
384
|
-
header.write("data", 36);
|
|
385
|
-
header.writeUInt32LE(dataSize, 40);
|
|
386
|
-
return Buffer.concat([header, pcm]);
|
|
250
|
+
// src/provider-factory.ts
|
|
251
|
+
async function createSTT(agent) {
|
|
252
|
+
return agent.stt ?? null;
|
|
253
|
+
}
|
|
254
|
+
async function createTTS(agent) {
|
|
255
|
+
return agent.tts ?? null;
|
|
387
256
|
}
|
|
388
|
-
var WhisperSTT = class _WhisperSTT {
|
|
389
|
-
apiKey;
|
|
390
|
-
model;
|
|
391
|
-
language;
|
|
392
|
-
bufferSize;
|
|
393
|
-
buffer = Buffer.alloc(0);
|
|
394
|
-
callbacks = [];
|
|
395
|
-
running = false;
|
|
396
|
-
pendingTranscriptions = [];
|
|
397
|
-
constructor(apiKey, model = "whisper-1", language, bufferSize = DEFAULT_BUFFER_SIZE) {
|
|
398
|
-
this.apiKey = apiKey;
|
|
399
|
-
this.model = model;
|
|
400
|
-
this.language = language;
|
|
401
|
-
this.bufferSize = bufferSize;
|
|
402
|
-
}
|
|
403
|
-
/** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
|
|
404
|
-
static forTwilio(apiKey, language = "en", model = "whisper-1") {
|
|
405
|
-
return new _WhisperSTT(apiKey, model, language);
|
|
406
|
-
}
|
|
407
|
-
async connect() {
|
|
408
|
-
this.running = true;
|
|
409
|
-
this.buffer = Buffer.alloc(0);
|
|
410
|
-
}
|
|
411
|
-
sendAudio(audio) {
|
|
412
|
-
if (!this.running) return;
|
|
413
|
-
this.buffer = Buffer.concat([this.buffer, audio]);
|
|
414
|
-
if (this.buffer.length >= this.bufferSize) {
|
|
415
|
-
const pcm = this.buffer;
|
|
416
|
-
this.buffer = Buffer.alloc(0);
|
|
417
|
-
this.trackTranscription(this.transcribeBuffer(pcm));
|
|
418
|
-
}
|
|
419
|
-
}
|
|
420
|
-
trackTranscription(promise) {
|
|
421
|
-
const wrapped = promise.finally(() => {
|
|
422
|
-
const idx = this.pendingTranscriptions.indexOf(wrapped);
|
|
423
|
-
if (idx !== -1) this.pendingTranscriptions.splice(idx, 1);
|
|
424
|
-
});
|
|
425
|
-
this.pendingTranscriptions.push(wrapped);
|
|
426
|
-
}
|
|
427
|
-
onTranscript(callback) {
|
|
428
|
-
if (this.callbacks.length >= 10) {
|
|
429
|
-
getLogger().warn("WhisperSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
|
|
430
|
-
this.callbacks[this.callbacks.length - 1] = callback;
|
|
431
|
-
return;
|
|
432
|
-
}
|
|
433
|
-
this.callbacks.push(callback);
|
|
434
|
-
}
|
|
435
|
-
async close() {
|
|
436
|
-
this.running = false;
|
|
437
|
-
if (this.buffer.length >= this.bufferSize / 4) {
|
|
438
|
-
const pcm = this.buffer;
|
|
439
|
-
this.buffer = Buffer.alloc(0);
|
|
440
|
-
this.trackTranscription(this.transcribeBuffer(pcm));
|
|
441
|
-
} else {
|
|
442
|
-
this.buffer = Buffer.alloc(0);
|
|
443
|
-
}
|
|
444
|
-
await Promise.allSettled(this.pendingTranscriptions);
|
|
445
|
-
this.callbacks = [];
|
|
446
|
-
}
|
|
447
|
-
// ------------------------------------------------------------------
|
|
448
|
-
// Private
|
|
449
|
-
// ------------------------------------------------------------------
|
|
450
|
-
async transcribeBuffer(pcm) {
|
|
451
|
-
const wav = wrapPcmInWav(pcm);
|
|
452
|
-
const formData = new FormData();
|
|
453
|
-
formData.append("file", new Blob([wav.buffer.slice(wav.byteOffset, wav.byteOffset + wav.byteLength)], { type: "audio/wav" }), "audio.wav");
|
|
454
|
-
formData.append("model", this.model);
|
|
455
|
-
if (this.language) {
|
|
456
|
-
formData.append("language", this.language);
|
|
457
|
-
}
|
|
458
|
-
try {
|
|
459
|
-
const resp = await fetch(OPENAI_TRANSCRIPTION_URL, {
|
|
460
|
-
method: "POST",
|
|
461
|
-
headers: { Authorization: `Bearer ${this.apiKey}` },
|
|
462
|
-
body: formData,
|
|
463
|
-
signal: AbortSignal.timeout(15e3)
|
|
464
|
-
});
|
|
465
|
-
if (!resp.ok) {
|
|
466
|
-
const body = await resp.text();
|
|
467
|
-
getLogger().error(`WhisperSTT transcription error: ${resp.status} ${body}`);
|
|
468
|
-
return;
|
|
469
|
-
}
|
|
470
|
-
const json = await resp.json();
|
|
471
|
-
const text = (json.text ?? "").trim();
|
|
472
|
-
if (!text) return;
|
|
473
|
-
const transcript = {
|
|
474
|
-
text,
|
|
475
|
-
isFinal: true,
|
|
476
|
-
confidence: 1
|
|
477
|
-
};
|
|
478
|
-
for (const cb of this.callbacks) {
|
|
479
|
-
cb(transcript);
|
|
480
|
-
}
|
|
481
|
-
} catch (err) {
|
|
482
|
-
getLogger().error(`WhisperSTT transcription error: ${String(err)}`);
|
|
483
|
-
}
|
|
484
|
-
}
|
|
485
|
-
};
|
|
486
257
|
|
|
487
258
|
// src/pricing.ts
|
|
488
259
|
var DEFAULT_PRICING = {
|
|
@@ -1855,248 +1626,120 @@ function isWebSocketUrl(url) {
|
|
|
1855
1626
|
return url.startsWith("ws://") || url.startsWith("wss://");
|
|
1856
1627
|
}
|
|
1857
1628
|
|
|
1858
|
-
// src/providers/
|
|
1859
|
-
|
|
1860
|
-
var
|
|
1861
|
-
|
|
1862
|
-
|
|
1863
|
-
|
|
1864
|
-
|
|
1865
|
-
|
|
1866
|
-
|
|
1867
|
-
|
|
1868
|
-
|
|
1869
|
-
|
|
1870
|
-
|
|
1871
|
-
|
|
1872
|
-
|
|
1873
|
-
|
|
1874
|
-
|
|
1875
|
-
|
|
1876
|
-
|
|
1877
|
-
harry: "SOYHLrjzK2X1ezoPC6cr",
|
|
1878
|
-
liam: "TX3LPaxmHKxFdv7VOQHJ",
|
|
1879
|
-
dorothy: "ThT5KcBeYPX3keUQqHPh",
|
|
1880
|
-
josh: "TxGEqnHWrfWFTfGW9XjX",
|
|
1881
|
-
arnold: "VR6AewLTigWG4xSOukaG",
|
|
1882
|
-
charlotte: "XB0fDUnXU5powFXDhCwa",
|
|
1883
|
-
matilda: "XrExE9yKIg1WjnnlVkGX",
|
|
1884
|
-
matthew: "Yko7PKHZNXotIFUBG7I9",
|
|
1885
|
-
james: "ZQe5CZNOzWyzPSCn5a3c",
|
|
1886
|
-
joseph: "Zlb1dXrM653N07WRdFW3",
|
|
1887
|
-
jeremy: "bVMeCyTHy58xNoL34h3p",
|
|
1888
|
-
michael: "flq6f7yk4E4fJM5XTYuZ",
|
|
1889
|
-
ethan: "g5CIjZEefAph4nQFvHAz",
|
|
1890
|
-
gigi: "jBpfuIE2acCO8z3wKNLl",
|
|
1891
|
-
freya: "jsCqWAovK2LkecY7zXl4",
|
|
1892
|
-
brian: "nPczCjzI2devNBz1zQrb",
|
|
1893
|
-
grace: "oWAxZDx7w5VEj9dCyTzz",
|
|
1894
|
-
daniel: "onwK4e9ZLuTAKqWW03F9",
|
|
1895
|
-
lily: "pFZP5JQG7iQjIQuC4Bku",
|
|
1896
|
-
serena: "pMsXgVXv3BLzUgSXRplE",
|
|
1897
|
-
adam: "pNInz6obpgDQGcFmaJgB",
|
|
1898
|
-
nicole: "piTKgcLEGmPE4e6mEKli",
|
|
1899
|
-
bill: "pqHfZKP75CvOlQylNhV4",
|
|
1900
|
-
jessie: "t0jbNlBVZ17f02VDIeMI",
|
|
1901
|
-
ryan: "wViXBPUzp2ZZixB1xQuM",
|
|
1902
|
-
sam: "yoZ06aMxZJJ28mfd3POQ",
|
|
1903
|
-
glinda: "z9fAnlkpzviPz146aGWa",
|
|
1904
|
-
giovanni: "zcAOhNBS3c14rBihAFp1",
|
|
1905
|
-
mimi: "zrHiDhphv9ZnVXBqCLjz",
|
|
1906
|
-
alloy: "21m00Tcm4TlvDq8ikWAM"
|
|
1907
|
-
};
|
|
1908
|
-
var VOICE_ID_PATTERN = /^[A-Za-z0-9]{20}$/;
|
|
1909
|
-
function resolveVoiceId(voice) {
|
|
1910
|
-
if (!voice) return voice;
|
|
1911
|
-
if (VOICE_ID_PATTERN.test(voice)) return voice;
|
|
1912
|
-
return ELEVENLABS_VOICE_ID_BY_NAME[voice.toLowerCase()] ?? voice;
|
|
1913
|
-
}
|
|
1914
|
-
var ElevenLabsTTS = class {
|
|
1915
|
-
constructor(apiKey, voiceId = "21m00Tcm4TlvDq8ikWAM", modelId = "eleven_turbo_v2_5", outputFormat = "pcm_16000") {
|
|
1629
|
+
// src/providers/deepgram-stt.ts
|
|
1630
|
+
import WebSocket3 from "ws";
|
|
1631
|
+
var DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/listen";
|
|
1632
|
+
var DeepgramSTT = class _DeepgramSTT {
|
|
1633
|
+
ws = null;
|
|
1634
|
+
callbacks = [];
|
|
1635
|
+
/** Request ID from Deepgram — used to query actual cost post-call. */
|
|
1636
|
+
requestId = "";
|
|
1637
|
+
apiKey;
|
|
1638
|
+
language;
|
|
1639
|
+
model;
|
|
1640
|
+
encoding;
|
|
1641
|
+
sampleRate;
|
|
1642
|
+
endpointingMs;
|
|
1643
|
+
utteranceEndMs;
|
|
1644
|
+
smartFormat;
|
|
1645
|
+
interimResults;
|
|
1646
|
+
vadEvents;
|
|
1647
|
+
constructor(apiKey, languageOrOptions, model, encoding, sampleRate, options) {
|
|
1916
1648
|
this.apiKey = apiKey;
|
|
1917
|
-
|
|
1918
|
-
this.
|
|
1919
|
-
this.
|
|
1649
|
+
const opts = typeof languageOrOptions === "object" && languageOrOptions !== null ? languageOrOptions : options ?? {};
|
|
1650
|
+
this.language = (typeof languageOrOptions === "string" ? languageOrOptions : opts.language) ?? "en";
|
|
1651
|
+
this.model = model ?? opts.model ?? "nova-3";
|
|
1652
|
+
this.encoding = encoding ?? opts.encoding ?? "linear16";
|
|
1653
|
+
this.sampleRate = sampleRate ?? opts.sampleRate ?? 16e3;
|
|
1654
|
+
this.endpointingMs = opts.endpointingMs ?? 150;
|
|
1655
|
+
this.utteranceEndMs = opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3;
|
|
1656
|
+
this.smartFormat = opts.smartFormat ?? true;
|
|
1657
|
+
this.interimResults = opts.interimResults ?? true;
|
|
1658
|
+
this.vadEvents = opts.vadEvents ?? true;
|
|
1920
1659
|
}
|
|
1921
|
-
|
|
1922
|
-
|
|
1923
|
-
|
|
1924
|
-
*
|
|
1925
|
-
* For large chunks (or when latency matters) call `synthesizeStream` instead.
|
|
1926
|
-
*/
|
|
1927
|
-
async synthesize(text) {
|
|
1928
|
-
const chunks = [];
|
|
1929
|
-
for await (const chunk of this.synthesizeStream(text)) {
|
|
1930
|
-
chunks.push(chunk);
|
|
1931
|
-
}
|
|
1932
|
-
return Buffer.concat(chunks);
|
|
1660
|
+
/** Factory for Twilio calls — mulaw 8 kHz. Forwards tuning options through. */
|
|
1661
|
+
static forTwilio(apiKey, language = "en", model = "nova-3", options = {}) {
|
|
1662
|
+
return new _DeepgramSTT(apiKey, language, model, "mulaw", 8e3, options);
|
|
1933
1663
|
}
|
|
1934
|
-
|
|
1935
|
-
|
|
1936
|
-
|
|
1937
|
-
|
|
1938
|
-
|
|
1939
|
-
|
|
1940
|
-
|
|
1941
|
-
|
|
1942
|
-
|
|
1943
|
-
|
|
1944
|
-
|
|
1945
|
-
|
|
1946
|
-
"Content-Type": "application/json"
|
|
1947
|
-
},
|
|
1948
|
-
body: JSON.stringify({ text, model_id: this.modelId }),
|
|
1949
|
-
signal: AbortSignal.timeout(3e4)
|
|
1664
|
+
async connect() {
|
|
1665
|
+
const params = new URLSearchParams({
|
|
1666
|
+
model: this.model,
|
|
1667
|
+
language: this.language,
|
|
1668
|
+
encoding: this.encoding,
|
|
1669
|
+
sample_rate: String(this.sampleRate),
|
|
1670
|
+
channels: "1",
|
|
1671
|
+
interim_results: this.interimResults ? "true" : "false",
|
|
1672
|
+
endpointing: String(this.endpointingMs),
|
|
1673
|
+
smart_format: this.smartFormat ? "true" : "false",
|
|
1674
|
+
vad_events: this.vadEvents ? "true" : "false",
|
|
1675
|
+
no_delay: "true"
|
|
1950
1676
|
});
|
|
1951
|
-
if (
|
|
1952
|
-
|
|
1953
|
-
throw new Error(`ElevenLabs TTS error ${response.status}: ${body}`);
|
|
1954
|
-
}
|
|
1955
|
-
if (!response.body) {
|
|
1956
|
-
throw new Error("ElevenLabs TTS: no response body");
|
|
1677
|
+
if (this.utteranceEndMs !== null) {
|
|
1678
|
+
params.set("utterance_end_ms", String(Math.max(this.utteranceEndMs, 1e3)));
|
|
1957
1679
|
}
|
|
1958
|
-
const
|
|
1959
|
-
|
|
1960
|
-
|
|
1961
|
-
|
|
1962
|
-
|
|
1963
|
-
|
|
1964
|
-
|
|
1965
|
-
|
|
1966
|
-
|
|
1967
|
-
|
|
1968
|
-
|
|
1680
|
+
const url = `${DEEPGRAM_WS_URL}?${params.toString()}`;
|
|
1681
|
+
this.ws = new WebSocket3(url, {
|
|
1682
|
+
headers: { Authorization: `Token ${this.apiKey}` }
|
|
1683
|
+
});
|
|
1684
|
+
await new Promise((resolve, reject) => {
|
|
1685
|
+
const timer = setTimeout(() => reject(new Error("Deepgram connect timeout")), 1e4);
|
|
1686
|
+
this.ws.once("open", () => {
|
|
1687
|
+
clearTimeout(timer);
|
|
1688
|
+
resolve();
|
|
1689
|
+
});
|
|
1690
|
+
this.ws.once("error", (err) => {
|
|
1691
|
+
clearTimeout(timer);
|
|
1692
|
+
reject(err);
|
|
1969
1693
|
});
|
|
1970
|
-
reader.releaseLock();
|
|
1971
|
-
}
|
|
1972
|
-
}
|
|
1973
|
-
};
|
|
1974
|
-
|
|
1975
|
-
// src/providers/openai-tts.ts
|
|
1976
|
-
var OPENAI_TTS_URL = "https://api.openai.com/v1/audio/speech";
|
|
1977
|
-
var OpenAITTS = class _OpenAITTS {
|
|
1978
|
-
constructor(apiKey, voice = "alloy", model = "tts-1") {
|
|
1979
|
-
this.apiKey = apiKey;
|
|
1980
|
-
this.voice = voice;
|
|
1981
|
-
this.model = model;
|
|
1982
|
-
}
|
|
1983
|
-
/**
|
|
1984
|
-
* Synthesise text to speech and return the full audio as a single Buffer.
|
|
1985
|
-
*
|
|
1986
|
-
* For large chunks (or when latency matters) call `synthesizeStream` instead.
|
|
1987
|
-
*/
|
|
1988
|
-
async synthesize(text) {
|
|
1989
|
-
const chunks = [];
|
|
1990
|
-
for await (const chunk of this.synthesizeStream(text)) {
|
|
1991
|
-
chunks.push(chunk);
|
|
1992
|
-
}
|
|
1993
|
-
return Buffer.concat(chunks);
|
|
1994
|
-
}
|
|
1995
|
-
/**
|
|
1996
|
-
* Synthesise text and yield audio chunks as they arrive (streaming).
|
|
1997
|
-
*
|
|
1998
|
-
* OpenAI returns 24 kHz PCM16; each chunk is resampled to 16 kHz before
|
|
1999
|
-
* yielding so the output is ready for telephony pipelines.
|
|
2000
|
-
*
|
|
2001
|
-
* The resampler carries state (buffered samples + odd trailing byte)
|
|
2002
|
-
* between chunks — without that state cross-chunk sample alignment drifts
|
|
2003
|
-
* and the caller hears pops / dropped audio (BUG #23, mirror of the
|
|
2004
|
-
* Python `audioop.ratecv` fix).
|
|
2005
|
-
*/
|
|
2006
|
-
async *synthesizeStream(text) {
|
|
2007
|
-
const response = await fetch(OPENAI_TTS_URL, {
|
|
2008
|
-
method: "POST",
|
|
2009
|
-
headers: {
|
|
2010
|
-
"Authorization": `Bearer ${this.apiKey}`,
|
|
2011
|
-
"Content-Type": "application/json"
|
|
2012
|
-
},
|
|
2013
|
-
body: JSON.stringify({
|
|
2014
|
-
model: this.model,
|
|
2015
|
-
input: text,
|
|
2016
|
-
voice: this.voice,
|
|
2017
|
-
response_format: "pcm"
|
|
2018
|
-
}),
|
|
2019
|
-
signal: AbortSignal.timeout(3e4)
|
|
2020
1694
|
});
|
|
2021
|
-
|
|
2022
|
-
|
|
2023
|
-
|
|
2024
|
-
|
|
2025
|
-
|
|
2026
|
-
|
|
2027
|
-
}
|
|
2028
|
-
const ctx = { carryByte: null, leftover: [] };
|
|
2029
|
-
const reader = response.body.getReader();
|
|
2030
|
-
try {
|
|
2031
|
-
while (true) {
|
|
2032
|
-
const { done, value } = await reader.read();
|
|
2033
|
-
if (done) break;
|
|
2034
|
-
if (value && value.length > 0) {
|
|
2035
|
-
const out = _OpenAITTS.resampleStreaming(Buffer.from(value), ctx);
|
|
2036
|
-
if (out.length > 0) yield out;
|
|
2037
|
-
}
|
|
1695
|
+
this.ws.on("message", (raw) => {
|
|
1696
|
+
let data;
|
|
1697
|
+
try {
|
|
1698
|
+
data = JSON.parse(raw.toString());
|
|
1699
|
+
} catch {
|
|
1700
|
+
return;
|
|
2038
1701
|
}
|
|
2039
|
-
if (
|
|
2040
|
-
|
|
2041
|
-
|
|
2042
|
-
tail.writeInt16LE(ctx.leftover[i], i * 2);
|
|
2043
|
-
}
|
|
2044
|
-
yield tail;
|
|
1702
|
+
if (data.type === "Metadata" && data.request_id) {
|
|
1703
|
+
this.requestId = data.request_id;
|
|
1704
|
+
return;
|
|
2045
1705
|
}
|
|
2046
|
-
|
|
2047
|
-
|
|
2048
|
-
|
|
2049
|
-
|
|
2050
|
-
|
|
1706
|
+
if (data.type !== "Results") return;
|
|
1707
|
+
const alternatives = data.channel?.alternatives ?? [];
|
|
1708
|
+
if (!alternatives.length) return;
|
|
1709
|
+
const best = alternatives[0];
|
|
1710
|
+
const text = (best.transcript ?? "").trim();
|
|
1711
|
+
if (!text) return;
|
|
1712
|
+
const transcript = {
|
|
1713
|
+
text,
|
|
1714
|
+
isFinal: Boolean(data.is_final) || Boolean(data.speech_final),
|
|
1715
|
+
confidence: best.confidence ?? 0
|
|
1716
|
+
};
|
|
1717
|
+
for (const cb of this.callbacks) {
|
|
1718
|
+
cb(transcript);
|
|
1719
|
+
}
|
|
1720
|
+
});
|
|
2051
1721
|
}
|
|
2052
|
-
|
|
2053
|
-
|
|
2054
|
-
|
|
2055
|
-
|
|
2056
|
-
|
|
2057
|
-
|
|
2058
|
-
|
|
2059
|
-
|
|
2060
|
-
|
|
2061
|
-
} else {
|
|
2062
|
-
buf = audio;
|
|
2063
|
-
}
|
|
2064
|
-
if (buf.length % 2 === 1) {
|
|
2065
|
-
ctx.carryByte = buf[buf.length - 1];
|
|
2066
|
-
buf = buf.subarray(0, buf.length - 1);
|
|
2067
|
-
}
|
|
2068
|
-
if (buf.length === 0 && ctx.leftover.length === 0) {
|
|
2069
|
-
return Buffer.alloc(0);
|
|
2070
|
-
}
|
|
2071
|
-
const sampleCount = buf.length / 2;
|
|
2072
|
-
const samples = ctx.leftover.slice();
|
|
2073
|
-
for (let i2 = 0; i2 < sampleCount; i2++) {
|
|
2074
|
-
samples.push(buf.readInt16LE(i2 * 2));
|
|
2075
|
-
}
|
|
2076
|
-
const out = [];
|
|
2077
|
-
let i = 0;
|
|
2078
|
-
while (i + 2 < samples.length) {
|
|
2079
|
-
out.push(samples[i]);
|
|
2080
|
-
out.push(Math.trunc((samples[i + 1] + samples[i + 2]) / 2));
|
|
2081
|
-
i += 3;
|
|
2082
|
-
}
|
|
2083
|
-
ctx.leftover = samples.slice(i);
|
|
2084
|
-
const buffer = Buffer.alloc(out.length * 2);
|
|
2085
|
-
for (let j = 0; j < out.length; j++) {
|
|
2086
|
-
buffer.writeInt16LE(out[j], j * 2);
|
|
1722
|
+
sendAudio(audio) {
|
|
1723
|
+
if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
|
|
1724
|
+
this.ws.send(audio);
|
|
1725
|
+
}
|
|
1726
|
+
onTranscript(callback) {
|
|
1727
|
+
if (this.callbacks.length >= 10) {
|
|
1728
|
+
getLogger().warn("DeepgramSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
|
|
1729
|
+
this.callbacks[this.callbacks.length - 1] = callback;
|
|
1730
|
+
return;
|
|
2087
1731
|
}
|
|
2088
|
-
|
|
1732
|
+
this.callbacks.push(callback);
|
|
2089
1733
|
}
|
|
2090
|
-
|
|
2091
|
-
|
|
2092
|
-
|
|
2093
|
-
|
|
2094
|
-
|
|
2095
|
-
|
|
2096
|
-
|
|
2097
|
-
|
|
1734
|
+
close() {
|
|
1735
|
+
if (this.ws) {
|
|
1736
|
+
try {
|
|
1737
|
+
this.ws.send(JSON.stringify({ type: "CloseStream" }));
|
|
1738
|
+
} catch {
|
|
1739
|
+
}
|
|
1740
|
+
this.ws.close();
|
|
1741
|
+
this.ws = null;
|
|
2098
1742
|
}
|
|
2099
|
-
return Buffer.concat([out, tail]);
|
|
2100
1743
|
}
|
|
2101
1744
|
};
|
|
2102
1745
|
|
|
@@ -2697,8 +2340,8 @@ var StreamHandler = class {
|
|
|
2697
2340
|
this.caller = caller;
|
|
2698
2341
|
this.callee = callee;
|
|
2699
2342
|
this.history = createHistoryManager(200);
|
|
2700
|
-
const sttProviderName = deps.agent.stt
|
|
2701
|
-
const ttsProviderName = deps.agent.tts
|
|
2343
|
+
const sttProviderName = deps.agent.stt ? deps.agent.stt.constructor?.name ?? "custom" : void 0;
|
|
2344
|
+
const ttsProviderName = deps.agent.tts ? deps.agent.tts.constructor?.name ?? "custom" : void 0;
|
|
2702
2345
|
const providerMode = deps.agent.provider ?? "openai_realtime";
|
|
2703
2346
|
this.metricsAcc = new CallMetricsAccumulator({
|
|
2704
2347
|
callId: "",
|
|
@@ -2708,7 +2351,7 @@ var StreamHandler = class {
|
|
|
2708
2351
|
ttsProvider: ttsProviderName,
|
|
2709
2352
|
pricing: deps.pricing
|
|
2710
2353
|
});
|
|
2711
|
-
getLogger().
|
|
2354
|
+
getLogger().debug(`WebSocket connection opened (${deps.bridge.label})`);
|
|
2712
2355
|
}
|
|
2713
2356
|
// ---------------------------------------------------------------------------
|
|
2714
2357
|
// Public: called by the provider-specific parsers in server.ts
|
|
@@ -2724,9 +2367,12 @@ var StreamHandler = class {
|
|
|
2724
2367
|
this.metricsAcc.callId = callId;
|
|
2725
2368
|
if (customParams.caller && !this.caller) this.caller = customParams.caller;
|
|
2726
2369
|
if (customParams.callee && !this.callee) this.callee = customParams.callee;
|
|
2727
|
-
|
|
2370
|
+
const mode = this.deps.agent.engine ? `engine=${this.deps.agent.engine.kind ?? "unknown"}` : "pipeline";
|
|
2371
|
+
getLogger().info(
|
|
2372
|
+
`Call started: ${callId} (${this.deps.bridge.label}, ${mode}, ${sanitizeLogValue(this.caller || "?")} \u2192 ${sanitizeLogValue(this.callee || "?")})`
|
|
2373
|
+
);
|
|
2728
2374
|
if (Object.keys(customParams).length > 0) {
|
|
2729
|
-
getLogger().
|
|
2375
|
+
getLogger().debug(`Custom params: ${sanitizeLogValue(JSON.stringify(customParams))}`);
|
|
2730
2376
|
}
|
|
2731
2377
|
this.deps.metricsStore.recordCallStart({
|
|
2732
2378
|
call_id: callId,
|
|
@@ -2774,7 +2420,7 @@ var StreamHandler = class {
|
|
|
2774
2420
|
}
|
|
2775
2421
|
});
|
|
2776
2422
|
if (recResp.ok) {
|
|
2777
|
-
getLogger().
|
|
2423
|
+
getLogger().debug(`Recording started for ${callId}`);
|
|
2778
2424
|
} else {
|
|
2779
2425
|
getLogger().warn(`could not start recording: ${await recResp.text()}`);
|
|
2780
2426
|
}
|
|
@@ -2829,7 +2475,7 @@ var StreamHandler = class {
|
|
|
2829
2475
|
}
|
|
2830
2476
|
/** Handle a DTMF keypress event (Twilio only). */
|
|
2831
2477
|
async handleDtmf(digit) {
|
|
2832
|
-
getLogger().
|
|
2478
|
+
getLogger().debug(`DTMF: ${digit}`);
|
|
2833
2479
|
if (this.adapter instanceof OpenAIRealtimeAdapter) {
|
|
2834
2480
|
await this.adapter.sendText(`The user pressed key ${digit} on their phone keypad.`);
|
|
2835
2481
|
}
|
|
@@ -2888,26 +2534,17 @@ var StreamHandler = class {
|
|
|
2888
2534
|
// ---------------------------------------------------------------------------
|
|
2889
2535
|
async initPipeline(resolvedPrompt) {
|
|
2890
2536
|
const label = this.deps.bridge.label;
|
|
2891
|
-
this.stt = this.deps.bridge.createStt(this.deps.agent);
|
|
2892
|
-
|
|
2893
|
-
if (this.deps.agent.tts.provider === "elevenlabs") {
|
|
2894
|
-
this.tts = new ElevenLabsTTS(this.deps.agent.tts.apiKey, this.deps.agent.tts.voice ?? "21m00Tcm4TlvDq8ikWAM");
|
|
2895
|
-
}
|
|
2896
|
-
if (this.deps.agent.tts.provider === "openai") {
|
|
2897
|
-
this.tts = new OpenAITTS(this.deps.agent.tts.apiKey, this.deps.agent.tts.voice ?? "alloy");
|
|
2898
|
-
}
|
|
2899
|
-
} else if (this.deps.agent.elevenlabsKey) {
|
|
2900
|
-
this.tts = new ElevenLabsTTS(this.deps.agent.elevenlabsKey, this.deps.agent.voice || "rachel");
|
|
2901
|
-
}
|
|
2537
|
+
this.stt = await this.deps.bridge.createStt(this.deps.agent);
|
|
2538
|
+
this.tts = await createTTS(this.deps.agent);
|
|
2902
2539
|
if (!this.stt) {
|
|
2903
|
-
getLogger().
|
|
2540
|
+
getLogger().debug(`Pipeline mode (${label}): no STT configured`);
|
|
2904
2541
|
}
|
|
2905
2542
|
if (!this.tts) {
|
|
2906
|
-
getLogger().
|
|
2543
|
+
getLogger().debug(`Pipeline mode (${label}): no TTS configured`);
|
|
2907
2544
|
}
|
|
2908
2545
|
try {
|
|
2909
2546
|
if (this.stt) await this.stt.connect();
|
|
2910
|
-
getLogger().
|
|
2547
|
+
getLogger().debug(`Pipeline mode (${label}): STT + TTS connected`);
|
|
2911
2548
|
} catch (e) {
|
|
2912
2549
|
getLogger().error(`Pipeline connect FAILED (${label}):`, e);
|
|
2913
2550
|
try {
|
|
@@ -2940,7 +2577,24 @@ var StreamHandler = class {
|
|
|
2940
2577
|
this.history.push({ role: "assistant", text: this.deps.agent.firstMessage, timestamp: Date.now() });
|
|
2941
2578
|
}
|
|
2942
2579
|
}
|
|
2943
|
-
if (
|
|
2580
|
+
if (this.deps.agent.llm) {
|
|
2581
|
+
if (this.deps.onMessage) {
|
|
2582
|
+
throw new Error(
|
|
2583
|
+
"Cannot pass both agent({ llm }) and serve({ onMessage }). Pick one \u2014 `llm` for built-in LLMs, `onMessage` for custom logic."
|
|
2584
|
+
);
|
|
2585
|
+
}
|
|
2586
|
+
this.llmLoop = new LLMLoop(
|
|
2587
|
+
"",
|
|
2588
|
+
// apiKey unused when llmProvider is supplied
|
|
2589
|
+
"",
|
|
2590
|
+
// model unused when llmProvider is supplied
|
|
2591
|
+
resolvedPrompt,
|
|
2592
|
+
this.deps.agent.tools,
|
|
2593
|
+
this.deps.agent.llm
|
|
2594
|
+
);
|
|
2595
|
+
const llmLabel = this.deps.agent.llm.constructor?.name ?? "custom";
|
|
2596
|
+
getLogger().debug(`Built-in LLM loop active (pipeline, ${label}, llm=${llmLabel})`);
|
|
2597
|
+
} else if (!this.deps.onMessage && this.deps.config.openaiKey) {
|
|
2944
2598
|
let llmModel = this.deps.agent.model || "gpt-4o-mini";
|
|
2945
2599
|
if (llmModel.includes("realtime")) llmModel = "gpt-4o-mini";
|
|
2946
2600
|
this.llmLoop = new LLMLoop(
|
|
@@ -2949,7 +2603,7 @@ var StreamHandler = class {
|
|
|
2949
2603
|
resolvedPrompt,
|
|
2950
2604
|
this.deps.agent.tools
|
|
2951
2605
|
);
|
|
2952
|
-
getLogger().
|
|
2606
|
+
getLogger().debug(`Built-in LLM loop active (pipeline, ${label})`);
|
|
2953
2607
|
}
|
|
2954
2608
|
if (this.stt) {
|
|
2955
2609
|
this.stt.onTranscript(async (transcript) => {
|
|
@@ -3010,7 +2664,7 @@ var StreamHandler = class {
|
|
|
3010
2664
|
}
|
|
3011
2665
|
async processTranscript(transcript) {
|
|
3012
2666
|
if (transcript.text && this.isSpeaking) {
|
|
3013
|
-
getLogger().
|
|
2667
|
+
getLogger().debug(
|
|
3014
2668
|
`Barge-in: caller spoke over agent (${sanitizeLogValue(transcript.text.slice(0, 40))})`
|
|
3015
2669
|
);
|
|
3016
2670
|
this.isSpeaking = false;
|
|
@@ -3045,17 +2699,17 @@ var StreamHandler = class {
|
|
|
3045
2699
|
"cool"
|
|
3046
2700
|
]);
|
|
3047
2701
|
if (HALLUCINATIONS.has(stripped) || stripped === "") {
|
|
3048
|
-
getLogger().
|
|
2702
|
+
getLogger().debug(`Dropped likely STT hallucination: ${sanitizeLogValue(normalised.slice(0, 40))}`);
|
|
3049
2703
|
return;
|
|
3050
2704
|
}
|
|
3051
2705
|
if (sinceLastMs < 2e3 && normalised === this.lastCommitText) {
|
|
3052
|
-
getLogger().
|
|
2706
|
+
getLogger().debug(
|
|
3053
2707
|
`Dropped duplicate final transcript (${(sinceLastMs / 1e3).toFixed(1)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
|
|
3054
2708
|
);
|
|
3055
2709
|
return;
|
|
3056
2710
|
}
|
|
3057
2711
|
if (sinceLastMs < 500) {
|
|
3058
|
-
getLogger().
|
|
2712
|
+
getLogger().debug(
|
|
3059
2713
|
`Dropped back-to-back final transcript (${(sinceLastMs / 1e3).toFixed(2)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
|
|
3060
2714
|
);
|
|
3061
2715
|
return;
|
|
@@ -3063,7 +2717,7 @@ var StreamHandler = class {
|
|
|
3063
2717
|
this.lastCommitText = normalised;
|
|
3064
2718
|
this.lastCommitAt = now;
|
|
3065
2719
|
const label = this.deps.bridge.label;
|
|
3066
|
-
getLogger().
|
|
2720
|
+
getLogger().debug(`User (${label} pipeline): ${sanitizeLogValue(transcript.text)}`);
|
|
3067
2721
|
this.metricsAcc.startTurn();
|
|
3068
2722
|
this.metricsAcc.recordSttComplete(transcript.text);
|
|
3069
2723
|
if (this.deps.onTranscript) {
|
|
@@ -3078,7 +2732,7 @@ var StreamHandler = class {
|
|
|
3078
2732
|
const hookCtx = this.buildHookContext();
|
|
3079
2733
|
const filteredTranscript = await hookExecutor.runAfterTranscribe(transcript.text, hookCtx);
|
|
3080
2734
|
if (filteredTranscript === null) {
|
|
3081
|
-
getLogger().
|
|
2735
|
+
getLogger().debug(`afterTranscribe hook vetoed turn (${label})`);
|
|
3082
2736
|
this.metricsAcc.recordTurnInterrupted();
|
|
3083
2737
|
return;
|
|
3084
2738
|
}
|
|
@@ -3166,7 +2820,7 @@ var StreamHandler = class {
|
|
|
3166
2820
|
if (!this.llmLoop) {
|
|
3167
2821
|
const guard = checkGuardrails(responseText, this.deps.agent.guardrails);
|
|
3168
2822
|
if (guard) {
|
|
3169
|
-
getLogger().
|
|
2823
|
+
getLogger().debug(`Guardrail '${guard.name}' triggered (pipeline)`);
|
|
3170
2824
|
responseText = guard.replacement ?? "I'm sorry, I can't respond to that.";
|
|
3171
2825
|
}
|
|
3172
2826
|
this.metricsAcc.recordLlmComplete();
|
|
@@ -3244,7 +2898,7 @@ var StreamHandler = class {
|
|
|
3244
2898
|
this.adapter = this.deps.buildAIAdapter(resolvedPrompt);
|
|
3245
2899
|
try {
|
|
3246
2900
|
await this.adapter.connect();
|
|
3247
|
-
getLogger().
|
|
2901
|
+
getLogger().debug(`AI adapter connected (${label})`);
|
|
3248
2902
|
} catch (e) {
|
|
3249
2903
|
getLogger().error(`AI adapter connect FAILED (${label}):`, e);
|
|
3250
2904
|
try {
|
|
@@ -3286,7 +2940,7 @@ var StreamHandler = class {
|
|
|
3286
2940
|
this.deps.bridge.sendMark(this.ws, `audio_${this.chunkCount}`, this.streamSid);
|
|
3287
2941
|
} else if (type === "transcript_input") {
|
|
3288
2942
|
const inputText = eventData;
|
|
3289
|
-
getLogger().
|
|
2943
|
+
getLogger().debug(`User (${this.deps.bridge.label}): ${sanitizeLogValue(inputText)}`);
|
|
3290
2944
|
this.history.push({ role: "user", text: inputText, timestamp: Date.now() });
|
|
3291
2945
|
this.metricsAcc.startTurn();
|
|
3292
2946
|
this.currentAgentText = "";
|
|
@@ -3304,7 +2958,7 @@ var StreamHandler = class {
|
|
|
3304
2958
|
if (outputText) {
|
|
3305
2959
|
const triggered = checkGuardrails(outputText, this.deps.agent.guardrails);
|
|
3306
2960
|
if (triggered) {
|
|
3307
|
-
getLogger().
|
|
2961
|
+
getLogger().debug(`Guardrail '${triggered.name}' triggered`);
|
|
3308
2962
|
if (this.adapter instanceof OpenAIRealtimeAdapter) {
|
|
3309
2963
|
this.adapter.cancelResponse();
|
|
3310
2964
|
await this.adapter.sendText(triggered.replacement ?? "I'm sorry, I can't respond to that.");
|
|
@@ -3363,7 +3017,7 @@ var StreamHandler = class {
|
|
|
3363
3017
|
await adapter.sendFunctionResult(fc.call_id, JSON.stringify({ error: "Invalid phone number format", status: "rejected" }));
|
|
3364
3018
|
return;
|
|
3365
3019
|
}
|
|
3366
|
-
getLogger().
|
|
3020
|
+
getLogger().debug(`Transferring call to ${transferTo}`);
|
|
3367
3021
|
await adapter.sendFunctionResult(fc.call_id, JSON.stringify({ status: "transferring", to: transferTo }));
|
|
3368
3022
|
await this.deps.bridge.transferCall(this.callId, transferTo);
|
|
3369
3023
|
if (this.deps.onTranscript) {
|
|
@@ -3379,7 +3033,7 @@ var StreamHandler = class {
|
|
|
3379
3033
|
endArgs = {};
|
|
3380
3034
|
}
|
|
3381
3035
|
const reason = endArgs.reason ?? "conversation_complete";
|
|
3382
|
-
getLogger().
|
|
3036
|
+
getLogger().debug(`Ending call (${this.deps.bridge.label}): ${reason}`);
|
|
3383
3037
|
await adapter.sendFunctionResult(fc.call_id, JSON.stringify({ status: "ending", reason }));
|
|
3384
3038
|
await this.deps.bridge.endCall(this.callId, this.ws);
|
|
3385
3039
|
if (this.deps.onTranscript) {
|
|
@@ -3416,10 +3070,11 @@ var StreamHandler = class {
|
|
|
3416
3070
|
this.maxDurationTimer = null;
|
|
3417
3071
|
}
|
|
3418
3072
|
await this.deps.bridge.queryTelephonyCost(this.metricsAcc, this.callId);
|
|
3419
|
-
|
|
3420
|
-
|
|
3421
|
-
|
|
3422
|
-
|
|
3073
|
+
if (this.stt instanceof DeepgramSTT && this.stt.requestId) {
|
|
3074
|
+
const dgKey = this.stt.apiKey;
|
|
3075
|
+
if (dgKey) {
|
|
3076
|
+
await queryDeepgramCost(this.metricsAcc, dgKey, this.stt.requestId);
|
|
3077
|
+
}
|
|
3423
3078
|
}
|
|
3424
3079
|
const finalMetrics = this.metricsAcc.endCall();
|
|
3425
3080
|
const callEndData = {
|
|
@@ -3430,6 +3085,11 @@ var StreamHandler = class {
|
|
|
3430
3085
|
transcript: [...this.history.entries],
|
|
3431
3086
|
metrics: finalMetrics
|
|
3432
3087
|
};
|
|
3088
|
+
const cost = finalMetrics.cost?.total ?? 0;
|
|
3089
|
+
const latencyP95 = finalMetrics.latency_p95?.total_ms ?? 0;
|
|
3090
|
+
getLogger().info(
|
|
3091
|
+
`Call ended: ${this.callId} (${finalMetrics.duration_seconds.toFixed(1)}s, ${finalMetrics.turns.length} turns, cost=$${cost.toFixed(4)}, p95=${Math.round(latencyP95)}ms)`
|
|
3092
|
+
);
|
|
3433
3093
|
this.deps.metricsStore.recordCallEnd(
|
|
3434
3094
|
callEndData,
|
|
3435
3095
|
finalMetrics
|
|
@@ -3466,7 +3126,7 @@ async function queryDeepgramCost(metricsAcc, deepgramKey, deepgramRequestId) {
|
|
|
3466
3126
|
const usd = reqData.response?.details?.usd;
|
|
3467
3127
|
if (usd != null) {
|
|
3468
3128
|
metricsAcc.setActualSttCost(usd);
|
|
3469
|
-
getLogger().
|
|
3129
|
+
getLogger().debug(`Deepgram actual cost: $${usd}`);
|
|
3470
3130
|
}
|
|
3471
3131
|
}
|
|
3472
3132
|
}
|
|
@@ -3576,11 +3236,16 @@ function resolveVariables(template, variables) {
|
|
|
3576
3236
|
return result;
|
|
3577
3237
|
}
|
|
3578
3238
|
function buildAIAdapter(config, agent, resolvedPrompt) {
|
|
3239
|
+
const engine = agent.engine;
|
|
3579
3240
|
if (agent.provider === "elevenlabs_convai") {
|
|
3580
|
-
|
|
3241
|
+
if (!engine || engine.kind !== "elevenlabs_convai") {
|
|
3242
|
+
throw new Error(
|
|
3243
|
+
"ElevenLabs ConvAI mode requires `agent.engine = new ElevenLabsConvAI({...})`."
|
|
3244
|
+
);
|
|
3245
|
+
}
|
|
3581
3246
|
return new ElevenLabsConvAIAdapter(
|
|
3582
|
-
|
|
3583
|
-
|
|
3247
|
+
engine.apiKey,
|
|
3248
|
+
engine.agentId,
|
|
3584
3249
|
agent.voice ?? "21m00Tcm4TlvDq8ikWAM",
|
|
3585
3250
|
"eleven_turbo_v2_5",
|
|
3586
3251
|
agent.language ?? "en",
|
|
@@ -3593,33 +3258,15 @@ function buildAIAdapter(config, agent, resolvedPrompt) {
|
|
|
3593
3258
|
parameters: t.parameters
|
|
3594
3259
|
})) ?? [];
|
|
3595
3260
|
const tools = [...agentTools, TRANSFER_CALL_TOOL, END_CALL_TOOL];
|
|
3261
|
+
const openaiKey = engine && engine.kind === "openai_realtime" ? engine.apiKey : config.openaiKey ?? "";
|
|
3596
3262
|
return new OpenAIRealtimeAdapter(
|
|
3597
|
-
|
|
3263
|
+
openaiKey,
|
|
3598
3264
|
agent.model,
|
|
3599
3265
|
agent.voice,
|
|
3600
3266
|
resolvedPrompt ?? agent.systemPrompt,
|
|
3601
3267
|
tools
|
|
3602
3268
|
);
|
|
3603
3269
|
}
|
|
3604
|
-
function extractDeepgramOptions(options) {
|
|
3605
|
-
if (!options) return {};
|
|
3606
|
-
const get = (snake, camel) => options[snake] ?? options[camel];
|
|
3607
|
-
const out = {};
|
|
3608
|
-
const model = get("model", "model");
|
|
3609
|
-
if (typeof model === "string") out.model = model;
|
|
3610
|
-
const endpointing = get("endpointing_ms", "endpointingMs");
|
|
3611
|
-
if (typeof endpointing === "number") out.endpointingMs = endpointing;
|
|
3612
|
-
const utteranceEnd = get("utterance_end_ms", "utteranceEndMs");
|
|
3613
|
-
if (utteranceEnd === null) out.utteranceEndMs = null;
|
|
3614
|
-
else if (typeof utteranceEnd === "number") out.utteranceEndMs = utteranceEnd;
|
|
3615
|
-
const smart = get("smart_format", "smartFormat");
|
|
3616
|
-
if (typeof smart === "boolean") out.smartFormat = smart;
|
|
3617
|
-
const interim = get("interim_results", "interimResults");
|
|
3618
|
-
if (typeof interim === "boolean") out.interimResults = interim;
|
|
3619
|
-
const vad = get("vad_events", "vadEvents");
|
|
3620
|
-
if (typeof vad === "boolean") out.vadEvents = vad;
|
|
3621
|
-
return out;
|
|
3622
|
-
}
|
|
3623
3270
|
var TwilioBridge = class {
|
|
3624
3271
|
constructor(config) {
|
|
3625
3272
|
this.config = config;
|
|
@@ -3671,24 +3318,7 @@ var TwilioBridge = class {
|
|
|
3671
3318
|
}
|
|
3672
3319
|
}
|
|
3673
3320
|
createStt(agent) {
|
|
3674
|
-
|
|
3675
|
-
if (agent.stt) {
|
|
3676
|
-
if (agent.stt.provider === "deepgram") {
|
|
3677
|
-
const dgOptions = extractDeepgramOptions(agent.stt.options);
|
|
3678
|
-
if (isPipeline) {
|
|
3679
|
-
return new DeepgramSTT(agent.stt.apiKey, agent.stt.language ?? "en", dgOptions.model, "linear16", 16e3, dgOptions);
|
|
3680
|
-
}
|
|
3681
|
-
return DeepgramSTT.forTwilio(agent.stt.apiKey, agent.stt.language ?? "en", dgOptions.model, dgOptions);
|
|
3682
|
-
} else if (agent.stt.provider === "whisper") {
|
|
3683
|
-
return isPipeline ? new WhisperSTT(agent.stt.apiKey, "whisper-1", agent.stt.language ?? "en") : WhisperSTT.forTwilio(agent.stt.apiKey, agent.stt.language ?? "en");
|
|
3684
|
-
}
|
|
3685
|
-
} else if (agent.deepgramKey) {
|
|
3686
|
-
if (isPipeline) {
|
|
3687
|
-
return new DeepgramSTT(agent.deepgramKey, agent.language ?? "en", "nova-3", "linear16", 16e3);
|
|
3688
|
-
}
|
|
3689
|
-
return DeepgramSTT.forTwilio(agent.deepgramKey, agent.language ?? "en");
|
|
3690
|
-
}
|
|
3691
|
-
return null;
|
|
3321
|
+
return createSTT(agent);
|
|
3692
3322
|
}
|
|
3693
3323
|
async queryTelephonyCost(metricsAcc, callId) {
|
|
3694
3324
|
if (this.config.twilioSid && this.config.twilioToken && callId) {
|
|
@@ -3835,24 +3465,7 @@ var TelnyxBridge = class {
|
|
|
3835
3465
|
ws.close();
|
|
3836
3466
|
}
|
|
3837
3467
|
createStt(agent) {
|
|
3838
|
-
|
|
3839
|
-
if (agent.stt.provider === "deepgram") {
|
|
3840
|
-
const dgOptions = extractDeepgramOptions(agent.stt.options);
|
|
3841
|
-
return new DeepgramSTT(
|
|
3842
|
-
agent.stt.apiKey,
|
|
3843
|
-
agent.stt.language ?? "en",
|
|
3844
|
-
dgOptions.model ?? "nova-3",
|
|
3845
|
-
"linear16",
|
|
3846
|
-
16e3,
|
|
3847
|
-
dgOptions
|
|
3848
|
-
);
|
|
3849
|
-
} else if (agent.stt.provider === "whisper") {
|
|
3850
|
-
return new WhisperSTT(agent.stt.apiKey, "whisper-1", agent.stt.language ?? "en");
|
|
3851
|
-
}
|
|
3852
|
-
} else if (agent.deepgramKey) {
|
|
3853
|
-
return new DeepgramSTT(agent.deepgramKey, agent.language ?? "en", "nova-3", "linear16", 16e3);
|
|
3854
|
-
}
|
|
3855
|
-
return null;
|
|
3468
|
+
return createSTT(agent);
|
|
3856
3469
|
}
|
|
3857
3470
|
async queryTelephonyCost(metricsAcc, callId) {
|
|
3858
3471
|
if (this.config.telnyxKey && callId) {
|
|
@@ -4790,8 +4403,6 @@ var TestSession = class {
|
|
|
4790
4403
|
export {
|
|
4791
4404
|
OpenAIRealtimeAdapter,
|
|
4792
4405
|
ElevenLabsConvAIAdapter,
|
|
4793
|
-
DeepgramSTT,
|
|
4794
|
-
WhisperSTT,
|
|
4795
4406
|
DEFAULT_PRICING,
|
|
4796
4407
|
mergePricing,
|
|
4797
4408
|
calculateSttCost,
|
|
@@ -4807,8 +4418,7 @@ export {
|
|
|
4807
4418
|
RemoteMessageHandler,
|
|
4808
4419
|
isRemoteUrl,
|
|
4809
4420
|
isWebSocketUrl,
|
|
4810
|
-
|
|
4811
|
-
OpenAITTS,
|
|
4421
|
+
DeepgramSTT,
|
|
4812
4422
|
CallMetricsAccumulator,
|
|
4813
4423
|
mulawToPcm16,
|
|
4814
4424
|
pcm16ToMulaw,
|