getpatter 0.4.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +136 -162
- package/dist/carrier-config-CPG5CROM.mjs +84 -0
- package/dist/{chunk-35EVXMGB.mjs → chunk-757NVN4L.mjs} +396 -458
- package/dist/cli.js +92 -5
- package/dist/index.d.mts +901 -241
- package/dist/index.d.ts +901 -241
- package/dist/index.js +1763 -921
- package/dist/index.mjs +1240 -419
- package/dist/{test-mode-RH65MMSP.mjs → test-mode-YFOL2HYH.mjs} +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -305,235 +305,16 @@ var init_elevenlabs_convai = __esm({
|
|
|
305
305
|
}
|
|
306
306
|
});
|
|
307
307
|
|
|
308
|
-
// src/providers/deepgram-stt.ts
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
init_logger();
|
|
315
|
-
DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/listen";
|
|
316
|
-
DeepgramSTT = class _DeepgramSTT {
|
|
317
|
-
constructor(apiKey, language = "en", model = "nova-3", encoding = "linear16", sampleRate = 16e3) {
|
|
318
|
-
this.apiKey = apiKey;
|
|
319
|
-
this.language = language;
|
|
320
|
-
this.model = model;
|
|
321
|
-
this.encoding = encoding;
|
|
322
|
-
this.sampleRate = sampleRate;
|
|
323
|
-
}
|
|
324
|
-
ws = null;
|
|
325
|
-
callbacks = [];
|
|
326
|
-
/** Request ID from Deepgram — used to query actual cost post-call. */
|
|
327
|
-
requestId = "";
|
|
328
|
-
/** Factory for Twilio calls — mulaw 8 kHz. */
|
|
329
|
-
static forTwilio(apiKey, language = "en", model = "nova-3") {
|
|
330
|
-
return new _DeepgramSTT(apiKey, language, model, "mulaw", 8e3);
|
|
331
|
-
}
|
|
332
|
-
async connect() {
|
|
333
|
-
const params = new URLSearchParams({
|
|
334
|
-
model: this.model,
|
|
335
|
-
language: this.language,
|
|
336
|
-
encoding: this.encoding,
|
|
337
|
-
sample_rate: String(this.sampleRate),
|
|
338
|
-
channels: "1",
|
|
339
|
-
interim_results: "true",
|
|
340
|
-
endpointing: "300",
|
|
341
|
-
smart_format: "true",
|
|
342
|
-
vad_events: "true",
|
|
343
|
-
no_delay: "true"
|
|
344
|
-
});
|
|
345
|
-
const url = `${DEEPGRAM_WS_URL}?${params.toString()}`;
|
|
346
|
-
this.ws = new import_ws4.default(url, {
|
|
347
|
-
headers: { Authorization: `Token ${this.apiKey}` }
|
|
348
|
-
});
|
|
349
|
-
await new Promise((resolve, reject) => {
|
|
350
|
-
const timer = setTimeout(() => reject(new Error("Deepgram connect timeout")), 1e4);
|
|
351
|
-
this.ws.once("open", () => {
|
|
352
|
-
clearTimeout(timer);
|
|
353
|
-
resolve();
|
|
354
|
-
});
|
|
355
|
-
this.ws.once("error", (err) => {
|
|
356
|
-
clearTimeout(timer);
|
|
357
|
-
reject(err);
|
|
358
|
-
});
|
|
359
|
-
});
|
|
360
|
-
this.ws.on("message", (raw) => {
|
|
361
|
-
let data;
|
|
362
|
-
try {
|
|
363
|
-
data = JSON.parse(raw.toString());
|
|
364
|
-
} catch {
|
|
365
|
-
return;
|
|
366
|
-
}
|
|
367
|
-
if (data.type === "Metadata" && data.request_id) {
|
|
368
|
-
this.requestId = data.request_id;
|
|
369
|
-
return;
|
|
370
|
-
}
|
|
371
|
-
if (data.type !== "Results") return;
|
|
372
|
-
const alternatives = data.channel?.alternatives ?? [];
|
|
373
|
-
if (!alternatives.length) return;
|
|
374
|
-
const best = alternatives[0];
|
|
375
|
-
const text = (best.transcript ?? "").trim();
|
|
376
|
-
if (!text) return;
|
|
377
|
-
const transcript = {
|
|
378
|
-
text,
|
|
379
|
-
isFinal: Boolean(data.is_final) && Boolean(data.speech_final),
|
|
380
|
-
confidence: best.confidence ?? 0
|
|
381
|
-
};
|
|
382
|
-
for (const cb of this.callbacks) {
|
|
383
|
-
cb(transcript);
|
|
384
|
-
}
|
|
385
|
-
});
|
|
386
|
-
}
|
|
387
|
-
sendAudio(audio) {
|
|
388
|
-
if (!this.ws || this.ws.readyState !== import_ws4.default.OPEN) return;
|
|
389
|
-
this.ws.send(audio);
|
|
390
|
-
}
|
|
391
|
-
onTranscript(callback) {
|
|
392
|
-
if (this.callbacks.length >= 10) {
|
|
393
|
-
getLogger().warn("DeepgramSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
|
|
394
|
-
this.callbacks[this.callbacks.length - 1] = callback;
|
|
395
|
-
return;
|
|
396
|
-
}
|
|
397
|
-
this.callbacks.push(callback);
|
|
398
|
-
}
|
|
399
|
-
close() {
|
|
400
|
-
if (this.ws) {
|
|
401
|
-
try {
|
|
402
|
-
this.ws.send(JSON.stringify({ type: "CloseStream" }));
|
|
403
|
-
} catch {
|
|
404
|
-
}
|
|
405
|
-
this.ws.close();
|
|
406
|
-
this.ws = null;
|
|
407
|
-
}
|
|
408
|
-
}
|
|
409
|
-
};
|
|
410
|
-
}
|
|
411
|
-
});
|
|
412
|
-
|
|
413
|
-
// src/providers/whisper-stt.ts
|
|
414
|
-
function wrapPcmInWav(pcm, sampleRate = 16e3, channels = 1, bitsPerSample = 16) {
|
|
415
|
-
const dataSize = pcm.length;
|
|
416
|
-
const header = Buffer.alloc(44);
|
|
417
|
-
header.write("RIFF", 0);
|
|
418
|
-
header.writeUInt32LE(36 + dataSize, 4);
|
|
419
|
-
header.write("WAVE", 8);
|
|
420
|
-
header.write("fmt ", 12);
|
|
421
|
-
header.writeUInt32LE(16, 16);
|
|
422
|
-
header.writeUInt16LE(1, 20);
|
|
423
|
-
header.writeUInt16LE(channels, 22);
|
|
424
|
-
header.writeUInt32LE(sampleRate, 24);
|
|
425
|
-
header.writeUInt32LE(sampleRate * channels * (bitsPerSample / 8), 28);
|
|
426
|
-
header.writeUInt16LE(channels * (bitsPerSample / 8), 32);
|
|
427
|
-
header.writeUInt16LE(bitsPerSample, 34);
|
|
428
|
-
header.write("data", 36);
|
|
429
|
-
header.writeUInt32LE(dataSize, 40);
|
|
430
|
-
return Buffer.concat([header, pcm]);
|
|
308
|
+
// src/provider-factory.ts
|
|
309
|
+
async function createSTT(agent) {
|
|
310
|
+
return agent.stt ?? null;
|
|
311
|
+
}
|
|
312
|
+
async function createTTS(agent) {
|
|
313
|
+
return agent.tts ?? null;
|
|
431
314
|
}
|
|
432
|
-
var OPENAI_TRANSCRIPTION_URL, DEFAULT_BUFFER_SIZE, WhisperSTT;
|
|
433
|
-
var init_whisper_stt = __esm({
|
434
|
-
"src/providers/whisper-stt.ts"() {
|
|
315
|
+
var init_provider_factory = __esm({
|
|
316
|
+
"src/provider-factory.ts"() {
|
|
435
317
|
"use strict";
|
|
436
|
-
init_logger();
|
|
437
|
-
OPENAI_TRANSCRIPTION_URL = "https://api.openai.com/v1/audio/transcriptions";
|
|
438
|
-
DEFAULT_BUFFER_SIZE = 16e3 * 2;
|
|
439
|
-
WhisperSTT = class _WhisperSTT {
|
|
440
|
-
apiKey;
|
|
441
|
-
model;
|
|
442
|
-
language;
|
|
443
|
-
bufferSize;
|
|
444
|
-
buffer = Buffer.alloc(0);
|
|
445
|
-
callbacks = [];
|
|
446
|
-
running = false;
|
|
447
|
-
pendingTranscriptions = [];
|
|
448
|
-
constructor(apiKey, model = "whisper-1", language, bufferSize = DEFAULT_BUFFER_SIZE) {
|
|
449
|
-
this.apiKey = apiKey;
|
|
450
|
-
this.model = model;
|
|
451
|
-
this.language = language;
|
|
452
|
-
this.bufferSize = bufferSize;
|
|
453
|
-
}
|
|
454
|
-
/** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
|
|
455
|
-
static forTwilio(apiKey, language = "en", model = "whisper-1") {
|
|
456
|
-
return new _WhisperSTT(apiKey, model, language);
|
|
457
|
-
}
|
|
458
|
-
async connect() {
|
|
459
|
-
this.running = true;
|
|
460
|
-
this.buffer = Buffer.alloc(0);
|
|
461
|
-
}
|
|
462
|
-
sendAudio(audio) {
|
|
463
|
-
if (!this.running) return;
|
|
464
|
-
this.buffer = Buffer.concat([this.buffer, audio]);
|
|
465
|
-
if (this.buffer.length >= this.bufferSize) {
|
|
466
|
-
const pcm = this.buffer;
|
|
467
|
-
this.buffer = Buffer.alloc(0);
|
|
468
|
-
this.trackTranscription(this.transcribeBuffer(pcm));
|
|
469
|
-
}
|
|
470
|
-
}
|
|
471
|
-
trackTranscription(promise) {
|
|
472
|
-
const wrapped = promise.finally(() => {
|
|
473
|
-
const idx = this.pendingTranscriptions.indexOf(wrapped);
|
|
474
|
-
if (idx !== -1) this.pendingTranscriptions.splice(idx, 1);
|
|
475
|
-
});
|
|
476
|
-
this.pendingTranscriptions.push(wrapped);
|
|
477
|
-
}
|
|
478
|
-
onTranscript(callback) {
|
|
479
|
-
if (this.callbacks.length >= 10) {
|
|
480
|
-
getLogger().warn("WhisperSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
|
|
481
|
-
this.callbacks[this.callbacks.length - 1] = callback;
|
|
482
|
-
return;
|
|
483
|
-
}
|
|
484
|
-
this.callbacks.push(callback);
|
|
485
|
-
}
|
|
486
|
-
async close() {
|
|
487
|
-
this.running = false;
|
|
488
|
-
if (this.buffer.length >= this.bufferSize / 4) {
|
|
489
|
-
const pcm = this.buffer;
|
|
490
|
-
this.buffer = Buffer.alloc(0);
|
|
491
|
-
this.trackTranscription(this.transcribeBuffer(pcm));
|
|
492
|
-
} else {
|
|
493
|
-
this.buffer = Buffer.alloc(0);
|
|
494
|
-
}
|
|
495
|
-
await Promise.allSettled(this.pendingTranscriptions);
|
|
496
|
-
this.callbacks = [];
|
|
497
|
-
}
|
|
498
|
-
// ------------------------------------------------------------------
|
|
499
|
-
// Private
|
|
500
|
-
// ------------------------------------------------------------------
|
|
501
|
-
async transcribeBuffer(pcm) {
|
|
502
|
-
const wav = wrapPcmInWav(pcm);
|
|
503
|
-
const formData = new FormData();
|
|
504
|
-
formData.append("file", new Blob([wav.buffer.slice(wav.byteOffset, wav.byteOffset + wav.byteLength)], { type: "audio/wav" }), "audio.wav");
|
|
505
|
-
formData.append("model", this.model);
|
|
506
|
-
if (this.language) {
|
|
507
|
-
formData.append("language", this.language);
|
|
508
|
-
}
|
|
509
|
-
try {
|
|
510
|
-
const resp = await fetch(OPENAI_TRANSCRIPTION_URL, {
|
|
511
|
-
method: "POST",
|
|
512
|
-
headers: { Authorization: `Bearer ${this.apiKey}` },
|
|
513
|
-
body: formData,
|
|
514
|
-
signal: AbortSignal.timeout(15e3)
|
|
515
|
-
});
|
|
516
|
-
if (!resp.ok) {
|
|
517
|
-
const body = await resp.text();
|
|
518
|
-
getLogger().error(`WhisperSTT transcription error: ${resp.status} ${body}`);
|
|
519
|
-
return;
|
|
520
|
-
}
|
|
521
|
-
const json = await resp.json();
|
|
522
|
-
const text = (json.text ?? "").trim();
|
|
523
|
-
if (!text) return;
|
|
524
|
-
const transcript = {
|
|
525
|
-
text,
|
|
526
|
-
isFinal: true,
|
|
527
|
-
confidence: 1
|
|
528
|
-
};
|
|
529
|
-
for (const cb of this.callbacks) {
|
|
530
|
-
cb(transcript);
|
|
531
|
-
}
|
|
532
|
-
} catch (err) {
|
|
533
|
-
getLogger().error(`WhisperSTT transcription error: ${String(err)}`);
|
|
534
|
-
}
|
|
535
|
-
}
|
|
536
|
-
};
|
|
537
318
|
}
|
|
538
319
|
});
|
|
539
320
|
|
|
@@ -616,9 +397,15 @@ var init_store = __esm({
|
|
|
616
397
|
maxCalls;
|
|
617
398
|
calls = [];
|
|
618
399
|
activeCalls = /* @__PURE__ */ new Map();
|
|
619
|
-
|
|
400
|
+
/**
|
|
401
|
+
* Accepts either a numeric ``maxCalls`` (legacy positional — matches the
|
|
402
|
+
* original TS API) or an options object ``{ maxCalls }`` to align with the
|
|
403
|
+
* Python SDK's keyword-argument style. Plain literals also work:
|
|
404
|
+
* ``new MetricsStore()`` / ``new MetricsStore(100)`` / ``new MetricsStore({ maxCalls: 100 })``.
|
|
405
|
+
*/
|
|
406
|
+
constructor(maxCallsOrOpts = 500) {
|
|
620
407
|
super();
|
|
621
|
-
this.maxCalls = maxCalls;
|
|
408
|
+
this.maxCalls = typeof maxCallsOrOpts === "number" ? maxCallsOrOpts : maxCallsOrOpts.maxCalls ?? 500;
|
|
622
409
|
}
|
|
623
410
|
publish(eventType, data) {
|
|
624
411
|
this.emit("sse", { type: eventType, data });
|
|
@@ -626,22 +413,100 @@ var init_store = __esm({
|
|
|
626
413
|
recordCallStart(data) {
|
|
627
414
|
const callId = data.call_id || "";
|
|
628
415
|
if (!callId) return;
|
|
416
|
+
const existing = this.activeCalls.get(callId);
|
|
417
|
+
if (existing) {
|
|
418
|
+
existing.caller = data.caller || existing.caller;
|
|
419
|
+
existing.callee = data.callee || existing.callee;
|
|
420
|
+
existing.direction = data.direction || existing.direction;
|
|
421
|
+
existing.status = "in-progress";
|
|
422
|
+
existing.turns = existing.turns || [];
|
|
423
|
+
} else {
|
|
424
|
+
const record = {
|
|
425
|
+
call_id: callId,
|
|
426
|
+
caller: data.caller || "",
|
|
427
|
+
callee: data.callee || "",
|
|
428
|
+
direction: data.direction || "inbound",
|
|
429
|
+
started_at: Date.now() / 1e3,
|
|
430
|
+
status: "in-progress",
|
|
431
|
+
turns: []
|
|
432
|
+
};
|
|
433
|
+
this.activeCalls.set(callId, record);
|
|
434
|
+
}
|
|
435
|
+
this.publish("call_start", {
|
|
436
|
+
call_id: callId,
|
|
437
|
+
caller: data.caller || "",
|
|
438
|
+
callee: data.callee || "",
|
|
439
|
+
direction: data.direction || "inbound"
|
|
440
|
+
});
|
|
441
|
+
}
|
|
442
|
+
/**
|
|
443
|
+
* Pre-register an outbound call before any webhook fires. Lets the
|
|
444
|
+
* dashboard surface attempts that never reach media (no-answer, busy,
|
|
445
|
+
* carrier-rejected). Mirrors the Python ``record_call_initiated``.
|
|
446
|
+
*/
|
|
447
|
+
recordCallInitiated(data) {
|
|
448
|
+
const callId = data.call_id || "";
|
|
449
|
+
if (!callId) return;
|
|
450
|
+
if (this.activeCalls.has(callId)) return;
|
|
629
451
|
const record = {
|
|
630
452
|
call_id: callId,
|
|
631
453
|
caller: data.caller || "",
|
|
632
454
|
callee: data.callee || "",
|
|
633
|
-
direction: data.direction || "inbound",
|
|
455
|
+
direction: data.direction || "outbound",
|
|
634
456
|
started_at: Date.now() / 1e3,
|
|
457
|
+
status: "initiated",
|
|
635
458
|
turns: []
|
|
636
459
|
};
|
|
637
460
|
this.activeCalls.set(callId, record);
|
|
638
|
-
this.publish("call_start", {
|
|
461
|
+
this.publish("call_initiated", {
|
|
639
462
|
call_id: callId,
|
|
640
463
|
caller: record.caller,
|
|
641
464
|
callee: record.callee,
|
|
642
|
-
direction: record.direction
|
|
465
|
+
direction: record.direction,
|
|
466
|
+
status: record.status
|
|
643
467
|
});
|
|
644
468
|
}
|
|
469
|
+
/**
|
|
470
|
+
* Update the status of an active or completed call. Terminal states
|
|
471
|
+
* (completed, no-answer, busy, failed, canceled, webhook_error) move the
|
|
472
|
+
* row from active to completed so the UI freezes the live duration timer.
|
|
473
|
+
*/
|
|
474
|
+
updateCallStatus(callId, status, extra = {}) {
|
|
475
|
+
if (!callId || !status) return;
|
|
476
|
+
const TERMINAL = /* @__PURE__ */ new Set(["completed", "no-answer", "busy", "failed", "canceled", "webhook_error"]);
|
|
477
|
+
const active = this.activeCalls.get(callId);
|
|
478
|
+
if (active) {
|
|
479
|
+
active.status = status;
|
|
480
|
+
Object.assign(active, extra);
|
|
481
|
+
if (TERMINAL.has(status)) {
|
|
482
|
+
const entry = {
|
|
483
|
+
call_id: callId,
|
|
484
|
+
caller: active.caller || "",
|
|
485
|
+
callee: active.callee || "",
|
|
486
|
+
direction: active.direction || "outbound",
|
|
487
|
+
started_at: active.started_at || 0,
|
|
488
|
+
ended_at: Date.now() / 1e3,
|
|
489
|
+
status,
|
|
490
|
+
metrics: null,
|
|
491
|
+
...extra
|
|
492
|
+
};
|
|
493
|
+
this.activeCalls.delete(callId);
|
|
494
|
+
this.calls.push(entry);
|
|
495
|
+
if (this.calls.length > this.maxCalls) {
|
|
496
|
+
this.calls = this.calls.slice(-this.maxCalls);
|
|
497
|
+
}
|
|
498
|
+
}
|
|
499
|
+
} else {
|
|
500
|
+
for (let i = this.calls.length - 1; i >= 0; i--) {
|
|
501
|
+
if (this.calls[i].call_id === callId) {
|
|
502
|
+
this.calls[i].status = status;
|
|
503
|
+
Object.assign(this.calls[i], extra);
|
|
504
|
+
break;
|
|
505
|
+
}
|
|
506
|
+
}
|
|
507
|
+
}
|
|
508
|
+
this.publish("call_status", { call_id: callId, status, ...extra });
|
|
509
|
+
}
|
|
645
510
|
recordTurn(data) {
|
|
646
511
|
const callId = data.call_id || "";
|
|
647
512
|
const turn = data.turn;
|
|
@@ -658,6 +523,8 @@ var init_store = __esm({
|
|
|
658
523
|
if (!callId) return;
|
|
659
524
|
const active = this.activeCalls.get(callId);
|
|
660
525
|
this.activeCalls.delete(callId);
|
|
526
|
+
const activeStatus = active?.status;
|
|
527
|
+
const resolvedStatus = activeStatus && activeStatus !== "in-progress" ? activeStatus : "completed";
|
|
661
528
|
const entry = {
|
|
662
529
|
call_id: callId,
|
|
663
530
|
caller: data.caller || active?.caller || "",
|
|
@@ -666,6 +533,7 @@ var init_store = __esm({
|
|
|
666
533
|
started_at: active?.started_at || 0,
|
|
667
534
|
ended_at: Date.now() / 1e3,
|
|
668
535
|
transcript: data.transcript || [],
|
|
536
|
+
status: resolvedStatus,
|
|
669
537
|
metrics: metrics ?? null
|
|
670
538
|
};
|
|
671
539
|
this.calls.push(entry);
|
|
@@ -1866,171 +1734,125 @@ var init_remote_message = __esm({
|
|
|
1866
1734
|
}
|
|
1867
1735
|
});
|
|
1868
1736
|
|
|
1869
|
-
// src/providers/
|
|
1870
|
-
var
|
|
1871
|
-
var
|
|
1872
|
-
"src/providers/
|
|
1737
|
+
// src/providers/deepgram-stt.ts
|
|
1738
|
+
var import_ws4, DEEPGRAM_WS_URL, DeepgramSTT;
|
|
1739
|
+
var init_deepgram_stt = __esm({
|
|
1740
|
+
"src/providers/deepgram-stt.ts"() {
|
|
1873
1741
|
"use strict";
|
|
1874
|
-
|
|
1875
|
-
|
|
1876
|
-
|
|
1742
|
+
import_ws4 = __toESM(require("ws"));
|
|
1743
|
+
init_logger();
|
|
1744
|
+
DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/listen";
|
|
1745
|
+
DeepgramSTT = class _DeepgramSTT {
|
|
1746
|
+
ws = null;
|
|
1747
|
+
callbacks = [];
|
|
1748
|
+
/** Request ID from Deepgram — used to query actual cost post-call. */
|
|
1749
|
+
requestId = "";
|
|
1750
|
+
apiKey;
|
|
1751
|
+
language;
|
|
1752
|
+
model;
|
|
1753
|
+
encoding;
|
|
1754
|
+
sampleRate;
|
|
1755
|
+
endpointingMs;
|
|
1756
|
+
utteranceEndMs;
|
|
1757
|
+
smartFormat;
|
|
1758
|
+
interimResults;
|
|
1759
|
+
vadEvents;
|
|
1760
|
+
constructor(apiKey, languageOrOptions, model, encoding, sampleRate, options) {
|
|
1877
1761
|
this.apiKey = apiKey;
|
|
1878
|
-
|
|
1879
|
-
this.
|
|
1880
|
-
this.
|
|
1881
|
-
|
|
1882
|
-
|
|
1883
|
-
|
|
1884
|
-
|
|
1885
|
-
|
|
1886
|
-
|
|
1887
|
-
|
|
1888
|
-
|
|
1889
|
-
|
|
1890
|
-
|
|
1891
|
-
|
|
1892
|
-
return Buffer.concat(chunks);
|
|
1762
|
+
const opts = typeof languageOrOptions === "object" && languageOrOptions !== null ? languageOrOptions : options ?? {};
|
|
1763
|
+
this.language = (typeof languageOrOptions === "string" ? languageOrOptions : opts.language) ?? "en";
|
|
1764
|
+
this.model = model ?? opts.model ?? "nova-3";
|
|
1765
|
+
this.encoding = encoding ?? opts.encoding ?? "linear16";
|
|
1766
|
+
this.sampleRate = sampleRate ?? opts.sampleRate ?? 16e3;
|
|
1767
|
+
this.endpointingMs = opts.endpointingMs ?? 150;
|
|
1768
|
+
this.utteranceEndMs = opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3;
|
|
1769
|
+
this.smartFormat = opts.smartFormat ?? true;
|
|
1770
|
+
this.interimResults = opts.interimResults ?? true;
|
|
1771
|
+
this.vadEvents = opts.vadEvents ?? true;
|
|
1772
|
+
}
|
|
1773
|
+
/** Factory for Twilio calls — mulaw 8 kHz. Forwards tuning options through. */
|
|
1774
|
+
static forTwilio(apiKey, language = "en", model = "nova-3", options = {}) {
|
|
1775
|
+
return new _DeepgramSTT(apiKey, language, model, "mulaw", 8e3, options);
|
|
1893
1776
|
}
|
|
1894
|
-
|
|
1895
|
-
|
|
1896
|
-
|
|
1897
|
-
|
|
1898
|
-
|
|
1899
|
-
|
|
1900
|
-
|
|
1901
|
-
|
|
1902
|
-
|
|
1903
|
-
|
|
1904
|
-
|
|
1905
|
-
|
|
1906
|
-
"Content-Type": "application/json"
|
|
1907
|
-
},
|
|
1908
|
-
body: JSON.stringify({ text, model_id: this.modelId }),
|
|
1909
|
-
signal: AbortSignal.timeout(3e4)
|
|
1777
|
+
async connect() {
|
|
1778
|
+
const params = new URLSearchParams({
|
|
1779
|
+
model: this.model,
|
|
1780
|
+
language: this.language,
|
|
1781
|
+
encoding: this.encoding,
|
|
1782
|
+
sample_rate: String(this.sampleRate),
|
|
1783
|
+
channels: "1",
|
|
1784
|
+
interim_results: this.interimResults ? "true" : "false",
|
|
1785
|
+
endpointing: String(this.endpointingMs),
|
|
1786
|
+
smart_format: this.smartFormat ? "true" : "false",
|
|
1787
|
+
vad_events: this.vadEvents ? "true" : "false",
|
|
1788
|
+
no_delay: "true"
|
|
1910
1789
|
});
|
|
1911
|
-
if (
|
|
1912
|
-
|
|
1913
|
-
throw new Error(`ElevenLabs TTS error ${response.status}: ${body}`);
|
|
1790
|
+
if (this.utteranceEndMs !== null) {
|
|
1791
|
+
params.set("utterance_end_ms", String(Math.max(this.utteranceEndMs, 1e3)));
|
|
1914
1792
|
}
|
|
1915
|
-
|
|
1916
|
-
|
|
1917
|
-
|
|
1918
|
-
|
|
1919
|
-
|
|
1920
|
-
|
|
1921
|
-
|
|
1922
|
-
|
|
1923
|
-
|
|
1924
|
-
|
|
1925
|
-
|
|
1926
|
-
|
|
1927
|
-
|
|
1928
|
-
if (typeof reader.cancel === "function") await reader.cancel().catch(() => {
|
|
1793
|
+
const url = `${DEEPGRAM_WS_URL}?${params.toString()}`;
|
|
1794
|
+
this.ws = new import_ws4.default(url, {
|
|
1795
|
+
headers: { Authorization: `Token ${this.apiKey}` }
|
|
1796
|
+
});
|
|
1797
|
+
await new Promise((resolve, reject) => {
|
|
1798
|
+
const timer = setTimeout(() => reject(new Error("Deepgram connect timeout")), 1e4);
|
|
1799
|
+
this.ws.once("open", () => {
|
|
1800
|
+
clearTimeout(timer);
|
|
1801
|
+
resolve();
|
|
1802
|
+
});
|
|
1803
|
+
this.ws.once("error", (err) => {
|
|
1804
|
+
clearTimeout(timer);
|
|
1805
|
+
reject(err);
|
|
1929
1806
|
});
|
|
1930
|
-
reader.releaseLock();
|
|
1931
|
-
}
|
|
1932
|
-
}
|
|
1933
|
-
};
|
|
1934
|
-
}
|
|
1935
|
-
});
|
|
1936
|
-
|
|
1937
|
-
// src/providers/openai-tts.ts
|
|
1938
|
-
var OPENAI_TTS_URL, OpenAITTS;
|
|
1939
|
-
var init_openai_tts = __esm({
|
|
1940
|
-
"src/providers/openai-tts.ts"() {
|
|
1941
|
-
"use strict";
|
|
1942
|
-
OPENAI_TTS_URL = "https://api.openai.com/v1/audio/speech";
|
|
1943
|
-
OpenAITTS = class _OpenAITTS {
|
|
1944
|
-
constructor(apiKey, voice = "alloy", model = "tts-1") {
|
|
1945
|
-
this.apiKey = apiKey;
|
|
1946
|
-
this.voice = voice;
|
|
1947
|
-
this.model = model;
|
|
1948
|
-
}
|
|
1949
|
-
/**
|
|
1950
|
-
* Synthesise text to speech and return the full audio as a single Buffer.
|
|
1951
|
-
*
|
|
1952
|
-
* For large chunks (or when latency matters) call `synthesizeStream` instead.
|
|
1953
|
-
*/
|
|
1954
|
-
async synthesize(text) {
|
|
1955
|
-
const chunks = [];
|
|
1956
|
-
for await (const chunk of this.synthesizeStream(text)) {
|
|
1957
|
-
chunks.push(chunk);
|
|
1958
|
-
}
|
|
1959
|
-
return Buffer.concat(chunks);
|
|
1960
|
-
}
|
|
1961
|
-
/**
|
|
1962
|
-
* Synthesise text and yield audio chunks as they arrive (streaming).
|
|
1963
|
-
*
|
|
1964
|
-
* OpenAI returns 24 kHz PCM16; each chunk is resampled to 16 kHz before
|
|
1965
|
-
* yielding so the output is ready for telephony pipelines.
|
|
1966
|
-
*/
|
|
1967
|
-
async *synthesizeStream(text) {
|
|
1968
|
-
const response = await fetch(OPENAI_TTS_URL, {
|
|
1969
|
-
method: "POST",
|
|
1970
|
-
headers: {
|
|
1971
|
-
"Authorization": `Bearer ${this.apiKey}`,
|
|
1972
|
-
"Content-Type": "application/json"
|
|
1973
|
-
},
|
|
1974
|
-
body: JSON.stringify({
|
|
1975
|
-
model: this.model,
|
|
1976
|
-
input: text,
|
|
1977
|
-
voice: this.voice,
|
|
1978
|
-
response_format: "pcm"
|
|
1979
|
-
}),
|
|
1980
|
-
signal: AbortSignal.timeout(3e4)
|
|
1981
1807
|
});
|
|
1982
|
-
|
|
1983
|
-
|
|
1984
|
-
|
|
1985
|
-
|
|
1986
|
-
|
|
1987
|
-
|
|
1988
|
-
}
|
|
1989
|
-
const reader = response.body.getReader();
|
|
1990
|
-
try {
|
|
1991
|
-
while (true) {
|
|
1992
|
-
const { done, value } = await reader.read();
|
|
1993
|
-
if (done) break;
|
|
1994
|
-
if (value && value.length > 0) {
|
|
1995
|
-
yield _OpenAITTS.resample24kTo16k(Buffer.from(value));
|
|
1996
|
-
}
|
|
1808
|
+
this.ws.on("message", (raw) => {
|
|
1809
|
+
let data;
|
|
1810
|
+
try {
|
|
1811
|
+
data = JSON.parse(raw.toString());
|
|
1812
|
+
} catch {
|
|
1813
|
+
return;
|
|
1997
1814
|
}
|
|
1998
|
-
|
|
1999
|
-
|
|
2000
|
-
|
|
2001
|
-
|
|
1815
|
+
if (data.type === "Metadata" && data.request_id) {
|
|
1816
|
+
this.requestId = data.request_id;
|
|
1817
|
+
return;
|
|
1818
|
+
}
|
|
1819
|
+
if (data.type !== "Results") return;
|
|
1820
|
+
const alternatives = data.channel?.alternatives ?? [];
|
|
1821
|
+
if (!alternatives.length) return;
|
|
1822
|
+
const best = alternatives[0];
|
|
1823
|
+
const text = (best.transcript ?? "").trim();
|
|
1824
|
+
if (!text) return;
|
|
1825
|
+
const transcript = {
|
|
1826
|
+
text,
|
|
1827
|
+
isFinal: Boolean(data.is_final) || Boolean(data.speech_final),
|
|
1828
|
+
confidence: best.confidence ?? 0
|
|
1829
|
+
};
|
|
1830
|
+
for (const cb of this.callbacks) {
|
|
1831
|
+
cb(transcript);
|
|
1832
|
+
}
|
|
1833
|
+
});
|
|
1834
|
+
}
|
|
1835
|
+
sendAudio(audio) {
|
|
1836
|
+
if (!this.ws || this.ws.readyState !== import_ws4.default.OPEN) return;
|
|
1837
|
+
this.ws.send(audio);
|
|
1838
|
+
}
|
|
1839
|
+
onTranscript(callback) {
|
|
1840
|
+
if (this.callbacks.length >= 10) {
|
|
1841
|
+
getLogger().warn("DeepgramSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
|
|
1842
|
+
this.callbacks[this.callbacks.length - 1] = callback;
|
|
1843
|
+
return;
|
|
2002
1844
|
}
|
|
1845
|
+
this.callbacks.push(callback);
|
|
2003
1846
|
}
|
|
2004
|
-
|
|
2005
|
-
|
|
2006
|
-
|
|
2007
|
-
|
|
2008
|
-
|
|
2009
|
-
* Python SDK implementation.
|
|
2010
|
-
*/
|
|
2011
|
-
static resample24kTo16k(audio) {
|
|
2012
|
-
if (audio.length < 2) return audio;
|
|
2013
|
-
const sampleCount = Math.floor(audio.length / 2);
|
|
2014
|
-
const samples = new Int16Array(sampleCount);
|
|
2015
|
-
for (let i = 0; i < sampleCount; i++) {
|
|
2016
|
-
samples[i] = audio.readInt16LE(i * 2);
|
|
2017
|
-
}
|
|
2018
|
-
const resampled = [];
|
|
2019
|
-
for (let i = 0; i < samples.length; i += 3) {
|
|
2020
|
-
resampled.push(samples[i]);
|
|
2021
|
-
if (i + 1 < samples.length) {
|
|
2022
|
-
if (i + 2 < samples.length) {
|
|
2023
|
-
resampled.push(Math.trunc((samples[i + 1] + samples[i + 2]) / 2));
|
|
2024
|
-
} else {
|
|
2025
|
-
resampled.push(samples[i + 1]);
|
|
2026
|
-
}
|
|
1847
|
+
close() {
|
|
1848
|
+
if (this.ws) {
|
|
1849
|
+
try {
|
|
1850
|
+
this.ws.send(JSON.stringify({ type: "CloseStream" }));
|
|
1851
|
+
} catch {
|
|
2027
1852
|
}
|
|
1853
|
+
this.ws.close();
|
|
1854
|
+
this.ws = null;
|
|
2028
1855
|
}
|
|
2029
|
-
const out = Buffer.alloc(resampled.length * 2);
|
|
2030
|
-
for (let i = 0; i < resampled.length; i++) {
|
|
2031
|
-
out.writeInt16LE(resampled[i], i * 2);
|
|
2032
|
-
}
|
|
2033
|
-
return out;
|
|
2034
1856
|
}
|
|
2035
1857
|
};
|
|
2036
1858
|
}
|
|
@@ -2940,8 +2762,8 @@ var init_stream_handler = __esm({
|
|
|
2940
2762
|
"use strict";
|
|
2941
2763
|
init_openai_realtime();
|
|
2942
2764
|
init_elevenlabs_convai();
|
|
2943
|
-
|
|
2944
|
-
|
|
2765
|
+
init_deepgram_stt();
|
|
2766
|
+
init_provider_factory();
|
|
2945
2767
|
init_metrics();
|
|
2946
2768
|
init_transcoding();
|
|
2947
2769
|
init_llm_loop();
|
|
@@ -2972,6 +2794,9 @@ var init_stream_handler = __esm({
|
|
|
2972
2794
|
maxDurationTimer = null;
|
|
2973
2795
|
transcriptProcessing = false;
|
|
2974
2796
|
transcriptQueue = [];
|
|
2797
|
+
// BUG #22 throttle state — mirror Python impl.
|
|
2798
|
+
lastCommitText = "";
|
|
2799
|
+
lastCommitAt = 0;
|
|
2975
2800
|
history;
|
|
2976
2801
|
metricsAcc;
|
|
2977
2802
|
constructor(deps, ws, caller, callee) {
|
|
@@ -2980,8 +2805,8 @@ var init_stream_handler = __esm({
|
|
|
2980
2805
|
this.caller = caller;
|
|
2981
2806
|
this.callee = callee;
|
|
2982
2807
|
this.history = createHistoryManager(200);
|
|
2983
|
-
const sttProviderName = deps.agent.stt
|
|
2984
|
-
const ttsProviderName = deps.agent.tts
|
|
2808
|
+
const sttProviderName = deps.agent.stt ? deps.agent.stt.constructor?.name ?? "custom" : void 0;
|
|
2809
|
+
const ttsProviderName = deps.agent.tts ? deps.agent.tts.constructor?.name ?? "custom" : void 0;
|
|
2985
2810
|
const providerMode = deps.agent.provider ?? "openai_realtime";
|
|
2986
2811
|
this.metricsAcc = new CallMetricsAccumulator({
|
|
2987
2812
|
callId: "",
|
|
@@ -3082,15 +2907,23 @@ var init_stream_handler = __esm({
|
|
|
3082
2907
|
this.streamSid = sid;
|
|
3083
2908
|
}
|
|
3084
2909
|
/** Handle an incoming audio chunk (already decoded from base64). */
|
|
3085
|
-
handleAudio(audioBuffer) {
|
|
2910
|
+
async handleAudio(audioBuffer) {
|
|
3086
2911
|
const provider = this.deps.agent.provider ?? "openai_realtime";
|
|
3087
|
-
if (provider === "pipeline" && this.stt
|
|
3088
|
-
if (this.deps.
|
|
3089
|
-
|
|
3090
|
-
|
|
3091
|
-
|
|
2912
|
+
if (provider === "pipeline" && this.stt) {
|
|
2913
|
+
if (this.isSpeaking && (this.deps.agent.bargeInThresholdMs ?? 300) === 0) {
|
|
2914
|
+
return;
|
|
2915
|
+
}
|
|
2916
|
+
const pcm8k = mulawToPcm16(audioBuffer);
|
|
2917
|
+
const pcm16k = resample8kTo16k(pcm8k);
|
|
2918
|
+
const hooks = this.deps.agent.hooks;
|
|
2919
|
+
if (hooks) {
|
|
2920
|
+
const hookExecutor = new PipelineHookExecutor(hooks);
|
|
2921
|
+
const hookCtx = this.buildHookContext();
|
|
2922
|
+
const processed = await hookExecutor.runBeforeSendToStt(pcm16k, hookCtx);
|
|
2923
|
+
if (processed === null) return;
|
|
2924
|
+
this.stt.sendAudio(processed);
|
|
3092
2925
|
} else {
|
|
3093
|
-
this.stt.sendAudio(
|
|
2926
|
+
this.stt.sendAudio(pcm16k);
|
|
3094
2927
|
}
|
|
3095
2928
|
} else if (this.adapter) {
|
|
3096
2929
|
if (this.adapter instanceof ElevenLabsConvAIAdapter && this.deps.bridge.telephonyProvider === "twilio") {
|
|
@@ -3163,18 +2996,8 @@ var init_stream_handler = __esm({
|
|
|
3163
2996
|
// ---------------------------------------------------------------------------
|
|
3164
2997
|
async initPipeline(resolvedPrompt) {
|
|
3165
2998
|
const label = this.deps.bridge.label;
|
|
3166
|
-
this.stt = this.deps.bridge.createStt(this.deps.agent);
|
|
3167
|
-
|
|
3168
|
-
if (this.deps.agent.tts.provider === "elevenlabs") {
|
|
3169
|
-
this.tts = new ElevenLabsTTS(this.deps.agent.tts.apiKey, this.deps.agent.tts.voice ?? "21m00Tcm4TlvDq8ikWAM");
|
|
3170
|
-
}
|
|
3171
|
-
if (this.deps.agent.tts.provider === "openai") {
|
|
3172
|
-
this.tts = new OpenAITTS(this.deps.agent.tts.apiKey, this.deps.agent.tts.voice ?? "alloy");
|
|
3173
|
-
}
|
|
3174
|
-
} else if (this.deps.agent.elevenlabsKey) {
|
|
3175
|
-
const voiceId = this.deps.agent.voice && this.deps.agent.voice !== "alloy" ? this.deps.agent.voice : "21m00Tcm4TlvDq8ikWAM";
|
|
3176
|
-
this.tts = new ElevenLabsTTS(this.deps.agent.elevenlabsKey, voiceId);
|
|
3177
|
-
}
|
|
2999
|
+
this.stt = await this.deps.bridge.createStt(this.deps.agent);
|
|
3000
|
+
this.tts = await createTTS(this.deps.agent);
|
|
3178
3001
|
if (!this.stt) {
|
|
3179
3002
|
getLogger().info(`Pipeline mode (${label}): no STT configured`);
|
|
3180
3003
|
}
|
|
@@ -3285,7 +3108,59 @@ var init_stream_handler = __esm({
|
|
|
3285
3108
|
}
|
|
3286
3109
|
}
|
|
3287
3110
|
async processTranscript(transcript) {
|
|
3111
|
+
if (transcript.text && this.isSpeaking) {
|
|
3112
|
+
getLogger().info(
|
|
3113
|
+
`Barge-in: caller spoke over agent (${sanitizeLogValue(transcript.text.slice(0, 40))})`
|
|
3114
|
+
);
|
|
3115
|
+
this.isSpeaking = false;
|
|
3116
|
+
try {
|
|
3117
|
+
this.deps.bridge.sendClear(this.ws, this.streamSid);
|
|
3118
|
+
} catch (err) {
|
|
3119
|
+
getLogger().debug(`sendClear during barge-in failed: ${String(err)}`);
|
|
3120
|
+
}
|
|
3121
|
+
this.metricsAcc.recordTurnInterrupted();
|
|
3122
|
+
}
|
|
3288
3123
|
if (!transcript.isFinal || !transcript.text) return;
|
|
3124
|
+
const now = Date.now();
|
|
3125
|
+
const normalised = transcript.text.trim().toLowerCase();
|
|
3126
|
+
const stripped = normalised.replace(/[.,!?;: ]+$/, "").trim();
|
|
3127
|
+
const sinceLastMs = now - this.lastCommitAt;
|
|
3128
|
+
const HALLUCINATIONS = /* @__PURE__ */ new Set([
|
|
3129
|
+
"you",
|
|
3130
|
+
"thank you",
|
|
3131
|
+
"thanks",
|
|
3132
|
+
"yeah",
|
|
3133
|
+
"yes",
|
|
3134
|
+
"no",
|
|
3135
|
+
"okay",
|
|
3136
|
+
"ok",
|
|
3137
|
+
"uh",
|
|
3138
|
+
"um",
|
|
3139
|
+
"mmm",
|
|
3140
|
+
"hmm",
|
|
3141
|
+
".",
|
|
3142
|
+
"bye",
|
|
3143
|
+
"right",
|
|
3144
|
+
"cool"
|
|
3145
|
+
]);
|
|
3146
|
+
if (HALLUCINATIONS.has(stripped) || stripped === "") {
|
|
3147
|
+
getLogger().info(`Dropped likely STT hallucination: ${sanitizeLogValue(normalised.slice(0, 40))}`);
|
|
3148
|
+
return;
|
|
3149
|
+
}
|
|
3150
|
+
if (sinceLastMs < 2e3 && normalised === this.lastCommitText) {
|
|
3151
|
+
getLogger().info(
|
|
3152
|
+
`Dropped duplicate final transcript (${(sinceLastMs / 1e3).toFixed(1)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
|
|
3153
|
+
);
|
|
3154
|
+
return;
|
|
3155
|
+
}
|
|
3156
|
+
if (sinceLastMs < 500) {
|
|
3157
|
+
getLogger().info(
|
|
3158
|
+
`Dropped back-to-back final transcript (${(sinceLastMs / 1e3).toFixed(2)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
|
|
3159
|
+
);
|
|
3160
|
+
return;
|
|
3161
|
+
}
|
|
3162
|
+
this.lastCommitText = normalised;
|
|
3163
|
+
this.lastCommitAt = now;
|
|
3289
3164
|
const label = this.deps.bridge.label;
|
|
3290
3165
|
getLogger().info(`User (${label} pipeline): ${sanitizeLogValue(transcript.text)}`);
|
|
3291
3166
|
this.metricsAcc.startTurn();
|
|
@@ -3640,10 +3515,11 @@ var init_stream_handler = __esm({
|
|
|
3640
3515
|
this.maxDurationTimer = null;
|
|
3641
3516
|
}
|
|
3642
3517
|
await this.deps.bridge.queryTelephonyCost(this.metricsAcc, this.callId);
|
|
3643
|
-
|
|
3644
|
-
|
|
3645
|
-
|
|
3646
|
-
|
|
3518
|
+
if (this.stt instanceof DeepgramSTT && this.stt.requestId) {
|
|
3519
|
+
const dgKey = this.stt.apiKey;
|
|
3520
|
+
if (dgKey) {
|
|
3521
|
+
await queryDeepgramCost(this.metricsAcc, dgKey, this.stt.requestId);
|
|
3522
|
+
}
|
|
3647
3523
|
}
|
|
3648
3524
|
const finalMetrics = this.metricsAcc.endCall();
|
|
3649
3525
|
const callEndData = {
|
|
@@ -3745,11 +3621,16 @@ function resolveVariables(template, variables) {
|
|
|
3745
3621
|
return result;
|
|
3746
3622
|
}
|
|
3747
3623
|
function buildAIAdapter(config, agent, resolvedPrompt) {
|
|
3624
|
+
const engine = agent.engine;
|
|
3748
3625
|
if (agent.provider === "elevenlabs_convai") {
|
|
3749
|
-
|
|
3626
|
+
if (!engine || engine.kind !== "elevenlabs_convai") {
|
|
3627
|
+
throw new Error(
|
|
3628
|
+
"ElevenLabs ConvAI mode requires `agent.engine = new ElevenLabsConvAI({...})`."
|
|
3629
|
+
);
|
|
3630
|
+
}
|
|
3750
3631
|
return new ElevenLabsConvAIAdapter(
|
|
3751
|
-
|
|
3752
|
-
|
|
3632
|
+
engine.apiKey,
|
|
3633
|
+
engine.agentId,
|
|
3753
3634
|
agent.voice ?? "21m00Tcm4TlvDq8ikWAM",
|
|
3754
3635
|
"eleven_turbo_v2_5",
|
|
3755
3636
|
agent.language ?? "en",
|
|
@@ -3762,8 +3643,9 @@ function buildAIAdapter(config, agent, resolvedPrompt) {
|
|
|
3762
3643
|
parameters: t.parameters
|
|
3763
3644
|
})) ?? [];
|
|
3764
3645
|
const tools = [...agentTools, TRANSFER_CALL_TOOL, END_CALL_TOOL];
|
|
3646
|
+
const openaiKey = engine && engine.kind === "openai_realtime" ? engine.apiKey : config.openaiKey ?? "";
|
|
3765
3647
|
return new OpenAIRealtimeAdapter(
|
|
3766
|
-
|
|
3648
|
+
openaiKey,
|
|
3767
3649
|
agent.model,
|
|
3768
3650
|
agent.voice,
|
|
3769
3651
|
resolvedPrompt ?? agent.systemPrompt,
|
|
@@ -3789,8 +3671,7 @@ var init_server = __esm({
|
|
|
3789
3671
|
import_ws5 = require("ws");
|
|
3790
3672
|
init_openai_realtime();
|
|
3791
3673
|
init_elevenlabs_convai();
|
|
3792
|
-
|
|
3793
|
-
init_whisper_stt();
|
|
3674
|
+
init_provider_factory();
|
|
3794
3675
|
init_pricing();
|
|
3795
3676
|
init_store();
|
|
3796
3677
|
init_routes();
|
|
@@ -3875,16 +3756,7 @@ var init_server = __esm({
|
|
|
3875
3756
|
}
|
|
3876
3757
|
}
|
|
3877
3758
|
createStt(agent) {
|
|
3878
|
-
|
|
3879
|
-
if (agent.stt.provider === "deepgram") {
|
|
3880
|
-
return DeepgramSTT.forTwilio(agent.stt.apiKey, agent.stt.language ?? "en");
|
|
3881
|
-
} else if (agent.stt.provider === "whisper") {
|
|
3882
|
-
return WhisperSTT.forTwilio(agent.stt.apiKey, agent.stt.language ?? "en");
|
|
3883
|
-
}
|
|
3884
|
-
} else if (agent.deepgramKey) {
|
|
3885
|
-
return DeepgramSTT.forTwilio(agent.deepgramKey, agent.language ?? "en");
|
|
3886
|
-
}
|
|
3887
|
-
return null;
|
|
3759
|
+
return createSTT(agent);
|
|
3888
3760
|
}
|
|
3889
3761
|
async queryTelephonyCost(metricsAcc, callId) {
|
|
3890
3762
|
if (this.config.twilioSid && this.config.twilioToken && callId) {
|
|
@@ -3923,12 +3795,12 @@ var init_server = __esm({
|
|
|
3923
3795
|
label = "Telnyx";
|
|
3924
3796
|
telephonyProvider = "telnyx";
|
|
3925
3797
|
sendAudio(ws, audioBase64, _streamSid) {
|
|
3926
|
-
ws.send(JSON.stringify({
|
|
3798
|
+
ws.send(JSON.stringify({ event: "media", media: { payload: audioBase64 } }));
|
|
3927
3799
|
}
|
|
3928
3800
|
sendMark(_ws, _markName, _streamSid) {
|
|
3929
3801
|
}
|
|
3930
3802
|
sendClear(ws, _streamSid) {
|
|
3931
|
-
ws.send(JSON.stringify({
|
|
3803
|
+
ws.send(JSON.stringify({ event: "clear" }));
|
|
3932
3804
|
}
|
|
3933
3805
|
async transferCall(callId, toNumber) {
|
|
3934
3806
|
if (!isValidTelnyxTransferTarget(toNumber)) {
|
|
@@ -4022,16 +3894,7 @@ var init_server = __esm({
|
|
|
4022
3894
|
ws.close();
|
|
4023
3895
|
}
|
|
4024
3896
|
createStt(agent) {
|
|
4025
|
-
|
|
4026
|
-
if (agent.stt.provider === "deepgram") {
|
|
4027
|
-
return new DeepgramSTT(agent.stt.apiKey, agent.stt.language ?? "en", "nova-3", "linear16", 16e3);
|
|
4028
|
-
} else if (agent.stt.provider === "whisper") {
|
|
4029
|
-
return new WhisperSTT(agent.stt.apiKey, "whisper-1", agent.stt.language ?? "en");
|
|
4030
|
-
}
|
|
4031
|
-
} else if (agent.deepgramKey) {
|
|
4032
|
-
return new DeepgramSTT(agent.deepgramKey, agent.language ?? "en", "nova-3", "linear16", 16e3);
|
|
4033
|
-
}
|
|
4034
|
-
return null;
|
|
3897
|
+
return createSTT(agent);
|
|
4035
3898
|
}
|
|
4036
3899
|
async queryTelephonyCost(metricsAcc, callId) {
|
|
4037
3900
|
if (this.config.telnyxKey && callId) {
|
|
@@ -4076,6 +3939,7 @@ var init_server = __esm({
|
|
|
4076
3939
|
server = null;
|
|
4077
3940
|
wss = null;
|
|
4078
3941
|
twilioTokenWarningLogged = false;
|
|
3942
|
+
telnyxSigWarningLogged = false;
|
|
4079
3943
|
metricsStore;
|
|
4080
3944
|
pricing;
|
|
4081
3945
|
remoteHandler = new RemoteMessageHandler();
|
|
@@ -4123,6 +3987,31 @@ var init_server = __esm({
|
|
|
4123
3987
|
mountApi(app, this.metricsStore, this.dashboardToken);
|
|
4124
3988
|
getLogger().info("Dashboard: http://127.0.0.1:" + port + "/");
|
|
4125
3989
|
}
|
|
3990
|
+
app.post("/webhooks/twilio/status", (req, res) => {
|
|
3991
|
+
if (this.config.twilioToken) {
|
|
3992
|
+
const signature = req.headers["x-twilio-signature"] || "";
|
|
3993
|
+
const url = `https://${this.config.webhookUrl}${req.originalUrl}`;
|
|
3994
|
+
const params = req.body ?? {};
|
|
3995
|
+
if (!validateTwilioSignature(url, params, signature, this.config.twilioToken)) {
|
|
3996
|
+
res.status(403).send("Invalid signature");
|
|
3997
|
+
return;
|
|
3998
|
+
}
|
|
3999
|
+
}
|
|
4000
|
+
const body = req.body;
|
|
4001
|
+
const callSid = sanitizeLogValue(body["CallSid"] ?? "");
|
|
4002
|
+
const callStatus = sanitizeLogValue(body["CallStatus"] ?? "");
|
|
4003
|
+
const duration = body["CallDuration"] ?? body["Duration"] ?? "";
|
|
4004
|
+
getLogger().info(
|
|
4005
|
+
`Twilio status ${callStatus} for call ${callSid} (duration=${duration})`
|
|
4006
|
+
);
|
|
4007
|
+
if (callSid && callStatus) {
|
|
4008
|
+
const extra = {};
|
|
4009
|
+
const parsed = parseFloat(duration);
|
|
4010
|
+
if (!Number.isNaN(parsed)) extra.duration_seconds = parsed;
|
|
4011
|
+
this.metricsStore.updateCallStatus(callSid, callStatus, extra);
|
|
4012
|
+
}
|
|
4013
|
+
res.status(204).send();
|
|
4014
|
+
});
|
|
4126
4015
|
app.post("/webhooks/twilio/recording", (req, res) => {
|
|
4127
4016
|
if (this.config.twilioToken) {
|
|
4128
4017
|
const signature = req.headers["x-twilio-signature"] || "";
|
|
@@ -4208,7 +4097,7 @@ var init_server = __esm({
|
|
|
4208
4097
|
const twiml = `<?xml version="1.0" encoding="UTF-8"?><Response><Connect><Stream url="${xmlStreamUrl}"><Parameter name="caller" value="${xmlEscape(caller)}"/><Parameter name="callee" value="${xmlEscape(callee)}"/></Stream></Connect></Response>`;
|
|
4209
4098
|
res.type("text/xml").send(twiml);
|
|
4210
4099
|
});
|
|
4211
|
-
app.post("/webhooks/telnyx/voice", (req, res) => {
|
|
4100
|
+
app.post("/webhooks/telnyx/voice", async (req, res) => {
|
|
4212
4101
|
if (this.config.telnyxPublicKey) {
|
|
4213
4102
|
const rawBody = req.rawBody ?? "";
|
|
4214
4103
|
const signature = req.headers["telnyx-signature-ed25519"] ?? "";
|
|
@@ -4217,7 +4106,8 @@ var init_server = __esm({
|
|
|
4217
4106
|
getLogger().warn("Telnyx webhook rejected: invalid or missing Ed25519 signature");
|
|
4218
4107
|
return res.status(403).send("Invalid signature");
|
|
4219
4108
|
}
|
|
4220
|
-
} else {
|
|
4109
|
+
} else if (!this.telnyxSigWarningLogged) {
|
|
4110
|
+
this.telnyxSigWarningLogged = true;
|
|
4221
4111
|
getLogger().warn("Telnyx webhook signature verification is disabled. Set telnyxPublicKey in LocalOptions for production use.");
|
|
4222
4112
|
}
|
|
4223
4113
|
const body = req.body;
|
|
@@ -4227,41 +4117,77 @@ var init_server = __esm({
|
|
|
4227
4117
|
if (typeof body.data.event_type !== "string" || typeof body.data.payload !== "object" || body.data.payload === null) {
|
|
4228
4118
|
return res.status(400).send("Invalid body");
|
|
4229
4119
|
}
|
|
4230
|
-
const eventType = body
|
|
4120
|
+
const eventType = body.data.event_type ?? "";
|
|
4121
|
+
const payload = body.data.payload ?? {};
|
|
4231
4122
|
if (eventType === "call.dtmf.received") {
|
|
4232
|
-
const digit = String(
|
|
4123
|
+
const digit = String(payload.digit ?? "").trim();
|
|
4233
4124
|
if (digit) {
|
|
4234
4125
|
getLogger().info(`Telnyx DTMF received (webhook): ${sanitizeLogValue(digit)}`);
|
|
4235
4126
|
}
|
|
4236
|
-
return res.
|
|
4127
|
+
return res.status(200).send();
|
|
4237
4128
|
}
|
|
4238
4129
|
if (eventType === "call.recording.saved") {
|
|
4239
|
-
const recordingUrl =
|
|
4130
|
+
const recordingUrl = payload.recording_urls?.mp3 ?? payload.recording_urls?.wav ?? payload.public_recording_urls?.mp3 ?? "";
|
|
4240
4131
|
if (recordingUrl) {
|
|
4241
4132
|
getLogger().info(`Telnyx recording saved (webhook): ${sanitizeLogValue(recordingUrl)}`);
|
|
4242
4133
|
}
|
|
4243
|
-
return res.
|
|
4244
|
-
}
|
|
4245
|
-
|
|
4246
|
-
|
|
4247
|
-
|
|
4248
|
-
|
|
4249
|
-
|
|
4250
|
-
|
|
4251
|
-
|
|
4252
|
-
|
|
4253
|
-
|
|
4254
|
-
|
|
4255
|
-
|
|
4134
|
+
return res.status(200).send();
|
|
4135
|
+
}
|
|
4136
|
+
const callControlId = payload.call_control_id ?? "";
|
|
4137
|
+
if (!callControlId) {
|
|
4138
|
+
getLogger().warn("Telnyx webhook rejected: missing call_control_id");
|
|
4139
|
+
return res.status(400).send("Invalid webhook payload");
|
|
4140
|
+
}
|
|
4141
|
+
const apiKey = this.config.telnyxKey;
|
|
4142
|
+
if (!apiKey) {
|
|
4143
|
+
getLogger().warn("Telnyx webhook: missing telnyxKey in LocalOptions");
|
|
4144
|
+
return res.status(500).send("Missing Telnyx API key");
|
|
4145
|
+
}
|
|
4146
|
+
const apiBase = "https://api.telnyx.com/v2";
|
|
4147
|
+
const authHeaders = {
|
|
4148
|
+
"Content-Type": "application/json",
|
|
4149
|
+
Authorization: `Bearer ${apiKey}`
|
|
4150
|
+
};
|
|
4151
|
+
try {
|
|
4152
|
+
if (eventType === "call.initiated") {
|
|
4153
|
+
getLogger().info(`Telnyx call.initiated ${callControlId} \u2014 answering`);
|
|
4154
|
+
const resp = await fetch(`${apiBase}/calls/${encodeURIComponent(callControlId)}/actions/answer`, {
|
|
4155
|
+
method: "POST",
|
|
4156
|
+
headers: authHeaders,
|
|
4157
|
+
body: JSON.stringify({}),
|
|
4158
|
+
signal: AbortSignal.timeout(1e4)
|
|
4159
|
+
});
|
|
4160
|
+
if (!resp.ok) {
|
|
4161
|
+
getLogger().warn(`Telnyx answer failed: ${resp.status} ${(await resp.text()).slice(0, 200)}`);
|
|
4162
|
+
}
|
|
4163
|
+
} else if (eventType === "call.answered") {
|
|
4164
|
+
const caller = payload.from ?? "";
|
|
4165
|
+
const callee = payload.to ?? "";
|
|
4166
|
+
const streamUrl = `wss://${this.config.webhookUrl}/ws/stream/${encodeURIComponent(callControlId)}?caller=${encodeURIComponent(caller)}&callee=${encodeURIComponent(callee)}`;
|
|
4167
|
+
getLogger().info(`Telnyx call.answered ${callControlId} \u2014 starting stream`);
|
|
4168
|
+
const resp = await fetch(`${apiBase}/calls/${encodeURIComponent(callControlId)}/actions/streaming_start`, {
|
|
4169
|
+
method: "POST",
|
|
4170
|
+
headers: authHeaders,
|
|
4171
|
+
body: JSON.stringify({
|
|
4256
4172
|
stream_url: streamUrl,
|
|
4257
|
-
stream_track: "both_tracks"
|
|
4258
|
-
|
|
4173
|
+
stream_track: "both_tracks",
|
|
4174
|
+
stream_bidirectional_mode: "rtp",
|
|
4175
|
+
stream_bidirectional_codec: "PCMU",
|
|
4176
|
+
stream_bidirectional_sampling_rate: 8e3,
|
|
4177
|
+
stream_bidirectional_target_legs: "self"
|
|
4178
|
+
}),
|
|
4179
|
+
signal: AbortSignal.timeout(1e4)
|
|
4180
|
+
});
|
|
4181
|
+
if (!resp.ok) {
|
|
4182
|
+
getLogger().warn(`Telnyx streaming_start failed: ${resp.status} ${(await resp.text()).slice(0, 200)}`);
|
|
4259
4183
|
}
|
|
4260
|
-
|
|
4261
|
-
|
|
4262
|
-
|
|
4263
|
-
|
|
4184
|
+
} else {
|
|
4185
|
+
getLogger().debug(`Telnyx event ignored: ${eventType}`);
|
|
4186
|
+
}
|
|
4187
|
+
} catch (e) {
|
|
4188
|
+
getLogger().error(`Telnyx webhook handler error: ${String(e)}`);
|
|
4264
4189
|
}
|
|
4190
|
+
return res.status(200).send();
|
|
4265
4191
|
});
|
|
4266
4192
|
this.server = (0, import_http.createServer)(app);
|
|
4267
4193
|
this.wss = new import_ws5.WebSocketServer({ noServer: true });
|
|
@@ -4408,11 +4334,12 @@ Connect AI agents to phone numbers in 4 lines of code
|
|
|
4408
4334
|
getLogger().error("Failed to parse Telnyx WS message:", e);
|
|
4409
4335
|
return;
|
|
4410
4336
|
}
|
|
4411
|
-
const
|
|
4412
|
-
|
|
4413
|
-
|
|
4337
|
+
const event = data.event ?? "";
|
|
4338
|
+
if (event === "connected") return;
|
|
4339
|
+
getLogger().info(`Telnyx event: ${event}`);
|
|
4340
|
+
if (event === "start" && !streamStarted) {
|
|
4414
4341
|
streamStarted = true;
|
|
4415
|
-
const callControlId = data.
|
|
4342
|
+
const callControlId = data.start?.call_control_id ?? "";
|
|
4416
4343
|
if (callControlId) this.activeCallIds.set(ws, callControlId);
|
|
4417
4344
|
await handler.handleCallStart(callControlId);
|
|
4418
4345
|
if (this.recording) {
|
|
@@ -4422,22 +4349,21 @@ Connect AI agents to phone numbers in 4 lines of code
|
|
|
4422
4349
|
getLogger().warn(`Could not start recording: ${String(e)}`);
|
|
4423
4350
|
}
|
|
4424
4351
|
}
|
|
4425
|
-
} else if (
|
|
4426
|
-
const
|
|
4352
|
+
} else if (event === "media") {
|
|
4353
|
+
const track = data.media?.track ?? "inbound";
|
|
4354
|
+
if (track !== "inbound") return;
|
|
4355
|
+
const audioChunk = data.media?.payload ?? "";
|
|
4427
4356
|
if (!audioChunk) return;
|
|
4428
4357
|
handler.handleAudio(Buffer.from(audioChunk, "base64"));
|
|
4429
|
-
} else if (
|
|
4430
|
-
const digit = String(data.
|
|
4358
|
+
} else if (event === "dtmf") {
|
|
4359
|
+
const digit = String(data.dtmf?.digit ?? "").trim();
|
|
4431
4360
|
if (digit) {
|
|
4432
4361
|
getLogger().info(`Telnyx DTMF received: ${digit}`);
|
|
4433
4362
|
await handler.handleDtmf(digit);
|
|
4434
4363
|
}
|
|
4435
|
-
} else if (
|
|
4436
|
-
|
|
4437
|
-
|
|
4438
|
-
getLogger().info(`Telnyx recording saved: ${recordingUrl}`);
|
|
4439
|
-
}
|
|
4440
|
-
} else if (eventType === "stream_stopped") {
|
|
4364
|
+
} else if (event === "error") {
|
|
4365
|
+
getLogger().warn(`Telnyx stream error: ${JSON.stringify(data)}`);
|
|
4366
|
+
} else if (event === "stop") {
|
|
4441
4367
|
await handler.handleStop();
|
|
4442
4368
|
}
|
|
4443
4369
|
} catch (err) {
|
|
@@ -5407,6 +5333,94 @@ var init_tunnel = __esm({
|
|
|
5407
5333
|
}
|
|
5408
5334
|
});
|
|
5409
5335
|
|
|
5336
|
+
// src/carrier-config.ts
|
|
5337
|
+
var carrier_config_exports = {};
|
|
5338
|
+
__export(carrier_config_exports, {
|
|
5339
|
+
autoConfigureCarrier: () => autoConfigureCarrier,
|
|
5340
|
+
configureTelnyxNumber: () => configureTelnyxNumber,
|
|
5341
|
+
configureTwilioNumber: () => configureTwilioNumber
|
|
5342
|
+
});
|
|
5343
|
+
async function configureTwilioNumber(accountSid, authToken, phoneNumber, voiceUrl) {
|
|
5344
|
+
const auth = `Basic ${Buffer.from(`${accountSid}:${authToken}`).toString("base64")}`;
|
|
5345
|
+
const listUrl = `${TWILIO_API_BASE}/Accounts/${accountSid}/IncomingPhoneNumbers.json?PhoneNumber=${encodeURIComponent(phoneNumber)}`;
|
|
5346
|
+
const listResp = await fetch(listUrl, {
|
|
5347
|
+
method: "GET",
|
|
5348
|
+
headers: { Authorization: auth }
|
|
5349
|
+
});
|
|
5350
|
+
if (!listResp.ok) {
|
|
5351
|
+
throw new Error(
|
|
5352
|
+
`Twilio IncomingPhoneNumbers.list failed: ${listResp.status} ${await listResp.text()}`
|
|
5353
|
+
);
|
|
5354
|
+
}
|
|
5355
|
+
const body = await listResp.json();
|
|
5356
|
+
const match = body.incoming_phone_numbers?.[0];
|
|
5357
|
+
if (!match) {
|
|
5358
|
+
throw new Error(`Twilio number ${phoneNumber} not found on account ${accountSid}`);
|
|
5359
|
+
}
|
|
5360
|
+
const updateUrl = `${TWILIO_API_BASE}/Accounts/${accountSid}/IncomingPhoneNumbers/${match.sid}.json`;
|
|
5361
|
+
const form = new URLSearchParams({ VoiceUrl: voiceUrl, VoiceMethod: "POST" });
|
|
5362
|
+
const updateResp = await fetch(updateUrl, {
|
|
5363
|
+
method: "POST",
|
|
5364
|
+
headers: {
|
|
5365
|
+
Authorization: auth,
|
|
5366
|
+
"Content-Type": "application/x-www-form-urlencoded"
|
|
5367
|
+
},
|
|
5368
|
+
body: form.toString()
|
|
5369
|
+
});
|
|
5370
|
+
if (!updateResp.ok) {
|
|
5371
|
+
throw new Error(
|
|
5372
|
+
`Twilio IncomingPhoneNumbers.update failed: ${updateResp.status} ${await updateResp.text()}`
|
|
5373
|
+
);
|
|
5374
|
+
}
|
|
5375
|
+
}
|
|
5376
|
+
async function configureTelnyxNumber(apiKey, connectionId, phoneNumber) {
|
|
5377
|
+
const resp = await fetch(`${TELNYX_API_BASE}/phone_numbers/${encodeURIComponent(phoneNumber)}`, {
|
|
5378
|
+
method: "PATCH",
|
|
5379
|
+
headers: {
|
|
5380
|
+
Authorization: `Bearer ${apiKey}`,
|
|
5381
|
+
"Content-Type": "application/json"
|
|
5382
|
+
},
|
|
5383
|
+
body: JSON.stringify({ connection_id: connectionId })
|
|
5384
|
+
});
|
|
5385
|
+
if (!resp.ok) {
|
|
5386
|
+
throw new Error(
|
|
5387
|
+
`Telnyx PATCH /phone_numbers/${phoneNumber} failed: ${resp.status} ${await resp.text()}`
|
|
5388
|
+
);
|
|
5389
|
+
}
|
|
5390
|
+
}
|
|
5391
|
+
async function autoConfigureCarrier(params) {
|
|
5392
|
+
const log2 = getLogger();
|
|
5393
|
+
const provider = params.telephonyProvider ?? (params.twilioSid ? "twilio" : "telnyx");
|
|
5394
|
+
if (provider === "twilio" && params.twilioSid && params.twilioToken) {
|
|
5395
|
+
const voiceUrl = `https://${params.webhookHost}/webhooks/twilio/voice`;
|
|
5396
|
+
try {
|
|
5397
|
+
await configureTwilioNumber(params.twilioSid, params.twilioToken, params.phoneNumber, voiceUrl);
|
|
5398
|
+
log2.info("Twilio webhook set to %s", voiceUrl);
|
|
5399
|
+
} catch (err) {
|
|
5400
|
+
log2.warn("Could not auto-configure Twilio webhook: %s", err instanceof Error ? err.message : String(err));
|
|
5401
|
+
log2.info("Set webhook manually to: %s", voiceUrl);
|
|
5402
|
+
}
|
|
5403
|
+
return;
|
|
5404
|
+
}
|
|
5405
|
+
if (provider === "telnyx" && params.telnyxKey && params.telnyxConnectionId) {
|
|
5406
|
+
try {
|
|
5407
|
+
await configureTelnyxNumber(params.telnyxKey, params.telnyxConnectionId, params.phoneNumber);
|
|
5408
|
+
log2.info("Telnyx number %s associated with connection %s", params.phoneNumber, params.telnyxConnectionId);
|
|
5409
|
+
} catch (err) {
|
|
5410
|
+
log2.warn("Could not auto-configure Telnyx number: %s", err instanceof Error ? err.message : String(err));
|
|
5411
|
+
}
|
|
5412
|
+
}
|
|
5413
|
+
}
|
|
5414
|
+
var TWILIO_API_BASE, TELNYX_API_BASE;
|
|
5415
|
+
var init_carrier_config = __esm({
|
|
5416
|
+
"src/carrier-config.ts"() {
|
|
5417
|
+
"use strict";
|
|
5418
|
+
init_logger();
|
|
5419
|
+
TWILIO_API_BASE = "https://api.twilio.com/2010-04-01";
|
|
5420
|
+
TELNYX_API_BASE = "https://api.telnyx.com/v2";
|
|
5421
|
+
}
|
|
5422
|
+
});
|
|
5423
|
+
|
|
5410
5424
|
// src/test-mode.ts
|
|
5411
5425
|
var test_mode_exports = {};
|
|
5412
5426
|
__export(test_mode_exports, {
|
|
@@ -6521,31 +6535,35 @@ var require_node_cron = __commonJS({
|
|
|
6521
6535
|
var index_exports = {};
|
|
6522
6536
|
__export(index_exports, {
|
|
6523
6537
|
AllProvidersFailedError: () => AllProvidersFailedError,
|
|
6524
|
-
AssemblyAISTT: () =>
|
|
6538
|
+
AssemblyAISTT: () => STT5,
|
|
6525
6539
|
AuthenticationError: () => AuthenticationError,
|
|
6526
6540
|
BackgroundAudioPlayer: () => BackgroundAudioPlayer,
|
|
6527
6541
|
BuiltinAudioClip: () => BuiltinAudioClip,
|
|
6528
6542
|
CallMetricsAccumulator: () => CallMetricsAccumulator,
|
|
6529
|
-
CartesiaSTT: () =>
|
|
6530
|
-
CartesiaTTS: () =>
|
|
6543
|
+
CartesiaSTT: () => STT3,
|
|
6544
|
+
CartesiaTTS: () => TTS3,
|
|
6531
6545
|
ChatContext: () => ChatContext,
|
|
6546
|
+
CloudflareTunnel: () => CloudflareTunnel,
|
|
6532
6547
|
DEFAULT_MIN_SENTENCE_LEN: () => DEFAULT_MIN_SENTENCE_LEN,
|
|
6533
6548
|
DEFAULT_PRICING: () => DEFAULT_PRICING,
|
|
6534
6549
|
DTMF_EVENTS: () => DTMF_EVENTS,
|
|
6535
|
-
DeepgramSTT: () =>
|
|
6550
|
+
DeepgramSTT: () => STT,
|
|
6551
|
+
ElevenLabsConvAI: () => ConvAI,
|
|
6536
6552
|
ElevenLabsConvAIAdapter: () => ElevenLabsConvAIAdapter,
|
|
6537
|
-
ElevenLabsTTS: () =>
|
|
6553
|
+
ElevenLabsTTS: () => TTS,
|
|
6538
6554
|
FallbackLLMProvider: () => FallbackLLMProvider,
|
|
6539
6555
|
GEMINI_DEFAULT_INPUT_SR: () => GEMINI_DEFAULT_INPUT_SR,
|
|
6540
6556
|
GEMINI_DEFAULT_OUTPUT_SR: () => GEMINI_DEFAULT_OUTPUT_SR,
|
|
6541
6557
|
GeminiLiveAdapter: () => GeminiLiveAdapter,
|
|
6558
|
+
Guardrail: () => Guardrail,
|
|
6542
6559
|
IVRActivity: () => IVRActivity,
|
|
6543
6560
|
LLMLoop: () => LLMLoop,
|
|
6544
|
-
LMNTTTS: () =>
|
|
6561
|
+
LMNTTTS: () => TTS5,
|
|
6545
6562
|
MetricsStore: () => MetricsStore,
|
|
6546
6563
|
OpenAILLMProvider: () => OpenAILLMProvider,
|
|
6564
|
+
OpenAIRealtime: () => Realtime,
|
|
6547
6565
|
OpenAIRealtimeAdapter: () => OpenAIRealtimeAdapter,
|
|
6548
|
-
OpenAITTS: () =>
|
|
6566
|
+
OpenAITTS: () => TTS2,
|
|
6549
6567
|
PartialStreamError: () => PartialStreamError,
|
|
6550
6568
|
Patter: () => Patter,
|
|
6551
6569
|
PatterConnectionError: () => PatterConnectionError,
|
|
@@ -6553,15 +6571,19 @@ __export(index_exports, {
|
|
|
6553
6571
|
PipelineHookExecutor: () => PipelineHookExecutor,
|
|
6554
6572
|
ProvisionError: () => ProvisionError,
|
|
6555
6573
|
RemoteMessageHandler: () => RemoteMessageHandler,
|
|
6556
|
-
RimeTTS: () =>
|
|
6574
|
+
RimeTTS: () => TTS4,
|
|
6557
6575
|
SentenceChunker: () => SentenceChunker,
|
|
6558
|
-
SonioxSTT: () =>
|
|
6576
|
+
SonioxSTT: () => STT4,
|
|
6577
|
+
StaticTunnel: () => Static,
|
|
6578
|
+
Telnyx: () => Carrier2,
|
|
6559
6579
|
TestSession: () => TestSession,
|
|
6560
6580
|
TfidfLoopDetector: () => TfidfLoopDetector,
|
|
6581
|
+
Tool: () => Tool,
|
|
6582
|
+
Twilio: () => Carrier,
|
|
6561
6583
|
ULTRAVOX_DEFAULT_API_BASE: () => ULTRAVOX_DEFAULT_API_BASE,
|
|
6562
6584
|
ULTRAVOX_DEFAULT_SR: () => ULTRAVOX_DEFAULT_SR,
|
|
6563
6585
|
UltravoxRealtimeAdapter: () => UltravoxRealtimeAdapter,
|
|
6564
|
-
WhisperSTT: () =>
|
|
6586
|
+
WhisperSTT: () => STT2,
|
|
6565
6587
|
builtinClipPath: () => builtinClipPath,
|
|
6566
6588
|
calculateRealtimeCost: () => calculateRealtimeCost,
|
|
6567
6589
|
calculateSttCost: () => calculateSttCost,
|
|
@@ -6577,6 +6599,7 @@ __export(index_exports, {
|
|
|
6577
6599
|
filterMarkdown: () => filterMarkdown,
|
|
6578
6600
|
formatDtmf: () => formatDtmf,
|
|
6579
6601
|
getLogger: () => getLogger,
|
|
6602
|
+
guardrail: () => guardrail,
|
|
6580
6603
|
isRemoteUrl: () => isRemoteUrl,
|
|
6581
6604
|
isWebSocketUrl: () => isWebSocketUrl,
|
|
6582
6605
|
makeAuthMiddleware: () => makeAuthMiddleware,
|
|
@@ -6598,6 +6621,7 @@ __export(index_exports, {
|
|
|
6598
6621
|
selectSoundFromList: () => selectSoundFromList,
|
|
6599
6622
|
setLogger: () => setLogger,
|
|
6600
6623
|
startTunnel: () => startTunnel,
|
|
6624
|
+
tool: () => tool,
|
|
6601
6625
|
whisper: () => whisper
|
|
6602
6626
|
});
|
|
6603
6627
|
module.exports = __toCommonJS(index_exports);
|
|
@@ -6743,50 +6767,89 @@ var PatterConnection = class {
|
|
|
6743
6767
|
}
|
|
6744
6768
|
};
|
|
6745
6769
|
|
|
6746
|
-
// src/
|
|
6747
|
-
|
|
6748
|
-
|
|
6770
|
+
// src/client.ts
|
|
6771
|
+
init_server();
|
|
6772
|
+
|
|
6773
|
+
// src/engines/openai.ts
|
|
6774
|
+
var Realtime = class {
|
|
6775
|
+
kind = "openai_realtime";
|
|
6749
6776
|
apiKey;
|
|
6750
|
-
|
|
6751
|
-
|
|
6752
|
-
|
|
6753
|
-
|
|
6754
|
-
|
|
6755
|
-
|
|
6756
|
-
|
|
6757
|
-
|
|
6777
|
+
model;
|
|
6778
|
+
voice;
|
|
6779
|
+
constructor(opts = {}) {
|
|
6780
|
+
const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
|
|
6781
|
+
if (!key) {
|
|
6782
|
+
throw new Error(
|
|
6783
|
+
"OpenAI Realtime requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
|
|
6784
|
+
);
|
|
6785
|
+
}
|
|
6786
|
+
this.apiKey = key;
|
|
6787
|
+
this.model = opts.model ?? "gpt-4o-mini-realtime-preview";
|
|
6788
|
+
this.voice = opts.voice ?? "alloy";
|
|
6758
6789
|
}
|
|
6759
6790
|
};
|
|
6760
|
-
|
|
6761
|
-
|
|
6791
|
+
|
|
6792
|
+
// src/engines/elevenlabs.ts
|
|
6793
|
+
var ConvAI = class {
|
|
6794
|
+
kind = "elevenlabs_convai";
|
|
6762
6795
|
apiKey;
|
|
6796
|
+
agentId;
|
|
6763
6797
|
voice;
|
|
6764
|
-
constructor(
|
|
6765
|
-
|
|
6766
|
-
|
|
6767
|
-
|
|
6798
|
+
constructor(opts = {}) {
|
|
6799
|
+
const key = opts.apiKey ?? process.env.ELEVENLABS_API_KEY;
|
|
6800
|
+
const agent = opts.agentId ?? process.env.ELEVENLABS_AGENT_ID;
|
|
6801
|
+
if (!key) {
|
|
6802
|
+
throw new Error(
|
|
6803
|
+
"ElevenLabs ConvAI requires an apiKey. Pass { apiKey: '...' } or set ELEVENLABS_API_KEY in the environment."
|
|
6804
|
+
);
|
|
6805
|
+
}
|
|
6806
|
+
if (!agent) {
|
|
6807
|
+
throw new Error(
|
|
6808
|
+
"ElevenLabs ConvAI requires an agentId. Pass { agentId: 'agent_...' } or set ELEVENLABS_AGENT_ID in the environment."
|
|
6809
|
+
);
|
|
6810
|
+
}
|
|
6811
|
+
this.apiKey = key;
|
|
6812
|
+
this.agentId = agent;
|
|
6813
|
+
this.voice = opts.voice;
|
|
6768
6814
|
}
|
|
6769
|
-
|
|
6770
|
-
|
|
6815
|
+
};
|
|
6816
|
+
|
|
6817
|
+
// src/tunnels/index.ts
|
|
6818
|
+
var CloudflareTunnel = class {
|
|
6819
|
+
kind = "cloudflare";
|
|
6820
|
+
};
|
|
6821
|
+
var Static = class {
|
|
6822
|
+
kind = "static";
|
|
6823
|
+
hostname;
|
|
6824
|
+
constructor(opts) {
|
|
6825
|
+
if (!opts.hostname) {
|
|
6826
|
+
throw new Error("Static tunnel requires a non-empty hostname.");
|
|
6827
|
+
}
|
|
6828
|
+
this.hostname = opts.hostname;
|
|
6771
6829
|
}
|
|
6772
6830
|
};
|
|
6773
|
-
function deepgram(opts) {
|
|
6774
|
-
return new STTConfigImpl("deepgram", opts.apiKey, opts.language ?? "en");
|
|
6775
|
-
}
|
|
6776
|
-
function whisper(opts) {
|
|
6777
|
-
return new STTConfigImpl("whisper", opts.apiKey, opts.language ?? "en");
|
|
6778
|
-
}
|
|
6779
|
-
function elevenlabs(opts) {
|
|
6780
|
-
return new TTSConfigImpl("elevenlabs", opts.apiKey, opts.voice ?? "rachel");
|
|
6781
|
-
}
|
|
6782
|
-
function openaiTts(opts) {
|
|
6783
|
-
return new TTSConfigImpl("openai", opts.apiKey, opts.voice ?? "alloy");
|
|
6784
|
-
}
|
|
6785
6831
|
|
|
6786
6832
|
// src/client.ts
|
|
6787
|
-
init_server();
|
|
6788
6833
|
var DEFAULT_BACKEND_URL2 = "wss://api.getpatter.com";
|
|
6789
6834
|
var DEFAULT_REST_URL = "https://api.getpatter.com";
|
|
6835
|
+
function sttConfigToDict(cfg) {
|
|
6836
|
+
const out = {
|
|
6837
|
+
provider: cfg.provider,
|
|
6838
|
+
api_key: cfg.apiKey,
|
|
6839
|
+
language: cfg.language
|
|
6840
|
+
};
|
|
6841
|
+
if (cfg.options) out.options = { ...cfg.options };
|
|
6842
|
+
return out;
|
|
6843
|
+
}
|
|
6844
|
+
function ttsConfigToDict(cfg) {
|
|
6845
|
+
const out = {
|
|
6846
|
+
provider: cfg.provider,
|
|
6847
|
+
api_key: cfg.apiKey,
|
|
6848
|
+
voice: cfg.voice
|
|
6849
|
+
};
|
|
6850
|
+
if (cfg.options) out.options = { ...cfg.options };
|
|
6851
|
+
return out;
|
|
6852
|
+
}
|
|
6790
6853
|
var Patter = class {
|
|
6791
6854
|
apiKey;
|
|
6792
6855
|
backendUrl;
|
|
@@ -6797,20 +6860,39 @@ var Patter = class {
|
|
|
6797
6860
|
embeddedServer = null;
|
|
6798
6861
|
tunnelHandle = null;
|
|
6799
6862
|
constructor(options) {
|
|
6800
|
-
|
|
6863
|
+
const hasCarrier = "carrier" in options && options.carrier !== void 0;
|
|
6864
|
+
const isLocal = "mode" in options && options.mode === "local" || hasCarrier;
|
|
6865
|
+
if (isLocal) {
|
|
6801
6866
|
const local = options;
|
|
6802
6867
|
if (!local.phoneNumber) {
|
|
6803
6868
|
throw new Error("Local mode requires phoneNumber");
|
|
6804
6869
|
}
|
|
6805
|
-
if (!local.
|
|
6806
|
-
throw new Error(
|
|
6870
|
+
if (!local.carrier) {
|
|
6871
|
+
throw new Error(
|
|
6872
|
+
"Local mode requires a `carrier` instance. Pass `carrier: new Twilio({...})` or `carrier: new Telnyx({...})`."
|
|
6873
|
+
);
|
|
6807
6874
|
}
|
|
6808
|
-
|
|
6809
|
-
|
|
6875
|
+
const carrier = local.carrier;
|
|
6876
|
+
const tunnel = local.tunnel;
|
|
6877
|
+
let tunnelWebhookUrl;
|
|
6878
|
+
if (tunnel instanceof Static) {
|
|
6879
|
+
if (local.webhookUrl) {
|
|
6880
|
+
throw new Error(
|
|
6881
|
+
"Cannot use both `tunnel: new StaticTunnel(...)` and `webhookUrl`. Pick one."
|
|
6882
|
+
);
|
|
6883
|
+
}
|
|
6884
|
+
tunnelWebhookUrl = tunnel.hostname;
|
|
6810
6885
|
}
|
|
6811
6886
|
this.mode = "local";
|
|
6812
|
-
const
|
|
6813
|
-
|
|
6887
|
+
const rawWebhook = tunnelWebhookUrl ?? local.webhookUrl;
|
|
6888
|
+
const normalizedWebhook = rawWebhook ? rawWebhook.replace(/^https?:\/\//, "").replace(/\/$/, "") : void 0;
|
|
6889
|
+
this.localConfig = {
|
|
6890
|
+
carrier,
|
|
6891
|
+
phoneNumber: local.phoneNumber,
|
|
6892
|
+
webhookUrl: normalizedWebhook,
|
|
6893
|
+
tunnel: local.tunnel,
|
|
6894
|
+
openaiKey: local.openaiKey
|
|
6895
|
+
};
|
|
6814
6896
|
this.apiKey = "";
|
|
6815
6897
|
this.backendUrl = DEFAULT_BACKEND_URL2;
|
|
6816
6898
|
this.restUrl = DEFAULT_REST_URL;
|
|
@@ -6827,25 +6909,55 @@ var Patter = class {
|
|
|
6827
6909
|
}
|
|
6828
6910
|
// === Local mode ===
|
|
6829
6911
|
agent(opts) {
|
|
6830
|
-
|
|
6912
|
+
let working = { ...opts };
|
|
6913
|
+
if (opts.engine) {
|
|
6914
|
+
if (opts.provider) {
|
|
6915
|
+
throw new Error(
|
|
6916
|
+
"Cannot pass both `engine:` and `provider:`. Use one (engine is preferred)."
|
|
6917
|
+
);
|
|
6918
|
+
}
|
|
6919
|
+
const engine = opts.engine;
|
|
6920
|
+
if (engine instanceof Realtime) {
|
|
6921
|
+
working = {
|
|
6922
|
+
...working,
|
|
6923
|
+
provider: "openai_realtime",
|
|
6924
|
+
model: working.model ?? engine.model,
|
|
6925
|
+
voice: working.voice ?? engine.voice
|
|
6926
|
+
};
|
|
6927
|
+
if (this.localConfig && !this.localConfig.openaiKey) {
|
|
6928
|
+
this.localConfig = { ...this.localConfig, openaiKey: engine.apiKey };
|
|
6929
|
+
}
|
|
6930
|
+
} else if (engine instanceof ConvAI) {
|
|
6931
|
+
working = {
|
|
6932
|
+
...working,
|
|
6933
|
+
provider: "elevenlabs_convai",
|
|
6934
|
+
voice: working.voice ?? engine.voice
|
|
6935
|
+
};
|
|
6936
|
+
} else {
|
|
6937
|
+
throw new Error(
|
|
6938
|
+
"Unknown engine. Expected OpenAIRealtime or ElevenLabsConvAI instance."
|
|
6939
|
+
);
|
|
6940
|
+
}
|
|
6941
|
+
}
|
|
6942
|
+
if (working.provider) {
|
|
6831
6943
|
const valid = ["openai_realtime", "elevenlabs_convai", "pipeline"];
|
|
6832
|
-
if (!valid.includes(
|
|
6833
|
-
throw new Error(`provider must be one of: ${valid.join(", ")}. Got: '${
|
|
6944
|
+
if (!valid.includes(working.provider)) {
|
|
6945
|
+
throw new Error(`provider must be one of: ${valid.join(", ")}. Got: '${working.provider}'`);
|
|
6834
6946
|
}
|
|
6835
6947
|
}
|
|
6836
|
-
if (
|
|
6837
|
-
if (!Array.isArray(
|
|
6948
|
+
if (working.tools) {
|
|
6949
|
+
if (!Array.isArray(working.tools)) {
|
|
6838
6950
|
throw new TypeError("tools must be an array");
|
|
6839
6951
|
}
|
|
6840
|
-
|
|
6841
|
-
if (!
|
|
6842
|
-
if (!
|
|
6952
|
+
working.tools.forEach((tool2, i) => {
|
|
6953
|
+
if (!tool2.name) throw new Error(`tools[${i}] missing required 'name' field`);
|
|
6954
|
+
if (!tool2.webhookUrl && !tool2.handler) throw new Error(`tools[${i}] requires either 'webhookUrl' or 'handler'`);
|
|
6843
6955
|
});
|
|
6844
6956
|
}
|
|
6845
|
-
if (
|
|
6957
|
+
if (working.variables !== void 0 && (typeof working.variables !== "object" || Array.isArray(working.variables))) {
|
|
6846
6958
|
throw new TypeError("variables must be an object");
|
|
6847
6959
|
}
|
|
6848
|
-
return
|
|
6960
|
+
return working;
|
|
6849
6961
|
}
|
|
6850
6962
|
async serve(opts) {
|
|
6851
6963
|
if (this.mode !== "local" || !this.localConfig) {
|
|
@@ -6868,10 +6980,14 @@ var Patter = class {
|
|
|
6868
6980
|
}
|
|
6869
6981
|
let webhookUrl = this.localConfig.webhookUrl ?? "";
|
|
6870
6982
|
const port = opts.port ?? 8e3;
|
|
6871
|
-
|
|
6983
|
+
const ctorTunnel = this.localConfig.tunnel;
|
|
6984
|
+
const wantsCloudflaredFromServe = opts.tunnel === true;
|
|
6985
|
+
const wantsCloudflaredFromCtor = ctorTunnel === true || ctorTunnel instanceof CloudflareTunnel;
|
|
6986
|
+
const wantsCloudflared = wantsCloudflaredFromServe || wantsCloudflaredFromCtor;
|
|
6987
|
+
if (wantsCloudflared && webhookUrl) {
|
|
6872
6988
|
throw new Error("Cannot use both tunnel: true and webhookUrl. Pick one.");
|
|
6873
6989
|
}
|
|
6874
|
-
if (
|
|
6990
|
+
if (wantsCloudflared) {
|
|
6875
6991
|
const { startTunnel: startTunnel2 } = await Promise.resolve().then(() => (init_tunnel(), tunnel_exports));
|
|
6876
6992
|
this.tunnelHandle = await startTunnel2(port);
|
|
6877
6993
|
webhookUrl = this.tunnelHandle.hostname;
|
|
@@ -6881,17 +6997,29 @@ var Patter = class {
|
|
|
6881
6997
|
"No webhookUrl configured. Either:\n - Pass webhookUrl in the Patter constructor\n - Use tunnel: true in serve() to auto-create a tunnel"
|
|
6882
6998
|
);
|
|
6883
6999
|
}
|
|
7000
|
+
const carrier = this.localConfig.carrier;
|
|
7001
|
+
const telephonyProvider = carrier.kind === "twilio" ? "twilio" : "telnyx";
|
|
7002
|
+
const { autoConfigureCarrier: autoConfigureCarrier2 } = await Promise.resolve().then(() => (init_carrier_config(), carrier_config_exports));
|
|
7003
|
+
await autoConfigureCarrier2({
|
|
7004
|
+
telephonyProvider,
|
|
7005
|
+
twilioSid: carrier.kind === "twilio" ? carrier.accountSid : void 0,
|
|
7006
|
+
twilioToken: carrier.kind === "twilio" ? carrier.authToken : void 0,
|
|
7007
|
+
telnyxKey: carrier.kind === "telnyx" ? carrier.apiKey : void 0,
|
|
7008
|
+
telnyxConnectionId: carrier.kind === "telnyx" ? carrier.connectionId : void 0,
|
|
7009
|
+
phoneNumber: this.localConfig.phoneNumber,
|
|
7010
|
+
webhookHost: webhookUrl
|
|
7011
|
+
});
|
|
6884
7012
|
this.embeddedServer = new EmbeddedServer(
|
|
6885
7013
|
{
|
|
6886
|
-
twilioSid:
|
|
6887
|
-
twilioToken:
|
|
7014
|
+
twilioSid: carrier.kind === "twilio" ? carrier.accountSid : void 0,
|
|
7015
|
+
twilioToken: carrier.kind === "twilio" ? carrier.authToken : void 0,
|
|
6888
7016
|
openaiKey: this.localConfig.openaiKey,
|
|
6889
7017
|
phoneNumber: this.localConfig.phoneNumber,
|
|
6890
7018
|
webhookUrl,
|
|
6891
|
-
telephonyProvider
|
|
6892
|
-
telnyxKey:
|
|
6893
|
-
telnyxConnectionId:
|
|
6894
|
-
telnyxPublicKey:
|
|
7019
|
+
telephonyProvider,
|
|
7020
|
+
telnyxKey: carrier.kind === "telnyx" ? carrier.apiKey : void 0,
|
|
7021
|
+
telnyxConnectionId: carrier.kind === "telnyx" ? carrier.connectionId : void 0,
|
|
7022
|
+
telnyxPublicKey: carrier.kind === "telnyx" ? carrier.publicKey : void 0
|
|
6895
7023
|
},
|
|
6896
7024
|
opts.agent,
|
|
6897
7025
|
opts.onCallStart,
|
|
@@ -6952,32 +7080,51 @@ var Patter = class {
|
|
|
6952
7080
|
if (!this.localConfig) {
|
|
6953
7081
|
throw new Error("local config missing");
|
|
6954
7082
|
}
|
|
6955
|
-
const { phoneNumber, webhookUrl,
|
|
6956
|
-
if (
|
|
6957
|
-
const telnyxKey =
|
|
6958
|
-
const connectionId =
|
|
7083
|
+
const { phoneNumber, webhookUrl, carrier } = this.localConfig;
|
|
7084
|
+
if (carrier.kind === "telnyx") {
|
|
7085
|
+
const telnyxKey = carrier.apiKey;
|
|
7086
|
+
const connectionId = carrier.connectionId;
|
|
6959
7087
|
const streamUrl = `wss://${webhookUrl}/ws/stream/${encodeURIComponent(localOpts.to)}?caller=${encodeURIComponent(phoneNumber)}&callee=${encodeURIComponent(localOpts.to)}`;
|
|
7088
|
+
const telnyxPayload = {
|
|
7089
|
+
connection_id: connectionId,
|
|
7090
|
+
from: phoneNumber,
|
|
7091
|
+
to: localOpts.to,
|
|
7092
|
+
stream_url: streamUrl,
|
|
7093
|
+
stream_track: "both_tracks"
|
|
7094
|
+
};
|
|
7095
|
+
if (localOpts.ringTimeout !== void 0) {
|
|
7096
|
+
telnyxPayload.timeout_secs = Math.max(1, Math.floor(localOpts.ringTimeout));
|
|
7097
|
+
}
|
|
6960
7098
|
const response2 = await fetch("https://api.telnyx.com/v2/calls", {
|
|
6961
7099
|
method: "POST",
|
|
6962
7100
|
headers: {
|
|
6963
7101
|
"Content-Type": "application/json",
|
|
6964
7102
|
Authorization: `Bearer ${telnyxKey}`
|
|
6965
7103
|
},
|
|
6966
|
-
body: JSON.stringify(
|
|
6967
|
-
connection_id: connectionId,
|
|
6968
|
-
from: phoneNumber,
|
|
6969
|
-
to: localOpts.to,
|
|
6970
|
-
stream_url: streamUrl,
|
|
6971
|
-
stream_track: "both_tracks"
|
|
6972
|
-
})
|
|
7104
|
+
body: JSON.stringify(telnyxPayload)
|
|
6973
7105
|
});
|
|
6974
7106
|
if (!response2.ok) {
|
|
6975
7107
|
throw new ProvisionError(`Failed to initiate Telnyx call: ${await response2.text()}`);
|
|
6976
7108
|
}
|
|
6977
|
-
|
|
6978
|
-
|
|
6979
|
-
|
|
6980
|
-
|
|
7109
|
+
if (this.embeddedServer) {
|
|
7110
|
+
try {
|
|
7111
|
+
const body = await response2.clone().json();
|
|
7112
|
+
const callId = body.data?.call_control_id;
|
|
7113
|
+
if (callId) {
|
|
7114
|
+
this.embeddedServer.metricsStore.recordCallInitiated({
|
|
7115
|
+
call_id: callId,
|
|
7116
|
+
caller: phoneNumber,
|
|
7117
|
+
callee: localOpts.to,
|
|
7118
|
+
direction: "outbound"
|
|
7119
|
+
});
|
|
7120
|
+
}
|
|
7121
|
+
} catch {
|
|
7122
|
+
}
|
|
7123
|
+
}
|
|
7124
|
+
return;
|
|
7125
|
+
}
|
|
7126
|
+
const twilioSid = carrier.accountSid;
|
|
7127
|
+
const twilioToken = carrier.authToken;
|
|
6981
7128
|
const statusCallbackUrl = `https://${webhookUrl}/webhooks/twilio/status`;
|
|
6982
7129
|
const url = `https://api.twilio.com/2010-04-01/Accounts/${twilioSid}/Calls.json`;
|
|
6983
7130
|
const params = new URLSearchParams({
|
|
@@ -6985,13 +7132,19 @@ var Patter = class {
|
|
|
6985
7132
|
From: phoneNumber,
|
|
6986
7133
|
Url: `https://${webhookUrl}/webhooks/twilio/voice`,
|
|
6987
7134
|
StatusCallback: statusCallbackUrl,
|
|
6988
|
-
StatusCallbackMethod: "POST"
|
|
7135
|
+
StatusCallbackMethod: "POST",
|
|
7136
|
+
// Full lifecycle so the dashboard sees ringing/no-answer/busy/failed
|
|
7137
|
+
// transitions even when media never arrives.
|
|
7138
|
+
StatusCallbackEvent: "initiated ringing answered completed"
|
|
6989
7139
|
});
|
|
6990
7140
|
if (localOpts.machineDetection) {
|
|
6991
7141
|
params.append("MachineDetection", "DetectMessageEnd");
|
|
6992
7142
|
params.append("AsyncAmd", "true");
|
|
6993
7143
|
params.append("AsyncAmdStatusCallback", `https://${webhookUrl}/webhooks/twilio/amd`);
|
|
6994
7144
|
}
|
|
7145
|
+
if (localOpts.ringTimeout !== void 0) {
|
|
7146
|
+
params.append("Timeout", String(Math.max(1, Math.floor(localOpts.ringTimeout))));
|
|
7147
|
+
}
|
|
6995
7148
|
if (localOpts.voicemailMessage && this.embeddedServer) {
|
|
6996
7149
|
this.embeddedServer.voicemailMessage = localOpts.voicemailMessage;
|
|
6997
7150
|
}
|
|
@@ -7006,6 +7159,21 @@ var Patter = class {
|
|
|
7006
7159
|
if (!response.ok) {
|
|
7007
7160
|
throw new ProvisionError(`Failed to initiate call: ${await response.text()}`);
|
|
7008
7161
|
}
|
|
7162
|
+
if (this.embeddedServer) {
|
|
7163
|
+
try {
|
|
7164
|
+
const body = await response.clone().json();
|
|
7165
|
+
const callSid = body.sid;
|
|
7166
|
+
if (callSid) {
|
|
7167
|
+
this.embeddedServer.metricsStore.recordCallInitiated({
|
|
7168
|
+
call_id: callSid,
|
|
7169
|
+
caller: phoneNumber,
|
|
7170
|
+
callee: localOpts.to,
|
|
7171
|
+
direction: "outbound"
|
|
7172
|
+
});
|
|
7173
|
+
}
|
|
7174
|
+
} catch {
|
|
7175
|
+
}
|
|
7176
|
+
}
|
|
7009
7177
|
return;
|
|
7010
7178
|
}
|
|
7011
7179
|
const cloudOpts = options;
|
|
@@ -7088,61 +7256,6 @@ var Patter = class {
|
|
|
7088
7256
|
const data = await response.json();
|
|
7089
7257
|
return data.map((c) => ({ id: c.id, direction: c.direction, caller: c.caller, callee: c.callee, startedAt: c.started_at, endedAt: c.ended_at, durationSeconds: c.duration_seconds, status: c.status, transcript: c.transcript }));
|
|
7090
7258
|
}
|
|
7091
|
-
// Provider helpers
|
|
7092
|
-
static deepgram = deepgram;
|
|
7093
|
-
static whisper = whisper;
|
|
7094
|
-
static elevenlabs = elevenlabs;
|
|
7095
|
-
static openaiTts = openaiTts;
|
|
7096
|
-
static guardrail(opts) {
|
|
7097
|
-
return {
|
|
7098
|
-
name: opts.name,
|
|
7099
|
-
blockedTerms: opts.blockedTerms,
|
|
7100
|
-
check: opts.check,
|
|
7101
|
-
replacement: opts.replacement ?? "I'm sorry, I can't respond to that."
|
|
7102
|
-
};
|
|
7103
|
-
}
|
|
7104
|
-
/**
|
|
7105
|
-
* Create a tool definition for use with `agent({ tools: [...] })`.
|
|
7106
|
-
*
|
|
7107
|
-
* Either `handler` (a function) or `webhookUrl` must be provided.
|
|
7108
|
-
*
|
|
7109
|
-
* @param opts.name - Tool name (visible to the LLM).
|
|
7110
|
-
* @param opts.description - What the tool does (visible to the LLM).
|
|
7111
|
-
* @param opts.parameters - JSON Schema for tool arguments.
|
|
7112
|
-
* @param opts.handler - Async function called in-process when the LLM invokes the tool.
|
|
7113
|
-
* @param opts.webhookUrl - URL to POST to when the LLM invokes the tool.
|
|
7114
|
-
*
|
|
7115
|
-
* @example
|
|
7116
|
-
* ```ts
|
|
7117
|
-
* phone.agent({
|
|
7118
|
-
* systemPrompt: 'You are a pizza bot.',
|
|
7119
|
-
* tools: [
|
|
7120
|
-
* Patter.tool({
|
|
7121
|
-
* name: 'check_menu',
|
|
7122
|
-
* description: 'Check available menu items',
|
|
7123
|
-
* handler: async (args) => JSON.stringify({ items: ['margherita'] }),
|
|
7124
|
-
* }),
|
|
7125
|
-
* ],
|
|
7126
|
-
* });
|
|
7127
|
-
* ```
|
|
7128
|
-
*/
|
|
7129
|
-
static tool(opts) {
|
|
7130
|
-
if (!opts.handler && !opts.webhookUrl) {
|
|
7131
|
-
throw new Error("tool() requires either handler or webhookUrl");
|
|
7132
|
-
}
|
|
7133
|
-
const t = {
|
|
7134
|
-
name: opts.name,
|
|
7135
|
-
description: opts.description ?? "",
|
|
7136
|
-
parameters: opts.parameters ?? { type: "object", properties: {} }
|
|
7137
|
-
};
|
|
7138
|
-
if (opts.handler) {
|
|
7139
|
-
t.handler = opts.handler;
|
|
7140
|
-
}
|
|
7141
|
-
if (opts.webhookUrl) {
|
|
7142
|
-
t.webhookUrl = opts.webhookUrl;
|
|
7143
|
-
}
|
|
7144
|
-
return t;
|
|
7145
|
-
}
|
|
7146
7259
|
// Internal
|
|
7147
7260
|
async registerNumber(provider, providerKey, number, providerSecret, country = "US", stt, tts) {
|
|
7148
7261
|
const credentials = { api_key: providerKey };
|
|
@@ -7158,8 +7271,8 @@ var Patter = class {
|
|
|
7158
7271
|
provider,
|
|
7159
7272
|
provider_credentials: credentials,
|
|
7160
7273
|
country,
|
|
7161
|
-
stt_config: stt?.
|
|
7162
|
-
tts_config: tts?.
|
|
7274
|
+
stt_config: stt ? stt.toDict?.() ?? sttConfigToDict(stt) : null,
|
|
7275
|
+
tts_config: tts ? tts.toDict?.() ?? ttsConfigToDict(tts) : null
|
|
7163
7276
|
})
|
|
7164
7277
|
});
|
|
7165
7278
|
if (response.status === 409) return;
|
|
@@ -7237,6 +7350,62 @@ function filterForTTS(text) {
|
|
|
7237
7350
|
return filterEmoji(filterMarkdown(text));
|
|
7238
7351
|
}
|
|
7239
7352
|
|
|
7353
|
+
// src/providers.ts
|
|
7354
|
+
var STTConfigImpl = class {
|
|
7355
|
+
provider;
|
|
7356
|
+
apiKey;
|
|
7357
|
+
language;
|
|
7358
|
+
options;
|
|
7359
|
+
constructor(provider, apiKey, language = "en", options) {
|
|
7360
|
+
this.provider = provider;
|
|
7361
|
+
this.apiKey = apiKey;
|
|
7362
|
+
this.language = language;
|
|
7363
|
+
if (options) this.options = options;
|
|
7364
|
+
}
|
|
7365
|
+
toDict() {
|
|
7366
|
+
const out = {
|
|
7367
|
+
provider: this.provider,
|
|
7368
|
+
api_key: this.apiKey,
|
|
7369
|
+
language: this.language
|
|
7370
|
+
};
|
|
7371
|
+
if (this.options) out.options = { ...this.options };
|
|
7372
|
+
return out;
|
|
7373
|
+
}
|
|
7374
|
+
};
|
|
7375
|
+
var TTSConfigImpl = class {
|
|
7376
|
+
provider;
|
|
7377
|
+
apiKey;
|
|
7378
|
+
voice;
|
|
7379
|
+
constructor(provider, apiKey, voice = "alloy") {
|
|
7380
|
+
this.provider = provider;
|
|
7381
|
+
this.apiKey = apiKey;
|
|
7382
|
+
this.voice = voice;
|
|
7383
|
+
}
|
|
7384
|
+
toDict() {
|
|
7385
|
+
return { provider: this.provider, api_key: this.apiKey, voice: this.voice };
|
|
7386
|
+
}
|
|
7387
|
+
};
|
|
7388
|
+
function deepgram(opts) {
|
|
7389
|
+
const options = {
|
|
7390
|
+
model: opts.model ?? "nova-3",
|
|
7391
|
+
endpointing_ms: opts.endpointingMs ?? 150,
|
|
7392
|
+
utterance_end_ms: opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3,
|
|
7393
|
+
smart_format: opts.smartFormat ?? true,
|
|
7394
|
+
interim_results: opts.interimResults ?? true
|
|
7395
|
+
};
|
|
7396
|
+
if (opts.vadEvents !== void 0) options.vad_events = opts.vadEvents;
|
|
7397
|
+
return new STTConfigImpl("deepgram", opts.apiKey, opts.language ?? "en", options);
|
|
7398
|
+
}
|
|
7399
|
+
function whisper(opts) {
|
|
7400
|
+
return new STTConfigImpl("whisper", opts.apiKey, opts.language ?? "en");
|
|
7401
|
+
}
|
|
7402
|
+
function elevenlabs(opts) {
|
|
7403
|
+
return new TTSConfigImpl("elevenlabs", opts.apiKey, opts.voice ?? "rachel");
|
|
7404
|
+
}
|
|
7405
|
+
function openaiTts(opts) {
|
|
7406
|
+
return new TTSConfigImpl("openai", opts.apiKey, opts.voice ?? "alloy");
|
|
7407
|
+
}
|
|
7408
|
+
|
|
7240
7409
|
// src/index.ts
|
|
7241
7410
|
init_pricing();
|
|
7242
7411
|
init_metrics();
|
|
@@ -7293,6 +7462,37 @@ var FallbackLLMProvider = class {
|
|
|
7293
7462
|
}
|
|
7294
7463
|
}
|
|
7295
7464
|
}
|
|
7465
|
+
/**
|
|
7466
|
+
* Async-friendly disposer. Parity with Python's ``FallbackLLMProvider.aclose()``
|
|
7467
|
+
* — safe to call multiple times, returns a resolved Promise once all probe
|
|
7468
|
+
* timers are cleared. Prefer this in async contexts so awaiting the
|
|
7469
|
+
* shutdown integrates naturally with the owning lifecycle.
|
|
7470
|
+
*/
|
|
7471
|
+
async aclose() {
|
|
7472
|
+
this.destroy();
|
|
7473
|
+
}
|
|
7474
|
+
/**
|
|
7475
|
+
* Explicit-resource-management hook so callers can write
|
|
7476
|
+
* ``await using fallback = new FallbackLLMProvider([...])`` and have
|
|
7477
|
+
* background probe timers cleared automatically when the block exits.
|
|
7478
|
+
* Mirrors Python's ``async with FallbackLLMProvider(...)``.
|
|
7479
|
+
*/
|
|
7480
|
+
async [Symbol.asyncDispose]() {
|
|
7481
|
+
await this.aclose();
|
|
7482
|
+
}
|
|
7483
|
+
/**
|
|
7484
|
+
* Stream only the text deltas, flattening the chunk envelope. Parity with
|
|
7485
|
+
* Python's ``FallbackLLMProvider.complete_stream``. Tool-call and done
|
|
7486
|
+
* markers are filtered out so callers can concatenate the yielded strings
|
|
7487
|
+
* directly.
|
|
7488
|
+
*/
|
|
7489
|
+
async *completeStream(messages, tools) {
|
|
7490
|
+
for await (const chunk of this.stream(messages, tools)) {
|
|
7491
|
+
if (chunk.type === "text") {
|
|
7492
|
+
yield chunk.content ?? "";
|
|
7493
|
+
}
|
|
7494
|
+
}
|
|
7495
|
+
}
|
|
7296
7496
|
// -----------------------------------------------------------------------
|
|
7297
7497
|
// LLMProvider implementation
|
|
7298
7498
|
// -----------------------------------------------------------------------
|
|
@@ -7815,13 +8015,37 @@ function wrapCallback(cb) {
|
|
|
7815
8015
|
}
|
|
7816
8016
|
};
|
|
7817
8017
|
}
|
|
7818
|
-
|
|
7819
|
-
|
|
7820
|
-
|
|
7821
|
-
|
|
7822
|
-
|
|
7823
|
-
|
|
7824
|
-
|
|
8018
|
+
function scheduleCron(cron, callback) {
|
|
8019
|
+
let cancelled = false;
|
|
8020
|
+
let task = null;
|
|
8021
|
+
const jobId = `cron-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
|
|
8022
|
+
loadCron().then((cm) => {
|
|
8023
|
+
if (cancelled) return;
|
|
8024
|
+
if (!cm.validate(cron)) {
|
|
8025
|
+
throw new Error(`Invalid cron expression: ${cron}`);
|
|
8026
|
+
}
|
|
8027
|
+
task = cm.schedule(cron, wrapCallback(callback));
|
|
8028
|
+
}).catch((err) => getLogger().error(`scheduleCron failed: ${String(err)}`));
|
|
8029
|
+
return {
|
|
8030
|
+
jobId,
|
|
8031
|
+
cancel() {
|
|
8032
|
+
if (cancelled) return;
|
|
8033
|
+
cancelled = true;
|
|
8034
|
+
if (task) {
|
|
8035
|
+
try {
|
|
8036
|
+
task.stop();
|
|
8037
|
+
} catch {
|
|
8038
|
+
}
|
|
8039
|
+
try {
|
|
8040
|
+
task.destroy?.();
|
|
8041
|
+
} catch {
|
|
8042
|
+
}
|
|
8043
|
+
}
|
|
8044
|
+
},
|
|
8045
|
+
get pending() {
|
|
8046
|
+
return !cancelled;
|
|
8047
|
+
}
|
|
8048
|
+
};
|
|
7825
8049
|
}
|
|
7826
8050
|
function scheduleOnce(at, callback) {
|
|
7827
8051
|
const delayMs = at.getTime() - Date.now();
|
|
@@ -7843,8 +8067,18 @@ function scheduleOnce(at, callback) {
|
|
|
7843
8067
|
}
|
|
7844
8068
|
};
|
|
7845
8069
|
}
|
|
7846
|
-
function scheduleInterval(
|
|
7847
|
-
|
|
8070
|
+
function scheduleInterval(intervalOrOpts, callback) {
|
|
8071
|
+
let intervalMs;
|
|
8072
|
+
if (typeof intervalOrOpts === "number") {
|
|
8073
|
+
intervalMs = intervalOrOpts;
|
|
8074
|
+
} else if (intervalOrOpts.intervalMs !== void 0) {
|
|
8075
|
+
intervalMs = intervalOrOpts.intervalMs;
|
|
8076
|
+
} else if (intervalOrOpts.seconds !== void 0) {
|
|
8077
|
+
intervalMs = intervalOrOpts.seconds * 1e3;
|
|
8078
|
+
} else {
|
|
8079
|
+
throw new Error("scheduleInterval requires seconds or intervalMs");
|
|
8080
|
+
}
|
|
8081
|
+
if (intervalMs <= 0) throw new Error("interval must be positive");
|
|
7848
8082
|
let cancelled = false;
|
|
7849
8083
|
const wrapped = wrapCallback(callback);
|
|
7850
8084
|
const timer = setInterval(() => {
|
|
@@ -7861,111 +8095,404 @@ function scheduleInterval(intervalMs, callback) {
|
|
|
7861
8095
|
}
|
|
7862
8096
|
};
|
|
7863
8097
|
}
|
|
7864
|
-
function makeHandle(jobId, task) {
|
|
7865
|
-
let cancelled = false;
|
|
7866
|
-
return {
|
|
7867
|
-
jobId,
|
|
7868
|
-
cancel() {
|
|
7869
|
-
if (cancelled) return;
|
|
7870
|
-
cancelled = true;
|
|
7871
|
-
try {
|
|
7872
|
-
task.stop();
|
|
7873
|
-
} catch {
|
|
7874
|
-
}
|
|
7875
|
-
try {
|
|
7876
|
-
task.destroy?.();
|
|
7877
|
-
} catch {
|
|
7878
|
-
}
|
|
7879
|
-
},
|
|
7880
|
-
get pending() {
|
|
7881
|
-
return !cancelled;
|
|
7882
|
-
}
|
|
7883
|
-
};
|
|
7884
|
-
}
|
|
7885
8098
|
|
|
7886
|
-
// src/
|
|
8099
|
+
// src/stt/deepgram.ts
|
|
7887
8100
|
init_deepgram_stt();
|
|
8101
|
+
var STT = class extends DeepgramSTT {
|
|
8102
|
+
constructor(opts = {}) {
|
|
8103
|
+
const key = opts.apiKey ?? process.env.DEEPGRAM_API_KEY;
|
|
8104
|
+
if (!key) {
|
|
8105
|
+
throw new Error(
|
|
8106
|
+
"Deepgram STT requires an apiKey. Pass { apiKey: 'dg_...' } or set DEEPGRAM_API_KEY in the environment."
|
|
8107
|
+
);
|
|
8108
|
+
}
|
|
8109
|
+
super(
|
|
8110
|
+
key,
|
|
8111
|
+
opts.language ?? "en",
|
|
8112
|
+
opts.model ?? "nova-3",
|
|
8113
|
+
opts.encoding ?? "linear16",
|
|
8114
|
+
opts.sampleRate ?? 16e3,
|
|
8115
|
+
{
|
|
8116
|
+
endpointingMs: opts.endpointingMs ?? 150,
|
|
8117
|
+
utteranceEndMs: opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3,
|
|
8118
|
+
smartFormat: opts.smartFormat ?? true,
|
|
8119
|
+
interimResults: opts.interimResults ?? true,
|
|
8120
|
+
...opts.vadEvents !== void 0 ? { vadEvents: opts.vadEvents } : {}
|
|
8121
|
+
}
|
|
8122
|
+
);
|
|
8123
|
+
}
|
|
8124
|
+
};
|
|
7888
8125
|
|
|
7889
|
-
// src/providers/
|
|
7890
|
-
var import_ws7 = __toESM(require("ws"));
|
|
8126
|
+
// src/providers/whisper-stt.ts
|
|
7891
8127
|
init_logger();
|
|
7892
|
-
var
|
|
7893
|
-
var
|
|
7894
|
-
|
|
7895
|
-
|
|
7896
|
-
|
|
7897
|
-
|
|
7898
|
-
|
|
8128
|
+
var OPENAI_TRANSCRIPTION_URL = "https://api.openai.com/v1/audio/transcriptions";
|
|
8129
|
+
var DEFAULT_BUFFER_SIZE = 16e3 * 2;
|
|
8130
|
+
function wrapPcmInWav(pcm, sampleRate = 16e3, channels = 1, bitsPerSample = 16) {
|
|
8131
|
+
const dataSize = pcm.length;
|
|
8132
|
+
const header = Buffer.alloc(44);
|
|
8133
|
+
header.write("RIFF", 0);
|
|
8134
|
+
header.writeUInt32LE(36 + dataSize, 4);
|
|
8135
|
+
header.write("WAVE", 8);
|
|
8136
|
+
header.write("fmt ", 12);
|
|
8137
|
+
header.writeUInt32LE(16, 16);
|
|
8138
|
+
header.writeUInt16LE(1, 20);
|
|
8139
|
+
header.writeUInt16LE(channels, 22);
|
|
8140
|
+
header.writeUInt32LE(sampleRate, 24);
|
|
8141
|
+
header.writeUInt32LE(sampleRate * channels * (bitsPerSample / 8), 28);
|
|
8142
|
+
header.writeUInt16LE(channels * (bitsPerSample / 8), 32);
|
|
8143
|
+
header.writeUInt16LE(bitsPerSample, 34);
|
|
8144
|
+
header.write("data", 36);
|
|
8145
|
+
header.writeUInt32LE(dataSize, 40);
|
|
8146
|
+
return Buffer.concat([header, pcm]);
|
|
7899
8147
|
}
|
|
7900
|
-
var
|
|
7901
|
-
|
|
7902
|
-
|
|
7903
|
-
|
|
7904
|
-
|
|
7905
|
-
|
|
7906
|
-
|
|
7907
|
-
|
|
7908
|
-
|
|
7909
|
-
|
|
7910
|
-
|
|
7911
|
-
|
|
8148
|
+
var WhisperSTT = class _WhisperSTT {
|
|
8149
|
+
apiKey;
|
|
8150
|
+
model;
|
|
8151
|
+
language;
|
|
8152
|
+
bufferSize;
|
|
8153
|
+
buffer = Buffer.alloc(0);
|
|
8154
|
+
callbacks = [];
|
|
8155
|
+
running = false;
|
|
8156
|
+
pendingTranscriptions = [];
|
|
8157
|
+
constructor(apiKey, model = "whisper-1", language, bufferSize = DEFAULT_BUFFER_SIZE) {
|
|
8158
|
+
this.apiKey = apiKey;
|
|
8159
|
+
this.model = model;
|
|
8160
|
+
this.language = language;
|
|
8161
|
+
this.bufferSize = bufferSize;
|
|
7912
8162
|
}
|
|
7913
|
-
|
|
7914
|
-
|
|
8163
|
+
/** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
|
|
8164
|
+
static forTwilio(apiKey, language = "en", model = "whisper-1") {
|
|
8165
|
+
return new _WhisperSTT(apiKey, model, language);
|
|
7915
8166
|
}
|
|
7916
|
-
|
|
7917
|
-
this.
|
|
7918
|
-
this.
|
|
7919
|
-
this.confCount = 0;
|
|
8167
|
+
async connect() {
|
|
8168
|
+
this.running = true;
|
|
8169
|
+
this.buffer = Buffer.alloc(0);
|
|
7920
8170
|
}
|
|
7921
|
-
|
|
7922
|
-
|
|
8171
|
+
sendAudio(audio) {
|
|
8172
|
+
if (!this.running) return;
|
|
8173
|
+
this.buffer = Buffer.concat([this.buffer, audio]);
|
|
8174
|
+
if (this.buffer.length >= this.bufferSize) {
|
|
8175
|
+
const pcm = this.buffer;
|
|
8176
|
+
this.buffer = Buffer.alloc(0);
|
|
8177
|
+
this.trackTranscription(this.transcribeBuffer(pcm));
|
|
8178
|
+
}
|
|
8179
|
+
}
|
|
8180
|
+
trackTranscription(promise) {
|
|
8181
|
+
const wrapped = promise.finally(() => {
|
|
8182
|
+
const idx = this.pendingTranscriptions.indexOf(wrapped);
|
|
8183
|
+
if (idx !== -1) this.pendingTranscriptions.splice(idx, 1);
|
|
8184
|
+
});
|
|
8185
|
+
this.pendingTranscriptions.push(wrapped);
|
|
7923
8186
|
}
|
|
7924
|
-
|
|
7925
|
-
|
|
7926
|
-
|
|
7927
|
-
|
|
7928
|
-
|
|
7929
|
-
keepaliveTimer = null;
|
|
7930
|
-
apiKey;
|
|
7931
|
-
model;
|
|
7932
|
-
languageHints;
|
|
7933
|
-
languageHintsStrict;
|
|
7934
|
-
sampleRate;
|
|
7935
|
-
numChannels;
|
|
7936
|
-
enableSpeakerDiarization;
|
|
7937
|
-
enableLanguageIdentification;
|
|
7938
|
-
maxEndpointDelayMs;
|
|
7939
|
-
clientReferenceId;
|
|
7940
|
-
baseUrl;
|
|
7941
|
-
constructor(apiKey, options = {}) {
|
|
7942
|
-
if (!apiKey) {
|
|
7943
|
-
throw new Error("Soniox apiKey is required");
|
|
8187
|
+
onTranscript(callback) {
|
|
8188
|
+
if (this.callbacks.length >= 10) {
|
|
8189
|
+
getLogger().warn("WhisperSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
|
|
8190
|
+
this.callbacks[this.callbacks.length - 1] = callback;
|
|
8191
|
+
return;
|
|
7944
8192
|
}
|
|
7945
|
-
|
|
7946
|
-
|
|
7947
|
-
|
|
8193
|
+
this.callbacks.push(callback);
|
|
8194
|
+
}
|
|
8195
|
+
async close() {
|
|
8196
|
+
this.running = false;
|
|
8197
|
+
if (this.buffer.length >= this.bufferSize / 4) {
|
|
8198
|
+
const pcm = this.buffer;
|
|
8199
|
+
this.buffer = Buffer.alloc(0);
|
|
8200
|
+
this.trackTranscription(this.transcribeBuffer(pcm));
|
|
8201
|
+
} else {
|
|
8202
|
+
this.buffer = Buffer.alloc(0);
|
|
8203
|
+
}
|
|
8204
|
+
await Promise.allSettled(this.pendingTranscriptions);
|
|
8205
|
+
this.callbacks = [];
|
|
8206
|
+
}
|
|
8207
|
+
// ------------------------------------------------------------------
|
|
8208
|
+
// Private
|
|
8209
|
+
// ------------------------------------------------------------------
|
|
8210
|
+
async transcribeBuffer(pcm) {
|
|
8211
|
+
const wav = wrapPcmInWav(pcm);
|
|
8212
|
+
const formData = new FormData();
|
|
8213
|
+
formData.append("file", new Blob([wav.buffer.slice(wav.byteOffset, wav.byteOffset + wav.byteLength)], { type: "audio/wav" }), "audio.wav");
|
|
8214
|
+
formData.append("model", this.model);
|
|
8215
|
+
if (this.language) {
|
|
8216
|
+
formData.append("language", this.language);
|
|
8217
|
+
}
|
|
8218
|
+
try {
|
|
8219
|
+
const resp = await fetch(OPENAI_TRANSCRIPTION_URL, {
|
|
8220
|
+
method: "POST",
|
|
8221
|
+
headers: { Authorization: `Bearer ${this.apiKey}` },
|
|
8222
|
+
body: formData,
|
|
8223
|
+
signal: AbortSignal.timeout(15e3)
|
|
8224
|
+
});
|
|
8225
|
+
if (!resp.ok) {
|
|
8226
|
+
const body = await resp.text();
|
|
8227
|
+
getLogger().error(`WhisperSTT transcription error: ${resp.status} ${body}`);
|
|
8228
|
+
return;
|
|
8229
|
+
}
|
|
8230
|
+
const json = await resp.json();
|
|
8231
|
+
const text = (json.text ?? "").trim();
|
|
8232
|
+
if (!text) return;
|
|
8233
|
+
const transcript = {
|
|
8234
|
+
text,
|
|
8235
|
+
isFinal: true,
|
|
8236
|
+
confidence: 1
|
|
8237
|
+
};
|
|
8238
|
+
for (const cb of this.callbacks) {
|
|
8239
|
+
cb(transcript);
|
|
8240
|
+
}
|
|
8241
|
+
} catch (err) {
|
|
8242
|
+
getLogger().error(`WhisperSTT transcription error: ${String(err)}`);
|
|
7948
8243
|
}
|
|
7949
|
-
this.apiKey = apiKey;
|
|
7950
|
-
this.model = options.model ?? "stt-rt-v4";
|
|
7951
|
-
this.languageHints = options.languageHints;
|
|
7952
|
-
this.languageHintsStrict = options.languageHintsStrict ?? false;
|
|
7953
|
-
this.sampleRate = options.sampleRate ?? 16e3;
|
|
7954
|
-
this.numChannels = options.numChannels ?? 1;
|
|
7955
|
-
this.enableSpeakerDiarization = options.enableSpeakerDiarization ?? false;
|
|
7956
|
-
this.enableLanguageIdentification = options.enableLanguageIdentification ?? true;
|
|
7957
|
-
this.maxEndpointDelayMs = maxEndpointDelayMs;
|
|
7958
|
-
this.clientReferenceId = options.clientReferenceId;
|
|
7959
|
-
this.baseUrl = options.baseUrl ?? SONIOX_WS_URL;
|
|
7960
8244
|
}
|
|
7961
|
-
|
|
7962
|
-
|
|
7963
|
-
|
|
8245
|
+
};
|
|
8246
|
+
|
|
8247
|
+
// src/stt/whisper.ts
|
|
8248
|
+
var STT2 = class extends WhisperSTT {
|
|
8249
|
+
constructor(opts = {}) {
|
|
8250
|
+
const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
|
|
8251
|
+
if (!key) {
|
|
8252
|
+
throw new Error(
|
|
8253
|
+
"Whisper STT requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
|
|
8254
|
+
);
|
|
8255
|
+
}
|
|
8256
|
+
super(key, opts.model ?? "whisper-1", opts.language, opts.bufferSize);
|
|
7964
8257
|
}
|
|
7965
|
-
|
|
7966
|
-
|
|
7967
|
-
|
|
7968
|
-
|
|
8258
|
+
};
|
|
8259
|
+
|
|
8260
|
+
// src/providers/cartesia-stt.ts
|
|
8261
|
+
var import_ws7 = __toESM(require("ws"));
|
|
8262
|
+
init_logger();
|
|
8263
|
+
var DEFAULT_BASE_URL = "https://api.cartesia.ai";
|
|
8264
|
+
var API_VERSION = "2025-04-16";
|
|
8265
|
+
var USER_AGENT = "Patter/1.0 (integration=LiveKit-port; provider=Cartesia)";
|
|
8266
|
+
var KEEPALIVE_INTERVAL_MS = 3e4;
|
|
8267
|
+
var CONNECT_TIMEOUT_MS = 1e4;
|
|
8268
|
+
var MAX_CALLBACKS = 10;
|
|
8269
|
+
var CartesiaSTT = class {
|
|
8270
|
+
constructor(apiKey, options = {}) {
|
|
8271
|
+
this.apiKey = apiKey;
|
|
8272
|
+
this.options = options;
|
|
8273
|
+
if (!apiKey) {
|
|
8274
|
+
throw new Error("CartesiaSTT requires a non-empty apiKey");
|
|
8275
|
+
}
|
|
8276
|
+
}
|
|
8277
|
+
ws = null;
|
|
8278
|
+
callbacks = [];
|
|
8279
|
+
keepaliveTimer = null;
|
|
8280
|
+
/** Cartesia request id — set from the server transcript events. */
|
|
8281
|
+
requestId = "";
|
|
8282
|
+
buildWsUrl() {
|
|
8283
|
+
const opts = this.options;
|
|
8284
|
+
const rawBase = opts.baseUrl ?? DEFAULT_BASE_URL;
|
|
8285
|
+
let base;
|
|
8286
|
+
if (rawBase.startsWith("http://")) {
|
|
8287
|
+
base = `ws://${rawBase.slice("http://".length)}`;
|
|
8288
|
+
} else if (rawBase.startsWith("https://")) {
|
|
8289
|
+
base = `wss://${rawBase.slice("https://".length)}`;
|
|
8290
|
+
} else if (rawBase.startsWith("ws://") || rawBase.startsWith("wss://")) {
|
|
8291
|
+
base = rawBase;
|
|
8292
|
+
} else {
|
|
8293
|
+
base = `wss://${rawBase}`;
|
|
8294
|
+
}
|
|
8295
|
+
const language = opts.language ?? "en";
|
|
8296
|
+
const params = new URLSearchParams({
|
|
8297
|
+
model: opts.model ?? "ink-whisper",
|
|
8298
|
+
sample_rate: String(opts.sampleRate ?? 16e3),
|
|
8299
|
+
encoding: opts.encoding ?? "pcm_s16le",
|
|
8300
|
+
cartesia_version: API_VERSION,
|
|
8301
|
+
api_key: this.apiKey,
|
|
8302
|
+
language
|
|
8303
|
+
});
|
|
8304
|
+
return `${base}/stt/websocket?${params.toString()}`;
|
|
8305
|
+
}
|
|
8306
|
+
async connect() {
|
|
8307
|
+
const url = this.buildWsUrl();
|
|
8308
|
+
this.ws = new import_ws7.default(url, {
|
|
8309
|
+
headers: { "User-Agent": USER_AGENT }
|
|
8310
|
+
});
|
|
8311
|
+
await new Promise((resolve, reject) => {
|
|
8312
|
+
const timer = setTimeout(
|
|
8313
|
+
() => reject(new Error("Cartesia STT connect timeout")),
|
|
8314
|
+
CONNECT_TIMEOUT_MS
|
|
8315
|
+
);
|
|
8316
|
+
this.ws.once("open", () => {
|
|
8317
|
+
clearTimeout(timer);
|
|
8318
|
+
resolve();
|
|
8319
|
+
});
|
|
8320
|
+
this.ws.once("error", (err) => {
|
|
8321
|
+
clearTimeout(timer);
|
|
8322
|
+
reject(err);
|
|
8323
|
+
});
|
|
8324
|
+
});
|
|
8325
|
+
this.ws.on("message", (raw) => {
|
|
8326
|
+
let event;
|
|
8327
|
+
try {
|
|
8328
|
+
event = JSON.parse(raw.toString());
|
|
8329
|
+
} catch {
|
|
8330
|
+
return;
|
|
8331
|
+
}
|
|
8332
|
+
this.handleEvent(event);
|
|
8333
|
+
});
|
|
8334
|
+
this.keepaliveTimer = setInterval(() => {
|
|
8335
|
+
if (this.ws && this.ws.readyState === import_ws7.default.OPEN) {
|
|
8336
|
+
try {
|
|
8337
|
+
this.ws.ping();
|
|
8338
|
+
} catch {
|
|
8339
|
+
}
|
|
8340
|
+
}
|
|
8341
|
+
}, KEEPALIVE_INTERVAL_MS);
|
|
8342
|
+
}
|
|
8343
|
+
handleEvent(event) {
|
|
8344
|
+
const type = event.type;
|
|
8345
|
+
if (type === "transcript") {
|
|
8346
|
+
const text = (event.text ?? "").trim();
|
|
8347
|
+
const isFinal = Boolean(event.is_final);
|
|
8348
|
+
if (!text && !isFinal) return;
|
|
8349
|
+
if (event.request_id) {
|
|
8350
|
+
this.requestId = event.request_id;
|
|
8351
|
+
}
|
|
8352
|
+
if (!text) return;
|
|
8353
|
+
const confidence = Number(event.probability ?? 1);
|
|
8354
|
+
this.emit({ text, isFinal, confidence });
|
|
8355
|
+
return;
|
|
8356
|
+
}
|
|
8357
|
+
if (type === "error") {
|
|
8358
|
+
getLogger().error(`Cartesia STT error: ${event.message ?? "unknown"}`);
|
|
8359
|
+
return;
|
|
8360
|
+
}
|
|
8361
|
+
}
|
|
8362
|
+
emit(transcript) {
|
|
8363
|
+
for (const cb of this.callbacks) {
|
|
8364
|
+
cb(transcript);
|
|
8365
|
+
}
|
|
8366
|
+
}
|
|
8367
|
+
sendAudio(audio) {
|
|
8368
|
+
if (!this.ws || this.ws.readyState !== import_ws7.default.OPEN) return;
|
|
8369
|
+
this.ws.send(audio);
|
|
8370
|
+
}
|
|
8371
|
+
onTranscript(callback) {
|
|
8372
|
+
if (this.callbacks.length >= MAX_CALLBACKS) {
|
|
8373
|
+
getLogger().warn(
|
|
8374
|
+
"CartesiaSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
|
|
8375
|
+
);
|
|
8376
|
+
this.callbacks[this.callbacks.length - 1] = callback;
|
|
8377
|
+
return;
|
|
8378
|
+
}
|
|
8379
|
+
this.callbacks.push(callback);
|
|
8380
|
+
}
|
|
8381
|
+
close() {
|
|
8382
|
+
if (this.keepaliveTimer) {
|
|
8383
|
+
clearInterval(this.keepaliveTimer);
|
|
8384
|
+
this.keepaliveTimer = null;
|
|
8385
|
+
}
|
|
8386
|
+
if (this.ws) {
|
|
8387
|
+
try {
|
|
8388
|
+
this.ws.send("finalize");
|
|
8389
|
+
} catch {
|
|
8390
|
+
}
|
|
8391
|
+
this.ws.close();
|
|
8392
|
+
this.ws = null;
|
|
8393
|
+
}
|
|
8394
|
+
}
|
|
8395
|
+
};
|
|
8396
|
+
|
|
8397
|
+
// src/stt/cartesia.ts
|
|
8398
|
+
/**
 * Options-object front end for `CartesiaSTT`.
 *
 * The API key comes from `opts.apiKey`, falling back to the
 * `CARTESIA_API_KEY` environment variable; construction throws when neither
 * yields a value. Only `model`, `language`, `encoding`, `sampleRate` and
 * `baseUrl` are forwarded to the base constructor.
 */
var STT3 = class extends CartesiaSTT {
  constructor(opts = {}) {
    const { apiKey, model, language, encoding, sampleRate, baseUrl } = opts;
    const key = apiKey ?? process.env.CARTESIA_API_KEY;
    if (!key) {
      throw new Error(
        "Cartesia STT requires an apiKey. Pass { apiKey: '...' } or set CARTESIA_API_KEY in the environment."
      );
    }
    super(key, { model, language, encoding, sampleRate, baseUrl });
  }
};
|
|
8415
|
+
|
|
8416
|
+
// src/providers/soniox-stt.ts
|
|
8417
|
+
var import_ws8 = __toESM(require("ws"));
|
|
8418
|
+
init_logger();
|
|
8419
|
+
var SONIOX_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket";
|
|
8420
|
+
var KEEPALIVE_MESSAGE = '{"type": "keepalive"}';
|
|
8421
|
+
var END_TOKEN = "<end>";
|
|
8422
|
+
var FINALIZED_TOKEN = "<fin>";
|
|
8423
|
+
var KEEPALIVE_INTERVAL_MS2 = 5e3;
|
|
8424
|
+
/**
 * Reports whether a token's text is one of the two sentinel strings
 * (`END_TOKEN` or `FINALIZED_TOKEN`).
 */
function isEndToken(token) {
  const sentinels = [END_TOKEN, FINALIZED_TOKEN];
  return sentinels.includes(token.text);
}
|
|
8427
|
+
/**
 * Accumulates token text and a running confidence total.
 *
 * `text` is the concatenation of every non-empty `token.text` seen so far;
 * `confSum` / `confCount` track the numeric `token.confidence` values.
 */
var TokenAccumulator = class {
  text = "";
  confSum = 0;
  confCount = 0;

  /** Folds one token into the accumulator. Non-numeric confidences are not counted. */
  update(token) {
    const { text, confidence } = token;
    if (text) {
      this.text = this.text + text;
    }
    if (typeof confidence === "number") {
      this.confSum = this.confSum + confidence;
      this.confCount = this.confCount + 1;
    }
  }

  /** Mean of the counted confidences, or 0 when none have been counted. */
  get confidence() {
    if (this.confCount === 0) {
      return 0;
    }
    return this.confSum / this.confCount;
  }

  /** Clears the text and both confidence counters. */
  reset() {
    this.text = "";
    this.confSum = 0;
    this.confCount = 0;
  }

  /** Snapshot of the raw confidence counters as `{ sum, count }`. */
  get raw() {
    const { confSum: sum, confCount: count } = this;
    return { sum, count };
  }
};
|
|
8452
|
+
var SonioxSTT = class _SonioxSTT {
|
|
8453
|
+
ws = null;
|
|
8454
|
+
callbacks = [];
|
|
8455
|
+
final = new TokenAccumulator();
|
|
8456
|
+
keepaliveTimer = null;
|
|
8457
|
+
apiKey;
|
|
8458
|
+
model;
|
|
8459
|
+
languageHints;
|
|
8460
|
+
languageHintsStrict;
|
|
8461
|
+
sampleRate;
|
|
8462
|
+
numChannels;
|
|
8463
|
+
enableSpeakerDiarization;
|
|
8464
|
+
enableLanguageIdentification;
|
|
8465
|
+
maxEndpointDelayMs;
|
|
8466
|
+
clientReferenceId;
|
|
8467
|
+
baseUrl;
|
|
8468
|
+
constructor(apiKey, options = {}) {
|
|
8469
|
+
if (!apiKey) {
|
|
8470
|
+
throw new Error("Soniox apiKey is required");
|
|
8471
|
+
}
|
|
8472
|
+
const maxEndpointDelayMs = options.maxEndpointDelayMs ?? 500;
|
|
8473
|
+
if (maxEndpointDelayMs < 500 || maxEndpointDelayMs > 3e3) {
|
|
8474
|
+
throw new Error("maxEndpointDelayMs must be between 500 and 3000");
|
|
8475
|
+
}
|
|
8476
|
+
this.apiKey = apiKey;
|
|
8477
|
+
this.model = options.model ?? "stt-rt-v4";
|
|
8478
|
+
this.languageHints = options.languageHints;
|
|
8479
|
+
this.languageHintsStrict = options.languageHintsStrict ?? false;
|
|
8480
|
+
this.sampleRate = options.sampleRate ?? 16e3;
|
|
8481
|
+
this.numChannels = options.numChannels ?? 1;
|
|
8482
|
+
this.enableSpeakerDiarization = options.enableSpeakerDiarization ?? false;
|
|
8483
|
+
this.enableLanguageIdentification = options.enableLanguageIdentification ?? true;
|
|
8484
|
+
this.maxEndpointDelayMs = maxEndpointDelayMs;
|
|
8485
|
+
this.clientReferenceId = options.clientReferenceId;
|
|
8486
|
+
this.baseUrl = options.baseUrl ?? SONIOX_WS_URL;
|
|
8487
|
+
}
|
|
8488
|
+
/** Factory for Twilio-style 8 kHz linear PCM. */
|
|
8489
|
+
static forTwilio(apiKey, languageHints) {
|
|
8490
|
+
return new _SonioxSTT(apiKey, { sampleRate: 8e3, languageHints });
|
|
8491
|
+
}
|
|
8492
|
+
buildConfig() {
|
|
8493
|
+
const config = {
|
|
8494
|
+
api_key: this.apiKey,
|
|
8495
|
+
model: this.model,
|
|
7969
8496
|
audio_format: "pcm_s16le",
|
|
7970
8497
|
num_channels: this.numChannels,
|
|
7971
8498
|
sample_rate: this.sampleRate,
|
|
@@ -7984,7 +8511,7 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
7984
8511
|
return config;
|
|
7985
8512
|
}
|
|
7986
8513
|
async connect() {
|
|
7987
|
-
this.ws = new
|
|
8514
|
+
this.ws = new import_ws8.default(this.baseUrl);
|
|
7988
8515
|
await new Promise((resolve, reject) => {
|
|
7989
8516
|
const timer = setTimeout(() => reject(new Error("Soniox connect timeout")), 1e4);
|
|
7990
8517
|
this.ws.once("open", () => {
|
|
@@ -8003,13 +8530,13 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
8003
8530
|
getLogger().error(`SonioxSTT WebSocket error: ${String(err)}`);
|
|
8004
8531
|
});
|
|
8005
8532
|
this.keepaliveTimer = setInterval(() => {
|
|
8006
|
-
if (this.ws && this.ws.readyState ===
|
|
8533
|
+
if (this.ws && this.ws.readyState === import_ws8.default.OPEN) {
|
|
8007
8534
|
try {
|
|
8008
8535
|
this.ws.send(KEEPALIVE_MESSAGE);
|
|
8009
8536
|
} catch {
|
|
8010
8537
|
}
|
|
8011
8538
|
}
|
|
8012
|
-
},
|
|
8539
|
+
}, KEEPALIVE_INTERVAL_MS2);
|
|
8013
8540
|
}
|
|
8014
8541
|
clearKeepalive() {
|
|
8015
8542
|
if (this.keepaliveTimer) {
|
|
@@ -8076,7 +8603,7 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
8076
8603
|
}
|
|
8077
8604
|
}
|
|
8078
8605
|
sendAudio(audio) {
|
|
8079
|
-
if (!this.ws || this.ws.readyState !==
|
|
8606
|
+
if (!this.ws || this.ws.readyState !== import_ws8.default.OPEN) return;
|
|
8080
8607
|
if (audio.length === 0) return;
|
|
8081
8608
|
this.ws.send(audio);
|
|
8082
8609
|
}
|
|
@@ -8106,16 +8633,28 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
8106
8633
|
}
|
|
8107
8634
|
};
|
|
8108
8635
|
|
|
8109
|
-
// src/
|
|
8110
|
-
|
|
8636
|
+
// src/stt/soniox.ts
|
|
8637
|
+
/**
 * Options-object front end for `SonioxSTT`.
 *
 * The API key comes from `opts.apiKey`, falling back to the `SONIOX_API_KEY`
 * environment variable; construction throws when neither yields a value.
 * Every other option is forwarded to the base constructor untouched.
 */
var STT4 = class extends SonioxSTT {
  constructor(opts = {}) {
    // Split the key off so it is not passed twice (positionally and in the options).
    const { apiKey, ...providerOptions } = opts;
    const key = apiKey ?? process.env.SONIOX_API_KEY;
    if (!key) {
      throw new Error(
        "Soniox STT requires an apiKey. Pass { apiKey: '...' } or set SONIOX_API_KEY in the environment."
      );
    }
    super(key, providerOptions);
  }
};
|
|
8111
8650
|
|
|
8112
8651
|
// src/providers/assemblyai-stt.ts
|
|
8113
|
-
var
|
|
8652
|
+
var import_ws9 = __toESM(require("ws"));
|
|
8114
8653
|
init_logger();
|
|
8115
|
-
var
|
|
8654
|
+
var DEFAULT_BASE_URL2 = "wss://streaming.assemblyai.com";
|
|
8116
8655
|
var DEFAULT_MIN_TURN_SILENCE_MS = 100;
|
|
8117
|
-
var
|
|
8118
|
-
var
|
|
8656
|
+
var CONNECT_TIMEOUT_MS2 = 1e4;
|
|
8657
|
+
var MAX_CALLBACKS2 = 10;
|
|
8119
8658
|
var AssemblyAISTT = class _AssemblyAISTT {
|
|
8120
8659
|
constructor(apiKey, options = {}) {
|
|
8121
8660
|
this.apiKey = apiKey;
|
|
@@ -8172,174 +8711,27 @@ var AssemblyAISTT = class _AssemblyAISTT {
|
|
|
8172
8711
|
const params = new URLSearchParams();
|
|
8173
8712
|
for (const [key, value] of Object.entries(raw)) {
|
|
8174
8713
|
if (value === void 0 || value === null) continue;
|
|
8175
|
-
if (typeof value === "boolean") {
|
|
8176
|
-
params.set(key, value ? "true" : "false");
|
|
8177
|
-
} else {
|
|
8178
|
-
params.set(key, String(value));
|
|
8179
|
-
}
|
|
8180
|
-
}
|
|
8181
|
-
const base = opts.baseUrl ??
|
|
8182
|
-
return `${base}/v3/ws?${params.toString()}`;
|
|
8183
|
-
}
|
|
8184
|
-
async connect() {
|
|
8185
|
-
const url = this.buildUrl();
|
|
8186
|
-
this.ws = new import_ws8.default(url, {
|
|
8187
|
-
headers: {
|
|
8188
|
-
Authorization: this.apiKey,
|
|
8189
|
-
"Content-Type": "application/json",
|
|
8190
|
-
"User-Agent": "Patter/1.0 (integration=LiveKit-port)"
|
|
8191
|
-
}
|
|
8192
|
-
});
|
|
8193
|
-
await new Promise((resolve, reject) => {
|
|
8194
|
-
const timer = setTimeout(
|
|
8195
|
-
() => reject(new Error("AssemblyAI connect timeout")),
|
|
8196
|
-
CONNECT_TIMEOUT_MS
|
|
8197
|
-
);
|
|
8198
|
-
this.ws.once("open", () => {
|
|
8199
|
-
clearTimeout(timer);
|
|
8200
|
-
resolve();
|
|
8201
|
-
});
|
|
8202
|
-
this.ws.once("error", (err) => {
|
|
8203
|
-
clearTimeout(timer);
|
|
8204
|
-
reject(err);
|
|
8205
|
-
});
|
|
8206
|
-
});
|
|
8207
|
-
this.ws.on("message", (raw) => {
|
|
8208
|
-
let event;
|
|
8209
|
-
try {
|
|
8210
|
-
event = JSON.parse(raw.toString());
|
|
8211
|
-
} catch {
|
|
8212
|
-
return;
|
|
8213
|
-
}
|
|
8214
|
-
this.handleEvent(event);
|
|
8215
|
-
});
|
|
8216
|
-
}
|
|
8217
|
-
handleEvent(event) {
|
|
8218
|
-
const type = event.type;
|
|
8219
|
-
if (type === "Begin") {
|
|
8220
|
-
this.sessionId = event.id ?? "";
|
|
8221
|
-
this.expiresAt = event.expires_at ?? 0;
|
|
8222
|
-
return;
|
|
8223
|
-
}
|
|
8224
|
-
if (type !== "Turn") {
|
|
8225
|
-
return;
|
|
8226
|
-
}
|
|
8227
|
-
const endOfTurn = Boolean(event.end_of_turn);
|
|
8228
|
-
const turnIsFormatted = Boolean(event.turn_is_formatted);
|
|
8229
|
-
const words = event.words ?? [];
|
|
8230
|
-
const transcriptText = (event.transcript ?? "").trim();
|
|
8231
|
-
if (endOfTurn) {
|
|
8232
|
-
if (this.options.formatTurns && !turnIsFormatted) return;
|
|
8233
|
-
if (!transcriptText) return;
|
|
8234
|
-
this.emit({
|
|
8235
|
-
text: transcriptText,
|
|
8236
|
-
isFinal: true,
|
|
8237
|
-
confidence: averageConfidence(words)
|
|
8238
|
-
});
|
|
8239
|
-
return;
|
|
8240
|
-
}
|
|
8241
|
-
if (!words.length) return;
|
|
8242
|
-
const interim = words.map((w) => (w.text ?? "").trim()).filter(Boolean).join(" ");
|
|
8243
|
-
if (!interim) return;
|
|
8244
|
-
this.emit({
|
|
8245
|
-
text: interim,
|
|
8246
|
-
isFinal: false,
|
|
8247
|
-
confidence: averageConfidence(words)
|
|
8248
|
-
});
|
|
8249
|
-
}
|
|
8250
|
-
emit(transcript) {
|
|
8251
|
-
for (const cb of this.callbacks) {
|
|
8252
|
-
cb(transcript);
|
|
8253
|
-
}
|
|
8254
|
-
}
|
|
8255
|
-
sendAudio(audio) {
|
|
8256
|
-
if (!this.ws || this.ws.readyState !== import_ws8.default.OPEN) return;
|
|
8257
|
-
this.ws.send(audio);
|
|
8258
|
-
}
|
|
8259
|
-
onTranscript(callback) {
|
|
8260
|
-
if (this.callbacks.length >= MAX_CALLBACKS) {
|
|
8261
|
-
getLogger().warn(
|
|
8262
|
-
"AssemblyAISTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
|
|
8263
|
-
);
|
|
8264
|
-
this.callbacks[this.callbacks.length - 1] = callback;
|
|
8265
|
-
return;
|
|
8266
|
-
}
|
|
8267
|
-
this.callbacks.push(callback);
|
|
8268
|
-
}
|
|
8269
|
-
close() {
|
|
8270
|
-
if (this.ws) {
|
|
8271
|
-
try {
|
|
8272
|
-
this.ws.send(JSON.stringify({ type: "Terminate" }));
|
|
8273
|
-
} catch {
|
|
8274
|
-
}
|
|
8275
|
-
this.ws.close();
|
|
8276
|
-
this.ws = null;
|
|
8277
|
-
}
|
|
8278
|
-
}
|
|
8279
|
-
};
|
|
8280
|
-
function averageConfidence(words) {
|
|
8281
|
-
if (!words.length) return 0;
|
|
8282
|
-
let total = 0;
|
|
8283
|
-
for (const w of words) {
|
|
8284
|
-
total += Number(w.confidence ?? 0);
|
|
8285
|
-
}
|
|
8286
|
-
return total / words.length;
|
|
8287
|
-
}
|
|
8288
|
-
|
|
8289
|
-
// src/providers/cartesia-stt.ts
|
|
8290
|
-
var import_ws9 = __toESM(require("ws"));
|
|
8291
|
-
init_logger();
|
|
8292
|
-
var DEFAULT_BASE_URL2 = "https://api.cartesia.ai";
|
|
8293
|
-
var API_VERSION = "2025-04-16";
|
|
8294
|
-
var USER_AGENT = "Patter/1.0 (integration=LiveKit-port; provider=Cartesia)";
|
|
8295
|
-
var KEEPALIVE_INTERVAL_MS2 = 3e4;
|
|
8296
|
-
var CONNECT_TIMEOUT_MS2 = 1e4;
|
|
8297
|
-
var MAX_CALLBACKS2 = 10;
|
|
8298
|
-
var CartesiaSTT = class {
|
|
8299
|
-
constructor(apiKey, options = {}) {
|
|
8300
|
-
this.apiKey = apiKey;
|
|
8301
|
-
this.options = options;
|
|
8302
|
-
if (!apiKey) {
|
|
8303
|
-
throw new Error("CartesiaSTT requires a non-empty apiKey");
|
|
8304
|
-
}
|
|
8305
|
-
}
|
|
8306
|
-
ws = null;
|
|
8307
|
-
callbacks = [];
|
|
8308
|
-
keepaliveTimer = null;
|
|
8309
|
-
/** Cartesia request id — set from the server transcript events. */
|
|
8310
|
-
requestId = "";
|
|
8311
|
-
buildWsUrl() {
|
|
8312
|
-
const opts = this.options;
|
|
8313
|
-
const rawBase = opts.baseUrl ?? DEFAULT_BASE_URL2;
|
|
8314
|
-
let base;
|
|
8315
|
-
if (rawBase.startsWith("http://")) {
|
|
8316
|
-
base = `ws://${rawBase.slice("http://".length)}`;
|
|
8317
|
-
} else if (rawBase.startsWith("https://")) {
|
|
8318
|
-
base = `wss://${rawBase.slice("https://".length)}`;
|
|
8319
|
-
} else if (rawBase.startsWith("ws://") || rawBase.startsWith("wss://")) {
|
|
8320
|
-
base = rawBase;
|
|
8321
|
-
} else {
|
|
8322
|
-
base = `wss://${rawBase}`;
|
|
8323
|
-
}
|
|
8324
|
-
const language = opts.language ?? "en";
|
|
8325
|
-
const params = new URLSearchParams({
|
|
8326
|
-
model: opts.model ?? "ink-whisper",
|
|
8327
|
-
sample_rate: String(opts.sampleRate ?? 16e3),
|
|
8328
|
-
encoding: opts.encoding ?? "pcm_s16le",
|
|
8329
|
-
cartesia_version: API_VERSION,
|
|
8330
|
-
api_key: this.apiKey,
|
|
8331
|
-
language
|
|
8332
|
-
});
|
|
8333
|
-
return `${base}/stt/websocket?${params.toString()}`;
|
|
8714
|
+
if (typeof value === "boolean") {
|
|
8715
|
+
params.set(key, value ? "true" : "false");
|
|
8716
|
+
} else {
|
|
8717
|
+
params.set(key, String(value));
|
|
8718
|
+
}
|
|
8719
|
+
}
|
|
8720
|
+
const base = opts.baseUrl ?? DEFAULT_BASE_URL2;
|
|
8721
|
+
return `${base}/v3/ws?${params.toString()}`;
|
|
8334
8722
|
}
|
|
8335
8723
|
async connect() {
|
|
8336
|
-
const url = this.
|
|
8724
|
+
const url = this.buildUrl();
|
|
8337
8725
|
this.ws = new import_ws9.default(url, {
|
|
8338
|
-
headers: {
|
|
8726
|
+
headers: {
|
|
8727
|
+
Authorization: this.apiKey,
|
|
8728
|
+
"Content-Type": "application/json",
|
|
8729
|
+
"User-Agent": "Patter/1.0 (integration=LiveKit-port)"
|
|
8730
|
+
}
|
|
8339
8731
|
});
|
|
8340
8732
|
await new Promise((resolve, reject) => {
|
|
8341
8733
|
const timer = setTimeout(
|
|
8342
|
-
() => reject(new Error("
|
|
8734
|
+
() => reject(new Error("AssemblyAI connect timeout")),
|
|
8343
8735
|
CONNECT_TIMEOUT_MS2
|
|
8344
8736
|
);
|
|
8345
8737
|
this.ws.once("open", () => {
|
|
@@ -8360,33 +8752,39 @@ var CartesiaSTT = class {
|
|
|
8360
8752
|
}
|
|
8361
8753
|
this.handleEvent(event);
|
|
8362
8754
|
});
|
|
8363
|
-
this.keepaliveTimer = setInterval(() => {
|
|
8364
|
-
if (this.ws && this.ws.readyState === import_ws9.default.OPEN) {
|
|
8365
|
-
try {
|
|
8366
|
-
this.ws.ping();
|
|
8367
|
-
} catch {
|
|
8368
|
-
}
|
|
8369
|
-
}
|
|
8370
|
-
}, KEEPALIVE_INTERVAL_MS2);
|
|
8371
8755
|
}
|
|
8372
8756
|
handleEvent(event) {
|
|
8373
8757
|
const type = event.type;
|
|
8374
|
-
if (type === "
|
|
8375
|
-
|
|
8376
|
-
|
|
8377
|
-
if (!text && !isFinal) return;
|
|
8378
|
-
if (event.request_id) {
|
|
8379
|
-
this.requestId = event.request_id;
|
|
8380
|
-
}
|
|
8381
|
-
if (!text) return;
|
|
8382
|
-
const confidence = Number(event.probability ?? 1);
|
|
8383
|
-
this.emit({ text, isFinal, confidence });
|
|
8758
|
+
if (type === "Begin") {
|
|
8759
|
+
this.sessionId = event.id ?? "";
|
|
8760
|
+
this.expiresAt = event.expires_at ?? 0;
|
|
8384
8761
|
return;
|
|
8385
8762
|
}
|
|
8386
|
-
if (type
|
|
8387
|
-
|
|
8763
|
+
if (type !== "Turn") {
|
|
8764
|
+
return;
|
|
8765
|
+
}
|
|
8766
|
+
const endOfTurn = Boolean(event.end_of_turn);
|
|
8767
|
+
const turnIsFormatted = Boolean(event.turn_is_formatted);
|
|
8768
|
+
const words = event.words ?? [];
|
|
8769
|
+
const transcriptText = (event.transcript ?? "").trim();
|
|
8770
|
+
if (endOfTurn) {
|
|
8771
|
+
if (this.options.formatTurns && !turnIsFormatted) return;
|
|
8772
|
+
if (!transcriptText) return;
|
|
8773
|
+
this.emit({
|
|
8774
|
+
text: transcriptText,
|
|
8775
|
+
isFinal: true,
|
|
8776
|
+
confidence: averageConfidence(words)
|
|
8777
|
+
});
|
|
8388
8778
|
return;
|
|
8389
8779
|
}
|
|
8780
|
+
if (!words.length) return;
|
|
8781
|
+
const interim = words.map((w) => (w.text ?? "").trim()).filter(Boolean).join(" ");
|
|
8782
|
+
if (!interim) return;
|
|
8783
|
+
this.emit({
|
|
8784
|
+
text: interim,
|
|
8785
|
+
isFinal: false,
|
|
8786
|
+
confidence: averageConfidence(words)
|
|
8787
|
+
});
|
|
8390
8788
|
}
|
|
8391
8789
|
emit(transcript) {
|
|
8392
8790
|
for (const cb of this.callbacks) {
|
|
@@ -8400,7 +8798,7 @@ var CartesiaSTT = class {
|
|
|
8400
8798
|
onTranscript(callback) {
|
|
8401
8799
|
if (this.callbacks.length >= MAX_CALLBACKS2) {
|
|
8402
8800
|
getLogger().warn(
|
|
8403
|
-
"
|
|
8801
|
+
"AssemblyAISTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
|
|
8404
8802
|
);
|
|
8405
8803
|
this.callbacks[this.callbacks.length - 1] = callback;
|
|
8406
8804
|
return;
|
|
@@ -8408,13 +8806,9 @@ var CartesiaSTT = class {
|
|
|
8408
8806
|
this.callbacks.push(callback);
|
|
8409
8807
|
}
|
|
8410
8808
|
close() {
|
|
8411
|
-
if (this.keepaliveTimer) {
|
|
8412
|
-
clearInterval(this.keepaliveTimer);
|
|
8413
|
-
this.keepaliveTimer = null;
|
|
8414
|
-
}
|
|
8415
8809
|
if (this.ws) {
|
|
8416
8810
|
try {
|
|
8417
|
-
this.ws.send("
|
|
8811
|
+
this.ws.send(JSON.stringify({ type: "Terminate" }));
|
|
8418
8812
|
} catch {
|
|
8419
8813
|
}
|
|
8420
8814
|
this.ws.close();
|
|
@@ -8422,10 +8816,305 @@ var CartesiaSTT = class {
|
|
|
8422
8816
|
}
|
|
8423
8817
|
}
|
|
8424
8818
|
};
|
|
8819
|
+
/**
 * Arithmetic mean of `confidence` across the given word objects.
 * A null/undefined confidence counts as 0; an empty list yields 0.
 */
function averageConfidence(words) {
  const wordCount = words.length;
  if (!wordCount) return 0;
  const confidenceSum = words.reduce(
    (runningTotal, word) => runningTotal + Number(word.confidence ?? 0),
    0
  );
  return confidenceSum / wordCount;
}
|
|
8425
8827
|
|
|
8426
|
-
// src/
|
|
8427
|
-
|
|
8428
|
-
|
|
8828
|
+
// src/stt/assemblyai.ts
|
|
8829
|
+
/**
 * Options-object front end for `AssemblyAISTT`.
 *
 * The API key comes from `opts.apiKey`, falling back to the
 * `ASSEMBLYAI_API_KEY` environment variable; construction throws when
 * neither yields a value. Every other option is forwarded to the base
 * constructor untouched.
 */
var STT5 = class extends AssemblyAISTT {
  constructor(opts = {}) {
    // Split the key off so it is not passed twice (positionally and in the options).
    const { apiKey, ...providerOptions } = opts;
    const key = apiKey ?? process.env.ASSEMBLYAI_API_KEY;
    if (!key) {
      throw new Error(
        "AssemblyAI STT requires an apiKey. Pass { apiKey: '...' } or set ASSEMBLYAI_API_KEY in the environment."
      );
    }
    super(key, providerOptions);
  }
};
|
|
8842
|
+
|
|
8843
|
+
// src/providers/elevenlabs-tts.ts
|
|
8844
|
+
var ELEVENLABS_BASE_URL = "https://api.elevenlabs.io/v1";
|
|
8845
|
+
var ELEVENLABS_VOICE_ID_BY_NAME = {
|
|
8846
|
+
rachel: "21m00Tcm4TlvDq8ikWAM",
|
|
8847
|
+
drew: "29vD33N1CtxCmqQRPOHJ",
|
|
8848
|
+
clyde: "2EiwWnXFnvU5JabPnv8n",
|
|
8849
|
+
paul: "5Q0t7uMcjvnagumLfvZi",
|
|
8850
|
+
domi: "AZnzlk1XvdvUeBnXmlld",
|
|
8851
|
+
dave: "CYw3kZ02Hs0563khs1Fj",
|
|
8852
|
+
fin: "D38z5RcWu1voky8WS1ja",
|
|
8853
|
+
bella: "EXAVITQu4vr4xnSDxMaL",
|
|
8854
|
+
antoni: "ErXwobaYiN019PkySvjV",
|
|
8855
|
+
thomas: "GBv7mTt0atIp3Br8iCZE",
|
|
8856
|
+
charlie: "IKne3meq5aSn9XLyUdCD",
|
|
8857
|
+
george: "JBFqnCBsd6RMkjVDRZzb",
|
|
8858
|
+
emily: "LcfcDJNUP1GQjkzn1xUU",
|
|
8859
|
+
elli: "MF3mGyEYCl7XYWbV9V6O",
|
|
8860
|
+
callum: "N2lVS1w4EtoT3dr4eOWO",
|
|
8861
|
+
patrick: "ODq5zmih8GrVes37Dizd",
|
|
8862
|
+
harry: "SOYHLrjzK2X1ezoPC6cr",
|
|
8863
|
+
liam: "TX3LPaxmHKxFdv7VOQHJ",
|
|
8864
|
+
dorothy: "ThT5KcBeYPX3keUQqHPh",
|
|
8865
|
+
josh: "TxGEqnHWrfWFTfGW9XjX",
|
|
8866
|
+
arnold: "VR6AewLTigWG4xSOukaG",
|
|
8867
|
+
charlotte: "XB0fDUnXU5powFXDhCwa",
|
|
8868
|
+
matilda: "XrExE9yKIg1WjnnlVkGX",
|
|
8869
|
+
matthew: "Yko7PKHZNXotIFUBG7I9",
|
|
8870
|
+
james: "ZQe5CZNOzWyzPSCn5a3c",
|
|
8871
|
+
joseph: "Zlb1dXrM653N07WRdFW3",
|
|
8872
|
+
jeremy: "bVMeCyTHy58xNoL34h3p",
|
|
8873
|
+
michael: "flq6f7yk4E4fJM5XTYuZ",
|
|
8874
|
+
ethan: "g5CIjZEefAph4nQFvHAz",
|
|
8875
|
+
gigi: "jBpfuIE2acCO8z3wKNLl",
|
|
8876
|
+
freya: "jsCqWAovK2LkecY7zXl4",
|
|
8877
|
+
brian: "nPczCjzI2devNBz1zQrb",
|
|
8878
|
+
grace: "oWAxZDx7w5VEj9dCyTzz",
|
|
8879
|
+
daniel: "onwK4e9ZLuTAKqWW03F9",
|
|
8880
|
+
lily: "pFZP5JQG7iQjIQuC4Bku",
|
|
8881
|
+
serena: "pMsXgVXv3BLzUgSXRplE",
|
|
8882
|
+
adam: "pNInz6obpgDQGcFmaJgB",
|
|
8883
|
+
nicole: "piTKgcLEGmPE4e6mEKli",
|
|
8884
|
+
bill: "pqHfZKP75CvOlQylNhV4",
|
|
8885
|
+
jessie: "t0jbNlBVZ17f02VDIeMI",
|
|
8886
|
+
ryan: "wViXBPUzp2ZZixB1xQuM",
|
|
8887
|
+
sam: "yoZ06aMxZJJ28mfd3POQ",
|
|
8888
|
+
glinda: "z9fAnlkpzviPz146aGWa",
|
|
8889
|
+
giovanni: "zcAOhNBS3c14rBihAFp1",
|
|
8890
|
+
mimi: "zrHiDhphv9ZnVXBqCLjz",
|
|
8891
|
+
alloy: "21m00Tcm4TlvDq8ikWAM"
|
|
8892
|
+
};
|
|
8893
|
+
var VOICE_ID_PATTERN = /^[A-Za-z0-9]{20}$/;
|
|
8894
|
+
/**
 * Maps a voice name to its id via `ELEVENLABS_VOICE_ID_BY_NAME`.
 *
 * @param voice - A voice id or a (case-insensitive) voice name.
 * @returns The input unchanged when it is falsy, already matches
 *   `VOICE_ID_PATTERN`, or is not a known name; otherwise the mapped id.
 */
function resolveVoiceId(voice) {
  if (!voice) return voice;
  if (VOICE_ID_PATTERN.test(voice)) return voice;
  const name = voice.toLowerCase();
  // Own-property check: a bare `table[name]` lookup would also hit inherited
  // members such as "constructor" or "__proto__" and return a non-string.
  return Object.hasOwn(ELEVENLABS_VOICE_ID_BY_NAME, name)
    ? ELEVENLABS_VOICE_ID_BY_NAME[name]
    : voice;
}
|
|
8899
|
+
/**
 * Text-to-speech client that POSTs to the ElevenLabs streaming endpoint
 * under `ELEVENLABS_BASE_URL`.
 */
var ElevenLabsTTS = class {
  /** Voice id placed in the request URL; set by the constructor via `resolveVoiceId`. */
  voiceId;

  /**
   * @param apiKey - Sent as the `xi-api-key` request header.
   * @param voiceId - Passed through `resolveVoiceId` before being stored.
   * @param modelId - Sent as `model_id` in the request body.
   * @param outputFormat - Sent as the `output_format` query parameter.
   */
  constructor(apiKey, voiceId = "21m00Tcm4TlvDq8ikWAM", modelId = "eleven_turbo_v2_5", outputFormat = "pcm_16000") {
    this.apiKey = apiKey;
    this.modelId = modelId;
    this.outputFormat = outputFormat;
    this.voiceId = resolveVoiceId(voiceId);
  }

  /**
   * Synthesise text to speech and return the whole audio as one Buffer.
   *
   * Drains `synthesizeStream`; prefer the stream directly when latency matters.
   */
  async synthesize(text) {
    const parts = [];
    for await (const part of this.synthesizeStream(text)) {
      parts.push(part);
    }
    return Buffer.concat(parts);
  }

  /**
   * Synthesise text and yield audio chunks as they arrive.
   *
   * Each non-empty chunk read from the response body is yielded as a Buffer,
   * in whatever format `outputFormat` requested.
   *
   * @throws {Error} On a non-OK HTTP status (message includes status and
   *   response text) or when the response has no body.
   */
  async *synthesizeStream(text) {
    const voice = encodeURIComponent(this.voiceId);
    const format = encodeURIComponent(this.outputFormat);
    const endpoint = `${ELEVENLABS_BASE_URL}/text-to-speech/${voice}/stream?output_format=${format}`;
    const headers = {
      "xi-api-key": this.apiKey,
      "Content-Type": "application/json"
    };
    const response = await fetch(endpoint, {
      method: "POST",
      headers,
      body: JSON.stringify({ text, model_id: this.modelId }),
      signal: AbortSignal.timeout(3e4)
    });
    if (!response.ok) {
      const detail = await response.text();
      throw new Error(`ElevenLabs TTS error ${response.status}: ${detail}`);
    }
    if (!response.body) {
      throw new Error("ElevenLabs TTS: no response body");
    }
    const reader = response.body.getReader();
    try {
      let result = await reader.read();
      while (!result.done) {
        const { value } = result;
        if (value && value.length > 0) {
          yield Buffer.from(value);
        }
        result = await reader.read();
      }
    } finally {
      // Runs on normal completion, on errors, and when the consumer stops early.
      if (typeof reader.cancel === "function") {
        await reader.cancel().catch(() => {
        });
      }
      reader.releaseLock();
    }
  }
};
|
|
8959
|
+
|
|
8960
|
+
// src/tts/elevenlabs.ts
|
|
8961
|
+
/**
 * Options-object front end for `ElevenLabsTTS`.
 *
 * The API key comes from `opts.apiKey`, falling back to the
 * `ELEVENLABS_API_KEY` environment variable; construction throws when
 * neither yields a value. `voiceId`, `modelId` and `outputFormat` fall back
 * to fixed defaults when null/undefined.
 */
var TTS = class extends ElevenLabsTTS {
  constructor(opts = {}) {
    const { apiKey, voiceId, modelId, outputFormat } = opts;
    const key = apiKey ?? process.env.ELEVENLABS_API_KEY;
    if (!key) {
      throw new Error(
        "ElevenLabs TTS requires an apiKey. Pass { apiKey: '...' } or set ELEVENLABS_API_KEY in the environment."
      );
    }
    const resolvedVoice = voiceId ?? "21m00Tcm4TlvDq8ikWAM";
    const resolvedModel = modelId ?? "eleven_turbo_v2_5";
    const resolvedFormat = outputFormat ?? "pcm_16000";
    super(key, resolvedVoice, resolvedModel, resolvedFormat);
  }
};
|
|
8977
|
+
|
|
8978
|
+
// src/providers/openai-tts.ts
|
|
8979
|
+
var OPENAI_TTS_URL = "https://api.openai.com/v1/audio/speech";
|
|
8980
|
+
/**
 * Text-to-speech client that POSTs to `OPENAI_TTS_URL`, requests raw PCM and
 * resamples it from 24 kHz to 16 kHz.
 */
var OpenAITTS = class _OpenAITTS {
  /**
   * @param apiKey - Sent as a Bearer token in the `Authorization` header.
   * @param voice - Sent as `voice` in the request body.
   * @param model - Sent as `model` in the request body.
   */
  constructor(apiKey, voice = "alloy", model = "tts-1") {
    this.apiKey = apiKey;
    this.voice = voice;
    this.model = model;
  }

  /**
   * Synthesise text to speech and return the full audio as a single Buffer.
   *
   * For large chunks (or when latency matters) call `synthesizeStream` instead.
   */
  async synthesize(text) {
    const chunks = [];
    for await (const chunk of this.synthesizeStream(text)) {
      chunks.push(chunk);
    }
    return Buffer.concat(chunks);
  }

  /**
   * Synthesise text and yield audio chunks as they arrive (streaming).
   *
   * OpenAI returns 24 kHz PCM16; each chunk is resampled to 16 kHz before
   * yielding so the output is ready for telephony pipelines.
   *
   * The resampler carries state (buffered samples + odd trailing byte)
   * between chunks — without that state cross-chunk sample alignment drifts
   * and the caller hears pops / dropped audio (BUG #23, mirror of the
   * Python `audioop.ratecv` fix).
   *
   * @throws {Error} On a non-OK HTTP status (message includes status and
   *   response text) or when the response has no body.
   */
  async *synthesizeStream(text) {
    const response = await fetch(OPENAI_TTS_URL, {
      method: "POST",
      headers: {
        "Authorization": `Bearer ${this.apiKey}`,
        "Content-Type": "application/json"
      },
      body: JSON.stringify({
        model: this.model,
        input: text,
        voice: this.voice,
        response_format: "pcm"
      }),
      signal: AbortSignal.timeout(3e4)
    });
    if (!response.ok) {
      const body = await response.text();
      throw new Error(`OpenAI TTS error ${response.status}: ${body}`);
    }
    if (!response.body) {
      throw new Error("OpenAI TTS: no response body");
    }
    const ctx = { carryByte: null, leftover: [] };
    const reader = response.body.getReader();
    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        if (value && value.length > 0) {
          const out = _OpenAITTS.resampleStreaming(Buffer.from(value), ctx);
          if (out.length > 0) yield out;
        }
      }
      // Up to two samples can be left waiting for a full group of three;
      // flush them as-is so the end of the utterance is not cut off.
      if (ctx.leftover.length > 0) {
        yield _OpenAITTS.#encodeSamples(ctx.leftover);
      }
    } finally {
      // Runs on normal completion, on errors, and when the consumer stops early.
      if (typeof reader.cancel === "function") await reader.cancel().catch(() => {
      });
      reader.releaseLock();
    }
  }

  /**
   * Streaming 24 kHz → 16 kHz resampler (PCM16-LE). Maintains cross-chunk
   * state so the 3:2 pattern doesn't reset at every network read.
   *
   * @param audio - Buffer of PCM16-LE bytes; may end on an odd byte.
   * @param ctx - Mutable state `{ carryByte, leftover }` shared across calls:
   *   `carryByte` holds a dangling odd byte (or null), `leftover` holds the
   *   samples that did not complete a group of three.
   * @returns Buffer with two output samples per complete group of three inputs.
   */
  static resampleStreaming(audio, ctx) {
    let buf = audio;
    if (ctx.carryByte !== null) {
      // Re-attach the half sample left over from the previous chunk.
      buf = Buffer.concat([Buffer.from([ctx.carryByte]), audio]);
      ctx.carryByte = null;
    }
    if (buf.length % 2 === 1) {
      ctx.carryByte = buf[buf.length - 1];
      buf = buf.subarray(0, buf.length - 1);
    }
    if (buf.length === 0 && ctx.leftover.length === 0) {
      return Buffer.alloc(0);
    }
    const samples = ctx.leftover.slice();
    for (let offset = 0; offset < buf.length; offset += 2) {
      samples.push(buf.readInt16LE(offset));
    }
    // Each group of three inputs becomes two outputs: the first sample
    // verbatim, then the truncated mean of the other two.
    const groupCount = Math.floor(samples.length / 3);
    const out = Buffer.alloc(groupCount * 4);
    for (let group = 0; group < groupCount; group++) {
      const i = group * 3;
      out.writeInt16LE(samples[i], group * 4);
      out.writeInt16LE(Math.trunc((samples[i + 1] + samples[i + 2]) / 2), group * 4 + 2);
    }
    ctx.leftover = samples.slice(groupCount * 3);
    return out;
  }

  /** @deprecated use {@link resampleStreaming} with persistent state. */
  static resample24kTo16k(audio) {
    const ctx = { carryByte: null, leftover: [] };
    const out = _OpenAITTS.resampleStreaming(audio, ctx);
    if (ctx.leftover.length === 0) return out;
    return Buffer.concat([out, _OpenAITTS.#encodeSamples(ctx.leftover)]);
  }

  /** Encodes an array of 16-bit sample values as a PCM16-LE Buffer. */
  static #encodeSamples(samples) {
    const encoded = Buffer.alloc(samples.length * 2);
    for (let i = 0; i < samples.length; i++) {
      encoded.writeInt16LE(samples[i], i * 2);
    }
    return encoded;
  }
};
|
|
9105
|
+
|
|
9106
|
+
// src/tts/openai.ts
|
|
9107
|
+
/**
 * Options-object front end for `OpenAITTS`.
 *
 * The API key comes from `opts.apiKey`, falling back to the `OPENAI_API_KEY`
 * environment variable; construction throws when neither yields a value.
 * `voice` and `model` fall back to "alloy" and "tts-1" when null/undefined.
 */
var TTS2 = class extends OpenAITTS {
  constructor(opts = {}) {
    const { apiKey, voice, model } = opts;
    const key = apiKey ?? process.env.OPENAI_API_KEY;
    if (!key) {
      throw new Error(
        "OpenAI TTS requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
      );
    }
    const resolvedVoice = voice ?? "alloy";
    const resolvedModel = model ?? "tts-1";
    super(key, resolvedVoice, resolvedModel);
  }
};
|
|
8429
9118
|
|
|
8430
9119
|
// src/providers/cartesia-tts.ts
|
|
8431
9120
|
var CARTESIA_BASE_URL = "https://api.cartesia.ai";
|
|
@@ -8525,6 +9214,21 @@ var CartesiaTTS = class {
|
|
|
8525
9214
|
}
|
|
8526
9215
|
};
|
|
8527
9216
|
|
|
9217
|
+
// src/tts/cartesia.ts
|
|
9218
|
+
/**
 * Options-object front end for `CartesiaTTS`.
 *
 * The API key comes from `opts.apiKey`, falling back to the
 * `CARTESIA_API_KEY` environment variable; construction throws when neither
 * yields a value. Every other option is forwarded to the base constructor
 * untouched.
 */
var TTS3 = class extends CartesiaTTS {
  constructor(opts = {}) {
    // Split the key off so it is not passed twice (positionally and in the options).
    const { apiKey, ...providerOptions } = opts;
    const key = apiKey ?? process.env.CARTESIA_API_KEY;
    if (!key) {
      throw new Error(
        "Cartesia TTS requires an apiKey. Pass { apiKey: '...' } or set CARTESIA_API_KEY in the environment."
      );
    }
    super(key, providerOptions);
  }
};
|
|
9231
|
+
|
|
8528
9232
|
// src/providers/rime-tts.ts
|
|
8529
9233
|
var RIME_BASE_URL = "https://users.rime.ai/v1/rime-tts";
|
|
8530
9234
|
var ARCANA_MODEL_TIMEOUT_MS = 60 * 4 * 1e3;
|
|
@@ -8652,6 +9356,21 @@ var RimeTTS = class {
|
|
|
8652
9356
|
}
|
|
8653
9357
|
};
|
|
8654
9358
|
|
|
9359
|
+
// src/tts/rime.ts
|
|
9360
|
+
/**
 * Options-object front end for `RimeTTS`.
 *
 * The API key comes from `opts.apiKey`, falling back to the `RIME_API_KEY`
 * environment variable; construction throws when neither yields a value.
 * Every other option is forwarded to the base constructor untouched.
 */
var TTS4 = class extends RimeTTS {
  constructor(opts = {}) {
    // Split the key off so it is not passed twice (positionally and in the options).
    const { apiKey, ...providerOptions } = opts;
    const key = apiKey ?? process.env.RIME_API_KEY;
    if (!key) {
      throw new Error(
        "Rime TTS requires an apiKey. Pass { apiKey: '...' } or set RIME_API_KEY in the environment."
      );
    }
    super(key, providerOptions);
  }
};
|
|
9373
|
+
|
|
8655
9374
|
// src/providers/lmnt-tts.ts
|
|
8656
9375
|
var LMNT_BASE_URL = "https://api.lmnt.com/v1/ai/speech/bytes";
|
|
8657
9376
|
var LMNTTTS = class {
|
|
@@ -8730,6 +9449,119 @@ var LMNTTTS = class {
|
|
|
8730
9449
|
}
|
|
8731
9450
|
};
|
|
8732
9451
|
|
|
9452
|
+
// src/tts/lmnt.ts
|
|
9453
|
+
/**
 * Options-object front end for `LMNTTTS`.
 *
 * The API key comes from `opts.apiKey`, falling back to the `LMNT_API_KEY`
 * environment variable; construction throws when neither yields a value.
 * Every other option is forwarded to the base constructor untouched.
 */
var TTS5 = class extends LMNTTTS {
  constructor(opts = {}) {
    // Split the key off so it is not passed twice (positionally and in the options).
    const { apiKey, ...providerOptions } = opts;
    const key = apiKey ?? process.env.LMNT_API_KEY;
    if (!key) {
      throw new Error(
        "LMNT TTS requires an apiKey. Pass { apiKey: '...' } or set LMNT_API_KEY in the environment."
      );
    }
    super(key, providerOptions);
  }
};
|
|
9466
|
+
|
|
9467
|
+
// src/carriers/twilio.ts
|
|
9468
|
+
/**
 * Twilio credential holder (`kind` is always "twilio").
 *
 * `accountSid` and `authToken` come from the options object, each falling
 * back to `TWILIO_ACCOUNT_SID` / `TWILIO_AUTH_TOKEN` in the environment.
 * Construction throws when either is still missing; the SID is checked first.
 */
var Carrier = class {
  kind = "twilio";
  accountSid;
  authToken;

  constructor(opts = {}) {
    const { accountSid, authToken } = opts;
    const { TWILIO_ACCOUNT_SID, TWILIO_AUTH_TOKEN } = process.env;
    const sid = accountSid ?? TWILIO_ACCOUNT_SID;
    const token = authToken ?? TWILIO_AUTH_TOKEN;
    if (!sid) {
      throw new Error(
        "Twilio carrier requires accountSid. Pass { accountSid: 'AC...' } or set TWILIO_ACCOUNT_SID in the environment."
      );
    }
    if (!token) {
      throw new Error(
        "Twilio carrier requires authToken. Pass { authToken: '...' } or set TWILIO_AUTH_TOKEN in the environment."
      );
    }
    this.accountSid = sid;
    this.authToken = token;
  }
};
|
|
9489
|
+
|
|
9490
|
+
// src/carriers/telnyx.ts
|
|
9491
|
+
var Carrier2 = class {
  /** Discriminator marking this credential bundle as Telnyx. */
  kind = "telnyx";
  /** API key resolved by the constructor. */
  apiKey;
  /** Connection id resolved by the constructor. */
  connectionId;
  /** Public key resolved by the constructor; stays undefined when not supplied. */
  publicKey;

  /**
   * Resolve Telnyx credentials from explicit options, falling back to the
   * `TELNYX_API_KEY`, `TELNYX_CONNECTION_ID` and `TELNYX_PUBLIC_KEY`
   * environment variables when an option is null or undefined.
   *
   * @param {object} [opts]
   * @param {string} [opts.apiKey] - Explicit API key (required one way or the other).
   * @param {string} [opts.connectionId] - Explicit connection id (required one way or the other).
   * @param {string} [opts.publicKey] - Explicit public key; not validated here.
   * @throws {Error} When the API key or the connection id cannot be resolved
   *   (the API key is checked first).
   */
  constructor(opts = {}) {
    const resolvedKey = opts.apiKey ?? process.env.TELNYX_API_KEY;
    const resolvedConnection = opts.connectionId ?? process.env.TELNYX_CONNECTION_ID;
    const resolvedPublicKey = opts.publicKey ?? process.env.TELNYX_PUBLIC_KEY;

    if (!resolvedKey) {
      throw new Error(
        "Telnyx carrier requires apiKey. Pass { apiKey: '...' } or set TELNYX_API_KEY in the environment."
      );
    }
    if (!resolvedConnection) {
      throw new Error(
        "Telnyx carrier requires connectionId. Pass { connectionId: '...' } or set TELNYX_CONNECTION_ID in the environment."
      );
    }

    this.apiKey = resolvedKey;
    this.connectionId = resolvedConnection;
    this.publicKey = resolvedPublicKey;
  }
};
|
|
9515
|
+
|
|
9516
|
+
// src/public-api.ts
|
|
9517
|
+
/** Replacement text used when a guardrail is created without its own `replacement`. */
var DEFAULT_GUARDRAIL_REPLACEMENT = "I'm sorry, I can't respond to that.";

var Guardrail = class {
  /** Guardrail name (always non-empty). */
  name;
  /** `opts.blockedTerms` as supplied; left undefined when not provided. */
  blockedTerms;
  /** `opts.check` as supplied; left undefined when not provided. */
  check;
  /** Replacement text; defaults to DEFAULT_GUARDRAIL_REPLACEMENT. */
  replacement;

  /**
   * Validate and store a guardrail definition.
   *
   * @param {object} opts
   * @param {string} opts.name - Required, non-empty name.
   * @param {*} [opts.blockedTerms] - Stored by reference when truthy.
   * @param {*} [opts.check] - Stored by reference when truthy.
   * @param {string} [opts.replacement] - Replacement text; only null/undefined
   *   fall back to the default, so an empty string is kept.
   * @throws {Error} When `opts` is missing or `opts.name` is empty.
   */
  constructor(opts) {
    // A missing options object is treated as empty so the caller gets the
    // descriptive validation error below rather than an opaque TypeError.
    const options = opts ?? {};
    if (!options.name) {
      throw new Error("Guardrail requires a non-empty name.");
    }
    this.name = options.name;
    if (options.blockedTerms) this.blockedTerms = options.blockedTerms;
    if (options.check) this.check = options.check;
    this.replacement = options.replacement ?? DEFAULT_GUARDRAIL_REPLACEMENT;
  }
};
|
|
9533
|
+
/**
 * Factory shorthand for `new Guardrail(opts)`.
 *
 * @param {object} opts - Options passed unchanged to the Guardrail constructor.
 * @returns {Guardrail} The newly constructed guardrail.
 */
function guardrail(opts) {
  const instance = new Guardrail(opts);
  return instance;
}
|
|
9536
|
+
var Tool = class {
  /** Tool name (always non-empty). */
  name;
  /** Description; empty string when not provided. */
  description;
  /** Parameter schema; defaults to an empty object schema. */
  parameters;
  /** Handler function; set only when a function handler was supplied. */
  handler;
  /** Webhook URL; set only when a non-empty string was supplied. */
  webhookUrl;

  /**
   * Validate and store a tool definition. Exactly one of `handler`
   * (a function) or `webhookUrl` (a non-empty string) must be supplied.
   *
   * @param {object} opts
   * @param {string} opts.name - Required, non-empty name.
   * @param {string} [opts.description] - Defaults to "".
   * @param {object} [opts.parameters] - Defaults to `{ type: "object", properties: {} }`.
   * @param {Function} [opts.handler] - Handler function.
   * @param {string} [opts.webhookUrl] - Webhook URL.
   * @throws {Error} When `opts` is missing, the name is empty, or the
   *   handler/webhookUrl pair is not exactly one valid entry.
   */
  constructor(opts) {
    // A missing options object is treated as empty so the caller gets the
    // descriptive validation error below rather than an opaque TypeError.
    const options = opts ?? {};
    if (!options.name) {
      throw new Error("Tool requires a non-empty name.");
    }

    const hasHandler = typeof options.handler === "function";
    const hasWebhook = typeof options.webhookUrl === "string" && options.webhookUrl.length > 0;
    if (!hasHandler && !hasWebhook) {
      throw new Error("Tool requires either handler or webhookUrl.");
    }
    if (hasHandler && hasWebhook) {
      throw new Error("Tool accepts handler OR webhookUrl, not both.");
    }

    this.name = options.name;
    this.description = options.description ?? "";
    this.parameters = options.parameters ?? { type: "object", properties: {} };
    if (hasHandler) this.handler = options.handler;
    if (hasWebhook) this.webhookUrl = options.webhookUrl;
  }
};
|
|
9561
|
+
/**
 * Factory shorthand for `new Tool(opts)`.
 *
 * @param {object} opts - Options passed unchanged to the Tool constructor.
 * @returns {Tool} The newly constructed tool.
 */
function tool(opts) {
  const instance = new Tool(opts);
  return instance;
}
|
|
9564
|
+
|
|
8733
9565
|
// src/index.ts
|
|
8734
9566
|
init_transcoding();
|
|
8735
9567
|
init_tunnel();
|
|
@@ -9354,21 +10186,25 @@ function isAudioConfig(value) {
|
|
|
9354
10186
|
CartesiaSTT,
|
|
9355
10187
|
CartesiaTTS,
|
|
9356
10188
|
ChatContext,
|
|
10189
|
+
CloudflareTunnel,
|
|
9357
10190
|
DEFAULT_MIN_SENTENCE_LEN,
|
|
9358
10191
|
DEFAULT_PRICING,
|
|
9359
10192
|
DTMF_EVENTS,
|
|
9360
10193
|
DeepgramSTT,
|
|
10194
|
+
ElevenLabsConvAI,
|
|
9361
10195
|
ElevenLabsConvAIAdapter,
|
|
9362
10196
|
ElevenLabsTTS,
|
|
9363
10197
|
FallbackLLMProvider,
|
|
9364
10198
|
GEMINI_DEFAULT_INPUT_SR,
|
|
9365
10199
|
GEMINI_DEFAULT_OUTPUT_SR,
|
|
9366
10200
|
GeminiLiveAdapter,
|
|
10201
|
+
Guardrail,
|
|
9367
10202
|
IVRActivity,
|
|
9368
10203
|
LLMLoop,
|
|
9369
10204
|
LMNTTTS,
|
|
9370
10205
|
MetricsStore,
|
|
9371
10206
|
OpenAILLMProvider,
|
|
10207
|
+
OpenAIRealtime,
|
|
9372
10208
|
OpenAIRealtimeAdapter,
|
|
9373
10209
|
OpenAITTS,
|
|
9374
10210
|
PartialStreamError,
|
|
@@ -9381,8 +10217,12 @@ function isAudioConfig(value) {
|
|
|
9381
10217
|
RimeTTS,
|
|
9382
10218
|
SentenceChunker,
|
|
9383
10219
|
SonioxSTT,
|
|
10220
|
+
StaticTunnel,
|
|
10221
|
+
Telnyx,
|
|
9384
10222
|
TestSession,
|
|
9385
10223
|
TfidfLoopDetector,
|
|
10224
|
+
Tool,
|
|
10225
|
+
Twilio,
|
|
9386
10226
|
ULTRAVOX_DEFAULT_API_BASE,
|
|
9387
10227
|
ULTRAVOX_DEFAULT_SR,
|
|
9388
10228
|
UltravoxRealtimeAdapter,
|
|
@@ -9402,6 +10242,7 @@ function isAudioConfig(value) {
|
|
|
9402
10242
|
filterMarkdown,
|
|
9403
10243
|
formatDtmf,
|
|
9404
10244
|
getLogger,
|
|
10245
|
+
guardrail,
|
|
9405
10246
|
isRemoteUrl,
|
|
9406
10247
|
isWebSocketUrl,
|
|
9407
10248
|
makeAuthMiddleware,
|
|
@@ -9423,5 +10264,6 @@ function isAudioConfig(value) {
|
|
|
9423
10264
|
selectSoundFromList,
|
|
9424
10265
|
setLogger,
|
|
9425
10266
|
startTunnel,
|
|
10267
|
+
tool,
|
|
9426
10268
|
whisper
|
|
9427
10269
|
});
|