getpatter 0.4.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +136 -162
- package/dist/carrier-config-CPG5CROM.mjs +84 -0
- package/dist/{chunk-35EVXMGB.mjs → chunk-757NVN4L.mjs} +396 -458
- package/dist/cli.js +92 -5
- package/dist/index.d.mts +901 -241
- package/dist/index.d.ts +901 -241
- package/dist/index.js +1763 -921
- package/dist/index.mjs +1240 -419
- package/dist/{test-mode-RH65MMSP.mjs → test-mode-YFOL2HYH.mjs} +1 -1
- package/package.json +1 -1
|
@@ -247,223 +247,13 @@ var ElevenLabsConvAIAdapter = class {
|
|
|
247
247
|
}
|
|
248
248
|
};
|
|
249
249
|
|
|
250
|
-
// src/
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
this.language = language;
|
|
257
|
-
this.model = model;
|
|
258
|
-
this.encoding = encoding;
|
|
259
|
-
this.sampleRate = sampleRate;
|
|
260
|
-
}
|
|
261
|
-
ws = null;
|
|
262
|
-
callbacks = [];
|
|
263
|
-
/** Request ID from Deepgram — used to query actual cost post-call. */
|
|
264
|
-
requestId = "";
|
|
265
|
-
/** Factory for Twilio calls — mulaw 8 kHz. */
|
|
266
|
-
static forTwilio(apiKey, language = "en", model = "nova-3") {
|
|
267
|
-
return new _DeepgramSTT(apiKey, language, model, "mulaw", 8e3);
|
|
268
|
-
}
|
|
269
|
-
async connect() {
|
|
270
|
-
const params = new URLSearchParams({
|
|
271
|
-
model: this.model,
|
|
272
|
-
language: this.language,
|
|
273
|
-
encoding: this.encoding,
|
|
274
|
-
sample_rate: String(this.sampleRate),
|
|
275
|
-
channels: "1",
|
|
276
|
-
interim_results: "true",
|
|
277
|
-
endpointing: "300",
|
|
278
|
-
smart_format: "true",
|
|
279
|
-
vad_events: "true",
|
|
280
|
-
no_delay: "true"
|
|
281
|
-
});
|
|
282
|
-
const url = `${DEEPGRAM_WS_URL}?${params.toString()}`;
|
|
283
|
-
this.ws = new WebSocket3(url, {
|
|
284
|
-
headers: { Authorization: `Token ${this.apiKey}` }
|
|
285
|
-
});
|
|
286
|
-
await new Promise((resolve, reject) => {
|
|
287
|
-
const timer = setTimeout(() => reject(new Error("Deepgram connect timeout")), 1e4);
|
|
288
|
-
this.ws.once("open", () => {
|
|
289
|
-
clearTimeout(timer);
|
|
290
|
-
resolve();
|
|
291
|
-
});
|
|
292
|
-
this.ws.once("error", (err) => {
|
|
293
|
-
clearTimeout(timer);
|
|
294
|
-
reject(err);
|
|
295
|
-
});
|
|
296
|
-
});
|
|
297
|
-
this.ws.on("message", (raw) => {
|
|
298
|
-
let data;
|
|
299
|
-
try {
|
|
300
|
-
data = JSON.parse(raw.toString());
|
|
301
|
-
} catch {
|
|
302
|
-
return;
|
|
303
|
-
}
|
|
304
|
-
if (data.type === "Metadata" && data.request_id) {
|
|
305
|
-
this.requestId = data.request_id;
|
|
306
|
-
return;
|
|
307
|
-
}
|
|
308
|
-
if (data.type !== "Results") return;
|
|
309
|
-
const alternatives = data.channel?.alternatives ?? [];
|
|
310
|
-
if (!alternatives.length) return;
|
|
311
|
-
const best = alternatives[0];
|
|
312
|
-
const text = (best.transcript ?? "").trim();
|
|
313
|
-
if (!text) return;
|
|
314
|
-
const transcript = {
|
|
315
|
-
text,
|
|
316
|
-
isFinal: Boolean(data.is_final) && Boolean(data.speech_final),
|
|
317
|
-
confidence: best.confidence ?? 0
|
|
318
|
-
};
|
|
319
|
-
for (const cb of this.callbacks) {
|
|
320
|
-
cb(transcript);
|
|
321
|
-
}
|
|
322
|
-
});
|
|
323
|
-
}
|
|
324
|
-
sendAudio(audio) {
|
|
325
|
-
if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
|
|
326
|
-
this.ws.send(audio);
|
|
327
|
-
}
|
|
328
|
-
onTranscript(callback) {
|
|
329
|
-
if (this.callbacks.length >= 10) {
|
|
330
|
-
getLogger().warn("DeepgramSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
|
|
331
|
-
this.callbacks[this.callbacks.length - 1] = callback;
|
|
332
|
-
return;
|
|
333
|
-
}
|
|
334
|
-
this.callbacks.push(callback);
|
|
335
|
-
}
|
|
336
|
-
close() {
|
|
337
|
-
if (this.ws) {
|
|
338
|
-
try {
|
|
339
|
-
this.ws.send(JSON.stringify({ type: "CloseStream" }));
|
|
340
|
-
} catch {
|
|
341
|
-
}
|
|
342
|
-
this.ws.close();
|
|
343
|
-
this.ws = null;
|
|
344
|
-
}
|
|
345
|
-
}
|
|
346
|
-
};
|
|
347
|
-
|
|
348
|
-
// src/providers/whisper-stt.ts
|
|
349
|
-
var OPENAI_TRANSCRIPTION_URL = "https://api.openai.com/v1/audio/transcriptions";
|
|
350
|
-
var DEFAULT_BUFFER_SIZE = 16e3 * 2;
|
|
351
|
-
function wrapPcmInWav(pcm, sampleRate = 16e3, channels = 1, bitsPerSample = 16) {
|
|
352
|
-
const dataSize = pcm.length;
|
|
353
|
-
const header = Buffer.alloc(44);
|
|
354
|
-
header.write("RIFF", 0);
|
|
355
|
-
header.writeUInt32LE(36 + dataSize, 4);
|
|
356
|
-
header.write("WAVE", 8);
|
|
357
|
-
header.write("fmt ", 12);
|
|
358
|
-
header.writeUInt32LE(16, 16);
|
|
359
|
-
header.writeUInt16LE(1, 20);
|
|
360
|
-
header.writeUInt16LE(channels, 22);
|
|
361
|
-
header.writeUInt32LE(sampleRate, 24);
|
|
362
|
-
header.writeUInt32LE(sampleRate * channels * (bitsPerSample / 8), 28);
|
|
363
|
-
header.writeUInt16LE(channels * (bitsPerSample / 8), 32);
|
|
364
|
-
header.writeUInt16LE(bitsPerSample, 34);
|
|
365
|
-
header.write("data", 36);
|
|
366
|
-
header.writeUInt32LE(dataSize, 40);
|
|
367
|
-
return Buffer.concat([header, pcm]);
|
|
250
|
+
// src/provider-factory.ts
|
|
251
|
+
async function createSTT(agent) {
|
|
252
|
+
return agent.stt ?? null;
|
|
253
|
+
}
|
|
254
|
+
async function createTTS(agent) {
|
|
255
|
+
return agent.tts ?? null;
|
|
368
256
|
}
|
|
369
|
-
var WhisperSTT = class _WhisperSTT {
|
|
370
|
-
apiKey;
|
|
371
|
-
model;
|
|
372
|
-
language;
|
|
373
|
-
bufferSize;
|
|
374
|
-
buffer = Buffer.alloc(0);
|
|
375
|
-
callbacks = [];
|
|
376
|
-
running = false;
|
|
377
|
-
pendingTranscriptions = [];
|
|
378
|
-
constructor(apiKey, model = "whisper-1", language, bufferSize = DEFAULT_BUFFER_SIZE) {
|
|
379
|
-
this.apiKey = apiKey;
|
|
380
|
-
this.model = model;
|
|
381
|
-
this.language = language;
|
|
382
|
-
this.bufferSize = bufferSize;
|
|
383
|
-
}
|
|
384
|
-
/** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
|
|
385
|
-
static forTwilio(apiKey, language = "en", model = "whisper-1") {
|
|
386
|
-
return new _WhisperSTT(apiKey, model, language);
|
|
387
|
-
}
|
|
388
|
-
async connect() {
|
|
389
|
-
this.running = true;
|
|
390
|
-
this.buffer = Buffer.alloc(0);
|
|
391
|
-
}
|
|
392
|
-
sendAudio(audio) {
|
|
393
|
-
if (!this.running) return;
|
|
394
|
-
this.buffer = Buffer.concat([this.buffer, audio]);
|
|
395
|
-
if (this.buffer.length >= this.bufferSize) {
|
|
396
|
-
const pcm = this.buffer;
|
|
397
|
-
this.buffer = Buffer.alloc(0);
|
|
398
|
-
this.trackTranscription(this.transcribeBuffer(pcm));
|
|
399
|
-
}
|
|
400
|
-
}
|
|
401
|
-
trackTranscription(promise) {
|
|
402
|
-
const wrapped = promise.finally(() => {
|
|
403
|
-
const idx = this.pendingTranscriptions.indexOf(wrapped);
|
|
404
|
-
if (idx !== -1) this.pendingTranscriptions.splice(idx, 1);
|
|
405
|
-
});
|
|
406
|
-
this.pendingTranscriptions.push(wrapped);
|
|
407
|
-
}
|
|
408
|
-
onTranscript(callback) {
|
|
409
|
-
if (this.callbacks.length >= 10) {
|
|
410
|
-
getLogger().warn("WhisperSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
|
|
411
|
-
this.callbacks[this.callbacks.length - 1] = callback;
|
|
412
|
-
return;
|
|
413
|
-
}
|
|
414
|
-
this.callbacks.push(callback);
|
|
415
|
-
}
|
|
416
|
-
async close() {
|
|
417
|
-
this.running = false;
|
|
418
|
-
if (this.buffer.length >= this.bufferSize / 4) {
|
|
419
|
-
const pcm = this.buffer;
|
|
420
|
-
this.buffer = Buffer.alloc(0);
|
|
421
|
-
this.trackTranscription(this.transcribeBuffer(pcm));
|
|
422
|
-
} else {
|
|
423
|
-
this.buffer = Buffer.alloc(0);
|
|
424
|
-
}
|
|
425
|
-
await Promise.allSettled(this.pendingTranscriptions);
|
|
426
|
-
this.callbacks = [];
|
|
427
|
-
}
|
|
428
|
-
// ------------------------------------------------------------------
|
|
429
|
-
// Private
|
|
430
|
-
// ------------------------------------------------------------------
|
|
431
|
-
async transcribeBuffer(pcm) {
|
|
432
|
-
const wav = wrapPcmInWav(pcm);
|
|
433
|
-
const formData = new FormData();
|
|
434
|
-
formData.append("file", new Blob([wav.buffer.slice(wav.byteOffset, wav.byteOffset + wav.byteLength)], { type: "audio/wav" }), "audio.wav");
|
|
435
|
-
formData.append("model", this.model);
|
|
436
|
-
if (this.language) {
|
|
437
|
-
formData.append("language", this.language);
|
|
438
|
-
}
|
|
439
|
-
try {
|
|
440
|
-
const resp = await fetch(OPENAI_TRANSCRIPTION_URL, {
|
|
441
|
-
method: "POST",
|
|
442
|
-
headers: { Authorization: `Bearer ${this.apiKey}` },
|
|
443
|
-
body: formData,
|
|
444
|
-
signal: AbortSignal.timeout(15e3)
|
|
445
|
-
});
|
|
446
|
-
if (!resp.ok) {
|
|
447
|
-
const body = await resp.text();
|
|
448
|
-
getLogger().error(`WhisperSTT transcription error: ${resp.status} ${body}`);
|
|
449
|
-
return;
|
|
450
|
-
}
|
|
451
|
-
const json = await resp.json();
|
|
452
|
-
const text = (json.text ?? "").trim();
|
|
453
|
-
if (!text) return;
|
|
454
|
-
const transcript = {
|
|
455
|
-
text,
|
|
456
|
-
isFinal: true,
|
|
457
|
-
confidence: 1
|
|
458
|
-
};
|
|
459
|
-
for (const cb of this.callbacks) {
|
|
460
|
-
cb(transcript);
|
|
461
|
-
}
|
|
462
|
-
} catch (err) {
|
|
463
|
-
getLogger().error(`WhisperSTT transcription error: ${String(err)}`);
|
|
464
|
-
}
|
|
465
|
-
}
|
|
466
|
-
};
|
|
467
257
|
|
|
468
258
|
// src/pricing.ts
|
|
469
259
|
var DEFAULT_PRICING = {
|
|
@@ -534,9 +324,15 @@ var MetricsStore = class extends EventEmitter {
|
|
|
534
324
|
maxCalls;
|
|
535
325
|
calls = [];
|
|
536
326
|
activeCalls = /* @__PURE__ */ new Map();
|
|
537
|
-
|
|
327
|
+
/**
|
|
328
|
+
* Accepts either a numeric ``maxCalls`` (legacy positional — matches the
|
|
329
|
+
* original TS API) or an options object ``{ maxCalls }`` to align with the
|
|
330
|
+
* Python SDK's keyword-argument style. Plain literals also work:
|
|
331
|
+
* ``new MetricsStore()`` / ``new MetricsStore(100)`` / ``new MetricsStore({ maxCalls: 100 })``.
|
|
332
|
+
*/
|
|
333
|
+
constructor(maxCallsOrOpts = 500) {
|
|
538
334
|
super();
|
|
539
|
-
this.maxCalls = maxCalls;
|
|
335
|
+
this.maxCalls = typeof maxCallsOrOpts === "number" ? maxCallsOrOpts : maxCallsOrOpts.maxCalls ?? 500;
|
|
540
336
|
}
|
|
541
337
|
publish(eventType, data) {
|
|
542
338
|
this.emit("sse", { type: eventType, data });
|
|
@@ -544,22 +340,100 @@ var MetricsStore = class extends EventEmitter {
|
|
|
544
340
|
recordCallStart(data) {
|
|
545
341
|
const callId = data.call_id || "";
|
|
546
342
|
if (!callId) return;
|
|
343
|
+
const existing = this.activeCalls.get(callId);
|
|
344
|
+
if (existing) {
|
|
345
|
+
existing.caller = data.caller || existing.caller;
|
|
346
|
+
existing.callee = data.callee || existing.callee;
|
|
347
|
+
existing.direction = data.direction || existing.direction;
|
|
348
|
+
existing.status = "in-progress";
|
|
349
|
+
existing.turns = existing.turns || [];
|
|
350
|
+
} else {
|
|
351
|
+
const record = {
|
|
352
|
+
call_id: callId,
|
|
353
|
+
caller: data.caller || "",
|
|
354
|
+
callee: data.callee || "",
|
|
355
|
+
direction: data.direction || "inbound",
|
|
356
|
+
started_at: Date.now() / 1e3,
|
|
357
|
+
status: "in-progress",
|
|
358
|
+
turns: []
|
|
359
|
+
};
|
|
360
|
+
this.activeCalls.set(callId, record);
|
|
361
|
+
}
|
|
362
|
+
this.publish("call_start", {
|
|
363
|
+
call_id: callId,
|
|
364
|
+
caller: data.caller || "",
|
|
365
|
+
callee: data.callee || "",
|
|
366
|
+
direction: data.direction || "inbound"
|
|
367
|
+
});
|
|
368
|
+
}
|
|
369
|
+
/**
|
|
370
|
+
* Pre-register an outbound call before any webhook fires. Lets the
|
|
371
|
+
* dashboard surface attempts that never reach media (no-answer, busy,
|
|
372
|
+
* carrier-rejected). Mirrors the Python ``record_call_initiated``.
|
|
373
|
+
*/
|
|
374
|
+
recordCallInitiated(data) {
|
|
375
|
+
const callId = data.call_id || "";
|
|
376
|
+
if (!callId) return;
|
|
377
|
+
if (this.activeCalls.has(callId)) return;
|
|
547
378
|
const record = {
|
|
548
379
|
call_id: callId,
|
|
549
380
|
caller: data.caller || "",
|
|
550
381
|
callee: data.callee || "",
|
|
551
|
-
direction: data.direction || "
|
|
382
|
+
direction: data.direction || "outbound",
|
|
552
383
|
started_at: Date.now() / 1e3,
|
|
384
|
+
status: "initiated",
|
|
553
385
|
turns: []
|
|
554
386
|
};
|
|
555
387
|
this.activeCalls.set(callId, record);
|
|
556
|
-
this.publish("
|
|
388
|
+
this.publish("call_initiated", {
|
|
557
389
|
call_id: callId,
|
|
558
390
|
caller: record.caller,
|
|
559
391
|
callee: record.callee,
|
|
560
|
-
direction: record.direction
|
|
392
|
+
direction: record.direction,
|
|
393
|
+
status: record.status
|
|
561
394
|
});
|
|
562
395
|
}
|
|
396
|
+
/**
|
|
397
|
+
* Update the status of an active or completed call. Terminal states
|
|
398
|
+
* (completed, no-answer, busy, failed, canceled, webhook_error) move the
|
|
399
|
+
* row from active to completed so the UI freezes the live duration timer.
|
|
400
|
+
*/
|
|
401
|
+
updateCallStatus(callId, status, extra = {}) {
|
|
402
|
+
if (!callId || !status) return;
|
|
403
|
+
const TERMINAL = /* @__PURE__ */ new Set(["completed", "no-answer", "busy", "failed", "canceled", "webhook_error"]);
|
|
404
|
+
const active = this.activeCalls.get(callId);
|
|
405
|
+
if (active) {
|
|
406
|
+
active.status = status;
|
|
407
|
+
Object.assign(active, extra);
|
|
408
|
+
if (TERMINAL.has(status)) {
|
|
409
|
+
const entry = {
|
|
410
|
+
call_id: callId,
|
|
411
|
+
caller: active.caller || "",
|
|
412
|
+
callee: active.callee || "",
|
|
413
|
+
direction: active.direction || "outbound",
|
|
414
|
+
started_at: active.started_at || 0,
|
|
415
|
+
ended_at: Date.now() / 1e3,
|
|
416
|
+
status,
|
|
417
|
+
metrics: null,
|
|
418
|
+
...extra
|
|
419
|
+
};
|
|
420
|
+
this.activeCalls.delete(callId);
|
|
421
|
+
this.calls.push(entry);
|
|
422
|
+
if (this.calls.length > this.maxCalls) {
|
|
423
|
+
this.calls = this.calls.slice(-this.maxCalls);
|
|
424
|
+
}
|
|
425
|
+
}
|
|
426
|
+
} else {
|
|
427
|
+
for (let i = this.calls.length - 1; i >= 0; i--) {
|
|
428
|
+
if (this.calls[i].call_id === callId) {
|
|
429
|
+
this.calls[i].status = status;
|
|
430
|
+
Object.assign(this.calls[i], extra);
|
|
431
|
+
break;
|
|
432
|
+
}
|
|
433
|
+
}
|
|
434
|
+
}
|
|
435
|
+
this.publish("call_status", { call_id: callId, status, ...extra });
|
|
436
|
+
}
|
|
563
437
|
recordTurn(data) {
|
|
564
438
|
const callId = data.call_id || "";
|
|
565
439
|
const turn = data.turn;
|
|
@@ -576,6 +450,8 @@ var MetricsStore = class extends EventEmitter {
|
|
|
576
450
|
if (!callId) return;
|
|
577
451
|
const active = this.activeCalls.get(callId);
|
|
578
452
|
this.activeCalls.delete(callId);
|
|
453
|
+
const activeStatus = active?.status;
|
|
454
|
+
const resolvedStatus = activeStatus && activeStatus !== "in-progress" ? activeStatus : "completed";
|
|
579
455
|
const entry = {
|
|
580
456
|
call_id: callId,
|
|
581
457
|
caller: data.caller || active?.caller || "",
|
|
@@ -584,6 +460,7 @@ var MetricsStore = class extends EventEmitter {
|
|
|
584
460
|
started_at: active?.started_at || 0,
|
|
585
461
|
ended_at: Date.now() / 1e3,
|
|
586
462
|
transcript: data.transcript || [],
|
|
463
|
+
status: resolvedStatus,
|
|
587
464
|
metrics: metrics ?? null
|
|
588
465
|
};
|
|
589
466
|
this.calls.push(entry);
|
|
@@ -1749,161 +1626,120 @@ function isWebSocketUrl(url) {
|
|
|
1749
1626
|
return url.startsWith("ws://") || url.startsWith("wss://");
|
|
1750
1627
|
}
|
|
1751
1628
|
|
|
1752
|
-
// src/providers/
|
|
1753
|
-
|
|
1754
|
-
var
|
|
1755
|
-
|
|
1629
|
+
// src/providers/deepgram-stt.ts
|
|
1630
|
+
import WebSocket3 from "ws";
|
|
1631
|
+
var DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/listen";
|
|
1632
|
+
var DeepgramSTT = class _DeepgramSTT {
|
|
1633
|
+
ws = null;
|
|
1634
|
+
callbacks = [];
|
|
1635
|
+
/** Request ID from Deepgram — used to query actual cost post-call. */
|
|
1636
|
+
requestId = "";
|
|
1637
|
+
apiKey;
|
|
1638
|
+
language;
|
|
1639
|
+
model;
|
|
1640
|
+
encoding;
|
|
1641
|
+
sampleRate;
|
|
1642
|
+
endpointingMs;
|
|
1643
|
+
utteranceEndMs;
|
|
1644
|
+
smartFormat;
|
|
1645
|
+
interimResults;
|
|
1646
|
+
vadEvents;
|
|
1647
|
+
constructor(apiKey, languageOrOptions, model, encoding, sampleRate, options) {
|
|
1756
1648
|
this.apiKey = apiKey;
|
|
1757
|
-
|
|
1758
|
-
this.
|
|
1759
|
-
this.
|
|
1760
|
-
|
|
1761
|
-
|
|
1762
|
-
|
|
1763
|
-
|
|
1764
|
-
|
|
1765
|
-
|
|
1766
|
-
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
1770
|
-
|
|
1771
|
-
return Buffer.concat(chunks);
|
|
1649
|
+
const opts = typeof languageOrOptions === "object" && languageOrOptions !== null ? languageOrOptions : options ?? {};
|
|
1650
|
+
this.language = (typeof languageOrOptions === "string" ? languageOrOptions : opts.language) ?? "en";
|
|
1651
|
+
this.model = model ?? opts.model ?? "nova-3";
|
|
1652
|
+
this.encoding = encoding ?? opts.encoding ?? "linear16";
|
|
1653
|
+
this.sampleRate = sampleRate ?? opts.sampleRate ?? 16e3;
|
|
1654
|
+
this.endpointingMs = opts.endpointingMs ?? 150;
|
|
1655
|
+
this.utteranceEndMs = opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3;
|
|
1656
|
+
this.smartFormat = opts.smartFormat ?? true;
|
|
1657
|
+
this.interimResults = opts.interimResults ?? true;
|
|
1658
|
+
this.vadEvents = opts.vadEvents ?? true;
|
|
1659
|
+
}
|
|
1660
|
+
/** Factory for Twilio calls — mulaw 8 kHz. Forwards tuning options through. */
|
|
1661
|
+
static forTwilio(apiKey, language = "en", model = "nova-3", options = {}) {
|
|
1662
|
+
return new _DeepgramSTT(apiKey, language, model, "mulaw", 8e3, options);
|
|
1772
1663
|
}
|
|
1773
|
-
|
|
1774
|
-
|
|
1775
|
-
|
|
1776
|
-
|
|
1777
|
-
|
|
1778
|
-
|
|
1779
|
-
|
|
1780
|
-
|
|
1781
|
-
|
|
1782
|
-
|
|
1783
|
-
|
|
1784
|
-
|
|
1785
|
-
"Content-Type": "application/json"
|
|
1786
|
-
},
|
|
1787
|
-
body: JSON.stringify({ text, model_id: this.modelId }),
|
|
1788
|
-
signal: AbortSignal.timeout(3e4)
|
|
1664
|
+
async connect() {
|
|
1665
|
+
const params = new URLSearchParams({
|
|
1666
|
+
model: this.model,
|
|
1667
|
+
language: this.language,
|
|
1668
|
+
encoding: this.encoding,
|
|
1669
|
+
sample_rate: String(this.sampleRate),
|
|
1670
|
+
channels: "1",
|
|
1671
|
+
interim_results: this.interimResults ? "true" : "false",
|
|
1672
|
+
endpointing: String(this.endpointingMs),
|
|
1673
|
+
smart_format: this.smartFormat ? "true" : "false",
|
|
1674
|
+
vad_events: this.vadEvents ? "true" : "false",
|
|
1675
|
+
no_delay: "true"
|
|
1789
1676
|
});
|
|
1790
|
-
if (
|
|
1791
|
-
|
|
1792
|
-
throw new Error(`ElevenLabs TTS error ${response.status}: ${body}`);
|
|
1793
|
-
}
|
|
1794
|
-
if (!response.body) {
|
|
1795
|
-
throw new Error("ElevenLabs TTS: no response body");
|
|
1677
|
+
if (this.utteranceEndMs !== null) {
|
|
1678
|
+
params.set("utterance_end_ms", String(Math.max(this.utteranceEndMs, 1e3)));
|
|
1796
1679
|
}
|
|
1797
|
-
const
|
|
1798
|
-
|
|
1799
|
-
|
|
1800
|
-
|
|
1801
|
-
|
|
1802
|
-
|
|
1803
|
-
|
|
1804
|
-
|
|
1805
|
-
|
|
1806
|
-
|
|
1807
|
-
|
|
1680
|
+
const url = `${DEEPGRAM_WS_URL}?${params.toString()}`;
|
|
1681
|
+
this.ws = new WebSocket3(url, {
|
|
1682
|
+
headers: { Authorization: `Token ${this.apiKey}` }
|
|
1683
|
+
});
|
|
1684
|
+
await new Promise((resolve, reject) => {
|
|
1685
|
+
const timer = setTimeout(() => reject(new Error("Deepgram connect timeout")), 1e4);
|
|
1686
|
+
this.ws.once("open", () => {
|
|
1687
|
+
clearTimeout(timer);
|
|
1688
|
+
resolve();
|
|
1689
|
+
});
|
|
1690
|
+
this.ws.once("error", (err) => {
|
|
1691
|
+
clearTimeout(timer);
|
|
1692
|
+
reject(err);
|
|
1808
1693
|
});
|
|
1809
|
-
reader.releaseLock();
|
|
1810
|
-
}
|
|
1811
|
-
}
|
|
1812
|
-
};
|
|
1813
|
-
|
|
1814
|
-
// src/providers/openai-tts.ts
|
|
1815
|
-
var OPENAI_TTS_URL = "https://api.openai.com/v1/audio/speech";
|
|
1816
|
-
var OpenAITTS = class _OpenAITTS {
|
|
1817
|
-
constructor(apiKey, voice = "alloy", model = "tts-1") {
|
|
1818
|
-
this.apiKey = apiKey;
|
|
1819
|
-
this.voice = voice;
|
|
1820
|
-
this.model = model;
|
|
1821
|
-
}
|
|
1822
|
-
/**
|
|
1823
|
-
* Synthesise text to speech and return the full audio as a single Buffer.
|
|
1824
|
-
*
|
|
1825
|
-
* For large chunks (or when latency matters) call `synthesizeStream` instead.
|
|
1826
|
-
*/
|
|
1827
|
-
async synthesize(text) {
|
|
1828
|
-
const chunks = [];
|
|
1829
|
-
for await (const chunk of this.synthesizeStream(text)) {
|
|
1830
|
-
chunks.push(chunk);
|
|
1831
|
-
}
|
|
1832
|
-
return Buffer.concat(chunks);
|
|
1833
|
-
}
|
|
1834
|
-
/**
|
|
1835
|
-
* Synthesise text and yield audio chunks as they arrive (streaming).
|
|
1836
|
-
*
|
|
1837
|
-
* OpenAI returns 24 kHz PCM16; each chunk is resampled to 16 kHz before
|
|
1838
|
-
* yielding so the output is ready for telephony pipelines.
|
|
1839
|
-
*/
|
|
1840
|
-
async *synthesizeStream(text) {
|
|
1841
|
-
const response = await fetch(OPENAI_TTS_URL, {
|
|
1842
|
-
method: "POST",
|
|
1843
|
-
headers: {
|
|
1844
|
-
"Authorization": `Bearer ${this.apiKey}`,
|
|
1845
|
-
"Content-Type": "application/json"
|
|
1846
|
-
},
|
|
1847
|
-
body: JSON.stringify({
|
|
1848
|
-
model: this.model,
|
|
1849
|
-
input: text,
|
|
1850
|
-
voice: this.voice,
|
|
1851
|
-
response_format: "pcm"
|
|
1852
|
-
}),
|
|
1853
|
-
signal: AbortSignal.timeout(3e4)
|
|
1854
1694
|
});
|
|
1855
|
-
|
|
1856
|
-
|
|
1857
|
-
|
|
1858
|
-
|
|
1859
|
-
|
|
1860
|
-
|
|
1861
|
-
}
|
|
1862
|
-
const reader = response.body.getReader();
|
|
1863
|
-
try {
|
|
1864
|
-
while (true) {
|
|
1865
|
-
const { done, value } = await reader.read();
|
|
1866
|
-
if (done) break;
|
|
1867
|
-
if (value && value.length > 0) {
|
|
1868
|
-
yield _OpenAITTS.resample24kTo16k(Buffer.from(value));
|
|
1869
|
-
}
|
|
1695
|
+
this.ws.on("message", (raw) => {
|
|
1696
|
+
let data;
|
|
1697
|
+
try {
|
|
1698
|
+
data = JSON.parse(raw.toString());
|
|
1699
|
+
} catch {
|
|
1700
|
+
return;
|
|
1870
1701
|
}
|
|
1871
|
-
|
|
1872
|
-
|
|
1873
|
-
|
|
1874
|
-
|
|
1702
|
+
if (data.type === "Metadata" && data.request_id) {
|
|
1703
|
+
this.requestId = data.request_id;
|
|
1704
|
+
return;
|
|
1705
|
+
}
|
|
1706
|
+
if (data.type !== "Results") return;
|
|
1707
|
+
const alternatives = data.channel?.alternatives ?? [];
|
|
1708
|
+
if (!alternatives.length) return;
|
|
1709
|
+
const best = alternatives[0];
|
|
1710
|
+
const text = (best.transcript ?? "").trim();
|
|
1711
|
+
if (!text) return;
|
|
1712
|
+
const transcript = {
|
|
1713
|
+
text,
|
|
1714
|
+
isFinal: Boolean(data.is_final) || Boolean(data.speech_final),
|
|
1715
|
+
confidence: best.confidence ?? 0
|
|
1716
|
+
};
|
|
1717
|
+
for (const cb of this.callbacks) {
|
|
1718
|
+
cb(transcript);
|
|
1719
|
+
}
|
|
1720
|
+
});
|
|
1721
|
+
}
|
|
1722
|
+
sendAudio(audio) {
|
|
1723
|
+
if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
|
|
1724
|
+
this.ws.send(audio);
|
|
1725
|
+
}
|
|
1726
|
+
onTranscript(callback) {
|
|
1727
|
+
if (this.callbacks.length >= 10) {
|
|
1728
|
+
getLogger().warn("DeepgramSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
|
|
1729
|
+
this.callbacks[this.callbacks.length - 1] = callback;
|
|
1730
|
+
return;
|
|
1875
1731
|
}
|
|
1732
|
+
this.callbacks.push(callback);
|
|
1876
1733
|
}
|
|
1877
|
-
|
|
1878
|
-
|
|
1879
|
-
|
|
1880
|
-
|
|
1881
|
-
|
|
1882
|
-
* Python SDK implementation.
|
|
1883
|
-
*/
|
|
1884
|
-
static resample24kTo16k(audio) {
|
|
1885
|
-
if (audio.length < 2) return audio;
|
|
1886
|
-
const sampleCount = Math.floor(audio.length / 2);
|
|
1887
|
-
const samples = new Int16Array(sampleCount);
|
|
1888
|
-
for (let i = 0; i < sampleCount; i++) {
|
|
1889
|
-
samples[i] = audio.readInt16LE(i * 2);
|
|
1890
|
-
}
|
|
1891
|
-
const resampled = [];
|
|
1892
|
-
for (let i = 0; i < samples.length; i += 3) {
|
|
1893
|
-
resampled.push(samples[i]);
|
|
1894
|
-
if (i + 1 < samples.length) {
|
|
1895
|
-
if (i + 2 < samples.length) {
|
|
1896
|
-
resampled.push(Math.trunc((samples[i + 1] + samples[i + 2]) / 2));
|
|
1897
|
-
} else {
|
|
1898
|
-
resampled.push(samples[i + 1]);
|
|
1899
|
-
}
|
|
1734
|
+
close() {
|
|
1735
|
+
if (this.ws) {
|
|
1736
|
+
try {
|
|
1737
|
+
this.ws.send(JSON.stringify({ type: "CloseStream" }));
|
|
1738
|
+
} catch {
|
|
1900
1739
|
}
|
|
1740
|
+
this.ws.close();
|
|
1741
|
+
this.ws = null;
|
|
1901
1742
|
}
|
|
1902
|
-
const out = Buffer.alloc(resampled.length * 2);
|
|
1903
|
-
for (let i = 0; i < resampled.length; i++) {
|
|
1904
|
-
out.writeInt16LE(resampled[i], i * 2);
|
|
1905
|
-
}
|
|
1906
|
-
return out;
|
|
1907
1743
|
}
|
|
1908
1744
|
};
|
|
1909
1745
|
|
|
@@ -2493,6 +2329,9 @@ var StreamHandler = class {
|
|
|
2493
2329
|
maxDurationTimer = null;
|
|
2494
2330
|
transcriptProcessing = false;
|
|
2495
2331
|
transcriptQueue = [];
|
|
2332
|
+
// BUG #22 throttle state — mirror Python impl.
|
|
2333
|
+
lastCommitText = "";
|
|
2334
|
+
lastCommitAt = 0;
|
|
2496
2335
|
history;
|
|
2497
2336
|
metricsAcc;
|
|
2498
2337
|
constructor(deps, ws, caller, callee) {
|
|
@@ -2501,8 +2340,8 @@ var StreamHandler = class {
|
|
|
2501
2340
|
this.caller = caller;
|
|
2502
2341
|
this.callee = callee;
|
|
2503
2342
|
this.history = createHistoryManager(200);
|
|
2504
|
-
const sttProviderName = deps.agent.stt
|
|
2505
|
-
const ttsProviderName = deps.agent.tts
|
|
2343
|
+
const sttProviderName = deps.agent.stt ? deps.agent.stt.constructor?.name ?? "custom" : void 0;
|
|
2344
|
+
const ttsProviderName = deps.agent.tts ? deps.agent.tts.constructor?.name ?? "custom" : void 0;
|
|
2506
2345
|
const providerMode = deps.agent.provider ?? "openai_realtime";
|
|
2507
2346
|
this.metricsAcc = new CallMetricsAccumulator({
|
|
2508
2347
|
callId: "",
|
|
@@ -2603,15 +2442,23 @@ var StreamHandler = class {
|
|
|
2603
2442
|
this.streamSid = sid;
|
|
2604
2443
|
}
|
|
2605
2444
|
/** Handle an incoming audio chunk (already decoded from base64). */
|
|
2606
|
-
handleAudio(audioBuffer) {
|
|
2445
|
+
async handleAudio(audioBuffer) {
|
|
2607
2446
|
const provider = this.deps.agent.provider ?? "openai_realtime";
|
|
2608
|
-
if (provider === "pipeline" && this.stt
|
|
2609
|
-
if (this.deps.
|
|
2610
|
-
|
|
2611
|
-
|
|
2612
|
-
|
|
2447
|
+
if (provider === "pipeline" && this.stt) {
|
|
2448
|
+
if (this.isSpeaking && (this.deps.agent.bargeInThresholdMs ?? 300) === 0) {
|
|
2449
|
+
return;
|
|
2450
|
+
}
|
|
2451
|
+
const pcm8k = mulawToPcm16(audioBuffer);
|
|
2452
|
+
const pcm16k = resample8kTo16k(pcm8k);
|
|
2453
|
+
const hooks = this.deps.agent.hooks;
|
|
2454
|
+
if (hooks) {
|
|
2455
|
+
const hookExecutor = new PipelineHookExecutor(hooks);
|
|
2456
|
+
const hookCtx = this.buildHookContext();
|
|
2457
|
+
const processed = await hookExecutor.runBeforeSendToStt(pcm16k, hookCtx);
|
|
2458
|
+
if (processed === null) return;
|
|
2459
|
+
this.stt.sendAudio(processed);
|
|
2613
2460
|
} else {
|
|
2614
|
-
this.stt.sendAudio(
|
|
2461
|
+
this.stt.sendAudio(pcm16k);
|
|
2615
2462
|
}
|
|
2616
2463
|
} else if (this.adapter) {
|
|
2617
2464
|
if (this.adapter instanceof ElevenLabsConvAIAdapter && this.deps.bridge.telephonyProvider === "twilio") {
|
|
@@ -2684,18 +2531,8 @@ var StreamHandler = class {
|
|
|
2684
2531
|
// ---------------------------------------------------------------------------
|
|
2685
2532
|
async initPipeline(resolvedPrompt) {
|
|
2686
2533
|
const label = this.deps.bridge.label;
|
|
2687
|
-
this.stt = this.deps.bridge.createStt(this.deps.agent);
|
|
2688
|
-
|
|
2689
|
-
if (this.deps.agent.tts.provider === "elevenlabs") {
|
|
2690
|
-
this.tts = new ElevenLabsTTS(this.deps.agent.tts.apiKey, this.deps.agent.tts.voice ?? "21m00Tcm4TlvDq8ikWAM");
|
|
2691
|
-
}
|
|
2692
|
-
if (this.deps.agent.tts.provider === "openai") {
|
|
2693
|
-
this.tts = new OpenAITTS(this.deps.agent.tts.apiKey, this.deps.agent.tts.voice ?? "alloy");
|
|
2694
|
-
}
|
|
2695
|
-
} else if (this.deps.agent.elevenlabsKey) {
|
|
2696
|
-
const voiceId = this.deps.agent.voice && this.deps.agent.voice !== "alloy" ? this.deps.agent.voice : "21m00Tcm4TlvDq8ikWAM";
|
|
2697
|
-
this.tts = new ElevenLabsTTS(this.deps.agent.elevenlabsKey, voiceId);
|
|
2698
|
-
}
|
|
2534
|
+
this.stt = await this.deps.bridge.createStt(this.deps.agent);
|
|
2535
|
+
this.tts = await createTTS(this.deps.agent);
|
|
2699
2536
|
if (!this.stt) {
|
|
2700
2537
|
getLogger().info(`Pipeline mode (${label}): no STT configured`);
|
|
2701
2538
|
}
|
|
@@ -2806,7 +2643,59 @@ var StreamHandler = class {
|
|
|
2806
2643
|
}
|
|
2807
2644
|
}
|
|
2808
2645
|
async processTranscript(transcript) {
|
|
2646
|
+
if (transcript.text && this.isSpeaking) {
|
|
2647
|
+
getLogger().info(
|
|
2648
|
+
`Barge-in: caller spoke over agent (${sanitizeLogValue(transcript.text.slice(0, 40))})`
|
|
2649
|
+
);
|
|
2650
|
+
this.isSpeaking = false;
|
|
2651
|
+
try {
|
|
2652
|
+
this.deps.bridge.sendClear(this.ws, this.streamSid);
|
|
2653
|
+
} catch (err) {
|
|
2654
|
+
getLogger().debug(`sendClear during barge-in failed: ${String(err)}`);
|
|
2655
|
+
}
|
|
2656
|
+
this.metricsAcc.recordTurnInterrupted();
|
|
2657
|
+
}
|
|
2809
2658
|
if (!transcript.isFinal || !transcript.text) return;
|
|
2659
|
+
const now = Date.now();
|
|
2660
|
+
const normalised = transcript.text.trim().toLowerCase();
|
|
2661
|
+
const stripped = normalised.replace(/[.,!?;: ]+$/, "").trim();
|
|
2662
|
+
const sinceLastMs = now - this.lastCommitAt;
|
|
2663
|
+
const HALLUCINATIONS = /* @__PURE__ */ new Set([
|
|
2664
|
+
"you",
|
|
2665
|
+
"thank you",
|
|
2666
|
+
"thanks",
|
|
2667
|
+
"yeah",
|
|
2668
|
+
"yes",
|
|
2669
|
+
"no",
|
|
2670
|
+
"okay",
|
|
2671
|
+
"ok",
|
|
2672
|
+
"uh",
|
|
2673
|
+
"um",
|
|
2674
|
+
"mmm",
|
|
2675
|
+
"hmm",
|
|
2676
|
+
".",
|
|
2677
|
+
"bye",
|
|
2678
|
+
"right",
|
|
2679
|
+
"cool"
|
|
2680
|
+
]);
|
|
2681
|
+
if (HALLUCINATIONS.has(stripped) || stripped === "") {
|
|
2682
|
+
getLogger().info(`Dropped likely STT hallucination: ${sanitizeLogValue(normalised.slice(0, 40))}`);
|
|
2683
|
+
return;
|
|
2684
|
+
}
|
|
2685
|
+
if (sinceLastMs < 2e3 && normalised === this.lastCommitText) {
|
|
2686
|
+
getLogger().info(
|
|
2687
|
+
`Dropped duplicate final transcript (${(sinceLastMs / 1e3).toFixed(1)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
|
|
2688
|
+
);
|
|
2689
|
+
return;
|
|
2690
|
+
}
|
|
2691
|
+
if (sinceLastMs < 500) {
|
|
2692
|
+
getLogger().info(
|
|
2693
|
+
`Dropped back-to-back final transcript (${(sinceLastMs / 1e3).toFixed(2)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
|
|
2694
|
+
);
|
|
2695
|
+
return;
|
|
2696
|
+
}
|
|
2697
|
+
this.lastCommitText = normalised;
|
|
2698
|
+
this.lastCommitAt = now;
|
|
2810
2699
|
const label = this.deps.bridge.label;
|
|
2811
2700
|
getLogger().info(`User (${label} pipeline): ${sanitizeLogValue(transcript.text)}`);
|
|
2812
2701
|
this.metricsAcc.startTurn();
|
|
@@ -3161,10 +3050,11 @@ var StreamHandler = class {
|
|
|
3161
3050
|
this.maxDurationTimer = null;
|
|
3162
3051
|
}
|
|
3163
3052
|
await this.deps.bridge.queryTelephonyCost(this.metricsAcc, this.callId);
|
|
3164
|
-
|
|
3165
|
-
|
|
3166
|
-
|
|
3167
|
-
|
|
3053
|
+
if (this.stt instanceof DeepgramSTT && this.stt.requestId) {
|
|
3054
|
+
const dgKey = this.stt.apiKey;
|
|
3055
|
+
if (dgKey) {
|
|
3056
|
+
await queryDeepgramCost(this.metricsAcc, dgKey, this.stt.requestId);
|
|
3057
|
+
}
|
|
3168
3058
|
}
|
|
3169
3059
|
const finalMetrics = this.metricsAcc.endCall();
|
|
3170
3060
|
const callEndData = {
|
|
@@ -3321,11 +3211,16 @@ function resolveVariables(template, variables) {
|
|
|
3321
3211
|
return result;
|
|
3322
3212
|
}
|
|
3323
3213
|
function buildAIAdapter(config, agent, resolvedPrompt) {
|
|
3214
|
+
const engine = agent.engine;
|
|
3324
3215
|
if (agent.provider === "elevenlabs_convai") {
|
|
3325
|
-
|
|
3216
|
+
if (!engine || engine.kind !== "elevenlabs_convai") {
|
|
3217
|
+
throw new Error(
|
|
3218
|
+
"ElevenLabs ConvAI mode requires `agent.engine = new ElevenLabsConvAI({...})`."
|
|
3219
|
+
);
|
|
3220
|
+
}
|
|
3326
3221
|
return new ElevenLabsConvAIAdapter(
|
|
3327
|
-
|
|
3328
|
-
|
|
3222
|
+
engine.apiKey,
|
|
3223
|
+
engine.agentId,
|
|
3329
3224
|
agent.voice ?? "21m00Tcm4TlvDq8ikWAM",
|
|
3330
3225
|
"eleven_turbo_v2_5",
|
|
3331
3226
|
agent.language ?? "en",
|
|
@@ -3338,8 +3233,9 @@ function buildAIAdapter(config, agent, resolvedPrompt) {
|
|
|
3338
3233
|
parameters: t.parameters
|
|
3339
3234
|
})) ?? [];
|
|
3340
3235
|
const tools = [...agentTools, TRANSFER_CALL_TOOL, END_CALL_TOOL];
|
|
3236
|
+
const openaiKey = engine && engine.kind === "openai_realtime" ? engine.apiKey : config.openaiKey ?? "";
|
|
3341
3237
|
return new OpenAIRealtimeAdapter(
|
|
3342
|
-
|
|
3238
|
+
openaiKey,
|
|
3343
3239
|
agent.model,
|
|
3344
3240
|
agent.voice,
|
|
3345
3241
|
resolvedPrompt ?? agent.systemPrompt,
|
|
@@ -3397,16 +3293,7 @@ var TwilioBridge = class {
|
|
|
3397
3293
|
}
|
|
3398
3294
|
}
|
|
3399
3295
|
createStt(agent) {
|
|
3400
|
-
|
|
3401
|
-
if (agent.stt.provider === "deepgram") {
|
|
3402
|
-
return DeepgramSTT.forTwilio(agent.stt.apiKey, agent.stt.language ?? "en");
|
|
3403
|
-
} else if (agent.stt.provider === "whisper") {
|
|
3404
|
-
return WhisperSTT.forTwilio(agent.stt.apiKey, agent.stt.language ?? "en");
|
|
3405
|
-
}
|
|
3406
|
-
} else if (agent.deepgramKey) {
|
|
3407
|
-
return DeepgramSTT.forTwilio(agent.deepgramKey, agent.language ?? "en");
|
|
3408
|
-
}
|
|
3409
|
-
return null;
|
|
3296
|
+
return createSTT(agent);
|
|
3410
3297
|
}
|
|
3411
3298
|
async queryTelephonyCost(metricsAcc, callId) {
|
|
3412
3299
|
if (this.config.twilioSid && this.config.twilioToken && callId) {
|
|
@@ -3454,12 +3341,12 @@ var TelnyxBridge = class {
|
|
|
3454
3341
|
label = "Telnyx";
|
|
3455
3342
|
telephonyProvider = "telnyx";
|
|
3456
3343
|
sendAudio(ws, audioBase64, _streamSid) {
|
|
3457
|
-
ws.send(JSON.stringify({
|
|
3344
|
+
ws.send(JSON.stringify({ event: "media", media: { payload: audioBase64 } }));
|
|
3458
3345
|
}
|
|
3459
3346
|
sendMark(_ws, _markName, _streamSid) {
|
|
3460
3347
|
}
|
|
3461
3348
|
sendClear(ws, _streamSid) {
|
|
3462
|
-
ws.send(JSON.stringify({
|
|
3349
|
+
ws.send(JSON.stringify({ event: "clear" }));
|
|
3463
3350
|
}
|
|
3464
3351
|
async transferCall(callId, toNumber) {
|
|
3465
3352
|
if (!isValidTelnyxTransferTarget(toNumber)) {
|
|
@@ -3553,16 +3440,7 @@ var TelnyxBridge = class {
|
|
|
3553
3440
|
ws.close();
|
|
3554
3441
|
}
|
|
3555
3442
|
createStt(agent) {
|
|
3556
|
-
|
|
3557
|
-
if (agent.stt.provider === "deepgram") {
|
|
3558
|
-
return new DeepgramSTT(agent.stt.apiKey, agent.stt.language ?? "en", "nova-3", "linear16", 16e3);
|
|
3559
|
-
} else if (agent.stt.provider === "whisper") {
|
|
3560
|
-
return new WhisperSTT(agent.stt.apiKey, "whisper-1", agent.stt.language ?? "en");
|
|
3561
|
-
}
|
|
3562
|
-
} else if (agent.deepgramKey) {
|
|
3563
|
-
return new DeepgramSTT(agent.deepgramKey, agent.language ?? "en", "nova-3", "linear16", 16e3);
|
|
3564
|
-
}
|
|
3565
|
-
return null;
|
|
3443
|
+
return createSTT(agent);
|
|
3566
3444
|
}
|
|
3567
3445
|
async queryTelephonyCost(metricsAcc, callId) {
|
|
3568
3446
|
if (this.config.telnyxKey && callId) {
|
|
@@ -3607,6 +3485,7 @@ var EmbeddedServer = class {
|
|
|
3607
3485
|
server = null;
|
|
3608
3486
|
wss = null;
|
|
3609
3487
|
twilioTokenWarningLogged = false;
|
|
3488
|
+
telnyxSigWarningLogged = false;
|
|
3610
3489
|
metricsStore;
|
|
3611
3490
|
pricing;
|
|
3612
3491
|
remoteHandler = new RemoteMessageHandler();
|
|
@@ -3654,6 +3533,31 @@ var EmbeddedServer = class {
|
|
|
3654
3533
|
mountApi(app, this.metricsStore, this.dashboardToken);
|
|
3655
3534
|
getLogger().info("Dashboard: http://127.0.0.1:" + port + "/");
|
|
3656
3535
|
}
|
|
3536
|
+
app.post("/webhooks/twilio/status", (req, res) => {
|
|
3537
|
+
if (this.config.twilioToken) {
|
|
3538
|
+
const signature = req.headers["x-twilio-signature"] || "";
|
|
3539
|
+
const url = `https://${this.config.webhookUrl}${req.originalUrl}`;
|
|
3540
|
+
const params = req.body ?? {};
|
|
3541
|
+
if (!validateTwilioSignature(url, params, signature, this.config.twilioToken)) {
|
|
3542
|
+
res.status(403).send("Invalid signature");
|
|
3543
|
+
return;
|
|
3544
|
+
}
|
|
3545
|
+
}
|
|
3546
|
+
const body = req.body;
|
|
3547
|
+
const callSid = sanitizeLogValue(body["CallSid"] ?? "");
|
|
3548
|
+
const callStatus = sanitizeLogValue(body["CallStatus"] ?? "");
|
|
3549
|
+
const duration = body["CallDuration"] ?? body["Duration"] ?? "";
|
|
3550
|
+
getLogger().info(
|
|
3551
|
+
`Twilio status ${callStatus} for call ${callSid} (duration=${duration})`
|
|
3552
|
+
);
|
|
3553
|
+
if (callSid && callStatus) {
|
|
3554
|
+
const extra = {};
|
|
3555
|
+
const parsed = parseFloat(duration);
|
|
3556
|
+
if (!Number.isNaN(parsed)) extra.duration_seconds = parsed;
|
|
3557
|
+
this.metricsStore.updateCallStatus(callSid, callStatus, extra);
|
|
3558
|
+
}
|
|
3559
|
+
res.status(204).send();
|
|
3560
|
+
});
|
|
3657
3561
|
app.post("/webhooks/twilio/recording", (req, res) => {
|
|
3658
3562
|
if (this.config.twilioToken) {
|
|
3659
3563
|
const signature = req.headers["x-twilio-signature"] || "";
|
|
@@ -3739,7 +3643,7 @@ var EmbeddedServer = class {
|
|
|
3739
3643
|
const twiml = `<?xml version="1.0" encoding="UTF-8"?><Response><Connect><Stream url="${xmlStreamUrl}"><Parameter name="caller" value="${xmlEscape(caller)}"/><Parameter name="callee" value="${xmlEscape(callee)}"/></Stream></Connect></Response>`;
|
|
3740
3644
|
res.type("text/xml").send(twiml);
|
|
3741
3645
|
});
|
|
3742
|
-
app.post("/webhooks/telnyx/voice", (req, res) => {
|
|
3646
|
+
app.post("/webhooks/telnyx/voice", async (req, res) => {
|
|
3743
3647
|
if (this.config.telnyxPublicKey) {
|
|
3744
3648
|
const rawBody = req.rawBody ?? "";
|
|
3745
3649
|
const signature = req.headers["telnyx-signature-ed25519"] ?? "";
|
|
@@ -3748,7 +3652,8 @@ var EmbeddedServer = class {
|
|
|
3748
3652
|
getLogger().warn("Telnyx webhook rejected: invalid or missing Ed25519 signature");
|
|
3749
3653
|
return res.status(403).send("Invalid signature");
|
|
3750
3654
|
}
|
|
3751
|
-
} else {
|
|
3655
|
+
} else if (!this.telnyxSigWarningLogged) {
|
|
3656
|
+
this.telnyxSigWarningLogged = true;
|
|
3752
3657
|
getLogger().warn("Telnyx webhook signature verification is disabled. Set telnyxPublicKey in LocalOptions for production use.");
|
|
3753
3658
|
}
|
|
3754
3659
|
const body = req.body;
|
|
@@ -3758,41 +3663,77 @@ var EmbeddedServer = class {
|
|
|
3758
3663
|
if (typeof body.data.event_type !== "string" || typeof body.data.payload !== "object" || body.data.payload === null) {
|
|
3759
3664
|
return res.status(400).send("Invalid body");
|
|
3760
3665
|
}
|
|
3761
|
-
const eventType = body
|
|
3666
|
+
const eventType = body.data.event_type ?? "";
|
|
3667
|
+
const payload = body.data.payload ?? {};
|
|
3762
3668
|
if (eventType === "call.dtmf.received") {
|
|
3763
|
-
const digit = String(
|
|
3669
|
+
const digit = String(payload.digit ?? "").trim();
|
|
3764
3670
|
if (digit) {
|
|
3765
3671
|
getLogger().info(`Telnyx DTMF received (webhook): ${sanitizeLogValue(digit)}`);
|
|
3766
3672
|
}
|
|
3767
|
-
return res.
|
|
3673
|
+
return res.status(200).send();
|
|
3768
3674
|
}
|
|
3769
3675
|
if (eventType === "call.recording.saved") {
|
|
3770
|
-
const recordingUrl =
|
|
3676
|
+
const recordingUrl = payload.recording_urls?.mp3 ?? payload.recording_urls?.wav ?? payload.public_recording_urls?.mp3 ?? "";
|
|
3771
3677
|
if (recordingUrl) {
|
|
3772
3678
|
getLogger().info(`Telnyx recording saved (webhook): ${sanitizeLogValue(recordingUrl)}`);
|
|
3773
3679
|
}
|
|
3774
|
-
return res.
|
|
3680
|
+
return res.status(200).send();
|
|
3775
3681
|
}
|
|
3776
|
-
|
|
3777
|
-
|
|
3778
|
-
|
|
3779
|
-
|
|
3780
|
-
|
|
3781
|
-
|
|
3782
|
-
|
|
3783
|
-
|
|
3784
|
-
|
|
3785
|
-
|
|
3786
|
-
|
|
3682
|
+
const callControlId = payload.call_control_id ?? "";
|
|
3683
|
+
if (!callControlId) {
|
|
3684
|
+
getLogger().warn("Telnyx webhook rejected: missing call_control_id");
|
|
3685
|
+
return res.status(400).send("Invalid webhook payload");
|
|
3686
|
+
}
|
|
3687
|
+
const apiKey = this.config.telnyxKey;
|
|
3688
|
+
if (!apiKey) {
|
|
3689
|
+
getLogger().warn("Telnyx webhook: missing telnyxKey in LocalOptions");
|
|
3690
|
+
return res.status(500).send("Missing Telnyx API key");
|
|
3691
|
+
}
|
|
3692
|
+
const apiBase = "https://api.telnyx.com/v2";
|
|
3693
|
+
const authHeaders = {
|
|
3694
|
+
"Content-Type": "application/json",
|
|
3695
|
+
Authorization: `Bearer ${apiKey}`
|
|
3696
|
+
};
|
|
3697
|
+
try {
|
|
3698
|
+
if (eventType === "call.initiated") {
|
|
3699
|
+
getLogger().info(`Telnyx call.initiated ${callControlId} \u2014 answering`);
|
|
3700
|
+
const resp = await fetch(`${apiBase}/calls/${encodeURIComponent(callControlId)}/actions/answer`, {
|
|
3701
|
+
method: "POST",
|
|
3702
|
+
headers: authHeaders,
|
|
3703
|
+
body: JSON.stringify({}),
|
|
3704
|
+
signal: AbortSignal.timeout(1e4)
|
|
3705
|
+
});
|
|
3706
|
+
if (!resp.ok) {
|
|
3707
|
+
getLogger().warn(`Telnyx answer failed: ${resp.status} ${(await resp.text()).slice(0, 200)}`);
|
|
3708
|
+
}
|
|
3709
|
+
} else if (eventType === "call.answered") {
|
|
3710
|
+
const caller = payload.from ?? "";
|
|
3711
|
+
const callee = payload.to ?? "";
|
|
3712
|
+
const streamUrl = `wss://${this.config.webhookUrl}/ws/stream/${encodeURIComponent(callControlId)}?caller=${encodeURIComponent(caller)}&callee=${encodeURIComponent(callee)}`;
|
|
3713
|
+
getLogger().info(`Telnyx call.answered ${callControlId} \u2014 starting stream`);
|
|
3714
|
+
const resp = await fetch(`${apiBase}/calls/${encodeURIComponent(callControlId)}/actions/streaming_start`, {
|
|
3715
|
+
method: "POST",
|
|
3716
|
+
headers: authHeaders,
|
|
3717
|
+
body: JSON.stringify({
|
|
3787
3718
|
stream_url: streamUrl,
|
|
3788
|
-
stream_track: "both_tracks"
|
|
3789
|
-
|
|
3719
|
+
stream_track: "both_tracks",
|
|
3720
|
+
stream_bidirectional_mode: "rtp",
|
|
3721
|
+
stream_bidirectional_codec: "PCMU",
|
|
3722
|
+
stream_bidirectional_sampling_rate: 8e3,
|
|
3723
|
+
stream_bidirectional_target_legs: "self"
|
|
3724
|
+
}),
|
|
3725
|
+
signal: AbortSignal.timeout(1e4)
|
|
3726
|
+
});
|
|
3727
|
+
if (!resp.ok) {
|
|
3728
|
+
getLogger().warn(`Telnyx streaming_start failed: ${resp.status} ${(await resp.text()).slice(0, 200)}`);
|
|
3790
3729
|
}
|
|
3791
|
-
|
|
3792
|
-
|
|
3793
|
-
|
|
3794
|
-
|
|
3730
|
+
} else {
|
|
3731
|
+
getLogger().debug(`Telnyx event ignored: ${eventType}`);
|
|
3732
|
+
}
|
|
3733
|
+
} catch (e) {
|
|
3734
|
+
getLogger().error(`Telnyx webhook handler error: ${String(e)}`);
|
|
3795
3735
|
}
|
|
3736
|
+
return res.status(200).send();
|
|
3796
3737
|
});
|
|
3797
3738
|
this.server = createServer(app);
|
|
3798
3739
|
this.wss = new WebSocketServer({ noServer: true });
|
|
@@ -3939,11 +3880,12 @@ Connect AI agents to phone numbers in 4 lines of code
|
|
|
3939
3880
|
getLogger().error("Failed to parse Telnyx WS message:", e);
|
|
3940
3881
|
return;
|
|
3941
3882
|
}
|
|
3942
|
-
const
|
|
3943
|
-
|
|
3944
|
-
|
|
3883
|
+
const event = data.event ?? "";
|
|
3884
|
+
if (event === "connected") return;
|
|
3885
|
+
getLogger().info(`Telnyx event: ${event}`);
|
|
3886
|
+
if (event === "start" && !streamStarted) {
|
|
3945
3887
|
streamStarted = true;
|
|
3946
|
-
const callControlId = data.
|
|
3888
|
+
const callControlId = data.start?.call_control_id ?? "";
|
|
3947
3889
|
if (callControlId) this.activeCallIds.set(ws, callControlId);
|
|
3948
3890
|
await handler.handleCallStart(callControlId);
|
|
3949
3891
|
if (this.recording) {
|
|
@@ -3953,22 +3895,21 @@ Connect AI agents to phone numbers in 4 lines of code
|
|
|
3953
3895
|
getLogger().warn(`Could not start recording: ${String(e)}`);
|
|
3954
3896
|
}
|
|
3955
3897
|
}
|
|
3956
|
-
} else if (
|
|
3957
|
-
const
|
|
3898
|
+
} else if (event === "media") {
|
|
3899
|
+
const track = data.media?.track ?? "inbound";
|
|
3900
|
+
if (track !== "inbound") return;
|
|
3901
|
+
const audioChunk = data.media?.payload ?? "";
|
|
3958
3902
|
if (!audioChunk) return;
|
|
3959
3903
|
handler.handleAudio(Buffer.from(audioChunk, "base64"));
|
|
3960
|
-
} else if (
|
|
3961
|
-
const digit = String(data.
|
|
3904
|
+
} else if (event === "dtmf") {
|
|
3905
|
+
const digit = String(data.dtmf?.digit ?? "").trim();
|
|
3962
3906
|
if (digit) {
|
|
3963
3907
|
getLogger().info(`Telnyx DTMF received: ${digit}`);
|
|
3964
3908
|
await handler.handleDtmf(digit);
|
|
3965
3909
|
}
|
|
3966
|
-
} else if (
|
|
3967
|
-
|
|
3968
|
-
|
|
3969
|
-
getLogger().info(`Telnyx recording saved: ${recordingUrl}`);
|
|
3970
|
-
}
|
|
3971
|
-
} else if (eventType === "stream_stopped") {
|
|
3910
|
+
} else if (event === "error") {
|
|
3911
|
+
getLogger().warn(`Telnyx stream error: ${JSON.stringify(data)}`);
|
|
3912
|
+
} else if (event === "stop") {
|
|
3972
3913
|
await handler.handleStop();
|
|
3973
3914
|
}
|
|
3974
3915
|
} catch (err) {
|
|
@@ -4437,8 +4378,6 @@ var TestSession = class {
|
|
|
4437
4378
|
export {
|
|
4438
4379
|
OpenAIRealtimeAdapter,
|
|
4439
4380
|
ElevenLabsConvAIAdapter,
|
|
4440
|
-
DeepgramSTT,
|
|
4441
|
-
WhisperSTT,
|
|
4442
4381
|
DEFAULT_PRICING,
|
|
4443
4382
|
mergePricing,
|
|
4444
4383
|
calculateSttCost,
|
|
@@ -4454,8 +4393,7 @@ export {
|
|
|
4454
4393
|
RemoteMessageHandler,
|
|
4455
4394
|
isRemoteUrl,
|
|
4456
4395
|
isWebSocketUrl,
|
|
4457
|
-
|
|
4458
|
-
OpenAITTS,
|
|
4396
|
+
DeepgramSTT,
|
|
4459
4397
|
CallMetricsAccumulator,
|
|
4460
4398
|
mulawToPcm16,
|
|
4461
4399
|
pcm16ToMulaw,
|