getpatter 0.4.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -247,223 +247,13 @@ var ElevenLabsConvAIAdapter = class {
247
247
  }
248
248
  };
249
249
 
250
- // src/providers/deepgram-stt.ts
251
- import WebSocket3 from "ws";
252
- var DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/listen";
253
- var DeepgramSTT = class _DeepgramSTT {
254
- constructor(apiKey, language = "en", model = "nova-3", encoding = "linear16", sampleRate = 16e3) {
255
- this.apiKey = apiKey;
256
- this.language = language;
257
- this.model = model;
258
- this.encoding = encoding;
259
- this.sampleRate = sampleRate;
260
- }
261
- ws = null;
262
- callbacks = [];
263
- /** Request ID from Deepgram — used to query actual cost post-call. */
264
- requestId = "";
265
- /** Factory for Twilio calls — mulaw 8 kHz. */
266
- static forTwilio(apiKey, language = "en", model = "nova-3") {
267
- return new _DeepgramSTT(apiKey, language, model, "mulaw", 8e3);
268
- }
269
- async connect() {
270
- const params = new URLSearchParams({
271
- model: this.model,
272
- language: this.language,
273
- encoding: this.encoding,
274
- sample_rate: String(this.sampleRate),
275
- channels: "1",
276
- interim_results: "true",
277
- endpointing: "300",
278
- smart_format: "true",
279
- vad_events: "true",
280
- no_delay: "true"
281
- });
282
- const url = `${DEEPGRAM_WS_URL}?${params.toString()}`;
283
- this.ws = new WebSocket3(url, {
284
- headers: { Authorization: `Token ${this.apiKey}` }
285
- });
286
- await new Promise((resolve, reject) => {
287
- const timer = setTimeout(() => reject(new Error("Deepgram connect timeout")), 1e4);
288
- this.ws.once("open", () => {
289
- clearTimeout(timer);
290
- resolve();
291
- });
292
- this.ws.once("error", (err) => {
293
- clearTimeout(timer);
294
- reject(err);
295
- });
296
- });
297
- this.ws.on("message", (raw) => {
298
- let data;
299
- try {
300
- data = JSON.parse(raw.toString());
301
- } catch {
302
- return;
303
- }
304
- if (data.type === "Metadata" && data.request_id) {
305
- this.requestId = data.request_id;
306
- return;
307
- }
308
- if (data.type !== "Results") return;
309
- const alternatives = data.channel?.alternatives ?? [];
310
- if (!alternatives.length) return;
311
- const best = alternatives[0];
312
- const text = (best.transcript ?? "").trim();
313
- if (!text) return;
314
- const transcript = {
315
- text,
316
- isFinal: Boolean(data.is_final) && Boolean(data.speech_final),
317
- confidence: best.confidence ?? 0
318
- };
319
- for (const cb of this.callbacks) {
320
- cb(transcript);
321
- }
322
- });
323
- }
324
- sendAudio(audio) {
325
- if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
326
- this.ws.send(audio);
327
- }
328
- onTranscript(callback) {
329
- if (this.callbacks.length >= 10) {
330
- getLogger().warn("DeepgramSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
331
- this.callbacks[this.callbacks.length - 1] = callback;
332
- return;
333
- }
334
- this.callbacks.push(callback);
335
- }
336
- close() {
337
- if (this.ws) {
338
- try {
339
- this.ws.send(JSON.stringify({ type: "CloseStream" }));
340
- } catch {
341
- }
342
- this.ws.close();
343
- this.ws = null;
344
- }
345
- }
346
- };
347
-
348
- // src/providers/whisper-stt.ts
349
- var OPENAI_TRANSCRIPTION_URL = "https://api.openai.com/v1/audio/transcriptions";
350
- var DEFAULT_BUFFER_SIZE = 16e3 * 2;
351
- function wrapPcmInWav(pcm, sampleRate = 16e3, channels = 1, bitsPerSample = 16) {
352
- const dataSize = pcm.length;
353
- const header = Buffer.alloc(44);
354
- header.write("RIFF", 0);
355
- header.writeUInt32LE(36 + dataSize, 4);
356
- header.write("WAVE", 8);
357
- header.write("fmt ", 12);
358
- header.writeUInt32LE(16, 16);
359
- header.writeUInt16LE(1, 20);
360
- header.writeUInt16LE(channels, 22);
361
- header.writeUInt32LE(sampleRate, 24);
362
- header.writeUInt32LE(sampleRate * channels * (bitsPerSample / 8), 28);
363
- header.writeUInt16LE(channels * (bitsPerSample / 8), 32);
364
- header.writeUInt16LE(bitsPerSample, 34);
365
- header.write("data", 36);
366
- header.writeUInt32LE(dataSize, 40);
367
- return Buffer.concat([header, pcm]);
250
+ // src/provider-factory.ts
251
+ async function createSTT(agent) {
252
+ return agent.stt ?? null;
253
+ }
254
+ async function createTTS(agent) {
255
+ return agent.tts ?? null;
368
256
  }
369
- var WhisperSTT = class _WhisperSTT {
370
- apiKey;
371
- model;
372
- language;
373
- bufferSize;
374
- buffer = Buffer.alloc(0);
375
- callbacks = [];
376
- running = false;
377
- pendingTranscriptions = [];
378
- constructor(apiKey, model = "whisper-1", language, bufferSize = DEFAULT_BUFFER_SIZE) {
379
- this.apiKey = apiKey;
380
- this.model = model;
381
- this.language = language;
382
- this.bufferSize = bufferSize;
383
- }
384
- /** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
385
- static forTwilio(apiKey, language = "en", model = "whisper-1") {
386
- return new _WhisperSTT(apiKey, model, language);
387
- }
388
- async connect() {
389
- this.running = true;
390
- this.buffer = Buffer.alloc(0);
391
- }
392
- sendAudio(audio) {
393
- if (!this.running) return;
394
- this.buffer = Buffer.concat([this.buffer, audio]);
395
- if (this.buffer.length >= this.bufferSize) {
396
- const pcm = this.buffer;
397
- this.buffer = Buffer.alloc(0);
398
- this.trackTranscription(this.transcribeBuffer(pcm));
399
- }
400
- }
401
- trackTranscription(promise) {
402
- const wrapped = promise.finally(() => {
403
- const idx = this.pendingTranscriptions.indexOf(wrapped);
404
- if (idx !== -1) this.pendingTranscriptions.splice(idx, 1);
405
- });
406
- this.pendingTranscriptions.push(wrapped);
407
- }
408
- onTranscript(callback) {
409
- if (this.callbacks.length >= 10) {
410
- getLogger().warn("WhisperSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
411
- this.callbacks[this.callbacks.length - 1] = callback;
412
- return;
413
- }
414
- this.callbacks.push(callback);
415
- }
416
- async close() {
417
- this.running = false;
418
- if (this.buffer.length >= this.bufferSize / 4) {
419
- const pcm = this.buffer;
420
- this.buffer = Buffer.alloc(0);
421
- this.trackTranscription(this.transcribeBuffer(pcm));
422
- } else {
423
- this.buffer = Buffer.alloc(0);
424
- }
425
- await Promise.allSettled(this.pendingTranscriptions);
426
- this.callbacks = [];
427
- }
428
- // ------------------------------------------------------------------
429
- // Private
430
- // ------------------------------------------------------------------
431
- async transcribeBuffer(pcm) {
432
- const wav = wrapPcmInWav(pcm);
433
- const formData = new FormData();
434
- formData.append("file", new Blob([wav.buffer.slice(wav.byteOffset, wav.byteOffset + wav.byteLength)], { type: "audio/wav" }), "audio.wav");
435
- formData.append("model", this.model);
436
- if (this.language) {
437
- formData.append("language", this.language);
438
- }
439
- try {
440
- const resp = await fetch(OPENAI_TRANSCRIPTION_URL, {
441
- method: "POST",
442
- headers: { Authorization: `Bearer ${this.apiKey}` },
443
- body: formData,
444
- signal: AbortSignal.timeout(15e3)
445
- });
446
- if (!resp.ok) {
447
- const body = await resp.text();
448
- getLogger().error(`WhisperSTT transcription error: ${resp.status} ${body}`);
449
- return;
450
- }
451
- const json = await resp.json();
452
- const text = (json.text ?? "").trim();
453
- if (!text) return;
454
- const transcript = {
455
- text,
456
- isFinal: true,
457
- confidence: 1
458
- };
459
- for (const cb of this.callbacks) {
460
- cb(transcript);
461
- }
462
- } catch (err) {
463
- getLogger().error(`WhisperSTT transcription error: ${String(err)}`);
464
- }
465
- }
466
- };
467
257
 
468
258
  // src/pricing.ts
469
259
  var DEFAULT_PRICING = {
@@ -534,9 +324,15 @@ var MetricsStore = class extends EventEmitter {
534
324
  maxCalls;
535
325
  calls = [];
536
326
  activeCalls = /* @__PURE__ */ new Map();
537
- constructor(maxCalls = 500) {
327
+ /**
328
+ * Accepts either a numeric ``maxCalls`` (legacy positional — matches the
329
+ * original TS API) or an options object ``{ maxCalls }`` to align with the
330
+ * Python SDK's keyword-argument style. Plain literals also work:
331
+ * ``new MetricsStore()`` / ``new MetricsStore(100)`` / ``new MetricsStore({ maxCalls: 100 })``.
332
+ */
333
+ constructor(maxCallsOrOpts = 500) {
538
334
  super();
539
- this.maxCalls = maxCalls;
335
+ this.maxCalls = typeof maxCallsOrOpts === "number" ? maxCallsOrOpts : maxCallsOrOpts.maxCalls ?? 500;
540
336
  }
541
337
  publish(eventType, data) {
542
338
  this.emit("sse", { type: eventType, data });
@@ -544,22 +340,100 @@ var MetricsStore = class extends EventEmitter {
544
340
  recordCallStart(data) {
545
341
  const callId = data.call_id || "";
546
342
  if (!callId) return;
343
+ const existing = this.activeCalls.get(callId);
344
+ if (existing) {
345
+ existing.caller = data.caller || existing.caller;
346
+ existing.callee = data.callee || existing.callee;
347
+ existing.direction = data.direction || existing.direction;
348
+ existing.status = "in-progress";
349
+ existing.turns = existing.turns || [];
350
+ } else {
351
+ const record = {
352
+ call_id: callId,
353
+ caller: data.caller || "",
354
+ callee: data.callee || "",
355
+ direction: data.direction || "inbound",
356
+ started_at: Date.now() / 1e3,
357
+ status: "in-progress",
358
+ turns: []
359
+ };
360
+ this.activeCalls.set(callId, record);
361
+ }
362
+ this.publish("call_start", {
363
+ call_id: callId,
364
+ caller: data.caller || "",
365
+ callee: data.callee || "",
366
+ direction: data.direction || "inbound"
367
+ });
368
+ }
369
+ /**
370
+ * Pre-register an outbound call before any webhook fires. Lets the
371
+ * dashboard surface attempts that never reach media (no-answer, busy,
372
+ * carrier-rejected). Mirrors the Python ``record_call_initiated``.
373
+ */
374
+ recordCallInitiated(data) {
375
+ const callId = data.call_id || "";
376
+ if (!callId) return;
377
+ if (this.activeCalls.has(callId)) return;
547
378
  const record = {
548
379
  call_id: callId,
549
380
  caller: data.caller || "",
550
381
  callee: data.callee || "",
551
- direction: data.direction || "inbound",
382
+ direction: data.direction || "outbound",
552
383
  started_at: Date.now() / 1e3,
384
+ status: "initiated",
553
385
  turns: []
554
386
  };
555
387
  this.activeCalls.set(callId, record);
556
- this.publish("call_start", {
388
+ this.publish("call_initiated", {
557
389
  call_id: callId,
558
390
  caller: record.caller,
559
391
  callee: record.callee,
560
- direction: record.direction
392
+ direction: record.direction,
393
+ status: record.status
561
394
  });
562
395
  }
396
+ /**
397
+ * Update the status of an active or completed call. Terminal states
398
+ * (completed, no-answer, busy, failed, canceled, webhook_error) move the
399
+ * row from active to completed so the UI freezes the live duration timer.
400
+ */
401
+ updateCallStatus(callId, status, extra = {}) {
402
+ if (!callId || !status) return;
403
+ const TERMINAL = /* @__PURE__ */ new Set(["completed", "no-answer", "busy", "failed", "canceled", "webhook_error"]);
404
+ const active = this.activeCalls.get(callId);
405
+ if (active) {
406
+ active.status = status;
407
+ Object.assign(active, extra);
408
+ if (TERMINAL.has(status)) {
409
+ const entry = {
410
+ call_id: callId,
411
+ caller: active.caller || "",
412
+ callee: active.callee || "",
413
+ direction: active.direction || "outbound",
414
+ started_at: active.started_at || 0,
415
+ ended_at: Date.now() / 1e3,
416
+ status,
417
+ metrics: null,
418
+ ...extra
419
+ };
420
+ this.activeCalls.delete(callId);
421
+ this.calls.push(entry);
422
+ if (this.calls.length > this.maxCalls) {
423
+ this.calls = this.calls.slice(-this.maxCalls);
424
+ }
425
+ }
426
+ } else {
427
+ for (let i = this.calls.length - 1; i >= 0; i--) {
428
+ if (this.calls[i].call_id === callId) {
429
+ this.calls[i].status = status;
430
+ Object.assign(this.calls[i], extra);
431
+ break;
432
+ }
433
+ }
434
+ }
435
+ this.publish("call_status", { call_id: callId, status, ...extra });
436
+ }
563
437
  recordTurn(data) {
564
438
  const callId = data.call_id || "";
565
439
  const turn = data.turn;
@@ -576,6 +450,8 @@ var MetricsStore = class extends EventEmitter {
576
450
  if (!callId) return;
577
451
  const active = this.activeCalls.get(callId);
578
452
  this.activeCalls.delete(callId);
453
+ const activeStatus = active?.status;
454
+ const resolvedStatus = activeStatus && activeStatus !== "in-progress" ? activeStatus : "completed";
579
455
  const entry = {
580
456
  call_id: callId,
581
457
  caller: data.caller || active?.caller || "",
@@ -584,6 +460,7 @@ var MetricsStore = class extends EventEmitter {
584
460
  started_at: active?.started_at || 0,
585
461
  ended_at: Date.now() / 1e3,
586
462
  transcript: data.transcript || [],
463
+ status: resolvedStatus,
587
464
  metrics: metrics ?? null
588
465
  };
589
466
  this.calls.push(entry);
@@ -1749,161 +1626,120 @@ function isWebSocketUrl(url) {
1749
1626
  return url.startsWith("ws://") || url.startsWith("wss://");
1750
1627
  }
1751
1628
 
1752
- // src/providers/elevenlabs-tts.ts
1753
- var ELEVENLABS_BASE_URL = "https://api.elevenlabs.io/v1";
1754
- var ElevenLabsTTS = class {
1755
- constructor(apiKey, voiceId = "21m00Tcm4TlvDq8ikWAM", modelId = "eleven_turbo_v2_5", outputFormat = "pcm_16000") {
1629
+ // src/providers/deepgram-stt.ts
1630
+ import WebSocket3 from "ws";
1631
+ var DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/listen";
1632
+ var DeepgramSTT = class _DeepgramSTT {
1633
+ ws = null;
1634
+ callbacks = [];
1635
+ /** Request ID from Deepgram — used to query actual cost post-call. */
1636
+ requestId = "";
1637
+ apiKey;
1638
+ language;
1639
+ model;
1640
+ encoding;
1641
+ sampleRate;
1642
+ endpointingMs;
1643
+ utteranceEndMs;
1644
+ smartFormat;
1645
+ interimResults;
1646
+ vadEvents;
1647
+ constructor(apiKey, languageOrOptions, model, encoding, sampleRate, options) {
1756
1648
  this.apiKey = apiKey;
1757
- this.voiceId = voiceId;
1758
- this.modelId = modelId;
1759
- this.outputFormat = outputFormat;
1760
- }
1761
- /**
1762
- * Synthesise text to speech and return the full audio as a single Buffer.
1763
- *
1764
- * For large chunks (or when latency matters) call `synthesizeStream` instead.
1765
- */
1766
- async synthesize(text) {
1767
- const chunks = [];
1768
- for await (const chunk of this.synthesizeStream(text)) {
1769
- chunks.push(chunk);
1770
- }
1771
- return Buffer.concat(chunks);
1649
+ const opts = typeof languageOrOptions === "object" && languageOrOptions !== null ? languageOrOptions : options ?? {};
1650
+ this.language = (typeof languageOrOptions === "string" ? languageOrOptions : opts.language) ?? "en";
1651
+ this.model = model ?? opts.model ?? "nova-3";
1652
+ this.encoding = encoding ?? opts.encoding ?? "linear16";
1653
+ this.sampleRate = sampleRate ?? opts.sampleRate ?? 16e3;
1654
+ this.endpointingMs = opts.endpointingMs ?? 150;
1655
+ this.utteranceEndMs = opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3;
1656
+ this.smartFormat = opts.smartFormat ?? true;
1657
+ this.interimResults = opts.interimResults ?? true;
1658
+ this.vadEvents = opts.vadEvents ?? true;
1659
+ }
1660
+ /** Factory for Twilio calls — mulaw 8 kHz. Forwards tuning options through. */
1661
+ static forTwilio(apiKey, language = "en", model = "nova-3", options = {}) {
1662
+ return new _DeepgramSTT(apiKey, language, model, "mulaw", 8e3, options);
1772
1663
  }
1773
- /**
1774
- * Synthesise text and yield audio chunks as they arrive (streaming).
1775
- *
1776
- * The yielded buffers are raw PCM at 16 kHz (or whatever `outputFormat` is
1777
- * configured to).
1778
- */
1779
- async *synthesizeStream(text) {
1780
- const url = `${ELEVENLABS_BASE_URL}/text-to-speech/${encodeURIComponent(this.voiceId)}/stream?output_format=${encodeURIComponent(this.outputFormat)}`;
1781
- const response = await fetch(url, {
1782
- method: "POST",
1783
- headers: {
1784
- "xi-api-key": this.apiKey,
1785
- "Content-Type": "application/json"
1786
- },
1787
- body: JSON.stringify({ text, model_id: this.modelId }),
1788
- signal: AbortSignal.timeout(3e4)
1664
+ async connect() {
1665
+ const params = new URLSearchParams({
1666
+ model: this.model,
1667
+ language: this.language,
1668
+ encoding: this.encoding,
1669
+ sample_rate: String(this.sampleRate),
1670
+ channels: "1",
1671
+ interim_results: this.interimResults ? "true" : "false",
1672
+ endpointing: String(this.endpointingMs),
1673
+ smart_format: this.smartFormat ? "true" : "false",
1674
+ vad_events: this.vadEvents ? "true" : "false",
1675
+ no_delay: "true"
1789
1676
  });
1790
- if (!response.ok) {
1791
- const body = await response.text();
1792
- throw new Error(`ElevenLabs TTS error ${response.status}: ${body}`);
1793
- }
1794
- if (!response.body) {
1795
- throw new Error("ElevenLabs TTS: no response body");
1677
+ if (this.utteranceEndMs !== null) {
1678
+ params.set("utterance_end_ms", String(Math.max(this.utteranceEndMs, 1e3)));
1796
1679
  }
1797
- const reader = response.body.getReader();
1798
- try {
1799
- while (true) {
1800
- const { done, value } = await reader.read();
1801
- if (done) break;
1802
- if (value && value.length > 0) {
1803
- yield Buffer.from(value);
1804
- }
1805
- }
1806
- } finally {
1807
- if (typeof reader.cancel === "function") await reader.cancel().catch(() => {
1680
+ const url = `${DEEPGRAM_WS_URL}?${params.toString()}`;
1681
+ this.ws = new WebSocket3(url, {
1682
+ headers: { Authorization: `Token ${this.apiKey}` }
1683
+ });
1684
+ await new Promise((resolve, reject) => {
1685
+ const timer = setTimeout(() => reject(new Error("Deepgram connect timeout")), 1e4);
1686
+ this.ws.once("open", () => {
1687
+ clearTimeout(timer);
1688
+ resolve();
1689
+ });
1690
+ this.ws.once("error", (err) => {
1691
+ clearTimeout(timer);
1692
+ reject(err);
1808
1693
  });
1809
- reader.releaseLock();
1810
- }
1811
- }
1812
- };
1813
-
1814
- // src/providers/openai-tts.ts
1815
- var OPENAI_TTS_URL = "https://api.openai.com/v1/audio/speech";
1816
- var OpenAITTS = class _OpenAITTS {
1817
- constructor(apiKey, voice = "alloy", model = "tts-1") {
1818
- this.apiKey = apiKey;
1819
- this.voice = voice;
1820
- this.model = model;
1821
- }
1822
- /**
1823
- * Synthesise text to speech and return the full audio as a single Buffer.
1824
- *
1825
- * For large chunks (or when latency matters) call `synthesizeStream` instead.
1826
- */
1827
- async synthesize(text) {
1828
- const chunks = [];
1829
- for await (const chunk of this.synthesizeStream(text)) {
1830
- chunks.push(chunk);
1831
- }
1832
- return Buffer.concat(chunks);
1833
- }
1834
- /**
1835
- * Synthesise text and yield audio chunks as they arrive (streaming).
1836
- *
1837
- * OpenAI returns 24 kHz PCM16; each chunk is resampled to 16 kHz before
1838
- * yielding so the output is ready for telephony pipelines.
1839
- */
1840
- async *synthesizeStream(text) {
1841
- const response = await fetch(OPENAI_TTS_URL, {
1842
- method: "POST",
1843
- headers: {
1844
- "Authorization": `Bearer ${this.apiKey}`,
1845
- "Content-Type": "application/json"
1846
- },
1847
- body: JSON.stringify({
1848
- model: this.model,
1849
- input: text,
1850
- voice: this.voice,
1851
- response_format: "pcm"
1852
- }),
1853
- signal: AbortSignal.timeout(3e4)
1854
1694
  });
1855
- if (!response.ok) {
1856
- const body = await response.text();
1857
- throw new Error(`OpenAI TTS error ${response.status}: ${body}`);
1858
- }
1859
- if (!response.body) {
1860
- throw new Error("OpenAI TTS: no response body");
1861
- }
1862
- const reader = response.body.getReader();
1863
- try {
1864
- while (true) {
1865
- const { done, value } = await reader.read();
1866
- if (done) break;
1867
- if (value && value.length > 0) {
1868
- yield _OpenAITTS.resample24kTo16k(Buffer.from(value));
1869
- }
1695
+ this.ws.on("message", (raw) => {
1696
+ let data;
1697
+ try {
1698
+ data = JSON.parse(raw.toString());
1699
+ } catch {
1700
+ return;
1870
1701
  }
1871
- } finally {
1872
- if (typeof reader.cancel === "function") await reader.cancel().catch(() => {
1873
- });
1874
- reader.releaseLock();
1702
+ if (data.type === "Metadata" && data.request_id) {
1703
+ this.requestId = data.request_id;
1704
+ return;
1705
+ }
1706
+ if (data.type !== "Results") return;
1707
+ const alternatives = data.channel?.alternatives ?? [];
1708
+ if (!alternatives.length) return;
1709
+ const best = alternatives[0];
1710
+ const text = (best.transcript ?? "").trim();
1711
+ if (!text) return;
1712
+ const transcript = {
1713
+ text,
1714
+ isFinal: Boolean(data.is_final) || Boolean(data.speech_final),
1715
+ confidence: best.confidence ?? 0
1716
+ };
1717
+ for (const cb of this.callbacks) {
1718
+ cb(transcript);
1719
+ }
1720
+ });
1721
+ }
1722
+ sendAudio(audio) {
1723
+ if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
1724
+ this.ws.send(audio);
1725
+ }
1726
+ onTranscript(callback) {
1727
+ if (this.callbacks.length >= 10) {
1728
+ getLogger().warn("DeepgramSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
1729
+ this.callbacks[this.callbacks.length - 1] = callback;
1730
+ return;
1875
1731
  }
1732
+ this.callbacks.push(callback);
1876
1733
  }
1877
- /**
1878
- * Resample 24 kHz PCM16-LE to 16 kHz by taking 2 out of every 3 samples.
1879
- *
1880
- * For each group of 3 input samples the first is kept as-is and the second
1881
- * output sample is the average of input samples 2 and 3. This matches the
1882
- * Python SDK implementation.
1883
- */
1884
- static resample24kTo16k(audio) {
1885
- if (audio.length < 2) return audio;
1886
- const sampleCount = Math.floor(audio.length / 2);
1887
- const samples = new Int16Array(sampleCount);
1888
- for (let i = 0; i < sampleCount; i++) {
1889
- samples[i] = audio.readInt16LE(i * 2);
1890
- }
1891
- const resampled = [];
1892
- for (let i = 0; i < samples.length; i += 3) {
1893
- resampled.push(samples[i]);
1894
- if (i + 1 < samples.length) {
1895
- if (i + 2 < samples.length) {
1896
- resampled.push(Math.trunc((samples[i + 1] + samples[i + 2]) / 2));
1897
- } else {
1898
- resampled.push(samples[i + 1]);
1899
- }
1734
+ close() {
1735
+ if (this.ws) {
1736
+ try {
1737
+ this.ws.send(JSON.stringify({ type: "CloseStream" }));
1738
+ } catch {
1900
1739
  }
1740
+ this.ws.close();
1741
+ this.ws = null;
1901
1742
  }
1902
- const out = Buffer.alloc(resampled.length * 2);
1903
- for (let i = 0; i < resampled.length; i++) {
1904
- out.writeInt16LE(resampled[i], i * 2);
1905
- }
1906
- return out;
1907
1743
  }
1908
1744
  };
1909
1745
 
@@ -2493,6 +2329,9 @@ var StreamHandler = class {
2493
2329
  maxDurationTimer = null;
2494
2330
  transcriptProcessing = false;
2495
2331
  transcriptQueue = [];
2332
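+ // BUG #22 throttle state (mirrors the Python implementation): text and timestamp of the last committed final transcript, used to drop duplicate or back-to-back finals.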
2333
+ lastCommitText = "";
2334
+ lastCommitAt = 0;
2496
2335
  history;
2497
2336
  metricsAcc;
2498
2337
  constructor(deps, ws, caller, callee) {
@@ -2501,8 +2340,8 @@ var StreamHandler = class {
2501
2340
  this.caller = caller;
2502
2341
  this.callee = callee;
2503
2342
  this.history = createHistoryManager(200);
2504
- const sttProviderName = deps.agent.stt?.provider || (deps.agent.deepgramKey ? "deepgram" : void 0);
2505
- const ttsProviderName = deps.agent.tts?.provider === "elevenlabs" ? "elevenlabs" : deps.agent.tts?.provider === "openai" ? "openai_tts" : deps.agent.elevenlabsKey ? "elevenlabs" : void 0;
2343
+ const sttProviderName = deps.agent.stt ? deps.agent.stt.constructor?.name ?? "custom" : void 0;
2344
+ const ttsProviderName = deps.agent.tts ? deps.agent.tts.constructor?.name ?? "custom" : void 0;
2506
2345
  const providerMode = deps.agent.provider ?? "openai_realtime";
2507
2346
  this.metricsAcc = new CallMetricsAccumulator({
2508
2347
  callId: "",
@@ -2603,15 +2442,23 @@ var StreamHandler = class {
2603
2442
  this.streamSid = sid;
2604
2443
  }
2605
2444
  /** Handle an incoming audio chunk (already decoded from base64). */
2606
- handleAudio(audioBuffer) {
2445
+ async handleAudio(audioBuffer) {
2607
2446
  const provider = this.deps.agent.provider ?? "openai_realtime";
2608
- if (provider === "pipeline" && this.stt && !this.isSpeaking) {
2609
- if (this.deps.bridge.telephonyProvider === "twilio") {
2610
- const pcm8k = mulawToPcm16(audioBuffer);
2611
- const pcm16k = resample8kTo16k(pcm8k);
2612
- this.stt.sendAudio(pcm16k);
2447
+ if (provider === "pipeline" && this.stt) {
2448
+ if (this.isSpeaking && (this.deps.agent.bargeInThresholdMs ?? 300) === 0) {
2449
+ return;
2450
+ }
2451
+ const pcm8k = mulawToPcm16(audioBuffer);
2452
+ const pcm16k = resample8kTo16k(pcm8k);
2453
+ const hooks = this.deps.agent.hooks;
2454
+ if (hooks) {
2455
+ const hookExecutor = new PipelineHookExecutor(hooks);
2456
+ const hookCtx = this.buildHookContext();
2457
+ const processed = await hookExecutor.runBeforeSendToStt(pcm16k, hookCtx);
2458
+ if (processed === null) return;
2459
+ this.stt.sendAudio(processed);
2613
2460
  } else {
2614
- this.stt.sendAudio(audioBuffer);
2461
+ this.stt.sendAudio(pcm16k);
2615
2462
  }
2616
2463
  } else if (this.adapter) {
2617
2464
  if (this.adapter instanceof ElevenLabsConvAIAdapter && this.deps.bridge.telephonyProvider === "twilio") {
@@ -2684,18 +2531,8 @@ var StreamHandler = class {
2684
2531
  // ---------------------------------------------------------------------------
2685
2532
  async initPipeline(resolvedPrompt) {
2686
2533
  const label = this.deps.bridge.label;
2687
- this.stt = this.deps.bridge.createStt(this.deps.agent);
2688
- if (this.deps.agent.tts) {
2689
- if (this.deps.agent.tts.provider === "elevenlabs") {
2690
- this.tts = new ElevenLabsTTS(this.deps.agent.tts.apiKey, this.deps.agent.tts.voice ?? "21m00Tcm4TlvDq8ikWAM");
2691
- }
2692
- if (this.deps.agent.tts.provider === "openai") {
2693
- this.tts = new OpenAITTS(this.deps.agent.tts.apiKey, this.deps.agent.tts.voice ?? "alloy");
2694
- }
2695
- } else if (this.deps.agent.elevenlabsKey) {
2696
- const voiceId = this.deps.agent.voice && this.deps.agent.voice !== "alloy" ? this.deps.agent.voice : "21m00Tcm4TlvDq8ikWAM";
2697
- this.tts = new ElevenLabsTTS(this.deps.agent.elevenlabsKey, voiceId);
2698
- }
2534
+ this.stt = await this.deps.bridge.createStt(this.deps.agent);
2535
+ this.tts = await createTTS(this.deps.agent);
2699
2536
  if (!this.stt) {
2700
2537
  getLogger().info(`Pipeline mode (${label}): no STT configured`);
2701
2538
  }
@@ -2806,7 +2643,59 @@ var StreamHandler = class {
2806
2643
  }
2807
2644
  }
2808
2645
  async processTranscript(transcript) {
2646
+ if (transcript.text && this.isSpeaking) {
2647
+ getLogger().info(
2648
+ `Barge-in: caller spoke over agent (${sanitizeLogValue(transcript.text.slice(0, 40))})`
2649
+ );
2650
+ this.isSpeaking = false;
2651
+ try {
2652
+ this.deps.bridge.sendClear(this.ws, this.streamSid);
2653
+ } catch (err) {
2654
+ getLogger().debug(`sendClear during barge-in failed: ${String(err)}`);
2655
+ }
2656
+ this.metricsAcc.recordTurnInterrupted();
2657
+ }
2809
2658
  if (!transcript.isFinal || !transcript.text) return;
2659
+ const now = Date.now();
2660
+ const normalised = transcript.text.trim().toLowerCase();
2661
+ const stripped = normalised.replace(/[.,!?;: ]+$/, "").trim();
2662
+ const sinceLastMs = now - this.lastCommitAt;
2663
+ const HALLUCINATIONS = /* @__PURE__ */ new Set([
2664
+ "you",
2665
+ "thank you",
2666
+ "thanks",
2667
+ "yeah",
2668
+ "yes",
2669
+ "no",
2670
+ "okay",
2671
+ "ok",
2672
+ "uh",
2673
+ "um",
2674
+ "mmm",
2675
+ "hmm",
2676
+ ".",
2677
+ "bye",
2678
+ "right",
2679
+ "cool"
2680
+ ]);
2681
+ if (HALLUCINATIONS.has(stripped) || stripped === "") {
2682
+ getLogger().info(`Dropped likely STT hallucination: ${sanitizeLogValue(normalised.slice(0, 40))}`);
2683
+ return;
2684
+ }
2685
+ if (sinceLastMs < 2e3 && normalised === this.lastCommitText) {
2686
+ getLogger().info(
2687
+ `Dropped duplicate final transcript (${(sinceLastMs / 1e3).toFixed(1)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
2688
+ );
2689
+ return;
2690
+ }
2691
+ if (sinceLastMs < 500) {
2692
+ getLogger().info(
2693
+ `Dropped back-to-back final transcript (${(sinceLastMs / 1e3).toFixed(2)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
2694
+ );
2695
+ return;
2696
+ }
2697
+ this.lastCommitText = normalised;
2698
+ this.lastCommitAt = now;
2810
2699
  const label = this.deps.bridge.label;
2811
2700
  getLogger().info(`User (${label} pipeline): ${sanitizeLogValue(transcript.text)}`);
2812
2701
  this.metricsAcc.startTurn();
@@ -3161,10 +3050,11 @@ var StreamHandler = class {
3161
3050
  this.maxDurationTimer = null;
3162
3051
  }
3163
3052
  await this.deps.bridge.queryTelephonyCost(this.metricsAcc, this.callId);
3164
- const deepgramKey = this.deps.agent.deepgramKey;
3165
- const deepgramRequestId = this.stt?.requestId;
3166
- if (deepgramKey && deepgramRequestId) {
3167
- await queryDeepgramCost(this.metricsAcc, deepgramKey, deepgramRequestId);
3053
+ if (this.stt instanceof DeepgramSTT && this.stt.requestId) {
3054
+ const dgKey = this.stt.apiKey;
3055
+ if (dgKey) {
3056
+ await queryDeepgramCost(this.metricsAcc, dgKey, this.stt.requestId);
3057
+ }
3168
3058
  }
3169
3059
  const finalMetrics = this.metricsAcc.endCall();
3170
3060
  const callEndData = {
@@ -3321,11 +3211,16 @@ function resolveVariables(template, variables) {
3321
3211
  return result;
3322
3212
  }
3323
3213
  function buildAIAdapter(config, agent, resolvedPrompt) {
3214
+ const engine = agent.engine;
3324
3215
  if (agent.provider === "elevenlabs_convai") {
3325
- const key = agent.elevenlabsKey ?? "";
3216
+ if (!engine || engine.kind !== "elevenlabs_convai") {
3217
+ throw new Error(
3218
+ "ElevenLabs ConvAI mode requires `agent.engine = new ElevenLabsConvAI({...})`."
3219
+ );
3220
+ }
3326
3221
  return new ElevenLabsConvAIAdapter(
3327
- key,
3328
- agent.elevenlabsAgentId ?? "",
3222
+ engine.apiKey,
3223
+ engine.agentId,
3329
3224
  agent.voice ?? "21m00Tcm4TlvDq8ikWAM",
3330
3225
  "eleven_turbo_v2_5",
3331
3226
  agent.language ?? "en",
@@ -3338,8 +3233,9 @@ function buildAIAdapter(config, agent, resolvedPrompt) {
3338
3233
  parameters: t.parameters
3339
3234
  })) ?? [];
3340
3235
  const tools = [...agentTools, TRANSFER_CALL_TOOL, END_CALL_TOOL];
3236
+ const openaiKey = engine && engine.kind === "openai_realtime" ? engine.apiKey : config.openaiKey ?? "";
3341
3237
  return new OpenAIRealtimeAdapter(
3342
- config.openaiKey ?? "",
3238
+ openaiKey,
3343
3239
  agent.model,
3344
3240
  agent.voice,
3345
3241
  resolvedPrompt ?? agent.systemPrompt,
@@ -3397,16 +3293,7 @@ var TwilioBridge = class {
3397
3293
  }
3398
3294
  }
3399
3295
  createStt(agent) {
3400
- if (agent.stt) {
3401
- if (agent.stt.provider === "deepgram") {
3402
- return DeepgramSTT.forTwilio(agent.stt.apiKey, agent.stt.language ?? "en");
3403
- } else if (agent.stt.provider === "whisper") {
3404
- return WhisperSTT.forTwilio(agent.stt.apiKey, agent.stt.language ?? "en");
3405
- }
3406
- } else if (agent.deepgramKey) {
3407
- return DeepgramSTT.forTwilio(agent.deepgramKey, agent.language ?? "en");
3408
- }
3409
- return null;
3296
+ return createSTT(agent);
3410
3297
  }
3411
3298
  async queryTelephonyCost(metricsAcc, callId) {
3412
3299
  if (this.config.twilioSid && this.config.twilioToken && callId) {
@@ -3454,12 +3341,12 @@ var TelnyxBridge = class {
3454
3341
  label = "Telnyx";
3455
3342
  telephonyProvider = "telnyx";
3456
3343
  sendAudio(ws, audioBase64, _streamSid) {
3457
- ws.send(JSON.stringify({ event_type: "media", payload: { audio: { chunk: audioBase64 } } }));
3344
+ ws.send(JSON.stringify({ event: "media", media: { payload: audioBase64 } }));
3458
3345
  }
3459
3346
  sendMark(_ws, _markName, _streamSid) {
3460
3347
  }
3461
3348
  sendClear(ws, _streamSid) {
3462
- ws.send(JSON.stringify({ event_type: "media_stop" }));
3349
+ ws.send(JSON.stringify({ event: "clear" }));
3463
3350
  }
3464
3351
  async transferCall(callId, toNumber) {
3465
3352
  if (!isValidTelnyxTransferTarget(toNumber)) {
@@ -3553,16 +3440,7 @@ var TelnyxBridge = class {
3553
3440
  ws.close();
3554
3441
  }
3555
3442
  createStt(agent) {
3556
- if (agent.stt) {
3557
- if (agent.stt.provider === "deepgram") {
3558
- return new DeepgramSTT(agent.stt.apiKey, agent.stt.language ?? "en", "nova-3", "linear16", 16e3);
3559
- } else if (agent.stt.provider === "whisper") {
3560
- return new WhisperSTT(agent.stt.apiKey, "whisper-1", agent.stt.language ?? "en");
3561
- }
3562
- } else if (agent.deepgramKey) {
3563
- return new DeepgramSTT(agent.deepgramKey, agent.language ?? "en", "nova-3", "linear16", 16e3);
3564
- }
3565
- return null;
3443
+ return createSTT(agent);
3566
3444
  }
3567
3445
  async queryTelephonyCost(metricsAcc, callId) {
3568
3446
  if (this.config.telnyxKey && callId) {
@@ -3607,6 +3485,7 @@ var EmbeddedServer = class {
3607
3485
  server = null;
3608
3486
  wss = null;
3609
3487
  twilioTokenWarningLogged = false;
3488
+ telnyxSigWarningLogged = false;
3610
3489
  metricsStore;
3611
3490
  pricing;
3612
3491
  remoteHandler = new RemoteMessageHandler();
@@ -3654,6 +3533,31 @@ var EmbeddedServer = class {
3654
3533
  mountApi(app, this.metricsStore, this.dashboardToken);
3655
3534
  getLogger().info("Dashboard: http://127.0.0.1:" + port + "/");
3656
3535
  }
3536
+ app.post("/webhooks/twilio/status", (req, res) => {
3537
+ if (this.config.twilioToken) {
3538
+ const signature = req.headers["x-twilio-signature"] || "";
3539
+ const url = `https://${this.config.webhookUrl}${req.originalUrl}`;
3540
+ const params = req.body ?? {};
3541
+ if (!validateTwilioSignature(url, params, signature, this.config.twilioToken)) {
3542
+ res.status(403).send("Invalid signature");
3543
+ return;
3544
+ }
3545
+ }
3546
+ const body = req.body;
3547
+ const callSid = sanitizeLogValue(body["CallSid"] ?? "");
3548
+ const callStatus = sanitizeLogValue(body["CallStatus"] ?? "");
3549
+ const duration = body["CallDuration"] ?? body["Duration"] ?? "";
3550
+ getLogger().info(
3551
+ `Twilio status ${callStatus} for call ${callSid} (duration=${duration})`
3552
+ );
3553
+ if (callSid && callStatus) {
3554
+ const extra = {};
3555
+ const parsed = parseFloat(duration);
3556
+ if (!Number.isNaN(parsed)) extra.duration_seconds = parsed;
3557
+ this.metricsStore.updateCallStatus(callSid, callStatus, extra);
3558
+ }
3559
+ res.status(204).send();
3560
+ });
3657
3561
  app.post("/webhooks/twilio/recording", (req, res) => {
3658
3562
  if (this.config.twilioToken) {
3659
3563
  const signature = req.headers["x-twilio-signature"] || "";
@@ -3739,7 +3643,7 @@ var EmbeddedServer = class {
3739
3643
  const twiml = `<?xml version="1.0" encoding="UTF-8"?><Response><Connect><Stream url="${xmlStreamUrl}"><Parameter name="caller" value="${xmlEscape(caller)}"/><Parameter name="callee" value="${xmlEscape(callee)}"/></Stream></Connect></Response>`;
3740
3644
  res.type("text/xml").send(twiml);
3741
3645
  });
3742
- app.post("/webhooks/telnyx/voice", (req, res) => {
3646
+ app.post("/webhooks/telnyx/voice", async (req, res) => {
3743
3647
  if (this.config.telnyxPublicKey) {
3744
3648
  const rawBody = req.rawBody ?? "";
3745
3649
  const signature = req.headers["telnyx-signature-ed25519"] ?? "";
@@ -3748,7 +3652,8 @@ var EmbeddedServer = class {
3748
3652
  getLogger().warn("Telnyx webhook rejected: invalid or missing Ed25519 signature");
3749
3653
  return res.status(403).send("Invalid signature");
3750
3654
  }
3751
- } else {
3655
+ } else if (!this.telnyxSigWarningLogged) {
3656
+ this.telnyxSigWarningLogged = true;
3752
3657
  getLogger().warn("Telnyx webhook signature verification is disabled. Set telnyxPublicKey in LocalOptions for production use.");
3753
3658
  }
3754
3659
  const body = req.body;
@@ -3758,41 +3663,77 @@ var EmbeddedServer = class {
3758
3663
  if (typeof body.data.event_type !== "string" || typeof body.data.payload !== "object" || body.data.payload === null) {
3759
3664
  return res.status(400).send("Invalid body");
3760
3665
  }
3761
- const eventType = body?.data?.event_type ?? "";
3666
+ const eventType = body.data.event_type ?? "";
3667
+ const payload = body.data.payload ?? {};
3762
3668
  if (eventType === "call.dtmf.received") {
3763
- const digit = String(body.data?.payload?.digit ?? "").trim();
3669
+ const digit = String(payload.digit ?? "").trim();
3764
3670
  if (digit) {
3765
3671
  getLogger().info(`Telnyx DTMF received (webhook): ${sanitizeLogValue(digit)}`);
3766
3672
  }
3767
- return res.json({ received: true });
3673
+ return res.status(200).send();
3768
3674
  }
3769
3675
  if (eventType === "call.recording.saved") {
3770
- const recordingUrl = body.data?.payload?.recording_urls?.mp3 ?? body.data?.payload?.recording_urls?.wav ?? body.data?.payload?.public_recording_urls?.mp3 ?? "";
3676
+ const recordingUrl = payload.recording_urls?.mp3 ?? payload.recording_urls?.wav ?? payload.public_recording_urls?.mp3 ?? "";
3771
3677
  if (recordingUrl) {
3772
3678
  getLogger().info(`Telnyx recording saved (webhook): ${sanitizeLogValue(recordingUrl)}`);
3773
3679
  }
3774
- return res.json({ received: true });
3680
+ return res.status(200).send();
3775
3681
  }
3776
- if (eventType === "call.initiated") {
3777
- const payload = body?.data?.payload ?? {};
3778
- const callControlId = payload.call_control_id ?? "";
3779
- const caller = payload.from ?? "";
3780
- const callee = payload.to ?? "";
3781
- const streamUrl = `wss://${this.config.webhookUrl}/ws/stream/${encodeURIComponent(callControlId)}?caller=${encodeURIComponent(caller)}&callee=${encodeURIComponent(callee)}`;
3782
- const commands = [
3783
- { command: "answer" },
3784
- {
3785
- command: "stream_start",
3786
- params: {
3682
+ const callControlId = payload.call_control_id ?? "";
3683
+ if (!callControlId) {
3684
+ getLogger().warn("Telnyx webhook rejected: missing call_control_id");
3685
+ return res.status(400).send("Invalid webhook payload");
3686
+ }
3687
+ const apiKey = this.config.telnyxKey;
3688
+ if (!apiKey) {
3689
+ getLogger().warn("Telnyx webhook: missing telnyxKey in LocalOptions");
3690
+ return res.status(500).send("Missing Telnyx API key");
3691
+ }
3692
+ const apiBase = "https://api.telnyx.com/v2";
3693
+ const authHeaders = {
3694
+ "Content-Type": "application/json",
3695
+ Authorization: `Bearer ${apiKey}`
3696
+ };
3697
+ try {
3698
+ if (eventType === "call.initiated") {
3699
+ getLogger().info(`Telnyx call.initiated ${callControlId} \u2014 answering`);
3700
+ const resp = await fetch(`${apiBase}/calls/${encodeURIComponent(callControlId)}/actions/answer`, {
3701
+ method: "POST",
3702
+ headers: authHeaders,
3703
+ body: JSON.stringify({}),
3704
+ signal: AbortSignal.timeout(1e4)
3705
+ });
3706
+ if (!resp.ok) {
3707
+ getLogger().warn(`Telnyx answer failed: ${resp.status} ${(await resp.text()).slice(0, 200)}`);
3708
+ }
3709
+ } else if (eventType === "call.answered") {
3710
+ const caller = payload.from ?? "";
3711
+ const callee = payload.to ?? "";
3712
+ const streamUrl = `wss://${this.config.webhookUrl}/ws/stream/${encodeURIComponent(callControlId)}?caller=${encodeURIComponent(caller)}&callee=${encodeURIComponent(callee)}`;
3713
+ getLogger().info(`Telnyx call.answered ${callControlId} \u2014 starting stream`);
3714
+ const resp = await fetch(`${apiBase}/calls/${encodeURIComponent(callControlId)}/actions/streaming_start`, {
3715
+ method: "POST",
3716
+ headers: authHeaders,
3717
+ body: JSON.stringify({
3787
3718
  stream_url: streamUrl,
3788
- stream_track: "both_tracks"
3789
- }
3719
+ stream_track: "both_tracks",
3720
+ stream_bidirectional_mode: "rtp",
3721
+ stream_bidirectional_codec: "PCMU",
3722
+ stream_bidirectional_sampling_rate: 8e3,
3723
+ stream_bidirectional_target_legs: "self"
3724
+ }),
3725
+ signal: AbortSignal.timeout(1e4)
3726
+ });
3727
+ if (!resp.ok) {
3728
+ getLogger().warn(`Telnyx streaming_start failed: ${resp.status} ${(await resp.text()).slice(0, 200)}`);
3790
3729
  }
3791
- ];
3792
- res.json({ commands });
3793
- } else {
3794
- res.json({ received: true });
3730
+ } else {
3731
+ getLogger().debug(`Telnyx event ignored: ${eventType}`);
3732
+ }
3733
+ } catch (e) {
3734
+ getLogger().error(`Telnyx webhook handler error: ${String(e)}`);
3795
3735
  }
3736
+ return res.status(200).send();
3796
3737
  });
3797
3738
  this.server = createServer(app);
3798
3739
  this.wss = new WebSocketServer({ noServer: true });
@@ -3939,11 +3880,12 @@ Connect AI agents to phone numbers in 4 lines of code
3939
3880
  getLogger().error("Failed to parse Telnyx WS message:", e);
3940
3881
  return;
3941
3882
  }
3942
- const eventType = data.event_type ?? "";
3943
- getLogger().info(`Telnyx event: ${eventType}`);
3944
- if (eventType === "stream_started" && !streamStarted) {
3883
+ const event = data.event ?? "";
3884
+ if (event === "connected") return;
3885
+ getLogger().info(`Telnyx event: ${event}`);
3886
+ if (event === "start" && !streamStarted) {
3945
3887
  streamStarted = true;
3946
- const callControlId = data.payload?.call_control_id ?? "";
3888
+ const callControlId = data.start?.call_control_id ?? "";
3947
3889
  if (callControlId) this.activeCallIds.set(ws, callControlId);
3948
3890
  await handler.handleCallStart(callControlId);
3949
3891
  if (this.recording) {
@@ -3953,22 +3895,21 @@ Connect AI agents to phone numbers in 4 lines of code
3953
3895
  getLogger().warn(`Could not start recording: ${String(e)}`);
3954
3896
  }
3955
3897
  }
3956
- } else if (eventType === "media") {
3957
- const audioChunk = data.payload?.audio?.chunk ?? "";
3898
+ } else if (event === "media") {
3899
+ const track = data.media?.track ?? "inbound";
3900
+ if (track !== "inbound") return;
3901
+ const audioChunk = data.media?.payload ?? "";
3958
3902
  if (!audioChunk) return;
3959
3903
  handler.handleAudio(Buffer.from(audioChunk, "base64"));
3960
- } else if (eventType === "call.dtmf.received") {
3961
- const digit = String(data.payload?.digit ?? "").trim();
3904
+ } else if (event === "dtmf") {
3905
+ const digit = String(data.dtmf?.digit ?? "").trim();
3962
3906
  if (digit) {
3963
3907
  getLogger().info(`Telnyx DTMF received: ${digit}`);
3964
3908
  await handler.handleDtmf(digit);
3965
3909
  }
3966
- } else if (eventType === "call.recording.saved") {
3967
- const recordingUrl = data.payload?.recording_urls?.mp3 ?? data.payload?.recording_urls?.wav ?? data.payload?.public_recording_urls?.mp3 ?? "";
3968
- if (recordingUrl) {
3969
- getLogger().info(`Telnyx recording saved: ${recordingUrl}`);
3970
- }
3971
- } else if (eventType === "stream_stopped") {
3910
+ } else if (event === "error") {
3911
+ getLogger().warn(`Telnyx stream error: ${JSON.stringify(data)}`);
3912
+ } else if (event === "stop") {
3972
3913
  await handler.handleStop();
3973
3914
  }
3974
3915
  } catch (err) {
@@ -4437,8 +4378,6 @@ var TestSession = class {
4437
4378
  export {
4438
4379
  OpenAIRealtimeAdapter,
4439
4380
  ElevenLabsConvAIAdapter,
4440
- DeepgramSTT,
4441
- WhisperSTT,
4442
4381
  DEFAULT_PRICING,
4443
4382
  mergePricing,
4444
4383
  calculateSttCost,
@@ -4454,8 +4393,7 @@ export {
4454
4393
  RemoteMessageHandler,
4455
4394
  isRemoteUrl,
4456
4395
  isWebSocketUrl,
4457
- ElevenLabsTTS,
4458
- OpenAITTS,
4396
+ DeepgramSTT,
4459
4397
  CallMetricsAccumulator,
4460
4398
  mulawToPcm16,
4461
4399
  pcm16ToMulaw,