getpatter 0.4.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -305,235 +305,16 @@ var init_elevenlabs_convai = __esm({
305
305
  }
306
306
  });
307
307
 
308
- // src/providers/deepgram-stt.ts
309
- var import_ws4, DEEPGRAM_WS_URL, DeepgramSTT;
310
- var init_deepgram_stt = __esm({
311
- "src/providers/deepgram-stt.ts"() {
312
- "use strict";
313
- import_ws4 = __toESM(require("ws"));
314
- init_logger();
315
- DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/listen";
316
- DeepgramSTT = class _DeepgramSTT {
317
- constructor(apiKey, language = "en", model = "nova-3", encoding = "linear16", sampleRate = 16e3) {
318
- this.apiKey = apiKey;
319
- this.language = language;
320
- this.model = model;
321
- this.encoding = encoding;
322
- this.sampleRate = sampleRate;
323
- }
324
- ws = null;
325
- callbacks = [];
326
- /** Request ID from Deepgram — used to query actual cost post-call. */
327
- requestId = "";
328
- /** Factory for Twilio calls — mulaw 8 kHz. */
329
- static forTwilio(apiKey, language = "en", model = "nova-3") {
330
- return new _DeepgramSTT(apiKey, language, model, "mulaw", 8e3);
331
- }
332
- async connect() {
333
- const params = new URLSearchParams({
334
- model: this.model,
335
- language: this.language,
336
- encoding: this.encoding,
337
- sample_rate: String(this.sampleRate),
338
- channels: "1",
339
- interim_results: "true",
340
- endpointing: "300",
341
- smart_format: "true",
342
- vad_events: "true",
343
- no_delay: "true"
344
- });
345
- const url = `${DEEPGRAM_WS_URL}?${params.toString()}`;
346
- this.ws = new import_ws4.default(url, {
347
- headers: { Authorization: `Token ${this.apiKey}` }
348
- });
349
- await new Promise((resolve, reject) => {
350
- const timer = setTimeout(() => reject(new Error("Deepgram connect timeout")), 1e4);
351
- this.ws.once("open", () => {
352
- clearTimeout(timer);
353
- resolve();
354
- });
355
- this.ws.once("error", (err) => {
356
- clearTimeout(timer);
357
- reject(err);
358
- });
359
- });
360
- this.ws.on("message", (raw) => {
361
- let data;
362
- try {
363
- data = JSON.parse(raw.toString());
364
- } catch {
365
- return;
366
- }
367
- if (data.type === "Metadata" && data.request_id) {
368
- this.requestId = data.request_id;
369
- return;
370
- }
371
- if (data.type !== "Results") return;
372
- const alternatives = data.channel?.alternatives ?? [];
373
- if (!alternatives.length) return;
374
- const best = alternatives[0];
375
- const text = (best.transcript ?? "").trim();
376
- if (!text) return;
377
- const transcript = {
378
- text,
379
- isFinal: Boolean(data.is_final) && Boolean(data.speech_final),
380
- confidence: best.confidence ?? 0
381
- };
382
- for (const cb of this.callbacks) {
383
- cb(transcript);
384
- }
385
- });
386
- }
387
- sendAudio(audio) {
388
- if (!this.ws || this.ws.readyState !== import_ws4.default.OPEN) return;
389
- this.ws.send(audio);
390
- }
391
- onTranscript(callback) {
392
- if (this.callbacks.length >= 10) {
393
- getLogger().warn("DeepgramSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
394
- this.callbacks[this.callbacks.length - 1] = callback;
395
- return;
396
- }
397
- this.callbacks.push(callback);
398
- }
399
- close() {
400
- if (this.ws) {
401
- try {
402
- this.ws.send(JSON.stringify({ type: "CloseStream" }));
403
- } catch {
404
- }
405
- this.ws.close();
406
- this.ws = null;
407
- }
408
- }
409
- };
410
- }
411
- });
412
-
413
- // src/providers/whisper-stt.ts
414
- function wrapPcmInWav(pcm, sampleRate = 16e3, channels = 1, bitsPerSample = 16) {
415
- const dataSize = pcm.length;
416
- const header = Buffer.alloc(44);
417
- header.write("RIFF", 0);
418
- header.writeUInt32LE(36 + dataSize, 4);
419
- header.write("WAVE", 8);
420
- header.write("fmt ", 12);
421
- header.writeUInt32LE(16, 16);
422
- header.writeUInt16LE(1, 20);
423
- header.writeUInt16LE(channels, 22);
424
- header.writeUInt32LE(sampleRate, 24);
425
- header.writeUInt32LE(sampleRate * channels * (bitsPerSample / 8), 28);
426
- header.writeUInt16LE(channels * (bitsPerSample / 8), 32);
427
- header.writeUInt16LE(bitsPerSample, 34);
428
- header.write("data", 36);
429
- header.writeUInt32LE(dataSize, 40);
430
- return Buffer.concat([header, pcm]);
308
+ // src/provider-factory.ts
309
+ async function createSTT(agent) {
310
+ return agent.stt ?? null;
311
+ }
312
+ async function createTTS(agent) {
313
+ return agent.tts ?? null;
431
314
  }
432
- var OPENAI_TRANSCRIPTION_URL, DEFAULT_BUFFER_SIZE, WhisperSTT;
433
- var init_whisper_stt = __esm({
434
- "src/providers/whisper-stt.ts"() {
315
+ var init_provider_factory = __esm({
316
+ "src/provider-factory.ts"() {
435
317
  "use strict";
436
- init_logger();
437
- OPENAI_TRANSCRIPTION_URL = "https://api.openai.com/v1/audio/transcriptions";
438
- DEFAULT_BUFFER_SIZE = 16e3 * 2;
439
- WhisperSTT = class _WhisperSTT {
440
- apiKey;
441
- model;
442
- language;
443
- bufferSize;
444
- buffer = Buffer.alloc(0);
445
- callbacks = [];
446
- running = false;
447
- pendingTranscriptions = [];
448
- constructor(apiKey, model = "whisper-1", language, bufferSize = DEFAULT_BUFFER_SIZE) {
449
- this.apiKey = apiKey;
450
- this.model = model;
451
- this.language = language;
452
- this.bufferSize = bufferSize;
453
- }
454
- /** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
455
- static forTwilio(apiKey, language = "en", model = "whisper-1") {
456
- return new _WhisperSTT(apiKey, model, language);
457
- }
458
- async connect() {
459
- this.running = true;
460
- this.buffer = Buffer.alloc(0);
461
- }
462
- sendAudio(audio) {
463
- if (!this.running) return;
464
- this.buffer = Buffer.concat([this.buffer, audio]);
465
- if (this.buffer.length >= this.bufferSize) {
466
- const pcm = this.buffer;
467
- this.buffer = Buffer.alloc(0);
468
- this.trackTranscription(this.transcribeBuffer(pcm));
469
- }
470
- }
471
- trackTranscription(promise) {
472
- const wrapped = promise.finally(() => {
473
- const idx = this.pendingTranscriptions.indexOf(wrapped);
474
- if (idx !== -1) this.pendingTranscriptions.splice(idx, 1);
475
- });
476
- this.pendingTranscriptions.push(wrapped);
477
- }
478
- onTranscript(callback) {
479
- if (this.callbacks.length >= 10) {
480
- getLogger().warn("WhisperSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
481
- this.callbacks[this.callbacks.length - 1] = callback;
482
- return;
483
- }
484
- this.callbacks.push(callback);
485
- }
486
- async close() {
487
- this.running = false;
488
- if (this.buffer.length >= this.bufferSize / 4) {
489
- const pcm = this.buffer;
490
- this.buffer = Buffer.alloc(0);
491
- this.trackTranscription(this.transcribeBuffer(pcm));
492
- } else {
493
- this.buffer = Buffer.alloc(0);
494
- }
495
- await Promise.allSettled(this.pendingTranscriptions);
496
- this.callbacks = [];
497
- }
498
- // ------------------------------------------------------------------
499
- // Private
500
- // ------------------------------------------------------------------
501
- async transcribeBuffer(pcm) {
502
- const wav = wrapPcmInWav(pcm);
503
- const formData = new FormData();
504
- formData.append("file", new Blob([wav.buffer.slice(wav.byteOffset, wav.byteOffset + wav.byteLength)], { type: "audio/wav" }), "audio.wav");
505
- formData.append("model", this.model);
506
- if (this.language) {
507
- formData.append("language", this.language);
508
- }
509
- try {
510
- const resp = await fetch(OPENAI_TRANSCRIPTION_URL, {
511
- method: "POST",
512
- headers: { Authorization: `Bearer ${this.apiKey}` },
513
- body: formData,
514
- signal: AbortSignal.timeout(15e3)
515
- });
516
- if (!resp.ok) {
517
- const body = await resp.text();
518
- getLogger().error(`WhisperSTT transcription error: ${resp.status} ${body}`);
519
- return;
520
- }
521
- const json = await resp.json();
522
- const text = (json.text ?? "").trim();
523
- if (!text) return;
524
- const transcript = {
525
- text,
526
- isFinal: true,
527
- confidence: 1
528
- };
529
- for (const cb of this.callbacks) {
530
- cb(transcript);
531
- }
532
- } catch (err) {
533
- getLogger().error(`WhisperSTT transcription error: ${String(err)}`);
534
- }
535
- }
536
- };
537
318
  }
538
319
  });
539
320
 
@@ -616,9 +397,15 @@ var init_store = __esm({
616
397
  maxCalls;
617
398
  calls = [];
618
399
  activeCalls = /* @__PURE__ */ new Map();
619
- constructor(maxCalls = 500) {
400
+ /**
401
+ * Accepts either a numeric ``maxCalls`` (legacy positional — matches the
402
+ * original TS API) or an options object ``{ maxCalls }`` to align with the
403
+ * Python SDK's keyword-argument style. Plain literals also work:
404
+ * ``new MetricsStore()`` / ``new MetricsStore(100)`` / ``new MetricsStore({ maxCalls: 100 })``.
405
+ */
406
+ constructor(maxCallsOrOpts = 500) {
620
407
  super();
621
- this.maxCalls = maxCalls;
408
+ this.maxCalls = typeof maxCallsOrOpts === "number" ? maxCallsOrOpts : maxCallsOrOpts.maxCalls ?? 500;
622
409
  }
623
410
  publish(eventType, data) {
624
411
  this.emit("sse", { type: eventType, data });
@@ -626,22 +413,100 @@ var init_store = __esm({
626
413
  recordCallStart(data) {
627
414
  const callId = data.call_id || "";
628
415
  if (!callId) return;
416
+ const existing = this.activeCalls.get(callId);
417
+ if (existing) {
418
+ existing.caller = data.caller || existing.caller;
419
+ existing.callee = data.callee || existing.callee;
420
+ existing.direction = data.direction || existing.direction;
421
+ existing.status = "in-progress";
422
+ existing.turns = existing.turns || [];
423
+ } else {
424
+ const record = {
425
+ call_id: callId,
426
+ caller: data.caller || "",
427
+ callee: data.callee || "",
428
+ direction: data.direction || "inbound",
429
+ started_at: Date.now() / 1e3,
430
+ status: "in-progress",
431
+ turns: []
432
+ };
433
+ this.activeCalls.set(callId, record);
434
+ }
435
+ this.publish("call_start", {
436
+ call_id: callId,
437
+ caller: data.caller || "",
438
+ callee: data.callee || "",
439
+ direction: data.direction || "inbound"
440
+ });
441
+ }
442
+ /**
443
+ * Pre-register an outbound call before any webhook fires. Lets the
444
+ * dashboard surface attempts that never reach media (no-answer, busy,
445
+ * carrier-rejected). Mirrors the Python ``record_call_initiated``.
446
+ */
447
+ recordCallInitiated(data) {
448
+ const callId = data.call_id || "";
449
+ if (!callId) return;
450
+ if (this.activeCalls.has(callId)) return;
629
451
  const record = {
630
452
  call_id: callId,
631
453
  caller: data.caller || "",
632
454
  callee: data.callee || "",
633
- direction: data.direction || "inbound",
455
+ direction: data.direction || "outbound",
634
456
  started_at: Date.now() / 1e3,
457
+ status: "initiated",
635
458
  turns: []
636
459
  };
637
460
  this.activeCalls.set(callId, record);
638
- this.publish("call_start", {
461
+ this.publish("call_initiated", {
639
462
  call_id: callId,
640
463
  caller: record.caller,
641
464
  callee: record.callee,
642
- direction: record.direction
465
+ direction: record.direction,
466
+ status: record.status
643
467
  });
644
468
  }
469
+ /**
470
+ * Update the status of an active or completed call. Terminal states
471
+ * (completed, no-answer, busy, failed, canceled, webhook_error) move the
472
+ * row from active to completed so the UI freezes the live duration timer.
473
+ */
474
+ updateCallStatus(callId, status, extra = {}) {
475
+ if (!callId || !status) return;
476
+ const TERMINAL = /* @__PURE__ */ new Set(["completed", "no-answer", "busy", "failed", "canceled", "webhook_error"]);
477
+ const active = this.activeCalls.get(callId);
478
+ if (active) {
479
+ active.status = status;
480
+ Object.assign(active, extra);
481
+ if (TERMINAL.has(status)) {
482
+ const entry = {
483
+ call_id: callId,
484
+ caller: active.caller || "",
485
+ callee: active.callee || "",
486
+ direction: active.direction || "outbound",
487
+ started_at: active.started_at || 0,
488
+ ended_at: Date.now() / 1e3,
489
+ status,
490
+ metrics: null,
491
+ ...extra
492
+ };
493
+ this.activeCalls.delete(callId);
494
+ this.calls.push(entry);
495
+ if (this.calls.length > this.maxCalls) {
496
+ this.calls = this.calls.slice(-this.maxCalls);
497
+ }
498
+ }
499
+ } else {
500
+ for (let i = this.calls.length - 1; i >= 0; i--) {
501
+ if (this.calls[i].call_id === callId) {
502
+ this.calls[i].status = status;
503
+ Object.assign(this.calls[i], extra);
504
+ break;
505
+ }
506
+ }
507
+ }
508
+ this.publish("call_status", { call_id: callId, status, ...extra });
509
+ }
645
510
  recordTurn(data) {
646
511
  const callId = data.call_id || "";
647
512
  const turn = data.turn;
@@ -658,6 +523,8 @@ var init_store = __esm({
658
523
  if (!callId) return;
659
524
  const active = this.activeCalls.get(callId);
660
525
  this.activeCalls.delete(callId);
526
+ const activeStatus = active?.status;
527
+ const resolvedStatus = activeStatus && activeStatus !== "in-progress" ? activeStatus : "completed";
661
528
  const entry = {
662
529
  call_id: callId,
663
530
  caller: data.caller || active?.caller || "",
@@ -666,6 +533,7 @@ var init_store = __esm({
666
533
  started_at: active?.started_at || 0,
667
534
  ended_at: Date.now() / 1e3,
668
535
  transcript: data.transcript || [],
536
+ status: resolvedStatus,
669
537
  metrics: metrics ?? null
670
538
  };
671
539
  this.calls.push(entry);
@@ -1866,171 +1734,125 @@ var init_remote_message = __esm({
1866
1734
  }
1867
1735
  });
1868
1736
 
1869
- // src/providers/elevenlabs-tts.ts
1870
- var ELEVENLABS_BASE_URL, ElevenLabsTTS;
1871
- var init_elevenlabs_tts = __esm({
1872
- "src/providers/elevenlabs-tts.ts"() {
1737
+ // src/providers/deepgram-stt.ts
1738
+ var import_ws4, DEEPGRAM_WS_URL, DeepgramSTT;
1739
+ var init_deepgram_stt = __esm({
1740
+ "src/providers/deepgram-stt.ts"() {
1873
1741
  "use strict";
1874
- ELEVENLABS_BASE_URL = "https://api.elevenlabs.io/v1";
1875
- ElevenLabsTTS = class {
1876
- constructor(apiKey, voiceId = "21m00Tcm4TlvDq8ikWAM", modelId = "eleven_turbo_v2_5", outputFormat = "pcm_16000") {
1742
+ import_ws4 = __toESM(require("ws"));
1743
+ init_logger();
1744
+ DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/listen";
1745
+ DeepgramSTT = class _DeepgramSTT {
1746
+ ws = null;
1747
+ callbacks = [];
1748
+ /** Request ID from Deepgram — used to query actual cost post-call. */
1749
+ requestId = "";
1750
+ apiKey;
1751
+ language;
1752
+ model;
1753
+ encoding;
1754
+ sampleRate;
1755
+ endpointingMs;
1756
+ utteranceEndMs;
1757
+ smartFormat;
1758
+ interimResults;
1759
+ vadEvents;
1760
+ constructor(apiKey, languageOrOptions, model, encoding, sampleRate, options) {
1877
1761
  this.apiKey = apiKey;
1878
- this.voiceId = voiceId;
1879
- this.modelId = modelId;
1880
- this.outputFormat = outputFormat;
1881
- }
1882
- /**
1883
- * Synthesise text to speech and return the full audio as a single Buffer.
1884
- *
1885
- * For large chunks (or when latency matters) call `synthesizeStream` instead.
1886
- */
1887
- async synthesize(text) {
1888
- const chunks = [];
1889
- for await (const chunk of this.synthesizeStream(text)) {
1890
- chunks.push(chunk);
1891
- }
1892
- return Buffer.concat(chunks);
1762
+ const opts = typeof languageOrOptions === "object" && languageOrOptions !== null ? languageOrOptions : options ?? {};
1763
+ this.language = (typeof languageOrOptions === "string" ? languageOrOptions : opts.language) ?? "en";
1764
+ this.model = model ?? opts.model ?? "nova-3";
1765
+ this.encoding = encoding ?? opts.encoding ?? "linear16";
1766
+ this.sampleRate = sampleRate ?? opts.sampleRate ?? 16e3;
1767
+ this.endpointingMs = opts.endpointingMs ?? 150;
1768
+ this.utteranceEndMs = opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3;
1769
+ this.smartFormat = opts.smartFormat ?? true;
1770
+ this.interimResults = opts.interimResults ?? true;
1771
+ this.vadEvents = opts.vadEvents ?? true;
1772
+ }
1773
+ /** Factory for Twilio calls mulaw 8 kHz. Forwards tuning options through. */
1774
+ static forTwilio(apiKey, language = "en", model = "nova-3", options = {}) {
1775
+ return new _DeepgramSTT(apiKey, language, model, "mulaw", 8e3, options);
1893
1776
  }
1894
- /**
1895
- * Synthesise text and yield audio chunks as they arrive (streaming).
1896
- *
1897
- * The yielded buffers are raw PCM at 16 kHz (or whatever `outputFormat` is
1898
- * configured to).
1899
- */
1900
- async *synthesizeStream(text) {
1901
- const url = `${ELEVENLABS_BASE_URL}/text-to-speech/${encodeURIComponent(this.voiceId)}/stream?output_format=${encodeURIComponent(this.outputFormat)}`;
1902
- const response = await fetch(url, {
1903
- method: "POST",
1904
- headers: {
1905
- "xi-api-key": this.apiKey,
1906
- "Content-Type": "application/json"
1907
- },
1908
- body: JSON.stringify({ text, model_id: this.modelId }),
1909
- signal: AbortSignal.timeout(3e4)
1777
+ async connect() {
1778
+ const params = new URLSearchParams({
1779
+ model: this.model,
1780
+ language: this.language,
1781
+ encoding: this.encoding,
1782
+ sample_rate: String(this.sampleRate),
1783
+ channels: "1",
1784
+ interim_results: this.interimResults ? "true" : "false",
1785
+ endpointing: String(this.endpointingMs),
1786
+ smart_format: this.smartFormat ? "true" : "false",
1787
+ vad_events: this.vadEvents ? "true" : "false",
1788
+ no_delay: "true"
1910
1789
  });
1911
- if (!response.ok) {
1912
- const body = await response.text();
1913
- throw new Error(`ElevenLabs TTS error ${response.status}: ${body}`);
1790
+ if (this.utteranceEndMs !== null) {
1791
+ params.set("utterance_end_ms", String(Math.max(this.utteranceEndMs, 1e3)));
1914
1792
  }
1915
- if (!response.body) {
1916
- throw new Error("ElevenLabs TTS: no response body");
1917
- }
1918
- const reader = response.body.getReader();
1919
- try {
1920
- while (true) {
1921
- const { done, value } = await reader.read();
1922
- if (done) break;
1923
- if (value && value.length > 0) {
1924
- yield Buffer.from(value);
1925
- }
1926
- }
1927
- } finally {
1928
- if (typeof reader.cancel === "function") await reader.cancel().catch(() => {
1793
+ const url = `${DEEPGRAM_WS_URL}?${params.toString()}`;
1794
+ this.ws = new import_ws4.default(url, {
1795
+ headers: { Authorization: `Token ${this.apiKey}` }
1796
+ });
1797
+ await new Promise((resolve, reject) => {
1798
+ const timer = setTimeout(() => reject(new Error("Deepgram connect timeout")), 1e4);
1799
+ this.ws.once("open", () => {
1800
+ clearTimeout(timer);
1801
+ resolve();
1802
+ });
1803
+ this.ws.once("error", (err) => {
1804
+ clearTimeout(timer);
1805
+ reject(err);
1929
1806
  });
1930
- reader.releaseLock();
1931
- }
1932
- }
1933
- };
1934
- }
1935
- });
1936
-
1937
- // src/providers/openai-tts.ts
1938
- var OPENAI_TTS_URL, OpenAITTS;
1939
- var init_openai_tts = __esm({
1940
- "src/providers/openai-tts.ts"() {
1941
- "use strict";
1942
- OPENAI_TTS_URL = "https://api.openai.com/v1/audio/speech";
1943
- OpenAITTS = class _OpenAITTS {
1944
- constructor(apiKey, voice = "alloy", model = "tts-1") {
1945
- this.apiKey = apiKey;
1946
- this.voice = voice;
1947
- this.model = model;
1948
- }
1949
- /**
1950
- * Synthesise text to speech and return the full audio as a single Buffer.
1951
- *
1952
- * For large chunks (or when latency matters) call `synthesizeStream` instead.
1953
- */
1954
- async synthesize(text) {
1955
- const chunks = [];
1956
- for await (const chunk of this.synthesizeStream(text)) {
1957
- chunks.push(chunk);
1958
- }
1959
- return Buffer.concat(chunks);
1960
- }
1961
- /**
1962
- * Synthesise text and yield audio chunks as they arrive (streaming).
1963
- *
1964
- * OpenAI returns 24 kHz PCM16; each chunk is resampled to 16 kHz before
1965
- * yielding so the output is ready for telephony pipelines.
1966
- */
1967
- async *synthesizeStream(text) {
1968
- const response = await fetch(OPENAI_TTS_URL, {
1969
- method: "POST",
1970
- headers: {
1971
- "Authorization": `Bearer ${this.apiKey}`,
1972
- "Content-Type": "application/json"
1973
- },
1974
- body: JSON.stringify({
1975
- model: this.model,
1976
- input: text,
1977
- voice: this.voice,
1978
- response_format: "pcm"
1979
- }),
1980
- signal: AbortSignal.timeout(3e4)
1981
1807
  });
1982
- if (!response.ok) {
1983
- const body = await response.text();
1984
- throw new Error(`OpenAI TTS error ${response.status}: ${body}`);
1985
- }
1986
- if (!response.body) {
1987
- throw new Error("OpenAI TTS: no response body");
1988
- }
1989
- const reader = response.body.getReader();
1990
- try {
1991
- while (true) {
1992
- const { done, value } = await reader.read();
1993
- if (done) break;
1994
- if (value && value.length > 0) {
1995
- yield _OpenAITTS.resample24kTo16k(Buffer.from(value));
1996
- }
1808
+ this.ws.on("message", (raw) => {
1809
+ let data;
1810
+ try {
1811
+ data = JSON.parse(raw.toString());
1812
+ } catch {
1813
+ return;
1997
1814
  }
1998
- } finally {
1999
- if (typeof reader.cancel === "function") await reader.cancel().catch(() => {
2000
- });
2001
- reader.releaseLock();
1815
+ if (data.type === "Metadata" && data.request_id) {
1816
+ this.requestId = data.request_id;
1817
+ return;
1818
+ }
1819
+ if (data.type !== "Results") return;
1820
+ const alternatives = data.channel?.alternatives ?? [];
1821
+ if (!alternatives.length) return;
1822
+ const best = alternatives[0];
1823
+ const text = (best.transcript ?? "").trim();
1824
+ if (!text) return;
1825
+ const transcript = {
1826
+ text,
1827
+ isFinal: Boolean(data.is_final) || Boolean(data.speech_final),
1828
+ confidence: best.confidence ?? 0
1829
+ };
1830
+ for (const cb of this.callbacks) {
1831
+ cb(transcript);
1832
+ }
1833
+ });
1834
+ }
1835
+ sendAudio(audio) {
1836
+ if (!this.ws || this.ws.readyState !== import_ws4.default.OPEN) return;
1837
+ this.ws.send(audio);
1838
+ }
1839
+ onTranscript(callback) {
1840
+ if (this.callbacks.length >= 10) {
1841
+ getLogger().warn("DeepgramSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
1842
+ this.callbacks[this.callbacks.length - 1] = callback;
1843
+ return;
2002
1844
  }
1845
+ this.callbacks.push(callback);
2003
1846
  }
2004
- /**
2005
- * Resample 24 kHz PCM16-LE to 16 kHz by taking 2 out of every 3 samples.
2006
- *
2007
- * For each group of 3 input samples the first is kept as-is and the second
2008
- * output sample is the average of input samples 2 and 3. This matches the
2009
- * Python SDK implementation.
2010
- */
2011
- static resample24kTo16k(audio) {
2012
- if (audio.length < 2) return audio;
2013
- const sampleCount = Math.floor(audio.length / 2);
2014
- const samples = new Int16Array(sampleCount);
2015
- for (let i = 0; i < sampleCount; i++) {
2016
- samples[i] = audio.readInt16LE(i * 2);
2017
- }
2018
- const resampled = [];
2019
- for (let i = 0; i < samples.length; i += 3) {
2020
- resampled.push(samples[i]);
2021
- if (i + 1 < samples.length) {
2022
- if (i + 2 < samples.length) {
2023
- resampled.push(Math.trunc((samples[i + 1] + samples[i + 2]) / 2));
2024
- } else {
2025
- resampled.push(samples[i + 1]);
2026
- }
1847
+ close() {
1848
+ if (this.ws) {
1849
+ try {
1850
+ this.ws.send(JSON.stringify({ type: "CloseStream" }));
1851
+ } catch {
2027
1852
  }
1853
+ this.ws.close();
1854
+ this.ws = null;
2028
1855
  }
2029
- const out = Buffer.alloc(resampled.length * 2);
2030
- for (let i = 0; i < resampled.length; i++) {
2031
- out.writeInt16LE(resampled[i], i * 2);
2032
- }
2033
- return out;
2034
1856
  }
2035
1857
  };
2036
1858
  }
@@ -2940,8 +2762,8 @@ var init_stream_handler = __esm({
2940
2762
  "use strict";
2941
2763
  init_openai_realtime();
2942
2764
  init_elevenlabs_convai();
2943
- init_elevenlabs_tts();
2944
- init_openai_tts();
2765
+ init_deepgram_stt();
2766
+ init_provider_factory();
2945
2767
  init_metrics();
2946
2768
  init_transcoding();
2947
2769
  init_llm_loop();
@@ -2972,6 +2794,9 @@ var init_stream_handler = __esm({
2972
2794
  maxDurationTimer = null;
2973
2795
  transcriptProcessing = false;
2974
2796
  transcriptQueue = [];
2797
+ // BUG #22 throttle state — mirror Python impl.
2798
+ lastCommitText = "";
2799
+ lastCommitAt = 0;
2975
2800
  history;
2976
2801
  metricsAcc;
2977
2802
  constructor(deps, ws, caller, callee) {
@@ -2980,8 +2805,8 @@ var init_stream_handler = __esm({
2980
2805
  this.caller = caller;
2981
2806
  this.callee = callee;
2982
2807
  this.history = createHistoryManager(200);
2983
- const sttProviderName = deps.agent.stt?.provider || (deps.agent.deepgramKey ? "deepgram" : void 0);
2984
- const ttsProviderName = deps.agent.tts?.provider === "elevenlabs" ? "elevenlabs" : deps.agent.tts?.provider === "openai" ? "openai_tts" : deps.agent.elevenlabsKey ? "elevenlabs" : void 0;
2808
+ const sttProviderName = deps.agent.stt ? deps.agent.stt.constructor?.name ?? "custom" : void 0;
2809
+ const ttsProviderName = deps.agent.tts ? deps.agent.tts.constructor?.name ?? "custom" : void 0;
2985
2810
  const providerMode = deps.agent.provider ?? "openai_realtime";
2986
2811
  this.metricsAcc = new CallMetricsAccumulator({
2987
2812
  callId: "",
@@ -3082,15 +2907,23 @@ var init_stream_handler = __esm({
3082
2907
  this.streamSid = sid;
3083
2908
  }
3084
2909
  /** Handle an incoming audio chunk (already decoded from base64). */
3085
- handleAudio(audioBuffer) {
2910
+ async handleAudio(audioBuffer) {
3086
2911
  const provider = this.deps.agent.provider ?? "openai_realtime";
3087
- if (provider === "pipeline" && this.stt && !this.isSpeaking) {
3088
- if (this.deps.bridge.telephonyProvider === "twilio") {
3089
- const pcm8k = mulawToPcm16(audioBuffer);
3090
- const pcm16k = resample8kTo16k(pcm8k);
3091
- this.stt.sendAudio(pcm16k);
2912
+ if (provider === "pipeline" && this.stt) {
2913
+ if (this.isSpeaking && (this.deps.agent.bargeInThresholdMs ?? 300) === 0) {
2914
+ return;
2915
+ }
2916
+ const pcm8k = mulawToPcm16(audioBuffer);
2917
+ const pcm16k = resample8kTo16k(pcm8k);
2918
+ const hooks = this.deps.agent.hooks;
2919
+ if (hooks) {
2920
+ const hookExecutor = new PipelineHookExecutor(hooks);
2921
+ const hookCtx = this.buildHookContext();
2922
+ const processed = await hookExecutor.runBeforeSendToStt(pcm16k, hookCtx);
2923
+ if (processed === null) return;
2924
+ this.stt.sendAudio(processed);
3092
2925
  } else {
3093
- this.stt.sendAudio(audioBuffer);
2926
+ this.stt.sendAudio(pcm16k);
3094
2927
  }
3095
2928
  } else if (this.adapter) {
3096
2929
  if (this.adapter instanceof ElevenLabsConvAIAdapter && this.deps.bridge.telephonyProvider === "twilio") {
@@ -3163,18 +2996,8 @@ var init_stream_handler = __esm({
3163
2996
  // ---------------------------------------------------------------------------
3164
2997
  async initPipeline(resolvedPrompt) {
3165
2998
  const label = this.deps.bridge.label;
3166
- this.stt = this.deps.bridge.createStt(this.deps.agent);
3167
- if (this.deps.agent.tts) {
3168
- if (this.deps.agent.tts.provider === "elevenlabs") {
3169
- this.tts = new ElevenLabsTTS(this.deps.agent.tts.apiKey, this.deps.agent.tts.voice ?? "21m00Tcm4TlvDq8ikWAM");
3170
- }
3171
- if (this.deps.agent.tts.provider === "openai") {
3172
- this.tts = new OpenAITTS(this.deps.agent.tts.apiKey, this.deps.agent.tts.voice ?? "alloy");
3173
- }
3174
- } else if (this.deps.agent.elevenlabsKey) {
3175
- const voiceId = this.deps.agent.voice && this.deps.agent.voice !== "alloy" ? this.deps.agent.voice : "21m00Tcm4TlvDq8ikWAM";
3176
- this.tts = new ElevenLabsTTS(this.deps.agent.elevenlabsKey, voiceId);
3177
- }
2999
+ this.stt = await this.deps.bridge.createStt(this.deps.agent);
3000
+ this.tts = await createTTS(this.deps.agent);
3178
3001
  if (!this.stt) {
3179
3002
  getLogger().info(`Pipeline mode (${label}): no STT configured`);
3180
3003
  }
@@ -3285,7 +3108,59 @@ var init_stream_handler = __esm({
3285
3108
  }
3286
3109
  }
3287
3110
  async processTranscript(transcript) {
3111
+ if (transcript.text && this.isSpeaking) {
3112
+ getLogger().info(
3113
+ `Barge-in: caller spoke over agent (${sanitizeLogValue(transcript.text.slice(0, 40))})`
3114
+ );
3115
+ this.isSpeaking = false;
3116
+ try {
3117
+ this.deps.bridge.sendClear(this.ws, this.streamSid);
3118
+ } catch (err) {
3119
+ getLogger().debug(`sendClear during barge-in failed: ${String(err)}`);
3120
+ }
3121
+ this.metricsAcc.recordTurnInterrupted();
3122
+ }
3288
3123
  if (!transcript.isFinal || !transcript.text) return;
3124
+ const now = Date.now();
3125
+ const normalised = transcript.text.trim().toLowerCase();
3126
+ const stripped = normalised.replace(/[.,!?;: ]+$/, "").trim();
3127
+ const sinceLastMs = now - this.lastCommitAt;
3128
+ const HALLUCINATIONS = /* @__PURE__ */ new Set([
3129
+ "you",
3130
+ "thank you",
3131
+ "thanks",
3132
+ "yeah",
3133
+ "yes",
3134
+ "no",
3135
+ "okay",
3136
+ "ok",
3137
+ "uh",
3138
+ "um",
3139
+ "mmm",
3140
+ "hmm",
3141
+ ".",
3142
+ "bye",
3143
+ "right",
3144
+ "cool"
3145
+ ]);
3146
+ if (HALLUCINATIONS.has(stripped) || stripped === "") {
3147
+ getLogger().info(`Dropped likely STT hallucination: ${sanitizeLogValue(normalised.slice(0, 40))}`);
3148
+ return;
3149
+ }
3150
+ if (sinceLastMs < 2e3 && normalised === this.lastCommitText) {
3151
+ getLogger().info(
3152
+ `Dropped duplicate final transcript (${(sinceLastMs / 1e3).toFixed(1)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
3153
+ );
3154
+ return;
3155
+ }
3156
+ if (sinceLastMs < 500) {
3157
+ getLogger().info(
3158
+ `Dropped back-to-back final transcript (${(sinceLastMs / 1e3).toFixed(2)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
3159
+ );
3160
+ return;
3161
+ }
3162
+ this.lastCommitText = normalised;
3163
+ this.lastCommitAt = now;
3289
3164
  const label = this.deps.bridge.label;
3290
3165
  getLogger().info(`User (${label} pipeline): ${sanitizeLogValue(transcript.text)}`);
3291
3166
  this.metricsAcc.startTurn();
@@ -3640,10 +3515,11 @@ var init_stream_handler = __esm({
3640
3515
  this.maxDurationTimer = null;
3641
3516
  }
3642
3517
  await this.deps.bridge.queryTelephonyCost(this.metricsAcc, this.callId);
3643
- const deepgramKey = this.deps.agent.deepgramKey;
3644
- const deepgramRequestId = this.stt?.requestId;
3645
- if (deepgramKey && deepgramRequestId) {
3646
- await queryDeepgramCost(this.metricsAcc, deepgramKey, deepgramRequestId);
3518
+ if (this.stt instanceof DeepgramSTT && this.stt.requestId) {
3519
+ const dgKey = this.stt.apiKey;
3520
+ if (dgKey) {
3521
+ await queryDeepgramCost(this.metricsAcc, dgKey, this.stt.requestId);
3522
+ }
3647
3523
  }
3648
3524
  const finalMetrics = this.metricsAcc.endCall();
3649
3525
  const callEndData = {
@@ -3745,11 +3621,16 @@ function resolveVariables(template, variables) {
3745
3621
  return result;
3746
3622
  }
3747
3623
  function buildAIAdapter(config, agent, resolvedPrompt) {
3624
+ const engine = agent.engine;
3748
3625
  if (agent.provider === "elevenlabs_convai") {
3749
- const key = agent.elevenlabsKey ?? "";
3626
+ if (!engine || engine.kind !== "elevenlabs_convai") {
3627
+ throw new Error(
3628
+ "ElevenLabs ConvAI mode requires `agent.engine = new ElevenLabsConvAI({...})`."
3629
+ );
3630
+ }
3750
3631
  return new ElevenLabsConvAIAdapter(
3751
- key,
3752
- agent.elevenlabsAgentId ?? "",
3632
+ engine.apiKey,
3633
+ engine.agentId,
3753
3634
  agent.voice ?? "21m00Tcm4TlvDq8ikWAM",
3754
3635
  "eleven_turbo_v2_5",
3755
3636
  agent.language ?? "en",
@@ -3762,8 +3643,9 @@ function buildAIAdapter(config, agent, resolvedPrompt) {
3762
3643
  parameters: t.parameters
3763
3644
  })) ?? [];
3764
3645
  const tools = [...agentTools, TRANSFER_CALL_TOOL, END_CALL_TOOL];
3646
+ const openaiKey = engine && engine.kind === "openai_realtime" ? engine.apiKey : config.openaiKey ?? "";
3765
3647
  return new OpenAIRealtimeAdapter(
3766
- config.openaiKey ?? "",
3648
+ openaiKey,
3767
3649
  agent.model,
3768
3650
  agent.voice,
3769
3651
  resolvedPrompt ?? agent.systemPrompt,
@@ -3789,8 +3671,7 @@ var init_server = __esm({
3789
3671
  import_ws5 = require("ws");
3790
3672
  init_openai_realtime();
3791
3673
  init_elevenlabs_convai();
3792
- init_deepgram_stt();
3793
- init_whisper_stt();
3674
+ init_provider_factory();
3794
3675
  init_pricing();
3795
3676
  init_store();
3796
3677
  init_routes();
@@ -3875,16 +3756,7 @@ var init_server = __esm({
3875
3756
  }
3876
3757
  }
3877
3758
  createStt(agent) {
3878
- if (agent.stt) {
3879
- if (agent.stt.provider === "deepgram") {
3880
- return DeepgramSTT.forTwilio(agent.stt.apiKey, agent.stt.language ?? "en");
3881
- } else if (agent.stt.provider === "whisper") {
3882
- return WhisperSTT.forTwilio(agent.stt.apiKey, agent.stt.language ?? "en");
3883
- }
3884
- } else if (agent.deepgramKey) {
3885
- return DeepgramSTT.forTwilio(agent.deepgramKey, agent.language ?? "en");
3886
- }
3887
- return null;
3759
+ return createSTT(agent);
3888
3760
  }
3889
3761
  async queryTelephonyCost(metricsAcc, callId) {
3890
3762
  if (this.config.twilioSid && this.config.twilioToken && callId) {
@@ -3923,12 +3795,12 @@ var init_server = __esm({
3923
3795
  label = "Telnyx";
3924
3796
  telephonyProvider = "telnyx";
3925
3797
  sendAudio(ws, audioBase64, _streamSid) {
3926
- ws.send(JSON.stringify({ event_type: "media", payload: { audio: { chunk: audioBase64 } } }));
3798
+ ws.send(JSON.stringify({ event: "media", media: { payload: audioBase64 } }));
3927
3799
  }
3928
3800
  sendMark(_ws, _markName, _streamSid) {
3929
3801
  }
3930
3802
  sendClear(ws, _streamSid) {
3931
- ws.send(JSON.stringify({ event_type: "media_stop" }));
3803
+ ws.send(JSON.stringify({ event: "clear" }));
3932
3804
  }
3933
3805
  async transferCall(callId, toNumber) {
3934
3806
  if (!isValidTelnyxTransferTarget(toNumber)) {
@@ -4022,16 +3894,7 @@ var init_server = __esm({
4022
3894
  ws.close();
4023
3895
  }
4024
3896
  createStt(agent) {
4025
- if (agent.stt) {
4026
- if (agent.stt.provider === "deepgram") {
4027
- return new DeepgramSTT(agent.stt.apiKey, agent.stt.language ?? "en", "nova-3", "linear16", 16e3);
4028
- } else if (agent.stt.provider === "whisper") {
4029
- return new WhisperSTT(agent.stt.apiKey, "whisper-1", agent.stt.language ?? "en");
4030
- }
4031
- } else if (agent.deepgramKey) {
4032
- return new DeepgramSTT(agent.deepgramKey, agent.language ?? "en", "nova-3", "linear16", 16e3);
4033
- }
4034
- return null;
3897
+ return createSTT(agent);
4035
3898
  }
4036
3899
  async queryTelephonyCost(metricsAcc, callId) {
4037
3900
  if (this.config.telnyxKey && callId) {
@@ -4076,6 +3939,7 @@ var init_server = __esm({
4076
3939
  server = null;
4077
3940
  wss = null;
4078
3941
  twilioTokenWarningLogged = false;
3942
+ telnyxSigWarningLogged = false;
4079
3943
  metricsStore;
4080
3944
  pricing;
4081
3945
  remoteHandler = new RemoteMessageHandler();
@@ -4123,6 +3987,31 @@ var init_server = __esm({
4123
3987
  mountApi(app, this.metricsStore, this.dashboardToken);
4124
3988
  getLogger().info("Dashboard: http://127.0.0.1:" + port + "/");
4125
3989
  }
3990
// Twilio call-status callback: validates the signature (when a Twilio auth
// token is configured), logs the reported status and forwards it to the
// metrics store. Always answers 204 once the request has been accepted.
app.post("/webhooks/twilio/status", (req, res) => {
  // A missing/unparsed body is treated as an empty form so that neither the
  // signature check nor the field lookups below can throw on `undefined`.
  const body = req.body ?? {};
  if (this.config.twilioToken) {
    const signature = req.headers["x-twilio-signature"] || "";
    const url = `https://${this.config.webhookUrl}${req.originalUrl}`;
    if (!validateTwilioSignature(url, body, signature, this.config.twilioToken)) {
      res.status(403).send("Invalid signature");
      return;
    }
  }
  const callSid = sanitizeLogValue(body["CallSid"] ?? "");
  const callStatus = sanitizeLogValue(body["CallStatus"] ?? "");
  const duration = body["CallDuration"] ?? body["Duration"] ?? "";
  // `duration` is caller-controlled when signature validation is disabled, so
  // it is sanitised before logging just like the other two fields.
  getLogger().info(
    `Twilio status ${callStatus} for call ${callSid} (duration=${sanitizeLogValue(String(duration))})`
  );
  if (callSid && callStatus) {
    const extra = {};
    const parsed = Number.parseFloat(duration);
    // Only record a duration when the carrier actually sent a numeric value.
    if (!Number.isNaN(parsed)) extra.duration_seconds = parsed;
    this.metricsStore.updateCallStatus(callSid, callStatus, extra);
  }
  res.status(204).send();
});
4126
4015
  app.post("/webhooks/twilio/recording", (req, res) => {
4127
4016
  if (this.config.twilioToken) {
4128
4017
  const signature = req.headers["x-twilio-signature"] || "";
@@ -4208,7 +4097,7 @@ var init_server = __esm({
4208
4097
  const twiml = `<?xml version="1.0" encoding="UTF-8"?><Response><Connect><Stream url="${xmlStreamUrl}"><Parameter name="caller" value="${xmlEscape(caller)}"/><Parameter name="callee" value="${xmlEscape(callee)}"/></Stream></Connect></Response>`;
4209
4098
  res.type("text/xml").send(twiml);
4210
4099
  });
4211
- app.post("/webhooks/telnyx/voice", (req, res) => {
4100
+ app.post("/webhooks/telnyx/voice", async (req, res) => {
4212
4101
  if (this.config.telnyxPublicKey) {
4213
4102
  const rawBody = req.rawBody ?? "";
4214
4103
  const signature = req.headers["telnyx-signature-ed25519"] ?? "";
@@ -4217,7 +4106,8 @@ var init_server = __esm({
4217
4106
  getLogger().warn("Telnyx webhook rejected: invalid or missing Ed25519 signature");
4218
4107
  return res.status(403).send("Invalid signature");
4219
4108
  }
4220
- } else {
4109
+ } else if (!this.telnyxSigWarningLogged) {
4110
+ this.telnyxSigWarningLogged = true;
4221
4111
  getLogger().warn("Telnyx webhook signature verification is disabled. Set telnyxPublicKey in LocalOptions for production use.");
4222
4112
  }
4223
4113
  const body = req.body;
@@ -4227,41 +4117,77 @@ var init_server = __esm({
4227
4117
  if (typeof body.data.event_type !== "string" || typeof body.data.payload !== "object" || body.data.payload === null) {
4228
4118
  return res.status(400).send("Invalid body");
4229
4119
  }
4230
- const eventType = body?.data?.event_type ?? "";
4120
+ const eventType = body.data.event_type ?? "";
4121
+ const payload = body.data.payload ?? {};
4231
4122
  if (eventType === "call.dtmf.received") {
4232
- const digit = String(body.data?.payload?.digit ?? "").trim();
4123
+ const digit = String(payload.digit ?? "").trim();
4233
4124
  if (digit) {
4234
4125
  getLogger().info(`Telnyx DTMF received (webhook): ${sanitizeLogValue(digit)}`);
4235
4126
  }
4236
- return res.json({ received: true });
4127
+ return res.status(200).send();
4237
4128
  }
4238
4129
  if (eventType === "call.recording.saved") {
4239
- const recordingUrl = body.data?.payload?.recording_urls?.mp3 ?? body.data?.payload?.recording_urls?.wav ?? body.data?.payload?.public_recording_urls?.mp3 ?? "";
4130
+ const recordingUrl = payload.recording_urls?.mp3 ?? payload.recording_urls?.wav ?? payload.public_recording_urls?.mp3 ?? "";
4240
4131
  if (recordingUrl) {
4241
4132
  getLogger().info(`Telnyx recording saved (webhook): ${sanitizeLogValue(recordingUrl)}`);
4242
4133
  }
4243
- return res.json({ received: true });
4244
- }
4245
- if (eventType === "call.initiated") {
4246
- const payload = body?.data?.payload ?? {};
4247
- const callControlId = payload.call_control_id ?? "";
4248
- const caller = payload.from ?? "";
4249
- const callee = payload.to ?? "";
4250
- const streamUrl = `wss://${this.config.webhookUrl}/ws/stream/${encodeURIComponent(callControlId)}?caller=${encodeURIComponent(caller)}&callee=${encodeURIComponent(callee)}`;
4251
- const commands = [
4252
- { command: "answer" },
4253
- {
4254
- command: "stream_start",
4255
- params: {
4134
+ return res.status(200).send();
4135
+ }
4136
+ const callControlId = payload.call_control_id ?? "";
4137
+ if (!callControlId) {
4138
+ getLogger().warn("Telnyx webhook rejected: missing call_control_id");
4139
+ return res.status(400).send("Invalid webhook payload");
4140
+ }
4141
+ const apiKey = this.config.telnyxKey;
4142
+ if (!apiKey) {
4143
+ getLogger().warn("Telnyx webhook: missing telnyxKey in LocalOptions");
4144
+ return res.status(500).send("Missing Telnyx API key");
4145
+ }
4146
+ const apiBase = "https://api.telnyx.com/v2";
4147
+ const authHeaders = {
4148
+ "Content-Type": "application/json",
4149
+ Authorization: `Bearer ${apiKey}`
4150
+ };
4151
+ try {
4152
+ if (eventType === "call.initiated") {
4153
+ getLogger().info(`Telnyx call.initiated ${callControlId} \u2014 answering`);
4154
+ const resp = await fetch(`${apiBase}/calls/${encodeURIComponent(callControlId)}/actions/answer`, {
4155
+ method: "POST",
4156
+ headers: authHeaders,
4157
+ body: JSON.stringify({}),
4158
+ signal: AbortSignal.timeout(1e4)
4159
+ });
4160
+ if (!resp.ok) {
4161
+ getLogger().warn(`Telnyx answer failed: ${resp.status} ${(await resp.text()).slice(0, 200)}`);
4162
+ }
4163
+ } else if (eventType === "call.answered") {
4164
+ const caller = payload.from ?? "";
4165
+ const callee = payload.to ?? "";
4166
+ const streamUrl = `wss://${this.config.webhookUrl}/ws/stream/${encodeURIComponent(callControlId)}?caller=${encodeURIComponent(caller)}&callee=${encodeURIComponent(callee)}`;
4167
+ getLogger().info(`Telnyx call.answered ${callControlId} \u2014 starting stream`);
4168
+ const resp = await fetch(`${apiBase}/calls/${encodeURIComponent(callControlId)}/actions/streaming_start`, {
4169
+ method: "POST",
4170
+ headers: authHeaders,
4171
+ body: JSON.stringify({
4256
4172
  stream_url: streamUrl,
4257
- stream_track: "both_tracks"
4258
- }
4173
+ stream_track: "both_tracks",
4174
+ stream_bidirectional_mode: "rtp",
4175
+ stream_bidirectional_codec: "PCMU",
4176
+ stream_bidirectional_sampling_rate: 8e3,
4177
+ stream_bidirectional_target_legs: "self"
4178
+ }),
4179
+ signal: AbortSignal.timeout(1e4)
4180
+ });
4181
+ if (!resp.ok) {
4182
+ getLogger().warn(`Telnyx streaming_start failed: ${resp.status} ${(await resp.text()).slice(0, 200)}`);
4259
4183
  }
4260
- ];
4261
- res.json({ commands });
4262
- } else {
4263
- res.json({ received: true });
4184
+ } else {
4185
+ getLogger().debug(`Telnyx event ignored: ${eventType}`);
4186
+ }
4187
+ } catch (e) {
4188
+ getLogger().error(`Telnyx webhook handler error: ${String(e)}`);
4264
4189
  }
4190
+ return res.status(200).send();
4265
4191
  });
4266
4192
  this.server = (0, import_http.createServer)(app);
4267
4193
  this.wss = new import_ws5.WebSocketServer({ noServer: true });
@@ -4408,11 +4334,12 @@ Connect AI agents to phone numbers in 4 lines of code
4408
4334
  getLogger().error("Failed to parse Telnyx WS message:", e);
4409
4335
  return;
4410
4336
  }
4411
- const eventType = data.event_type ?? "";
4412
- getLogger().info(`Telnyx event: ${eventType}`);
4413
- if (eventType === "stream_started" && !streamStarted) {
4337
+ const event = data.event ?? "";
4338
+ if (event === "connected") return;
4339
+ getLogger().info(`Telnyx event: ${event}`);
4340
+ if (event === "start" && !streamStarted) {
4414
4341
  streamStarted = true;
4415
- const callControlId = data.payload?.call_control_id ?? "";
4342
+ const callControlId = data.start?.call_control_id ?? "";
4416
4343
  if (callControlId) this.activeCallIds.set(ws, callControlId);
4417
4344
  await handler.handleCallStart(callControlId);
4418
4345
  if (this.recording) {
@@ -4422,22 +4349,21 @@ Connect AI agents to phone numbers in 4 lines of code
4422
4349
  getLogger().warn(`Could not start recording: ${String(e)}`);
4423
4350
  }
4424
4351
  }
4425
- } else if (eventType === "media") {
4426
- const audioChunk = data.payload?.audio?.chunk ?? "";
4352
+ } else if (event === "media") {
4353
+ const track = data.media?.track ?? "inbound";
4354
+ if (track !== "inbound") return;
4355
+ const audioChunk = data.media?.payload ?? "";
4427
4356
  if (!audioChunk) return;
4428
4357
  handler.handleAudio(Buffer.from(audioChunk, "base64"));
4429
- } else if (eventType === "call.dtmf.received") {
4430
- const digit = String(data.payload?.digit ?? "").trim();
4358
+ } else if (event === "dtmf") {
4359
+ const digit = String(data.dtmf?.digit ?? "").trim();
4431
4360
  if (digit) {
4432
4361
  getLogger().info(`Telnyx DTMF received: ${digit}`);
4433
4362
  await handler.handleDtmf(digit);
4434
4363
  }
4435
- } else if (eventType === "call.recording.saved") {
4436
- const recordingUrl = data.payload?.recording_urls?.mp3 ?? data.payload?.recording_urls?.wav ?? data.payload?.public_recording_urls?.mp3 ?? "";
4437
- if (recordingUrl) {
4438
- getLogger().info(`Telnyx recording saved: ${recordingUrl}`);
4439
- }
4440
- } else if (eventType === "stream_stopped") {
4364
+ } else if (event === "error") {
4365
+ getLogger().warn(`Telnyx stream error: ${JSON.stringify(data)}`);
4366
+ } else if (event === "stop") {
4441
4367
  await handler.handleStop();
4442
4368
  }
4443
4369
  } catch (err) {
@@ -5407,6 +5333,94 @@ var init_tunnel = __esm({
5407
5333
  }
5408
5334
  });
5409
5335
 
5336
+ // src/carrier-config.ts
5337
+ var carrier_config_exports = {};
5338
+ __export(carrier_config_exports, {
5339
+ autoConfigureCarrier: () => autoConfigureCarrier,
5340
+ configureTelnyxNumber: () => configureTelnyxNumber,
5341
+ configureTwilioNumber: () => configureTwilioNumber
5342
+ });
5343
/**
 * Point a Twilio phone number's voice webhook at `voiceUrl`.
 *
 * Looks the number up via the account's IncomingPhoneNumbers list, then POSTs
 * `VoiceUrl` / `VoiceMethod=POST` to the matching resource.
 *
 * @param {string} accountSid - Twilio account SID (used for auth and the URL path).
 * @param {string} authToken - Twilio auth token (HTTP Basic password).
 * @param {string} phoneNumber - Number to look up (sent as the `PhoneNumber` filter).
 * @param {string} voiceUrl - Webhook URL to store on the number.
 * @returns {Promise<void>}
 * @throws {Error} When either request returns a non-2xx status, when the number
 *   is not found on the account, or when a request exceeds the 10 s timeout.
 */
async function configureTwilioNumber(accountSid, authToken, phoneNumber, voiceUrl) {
  const auth = `Basic ${Buffer.from(`${accountSid}:${authToken}`).toString("base64")}`;
  const numbersBase = `${TWILIO_API_BASE}/Accounts/${encodeURIComponent(accountSid)}/IncomingPhoneNumbers`;
  const listUrl = `${numbersBase}.json?PhoneNumber=${encodeURIComponent(phoneNumber)}`;
  const listResp = await fetch(listUrl, {
    method: "GET",
    headers: { Authorization: auth },
    // Bound the request so a stalled carrier API cannot block the caller forever.
    signal: AbortSignal.timeout(1e4)
  });
  if (!listResp.ok) {
    throw new Error(
      `Twilio IncomingPhoneNumbers.list failed: ${listResp.status} ${await listResp.text()}`
    );
  }
  const body = await listResp.json();
  const match = body.incoming_phone_numbers?.[0];
  // An entry without a sid cannot be addressed, so treat it like "not found"
  // instead of posting to ".../undefined.json".
  if (!match?.sid) {
    throw new Error(`Twilio number ${phoneNumber} not found on account ${accountSid}`);
  }
  const updateUrl = `${numbersBase}/${encodeURIComponent(match.sid)}.json`;
  const form = new URLSearchParams({ VoiceUrl: voiceUrl, VoiceMethod: "POST" });
  const updateResp = await fetch(updateUrl, {
    method: "POST",
    headers: {
      Authorization: auth,
      "Content-Type": "application/x-www-form-urlencoded"
    },
    body: form.toString(),
    signal: AbortSignal.timeout(1e4)
  });
  if (!updateResp.ok) {
    throw new Error(
      `Twilio IncomingPhoneNumbers.update failed: ${updateResp.status} ${await updateResp.text()}`
    );
  }
}
5376
/**
 * Associate a Telnyx phone number with a connection by PATCHing
 * `/phone_numbers/{phoneNumber}` with the given `connection_id`.
 *
 * @param {string} apiKey - Telnyx API key (sent as a Bearer token).
 * @param {string} connectionId - Connection to attach the number to.
 * @param {string} phoneNumber - Number identifying the resource in the URL path.
 * @returns {Promise<void>}
 * @throws {Error} When Telnyx answers with a non-2xx status or the request
 *   exceeds the 10 s timeout.
 */
async function configureTelnyxNumber(apiKey, connectionId, phoneNumber) {
  const resp = await fetch(`${TELNYX_API_BASE}/phone_numbers/${encodeURIComponent(phoneNumber)}`, {
    method: "PATCH",
    headers: {
      Authorization: `Bearer ${apiKey}`,
      "Content-Type": "application/json"
    },
    body: JSON.stringify({ connection_id: connectionId }),
    // Bound the request so a stalled carrier API cannot block the caller forever.
    signal: AbortSignal.timeout(1e4)
  });
  if (!resp.ok) {
    throw new Error(
      `Telnyx PATCH /phone_numbers/${phoneNumber} failed: ${resp.status} ${await resp.text()}`
    );
  }
}
5391
/**
 * Best-effort carrier setup: configures the number on whichever carrier is
 * selected and logs the outcome. Failures from the carrier call are logged
 * as warnings and never rethrown.
 *
 * The provider is `params.telephonyProvider` when given; otherwise it is
 * "twilio" if `params.twilioSid` is truthy and "telnyx" if not. Nothing is
 * done when the selected provider's credentials are incomplete.
 *
 * @param {object} params - Reads `telephonyProvider`, `twilioSid`,
 *   `twilioToken`, `telnyxKey`, `telnyxConnectionId`, `phoneNumber` and
 *   `webhookHost`.
 * @returns {Promise<void>}
 */
async function autoConfigureCarrier(params) {
  const logger = getLogger();
  const describeError = (failure) => failure instanceof Error ? failure.message : String(failure);
  const selected = params.telephonyProvider ?? (params.twilioSid ? "twilio" : "telnyx");

  const twilioReady = selected === "twilio" && params.twilioSid && params.twilioToken;
  if (twilioReady) {
    const voiceUrl = `https://${params.webhookHost}/webhooks/twilio/voice`;
    try {
      await configureTwilioNumber(params.twilioSid, params.twilioToken, params.phoneNumber, voiceUrl);
      logger.info("Twilio webhook set to %s", voiceUrl);
    } catch (failure) {
      logger.warn("Could not auto-configure Twilio webhook: %s", describeError(failure));
      // Tell the operator what to paste into the Twilio console instead.
      logger.info("Set webhook manually to: %s", voiceUrl);
    }
    return;
  }

  const telnyxReady = selected === "telnyx" && params.telnyxKey && params.telnyxConnectionId;
  if (!telnyxReady) {
    return;
  }
  try {
    await configureTelnyxNumber(params.telnyxKey, params.telnyxConnectionId, params.phoneNumber);
    logger.info("Telnyx number %s associated with connection %s", params.phoneNumber, params.telnyxConnectionId);
  } catch (failure) {
    logger.warn("Could not auto-configure Telnyx number: %s", describeError(failure));
  }
}
5414
+ var TWILIO_API_BASE, TELNYX_API_BASE;
5415
+ var init_carrier_config = __esm({
5416
+ "src/carrier-config.ts"() {
5417
+ "use strict";
5418
+ init_logger();
5419
+ TWILIO_API_BASE = "https://api.twilio.com/2010-04-01";
5420
+ TELNYX_API_BASE = "https://api.telnyx.com/v2";
5421
+ }
5422
+ });
5423
+
5410
5424
  // src/test-mode.ts
5411
5425
  var test_mode_exports = {};
5412
5426
  __export(test_mode_exports, {
@@ -6521,31 +6535,35 @@ var require_node_cron = __commonJS({
6521
6535
  var index_exports = {};
6522
6536
  __export(index_exports, {
6523
6537
  AllProvidersFailedError: () => AllProvidersFailedError,
6524
- AssemblyAISTT: () => AssemblyAISTT,
6538
+ AssemblyAISTT: () => STT5,
6525
6539
  AuthenticationError: () => AuthenticationError,
6526
6540
  BackgroundAudioPlayer: () => BackgroundAudioPlayer,
6527
6541
  BuiltinAudioClip: () => BuiltinAudioClip,
6528
6542
  CallMetricsAccumulator: () => CallMetricsAccumulator,
6529
- CartesiaSTT: () => CartesiaSTT,
6530
- CartesiaTTS: () => CartesiaTTS,
6543
+ CartesiaSTT: () => STT3,
6544
+ CartesiaTTS: () => TTS3,
6531
6545
  ChatContext: () => ChatContext,
6546
+ CloudflareTunnel: () => CloudflareTunnel,
6532
6547
  DEFAULT_MIN_SENTENCE_LEN: () => DEFAULT_MIN_SENTENCE_LEN,
6533
6548
  DEFAULT_PRICING: () => DEFAULT_PRICING,
6534
6549
  DTMF_EVENTS: () => DTMF_EVENTS,
6535
- DeepgramSTT: () => DeepgramSTT,
6550
+ DeepgramSTT: () => STT,
6551
+ ElevenLabsConvAI: () => ConvAI,
6536
6552
  ElevenLabsConvAIAdapter: () => ElevenLabsConvAIAdapter,
6537
- ElevenLabsTTS: () => ElevenLabsTTS,
6553
+ ElevenLabsTTS: () => TTS,
6538
6554
  FallbackLLMProvider: () => FallbackLLMProvider,
6539
6555
  GEMINI_DEFAULT_INPUT_SR: () => GEMINI_DEFAULT_INPUT_SR,
6540
6556
  GEMINI_DEFAULT_OUTPUT_SR: () => GEMINI_DEFAULT_OUTPUT_SR,
6541
6557
  GeminiLiveAdapter: () => GeminiLiveAdapter,
6558
+ Guardrail: () => Guardrail,
6542
6559
  IVRActivity: () => IVRActivity,
6543
6560
  LLMLoop: () => LLMLoop,
6544
- LMNTTTS: () => LMNTTTS,
6561
+ LMNTTTS: () => TTS5,
6545
6562
  MetricsStore: () => MetricsStore,
6546
6563
  OpenAILLMProvider: () => OpenAILLMProvider,
6564
+ OpenAIRealtime: () => Realtime,
6547
6565
  OpenAIRealtimeAdapter: () => OpenAIRealtimeAdapter,
6548
- OpenAITTS: () => OpenAITTS,
6566
+ OpenAITTS: () => TTS2,
6549
6567
  PartialStreamError: () => PartialStreamError,
6550
6568
  Patter: () => Patter,
6551
6569
  PatterConnectionError: () => PatterConnectionError,
@@ -6553,15 +6571,19 @@ __export(index_exports, {
6553
6571
  PipelineHookExecutor: () => PipelineHookExecutor,
6554
6572
  ProvisionError: () => ProvisionError,
6555
6573
  RemoteMessageHandler: () => RemoteMessageHandler,
6556
- RimeTTS: () => RimeTTS,
6574
+ RimeTTS: () => TTS4,
6557
6575
  SentenceChunker: () => SentenceChunker,
6558
- SonioxSTT: () => SonioxSTT,
6576
+ SonioxSTT: () => STT4,
6577
+ StaticTunnel: () => Static,
6578
+ Telnyx: () => Carrier2,
6559
6579
  TestSession: () => TestSession,
6560
6580
  TfidfLoopDetector: () => TfidfLoopDetector,
6581
+ Tool: () => Tool,
6582
+ Twilio: () => Carrier,
6561
6583
  ULTRAVOX_DEFAULT_API_BASE: () => ULTRAVOX_DEFAULT_API_BASE,
6562
6584
  ULTRAVOX_DEFAULT_SR: () => ULTRAVOX_DEFAULT_SR,
6563
6585
  UltravoxRealtimeAdapter: () => UltravoxRealtimeAdapter,
6564
- WhisperSTT: () => WhisperSTT,
6586
+ WhisperSTT: () => STT2,
6565
6587
  builtinClipPath: () => builtinClipPath,
6566
6588
  calculateRealtimeCost: () => calculateRealtimeCost,
6567
6589
  calculateSttCost: () => calculateSttCost,
@@ -6577,6 +6599,7 @@ __export(index_exports, {
6577
6599
  filterMarkdown: () => filterMarkdown,
6578
6600
  formatDtmf: () => formatDtmf,
6579
6601
  getLogger: () => getLogger,
6602
+ guardrail: () => guardrail,
6580
6603
  isRemoteUrl: () => isRemoteUrl,
6581
6604
  isWebSocketUrl: () => isWebSocketUrl,
6582
6605
  makeAuthMiddleware: () => makeAuthMiddleware,
@@ -6598,6 +6621,7 @@ __export(index_exports, {
6598
6621
  selectSoundFromList: () => selectSoundFromList,
6599
6622
  setLogger: () => setLogger,
6600
6623
  startTunnel: () => startTunnel,
6624
+ tool: () => tool,
6601
6625
  whisper: () => whisper
6602
6626
  });
6603
6627
  module.exports = __toCommonJS(index_exports);
@@ -6743,50 +6767,89 @@ var PatterConnection = class {
6743
6767
  }
6744
6768
  };
6745
6769
 
6746
- // src/providers.ts
6747
- var STTConfigImpl = class {
6748
- provider;
6770
+ // src/client.ts
6771
+ init_server();
6772
+
6773
+ // src/engines/openai.ts
6774
+ var Realtime = class {
6775
+ kind = "openai_realtime";
6749
6776
  apiKey;
6750
- language;
6751
- constructor(provider, apiKey, language = "en") {
6752
- this.provider = provider;
6753
- this.apiKey = apiKey;
6754
- this.language = language;
6755
- }
6756
- toDict() {
6757
- return { provider: this.provider, api_key: this.apiKey, language: this.language };
6777
+ model;
6778
+ voice;
6779
+ constructor(opts = {}) {
6780
+ const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
6781
+ if (!key) {
6782
+ throw new Error(
6783
+ "OpenAI Realtime requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
6784
+ );
6785
+ }
6786
+ this.apiKey = key;
6787
+ this.model = opts.model ?? "gpt-4o-mini-realtime-preview";
6788
+ this.voice = opts.voice ?? "alloy";
6758
6789
  }
6759
6790
  };
6760
- var TTSConfigImpl = class {
6761
- provider;
6791
+
6792
+ // src/engines/elevenlabs.ts
6793
+ var ConvAI = class {
6794
+ kind = "elevenlabs_convai";
6762
6795
  apiKey;
6796
+ agentId;
6763
6797
  voice;
6764
- constructor(provider, apiKey, voice = "alloy") {
6765
- this.provider = provider;
6766
- this.apiKey = apiKey;
6767
- this.voice = voice;
6798
+ constructor(opts = {}) {
6799
+ const key = opts.apiKey ?? process.env.ELEVENLABS_API_KEY;
6800
+ const agent = opts.agentId ?? process.env.ELEVENLABS_AGENT_ID;
6801
+ if (!key) {
6802
+ throw new Error(
6803
+ "ElevenLabs ConvAI requires an apiKey. Pass { apiKey: '...' } or set ELEVENLABS_API_KEY in the environment."
6804
+ );
6805
+ }
6806
+ if (!agent) {
6807
+ throw new Error(
6808
+ "ElevenLabs ConvAI requires an agentId. Pass { agentId: 'agent_...' } or set ELEVENLABS_AGENT_ID in the environment."
6809
+ );
6810
+ }
6811
+ this.apiKey = key;
6812
+ this.agentId = agent;
6813
+ this.voice = opts.voice;
6768
6814
  }
6769
- toDict() {
6770
- return { provider: this.provider, api_key: this.apiKey, voice: this.voice };
6815
+ };
6816
+
6817
+ // src/tunnels/index.ts
6818
/** Marker object for a Cloudflare tunnel; carries only its `kind` tag. */
var CloudflareTunnel = class {
  kind = "cloudflare";
};

/**
 * Marker object for a fixed hostname.
 * Construction fails when `opts.hostname` is missing or empty.
 */
var Static = class {
  kind = "static";
  hostname;
  constructor(opts) {
    const { hostname } = opts;
    if (hostname) {
      this.hostname = hostname;
      return;
    }
    throw new Error("Static tunnel requires a non-empty hostname.");
  }
};
6773
- function deepgram(opts) {
6774
- return new STTConfigImpl("deepgram", opts.apiKey, opts.language ?? "en");
6775
- }
6776
- function whisper(opts) {
6777
- return new STTConfigImpl("whisper", opts.apiKey, opts.language ?? "en");
6778
- }
6779
- function elevenlabs(opts) {
6780
- return new TTSConfigImpl("elevenlabs", opts.apiKey, opts.voice ?? "rachel");
6781
- }
6782
- function openaiTts(opts) {
6783
- return new TTSConfigImpl("openai", opts.apiKey, opts.voice ?? "alloy");
6784
- }
6785
6831
 
6786
6832
  // src/client.ts
6787
- init_server();
6788
6833
  var DEFAULT_BACKEND_URL2 = "wss://api.getpatter.com";
6789
6834
  var DEFAULT_REST_URL = "https://api.getpatter.com";
6835
/**
 * Build the snake_case dictionary for an STT config.
 *
 * @param {{provider: *, apiKey: *, language: *, options?: object}} cfg
 * @returns {object} `{ provider, api_key, language }`, plus a shallow copy of
 *   `cfg.options` under `options` when it is truthy.
 */
function sttConfigToDict(cfg) {
  const { provider, apiKey, language, options } = cfg;
  const dict = { provider, api_key: apiKey, language };
  return options ? { ...dict, options: { ...options } } : dict;
}
6844
/**
 * Build the snake_case dictionary for a TTS config.
 *
 * @param {{provider: *, apiKey: *, voice: *, options?: object}} cfg
 * @returns {object} `{ provider, api_key, voice }`, plus a shallow copy of
 *   `cfg.options` under `options` when it is truthy.
 */
function ttsConfigToDict(cfg) {
  const { provider, apiKey, voice, options } = cfg;
  const dict = { provider, api_key: apiKey, voice };
  return options ? { ...dict, options: { ...options } } : dict;
}
6790
6853
  var Patter = class {
6791
6854
  apiKey;
6792
6855
  backendUrl;
@@ -6797,20 +6860,39 @@ var Patter = class {
6797
6860
  embeddedServer = null;
6798
6861
  tunnelHandle = null;
6799
6862
  constructor(options) {
6800
- if ("mode" in options && options.mode === "local") {
6863
+ const hasCarrier = "carrier" in options && options.carrier !== void 0;
6864
+ const isLocal = "mode" in options && options.mode === "local" || hasCarrier;
6865
+ if (isLocal) {
6801
6866
  const local = options;
6802
6867
  if (!local.phoneNumber) {
6803
6868
  throw new Error("Local mode requires phoneNumber");
6804
6869
  }
6805
- if (!local.twilioSid && !local.telnyxKey) {
6806
- throw new Error("Local mode requires twilioSid or telnyxKey");
6870
+ if (!local.carrier) {
6871
+ throw new Error(
6872
+ "Local mode requires a `carrier` instance. Pass `carrier: new Twilio({...})` or `carrier: new Telnyx({...})`."
6873
+ );
6807
6874
  }
6808
- if (local.twilioSid && !local.twilioToken) {
6809
- throw new Error("twilioToken is required when using twilioSid");
6875
+ const carrier = local.carrier;
6876
+ const tunnel = local.tunnel;
6877
+ let tunnelWebhookUrl;
6878
+ if (tunnel instanceof Static) {
6879
+ if (local.webhookUrl) {
6880
+ throw new Error(
6881
+ "Cannot use both `tunnel: new StaticTunnel(...)` and `webhookUrl`. Pick one."
6882
+ );
6883
+ }
6884
+ tunnelWebhookUrl = tunnel.hostname;
6810
6885
  }
6811
6886
  this.mode = "local";
6812
- const normalizedLocal = local.webhookUrl ? { ...local, webhookUrl: local.webhookUrl.replace(/^https?:\/\//, "").replace(/\/$/, "") } : local;
6813
- this.localConfig = normalizedLocal;
6887
+ const rawWebhook = tunnelWebhookUrl ?? local.webhookUrl;
6888
+ const normalizedWebhook = rawWebhook ? rawWebhook.replace(/^https?:\/\//, "").replace(/\/$/, "") : void 0;
6889
+ this.localConfig = {
6890
+ carrier,
6891
+ phoneNumber: local.phoneNumber,
6892
+ webhookUrl: normalizedWebhook,
6893
+ tunnel: local.tunnel,
6894
+ openaiKey: local.openaiKey
6895
+ };
6814
6896
  this.apiKey = "";
6815
6897
  this.backendUrl = DEFAULT_BACKEND_URL2;
6816
6898
  this.restUrl = DEFAULT_REST_URL;
@@ -6827,25 +6909,55 @@ var Patter = class {
6827
6909
  }
6828
6910
  // === Local mode ===
6829
6911
  agent(opts) {
6830
- if (opts.provider) {
6912
+ let working = { ...opts };
6913
+ if (opts.engine) {
6914
+ if (opts.provider) {
6915
+ throw new Error(
6916
+ "Cannot pass both `engine:` and `provider:`. Use one (engine is preferred)."
6917
+ );
6918
+ }
6919
+ const engine = opts.engine;
6920
+ if (engine instanceof Realtime) {
6921
+ working = {
6922
+ ...working,
6923
+ provider: "openai_realtime",
6924
+ model: working.model ?? engine.model,
6925
+ voice: working.voice ?? engine.voice
6926
+ };
6927
+ if (this.localConfig && !this.localConfig.openaiKey) {
6928
+ this.localConfig = { ...this.localConfig, openaiKey: engine.apiKey };
6929
+ }
6930
+ } else if (engine instanceof ConvAI) {
6931
+ working = {
6932
+ ...working,
6933
+ provider: "elevenlabs_convai",
6934
+ voice: working.voice ?? engine.voice
6935
+ };
6936
+ } else {
6937
+ throw new Error(
6938
+ "Unknown engine. Expected OpenAIRealtime or ElevenLabsConvAI instance."
6939
+ );
6940
+ }
6941
+ }
6942
+ if (working.provider) {
6831
6943
  const valid = ["openai_realtime", "elevenlabs_convai", "pipeline"];
6832
- if (!valid.includes(opts.provider)) {
6833
- throw new Error(`provider must be one of: ${valid.join(", ")}. Got: '${opts.provider}'`);
6944
+ if (!valid.includes(working.provider)) {
6945
+ throw new Error(`provider must be one of: ${valid.join(", ")}. Got: '${working.provider}'`);
6834
6946
  }
6835
6947
  }
6836
- if (opts.tools) {
6837
- if (!Array.isArray(opts.tools)) {
6948
+ if (working.tools) {
6949
+ if (!Array.isArray(working.tools)) {
6838
6950
  throw new TypeError("tools must be an array");
6839
6951
  }
6840
- opts.tools.forEach((tool, i) => {
6841
- if (!tool.name) throw new Error(`tools[${i}] missing required 'name' field`);
6842
- if (!tool.webhookUrl && !tool.handler) throw new Error(`tools[${i}] requires either 'webhookUrl' or 'handler'`);
6952
+ working.tools.forEach((tool2, i) => {
6953
+ if (!tool2.name) throw new Error(`tools[${i}] missing required 'name' field`);
6954
+ if (!tool2.webhookUrl && !tool2.handler) throw new Error(`tools[${i}] requires either 'webhookUrl' or 'handler'`);
6843
6955
  });
6844
6956
  }
6845
- if (opts.variables !== void 0 && (typeof opts.variables !== "object" || Array.isArray(opts.variables))) {
6957
+ if (working.variables !== void 0 && (typeof working.variables !== "object" || Array.isArray(working.variables))) {
6846
6958
  throw new TypeError("variables must be an object");
6847
6959
  }
6848
- return { ...opts };
6960
+ return working;
6849
6961
  }
6850
6962
  async serve(opts) {
6851
6963
  if (this.mode !== "local" || !this.localConfig) {
@@ -6868,10 +6980,14 @@ var Patter = class {
6868
6980
  }
6869
6981
  let webhookUrl = this.localConfig.webhookUrl ?? "";
6870
6982
  const port = opts.port ?? 8e3;
6871
- if (opts.tunnel && webhookUrl) {
6983
+ const ctorTunnel = this.localConfig.tunnel;
6984
+ const wantsCloudflaredFromServe = opts.tunnel === true;
6985
+ const wantsCloudflaredFromCtor = ctorTunnel === true || ctorTunnel instanceof CloudflareTunnel;
6986
+ const wantsCloudflared = wantsCloudflaredFromServe || wantsCloudflaredFromCtor;
6987
+ if (wantsCloudflared && webhookUrl) {
6872
6988
  throw new Error("Cannot use both tunnel: true and webhookUrl. Pick one.");
6873
6989
  }
6874
- if (opts.tunnel) {
6990
+ if (wantsCloudflared) {
6875
6991
  const { startTunnel: startTunnel2 } = await Promise.resolve().then(() => (init_tunnel(), tunnel_exports));
6876
6992
  this.tunnelHandle = await startTunnel2(port);
6877
6993
  webhookUrl = this.tunnelHandle.hostname;
@@ -6881,17 +6997,29 @@ var Patter = class {
6881
6997
  "No webhookUrl configured. Either:\n - Pass webhookUrl in the Patter constructor\n - Use tunnel: true in serve() to auto-create a tunnel"
6882
6998
  );
6883
6999
  }
7000
+ const carrier = this.localConfig.carrier;
7001
+ const telephonyProvider = carrier.kind === "twilio" ? "twilio" : "telnyx";
7002
+ const { autoConfigureCarrier: autoConfigureCarrier2 } = await Promise.resolve().then(() => (init_carrier_config(), carrier_config_exports));
7003
+ await autoConfigureCarrier2({
7004
+ telephonyProvider,
7005
+ twilioSid: carrier.kind === "twilio" ? carrier.accountSid : void 0,
7006
+ twilioToken: carrier.kind === "twilio" ? carrier.authToken : void 0,
7007
+ telnyxKey: carrier.kind === "telnyx" ? carrier.apiKey : void 0,
7008
+ telnyxConnectionId: carrier.kind === "telnyx" ? carrier.connectionId : void 0,
7009
+ phoneNumber: this.localConfig.phoneNumber,
7010
+ webhookHost: webhookUrl
7011
+ });
6884
7012
  this.embeddedServer = new EmbeddedServer(
6885
7013
  {
6886
- twilioSid: this.localConfig.twilioSid,
6887
- twilioToken: this.localConfig.twilioToken,
7014
+ twilioSid: carrier.kind === "twilio" ? carrier.accountSid : void 0,
7015
+ twilioToken: carrier.kind === "twilio" ? carrier.authToken : void 0,
6888
7016
  openaiKey: this.localConfig.openaiKey,
6889
7017
  phoneNumber: this.localConfig.phoneNumber,
6890
7018
  webhookUrl,
6891
- telephonyProvider: this.localConfig.telephonyProvider,
6892
- telnyxKey: this.localConfig.telnyxKey,
6893
- telnyxConnectionId: this.localConfig.telnyxConnectionId,
6894
- telnyxPublicKey: this.localConfig.telnyxPublicKey
7019
+ telephonyProvider,
7020
+ telnyxKey: carrier.kind === "telnyx" ? carrier.apiKey : void 0,
7021
+ telnyxConnectionId: carrier.kind === "telnyx" ? carrier.connectionId : void 0,
7022
+ telnyxPublicKey: carrier.kind === "telnyx" ? carrier.publicKey : void 0
6895
7023
  },
6896
7024
  opts.agent,
6897
7025
  opts.onCallStart,
@@ -6952,32 +7080,51 @@ var Patter = class {
6952
7080
  if (!this.localConfig) {
6953
7081
  throw new Error("local config missing");
6954
7082
  }
6955
- const { phoneNumber, webhookUrl, telephonyProvider } = this.localConfig;
6956
- if (telephonyProvider === "telnyx") {
6957
- const telnyxKey = this.localConfig.telnyxKey ?? "";
6958
- const connectionId = this.localConfig.telnyxConnectionId ?? "";
7083
+ const { phoneNumber, webhookUrl, carrier } = this.localConfig;
7084
+ if (carrier.kind === "telnyx") {
7085
+ const telnyxKey = carrier.apiKey;
7086
+ const connectionId = carrier.connectionId;
6959
7087
  const streamUrl = `wss://${webhookUrl}/ws/stream/${encodeURIComponent(localOpts.to)}?caller=${encodeURIComponent(phoneNumber)}&callee=${encodeURIComponent(localOpts.to)}`;
7088
+ const telnyxPayload = {
7089
+ connection_id: connectionId,
7090
+ from: phoneNumber,
7091
+ to: localOpts.to,
7092
+ stream_url: streamUrl,
7093
+ stream_track: "both_tracks"
7094
+ };
7095
+ if (localOpts.ringTimeout !== void 0) {
7096
+ telnyxPayload.timeout_secs = Math.max(1, Math.floor(localOpts.ringTimeout));
7097
+ }
6960
7098
  const response2 = await fetch("https://api.telnyx.com/v2/calls", {
6961
7099
  method: "POST",
6962
7100
  headers: {
6963
7101
  "Content-Type": "application/json",
6964
7102
  Authorization: `Bearer ${telnyxKey}`
6965
7103
  },
6966
- body: JSON.stringify({
6967
- connection_id: connectionId,
6968
- from: phoneNumber,
6969
- to: localOpts.to,
6970
- stream_url: streamUrl,
6971
- stream_track: "both_tracks"
6972
- })
7104
+ body: JSON.stringify(telnyxPayload)
6973
7105
  });
6974
7106
  if (!response2.ok) {
6975
7107
  throw new ProvisionError(`Failed to initiate Telnyx call: ${await response2.text()}`);
6976
7108
  }
6977
- return;
6978
- }
6979
- const twilioSid = this.localConfig.twilioSid ?? "";
6980
- const twilioToken = this.localConfig.twilioToken ?? "";
7109
+ if (this.embeddedServer) {
7110
+ try {
7111
+ const body = await response2.clone().json();
7112
+ const callId = body.data?.call_control_id;
7113
+ if (callId) {
7114
+ this.embeddedServer.metricsStore.recordCallInitiated({
7115
+ call_id: callId,
7116
+ caller: phoneNumber,
7117
+ callee: localOpts.to,
7118
+ direction: "outbound"
7119
+ });
7120
+ }
7121
+ } catch {
7122
+ }
7123
+ }
7124
+ return;
7125
+ }
7126
+ const twilioSid = carrier.accountSid;
7127
+ const twilioToken = carrier.authToken;
6981
7128
  const statusCallbackUrl = `https://${webhookUrl}/webhooks/twilio/status`;
6982
7129
  const url = `https://api.twilio.com/2010-04-01/Accounts/${twilioSid}/Calls.json`;
6983
7130
  const params = new URLSearchParams({
@@ -6985,13 +7132,19 @@ var Patter = class {
6985
7132
  From: phoneNumber,
6986
7133
  Url: `https://${webhookUrl}/webhooks/twilio/voice`,
6987
7134
  StatusCallback: statusCallbackUrl,
6988
- StatusCallbackMethod: "POST"
7135
+ StatusCallbackMethod: "POST",
7136
+ // Full lifecycle so the dashboard sees ringing/no-answer/busy/failed
7137
+ // transitions even when media never arrives.
7138
+ StatusCallbackEvent: "initiated ringing answered completed"
6989
7139
  });
6990
7140
  if (localOpts.machineDetection) {
6991
7141
  params.append("MachineDetection", "DetectMessageEnd");
6992
7142
  params.append("AsyncAmd", "true");
6993
7143
  params.append("AsyncAmdStatusCallback", `https://${webhookUrl}/webhooks/twilio/amd`);
6994
7144
  }
7145
+ if (localOpts.ringTimeout !== void 0) {
7146
+ params.append("Timeout", String(Math.max(1, Math.floor(localOpts.ringTimeout))));
7147
+ }
6995
7148
  if (localOpts.voicemailMessage && this.embeddedServer) {
6996
7149
  this.embeddedServer.voicemailMessage = localOpts.voicemailMessage;
6997
7150
  }
@@ -7006,6 +7159,21 @@ var Patter = class {
7006
7159
  if (!response.ok) {
7007
7160
  throw new ProvisionError(`Failed to initiate call: ${await response.text()}`);
7008
7161
  }
7162
+ if (this.embeddedServer) {
7163
+ try {
7164
+ const body = await response.clone().json();
7165
+ const callSid = body.sid;
7166
+ if (callSid) {
7167
+ this.embeddedServer.metricsStore.recordCallInitiated({
7168
+ call_id: callSid,
7169
+ caller: phoneNumber,
7170
+ callee: localOpts.to,
7171
+ direction: "outbound"
7172
+ });
7173
+ }
7174
+ } catch {
7175
+ }
7176
+ }
7009
7177
  return;
7010
7178
  }
7011
7179
  const cloudOpts = options;
@@ -7088,61 +7256,6 @@ var Patter = class {
7088
7256
  const data = await response.json();
7089
7257
  return data.map((c) => ({ id: c.id, direction: c.direction, caller: c.caller, callee: c.callee, startedAt: c.started_at, endedAt: c.ended_at, durationSeconds: c.duration_seconds, status: c.status, transcript: c.transcript }));
7090
7258
  }
7091
- // Provider helpers
7092
- static deepgram = deepgram;
7093
- static whisper = whisper;
7094
- static elevenlabs = elevenlabs;
7095
- static openaiTts = openaiTts;
7096
- static guardrail(opts) {
7097
- return {
7098
- name: opts.name,
7099
- blockedTerms: opts.blockedTerms,
7100
- check: opts.check,
7101
- replacement: opts.replacement ?? "I'm sorry, I can't respond to that."
7102
- };
7103
- }
7104
- /**
7105
- * Create a tool definition for use with `agent({ tools: [...] })`.
7106
- *
7107
- * Either `handler` (a function) or `webhookUrl` must be provided.
7108
- *
7109
- * @param opts.name - Tool name (visible to the LLM).
7110
- * @param opts.description - What the tool does (visible to the LLM).
7111
- * @param opts.parameters - JSON Schema for tool arguments.
7112
- * @param opts.handler - Async function called in-process when the LLM invokes the tool.
7113
- * @param opts.webhookUrl - URL to POST to when the LLM invokes the tool.
7114
- *
7115
- * @example
7116
- * ```ts
7117
- * phone.agent({
7118
- * systemPrompt: 'You are a pizza bot.',
7119
- * tools: [
7120
- * Patter.tool({
7121
- * name: 'check_menu',
7122
- * description: 'Check available menu items',
7123
- * handler: async (args) => JSON.stringify({ items: ['margherita'] }),
7124
- * }),
7125
- * ],
7126
- * });
7127
- * ```
7128
- */
7129
- static tool(opts) {
7130
- if (!opts.handler && !opts.webhookUrl) {
7131
- throw new Error("tool() requires either handler or webhookUrl");
7132
- }
7133
- const t = {
7134
- name: opts.name,
7135
- description: opts.description ?? "",
7136
- parameters: opts.parameters ?? { type: "object", properties: {} }
7137
- };
7138
- if (opts.handler) {
7139
- t.handler = opts.handler;
7140
- }
7141
- if (opts.webhookUrl) {
7142
- t.webhookUrl = opts.webhookUrl;
7143
- }
7144
- return t;
7145
- }
7146
7259
  // Internal
7147
7260
  async registerNumber(provider, providerKey, number, providerSecret, country = "US", stt, tts) {
7148
7261
  const credentials = { api_key: providerKey };
@@ -7158,8 +7271,8 @@ var Patter = class {
7158
7271
  provider,
7159
7272
  provider_credentials: credentials,
7160
7273
  country,
7161
- stt_config: stt?.toDict() ?? null,
7162
- tts_config: tts?.toDict() ?? null
7274
+ stt_config: stt ? stt.toDict?.() ?? sttConfigToDict(stt) : null,
7275
+ tts_config: tts ? tts.toDict?.() ?? ttsConfigToDict(tts) : null
7163
7276
  })
7164
7277
  });
7165
7278
  if (response.status === 409) return;
@@ -7237,6 +7350,62 @@ function filterForTTS(text) {
7237
7350
  return filterEmoji(filterMarkdown(text));
7238
7351
  }
7239
7352
 
7353
+ // src/providers.ts
7354
+ var STTConfigImpl = class {
7355
+ provider;
7356
+ apiKey;
7357
+ language;
7358
+ options;
7359
+ constructor(provider, apiKey, language = "en", options) {
7360
+ this.provider = provider;
7361
+ this.apiKey = apiKey;
7362
+ this.language = language;
7363
+ if (options) this.options = options;
7364
+ }
7365
+ toDict() {
7366
+ const out = {
7367
+ provider: this.provider,
7368
+ api_key: this.apiKey,
7369
+ language: this.language
7370
+ };
7371
+ if (this.options) out.options = { ...this.options };
7372
+ return out;
7373
+ }
7374
+ };
7375
+ var TTSConfigImpl = class {
7376
+ provider;
7377
+ apiKey;
7378
+ voice;
7379
+ constructor(provider, apiKey, voice = "alloy") {
7380
+ this.provider = provider;
7381
+ this.apiKey = apiKey;
7382
+ this.voice = voice;
7383
+ }
7384
+ toDict() {
7385
+ return { provider: this.provider, api_key: this.apiKey, voice: this.voice };
7386
+ }
7387
+ };
7388
+ function deepgram(opts) {
7389
+ const options = {
7390
+ model: opts.model ?? "nova-3",
7391
+ endpointing_ms: opts.endpointingMs ?? 150,
7392
+ utterance_end_ms: opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3,
7393
+ smart_format: opts.smartFormat ?? true,
7394
+ interim_results: opts.interimResults ?? true
7395
+ };
7396
+ if (opts.vadEvents !== void 0) options.vad_events = opts.vadEvents;
7397
+ return new STTConfigImpl("deepgram", opts.apiKey, opts.language ?? "en", options);
7398
+ }
7399
+ function whisper(opts) {
7400
+ return new STTConfigImpl("whisper", opts.apiKey, opts.language ?? "en");
7401
+ }
7402
+ function elevenlabs(opts) {
7403
+ return new TTSConfigImpl("elevenlabs", opts.apiKey, opts.voice ?? "rachel");
7404
+ }
7405
+ function openaiTts(opts) {
7406
+ return new TTSConfigImpl("openai", opts.apiKey, opts.voice ?? "alloy");
7407
+ }
7408
+
7240
7409
  // src/index.ts
7241
7410
  init_pricing();
7242
7411
  init_metrics();
@@ -7293,6 +7462,37 @@ var FallbackLLMProvider = class {
7293
7462
  }
7294
7463
  }
7295
7464
  }
7465
+ /**
7466
+ * Async-friendly disposer. Parity with Python's ``FallbackLLMProvider.aclose()``
7467
+ * — safe to call multiple times, returns a resolved Promise once all probe
7468
+ * timers are cleared. Prefer this in async contexts so awaiting the
7469
+ * shutdown integrates naturally with the owning lifecycle.
7470
+ */
7471
+ async aclose() {
7472
+ this.destroy();
7473
+ }
7474
+ /**
7475
+ * Explicit-resource-management hook so callers can write
7476
+ * ``await using fallback = new FallbackLLMProvider([...])`` and have
7477
+ * background probe timers cleared automatically when the block exits.
7478
+ * Mirrors Python's ``async with FallbackLLMProvider(...)``.
7479
+ */
7480
+ async [Symbol.asyncDispose]() {
7481
+ await this.aclose();
7482
+ }
7483
+ /**
7484
+ * Stream only the text deltas, flattening the chunk envelope. Parity with
7485
+ * Python's ``FallbackLLMProvider.complete_stream``. Tool-call and done
7486
+ * markers are filtered out so callers can concatenate the yielded strings
7487
+ * directly.
7488
+ */
7489
+ async *completeStream(messages, tools) {
7490
+ for await (const chunk of this.stream(messages, tools)) {
7491
+ if (chunk.type === "text") {
7492
+ yield chunk.content ?? "";
7493
+ }
7494
+ }
7495
+ }
7296
7496
  // -----------------------------------------------------------------------
7297
7497
  // LLMProvider implementation
7298
7498
  // -----------------------------------------------------------------------
@@ -7815,13 +8015,37 @@ function wrapCallback(cb) {
7815
8015
  }
7816
8016
  };
7817
8017
  }
7818
- async function scheduleCron(cron, callback) {
7819
- const cm = await loadCron();
7820
- if (!cm.validate(cron)) {
7821
- throw new Error(`Invalid cron expression: ${cron}`);
7822
- }
7823
- const task = cm.schedule(cron, wrapCallback(callback));
7824
- return makeHandle(`cron-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`, task);
8018
+ function scheduleCron(cron, callback) {
8019
+ let cancelled = false;
8020
+ let task = null;
8021
+ const jobId = `cron-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
8022
+ loadCron().then((cm) => {
8023
+ if (cancelled) return;
8024
+ if (!cm.validate(cron)) {
8025
+ throw new Error(`Invalid cron expression: ${cron}`);
8026
+ }
8027
+ task = cm.schedule(cron, wrapCallback(callback));
8028
+ }).catch((err) => getLogger().error(`scheduleCron failed: ${String(err)}`));
8029
+ return {
8030
+ jobId,
8031
+ cancel() {
8032
+ if (cancelled) return;
8033
+ cancelled = true;
8034
+ if (task) {
8035
+ try {
8036
+ task.stop();
8037
+ } catch {
8038
+ }
8039
+ try {
8040
+ task.destroy?.();
8041
+ } catch {
8042
+ }
8043
+ }
8044
+ },
8045
+ get pending() {
8046
+ return !cancelled;
8047
+ }
8048
+ };
7825
8049
  }
7826
8050
  function scheduleOnce(at, callback) {
7827
8051
  const delayMs = at.getTime() - Date.now();
@@ -7843,8 +8067,18 @@ function scheduleOnce(at, callback) {
7843
8067
  }
7844
8068
  };
7845
8069
  }
7846
- function scheduleInterval(intervalMs, callback) {
7847
- if (intervalMs <= 0) throw new Error("intervalMs must be positive");
8070
+ function scheduleInterval(intervalOrOpts, callback) {
8071
+ let intervalMs;
8072
+ if (typeof intervalOrOpts === "number") {
8073
+ intervalMs = intervalOrOpts;
8074
+ } else if (intervalOrOpts.intervalMs !== void 0) {
8075
+ intervalMs = intervalOrOpts.intervalMs;
8076
+ } else if (intervalOrOpts.seconds !== void 0) {
8077
+ intervalMs = intervalOrOpts.seconds * 1e3;
8078
+ } else {
8079
+ throw new Error("scheduleInterval requires seconds or intervalMs");
8080
+ }
8081
+ if (intervalMs <= 0) throw new Error("interval must be positive");
7848
8082
  let cancelled = false;
7849
8083
  const wrapped = wrapCallback(callback);
7850
8084
  const timer = setInterval(() => {
@@ -7861,111 +8095,404 @@ function scheduleInterval(intervalMs, callback) {
7861
8095
  }
7862
8096
  };
7863
8097
  }
7864
- function makeHandle(jobId, task) {
7865
- let cancelled = false;
7866
- return {
7867
- jobId,
7868
- cancel() {
7869
- if (cancelled) return;
7870
- cancelled = true;
7871
- try {
7872
- task.stop();
7873
- } catch {
7874
- }
7875
- try {
7876
- task.destroy?.();
7877
- } catch {
7878
- }
7879
- },
7880
- get pending() {
7881
- return !cancelled;
7882
- }
7883
- };
7884
- }
7885
8098
 
7886
- // src/index.ts
8099
+ // src/stt/deepgram.ts
7887
8100
  init_deepgram_stt();
8101
+ var STT = class extends DeepgramSTT {
8102
+ constructor(opts = {}) {
8103
+ const key = opts.apiKey ?? process.env.DEEPGRAM_API_KEY;
8104
+ if (!key) {
8105
+ throw new Error(
8106
+ "Deepgram STT requires an apiKey. Pass { apiKey: 'dg_...' } or set DEEPGRAM_API_KEY in the environment."
8107
+ );
8108
+ }
8109
+ super(
8110
+ key,
8111
+ opts.language ?? "en",
8112
+ opts.model ?? "nova-3",
8113
+ opts.encoding ?? "linear16",
8114
+ opts.sampleRate ?? 16e3,
8115
+ {
8116
+ endpointingMs: opts.endpointingMs ?? 150,
8117
+ utteranceEndMs: opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3,
8118
+ smartFormat: opts.smartFormat ?? true,
8119
+ interimResults: opts.interimResults ?? true,
8120
+ ...opts.vadEvents !== void 0 ? { vadEvents: opts.vadEvents } : {}
8121
+ }
8122
+ );
8123
+ }
8124
+ };
7888
8125
 
7889
- // src/providers/soniox-stt.ts
7890
- var import_ws7 = __toESM(require("ws"));
8126
+ // src/providers/whisper-stt.ts
7891
8127
  init_logger();
7892
- var SONIOX_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket";
7893
- var KEEPALIVE_MESSAGE = '{"type": "keepalive"}';
7894
- var END_TOKEN = "<end>";
7895
- var FINALIZED_TOKEN = "<fin>";
7896
- var KEEPALIVE_INTERVAL_MS = 5e3;
7897
- function isEndToken(token) {
7898
- return token.text === END_TOKEN || token.text === FINALIZED_TOKEN;
8128
+ var OPENAI_TRANSCRIPTION_URL = "https://api.openai.com/v1/audio/transcriptions";
8129
+ var DEFAULT_BUFFER_SIZE = 16e3 * 2;
8130
+ function wrapPcmInWav(pcm, sampleRate = 16e3, channels = 1, bitsPerSample = 16) {
8131
+ const dataSize = pcm.length;
8132
+ const header = Buffer.alloc(44);
8133
+ header.write("RIFF", 0);
8134
+ header.writeUInt32LE(36 + dataSize, 4);
8135
+ header.write("WAVE", 8);
8136
+ header.write("fmt ", 12);
8137
+ header.writeUInt32LE(16, 16);
8138
+ header.writeUInt16LE(1, 20);
8139
+ header.writeUInt16LE(channels, 22);
8140
+ header.writeUInt32LE(sampleRate, 24);
8141
+ header.writeUInt32LE(sampleRate * channels * (bitsPerSample / 8), 28);
8142
+ header.writeUInt16LE(channels * (bitsPerSample / 8), 32);
8143
+ header.writeUInt16LE(bitsPerSample, 34);
8144
+ header.write("data", 36);
8145
+ header.writeUInt32LE(dataSize, 40);
8146
+ return Buffer.concat([header, pcm]);
7899
8147
  }
7900
- var TokenAccumulator = class {
7901
- text = "";
7902
- confSum = 0;
7903
- confCount = 0;
7904
- update(token) {
7905
- if (token.text) {
7906
- this.text += token.text;
7907
- }
7908
- if (typeof token.confidence === "number") {
7909
- this.confSum += token.confidence;
7910
- this.confCount += 1;
7911
- }
8148
+ var WhisperSTT = class _WhisperSTT {
8149
+ apiKey;
8150
+ model;
8151
+ language;
8152
+ bufferSize;
8153
+ buffer = Buffer.alloc(0);
8154
+ callbacks = [];
8155
+ running = false;
8156
+ pendingTranscriptions = [];
8157
+ constructor(apiKey, model = "whisper-1", language, bufferSize = DEFAULT_BUFFER_SIZE) {
8158
+ this.apiKey = apiKey;
8159
+ this.model = model;
8160
+ this.language = language;
8161
+ this.bufferSize = bufferSize;
7912
8162
  }
7913
- get confidence() {
7914
- return this.confCount === 0 ? 0 : this.confSum / this.confCount;
8163
+ /** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
8164
+ static forTwilio(apiKey, language = "en", model = "whisper-1") {
8165
+ return new _WhisperSTT(apiKey, model, language);
7915
8166
  }
7916
- reset() {
7917
- this.text = "";
7918
- this.confSum = 0;
7919
- this.confCount = 0;
8167
+ async connect() {
8168
+ this.running = true;
8169
+ this.buffer = Buffer.alloc(0);
7920
8170
  }
7921
- get raw() {
7922
- return { sum: this.confSum, count: this.confCount };
8171
+ sendAudio(audio) {
8172
+ if (!this.running) return;
8173
+ this.buffer = Buffer.concat([this.buffer, audio]);
8174
+ if (this.buffer.length >= this.bufferSize) {
8175
+ const pcm = this.buffer;
8176
+ this.buffer = Buffer.alloc(0);
8177
+ this.trackTranscription(this.transcribeBuffer(pcm));
8178
+ }
8179
+ }
8180
+ trackTranscription(promise) {
8181
+ const wrapped = promise.finally(() => {
8182
+ const idx = this.pendingTranscriptions.indexOf(wrapped);
8183
+ if (idx !== -1) this.pendingTranscriptions.splice(idx, 1);
8184
+ });
8185
+ this.pendingTranscriptions.push(wrapped);
7923
8186
  }
7924
- };
7925
- var SonioxSTT = class _SonioxSTT {
7926
- ws = null;
7927
- callbacks = [];
7928
- final = new TokenAccumulator();
7929
- keepaliveTimer = null;
7930
- apiKey;
7931
- model;
7932
- languageHints;
7933
- languageHintsStrict;
7934
- sampleRate;
7935
- numChannels;
7936
- enableSpeakerDiarization;
7937
- enableLanguageIdentification;
7938
- maxEndpointDelayMs;
7939
- clientReferenceId;
7940
- baseUrl;
7941
- constructor(apiKey, options = {}) {
7942
- if (!apiKey) {
7943
- throw new Error("Soniox apiKey is required");
8187
+ onTranscript(callback) {
8188
+ if (this.callbacks.length >= 10) {
8189
+ getLogger().warn("WhisperSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
8190
+ this.callbacks[this.callbacks.length - 1] = callback;
8191
+ return;
7944
8192
  }
7945
- const maxEndpointDelayMs = options.maxEndpointDelayMs ?? 500;
7946
- if (maxEndpointDelayMs < 500 || maxEndpointDelayMs > 3e3) {
7947
- throw new Error("maxEndpointDelayMs must be between 500 and 3000");
8193
+ this.callbacks.push(callback);
8194
+ }
8195
+ async close() {
8196
+ this.running = false;
8197
+ if (this.buffer.length >= this.bufferSize / 4) {
8198
+ const pcm = this.buffer;
8199
+ this.buffer = Buffer.alloc(0);
8200
+ this.trackTranscription(this.transcribeBuffer(pcm));
8201
+ } else {
8202
+ this.buffer = Buffer.alloc(0);
8203
+ }
8204
+ await Promise.allSettled(this.pendingTranscriptions);
8205
+ this.callbacks = [];
8206
+ }
8207
+ // ------------------------------------------------------------------
8208
+ // Private
8209
+ // ------------------------------------------------------------------
8210
+ async transcribeBuffer(pcm) {
8211
+ const wav = wrapPcmInWav(pcm);
8212
+ const formData = new FormData();
8213
+ formData.append("file", new Blob([wav.buffer.slice(wav.byteOffset, wav.byteOffset + wav.byteLength)], { type: "audio/wav" }), "audio.wav");
8214
+ formData.append("model", this.model);
8215
+ if (this.language) {
8216
+ formData.append("language", this.language);
8217
+ }
8218
+ try {
8219
+ const resp = await fetch(OPENAI_TRANSCRIPTION_URL, {
8220
+ method: "POST",
8221
+ headers: { Authorization: `Bearer ${this.apiKey}` },
8222
+ body: formData,
8223
+ signal: AbortSignal.timeout(15e3)
8224
+ });
8225
+ if (!resp.ok) {
8226
+ const body = await resp.text();
8227
+ getLogger().error(`WhisperSTT transcription error: ${resp.status} ${body}`);
8228
+ return;
8229
+ }
8230
+ const json = await resp.json();
8231
+ const text = (json.text ?? "").trim();
8232
+ if (!text) return;
8233
+ const transcript = {
8234
+ text,
8235
+ isFinal: true,
8236
+ confidence: 1
8237
+ };
8238
+ for (const cb of this.callbacks) {
8239
+ cb(transcript);
8240
+ }
8241
+ } catch (err) {
8242
+ getLogger().error(`WhisperSTT transcription error: ${String(err)}`);
7948
8243
  }
7949
- this.apiKey = apiKey;
7950
- this.model = options.model ?? "stt-rt-v4";
7951
- this.languageHints = options.languageHints;
7952
- this.languageHintsStrict = options.languageHintsStrict ?? false;
7953
- this.sampleRate = options.sampleRate ?? 16e3;
7954
- this.numChannels = options.numChannels ?? 1;
7955
- this.enableSpeakerDiarization = options.enableSpeakerDiarization ?? false;
7956
- this.enableLanguageIdentification = options.enableLanguageIdentification ?? true;
7957
- this.maxEndpointDelayMs = maxEndpointDelayMs;
7958
- this.clientReferenceId = options.clientReferenceId;
7959
- this.baseUrl = options.baseUrl ?? SONIOX_WS_URL;
7960
8244
  }
7961
- /** Factory for Twilio-style 8 kHz linear PCM. */
7962
- static forTwilio(apiKey, languageHints) {
7963
- return new _SonioxSTT(apiKey, { sampleRate: 8e3, languageHints });
8245
+ };
8246
+
8247
+ // src/stt/whisper.ts
8248
+ var STT2 = class extends WhisperSTT {
8249
+ constructor(opts = {}) {
8250
+ const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
8251
+ if (!key) {
8252
+ throw new Error(
8253
+ "Whisper STT requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
8254
+ );
8255
+ }
8256
+ super(key, opts.model ?? "whisper-1", opts.language, opts.bufferSize);
7964
8257
  }
7965
- buildConfig() {
7966
- const config = {
7967
- api_key: this.apiKey,
7968
- model: this.model,
8258
+ };
8259
+
8260
+ // src/providers/cartesia-stt.ts
8261
+ var import_ws7 = __toESM(require("ws"));
8262
+ init_logger();
8263
+ var DEFAULT_BASE_URL = "https://api.cartesia.ai";
8264
+ var API_VERSION = "2025-04-16";
8265
+ var USER_AGENT = "Patter/1.0 (integration=LiveKit-port; provider=Cartesia)";
8266
+ var KEEPALIVE_INTERVAL_MS = 3e4;
8267
+ var CONNECT_TIMEOUT_MS = 1e4;
8268
+ var MAX_CALLBACKS = 10;
8269
+ var CartesiaSTT = class {
8270
+ constructor(apiKey, options = {}) {
8271
+ this.apiKey = apiKey;
8272
+ this.options = options;
8273
+ if (!apiKey) {
8274
+ throw new Error("CartesiaSTT requires a non-empty apiKey");
8275
+ }
8276
+ }
8277
+ ws = null;
8278
+ callbacks = [];
8279
+ keepaliveTimer = null;
8280
+ /** Cartesia request id — set from the server transcript events. */
8281
+ requestId = "";
8282
+ buildWsUrl() {
8283
+ const opts = this.options;
8284
+ const rawBase = opts.baseUrl ?? DEFAULT_BASE_URL;
8285
+ let base;
8286
+ if (rawBase.startsWith("http://")) {
8287
+ base = `ws://${rawBase.slice("http://".length)}`;
8288
+ } else if (rawBase.startsWith("https://")) {
8289
+ base = `wss://${rawBase.slice("https://".length)}`;
8290
+ } else if (rawBase.startsWith("ws://") || rawBase.startsWith("wss://")) {
8291
+ base = rawBase;
8292
+ } else {
8293
+ base = `wss://${rawBase}`;
8294
+ }
8295
+ const language = opts.language ?? "en";
8296
+ const params = new URLSearchParams({
8297
+ model: opts.model ?? "ink-whisper",
8298
+ sample_rate: String(opts.sampleRate ?? 16e3),
8299
+ encoding: opts.encoding ?? "pcm_s16le",
8300
+ cartesia_version: API_VERSION,
8301
+ api_key: this.apiKey,
8302
+ language
8303
+ });
8304
+ return `${base}/stt/websocket?${params.toString()}`;
8305
+ }
8306
+ async connect() {
8307
+ const url = this.buildWsUrl();
8308
+ this.ws = new import_ws7.default(url, {
8309
+ headers: { "User-Agent": USER_AGENT }
8310
+ });
8311
+ await new Promise((resolve, reject) => {
8312
+ const timer = setTimeout(
8313
+ () => reject(new Error("Cartesia STT connect timeout")),
8314
+ CONNECT_TIMEOUT_MS
8315
+ );
8316
+ this.ws.once("open", () => {
8317
+ clearTimeout(timer);
8318
+ resolve();
8319
+ });
8320
+ this.ws.once("error", (err) => {
8321
+ clearTimeout(timer);
8322
+ reject(err);
8323
+ });
8324
+ });
8325
+ this.ws.on("message", (raw) => {
8326
+ let event;
8327
+ try {
8328
+ event = JSON.parse(raw.toString());
8329
+ } catch {
8330
+ return;
8331
+ }
8332
+ this.handleEvent(event);
8333
+ });
8334
+ this.keepaliveTimer = setInterval(() => {
8335
+ if (this.ws && this.ws.readyState === import_ws7.default.OPEN) {
8336
+ try {
8337
+ this.ws.ping();
8338
+ } catch {
8339
+ }
8340
+ }
8341
+ }, KEEPALIVE_INTERVAL_MS);
8342
+ }
8343
+ handleEvent(event) {
8344
+ const type = event.type;
8345
+ if (type === "transcript") {
8346
+ const text = (event.text ?? "").trim();
8347
+ const isFinal = Boolean(event.is_final);
8348
+ if (!text && !isFinal) return;
8349
+ if (event.request_id) {
8350
+ this.requestId = event.request_id;
8351
+ }
8352
+ if (!text) return;
8353
+ const confidence = Number(event.probability ?? 1);
8354
+ this.emit({ text, isFinal, confidence });
8355
+ return;
8356
+ }
8357
+ if (type === "error") {
8358
+ getLogger().error(`Cartesia STT error: ${event.message ?? "unknown"}`);
8359
+ return;
8360
+ }
8361
+ }
8362
+ emit(transcript) {
8363
+ for (const cb of this.callbacks) {
8364
+ cb(transcript);
8365
+ }
8366
+ }
8367
+ sendAudio(audio) {
8368
+ if (!this.ws || this.ws.readyState !== import_ws7.default.OPEN) return;
8369
+ this.ws.send(audio);
8370
+ }
8371
+ onTranscript(callback) {
8372
+ if (this.callbacks.length >= MAX_CALLBACKS) {
8373
+ getLogger().warn(
8374
+ "CartesiaSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
8375
+ );
8376
+ this.callbacks[this.callbacks.length - 1] = callback;
8377
+ return;
8378
+ }
8379
+ this.callbacks.push(callback);
8380
+ }
8381
+ close() {
8382
+ if (this.keepaliveTimer) {
8383
+ clearInterval(this.keepaliveTimer);
8384
+ this.keepaliveTimer = null;
8385
+ }
8386
+ if (this.ws) {
8387
+ try {
8388
+ this.ws.send("finalize");
8389
+ } catch {
8390
+ }
8391
+ this.ws.close();
8392
+ this.ws = null;
8393
+ }
8394
+ }
8395
+ };
8396
+
8397
+ // src/stt/cartesia.ts
8398
+ var STT3 = class extends CartesiaSTT {
8399
+ constructor(opts = {}) {
8400
+ const key = opts.apiKey ?? process.env.CARTESIA_API_KEY;
8401
+ if (!key) {
8402
+ throw new Error(
8403
+ "Cartesia STT requires an apiKey. Pass { apiKey: '...' } or set CARTESIA_API_KEY in the environment."
8404
+ );
8405
+ }
8406
+ super(key, {
8407
+ model: opts.model,
8408
+ language: opts.language,
8409
+ encoding: opts.encoding,
8410
+ sampleRate: opts.sampleRate,
8411
+ baseUrl: opts.baseUrl
8412
+ });
8413
+ }
8414
+ };
8415
+
8416
+ // src/providers/soniox-stt.ts
8417
+ var import_ws8 = __toESM(require("ws"));
8418
+ init_logger();
8419
+ var SONIOX_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket";
8420
+ var KEEPALIVE_MESSAGE = '{"type": "keepalive"}';
8421
+ var END_TOKEN = "<end>";
8422
+ var FINALIZED_TOKEN = "<fin>";
8423
+ var KEEPALIVE_INTERVAL_MS2 = 5e3;
8424
+ function isEndToken(token) {
8425
+ return token.text === END_TOKEN || token.text === FINALIZED_TOKEN;
8426
+ }
8427
+ var TokenAccumulator = class {
8428
+ text = "";
8429
+ confSum = 0;
8430
+ confCount = 0;
8431
+ update(token) {
8432
+ if (token.text) {
8433
+ this.text += token.text;
8434
+ }
8435
+ if (typeof token.confidence === "number") {
8436
+ this.confSum += token.confidence;
8437
+ this.confCount += 1;
8438
+ }
8439
+ }
8440
+ get confidence() {
8441
+ return this.confCount === 0 ? 0 : this.confSum / this.confCount;
8442
+ }
8443
+ reset() {
8444
+ this.text = "";
8445
+ this.confSum = 0;
8446
+ this.confCount = 0;
8447
+ }
8448
+ get raw() {
8449
+ return { sum: this.confSum, count: this.confCount };
8450
+ }
8451
+ };
8452
+ var SonioxSTT = class _SonioxSTT {
8453
+ ws = null;
8454
+ callbacks = [];
8455
+ final = new TokenAccumulator();
8456
+ keepaliveTimer = null;
8457
+ apiKey;
8458
+ model;
8459
+ languageHints;
8460
+ languageHintsStrict;
8461
+ sampleRate;
8462
+ numChannels;
8463
+ enableSpeakerDiarization;
8464
+ enableLanguageIdentification;
8465
+ maxEndpointDelayMs;
8466
+ clientReferenceId;
8467
+ baseUrl;
8468
+ constructor(apiKey, options = {}) {
8469
+ if (!apiKey) {
8470
+ throw new Error("Soniox apiKey is required");
8471
+ }
8472
+ const maxEndpointDelayMs = options.maxEndpointDelayMs ?? 500;
8473
+ if (maxEndpointDelayMs < 500 || maxEndpointDelayMs > 3e3) {
8474
+ throw new Error("maxEndpointDelayMs must be between 500 and 3000");
8475
+ }
8476
+ this.apiKey = apiKey;
8477
+ this.model = options.model ?? "stt-rt-v4";
8478
+ this.languageHints = options.languageHints;
8479
+ this.languageHintsStrict = options.languageHintsStrict ?? false;
8480
+ this.sampleRate = options.sampleRate ?? 16e3;
8481
+ this.numChannels = options.numChannels ?? 1;
8482
+ this.enableSpeakerDiarization = options.enableSpeakerDiarization ?? false;
8483
+ this.enableLanguageIdentification = options.enableLanguageIdentification ?? true;
8484
+ this.maxEndpointDelayMs = maxEndpointDelayMs;
8485
+ this.clientReferenceId = options.clientReferenceId;
8486
+ this.baseUrl = options.baseUrl ?? SONIOX_WS_URL;
8487
+ }
8488
+ /** Factory for Twilio-style 8 kHz linear PCM. */
8489
+ static forTwilio(apiKey, languageHints) {
8490
+ return new _SonioxSTT(apiKey, { sampleRate: 8e3, languageHints });
8491
+ }
8492
+ buildConfig() {
8493
+ const config = {
8494
+ api_key: this.apiKey,
8495
+ model: this.model,
7969
8496
  audio_format: "pcm_s16le",
7970
8497
  num_channels: this.numChannels,
7971
8498
  sample_rate: this.sampleRate,
@@ -7984,7 +8511,7 @@ var SonioxSTT = class _SonioxSTT {
7984
8511
  return config;
7985
8512
  }
7986
8513
  async connect() {
7987
- this.ws = new import_ws7.default(this.baseUrl);
8514
+ this.ws = new import_ws8.default(this.baseUrl);
7988
8515
  await new Promise((resolve, reject) => {
7989
8516
  const timer = setTimeout(() => reject(new Error("Soniox connect timeout")), 1e4);
7990
8517
  this.ws.once("open", () => {
@@ -8003,13 +8530,13 @@ var SonioxSTT = class _SonioxSTT {
8003
8530
  getLogger().error(`SonioxSTT WebSocket error: ${String(err)}`);
8004
8531
  });
8005
8532
  this.keepaliveTimer = setInterval(() => {
8006
- if (this.ws && this.ws.readyState === import_ws7.default.OPEN) {
8533
+ if (this.ws && this.ws.readyState === import_ws8.default.OPEN) {
8007
8534
  try {
8008
8535
  this.ws.send(KEEPALIVE_MESSAGE);
8009
8536
  } catch {
8010
8537
  }
8011
8538
  }
8012
- }, KEEPALIVE_INTERVAL_MS);
8539
+ }, KEEPALIVE_INTERVAL_MS2);
8013
8540
  }
8014
8541
  clearKeepalive() {
8015
8542
  if (this.keepaliveTimer) {
@@ -8076,7 +8603,7 @@ var SonioxSTT = class _SonioxSTT {
8076
8603
  }
8077
8604
  }
8078
8605
  sendAudio(audio) {
8079
- if (!this.ws || this.ws.readyState !== import_ws7.default.OPEN) return;
8606
+ if (!this.ws || this.ws.readyState !== import_ws8.default.OPEN) return;
8080
8607
  if (audio.length === 0) return;
8081
8608
  this.ws.send(audio);
8082
8609
  }
@@ -8106,16 +8633,28 @@ var SonioxSTT = class _SonioxSTT {
8106
8633
  }
8107
8634
  };
8108
8635
 
8109
- // src/index.ts
8110
- init_whisper_stt();
8636
+ // src/stt/soniox.ts
8637
+ var STT4 = class extends SonioxSTT {
8638
+ constructor(opts = {}) {
8639
+ const key = opts.apiKey ?? process.env.SONIOX_API_KEY;
8640
+ if (!key) {
8641
+ throw new Error(
8642
+ "Soniox STT requires an apiKey. Pass { apiKey: '...' } or set SONIOX_API_KEY in the environment."
8643
+ );
8644
+ }
8645
+ const { apiKey: _ignored, ...rest } = opts;
8646
+ void _ignored;
8647
+ super(key, rest);
8648
+ }
8649
+ };
8111
8650
 
8112
8651
  // src/providers/assemblyai-stt.ts
8113
- var import_ws8 = __toESM(require("ws"));
8652
+ var import_ws9 = __toESM(require("ws"));
8114
8653
  init_logger();
8115
- var DEFAULT_BASE_URL = "wss://streaming.assemblyai.com";
8654
+ var DEFAULT_BASE_URL2 = "wss://streaming.assemblyai.com";
8116
8655
  var DEFAULT_MIN_TURN_SILENCE_MS = 100;
8117
- var CONNECT_TIMEOUT_MS = 1e4;
8118
- var MAX_CALLBACKS = 10;
8656
+ var CONNECT_TIMEOUT_MS2 = 1e4;
8657
+ var MAX_CALLBACKS2 = 10;
8119
8658
  var AssemblyAISTT = class _AssemblyAISTT {
8120
8659
  constructor(apiKey, options = {}) {
8121
8660
  this.apiKey = apiKey;
@@ -8172,174 +8711,27 @@ var AssemblyAISTT = class _AssemblyAISTT {
8172
8711
  const params = new URLSearchParams();
8173
8712
  for (const [key, value] of Object.entries(raw)) {
8174
8713
  if (value === void 0 || value === null) continue;
8175
- if (typeof value === "boolean") {
8176
- params.set(key, value ? "true" : "false");
8177
- } else {
8178
- params.set(key, String(value));
8179
- }
8180
- }
8181
- const base = opts.baseUrl ?? DEFAULT_BASE_URL;
8182
- return `${base}/v3/ws?${params.toString()}`;
8183
- }
8184
- async connect() {
8185
- const url = this.buildUrl();
8186
- this.ws = new import_ws8.default(url, {
8187
- headers: {
8188
- Authorization: this.apiKey,
8189
- "Content-Type": "application/json",
8190
- "User-Agent": "Patter/1.0 (integration=LiveKit-port)"
8191
- }
8192
- });
8193
- await new Promise((resolve, reject) => {
8194
- const timer = setTimeout(
8195
- () => reject(new Error("AssemblyAI connect timeout")),
8196
- CONNECT_TIMEOUT_MS
8197
- );
8198
- this.ws.once("open", () => {
8199
- clearTimeout(timer);
8200
- resolve();
8201
- });
8202
- this.ws.once("error", (err) => {
8203
- clearTimeout(timer);
8204
- reject(err);
8205
- });
8206
- });
8207
- this.ws.on("message", (raw) => {
8208
- let event;
8209
- try {
8210
- event = JSON.parse(raw.toString());
8211
- } catch {
8212
- return;
8213
- }
8214
- this.handleEvent(event);
8215
- });
8216
- }
8217
- handleEvent(event) {
8218
- const type = event.type;
8219
- if (type === "Begin") {
8220
- this.sessionId = event.id ?? "";
8221
- this.expiresAt = event.expires_at ?? 0;
8222
- return;
8223
- }
8224
- if (type !== "Turn") {
8225
- return;
8226
- }
8227
- const endOfTurn = Boolean(event.end_of_turn);
8228
- const turnIsFormatted = Boolean(event.turn_is_formatted);
8229
- const words = event.words ?? [];
8230
- const transcriptText = (event.transcript ?? "").trim();
8231
- if (endOfTurn) {
8232
- if (this.options.formatTurns && !turnIsFormatted) return;
8233
- if (!transcriptText) return;
8234
- this.emit({
8235
- text: transcriptText,
8236
- isFinal: true,
8237
- confidence: averageConfidence(words)
8238
- });
8239
- return;
8240
- }
8241
- if (!words.length) return;
8242
- const interim = words.map((w) => (w.text ?? "").trim()).filter(Boolean).join(" ");
8243
- if (!interim) return;
8244
- this.emit({
8245
- text: interim,
8246
- isFinal: false,
8247
- confidence: averageConfidence(words)
8248
- });
8249
- }
8250
- emit(transcript) {
8251
- for (const cb of this.callbacks) {
8252
- cb(transcript);
8253
- }
8254
- }
8255
- sendAudio(audio) {
8256
- if (!this.ws || this.ws.readyState !== import_ws8.default.OPEN) return;
8257
- this.ws.send(audio);
8258
- }
8259
- onTranscript(callback) {
8260
- if (this.callbacks.length >= MAX_CALLBACKS) {
8261
- getLogger().warn(
8262
- "AssemblyAISTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
8263
- );
8264
- this.callbacks[this.callbacks.length - 1] = callback;
8265
- return;
8266
- }
8267
- this.callbacks.push(callback);
8268
- }
8269
- close() {
8270
- if (this.ws) {
8271
- try {
8272
- this.ws.send(JSON.stringify({ type: "Terminate" }));
8273
- } catch {
8274
- }
8275
- this.ws.close();
8276
- this.ws = null;
8277
- }
8278
- }
8279
- };
8280
- function averageConfidence(words) {
8281
- if (!words.length) return 0;
8282
- let total = 0;
8283
- for (const w of words) {
8284
- total += Number(w.confidence ?? 0);
8285
- }
8286
- return total / words.length;
8287
- }
8288
-
8289
- // src/providers/cartesia-stt.ts
8290
- var import_ws9 = __toESM(require("ws"));
8291
- init_logger();
8292
- var DEFAULT_BASE_URL2 = "https://api.cartesia.ai";
8293
- var API_VERSION = "2025-04-16";
8294
- var USER_AGENT = "Patter/1.0 (integration=LiveKit-port; provider=Cartesia)";
8295
- var KEEPALIVE_INTERVAL_MS2 = 3e4;
8296
- var CONNECT_TIMEOUT_MS2 = 1e4;
8297
- var MAX_CALLBACKS2 = 10;
8298
- var CartesiaSTT = class {
8299
- constructor(apiKey, options = {}) {
8300
- this.apiKey = apiKey;
8301
- this.options = options;
8302
- if (!apiKey) {
8303
- throw new Error("CartesiaSTT requires a non-empty apiKey");
8304
- }
8305
- }
8306
- ws = null;
8307
- callbacks = [];
8308
- keepaliveTimer = null;
8309
- /** Cartesia request id — set from the server transcript events. */
8310
- requestId = "";
8311
- buildWsUrl() {
8312
- const opts = this.options;
8313
- const rawBase = opts.baseUrl ?? DEFAULT_BASE_URL2;
8314
- let base;
8315
- if (rawBase.startsWith("http://")) {
8316
- base = `ws://${rawBase.slice("http://".length)}`;
8317
- } else if (rawBase.startsWith("https://")) {
8318
- base = `wss://${rawBase.slice("https://".length)}`;
8319
- } else if (rawBase.startsWith("ws://") || rawBase.startsWith("wss://")) {
8320
- base = rawBase;
8321
- } else {
8322
- base = `wss://${rawBase}`;
8323
- }
8324
- const language = opts.language ?? "en";
8325
- const params = new URLSearchParams({
8326
- model: opts.model ?? "ink-whisper",
8327
- sample_rate: String(opts.sampleRate ?? 16e3),
8328
- encoding: opts.encoding ?? "pcm_s16le",
8329
- cartesia_version: API_VERSION,
8330
- api_key: this.apiKey,
8331
- language
8332
- });
8333
- return `${base}/stt/websocket?${params.toString()}`;
8714
+ if (typeof value === "boolean") {
8715
+ params.set(key, value ? "true" : "false");
8716
+ } else {
8717
+ params.set(key, String(value));
8718
+ }
8719
+ }
8720
+ const base = opts.baseUrl ?? DEFAULT_BASE_URL2;
8721
+ return `${base}/v3/ws?${params.toString()}`;
8334
8722
  }
8335
8723
  async connect() {
8336
- const url = this.buildWsUrl();
8724
+ const url = this.buildUrl();
8337
8725
  this.ws = new import_ws9.default(url, {
8338
- headers: { "User-Agent": USER_AGENT }
8726
+ headers: {
8727
+ Authorization: this.apiKey,
8728
+ "Content-Type": "application/json",
8729
+ "User-Agent": "Patter/1.0 (integration=LiveKit-port)"
8730
+ }
8339
8731
  });
8340
8732
  await new Promise((resolve, reject) => {
8341
8733
  const timer = setTimeout(
8342
- () => reject(new Error("Cartesia STT connect timeout")),
8734
+ () => reject(new Error("AssemblyAI connect timeout")),
8343
8735
  CONNECT_TIMEOUT_MS2
8344
8736
  );
8345
8737
  this.ws.once("open", () => {
@@ -8360,33 +8752,39 @@ var CartesiaSTT = class {
8360
8752
  }
8361
8753
  this.handleEvent(event);
8362
8754
  });
8363
- this.keepaliveTimer = setInterval(() => {
8364
- if (this.ws && this.ws.readyState === import_ws9.default.OPEN) {
8365
- try {
8366
- this.ws.ping();
8367
- } catch {
8368
- }
8369
- }
8370
- }, KEEPALIVE_INTERVAL_MS2);
8371
8755
  }
8372
8756
  handleEvent(event) {
8373
8757
  const type = event.type;
8374
- if (type === "transcript") {
8375
- const text = (event.text ?? "").trim();
8376
- const isFinal = Boolean(event.is_final);
8377
- if (!text && !isFinal) return;
8378
- if (event.request_id) {
8379
- this.requestId = event.request_id;
8380
- }
8381
- if (!text) return;
8382
- const confidence = Number(event.probability ?? 1);
8383
- this.emit({ text, isFinal, confidence });
8758
+ if (type === "Begin") {
8759
+ this.sessionId = event.id ?? "";
8760
+ this.expiresAt = event.expires_at ?? 0;
8384
8761
  return;
8385
8762
  }
8386
- if (type === "error") {
8387
- getLogger().error(`Cartesia STT error: ${event.message ?? "unknown"}`);
8763
+ if (type !== "Turn") {
8764
+ return;
8765
+ }
8766
+ const endOfTurn = Boolean(event.end_of_turn);
8767
+ const turnIsFormatted = Boolean(event.turn_is_formatted);
8768
+ const words = event.words ?? [];
8769
+ const transcriptText = (event.transcript ?? "").trim();
8770
+ if (endOfTurn) {
8771
+ if (this.options.formatTurns && !turnIsFormatted) return;
8772
+ if (!transcriptText) return;
8773
+ this.emit({
8774
+ text: transcriptText,
8775
+ isFinal: true,
8776
+ confidence: averageConfidence(words)
8777
+ });
8388
8778
  return;
8389
8779
  }
8780
+ if (!words.length) return;
8781
+ const interim = words.map((w) => (w.text ?? "").trim()).filter(Boolean).join(" ");
8782
+ if (!interim) return;
8783
+ this.emit({
8784
+ text: interim,
8785
+ isFinal: false,
8786
+ confidence: averageConfidence(words)
8787
+ });
8390
8788
  }
8391
8789
  emit(transcript) {
8392
8790
  for (const cb of this.callbacks) {
@@ -8400,7 +8798,7 @@ var CartesiaSTT = class {
8400
8798
  onTranscript(callback) {
8401
8799
  if (this.callbacks.length >= MAX_CALLBACKS2) {
8402
8800
  getLogger().warn(
8403
- "CartesiaSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
8801
+ "AssemblyAISTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
8404
8802
  );
8405
8803
  this.callbacks[this.callbacks.length - 1] = callback;
8406
8804
  return;
@@ -8408,13 +8806,9 @@ var CartesiaSTT = class {
8408
8806
  this.callbacks.push(callback);
8409
8807
  }
8410
8808
  close() {
8411
- if (this.keepaliveTimer) {
8412
- clearInterval(this.keepaliveTimer);
8413
- this.keepaliveTimer = null;
8414
- }
8415
8809
  if (this.ws) {
8416
8810
  try {
8417
- this.ws.send("finalize");
8811
+ this.ws.send(JSON.stringify({ type: "Terminate" }));
8418
8812
  } catch {
8419
8813
  }
8420
8814
  this.ws.close();
@@ -8422,10 +8816,305 @@ var CartesiaSTT = class {
8422
8816
  }
8423
8817
  }
8424
8818
  };
8819
/**
 * Arithmetic mean of the `confidence` values of a word list.
 *
 * @param {Array<{confidence?: number}>} words - word entries; a missing
 *   `confidence` counts as 0.
 * @returns {number} the mean confidence, or 0 for an empty list.
 */
function averageConfidence(words) {
  const count = words.length;
  if (count === 0) {
    return 0;
  }
  let sum = 0;
  for (let idx = 0; idx < count; idx += 1) {
    sum += Number(words[idx].confidence ?? 0);
  }
  return sum / count;
}
8425
8827
 
8426
- // src/index.ts
8427
- init_elevenlabs_tts();
8428
- init_openai_tts();
8828
+ // src/stt/assemblyai.ts
8829
/**
 * Options-object front end for AssemblyAISTT.
 *
 * The API key is taken from `opts.apiKey`, falling back to the
 * ASSEMBLYAI_API_KEY environment variable. Every other option is passed to
 * the underlying provider unchanged.
 *
 * @throws {Error} when no API key can be resolved.
 */
var STT5 = class extends AssemblyAISTT {
  constructor(opts = {}) {
    // Separate the key from the provider options so it is not forwarded twice.
    const { apiKey, ...providerOptions } = opts;
    const resolvedKey = apiKey ?? process.env.ASSEMBLYAI_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "AssemblyAI STT requires an apiKey. Pass { apiKey: '...' } or set ASSEMBLYAI_API_KEY in the environment."
      );
    }
    super(resolvedKey, providerOptions);
  }
};
8842
+
8843
+ // src/providers/elevenlabs-tts.ts
8844
+ var ELEVENLABS_BASE_URL = "https://api.elevenlabs.io/v1";
8845
/**
 * Friendly voice name (lower-case) -> ElevenLabs voice ID.
 * `alloy` maps to the same ID as `rachel`.
 */
var ELEVENLABS_VOICE_ID_BY_NAME = {
  rachel: "21m00Tcm4TlvDq8ikWAM",
  drew: "29vD33N1CtxCmqQRPOHJ",
  clyde: "2EiwWnXFnvU5JabPnv8n",
  paul: "5Q0t7uMcjvnagumLfvZi",
  domi: "AZnzlk1XvdvUeBnXmlld",
  dave: "CYw3kZ02Hs0563khs1Fj",
  fin: "D38z5RcWu1voky8WS1ja",
  bella: "EXAVITQu4vr4xnSDxMaL",
  antoni: "ErXwobaYiN019PkySvjV",
  thomas: "GBv7mTt0atIp3Br8iCZE",
  charlie: "IKne3meq5aSn9XLyUdCD",
  george: "JBFqnCBsd6RMkjVDRZzb",
  emily: "LcfcDJNUP1GQjkzn1xUU",
  elli: "MF3mGyEYCl7XYWbV9V6O",
  callum: "N2lVS1w4EtoT3dr4eOWO",
  patrick: "ODq5zmih8GrVes37Dizd",
  harry: "SOYHLrjzK2X1ezoPC6cr",
  liam: "TX3LPaxmHKxFdv7VOQHJ",
  dorothy: "ThT5KcBeYPX3keUQqHPh",
  josh: "TxGEqnHWrfWFTfGW9XjX",
  arnold: "VR6AewLTigWG4xSOukaG",
  charlotte: "XB0fDUnXU5powFXDhCwa",
  matilda: "XrExE9yKIg1WjnnlVkGX",
  matthew: "Yko7PKHZNXotIFUBG7I9",
  james: "ZQe5CZNOzWyzPSCn5a3c",
  joseph: "Zlb1dXrM653N07WRdFW3",
  jeremy: "bVMeCyTHy58xNoL34h3p",
  michael: "flq6f7yk4E4fJM5XTYuZ",
  ethan: "g5CIjZEefAph4nQFvHAz",
  gigi: "jBpfuIE2acCO8z3wKNLl",
  freya: "jsCqWAovK2LkecY7zXl4",
  brian: "nPczCjzI2devNBz1zQrb",
  grace: "oWAxZDx7w5VEj9dCyTzz",
  daniel: "onwK4e9ZLuTAKqWW03F9",
  lily: "pFZP5JQG7iQjIQuC4Bku",
  serena: "pMsXgVXv3BLzUgSXRplE",
  adam: "pNInz6obpgDQGcFmaJgB",
  nicole: "piTKgcLEGmPE4e6mEKli",
  bill: "pqHfZKP75CvOlQylNhV4",
  jessie: "t0jbNlBVZ17f02VDIeMI",
  ryan: "wViXBPUzp2ZZixB1xQuM",
  sam: "yoZ06aMxZJJ28mfd3POQ",
  glinda: "z9fAnlkpzviPz146aGWa",
  giovanni: "zcAOhNBS3c14rBihAFp1",
  mimi: "zrHiDhphv9ZnVXBqCLjz",
  alloy: "21m00Tcm4TlvDq8ikWAM"
};
/** A string of exactly 20 ASCII letters/digits is treated as a raw voice ID. */
var VOICE_ID_PATTERN = /^[A-Za-z0-9]{20}$/;
/**
 * Resolve a voice name or voice ID to the value used in ElevenLabs requests.
 *
 * @param {string} voice - a friendly name (case-insensitive) or a voice ID.
 * @returns {string} the mapped voice ID for a known name; otherwise the
 *   input unchanged (falsy input, raw 20-character IDs and unknown names).
 */
function resolveVoiceId(voice) {
  if (!voice) return voice;
  if (VOICE_ID_PATTERN.test(voice)) return voice;
  const name = voice.toLowerCase();
  // Own-property check: a bare `table[name]` lookup would also hit inherited
  // members such as "constructor" or "__proto__" and return a non-string.
  return Object.hasOwn(ELEVENLABS_VOICE_ID_BY_NAME, name) ? ELEVENLABS_VOICE_ID_BY_NAME[name] : voice;
}
8899
/**
 * ElevenLabs text-to-speech client built on the HTTP streaming endpoint.
 *
 * The `voiceId` argument is passed through `resolveVoiceId` before being
 * stored on the instance.
 */
var ElevenLabsTTS = class {
  constructor(apiKey, voiceId = "21m00Tcm4TlvDq8ikWAM", modelId = "eleven_turbo_v2_5", outputFormat = "pcm_16000") {
    this.apiKey = apiKey;
    this.modelId = modelId;
    this.outputFormat = outputFormat;
    this.voiceId = resolveVoiceId(voiceId);
  }
  voiceId;
  /**
   * Synthesise `text` and return all of the audio as one Buffer.
   *
   * Collects every chunk from `synthesizeStream`; use that method directly
   * when chunks should be consumed as they arrive.
   */
  async synthesize(text) {
    const parts = [];
    for await (const part of this.synthesizeStream(text)) {
      parts.push(part);
    }
    return Buffer.concat(parts);
  }
  /**
   * Synthesise `text` and yield audio chunks as they are read from the
   * response body. The audio format is whatever `outputFormat` requests.
   *
   * @throws {Error} on a non-OK HTTP status or a response without a body.
   */
  async *synthesizeStream(text) {
    const voicePath = encodeURIComponent(this.voiceId);
    const format = encodeURIComponent(this.outputFormat);
    const endpoint = `${ELEVENLABS_BASE_URL}/text-to-speech/${voicePath}/stream?output_format=${format}`;
    const payload = JSON.stringify({ text, model_id: this.modelId });
    const res = await fetch(endpoint, {
      method: "POST",
      headers: {
        "xi-api-key": this.apiKey,
        "Content-Type": "application/json"
      },
      body: payload,
      // Abort the request after 30 s.
      signal: AbortSignal.timeout(3e4)
    });
    if (!res.ok) {
      const detail = await res.text();
      throw new Error(`ElevenLabs TTS error ${res.status}: ${detail}`);
    }
    if (!res.body) {
      throw new Error("ElevenLabs TTS: no response body");
    }
    const reader = res.body.getReader();
    try {
      for (; ; ) {
        const step = await reader.read();
        if (step.done) break;
        const bytes = step.value;
        // Empty reads are skipped.
        if (bytes && bytes.length > 0) {
          yield Buffer.from(bytes);
        }
      }
    } finally {
      // Runs on normal completion and when the consumer stops early:
      // cancel the stream (ignoring failures), then release the reader lock.
      if (typeof reader.cancel === "function") {
        await reader.cancel().catch(() => {
        });
      }
      reader.releaseLock();
    }
  }
};
8959
+
8960
+ // src/tts/elevenlabs.ts
8961
/**
 * Options-object front end for ElevenLabsTTS.
 *
 * The API key is taken from `opts.apiKey`, falling back to the
 * ELEVENLABS_API_KEY environment variable. `voiceId`, `modelId` and
 * `outputFormat` fall back to the defaults below when not supplied.
 *
 * @throws {Error} when no API key can be resolved.
 */
var TTS = class extends ElevenLabsTTS {
  constructor(opts = {}) {
    const { apiKey, voiceId, modelId, outputFormat } = opts;
    const resolvedKey = apiKey ?? process.env.ELEVENLABS_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "ElevenLabs TTS requires an apiKey. Pass { apiKey: '...' } or set ELEVENLABS_API_KEY in the environment."
      );
    }
    super(
      resolvedKey,
      voiceId ?? "21m00Tcm4TlvDq8ikWAM",
      modelId ?? "eleven_turbo_v2_5",
      outputFormat ?? "pcm_16000"
    );
  }
};
8977
+
8978
+ // src/providers/openai-tts.ts
8979
+ var OPENAI_TTS_URL = "https://api.openai.com/v1/audio/speech";
8980
/**
 * OpenAI text-to-speech client.
 *
 * Requests raw PCM from the speech endpoint and converts it from 24 kHz to
 * 16 kHz with a resampler that keeps state between network chunks.
 */
var OpenAITTS = class _OpenAITTS {
  constructor(apiKey, voice = "alloy", model = "tts-1") {
    this.apiKey = apiKey;
    this.voice = voice;
    this.model = model;
  }
  /**
   * Synthesise `text` and return all of the audio as one Buffer.
   *
   * Collects every chunk from `synthesizeStream`; use that method directly
   * when chunks should be consumed as they arrive.
   */
  async synthesize(text) {
    const parts = [];
    for await (const part of this.synthesizeStream(text)) {
      parts.push(part);
    }
    return Buffer.concat(parts);
  }
  /**
   * Synthesise `text` and yield resampled audio chunks as they arrive.
   *
   * Each network chunk goes through `resampleStreaming` with one shared
   * state object, so sample alignment is carried across chunk boundaries
   * (a pending odd byte and up to two samples that did not yet form a full
   * group of three). Samples still pending at end of stream are emitted
   * as-is in a final chunk.
   *
   * @throws {Error} on a non-OK HTTP status or a response without a body.
   */
  async *synthesizeStream(text) {
    const payload = JSON.stringify({
      model: this.model,
      input: text,
      voice: this.voice,
      response_format: "pcm"
    });
    const res = await fetch(OPENAI_TTS_URL, {
      method: "POST",
      headers: {
        "Authorization": `Bearer ${this.apiKey}`,
        "Content-Type": "application/json"
      },
      body: payload,
      // Abort the request after 30 s.
      signal: AbortSignal.timeout(3e4)
    });
    if (!res.ok) {
      const detail = await res.text();
      throw new Error(`OpenAI TTS error ${res.status}: ${detail}`);
    }
    if (!res.body) {
      throw new Error("OpenAI TTS: no response body");
    }
    const state = { carryByte: null, leftover: [] };
    const reader = res.body.getReader();
    try {
      for (; ; ) {
        const step = await reader.read();
        if (step.done) break;
        const bytes = step.value;
        if (bytes && bytes.length > 0) {
          const converted = _OpenAITTS.resampleStreaming(Buffer.from(bytes), state);
          if (converted.length > 0) yield converted;
        }
      }
      // Flush samples that never completed a group of three.
      const pending = state.leftover;
      if (pending.length > 0) {
        const tail = Buffer.alloc(pending.length * 2);
        pending.forEach((sample, idx) => {
          tail.writeInt16LE(sample, idx * 2);
        });
        yield tail;
      }
    } finally {
      // Runs on normal completion and when the consumer stops early:
      // cancel the stream (ignoring failures), then release the reader lock.
      if (typeof reader.cancel === "function") {
        await reader.cancel().catch(() => {
        });
      }
      reader.releaseLock();
    }
  }
  /**
   * Streaming 24 kHz -> 16 kHz resampler for little-endian 16-bit PCM.
   *
   * Every three input samples (a, b, c) produce two output samples:
   * `a` and the truncated mean of `b` and `c`.
   *
   * @param {Buffer} audio - next chunk of input bytes.
   * @param {{carryByte: number|null, leftover: number[]}} ctx - state shared
   *   across calls; updated in place.
   * @returns {Buffer} the resampled bytes (possibly empty).
   */
  static resampleStreaming(audio, ctx) {
    // Re-attach the odd byte held back by the previous call, if any.
    let pcm = audio;
    if (ctx.carryByte !== null) {
      pcm = Buffer.concat([Buffer.from([ctx.carryByte]), audio]);
      ctx.carryByte = null;
    }
    // Hold back a trailing odd byte so only whole samples are decoded.
    if (pcm.length % 2 === 1) {
      ctx.carryByte = pcm[pcm.length - 1];
      pcm = pcm.subarray(0, pcm.length - 1);
    }
    if (pcm.length === 0 && ctx.leftover.length === 0) {
      return Buffer.alloc(0);
    }
    // Samples pending from the previous call come first.
    const samples = [...ctx.leftover];
    for (let offset = 0; offset < pcm.length; offset += 2) {
      samples.push(pcm.readInt16LE(offset));
    }
    const groups = Math.floor(samples.length / 3);
    const result = Buffer.alloc(groups * 4);
    for (let g = 0; g < groups; g += 1) {
      const base = g * 3;
      result.writeInt16LE(samples[base], g * 4);
      result.writeInt16LE(Math.trunc((samples[base + 1] + samples[base + 2]) / 2), g * 4 + 2);
    }
    // Keep the 0-2 samples that did not fill a group for the next call.
    ctx.leftover = samples.slice(groups * 3);
    return result;
  }
  /** @deprecated use {@link resampleStreaming} with persistent state. */
  static resample24kTo16k(audio) {
    const state = { carryByte: null, leftover: [] };
    const head = _OpenAITTS.resampleStreaming(audio, state);
    const pending = state.leftover;
    if (pending.length === 0) return head;
    // Append the samples that did not fill a group, unchanged.
    const tail = Buffer.alloc(pending.length * 2);
    pending.forEach((sample, idx) => {
      tail.writeInt16LE(sample, idx * 2);
    });
    return Buffer.concat([head, tail]);
  }
};
9105
+
9106
+ // src/tts/openai.ts
9107
/**
 * Options-object front end for OpenAITTS.
 *
 * The API key is taken from `opts.apiKey`, falling back to the
 * OPENAI_API_KEY environment variable. `voice` and `model` fall back to the
 * defaults below when not supplied.
 *
 * @throws {Error} when no API key can be resolved.
 */
var TTS2 = class extends OpenAITTS {
  constructor(opts = {}) {
    const { apiKey, voice, model } = opts;
    const resolvedKey = apiKey ?? process.env.OPENAI_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "OpenAI TTS requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
      );
    }
    super(resolvedKey, voice ?? "alloy", model ?? "tts-1");
  }
};
8429
9118
 
8430
9119
  // src/providers/cartesia-tts.ts
8431
9120
  var CARTESIA_BASE_URL = "https://api.cartesia.ai";
@@ -8525,6 +9214,21 @@ var CartesiaTTS = class {
8525
9214
  }
8526
9215
  };
8527
9216
 
9217
+ // src/tts/cartesia.ts
9218
/**
 * Options-object front end for CartesiaTTS.
 *
 * The API key is taken from `opts.apiKey`, falling back to the
 * CARTESIA_API_KEY environment variable. Every other option is passed to
 * the underlying provider unchanged.
 *
 * @throws {Error} when no API key can be resolved.
 */
var TTS3 = class extends CartesiaTTS {
  constructor(opts = {}) {
    // Separate the key from the provider options so it is not forwarded twice.
    const { apiKey, ...providerOptions } = opts;
    const resolvedKey = apiKey ?? process.env.CARTESIA_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "Cartesia TTS requires an apiKey. Pass { apiKey: '...' } or set CARTESIA_API_KEY in the environment."
      );
    }
    super(resolvedKey, providerOptions);
  }
};
9231
+
8528
9232
  // src/providers/rime-tts.ts
8529
9233
  var RIME_BASE_URL = "https://users.rime.ai/v1/rime-tts";
8530
9234
  var ARCANA_MODEL_TIMEOUT_MS = 60 * 4 * 1e3;
@@ -8652,6 +9356,21 @@ var RimeTTS = class {
8652
9356
  }
8653
9357
  };
8654
9358
 
9359
+ // src/tts/rime.ts
9360
/**
 * Options-object front end for RimeTTS.
 *
 * The API key is taken from `opts.apiKey`, falling back to the RIME_API_KEY
 * environment variable. Every other option is passed to the underlying
 * provider unchanged.
 *
 * @throws {Error} when no API key can be resolved.
 */
var TTS4 = class extends RimeTTS {
  constructor(opts = {}) {
    // Separate the key from the provider options so it is not forwarded twice.
    const { apiKey, ...providerOptions } = opts;
    const resolvedKey = apiKey ?? process.env.RIME_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "Rime TTS requires an apiKey. Pass { apiKey: '...' } or set RIME_API_KEY in the environment."
      );
    }
    super(resolvedKey, providerOptions);
  }
};
9373
+
8655
9374
  // src/providers/lmnt-tts.ts
8656
9375
  var LMNT_BASE_URL = "https://api.lmnt.com/v1/ai/speech/bytes";
8657
9376
  var LMNTTTS = class {
@@ -8730,6 +9449,119 @@ var LMNTTTS = class {
8730
9449
  }
8731
9450
  };
8732
9451
 
9452
+ // src/tts/lmnt.ts
9453
/**
 * Options-object front end for LMNTTTS.
 *
 * The API key is taken from `opts.apiKey`, falling back to the LMNT_API_KEY
 * environment variable. Every other option is passed to the underlying
 * provider unchanged.
 *
 * @throws {Error} when no API key can be resolved.
 */
var TTS5 = class extends LMNTTTS {
  constructor(opts = {}) {
    // Separate the key from the provider options so it is not forwarded twice.
    const { apiKey, ...providerOptions } = opts;
    const resolvedKey = apiKey ?? process.env.LMNT_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "LMNT TTS requires an apiKey. Pass { apiKey: '...' } or set LMNT_API_KEY in the environment."
      );
    }
    super(resolvedKey, providerOptions);
  }
};
9466
+
9467
+ // src/carriers/twilio.ts
9468
/**
 * Twilio carrier credentials.
 *
 * Each credential is read from the options object first, then from the
 * matching environment variable (TWILIO_ACCOUNT_SID / TWILIO_AUTH_TOKEN).
 *
 * @throws {Error} when either credential cannot be resolved.
 */
var Carrier = class {
  kind = "twilio";
  accountSid;
  authToken;
  constructor(opts = {}) {
    const env = process.env;
    const accountSid = opts.accountSid ?? env.TWILIO_ACCOUNT_SID;
    const authToken = opts.authToken ?? env.TWILIO_AUTH_TOKEN;
    // A missing account SID is reported before a missing auth token.
    if (!accountSid) {
      throw new Error(
        "Twilio carrier requires accountSid. Pass { accountSid: 'AC...' } or set TWILIO_ACCOUNT_SID in the environment."
      );
    }
    if (!authToken) {
      throw new Error(
        "Twilio carrier requires authToken. Pass { authToken: '...' } or set TWILIO_AUTH_TOKEN in the environment."
      );
    }
    this.accountSid = accountSid;
    this.authToken = authToken;
  }
};
9489
+
9490
+ // src/carriers/telnyx.ts
9491
/**
 * Telnyx carrier credentials.
 *
 * Each value is read from the options object first, then from the matching
 * environment variable (TELNYX_API_KEY / TELNYX_CONNECTION_ID /
 * TELNYX_PUBLIC_KEY). `publicKey` is not validated and may be undefined.
 *
 * @throws {Error} when `apiKey` or `connectionId` cannot be resolved.
 */
var Carrier2 = class {
  kind = "telnyx";
  apiKey;
  connectionId;
  publicKey;
  constructor(opts = {}) {
    const env = process.env;
    const apiKey = opts.apiKey ?? env.TELNYX_API_KEY;
    const connectionId = opts.connectionId ?? env.TELNYX_CONNECTION_ID;
    const publicKey = opts.publicKey ?? env.TELNYX_PUBLIC_KEY;
    // A missing API key is reported before a missing connection id.
    if (!apiKey) {
      throw new Error(
        "Telnyx carrier requires apiKey. Pass { apiKey: '...' } or set TELNYX_API_KEY in the environment."
      );
    }
    if (!connectionId) {
      throw new Error(
        "Telnyx carrier requires connectionId. Pass { connectionId: '...' } or set TELNYX_CONNECTION_ID in the environment."
      );
    }
    this.apiKey = apiKey;
    this.connectionId = connectionId;
    this.publicKey = publicKey;
  }
};
9515
+
9516
+ // src/public-api.ts
9517
/** Replacement text used when a guardrail is created without its own. */
var DEFAULT_GUARDRAIL_REPLACEMENT = "I'm sorry, I can't respond to that.";
/**
 * Named guardrail definition.
 *
 * Stores an optional `blockedTerms` value, an optional `check` value and a
 * `replacement` string (the default above when none is given).
 *
 * @throws {Error} when `opts.name` is empty or missing.
 */
var Guardrail = class {
  name;
  blockedTerms;
  check;
  replacement;
  constructor(opts) {
    const { name, blockedTerms, check, replacement } = opts;
    if (!name) {
      throw new Error("Guardrail requires a non-empty name.");
    }
    this.name = name;
    // Falsy values are ignored, leaving the field undefined.
    if (blockedTerms) {
      this.blockedTerms = blockedTerms;
    }
    if (check) {
      this.check = check;
    }
    this.replacement = replacement ?? DEFAULT_GUARDRAIL_REPLACEMENT;
  }
};
/**
 * Factory shorthand for `new Guardrail(opts)`.
 *
 * @returns {Guardrail} the new guardrail instance.
 */
function guardrail(opts) {
  const instance = new Guardrail(opts);
  return instance;
}
9536
/**
 * Tool definition backed by exactly one of a `handler` function or a
 * non-empty `webhookUrl` string.
 *
 * `description` defaults to "" and `parameters` to an empty object schema.
 *
 * @throws {Error} when `name` is empty, when neither handler nor webhookUrl
 *   is given, or when both are given.
 */
var Tool = class {
  name;
  description;
  parameters;
  handler;
  webhookUrl;
  constructor(opts) {
    const { name, description, parameters, handler, webhookUrl } = opts;
    if (!name) {
      throw new Error("Tool requires a non-empty name.");
    }
    const usesHandler = typeof handler === "function";
    const usesWebhook = typeof webhookUrl === "string" && webhookUrl.length > 0;
    // Equal flags mean either none or both backends were supplied.
    if (usesHandler === usesWebhook) {
      throw new Error(
        usesHandler ? "Tool accepts handler OR webhookUrl, not both." : "Tool requires either handler or webhookUrl."
      );
    }
    this.name = name;
    this.description = description ?? "";
    this.parameters = parameters ?? { type: "object", properties: {} };
    if (usesHandler) {
      this.handler = handler;
    } else {
      this.webhookUrl = webhookUrl;
    }
  }
};
/**
 * Factory shorthand for `new Tool(opts)`.
 *
 * @returns {Tool} the new tool instance.
 */
function tool(opts) {
  const instance = new Tool(opts);
  return instance;
}
9564
+
8733
9565
  // src/index.ts
8734
9566
  init_transcoding();
8735
9567
  init_tunnel();
@@ -9354,21 +10186,25 @@ function isAudioConfig(value) {
9354
10186
  CartesiaSTT,
9355
10187
  CartesiaTTS,
9356
10188
  ChatContext,
10189
+ CloudflareTunnel,
9357
10190
  DEFAULT_MIN_SENTENCE_LEN,
9358
10191
  DEFAULT_PRICING,
9359
10192
  DTMF_EVENTS,
9360
10193
  DeepgramSTT,
10194
+ ElevenLabsConvAI,
9361
10195
  ElevenLabsConvAIAdapter,
9362
10196
  ElevenLabsTTS,
9363
10197
  FallbackLLMProvider,
9364
10198
  GEMINI_DEFAULT_INPUT_SR,
9365
10199
  GEMINI_DEFAULT_OUTPUT_SR,
9366
10200
  GeminiLiveAdapter,
10201
+ Guardrail,
9367
10202
  IVRActivity,
9368
10203
  LLMLoop,
9369
10204
  LMNTTTS,
9370
10205
  MetricsStore,
9371
10206
  OpenAILLMProvider,
10207
+ OpenAIRealtime,
9372
10208
  OpenAIRealtimeAdapter,
9373
10209
  OpenAITTS,
9374
10210
  PartialStreamError,
@@ -9381,8 +10217,12 @@ function isAudioConfig(value) {
9381
10217
  RimeTTS,
9382
10218
  SentenceChunker,
9383
10219
  SonioxSTT,
10220
+ StaticTunnel,
10221
+ Telnyx,
9384
10222
  TestSession,
9385
10223
  TfidfLoopDetector,
10224
+ Tool,
10225
+ Twilio,
9386
10226
  ULTRAVOX_DEFAULT_API_BASE,
9387
10227
  ULTRAVOX_DEFAULT_SR,
9388
10228
  UltravoxRealtimeAdapter,
@@ -9402,6 +10242,7 @@ function isAudioConfig(value) {
9402
10242
  filterMarkdown,
9403
10243
  formatDtmf,
9404
10244
  getLogger,
10245
+ guardrail,
9405
10246
  isRemoteUrl,
9406
10247
  isWebSocketUrl,
9407
10248
  makeAuthMiddleware,
@@ -9423,5 +10264,6 @@ function isAudioConfig(value) {
9423
10264
  selectSoundFromList,
9424
10265
  setLogger,
9425
10266
  startTunnel,
10267
+ tool,
9426
10268
  whisper
9427
10269
  });