getpatter 0.4.3 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -251,20 +251,36 @@ var ElevenLabsConvAIAdapter = class {
251
251
  import WebSocket3 from "ws";
252
252
  var DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/listen";
253
253
  var DeepgramSTT = class _DeepgramSTT {
254
- constructor(apiKey, language = "en", model = "nova-3", encoding = "linear16", sampleRate = 16e3) {
255
- this.apiKey = apiKey;
256
- this.language = language;
257
- this.model = model;
258
- this.encoding = encoding;
259
- this.sampleRate = sampleRate;
260
- }
261
254
  ws = null;
262
255
  callbacks = [];
263
256
  /** Request ID from Deepgram — used to query actual cost post-call. */
264
257
  requestId = "";
265
- /** Factory for Twilio calls — mulaw 8 kHz. */
266
- static forTwilio(apiKey, language = "en", model = "nova-3") {
267
- return new _DeepgramSTT(apiKey, language, model, "mulaw", 8e3);
258
+ apiKey;
259
+ language;
260
+ model;
261
+ encoding;
262
+ sampleRate;
263
+ endpointingMs;
264
+ utteranceEndMs;
265
+ smartFormat;
266
+ interimResults;
267
+ vadEvents;
268
+ constructor(apiKey, languageOrOptions, model, encoding, sampleRate, options) {
269
+ this.apiKey = apiKey;
270
+ const opts = typeof languageOrOptions === "object" && languageOrOptions !== null ? languageOrOptions : options ?? {};
271
+ this.language = (typeof languageOrOptions === "string" ? languageOrOptions : opts.language) ?? "en";
272
+ this.model = model ?? opts.model ?? "nova-3";
273
+ this.encoding = encoding ?? opts.encoding ?? "linear16";
274
+ this.sampleRate = sampleRate ?? opts.sampleRate ?? 16e3;
275
+ this.endpointingMs = opts.endpointingMs ?? 150;
276
+ this.utteranceEndMs = opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3;
277
+ this.smartFormat = opts.smartFormat ?? true;
278
+ this.interimResults = opts.interimResults ?? true;
279
+ this.vadEvents = opts.vadEvents ?? true;
280
+ }
281
+ /** Factory for Twilio calls — mulaw 8 kHz. Forwards tuning options through. */
282
+ static forTwilio(apiKey, language = "en", model = "nova-3", options = {}) {
283
+ return new _DeepgramSTT(apiKey, language, model, "mulaw", 8e3, options);
268
284
  }
269
285
  async connect() {
270
286
  const params = new URLSearchParams({
@@ -273,12 +289,15 @@ var DeepgramSTT = class _DeepgramSTT {
273
289
  encoding: this.encoding,
274
290
  sample_rate: String(this.sampleRate),
275
291
  channels: "1",
276
- interim_results: "true",
277
- endpointing: "300",
278
- smart_format: "true",
279
- vad_events: "true",
292
+ interim_results: this.interimResults ? "true" : "false",
293
+ endpointing: String(this.endpointingMs),
294
+ smart_format: this.smartFormat ? "true" : "false",
295
+ vad_events: this.vadEvents ? "true" : "false",
280
296
  no_delay: "true"
281
297
  });
298
+ if (this.utteranceEndMs !== null) {
299
+ params.set("utterance_end_ms", String(Math.max(this.utteranceEndMs, 1e3)));
300
+ }
282
301
  const url = `${DEEPGRAM_WS_URL}?${params.toString()}`;
283
302
  this.ws = new WebSocket3(url, {
284
303
  headers: { Authorization: `Token ${this.apiKey}` }
@@ -313,7 +332,7 @@ var DeepgramSTT = class _DeepgramSTT {
313
332
  if (!text) return;
314
333
  const transcript = {
315
334
  text,
316
- isFinal: Boolean(data.is_final) && Boolean(data.speech_final),
335
+ isFinal: Boolean(data.is_final) || Boolean(data.speech_final),
317
336
  confidence: best.confidence ?? 0
318
337
  };
319
338
  for (const cb of this.callbacks) {
@@ -534,9 +553,15 @@ var MetricsStore = class extends EventEmitter {
534
553
  maxCalls;
535
554
  calls = [];
536
555
  activeCalls = /* @__PURE__ */ new Map();
537
- constructor(maxCalls = 500) {
556
+ /**
557
+ * Accepts either a numeric ``maxCalls`` (legacy positional — matches the
558
+ * original TS API) or an options object ``{ maxCalls }`` to align with the
559
+ * Python SDK's keyword-argument style. Plain literals also work:
560
+ * ``new MetricsStore()`` / ``new MetricsStore(100)`` / ``new MetricsStore({ maxCalls: 100 })``.
561
+ */
562
+ constructor(maxCallsOrOpts = 500) {
538
563
  super();
539
- this.maxCalls = maxCalls;
564
+ this.maxCalls = typeof maxCallsOrOpts === "number" ? maxCallsOrOpts : maxCallsOrOpts.maxCalls ?? 500;
540
565
  }
541
566
  publish(eventType, data) {
542
567
  this.emit("sse", { type: eventType, data });
@@ -544,22 +569,100 @@ var MetricsStore = class extends EventEmitter {
544
569
  recordCallStart(data) {
545
570
  const callId = data.call_id || "";
546
571
  if (!callId) return;
572
+ const existing = this.activeCalls.get(callId);
573
+ if (existing) {
574
+ existing.caller = data.caller || existing.caller;
575
+ existing.callee = data.callee || existing.callee;
576
+ existing.direction = data.direction || existing.direction;
577
+ existing.status = "in-progress";
578
+ existing.turns = existing.turns || [];
579
+ } else {
580
+ const record = {
581
+ call_id: callId,
582
+ caller: data.caller || "",
583
+ callee: data.callee || "",
584
+ direction: data.direction || "inbound",
585
+ started_at: Date.now() / 1e3,
586
+ status: "in-progress",
587
+ turns: []
588
+ };
589
+ this.activeCalls.set(callId, record);
590
+ }
591
+ this.publish("call_start", {
592
+ call_id: callId,
593
+ caller: data.caller || "",
594
+ callee: data.callee || "",
595
+ direction: data.direction || "inbound"
596
+ });
597
+ }
598
+ /**
599
+ * Pre-register an outbound call before any webhook fires. Lets the
600
+ * dashboard surface attempts that never reach media (no-answer, busy,
601
+ * carrier-rejected). Mirrors the Python ``record_call_initiated``.
602
+ */
603
+ recordCallInitiated(data) {
604
+ const callId = data.call_id || "";
605
+ if (!callId) return;
606
+ if (this.activeCalls.has(callId)) return;
547
607
  const record = {
548
608
  call_id: callId,
549
609
  caller: data.caller || "",
550
610
  callee: data.callee || "",
551
- direction: data.direction || "inbound",
611
+ direction: data.direction || "outbound",
552
612
  started_at: Date.now() / 1e3,
613
+ status: "initiated",
553
614
  turns: []
554
615
  };
555
616
  this.activeCalls.set(callId, record);
556
- this.publish("call_start", {
617
+ this.publish("call_initiated", {
557
618
  call_id: callId,
558
619
  caller: record.caller,
559
620
  callee: record.callee,
560
- direction: record.direction
621
+ direction: record.direction,
622
+ status: record.status
561
623
  });
562
624
  }
625
+ /**
626
+ * Update the status of an active or completed call. Terminal states
627
+ * (completed, no-answer, busy, failed, canceled, webhook_error) move the
628
+ * row from active to completed so the UI freezes the live duration timer.
629
+ */
630
+ updateCallStatus(callId, status, extra = {}) {
631
+ if (!callId || !status) return;
632
+ const TERMINAL = /* @__PURE__ */ new Set(["completed", "no-answer", "busy", "failed", "canceled", "webhook_error"]);
633
+ const active = this.activeCalls.get(callId);
634
+ if (active) {
635
+ active.status = status;
636
+ Object.assign(active, extra);
637
+ if (TERMINAL.has(status)) {
638
+ const entry = {
639
+ call_id: callId,
640
+ caller: active.caller || "",
641
+ callee: active.callee || "",
642
+ direction: active.direction || "outbound",
643
+ started_at: active.started_at || 0,
644
+ ended_at: Date.now() / 1e3,
645
+ status,
646
+ metrics: null,
647
+ ...extra
648
+ };
649
+ this.activeCalls.delete(callId);
650
+ this.calls.push(entry);
651
+ if (this.calls.length > this.maxCalls) {
652
+ this.calls = this.calls.slice(-this.maxCalls);
653
+ }
654
+ }
655
+ } else {
656
+ for (let i = this.calls.length - 1; i >= 0; i--) {
657
+ if (this.calls[i].call_id === callId) {
658
+ this.calls[i].status = status;
659
+ Object.assign(this.calls[i], extra);
660
+ break;
661
+ }
662
+ }
663
+ }
664
+ this.publish("call_status", { call_id: callId, status, ...extra });
665
+ }
563
666
  recordTurn(data) {
564
667
  const callId = data.call_id || "";
565
668
  const turn = data.turn;
@@ -576,6 +679,8 @@ var MetricsStore = class extends EventEmitter {
576
679
  if (!callId) return;
577
680
  const active = this.activeCalls.get(callId);
578
681
  this.activeCalls.delete(callId);
682
+ const activeStatus = active?.status;
683
+ const resolvedStatus = activeStatus && activeStatus !== "in-progress" ? activeStatus : "completed";
579
684
  const entry = {
580
685
  call_id: callId,
581
686
  caller: data.caller || active?.caller || "",
@@ -584,6 +689,7 @@ var MetricsStore = class extends EventEmitter {
584
689
  started_at: active?.started_at || 0,
585
690
  ended_at: Date.now() / 1e3,
586
691
  transcript: data.transcript || [],
692
+ status: resolvedStatus,
587
693
  metrics: metrics ?? null
588
694
  };
589
695
  this.calls.push(entry);
@@ -1751,13 +1857,68 @@ function isWebSocketUrl(url) {
1751
1857
 
1752
1858
  // src/providers/elevenlabs-tts.ts
1753
1859
  var ELEVENLABS_BASE_URL = "https://api.elevenlabs.io/v1";
1860
+ var ELEVENLABS_VOICE_ID_BY_NAME = {
1861
+ rachel: "21m00Tcm4TlvDq8ikWAM",
1862
+ drew: "29vD33N1CtxCmqQRPOHJ",
1863
+ clyde: "2EiwWnXFnvU5JabPnv8n",
1864
+ paul: "5Q0t7uMcjvnagumLfvZi",
1865
+ domi: "AZnzlk1XvdvUeBnXmlld",
1866
+ dave: "CYw3kZ02Hs0563khs1Fj",
1867
+ fin: "D38z5RcWu1voky8WS1ja",
1868
+ bella: "EXAVITQu4vr4xnSDxMaL",
1869
+ antoni: "ErXwobaYiN019PkySvjV",
1870
+ thomas: "GBv7mTt0atIp3Br8iCZE",
1871
+ charlie: "IKne3meq5aSn9XLyUdCD",
1872
+ george: "JBFqnCBsd6RMkjVDRZzb",
1873
+ emily: "LcfcDJNUP1GQjkzn1xUU",
1874
+ elli: "MF3mGyEYCl7XYWbV9V6O",
1875
+ callum: "N2lVS1w4EtoT3dr4eOWO",
1876
+ patrick: "ODq5zmih8GrVes37Dizd",
1877
+ harry: "SOYHLrjzK2X1ezoPC6cr",
1878
+ liam: "TX3LPaxmHKxFdv7VOQHJ",
1879
+ dorothy: "ThT5KcBeYPX3keUQqHPh",
1880
+ josh: "TxGEqnHWrfWFTfGW9XjX",
1881
+ arnold: "VR6AewLTigWG4xSOukaG",
1882
+ charlotte: "XB0fDUnXU5powFXDhCwa",
1883
+ matilda: "XrExE9yKIg1WjnnlVkGX",
1884
+ matthew: "Yko7PKHZNXotIFUBG7I9",
1885
+ james: "ZQe5CZNOzWyzPSCn5a3c",
1886
+ joseph: "Zlb1dXrM653N07WRdFW3",
1887
+ jeremy: "bVMeCyTHy58xNoL34h3p",
1888
+ michael: "flq6f7yk4E4fJM5XTYuZ",
1889
+ ethan: "g5CIjZEefAph4nQFvHAz",
1890
+ gigi: "jBpfuIE2acCO8z3wKNLl",
1891
+ freya: "jsCqWAovK2LkecY7zXl4",
1892
+ brian: "nPczCjzI2devNBz1zQrb",
1893
+ grace: "oWAxZDx7w5VEj9dCyTzz",
1894
+ daniel: "onwK4e9ZLuTAKqWW03F9",
1895
+ lily: "pFZP5JQG7iQjIQuC4Bku",
1896
+ serena: "pMsXgVXv3BLzUgSXRplE",
1897
+ adam: "pNInz6obpgDQGcFmaJgB",
1898
+ nicole: "piTKgcLEGmPE4e6mEKli",
1899
+ bill: "pqHfZKP75CvOlQylNhV4",
1900
+ jessie: "t0jbNlBVZ17f02VDIeMI",
1901
+ ryan: "wViXBPUzp2ZZixB1xQuM",
1902
+ sam: "yoZ06aMxZJJ28mfd3POQ",
1903
+ glinda: "z9fAnlkpzviPz146aGWa",
1904
+ giovanni: "zcAOhNBS3c14rBihAFp1",
1905
+ mimi: "zrHiDhphv9ZnVXBqCLjz",
1906
+ alloy: "21m00Tcm4TlvDq8ikWAM"
1907
+ };
1908
+ var VOICE_ID_PATTERN = /^[A-Za-z0-9]{20}$/;
1909
+ function resolveVoiceId(voice) {
1910
+ if (!voice) return voice;
1911
+ if (VOICE_ID_PATTERN.test(voice)) return voice;
1912
+ return ELEVENLABS_VOICE_ID_BY_NAME[voice.toLowerCase()] ?? voice;
1913
+ }
1754
1914
  var ElevenLabsTTS = class {
1755
1915
  constructor(apiKey, voiceId = "21m00Tcm4TlvDq8ikWAM", modelId = "eleven_turbo_v2_5", outputFormat = "pcm_16000") {
1756
1916
  this.apiKey = apiKey;
1757
- this.voiceId = voiceId;
1758
1917
  this.modelId = modelId;
1759
1918
  this.outputFormat = outputFormat;
1919
+ this.voiceId = resolveVoiceId(voiceId);
1760
1920
  }
1921
+ voiceId;
1761
1922
  /**
1762
1923
  * Synthesise text to speech and return the full audio as a single Buffer.
1763
1924
  *
@@ -1836,6 +1997,11 @@ var OpenAITTS = class _OpenAITTS {
1836
1997
  *
1837
1998
  * OpenAI returns 24 kHz PCM16; each chunk is resampled to 16 kHz before
1838
1999
  * yielding so the output is ready for telephony pipelines.
2000
+ *
2001
+ * The resampler carries state (buffered samples + odd trailing byte)
2002
+ * between chunks — without that state cross-chunk sample alignment drifts
2003
+ * and the caller hears pops / dropped audio (BUG #23, mirror of the
2004
+ * Python `audioop.ratecv` fix).
1839
2005
  */
1840
2006
  async *synthesizeStream(text) {
1841
2007
  const response = await fetch(OPENAI_TTS_URL, {
@@ -1859,15 +2025,24 @@ var OpenAITTS = class _OpenAITTS {
1859
2025
  if (!response.body) {
1860
2026
  throw new Error("OpenAI TTS: no response body");
1861
2027
  }
2028
+ const ctx = { carryByte: null, leftover: [] };
1862
2029
  const reader = response.body.getReader();
1863
2030
  try {
1864
2031
  while (true) {
1865
2032
  const { done, value } = await reader.read();
1866
2033
  if (done) break;
1867
2034
  if (value && value.length > 0) {
1868
- yield _OpenAITTS.resample24kTo16k(Buffer.from(value));
2035
+ const out = _OpenAITTS.resampleStreaming(Buffer.from(value), ctx);
2036
+ if (out.length > 0) yield out;
1869
2037
  }
1870
2038
  }
2039
+ if (ctx.leftover.length > 0) {
2040
+ const tail = Buffer.alloc(ctx.leftover.length * 2);
2041
+ for (let i = 0; i < ctx.leftover.length; i++) {
2042
+ tail.writeInt16LE(ctx.leftover[i], i * 2);
2043
+ }
2044
+ yield tail;
2045
+ }
1871
2046
  } finally {
1872
2047
  if (typeof reader.cancel === "function") await reader.cancel().catch(() => {
1873
2048
  });
@@ -1875,35 +2050,53 @@ var OpenAITTS = class _OpenAITTS {
1875
2050
  }
1876
2051
  }
1877
2052
  /**
1878
- * Resample 24 kHz PCM16-LE to 16 kHz by taking 2 out of every 3 samples.
1879
- *
1880
- * For each group of 3 input samples the first is kept as-is and the second
1881
- * output sample is the average of input samples 2 and 3. This matches the
1882
- * Python SDK implementation.
2053
+ * Streaming 24 kHz → 16 kHz resampler (PCM16-LE). Maintains cross-chunk
2054
+ * state so the 3:2 pattern doesn't reset at every network read.
1883
2055
  */
1884
- static resample24kTo16k(audio) {
1885
- if (audio.length < 2) return audio;
1886
- const sampleCount = Math.floor(audio.length / 2);
1887
- const samples = new Int16Array(sampleCount);
1888
- for (let i = 0; i < sampleCount; i++) {
1889
- samples[i] = audio.readInt16LE(i * 2);
1890
- }
1891
- const resampled = [];
1892
- for (let i = 0; i < samples.length; i += 3) {
1893
- resampled.push(samples[i]);
1894
- if (i + 1 < samples.length) {
1895
- if (i + 2 < samples.length) {
1896
- resampled.push(Math.trunc((samples[i + 1] + samples[i + 2]) / 2));
1897
- } else {
1898
- resampled.push(samples[i + 1]);
1899
- }
1900
- }
2056
+ static resampleStreaming(audio, ctx) {
2057
+ let buf;
2058
+ if (ctx.carryByte !== null) {
2059
+ buf = Buffer.concat([Buffer.from([ctx.carryByte]), audio]);
2060
+ ctx.carryByte = null;
2061
+ } else {
2062
+ buf = audio;
2063
+ }
2064
+ if (buf.length % 2 === 1) {
2065
+ ctx.carryByte = buf[buf.length - 1];
2066
+ buf = buf.subarray(0, buf.length - 1);
2067
+ }
2068
+ if (buf.length === 0 && ctx.leftover.length === 0) {
2069
+ return Buffer.alloc(0);
2070
+ }
2071
+ const sampleCount = buf.length / 2;
2072
+ const samples = ctx.leftover.slice();
2073
+ for (let i2 = 0; i2 < sampleCount; i2++) {
2074
+ samples.push(buf.readInt16LE(i2 * 2));
2075
+ }
2076
+ const out = [];
2077
+ let i = 0;
2078
+ while (i + 2 < samples.length) {
2079
+ out.push(samples[i]);
2080
+ out.push(Math.trunc((samples[i + 1] + samples[i + 2]) / 2));
2081
+ i += 3;
1901
2082
  }
1902
- const out = Buffer.alloc(resampled.length * 2);
1903
- for (let i = 0; i < resampled.length; i++) {
1904
- out.writeInt16LE(resampled[i], i * 2);
2083
+ ctx.leftover = samples.slice(i);
2084
+ const buffer = Buffer.alloc(out.length * 2);
2085
+ for (let j = 0; j < out.length; j++) {
2086
+ buffer.writeInt16LE(out[j], j * 2);
1905
2087
  }
1906
- return out;
2088
+ return buffer;
2089
+ }
2090
+ /** @deprecated use {@link resampleStreaming} with persistent state. */
2091
+ static resample24kTo16k(audio) {
2092
+ const ctx = { carryByte: null, leftover: [] };
2093
+ const out = _OpenAITTS.resampleStreaming(audio, ctx);
2094
+ if (ctx.leftover.length === 0) return out;
2095
+ const tail = Buffer.alloc(ctx.leftover.length * 2);
2096
+ for (let i = 0; i < ctx.leftover.length; i++) {
2097
+ tail.writeInt16LE(ctx.leftover[i], i * 2);
2098
+ }
2099
+ return Buffer.concat([out, tail]);
1907
2100
  }
1908
2101
  };
1909
2102
 
@@ -2493,6 +2686,9 @@ var StreamHandler = class {
2493
2686
  maxDurationTimer = null;
2494
2687
  transcriptProcessing = false;
2495
2688
  transcriptQueue = [];
2689
+ // BUG #22 throttle state — mirror Python impl.
2690
+ lastCommitText = "";
2691
+ lastCommitAt = 0;
2496
2692
  history;
2497
2693
  metricsAcc;
2498
2694
  constructor(deps, ws, caller, callee) {
@@ -2603,15 +2799,23 @@ var StreamHandler = class {
2603
2799
  this.streamSid = sid;
2604
2800
  }
2605
2801
  /** Handle an incoming audio chunk (already decoded from base64). */
2606
- handleAudio(audioBuffer) {
2802
+ async handleAudio(audioBuffer) {
2607
2803
  const provider = this.deps.agent.provider ?? "openai_realtime";
2608
- if (provider === "pipeline" && this.stt && !this.isSpeaking) {
2609
- if (this.deps.bridge.telephonyProvider === "twilio") {
2610
- const pcm8k = mulawToPcm16(audioBuffer);
2611
- const pcm16k = resample8kTo16k(pcm8k);
2612
- this.stt.sendAudio(pcm16k);
2804
+ if (provider === "pipeline" && this.stt) {
2805
+ if (this.isSpeaking && (this.deps.agent.bargeInThresholdMs ?? 300) === 0) {
2806
+ return;
2807
+ }
2808
+ const pcm8k = mulawToPcm16(audioBuffer);
2809
+ const pcm16k = resample8kTo16k(pcm8k);
2810
+ const hooks = this.deps.agent.hooks;
2811
+ if (hooks) {
2812
+ const hookExecutor = new PipelineHookExecutor(hooks);
2813
+ const hookCtx = this.buildHookContext();
2814
+ const processed = await hookExecutor.runBeforeSendToStt(pcm16k, hookCtx);
2815
+ if (processed === null) return;
2816
+ this.stt.sendAudio(processed);
2613
2817
  } else {
2614
- this.stt.sendAudio(audioBuffer);
2818
+ this.stt.sendAudio(pcm16k);
2615
2819
  }
2616
2820
  } else if (this.adapter) {
2617
2821
  if (this.adapter instanceof ElevenLabsConvAIAdapter && this.deps.bridge.telephonyProvider === "twilio") {
@@ -2693,8 +2897,7 @@ var StreamHandler = class {
2693
2897
  this.tts = new OpenAITTS(this.deps.agent.tts.apiKey, this.deps.agent.tts.voice ?? "alloy");
2694
2898
  }
2695
2899
  } else if (this.deps.agent.elevenlabsKey) {
2696
- const voiceId = this.deps.agent.voice && this.deps.agent.voice !== "alloy" ? this.deps.agent.voice : "21m00Tcm4TlvDq8ikWAM";
2697
- this.tts = new ElevenLabsTTS(this.deps.agent.elevenlabsKey, voiceId);
2900
+ this.tts = new ElevenLabsTTS(this.deps.agent.elevenlabsKey, this.deps.agent.voice || "rachel");
2698
2901
  }
2699
2902
  if (!this.stt) {
2700
2903
  getLogger().info(`Pipeline mode (${label}): no STT configured`);
@@ -2806,7 +3009,59 @@ var StreamHandler = class {
2806
3009
  }
2807
3010
  }
2808
3011
  async processTranscript(transcript) {
3012
+ if (transcript.text && this.isSpeaking) {
3013
+ getLogger().info(
3014
+ `Barge-in: caller spoke over agent (${sanitizeLogValue(transcript.text.slice(0, 40))})`
3015
+ );
3016
+ this.isSpeaking = false;
3017
+ try {
3018
+ this.deps.bridge.sendClear(this.ws, this.streamSid);
3019
+ } catch (err) {
3020
+ getLogger().debug(`sendClear during barge-in failed: ${String(err)}`);
3021
+ }
3022
+ this.metricsAcc.recordTurnInterrupted();
3023
+ }
2809
3024
  if (!transcript.isFinal || !transcript.text) return;
3025
+ const now = Date.now();
3026
+ const normalised = transcript.text.trim().toLowerCase();
3027
+ const stripped = normalised.replace(/[.,!?;: ]+$/, "").trim();
3028
+ const sinceLastMs = now - this.lastCommitAt;
3029
+ const HALLUCINATIONS = /* @__PURE__ */ new Set([
3030
+ "you",
3031
+ "thank you",
3032
+ "thanks",
3033
+ "yeah",
3034
+ "yes",
3035
+ "no",
3036
+ "okay",
3037
+ "ok",
3038
+ "uh",
3039
+ "um",
3040
+ "mmm",
3041
+ "hmm",
3042
+ ".",
3043
+ "bye",
3044
+ "right",
3045
+ "cool"
3046
+ ]);
3047
+ if (HALLUCINATIONS.has(stripped) || stripped === "") {
3048
+ getLogger().info(`Dropped likely STT hallucination: ${sanitizeLogValue(normalised.slice(0, 40))}`);
3049
+ return;
3050
+ }
3051
+ if (sinceLastMs < 2e3 && normalised === this.lastCommitText) {
3052
+ getLogger().info(
3053
+ `Dropped duplicate final transcript (${(sinceLastMs / 1e3).toFixed(1)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
3054
+ );
3055
+ return;
3056
+ }
3057
+ if (sinceLastMs < 500) {
3058
+ getLogger().info(
3059
+ `Dropped back-to-back final transcript (${(sinceLastMs / 1e3).toFixed(2)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
3060
+ );
3061
+ return;
3062
+ }
3063
+ this.lastCommitText = normalised;
3064
+ this.lastCommitAt = now;
2810
3065
  const label = this.deps.bridge.label;
2811
3066
  getLogger().info(`User (${label} pipeline): ${sanitizeLogValue(transcript.text)}`);
2812
3067
  this.metricsAcc.startTurn();
@@ -3346,6 +3601,25 @@ function buildAIAdapter(config, agent, resolvedPrompt) {
3346
3601
  tools
3347
3602
  );
3348
3603
  }
3604
+ function extractDeepgramOptions(options) {
3605
+ if (!options) return {};
3606
+ const get = (snake, camel) => options[snake] ?? options[camel];
3607
+ const out = {};
3608
+ const model = get("model", "model");
3609
+ if (typeof model === "string") out.model = model;
3610
+ const endpointing = get("endpointing_ms", "endpointingMs");
3611
+ if (typeof endpointing === "number") out.endpointingMs = endpointing;
3612
+ const utteranceEnd = get("utterance_end_ms", "utteranceEndMs");
3613
+ if (utteranceEnd === null) out.utteranceEndMs = null;
3614
+ else if (typeof utteranceEnd === "number") out.utteranceEndMs = utteranceEnd;
3615
+ const smart = get("smart_format", "smartFormat");
3616
+ if (typeof smart === "boolean") out.smartFormat = smart;
3617
+ const interim = get("interim_results", "interimResults");
3618
+ if (typeof interim === "boolean") out.interimResults = interim;
3619
+ const vad = get("vad_events", "vadEvents");
3620
+ if (typeof vad === "boolean") out.vadEvents = vad;
3621
+ return out;
3622
+ }
3349
3623
  var TwilioBridge = class {
3350
3624
  constructor(config) {
3351
3625
  this.config = config;
@@ -3397,13 +3671,21 @@ var TwilioBridge = class {
3397
3671
  }
3398
3672
  }
3399
3673
  createStt(agent) {
3674
+ const isPipeline = agent.provider === "pipeline";
3400
3675
  if (agent.stt) {
3401
3676
  if (agent.stt.provider === "deepgram") {
3402
- return DeepgramSTT.forTwilio(agent.stt.apiKey, agent.stt.language ?? "en");
3677
+ const dgOptions = extractDeepgramOptions(agent.stt.options);
3678
+ if (isPipeline) {
3679
+ return new DeepgramSTT(agent.stt.apiKey, agent.stt.language ?? "en", dgOptions.model, "linear16", 16e3, dgOptions);
3680
+ }
3681
+ return DeepgramSTT.forTwilio(agent.stt.apiKey, agent.stt.language ?? "en", dgOptions.model, dgOptions);
3403
3682
  } else if (agent.stt.provider === "whisper") {
3404
- return WhisperSTT.forTwilio(agent.stt.apiKey, agent.stt.language ?? "en");
3683
+ return isPipeline ? new WhisperSTT(agent.stt.apiKey, "whisper-1", agent.stt.language ?? "en") : WhisperSTT.forTwilio(agent.stt.apiKey, agent.stt.language ?? "en");
3405
3684
  }
3406
3685
  } else if (agent.deepgramKey) {
3686
+ if (isPipeline) {
3687
+ return new DeepgramSTT(agent.deepgramKey, agent.language ?? "en", "nova-3", "linear16", 16e3);
3688
+ }
3407
3689
  return DeepgramSTT.forTwilio(agent.deepgramKey, agent.language ?? "en");
3408
3690
  }
3409
3691
  return null;
@@ -3454,12 +3736,12 @@ var TelnyxBridge = class {
3454
3736
  label = "Telnyx";
3455
3737
  telephonyProvider = "telnyx";
3456
3738
  sendAudio(ws, audioBase64, _streamSid) {
3457
- ws.send(JSON.stringify({ event_type: "media", payload: { audio: { chunk: audioBase64 } } }));
3739
+ ws.send(JSON.stringify({ event: "media", media: { payload: audioBase64 } }));
3458
3740
  }
3459
3741
  sendMark(_ws, _markName, _streamSid) {
3460
3742
  }
3461
3743
  sendClear(ws, _streamSid) {
3462
- ws.send(JSON.stringify({ event_type: "media_stop" }));
3744
+ ws.send(JSON.stringify({ event: "clear" }));
3463
3745
  }
3464
3746
  async transferCall(callId, toNumber) {
3465
3747
  if (!isValidTelnyxTransferTarget(toNumber)) {
@@ -3555,7 +3837,15 @@ var TelnyxBridge = class {
3555
3837
  createStt(agent) {
3556
3838
  if (agent.stt) {
3557
3839
  if (agent.stt.provider === "deepgram") {
3558
- return new DeepgramSTT(agent.stt.apiKey, agent.stt.language ?? "en", "nova-3", "linear16", 16e3);
3840
+ const dgOptions = extractDeepgramOptions(agent.stt.options);
3841
+ return new DeepgramSTT(
3842
+ agent.stt.apiKey,
3843
+ agent.stt.language ?? "en",
3844
+ dgOptions.model ?? "nova-3",
3845
+ "linear16",
3846
+ 16e3,
3847
+ dgOptions
3848
+ );
3559
3849
  } else if (agent.stt.provider === "whisper") {
3560
3850
  return new WhisperSTT(agent.stt.apiKey, "whisper-1", agent.stt.language ?? "en");
3561
3851
  }
@@ -3607,6 +3897,7 @@ var EmbeddedServer = class {
3607
3897
  server = null;
3608
3898
  wss = null;
3609
3899
  twilioTokenWarningLogged = false;
3900
+ telnyxSigWarningLogged = false;
3610
3901
  metricsStore;
3611
3902
  pricing;
3612
3903
  remoteHandler = new RemoteMessageHandler();
@@ -3654,6 +3945,31 @@ var EmbeddedServer = class {
3654
3945
  mountApi(app, this.metricsStore, this.dashboardToken);
3655
3946
  getLogger().info("Dashboard: http://127.0.0.1:" + port + "/");
3656
3947
  }
3948
+ app.post("/webhooks/twilio/status", (req, res) => {
3949
+ if (this.config.twilioToken) {
3950
+ const signature = req.headers["x-twilio-signature"] || "";
3951
+ const url = `https://${this.config.webhookUrl}${req.originalUrl}`;
3952
+ const params = req.body ?? {};
3953
+ if (!validateTwilioSignature(url, params, signature, this.config.twilioToken)) {
3954
+ res.status(403).send("Invalid signature");
3955
+ return;
3956
+ }
3957
+ }
3958
+ const body = req.body;
3959
+ const callSid = sanitizeLogValue(body["CallSid"] ?? "");
3960
+ const callStatus = sanitizeLogValue(body["CallStatus"] ?? "");
3961
+ const duration = body["CallDuration"] ?? body["Duration"] ?? "";
3962
+ getLogger().info(
3963
+ `Twilio status ${callStatus} for call ${callSid} (duration=${duration})`
3964
+ );
3965
+ if (callSid && callStatus) {
3966
+ const extra = {};
3967
+ const parsed = parseFloat(duration);
3968
+ if (!Number.isNaN(parsed)) extra.duration_seconds = parsed;
3969
+ this.metricsStore.updateCallStatus(callSid, callStatus, extra);
3970
+ }
3971
+ res.status(204).send();
3972
+ });
3657
3973
  app.post("/webhooks/twilio/recording", (req, res) => {
3658
3974
  if (this.config.twilioToken) {
3659
3975
  const signature = req.headers["x-twilio-signature"] || "";
@@ -3739,7 +4055,7 @@ var EmbeddedServer = class {
3739
4055
  const twiml = `<?xml version="1.0" encoding="UTF-8"?><Response><Connect><Stream url="${xmlStreamUrl}"><Parameter name="caller" value="${xmlEscape(caller)}"/><Parameter name="callee" value="${xmlEscape(callee)}"/></Stream></Connect></Response>`;
3740
4056
  res.type("text/xml").send(twiml);
3741
4057
  });
3742
- app.post("/webhooks/telnyx/voice", (req, res) => {
4058
+ app.post("/webhooks/telnyx/voice", async (req, res) => {
3743
4059
  if (this.config.telnyxPublicKey) {
3744
4060
  const rawBody = req.rawBody ?? "";
3745
4061
  const signature = req.headers["telnyx-signature-ed25519"] ?? "";
@@ -3748,7 +4064,8 @@ var EmbeddedServer = class {
3748
4064
  getLogger().warn("Telnyx webhook rejected: invalid or missing Ed25519 signature");
3749
4065
  return res.status(403).send("Invalid signature");
3750
4066
  }
3751
- } else {
4067
+ } else if (!this.telnyxSigWarningLogged) {
4068
+ this.telnyxSigWarningLogged = true;
3752
4069
  getLogger().warn("Telnyx webhook signature verification is disabled. Set telnyxPublicKey in LocalOptions for production use.");
3753
4070
  }
3754
4071
  const body = req.body;
@@ -3758,41 +4075,77 @@ var EmbeddedServer = class {
3758
4075
  if (typeof body.data.event_type !== "string" || typeof body.data.payload !== "object" || body.data.payload === null) {
3759
4076
  return res.status(400).send("Invalid body");
3760
4077
  }
3761
- const eventType = body?.data?.event_type ?? "";
4078
+ const eventType = body.data.event_type ?? "";
4079
+ const payload = body.data.payload ?? {};
3762
4080
  if (eventType === "call.dtmf.received") {
3763
- const digit = String(body.data?.payload?.digit ?? "").trim();
4081
+ const digit = String(payload.digit ?? "").trim();
3764
4082
  if (digit) {
3765
4083
  getLogger().info(`Telnyx DTMF received (webhook): ${sanitizeLogValue(digit)}`);
3766
4084
  }
3767
- return res.json({ received: true });
4085
+ return res.status(200).send();
3768
4086
  }
3769
4087
  if (eventType === "call.recording.saved") {
3770
- const recordingUrl = body.data?.payload?.recording_urls?.mp3 ?? body.data?.payload?.recording_urls?.wav ?? body.data?.payload?.public_recording_urls?.mp3 ?? "";
4088
+ const recordingUrl = payload.recording_urls?.mp3 ?? payload.recording_urls?.wav ?? payload.public_recording_urls?.mp3 ?? "";
3771
4089
  if (recordingUrl) {
3772
4090
  getLogger().info(`Telnyx recording saved (webhook): ${sanitizeLogValue(recordingUrl)}`);
3773
4091
  }
3774
- return res.json({ received: true });
4092
+ return res.status(200).send();
3775
4093
  }
3776
- if (eventType === "call.initiated") {
3777
- const payload = body?.data?.payload ?? {};
3778
- const callControlId = payload.call_control_id ?? "";
3779
- const caller = payload.from ?? "";
3780
- const callee = payload.to ?? "";
3781
- const streamUrl = `wss://${this.config.webhookUrl}/ws/stream/${encodeURIComponent(callControlId)}?caller=${encodeURIComponent(caller)}&callee=${encodeURIComponent(callee)}`;
3782
- const commands = [
3783
- { command: "answer" },
3784
- {
3785
- command: "stream_start",
3786
- params: {
4094
+ const callControlId = payload.call_control_id ?? "";
4095
+ if (!callControlId) {
4096
+ getLogger().warn("Telnyx webhook rejected: missing call_control_id");
4097
+ return res.status(400).send("Invalid webhook payload");
4098
+ }
4099
+ const apiKey = this.config.telnyxKey;
4100
+ if (!apiKey) {
4101
+ getLogger().warn("Telnyx webhook: missing telnyxKey in LocalOptions");
4102
+ return res.status(500).send("Missing Telnyx API key");
4103
+ }
4104
+ const apiBase = "https://api.telnyx.com/v2";
4105
+ const authHeaders = {
4106
+ "Content-Type": "application/json",
4107
+ Authorization: `Bearer ${apiKey}`
4108
+ };
4109
+ try {
4110
+ if (eventType === "call.initiated") {
4111
+ getLogger().info(`Telnyx call.initiated ${callControlId} \u2014 answering`);
4112
+ const resp = await fetch(`${apiBase}/calls/${encodeURIComponent(callControlId)}/actions/answer`, {
4113
+ method: "POST",
4114
+ headers: authHeaders,
4115
+ body: JSON.stringify({}),
4116
+ signal: AbortSignal.timeout(1e4)
4117
+ });
4118
+ if (!resp.ok) {
4119
+ getLogger().warn(`Telnyx answer failed: ${resp.status} ${(await resp.text()).slice(0, 200)}`);
4120
+ }
4121
+ } else if (eventType === "call.answered") {
4122
+ const caller = payload.from ?? "";
4123
+ const callee = payload.to ?? "";
4124
+ const streamUrl = `wss://${this.config.webhookUrl}/ws/stream/${encodeURIComponent(callControlId)}?caller=${encodeURIComponent(caller)}&callee=${encodeURIComponent(callee)}`;
4125
+ getLogger().info(`Telnyx call.answered ${callControlId} \u2014 starting stream`);
4126
+ const resp = await fetch(`${apiBase}/calls/${encodeURIComponent(callControlId)}/actions/streaming_start`, {
4127
+ method: "POST",
4128
+ headers: authHeaders,
4129
+ body: JSON.stringify({
3787
4130
  stream_url: streamUrl,
3788
- stream_track: "both_tracks"
3789
- }
4131
+ stream_track: "both_tracks",
4132
+ stream_bidirectional_mode: "rtp",
4133
+ stream_bidirectional_codec: "PCMU",
4134
+ stream_bidirectional_sampling_rate: 8e3,
4135
+ stream_bidirectional_target_legs: "self"
4136
+ }),
4137
+ signal: AbortSignal.timeout(1e4)
4138
+ });
4139
+ if (!resp.ok) {
4140
+ getLogger().warn(`Telnyx streaming_start failed: ${resp.status} ${(await resp.text()).slice(0, 200)}`);
3790
4141
  }
3791
- ];
3792
- res.json({ commands });
3793
- } else {
3794
- res.json({ received: true });
4142
+ } else {
4143
+ getLogger().debug(`Telnyx event ignored: ${eventType}`);
4144
+ }
4145
+ } catch (e) {
4146
+ getLogger().error(`Telnyx webhook handler error: ${String(e)}`);
3795
4147
  }
4148
+ return res.status(200).send();
3796
4149
  });
3797
4150
  this.server = createServer(app);
3798
4151
  this.wss = new WebSocketServer({ noServer: true });
@@ -3939,11 +4292,12 @@ Connect AI agents to phone numbers in 4 lines of code
3939
4292
  getLogger().error("Failed to parse Telnyx WS message:", e);
3940
4293
  return;
3941
4294
  }
3942
- const eventType = data.event_type ?? "";
3943
- getLogger().info(`Telnyx event: ${eventType}`);
3944
- if (eventType === "stream_started" && !streamStarted) {
4295
+ const event = data.event ?? "";
4296
+ if (event === "connected") return;
4297
+ getLogger().info(`Telnyx event: ${event}`);
4298
+ if (event === "start" && !streamStarted) {
3945
4299
  streamStarted = true;
3946
- const callControlId = data.payload?.call_control_id ?? "";
4300
+ const callControlId = data.start?.call_control_id ?? "";
3947
4301
  if (callControlId) this.activeCallIds.set(ws, callControlId);
3948
4302
  await handler.handleCallStart(callControlId);
3949
4303
  if (this.recording) {
@@ -3953,22 +4307,21 @@ Connect AI agents to phone numbers in 4 lines of code
3953
4307
  getLogger().warn(`Could not start recording: ${String(e)}`);
3954
4308
  }
3955
4309
  }
3956
- } else if (eventType === "media") {
3957
- const audioChunk = data.payload?.audio?.chunk ?? "";
4310
+ } else if (event === "media") {
4311
+ const track = data.media?.track ?? "inbound";
4312
+ if (track !== "inbound") return;
4313
+ const audioChunk = data.media?.payload ?? "";
3958
4314
  if (!audioChunk) return;
3959
4315
  handler.handleAudio(Buffer.from(audioChunk, "base64"));
3960
- } else if (eventType === "call.dtmf.received") {
3961
- const digit = String(data.payload?.digit ?? "").trim();
4316
+ } else if (event === "dtmf") {
4317
+ const digit = String(data.dtmf?.digit ?? "").trim();
3962
4318
  if (digit) {
3963
4319
  getLogger().info(`Telnyx DTMF received: ${digit}`);
3964
4320
  await handler.handleDtmf(digit);
3965
4321
  }
3966
- } else if (eventType === "call.recording.saved") {
3967
- const recordingUrl = data.payload?.recording_urls?.mp3 ?? data.payload?.recording_urls?.wav ?? data.payload?.public_recording_urls?.mp3 ?? "";
3968
- if (recordingUrl) {
3969
- getLogger().info(`Telnyx recording saved: ${recordingUrl}`);
3970
- }
3971
- } else if (eventType === "stream_stopped") {
4322
+ } else if (event === "error") {
4323
+ getLogger().warn(`Telnyx stream error: ${JSON.stringify(data)}`);
4324
+ } else if (event === "stop") {
3972
4325
  await handler.handleStop();
3973
4326
  }
3974
4327
  } catch (err) {