npm - getpatter - Versions diffs - 0.6.5 → 0.6.7 - Mend

getpatter 0.6.5 → 0.6.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/dist/{chunk-CRPJLVHB.mjs → chunk-YJX2EKON.mjs} +649 -80
package/dist/cli.js +501 -2
package/dist/index.d.mts +233 -10
package/dist/index.d.ts +233 -10
package/dist/index.js +1591 -198
package/dist/index.mjs +883 -78
package/dist/{test-mode-HGHI2AUV.mjs → test-mode-XFOADUNE.mjs} +1 -1
package/package.json +1 -1

package/dist/{chunk-CRPJLVHB.mjs → chunk-YJX2EKON.mjs} RENAMED Viewed

@@ -29,6 +29,105 @@ import express from "express";
 import { createServer } from "http";
 import { WebSocketServer } from "ws";
+// src/telemetry/call-metrics.ts
+init_esm_shims();
+function engineFromMode(mode) {
+  if (mode === "openai_realtime" || mode === "openai_realtime_2") return "realtime";
+  if (mode === "elevenlabs_convai") return "convai";
+  if (mode === "pipeline") return "pipeline";
+  return "other";
+}
+function providerFromMetrics(m) {
+  const mode = m.provider_mode;
+  if (mode === "openai_realtime" || mode === "openai_realtime_2") return "openai";
+  if (mode === "elevenlabs_convai") return "elevenlabs";
+  for (const key of ["llm_provider", "stt_provider", "tts_provider"]) {
+    const v = m[key];
+    if (typeof v === "string" && v) return v.toLowerCase();
+  }
+  return "other";
+}
+function providerFromMode(mode) {
+  if (mode === "openai_realtime" || mode === "openai_realtime_2") return "openai";
+  if (mode === "elevenlabs_convai") return "elevenlabs";
+  return "other";
+}
+function carrierFamily(tp) {
+  return typeof tp === "string" && tp ? tp.toLowerCase() : "none";
+}
+function direction(value) {
+  const v = typeof value === "string" ? value.toLowerCase() : "";
+  return v === "inbound" || v === "outbound" ? v : void 0;
+}
+function turnCountBucket(n) {
+  if (n <= 0) return "0";
+  if (n === 1) return "1";
+  if (n <= 3) return "2_3";
+  if (n <= 6) return "4_6";
+  if (n <= 12) return "7_12";
+  return "13_plus";
+}
+function latencyMs(m) {
+  const p95 = m.latency_p95;
+  if (p95 && typeof p95 === "object") {
+    return p95.agent_response_ms;
+  }
+  return void 0;
+}
+function recordCallStarted(telemetry, opts) {
+  if (!telemetry) return;
+  try {
+    const dims = {
+      engine: engineFromMode(opts.providerMode),
+      provider: providerFromMode(opts.providerMode),
+      carrier: carrierFamily(opts.telephonyProvider)
+    };
+    const d = direction(opts.direction);
+    if (d !== void 0) dims.direction = d;
+    telemetry.record("call_started", dims);
+  } catch {
+  }
+}
+function recordCallCompleted(telemetry, opts) {
+  if (!telemetry) return;
+  try {
+    const dims = { outcome: opts.outcome };
+    const d = direction(opts.direction);
+    if (d !== void 0) dims.direction = d;
+    const metrics = opts.metrics;
+    if (metrics && typeof metrics === "object") {
+      const m = metrics;
+      dims.engine = engineFromMode(m.provider_mode);
+      dims.provider = providerFromMetrics(m);
+      dims.carrier = carrierFamily(m.telephony_provider);
+      if (typeof m.duration_seconds === "number") {
+        dims.duration_seconds = Math.max(0, Math.round(m.duration_seconds));
+      }
+      const lat = latencyMs(m);
+      if (typeof lat === "number") dims.latency_ms = Math.max(0, Math.round(lat));
+      const cost = m.cost;
+      if (cost && typeof cost === "object") {
+        const total = cost.total;
+        if (typeof total === "number" && Number.isFinite(total)) {
+          dims.cost_usd = Math.max(0, Math.round(total * 1e4) / 1e4);
+        }
+      }
+      if (Array.isArray(m.turns)) {
+        dims.turn_count_bucket = turnCountBucket(m.turns.length);
+      }
+      const errorCode = m.error_code;
+      if (typeof errorCode === "string" && errorCode) {
+        dims.error_code = errorCode;
+        dims.outcome = "error";
+      }
+    } else if (opts.carrier !== void 0) {
+      dims.carrier = carrierFamily(opts.carrier);
+    }
+    telemetry.record("call_completed", dims);
+  } catch {
+  }
+}
 // src/providers/elevenlabs-convai.ts
 init_esm_shims();
 import WebSocket from "ws";
@@ -2826,6 +2925,9 @@ var CallMetricsAccumulator = class {
   ttsModel;
   realtimeModel;
   _pricing;
+  // Terminal error code (lowercased ErrorCode value or "other"); set by
+  // recordError when the call ends abnormally. Empty for a clean call.
+  _errorCode = "";
   _callStart;
   _turns = [];
   // mutable internal array; immutable when exposed via TurnMetrics[] → readonly TurnMetrics[]
@@ -3396,11 +3498,35 @@ var CallMetricsAccumulator = class {
       telephony_provider: this.telephonyProvider,
       stt_model: this.sttModel,
       tts_model: this.ttsModel,
-      llm_model: this._llmModel
+      llm_model: this._llmModel,
+      error_code: this._errorCode
     };
     this._eventBus?.emit("call_ended", { callId: this.callId, metrics });
     return metrics;
   }
+  /**
+   * Record the call's terminal error as a coarse, anonymous code. Stores the
+   * PatterError `.code` lowercased; maps common timeout/connection errors; falls
+   * back to "other". Never stores the message. Last write wins.
+   */
+  recordError(err) {
+    const code = err?.code;
+    const name = err?.name;
+    const sys = typeof code === "string" ? code : "";
+    if (sys.startsWith("ECONN") || sys === "EHOSTUNREACH" || sys === "ENETUNREACH" || sys === "EPIPE") {
+      this._errorCode = "connection";
+      return;
+    }
+    if (typeof code === "string" && code) {
+      this._errorCode = code.toLowerCase();
+      return;
+    }
+    if (name === "TimeoutError" || name === "AbortError") {
+      this._errorCode = "timeout";
+    } else {
+      this._errorCode = "other";
+    }
+  }
   /** Return the cost breakdown for the call so far without ending it. */
   getCostSoFar() {
     const duration = (hrTimeMs() - this._callStart) / 1e3;
@@ -4879,6 +5005,28 @@ function isSttHallucination(text) {
   const pieces = stripped.split(/[.!?…。！？]+/u).map((p) => p.trim()).filter((p) => p.length > 0);
   return pieces.length > 1 && pieces.every((p) => HALLUCINATIONS.has(p));
 }
+var ECHO_WORD_OVERLAP_THRESHOLD = 0.6;
+var ECHO_MIN_CANDIDATE_WORDS = 4;
+function normalizeForEcho(text) {
+  return text.toLowerCase().replace(/[^\p{L}\p{N}\s]/gu, " ").replace(/\s+/u, " ").trim().replace(/\s+/gu, " ");
+}
+function looksLikeEcho(candidate, agentText) {
+  const a = normalizeForEcho(agentText);
+  const c = normalizeForEcho(candidate);
+  if (!a || !c) return false;
+  const words = c.split(" ").filter(Boolean);
+  if (words.length < ECHO_MIN_CANDIDATE_WORDS) return false;
+  if (a.includes(c)) return true;
+  const agentWords = new Set(a.split(" "));
+  const overlap = words.filter((w) => agentWords.has(w)).length / words.length;
+  return overlap >= ECHO_WORD_OVERLAP_THRESHOLD;
+}
+function isNearDuplicate(a, b) {
+  if (!a || !b) return false;
+  if (a === b) return true;
+  const [shorter, longer] = a.length <= b.length ? [a, b] : [b, a];
+  return longer.startsWith(shorter + " ");
+}
 var StreamHandler = class _StreamHandler {
   deps;
   ws;
@@ -4891,6 +5039,17 @@ var StreamHandler = class _StreamHandler {
   stt = null;
   tts = null;
   isSpeaking = false;
+  /**
+   * True only while the post-TTS tail-grace window is pending: the agent has
+   * finished its turn but ``isSpeaking`` is still held for
+   * ``PATTER_TTS_TAIL_GRACE_MS`` to swallow the fading echo tail. A VAD
+   * ``speech_start`` (or a transcript) during this window is the user's NEXT
+   * turn, not a barge-in — there is nothing left to interrupt. Set by
+   * ``endSpeakingWithGrace``; cleared by ``beginSpeaking``, the grace flip,
+   * ``cancelSpeaking``, and ``endTailGraceForNewTurn``. Parity with Python
+   * ``_tail_grace_active``.
+   */
+  tailGraceActive = false;
   /**
    * Ring buffer of inbound PCM16 16 kHz frames captured while the agent
    * is speaking and the self-hearing guard is dropping audio. On
@@ -4966,6 +5125,35 @@ var StreamHandler = class _StreamHandler {
    * ``isSpeaking=false``, and silently cut the agent's first turn.
    */
   firstAudioSentAt = null;
+  /**
+   * Estimated wall-clock (ms) when the LAST audio byte pushed to the carrier
+   * finishes PLAYING on the phone. The pipeline pushes TTS audio as fast as
+   * the provider synthesizes it (no pacing) and the carrier buffers + plays
+   * at realtime, so "we finished pushing" and "the caller finished hearing"
+   * can diverge by tens of seconds — especially with agent-runtime LLMs
+   * (Hermes/OpenClaw) that deliver a long reply all at once after a thinking
+   * pause. ``endSpeakingWithGrace`` holds ``isSpeaking=true`` (with
+   * ``tailGraceActive=false``) until this cursor passes, so a barge-in during
+   * the audible backlog still takes the cancel path (``sendClear`` drops the
+   * carrier buffer) instead of being treated as a calm next turn. Advanced by
+   * ``trackOutboundPlayback``; reset by ``cancelSpeaking`` (the buffer is
+   * cleared) and ``endTailGraceForNewTurn``.
+   */
+  playbackBufferedUntil = 0;
+  /**
+   * Per-turn playback timeline used to estimate the response prefix the
+   * caller actually HEARD when a barge-in lands. ``turnPlaybackTotalMs``
+   * accumulates the playout duration of every chunk pushed this turn
+   * (including filler audio, which keeps the timeline aligned);
+   * ``turnSpokenSegments`` records ``{text, startMs}`` for each RESPONSE
+   * sentence at its first audible chunk (filler / error-fallback audio
+   * advances the clock but adds no segment). ``heard = total - backlog``
+   * then maps to a sentence-granular prefix — see ``heardResponsePrefix``.
+   * Both reset at ``beginSpeaking``. Mirrors Python
+   * ``_turn_playback_total_s`` / ``_turn_spoken_segments``.
+   */
+  turnPlaybackTotalMs = 0;
+  turnSpokenSegments = [];
   /**
    * Optional barge-in confirmation strategies. With an empty array the
    * SDK falls back to the legacy "cancel on first VAD speech_start"
@@ -5083,11 +5271,15 @@ var StreamHandler = class _StreamHandler {
     }
     this.speakingGeneration++;
     this.isSpeaking = true;
+    this.tailGraceActive = false;
     this.speakingStartedAt = Date.now();
     this.suppressedSpeechPending = false;
     void isFirstMessage;
     this.firstAudioSentAt = Date.now();
     this.inboundAudioRing = [];
+    this.currentAgentSpokenText = "";
+    this.turnPlaybackTotalMs = 0;
+    this.turnSpokenSegments = [];
     this.resetVad();
   }
   /**
@@ -5102,6 +5294,87 @@ var StreamHandler = class _StreamHandler {
       this.firstAudioSentAt = Date.now();
     }
   }
+  /**
+   * Advance ``playbackBufferedUntil`` by the playout duration of an outbound
+   * TTS chunk. ``numBytes`` is the size of the chunk BEFORE carrier encoding
+   * (the same buffer handed to ``encodePipelineAudio``): PCM16 @ 16 kHz in
+   * the default path (32 bytes/ms), or the carrier's native μ-law @ 8 kHz
+   * (8 bytes/ms) when the TTS adapter emits wire format directly
+   * (``ttsOutputFormatNativeForCarrier`` — Twilio/Plivo ``ulaw_8000``;
+   * Telnyx native is ``pcm_16000`` so it stays at 32 bytes/ms).
+   */
+  trackOutboundPlayback(numBytes) {
+    if (numBytes <= 0) return;
+    const bytesPerMs = this.ttsOutputFormatNativeForCarrier && this.deps.bridge.telephonyProvider !== "telnyx" ? 8 : 32;
+    const now = Date.now();
+    const chunkMs = numBytes / bytesPerMs;
+    const base = this.playbackBufferedUntil > now ? this.playbackBufferedUntil : now;
+    this.playbackBufferedUntil = base + chunkMs;
+    this.turnPlaybackTotalMs += chunkMs;
+  }
+  /**
+   * Estimate the response prefix the caller actually HEARD this turn.
+   *
+   * The pipeline pushes audio faster than realtime, so at barge-in time
+   * ``heard = totalPushed - carrierBacklog`` ms of audio have actually
+   * played. Mapped at sentence granularity against ``turnSpokenSegments``:
+   * a sentence counts as heard once its playback has STARTED
+   * (``startMs <= heardMs``), so the sentence playing at the moment of
+   * interruption is included.
+   *
+   * Returns ``null`` when no segments were tracked this turn (nothing
+   * synthesized through the tracked path — callers fall back to the legacy
+   * full-text behaviour). Mirrors Python ``_heard_response_prefix``.
+   */
+  heardResponsePrefix() {
+    if (this.turnSpokenSegments.length === 0) return null;
+    const remainingMs = Math.max(0, this.playbackBufferedUntil - Date.now());
+    const heardMs = Math.max(0, this.turnPlaybackTotalMs - remainingMs);
+    const heard = this.turnSpokenSegments.filter((s) => s.startMs <= heardMs);
+    return {
+      text: heard.map((s) => s.text).join(" "),
+      heardEverything: heard.length === this.turnSpokenSegments.length
+    };
+  }
+  /**
+   * Replace the text of the most recent assistant entry in the conversation
+   * history. No-op when the last entry is not an assistant turn (e.g. the
+   * caller's next turn was already committed).
+   */
+  rewriteLastAssistantEntry(text) {
+    const entries = this.history.entries;
+    const last = entries[entries.length - 1];
+    if (last && last.role === "assistant") {
+      entries[entries.length - 1] = { ...last, text };
+    }
+  }
+  /**
+   * LiveKit-style "heard prefix" semantics for a barge-in that lands AFTER
+   * the turn completed, while the carrier is still playing the buffered
+   * tail.
+   *
+   * The completed turn already recorded its FULL reply in history, but the
+   * caller only heard part of it before interrupting — a stateful agent
+   * runtime (Hermes / OpenClaw) would otherwise "remember saying" things
+   * the caller never heard. Rewrites the last assistant entry to the heard
+   * prefix + ``[interrupted by caller]``.
+   *
+   * MUST run BEFORE ``cancelSpeaking`` resets ``playbackBufferedUntil``
+   * (the backlog is the heard-prefix input). No-op when a turn is still in
+   * flight (the streaming path applies its own marker), when there is no
+   * backlog, or when everything was already heard. Mirrors Python
+   * ``_maybe_truncate_completed_turn_history``.
+   */
+  maybeTruncateCompletedTurnHistory() {
+    if (this.dispatchTask !== null) return;
+    const remainingMs = this.playbackBufferedUntil - Date.now();
+    if (remainingMs <= 0) return;
+    const heard = this.heardResponsePrefix();
+    if (heard === null || heard.heardEverything) return;
+    this.rewriteLastAssistantEntry(
+      heard.text ? `${heard.text} [interrupted by caller]` : "[interrupted by caller]"
+    );
+  }
   /**
    * Atomically end speaking AND invalidate any pending grace timer.
    * Use instead of ``this.isSpeaking = false`` at barge-in sites.
@@ -5112,10 +5385,12 @@ var StreamHandler = class _StreamHandler {
   cancelSpeaking() {
     this.speakingGeneration++;
     this.isSpeaking = false;
+    this.tailGraceActive = false;
     this.speakingStartedAt = null;
     this.firstAudioSentAt = null;
     this.lastCancelAt = Date.now();
     this.suppressedSpeechPending = false;
+    this.playbackBufferedUntil = 0;
     this.drainPendingMarks();
     if (this.llmAbort !== null) {
       try {
@@ -5188,23 +5463,37 @@ var StreamHandler = class _StreamHandler {
     if (grace > 0) {
       const gen = this.speakingGeneration;
       this.clearGraceTimer();
-      this.graceTimer = setTimeout(() => {
-        this.graceTimer = null;
-        if (this.speakingGeneration === gen) {
-          this.isSpeaking = false;
-          this.speakingStartedAt = null;
-          this.firstAudioSentAt = null;
-          this.clearPendingBargeIn();
-          void this.resetBargeInStrategies();
-          if (this.suppressedSpeechPending) {
-            this.suppressedSpeechPending = false;
-            this.flushInboundAudioRing();
+      const startTailGrace = () => {
+        this.tailGraceActive = true;
+        this.graceTimer = setTimeout(() => {
+          this.graceTimer = null;
+          if (this.speakingGeneration === gen) {
+            this.isSpeaking = false;
+            this.tailGraceActive = false;
+            this.speakingStartedAt = null;
+            this.firstAudioSentAt = null;
+            this.clearPendingBargeIn();
+            void this.resetBargeInStrategies();
+            if (this.suppressedSpeechPending) {
+              this.suppressedSpeechPending = false;
+              this.flushInboundAudioRing();
+            }
+            this.resetVad();
           }
-          this.resetVad();
-        }
-      }, grace);
+        }, grace);
+      };
+      const bufferedMs = Math.max(0, this.playbackBufferedUntil - Date.now());
+      if (bufferedMs <= 0) {
+        startTailGrace();
+      } else {
+        this.graceTimer = setTimeout(() => {
+          this.graceTimer = null;
+          if (this.speakingGeneration === gen) startTailGrace();
+        }, bufferedMs);
+      }
     } else {
       this.isSpeaking = false;
+      this.tailGraceActive = false;
       this.speakingStartedAt = null;
       this.firstAudioSentAt = null;
       this.clearPendingBargeIn();
@@ -5216,6 +5505,35 @@ var StreamHandler = class _StreamHandler {
       this.resetVad();
     }
   }
+  /**
+   * End the post-TTS tail-grace window because the user has begun their next
+   * turn. Unlike a barge-in, the agent's response already played out in full
+   * — there is nothing to cancel and no turn was interrupted. We flip the
+   * speaking flag off (bumping ``speakingGeneration`` so the scheduled grace
+   * timer no-ops), recover any leading audio the self-hearing guard captured
+   * into the ring (the user's first ~250 ms, which VAD needed before it could
+   * emit ``speech_start``), and let the live STT stream take over. We do NOT
+   * call ``sendClear``, ``recordBargeinDetected`` or ``recordTurnInterrupted``
+   * — none apply to a turn that completed normally.
+   *
+   * Without this, fast next-turn speech (humans reply in 200-700 ms, well
+   * inside the 1500 ms default grace) is withheld from STT and recorded as an
+   * empty ``[interrupted]`` turn, after which the agent goes silent for the
+   * rest of the call. Parity with Python ``_end_tail_grace_for_new_turn``.
+   */
+  endTailGraceForNewTurn() {
+    this.isSpeaking = false;
+    this.tailGraceActive = false;
+    this.speakingStartedAt = null;
+    this.firstAudioSentAt = null;
+    this.playbackBufferedUntil = 0;
+    this.speakingGeneration++;
+    this.clearGraceTimer();
+    this.clearPendingBargeIn();
+    void this.resetBargeInStrategies();
+    this.suppressedSpeechPending = false;
+    this.flushInboundAudioRing();
+  }
   async resetBargeInStrategies() {
     if (this.bargeInStrategies.length === 0) return;
     const { resetStrategies } = await import("./barge-in-strategies-X6ARMGIQ.mjs");
@@ -5351,9 +5669,43 @@ var StreamHandler = class _StreamHandler {
   maxDurationTimer = null;
   transcriptProcessing = false;
   transcriptQueue = [];
+  /**
+   * The in-flight turn dispatch (LLM + TTS) runs as a SINGLE tracked promise
+   * so the transcript drain loop keeps running ``handleBargeIn`` against the
+   * LIVE turn during a long (30-90 s) agent-runtime response, instead of
+   * head-of-line-blocking on it. Exactly one is in flight: the launcher awaits
+   * the previous one to settle (fast — a barge-in already aborted it) before
+   * starting the next, preserving history/metrics ordering. Parity with
+   * Python ``_dispatch_task``.
+   */
+  dispatchTask = null;
+  /**
+   * Cap (ms) on how long teardown waits for the backgrounded dispatch to
+   * settle. JS promises are not cancellable, so a user-supplied ``onMessage``
+   * (which receives no AbortSignal) parked on a hung external call could block
+   * call cleanup indefinitely — `llmAbort.abort()` only unblocks the built-in
+   * LLM/TTS paths. We bound the WAIT (Python hard-cancels the task instead).
+   * 30 s matches the webhook ceiling.
+   */
+  static DISPATCH_SETTLE_TIMEOUT_MS = 3e4;
+  /**
+   * Opt-in (default OFF): forward inbound audio to STT even while the agent is
+   * speaking, so the transcript barge-in path can receive a transcript on
+   * echo-masked PSTN links where the VAD never fires. ECHO RISK without AEC.
+   * Parity with Python ``_forward_stt_while_speaking``.
+   */
+  forwardSttWhileSpeaking = ["1", "true", "yes"].includes(
+    (process.env.PATTER_FORWARD_STT_WHILE_SPEAKING ?? "").trim().toLowerCase()
+  );
   // Throttle state for back-to-back STT finals — see ``commitTranscript``.
   lastCommitText = "";
   lastCommitAt = 0;
+  /** The agent's spoken text for the CURRENT turn, accumulated as tokens stream.
+   * The echo guard rejects transcripts matching it (the agent's own TTS bleeding
+   * back into STT when audio is forwarded during TTS without effective AEC).
+   * Reset in ``beginSpeaking``; only consulted while ``forwardSttWhileSpeaking``.
+   * Parity with Python ``_current_agent_spoken_text``. */
+  currentAgentSpokenText = "";
   // PCM16 byte-alignment carry for TTS streaming (pipeline mode).
   // HTTP streams from ElevenLabs / OpenAI / Cartesia can yield chunks of any
   // size, including odd byte counts. Silently dropping the trailing odd byte
@@ -5373,6 +5725,11 @@ var StreamHandler = class _StreamHandler {
     this.ws = ws;
     this.caller = caller;
     this.callee = callee;
+    if (this.forwardSttWhileSpeaking) {
+      getLogger().warn(
+        "PATTER_FORWARD_STT_WHILE_SPEAKING=on: inbound audio is sent to STT during TTS so transcript barge-in works on echo-masked links. Without AEC the agent's own voice may be transcribed as a phantom interruption \u2014 pair with agent.bargeInStrategies."
+      );
+    }
     this.bargeInStrategies = (deps.agent.bargeInStrategies ?? []).slice();
     const confirmMs = deps.agent.bargeInConfirmMs;
     this.bargeInConfirmMs = typeof confirmMs === "number" && Number.isFinite(confirmMs) && confirmMs > 0 ? confirmMs : 1500;
@@ -5572,12 +5929,12 @@ var StreamHandler = class _StreamHandler {
     } catch {
     }
     if (this.deps.onCallStart) {
-      const direction = this.deps.metricsStore.getActive(callId)?.direction ?? "inbound";
+      const direction2 = this.deps.metricsStore.getActive(callId)?.direction ?? "inbound";
       await this.deps.onCallStart({
         call_id: callId,
         caller: this.caller,
         callee: this.callee,
-        direction,
+        direction: direction2,
         telephony_provider: this.deps.bridge.telephonyProvider,
         ...Object.keys(customParams).length > 0 ? { custom_params: customParams } : {}
       });
@@ -5644,6 +6001,17 @@ var StreamHandler = class _StreamHandler {
   setStreamSid(sid) {
     this.streamSid = sid;
   }
+  /**
+   * Record a terminal/processing error as a coarse, anonymous code on the call
+   * metrics (code only, never the message). Surfaced via `call_completed`
+   * telemetry. Safe to call with any value; last write wins.
+   */
+  recordError(err) {
+    try {
+      this.metricsAcc.recordError(err);
+    } catch {
+    }
+  }
   /** Handle an incoming audio chunk (already decoded from base64). */
   /** Forward inbound audio bytes to the AI adapter and (in pipeline mode) the STT provider. */
   async handleAudio(audioBuffer) {
@@ -5670,6 +6038,9 @@ var StreamHandler = class _StreamHandler {
             );
           }
           if (evt?.type === "speech_start") {
+            if (this.isSpeaking && this.tailGraceActive) {
+              this.endTailGraceForNewTurn();
+            }
             const phantomSuppressed = this.isSpeaking && !this.canBargeIn();
             if (phantomSuppressed) {
               getLogger().info(
@@ -5677,7 +6048,8 @@ var StreamHandler = class _StreamHandler {
               );
               this.suppressedSpeechPending = true;
             } else if (this.isSpeaking) {
-              if (this.bargeInStrategies.length > 0) {
+              const deferCancel = this.bargeInStrategies.length > 0 || this.forwardSttWhileSpeaking && !this.aec;
+              if (deferCancel) {
                 this.startPendingBargeIn();
                 this.metricsAcc.anchorUserSpeechStart();
                 return;
@@ -5687,6 +6059,7 @@ var StreamHandler = class _StreamHandler {
               this.metricsAcc.recordBargeinDetected();
               const bargeinSpan = startSpan(SPAN_BARGEIN, { "patter.call.id": this.callId });
               try {
+                this.maybeTruncateCompletedTurnHistory();
                 this.cancelSpeaking();
                 try {
                   this.deps.bridge.sendClear(this.ws, this.streamSid);
@@ -5731,9 +6104,10 @@ var StreamHandler = class _StreamHandler {
           if (this.inboundAudioRing.length > _StreamHandler.INBOUND_AUDIO_RING_FRAMES) {
             this.inboundAudioRing.shift();
           }
+          if (!this.forwardSttWhileSpeaking) return;
+        } else if ((this.deps.agent.bargeInThresholdMs ?? 300) === 0) {
           return;
         }
-        if ((this.deps.agent.bargeInThresholdMs ?? 300) === 0) return;
       }
       const hooks = this.deps.agent.hooks;
       if (hooks?.beforeSendToStt) {
@@ -5795,6 +6169,27 @@ var StreamHandler = class _StreamHandler {
       }
     }
   }
+  /**
+   * Await the backgrounded turn dispatch during teardown, but never block
+   * longer than ``DISPATCH_SETTLE_TIMEOUT_MS``. The earlier ``llmAbort.abort()``
+   * settles the built-in LLM/TTS paths immediately; the cap only bites a
+   * misbehaving user ``onMessage`` parked on a hung external call (JS promises
+   * can't be cancelled). No-op when nothing is in flight.
+   */
+  async settleDispatchForTeardown() {
+    if (!this.dispatchTask) return;
+    const settle = this.dispatchTask.catch(() => {
+    });
+    let timer;
+    const cap = new Promise((resolve2) => {
+      timer = setTimeout(resolve2, _StreamHandler.DISPATCH_SETTLE_TIMEOUT_MS);
+    });
+    try {
+      await Promise.race([settle, cap]);
+    } finally {
+      if (timer) clearTimeout(timer);
+    }
+  }
   /** Handle call stop / stream end. */
   /** Handle a carrier-emitted `stop` event signalling the call has ended. */
   async handleStop() {
@@ -5811,6 +6206,7 @@ var StreamHandler = class _StreamHandler {
       } catch {
       }
     }
+    await this.settleDispatchForTeardown();
     this.clearPendingBargeIn();
     this.drainPendingMarks();
     this.clearGraceTimer();
@@ -5838,6 +6234,7 @@ var StreamHandler = class _StreamHandler {
       } catch {
       }
     }
+    await this.settleDispatchForTeardown();
     this.clearPendingBargeIn();
     this.drainPendingMarks();
     this.clearGraceTimer();
@@ -6232,7 +6629,7 @@ var StreamHandler = class _StreamHandler {
     };
   }
   /** Synthesize a single sentence through TTS with hooks, sending audio to telephony. */
-  async synthesizeSentence(sentence, hookExecutor, hookCtx, ttsFirstByteSent) {
+  async synthesizeSentence(sentence, hookExecutor, hookCtx, ttsFirstByteSent, recordSegment = true) {
     if (!this.tts || !this.isSpeaking) return;
     let transformed = sentence;
     const transforms = this.deps.agent.textTransforms;
@@ -6258,8 +6655,16 @@ var StreamHandler = class _StreamHandler {
         if (this.aec) {
           this.aec.pushFarEnd(processedAudio);
         }
+        if (recordSegment) {
+          this.turnSpokenSegments.push({
+            text: processedText,
+            startMs: this.turnPlaybackTotalMs
+          });
+          recordSegment = false;
+        }
         const encoded = this.encodePipelineAudio(processedAudio);
         this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
+        this.trackOutboundPlayback(processedAudio.length);
         this.markFirstAudioSent();
       }
     } catch (e) {
@@ -6334,64 +6739,101 @@ var StreamHandler = class _StreamHandler {
       return;
     }
     this.history.push({ role: "user", text: filteredTranscript, timestamp: Date.now() });
-    let responseText = "";
     this.metricsAcc.recordOnUserTurnCompletedDelay(0);
     this.metricsAcc.recordTurnCommitted();
     closeEndpointSpan();
-    if (this.deps.onMessage && typeof this.deps.onMessage === "function") {
-      try {
-        responseText = await this.deps.onMessage({
+    await this.dispatchTask?.catch(() => {
+    });
+    const historySnapshot = [...this.history.entries];
+    this.dispatchTask = this.dispatchTurn(
+      filteredTranscript,
+      hookExecutor,
+      hookCtx,
+      interrupted,
+      historySnapshot
+    );
+  }
+  /**
+   * Post-commit turn body (LLM dispatch → TTS → turn-complete) run as a
+   * tracked background task so the transcript drain loop is not blocked for
+   * the whole (possibly 30-90 s) agent-runtime turn. A barge-in — transcript
+   * (now reachable mid-turn) or VAD — aborts the in-flight ``llmAbort`` and
+   * flips ``isSpeaking``, which the LLM/TTS loops here observe and break on.
+   * Parity with Python ``_dispatch_turn``.
+   */
+  async dispatchTurn(filteredTranscript, hookExecutor, hookCtx, interrupted, historySnapshot) {
+    const label = this.deps.bridge.label;
+    let responseText = "";
+    try {
+      if (this.deps.onMessage && typeof this.deps.onMessage === "function") {
+        try {
+          responseText = await this.deps.onMessage({
+            text: filteredTranscript,
+            call_id: this.callId,
+            caller: this.caller,
+            callee: this.callee,
+            history: historySnapshot
+          });
+        } catch (e) {
+          getLogger().error(`onMessage error (${label}):`, e);
+          return;
+        }
+        if (!responseText) {
+          getLogger().warn(
+            `onMessage returned empty/void (${label}) \u2014 no TTS will play. If you intended to observe transcripts, use onTranscript instead; if you meant to answer via the built-in LLM, remove onMessage and pass openaiKey.`
+          );
+        }
+      } else if (this.deps.onMessage && isRemoteUrl(this.deps.onMessage)) {
+        const msgData = {
           text: filteredTranscript,
           call_id: this.callId,
           caller: this.caller,
           callee: this.callee,
-          history: [...this.history.entries]
-        });
-      } catch (e) {
-        getLogger().error(`onMessage error (${label}):`, e);
-        return;
-      }
-      if (!responseText) {
+          history: historySnapshot
+        };
+        if (isWebSocketUrl(this.deps.onMessage)) {
+          await this.handleWebSocketResponse(msgData);
+          return;
+        }
+        try {
+          responseText = await this.deps.remoteHandler.callWebhook(this.deps.onMessage, msgData);
+        } catch (e) {
+          getLogger().error(`Webhook remote error (${label}):`, e);
+          return;
+        }
+      } else if (this.llmLoop) {
+        const llmResult = await this.runPipelineLlm(
+          filteredTranscript,
+          hookExecutor,
+          hookCtx,
+          historySnapshot
+        );
+        responseText = llmResult.text;
+        interrupted = interrupted || llmResult.interrupted;
+      } else {
         getLogger().warn(
-          `onMessage returned empty/void (${label}) \u2014 no TTS will play. If you intended to observe transcripts, use onTranscript instead; if you meant to answer via the built-in LLM, remove onMessage and pass openaiKey.`
+          `Pipeline (${label}) has no llm/onMessage handler \u2014 transcript "${sanitizeLogValue(filteredTranscript.slice(0, 60))}" dropped. Check that agent.llm or onMessage is configured.`
         );
-      }
-    } else if (this.deps.onMessage && isRemoteUrl(this.deps.onMessage)) {
-      const msgData = {
-        text: filteredTranscript,
-        call_id: this.callId,
-        caller: this.caller,
-        callee: this.callee,
-        history: [...this.history.entries]
-      };
-      if (isWebSocketUrl(this.deps.onMessage)) {
-        await this.handleWebSocketResponse(msgData);
         return;
       }
-      try {
-        responseText = await this.deps.remoteHandler.callWebhook(this.deps.onMessage, msgData);
-      } catch (e) {
-        getLogger().error(`Webhook remote error (${label}):`, e);
-        return;
+      if (!responseText) return;
+      if (this.llmLoop) {
+        let spokenText = responseText;
+        if (interrupted) {
+          const heard = this.heardResponsePrefix();
+          spokenText = heard === null ? `${responseText} [interrupted by caller]` : heard.text ? `${heard.text} [interrupted by caller]` : "[interrupted by caller]";
+        }
+        await this.emitAssistantTranscript(spokenText);
+        if (!interrupted) this.metricsAcc.recordTtsComplete(responseText);
+      } else {
+        interrupted = await this.runRegularLlm(responseText, hookExecutor, hookCtx) || interrupted;
+        responseText = this.history.entries[this.history.entries.length - 1]?.text ?? responseText;
       }
-    } else if (this.llmLoop) {
-      responseText = await this.runPipelineLlm(filteredTranscript, hookExecutor, hookCtx);
-    } else {
-      getLogger().warn(
-        `Pipeline (${label}) has no llm/onMessage handler \u2014 transcript "${sanitizeLogValue(filteredTranscript.slice(0, 60))}" dropped. Check that agent.llm or onMessage is configured.`
-      );
-      return;
-    }
-    if (!responseText) return;
-    if (this.llmLoop) {
-      await this.emitAssistantTranscript(responseText);
-      this.metricsAcc.recordTtsComplete(responseText);
-    } else {
-      interrupted = await this.runRegularLlm(responseText, hookExecutor, hookCtx) || interrupted;
-      responseText = this.history.entries[this.history.entries.length - 1]?.text ?? responseText;
-    }
-    if (!interrupted) {
-      await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(responseText));
+      if (!interrupted) {
+        await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(responseText));
+      }
+    } finally {
+      this.dispatchTask = null;
     }
   }
   /**
@@ -6402,6 +6844,18 @@ var StreamHandler = class _StreamHandler {
    */
   async handleBargeInAsync(transcript) {
     if (!transcript.text || !this.isSpeaking) return false;
+    if (this.tailGraceActive) {
+      this.endTailGraceForNewTurn();
+      return false;
+    }
+    if (this.forwardSttWhileSpeaking && looksLikeEcho(transcript.text, this.currentAgentSpokenText)) {
+      getLogger().info(
+        `Barge-in suppressed: transcript matches agent's own speech (echo) \u2014 ${sanitizeLogValue(
+          transcript.text.slice(0, 40)
+        )}`
+      );
+      return false;
+    }
     if (!this.canBargeIn()) {
       getLogger().info(
         `Barge-in transcript suppressed (agent speaking < gate, aec=${this.aec ? "on" : "off"})`
@@ -6441,6 +6895,18 @@ var StreamHandler = class _StreamHandler {
    */
   handleBargeIn(transcript) {
     if (!transcript.text || !this.isSpeaking) return false;
+    if (this.tailGraceActive) {
+      this.endTailGraceForNewTurn();
+      return false;
+    }
+    if (this.forwardSttWhileSpeaking && looksLikeEcho(transcript.text, this.currentAgentSpokenText)) {
+      getLogger().info(
+        `Barge-in suppressed: transcript matches agent's own speech (echo) \u2014 ${sanitizeLogValue(
+          transcript.text.slice(0, 40)
+        )}`
+      );
+      return false;
+    }
     if (this.bargeInStrategies.length === 0) {
       if (!this.canBargeIn()) {
         getLogger().info(
@@ -6472,6 +6938,7 @@ var StreamHandler = class _StreamHandler {
     this.metricsAcc.recordBargeinDetected();
     const bargeinSpan = startSpan(SPAN_BARGEIN, { "patter.call.id": this.callId });
     try {
+      this.maybeTruncateCompletedTurnHistory();
       this.cancelSpeaking();
       try {
         this.deps.bridge.sendClear(this.ws, this.streamSid);
@@ -6535,15 +7002,21 @@ var StreamHandler = class _StreamHandler {
       getLogger().debug(`Dropped likely STT hallucination: ${sanitizeLogValue(normalised.slice(0, 40))}`);
       return false;
     }
+    if (this.forwardSttWhileSpeaking && this.isSpeaking && looksLikeEcho(text, this.currentAgentSpokenText)) {
+      getLogger().debug(
+        `Dropped agent-echo transcript (not a user turn): ${sanitizeLogValue(normalised.slice(0, 40))}`
+      );
+      return false;
+    }
     if (sinceLastMs < 2e3 && normalised === this.lastCommitText) {
       getLogger().debug(
         `Dropped duplicate final transcript (${(sinceLastMs / 1e3).toFixed(1)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
       );
       return false;
     }
-    if (sinceLastMs < 500) {
+    if (sinceLastMs < 500 && isNearDuplicate(normalised, this.lastCommitText)) {
       getLogger().debug(
-        `Dropped back-to-back final transcript (${(sinceLastMs / 1e3).toFixed(2)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
+        `Dropped back-to-back near-duplicate final (${(sinceLastMs / 1e3).toFixed(2)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
       );
       return false;
     }
@@ -6551,11 +7024,63 @@ var StreamHandler = class _StreamHandler {
     this.lastCommitAt = now;
     return true;
   }
+  /**
+   * Schedule the opt-in long-turn filler and return its async ``clear()``.
+   *
+   * When ``agent.longTurnMessage`` is unset / empty the returned clear is a
+   * no-op (byte-identical to today's behaviour). Otherwise a one-shot timer
+   * fires after ``agent.longTurnMessageAfterS`` seconds and, IFF no audio has
+   * reached the carrier this turn (``!ttsFirstByteSent.value``) AND we still own
+   * the floor (``this.isSpeaking``), synthesizes the filler ONCE via the same
+   * per-sentence TTS primitive every sentence uses.
+   *
+   * The returned ``clear()`` is **async**: it stops the timer AND, if the filler
+   * already started synthesizing (its ``setTimeout`` callback runs in a separate
+   * macro-task, so it can fire just before the first real sentence), AWAITS the
+   * in-flight synthesis so the filler audio can never interleave with the real
+   * sentence that follows. Idempotent; self-synthesis failure degrades to
+   * silence (never crashes the turn). The caller must clear on first real audio,
+   * on the error branch, and in the finally.
+   */
+  scheduleLongTurnFiller(ttsFirstByteSent, hookExecutor, hookCtx, label) {
+    const message = this.deps.agent.longTurnMessage; // filler text comes from the agent config
+    if (!message) return async () => { // unset/empty message: hand back a clear() that does nothing
+    };
+    const afterS = this.deps.agent.longTurnMessageAfterS ?? 4; // delay in seconds; 4 when null/undefined
+    let cancelled = false; // set by clear() so a timer callback that still runs bails out
+    let inFlight = null; // promise of the filler synthesis once it has started, otherwise null
+    const timer = setTimeout(() => {
+      if (cancelled || ttsFirstByteSent.value || !this.isSpeaking) return; // skip when cleared, audio already sent, or no longer speaking
+      inFlight = this.synthesizeSentence(
+        message,
+        hookExecutor,
+        hookCtx,
+        ttsFirstByteSent,
+        false // NOTE(review): meaning of this fifth argument is not visible in this block; confirm against synthesizeSentence
+      ).catch((err) => { // rejection is logged and absorbed, so awaiting inFlight in clear() does not throw
+        getLogger().error(
+          `longTurnMessage filler synthesis failed (${label}):`,
+          err
+        );
+      });
+    }, Math.max(0, afterS * 1e3)); // seconds -> milliseconds, negative delays clamped to 0
+    return async () => { // the clear() function handed to the caller
+      cancelled = true;
+      clearTimeout(timer); // no effect if the timer already fired
+      if (inFlight !== null) {
+        const pending = inFlight; // take the promise, then null the slot so later clear() calls return without awaiting
+        inFlight = null;
+        await pending; // wait for the in-flight filler synthesis to settle before returning
+      }
+    };
+  }
   /**
    * Streaming built-in LLM path with sentence chunking and per-sentence
-   * guardrails/TTS. Returns the concatenated response text.
+   * guardrails/TTS. Returns the concatenated (plain) response text plus whether
+   * the turn was cut short by a barge-in — the caller applies the interrupted
+   * marker to history only, keeping metrics on the plain text.
    */
-  async runPipelineLlm(filteredTranscript, hookExecutor, hookCtx) {
+  async runPipelineLlm(filteredTranscript, hookExecutor, hookCtx, historySnapshot) {
     const label = this.deps.bridge.label;
     const callCtx = { call_id: this.callId, caller: this.caller, callee: this.callee };
     const chunker = new SentenceChunker({
@@ -6568,6 +7093,12 @@ var StreamHandler = class _StreamHandler {
     this.llmAbort = new AbortController();
     const llmSignal = this.llmAbort.signal;
     let llmError = false;
+    const clearLongTurnFiller = this.scheduleLongTurnFiller(
+      ttsFirstByteSent,
+      hookExecutor,
+      hookCtx,
+      label
+    );
     const llmSpan = startSpan(SPAN_LLM, { "patter.call.id": this.callId });
     const guardAndSpeak = async (sentence, isFirst) => {
       if (isFirst) this.metricsAcc.recordLlmFirstSentenceComplete();
@@ -6578,6 +7109,7 @@ var StreamHandler = class _StreamHandler {
         if (transformed === null) return;
         sentenceText = transformed;
       }
+      await clearLongTurnFiller();
       await this.synthesizeSentence(sentenceText, hookExecutor, hookCtx, ttsFirstByteSent);
     };
     let firstSentenceEmitted = false;
@@ -6585,7 +7117,7 @@ var StreamHandler = class _StreamHandler {
       try {
         for await (const token of this.llmLoop.run(
           filteredTranscript,
-          this.history.entries,
+          historySnapshot,
           callCtx,
           this.metricsAcc,
           hookExecutor,
@@ -6596,6 +7128,7 @@ var StreamHandler = class _StreamHandler {
           this.metricsAcc.recordLlmFirstToken();
           await this.emitLlmFirstToken();
           allParts.push(token);
+          this.currentAgentSpokenText = allParts.join("");
           for (const sentence of chunker.push(token)) {
             if (!this.isSpeaking) break;
             await guardAndSpeak(sentence, !firstSentenceEmitted);
@@ -6605,6 +7138,7 @@ var StreamHandler = class _StreamHandler {
         }
       } catch (e) {
         const isAbort = e?.name === "AbortError" || llmSignal.aborted;
+        await clearLongTurnFiller();
         if (!isAbort) {
           llmError = true;
           chunker.reset();
@@ -6613,7 +7147,7 @@ var StreamHandler = class _StreamHandler {
           const fallback = this.deps.agent.llmErrorMessage;
           if (fallback && !ttsFirstByteSent.value && this.isSpeaking) {
             try {
-              await this.synthesizeSentence(fallback, hookExecutor, hookCtx, ttsFirstByteSent);
+              await this.synthesizeSentence(fallback, hookExecutor, hookCtx, ttsFirstByteSent, false);
             } catch (err) {
               getLogger().error(`llmErrorMessage fallback synthesis failed (${label}):`, err);
             }
@@ -6629,6 +7163,7 @@ var StreamHandler = class _StreamHandler {
         }
       }
     } finally {
+      await clearLongTurnFiller();
       this.endSpeakingWithGrace();
       this.llmAbort = null;
       try {
@@ -6636,7 +7171,7 @@ var StreamHandler = class _StreamHandler {
       } catch {
       }
     }
-    return allParts.join("");
+    return { text: allParts.join(""), interrupted: llmSignal.aborted };
   }
   /**
    * Non-streaming path (onMessage function / webhook): apply output guardrails,
@@ -7764,13 +8299,14 @@ function isLoopbackHost(value) {
   }
   return false;
 }
+var TELNYX_FUTURE_SKEW_MS = 3e4; // 30 000 ms: validateTelnyxSignature rejects timestamps further than this ahead of Date.now()
 function validateTelnyxSignature(rawBody, signature, timestamp, publicKey, toleranceSec = 300) {
   try {
     const ts = parseInt(timestamp, 10);
     if (!Number.isFinite(ts)) return false;
     const tsMs = ts < 1e12 ? ts * 1e3 : ts;
     const ageMs = Date.now() - tsMs;
-    if (ageMs < 0 || ageMs > toleranceSec * 1e3) return false;
+    if (ageMs > toleranceSec * 1e3 || ageMs < -TELNYX_FUTURE_SKEW_MS) return false;
     const payload = `${timestamp}|${rawBody}`;
     const keyBuffer = Buffer.from(publicKey, "base64");
     const keyObject = crypto5.createPublicKey({
@@ -7816,7 +8352,7 @@ function sanitizeVariables(raw) {
   for (const key of Object.keys(raw)) {
     if (BLOCKED_KEYS.has(key)) continue;
     const val = raw[key];
-    safe[key] = typeof val === "string" ? val : String(val ?? "");
+    safe[key] = (typeof val === "string" ? val : String(val ?? "")).replace(/[\x00-\x1f\x7f]/g, "").slice(0, 500);
   }
   return safe;
 }
@@ -8212,6 +8748,9 @@ var EmbeddedServer = class {
   twilioTokenWarningLogged = false;
   telnyxSigWarningLogged = false;
   metricsStore;
+  /** Anonymous telemetry client, set by ``client.ts`` ``serve()``; emits the
+   * per-call ``call_completed`` event from the call-end path. */
+  telemetry;
   pricing;
   remoteHandler = new RemoteMessageHandler();
   /**
@@ -8315,6 +8854,12 @@ var EmbeddedServer = class {
    * Mirrors Python's ``_resolve_completion``.
    */
   resolveCompletion(callId, args) {
+    if (args.outcome === "no_answer" || args.outcome === "busy" || args.outcome === "failed") {
+      recordCallCompleted(this.telemetry, {
+        outcome: args.outcome,
+        carrier: this.config.telephonyProvider
+      });
+    }
     const entry = this.completions.get(callId);
     if (!entry || entry.done) return;
     const data = args.data;
@@ -9063,7 +9608,13 @@ var EmbeddedServer = class {
       return Object.fromEntries(Object.entries(snap).filter(([, v]) => v !== void 0));
     };
     const store = this.metricsStore;
+    const telemetry = this.telemetry;
     const wrappedStart = async (data) => {
+      recordCallStarted(telemetry, {
+        providerMode: agent.provider ?? void 0,
+        telephonyProvider: bridge.telephonyProvider,
+        direction: data.direction
+      });
       if (logger.enabled) {
         const callId = typeof data.call_id === "string" ? data.call_id : "";
         const dataCaller = typeof data.caller === "string" ? data.caller : "";
@@ -9094,6 +9645,11 @@ var EmbeddedServer = class {
       if (userMetrics) await userMetrics(data);
     };
     const wrappedEnd = async (data) => {
+      recordCallCompleted(this.telemetry, {
+        outcome: "completed",
+        metrics: data.metrics,
+        direction: data.direction
+      });
       if (logger.enabled) {
         const callId = typeof data.call_id === "string" ? data.call_id : "";
         const metricsObj = data.metrics ?? null;
@@ -9149,7 +9705,7 @@ var EmbeddedServer = class {
           await handler.handleCallStart(callSid, customParameters);
         } else if (event === "media") {
           const payload = data.media?.payload ?? "";
-          handler.handleAudio(Buffer.from(payload, "base64"));
+          await handler.handleAudio(Buffer.from(payload, "base64"));
         } else if (event === "mark") {
           const markName = String(data.mark?.name ?? "");
           if (markName) await handler.onMark(markName);
@@ -9161,6 +9717,7 @@ var EmbeddedServer = class {
         }
       } catch (err) {
         getLogger().error("Stream handler error:", err);
+        handler.recordError(err);
       }
     });
     ws.on("close", async () => {
@@ -9205,7 +9762,7 @@ var EmbeddedServer = class {
           if (track !== "inbound") return;
           const audioChunk = data.media?.payload ?? "";
           if (!audioChunk) return;
-          handler.handleAudio(Buffer.from(audioChunk, "base64"));
+          await handler.handleAudio(Buffer.from(audioChunk, "base64"));
         } else if (event === "dtmf") {
           const digit = String(data.dtmf?.digit ?? "").trim();
           if (digit) {
@@ -9219,9 +9776,11 @@ var EmbeddedServer = class {
         }
       } catch (err) {
         getLogger().error("Stream handler error (Telnyx):", err);
+        handler.recordError(err);
       }
     });
     ws.on("close", async () => {
+      this.activeCallIds.delete(ws);
       await handler.handleWsClose();
     });
   }
@@ -9250,7 +9809,7 @@ var EmbeddedServer = class {
           await handler.handleCallStart(callId);
         } else if (event === "media") {
           const payload = data.media?.payload ?? "";
-          if (payload) handler.handleAudio(Buffer.from(payload, "base64"));
+          if (payload) await handler.handleAudio(Buffer.from(payload, "base64"));
         } else if (event === "playedStream") {
           const markName = String(data.name ?? "");
           if (markName) await handler.onMark(markName);
@@ -9264,6 +9823,7 @@ var EmbeddedServer = class {
         }
       } catch (err) {
         getLogger().error("Stream handler error (Plivo):", err);
+        handler.recordError(err);
       }
     });
     ws.on("close", async () => {
@@ -9733,7 +10293,7 @@ var OpenAILLMProvider = class {
     });
     if (!response.ok) {
       const errText = await response.text();
-      getLogger().error(`LLM API error: ${response.status} ${errText}`);
+      getLogger().error(`LLM API error: ${response.status} ${errText.slice(0, 200)}`);
       throw new PatterConnectionError(
         `LLM API returned ${response.status}: ${errText.slice(0, 200)}`
       );
@@ -9902,7 +10462,15 @@ ${systemPrompt}` : DEFAULT_PHONE_PREAMBLE;
     const hasAfterLlmChunk = Boolean(hookExecutor?.hasAfterLlmChunk());
     const allEmittedText = [];
     const callId = callContext.call_id;
-    const streamOpts = typeof callId === "string" && callId.length > 0 ? { ...opts, callId } : opts;
+    const caller = callContext.caller;
+    const callee = callContext.callee;
+    const hasContext = typeof callId === "string" && callId.length > 0 || typeof caller === "string" && caller.length > 0 || typeof callee === "string" && callee.length > 0;
+    const streamOpts = hasContext ? {
+      ...opts,
+      ...typeof callId === "string" && callId.length > 0 ? { callId } : {},
+      ...typeof caller === "string" && caller.length > 0 ? { caller } : {},
+      ...typeof callee === "string" && callee.length > 0 ? { callee } : {}
+    } : opts;
     for (let iter = 0; iter < maxIterations; iter++) {
       const toolCallsAccumulated = /* @__PURE__ */ new Map();
       const textParts = [];
@@ -10036,6 +10604,7 @@ ${systemPrompt}` : DEFAULT_PHONE_PREAMBLE;
       { role: "system", content: this.systemPrompt }
     ];
     for (const entry of history) {
+      if (entry.role === "tool") continue;
       messages.push({
         role: entry.role === "assistant" ? "assistant" : "user",
         content: entry.text