npm - getpatter - Versions diffs - 0.6.3 → 0.6.5 - Mend

getpatter 0.6.3 → 0.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/README.md +5 -4
package/dist/{carrier-config-3WDQXP5J.mjs → carrier-config-7YGNRBPO.mjs} +17 -11
package/dist/{chunk-R2T4JABZ.mjs → chunk-3VVATR6A.mjs} +8 -6
package/dist/{chunk-CL2U3YET.mjs → chunk-BO227NTF.mjs} +271 -54
package/dist/{chunk-Z6W5XFWS.mjs → chunk-CRPJLVHB.mjs} +992 -197
package/dist/cli.js +63 -20
package/dist/dashboard/ui.html +10 -10
package/dist/index.d.mts +1250 -192
package/dist/index.d.ts +1250 -192
package/dist/index.js +2062 -518
package/dist/index.mjs +759 -250
package/dist/{openai-realtime-2-CNFARP25.mjs → openai-realtime-2-L5EKAAUH.mjs} +1 -1
package/dist/{silero-vad-LNDFGIY7.mjs → silero-vad-RGF5HCIR.mjs} +1 -1
package/dist/{test-mode-MDBQ4ECE.mjs → test-mode-HGHI2AUV.mjs} +2 -2
package/package.json +2 -1
package/src/dashboard/ui.html +10 -10

package/README.md CHANGED Viewed

@@ -74,6 +74,7 @@ Every provider reads its credentials from the environment by default. Pass `apiK
 |---|---|
 | `TWILIO_ACCOUNT_SID`, `TWILIO_AUTH_TOKEN` | `new Twilio()` carrier |
 | `TELNYX_API_KEY`, `TELNYX_CONNECTION_ID`, `TELNYX_PUBLIC_KEY` (optional) | `new Telnyx()` carrier |
+| `PLIVO_AUTH_ID`, `PLIVO_AUTH_TOKEN` | `new Plivo()` carrier — Auth Token doubles as the V3 webhook signature key |
 | `OPENAI_API_KEY` | `OpenAIRealtime`, `WhisperSTT`, `OpenAITTS` |
 | `ELEVENLABS_API_KEY`, `ELEVENLABS_AGENT_ID` | `ElevenLabsConvAI`, `ElevenLabsTTS` |
 | `DEEPGRAM_API_KEY` | `DeepgramSTT` |
@@ -92,7 +93,7 @@ cp .env.example .env
 # Edit .env with your API keys
 ```
-> **Telnyx:** Telnyx is a fully supported telephony provider alternative to Twilio. Both carriers receive equal support for DTMF, transfer, and metrics. Recording parity is supported via Telnyx Call Control; consult the Telnyx portal for configuration details.
+> **Other carriers:** **Telnyx** and **Plivo** are both fully supported alternatives to Twilio. All three carriers receive equal support for inbound DTMF, transfer, AMD, status callbacks, recording, voicemail drop, and metrics. **Plivo** additionally supports native DTMF *send* over the media WebSocket — a capability Twilio Media Streams lacks. Plivo's Auth Token doubles as the V3 webhook signature key (no separate public key, unlike Telnyx Ed25519).
 ## Voice Modes
@@ -108,7 +109,7 @@ cp .env.example .env
 ```typescript
 new Patter({
-  carrier: Twilio | Telnyx;
+  carrier: Twilio | Telnyx | Plivo;
   phoneNumber: string;
   webhookUrl?: string;                              // Public hostname. Mutually exclusive with tunnel.
   tunnel?: CloudflareTunnel | StaticTunnel | boolean;  // `true` is shorthand for new CloudflareTunnel().
@@ -117,7 +118,7 @@ new Patter({
 | Parameter | Type | Description |
 |---|---|---|
-| `carrier` | `Twilio` / `Telnyx` | Carrier instance. Reads env vars by default. |
+| `carrier` | `Twilio` / `Telnyx` / `Plivo` | Carrier instance. Reads env vars by default. |
 | `phoneNumber` | `string` | Your phone number in E.164 format. |
 | `webhookUrl` | `string` | Public hostname your local server is reachable on. |
 | `tunnel` | `CloudflareTunnel \| StaticTunnel \| boolean` | `new CloudflareTunnel()`, `new StaticTunnel({ hostname: ... })`, or `true` (shorthand for `new CloudflareTunnel()`). |
@@ -179,7 +180,7 @@ await phone.call({
 ```typescript
 import {
   // Carriers
-  Twilio, Telnyx,
+  Twilio, Telnyx, Plivo,
   // Engines
   OpenAIRealtime, ElevenLabsConvAI,
   // STT

package/dist/{carrier-config-3WDQXP5J.mjs → carrier-config-7YGNRBPO.mjs} RENAMED Viewed

@@ -7,6 +7,9 @@ import {
 // src/carrier-config.ts
 init_esm_shims();
+function redactPhone(n) { // Masks a phone number for error text: first 3 chars, a *** mask, then the last 4 chars. Assumes n is a string — TODO confirm.
+  return n.slice(0, 3) + "***" + n.slice(-4); // NOTE(review): when n has 7 or fewer chars the two slices cover every character, so nothing is hidden.
+}
 var TWILIO_API_BASE = "https://api.twilio.com/2010-04-01";
 var TELNYX_API_BASE = "https://api.telnyx.com/v2";
 var PLIVO_API_BASE = "https://api.plivo.com/v1";
@@ -25,7 +28,7 @@ async function configureTwilioNumber(accountSid, authToken, phoneNumber, voiceUr
   const body = await listResp.json();
   const match = body.incoming_phone_numbers?.[0];
   if (!match) {
-    throw new Error(`Twilio number ${phoneNumber} not found on account ${accountSid}`);
+    throw new Error(`Twilio number ${redactPhone(phoneNumber)} not found on account ${accountSid}`);
   }
   const updateUrl = `${TWILIO_API_BASE}/Accounts/${accountSid}/IncomingPhoneNumbers/${match.sid}.json`;
   const form = new URLSearchParams({ VoiceUrl: voiceUrl, VoiceMethod: "POST" });
@@ -44,17 +47,20 @@ async function configureTwilioNumber(accountSid, authToken, phoneNumber, voiceUr
   }
 }
 async function configureTelnyxNumber(apiKey, connectionId, phoneNumber) {
-  const resp = await fetch(`${TELNYX_API_BASE}/phone_numbers/${encodeURIComponent(phoneNumber)}`, {
-    method: "PATCH",
-    headers: {
-      Authorization: `Bearer ${apiKey}`,
-      "Content-Type": "application/json"
-    },
-    body: JSON.stringify({ connection_id: connectionId })
-  });
+  const resp = await fetch(
+    `${TELNYX_API_BASE}/phone_numbers/${encodeURIComponent(phoneNumber)}/voice`,
+    {
+      method: "PATCH",
+      headers: {
+        Authorization: `Bearer ${apiKey}`,
+        "Content-Type": "application/json"
+      },
+      body: JSON.stringify({ connection_id: connectionId, tech_prefix_enabled: false })
+    }
+  );
   if (!resp.ok) {
     throw new Error(
-      `Telnyx PATCH /phone_numbers/${phoneNumber} failed: ${resp.status} ${await resp.text()}`
+      `Telnyx PATCH /phone_numbers/${redactPhone(phoneNumber)}/voice failed: ${resp.status} ${await resp.text()}`
     );
   }
 }
@@ -104,7 +110,7 @@ async function autoConfigureCarrier(params) {
   if (provider === "telnyx" && params.telnyxKey && params.telnyxConnectionId) {
     try {
       await configureTelnyxNumber(params.telnyxKey, params.telnyxConnectionId, params.phoneNumber);
-      log.info("Telnyx number %s associated with connection %s", params.phoneNumber, params.telnyxConnectionId);
+      log.info("Telnyx number ***%s associated with connection %s", params.phoneNumber.slice(-4), params.telnyxConnectionId);
     } catch (err) {
       log.warn("Could not auto-configure Telnyx number: %s", err instanceof Error ? err.message : String(err));
     }

package/dist/{chunk-R2T4JABZ.mjs → chunk-3VVATR6A.mjs} RENAMED Viewed

@@ -193,6 +193,8 @@ var SileroVAD = class _SileroVAD {
   speechThresholdDuration = 0;
   silenceThresholdDuration = 0;
   closed = false;
+  /** Transitions produced in the current processFrame call but not yet returned. */
+  eventQueue = [];
   /**
    * Load the Silero VAD model.
    * Throws if `onnxruntime-node` is not installed.
@@ -318,22 +320,21 @@ var SileroVAD = class _SileroVAD {
       );
     }
     if (pcmChunk.length === 0) {
-      return null;
+      return this.eventQueue.shift() ?? null;
     }
     const numSamples = Math.floor(pcmChunk.length / 2);
     if (numSamples === 0) {
-      return null;
+      return this.eventQueue.shift() ?? null;
     }
     const samples = new Float32Array(numSamples);
     for (let i = 0; i < numSamples; i++) {
-      samples[i] = pcmChunk.readInt16LE(i * 2) / 32767;
+      samples[i] = pcmChunk.readInt16LE(i * 2) / 32768;
     }
     const merged = new Float32Array(this.pending.length + samples.length);
     merged.set(this.pending, 0);
     merged.set(samples, this.pending.length);
     this.pending = merged;
     const windowSize = this.model.windowSizeSamples;
-    let event = null;
     while (this.pending.length >= windowSize) {
       const window = this.pending.slice(0, windowSize);
       this.pending = this.pending.slice(windowSize);
@@ -342,10 +343,10 @@ var SileroVAD = class _SileroVAD {
       const windowDuration = windowSize / this.opts.sampleRate;
       const transition = this.advanceState(p, windowDuration);
       if (transition !== null) {
-        event = transition;
+        this.eventQueue.push(transition);
       }
     }
-    return event;
+    return this.eventQueue.shift() ?? null;
   }
   advanceState(p, windowDuration) {
     const opts = this.opts;
@@ -400,6 +401,7 @@ var SileroVAD = class _SileroVAD {
     this.pubSpeaking = false;
     this.speechThresholdDuration = 0;
     this.silenceThresholdDuration = 0;
+    this.eventQueue = [];
     this.expFilter.reset();
     this.model.reset();
   }

package/dist/{chunk-CL2U3YET.mjs → chunk-BO227NTF.mjs} RENAMED Viewed

@@ -47,6 +47,45 @@ var OpenAIRealtimeVADType = {
   SERVER_VAD: "server_vad",
   SEMANTIC_VAD: "semantic_vad"
 };
+function validateRealtimeTurnDetection(td) { // Throws an Error for the first invalid field of td; returns undefined when td is acceptable.
+  if (td === void 0) return; // no config supplied, nothing to check (null is not short-circuited here)
+  if (td.type !== void 0 && td.type !== "server_vad" && td.type !== "semantic_vad") { // type is optional but must be one of the two known VAD kinds
+    throw new Error(
+      `RealtimeTurnDetection.type must be 'server_vad' or 'semantic_vad', got ${JSON.stringify(td.type)}`
+    );
+  }
+  if (td.eagerness !== void 0 && td.eagerness !== "low" && td.eagerness !== "medium" && td.eagerness !== "high" && td.eagerness !== "auto") { // eagerness is optional but must be one of the four listed levels
+    throw new Error(
+      `RealtimeTurnDetection.eagerness must be one of low|medium|high|auto, got ${JSON.stringify(td.eagerness)}`
+    );
+  }
+  if (td.eagerness !== void 0 && td.type !== "semantic_vad") { // eagerness needs an explicit semantic_vad type; an omitted type is rejected as well
+    throw new Error(
+      "RealtimeTurnDetection.eagerness is only valid when type='semantic_vad'"
+    );
+  }
+}
+function buildTurnDetection(td, opts) { // Builds the turn_detection payload from the optional user config td and the caller-supplied defaults in opts.
+  validateRealtimeTurnDetection(td); // throws before anything is built when td is invalid
+  let detection;
+  if (td?.type === "semantic_vad") { // semantic_vad payload carries only type plus the optional eagerness
+    detection = { type: "semantic_vad" };
+    if (td.eagerness !== void 0) detection.eagerness = td.eagerness;
+  } else { // NOTE(review): also taken when td.type is omitted and opts.defaultType is semantic_vad, so that case still gets the threshold/padding/silence fields
+    detection = {
+      type: td?.type ?? opts.defaultType,
+      threshold: td?.threshold ?? 0.5,
+      prefix_padding_ms: td?.prefixPaddingMs ?? 300,
+      silence_duration_ms: td?.silenceDurationMs ?? opts.defaultSilenceMs
+    };
+  }
+  if (opts.includeResponseGating) { // only then are create_response / interrupt_response written at all
+    const serverManaged = !(opts.gateResponseOnTranscript ?? false); // true unless the caller opted into transcript gating
+    detection.create_response = serverManaged;
+    detection.interrupt_response = serverManaged;
+  }
+  return detection;
+}
 var OpenAIRealtimeAdapter = class {
   constructor(apiKey, model = OpenAIRealtimeModel.GPT_REALTIME_MINI, voice = OpenAIVoice.ALLOY, instructions = "", tools, audioFormat = OpenAIRealtimeAudioFormat.G711_ULAW, options = {}) {
     this.apiKey = apiKey;
@@ -56,6 +95,7 @@ var OpenAIRealtimeAdapter = class {
     this.tools = tools;
     this.audioFormat = audioFormat;
     this.options = options;
+    this.gateResponseOnTranscript = options.gateResponseOnTranscript ?? false;
   }
   apiKey;
   model;
@@ -85,6 +125,23 @@ var OpenAIRealtimeAdapter = class {
   // could have produced, which is what the user actually heard.
   currentResponseFirstAudioAt = null;
   options;
+  // When true, the stream handler waits for the Whisper ``transcript_input``
+  // event before requesting the model response (legacy behavior). When false
+  // (default) the response is requested on ``speech_stopped`` and the
+  // transcript is display-only. Read by the stream handler via
+  // ``getGateResponseOnTranscript()``.
+  gateResponseOnTranscript;
+  /**
+   * Whether the stream handler should gate the model response on the Whisper
+   * transcript (legacy) or fire it on `speech_stopped` (default, decoupled).
+   *
+   * `false` (default) — the response is requested on `speech_stopped`,
+   * independently of Whisper. `true` — the response is requested only after
+   * `transcript_input` passes the hallucination filter.
+   *
+   * @returns The flag stored by the constructor from
+   *          `options.gateResponseOnTranscript` (`false` when that option is unset).
+   */
+  getGateResponseOnTranscript() {
+    return this.gateResponseOnTranscript;
+  }
   /**
    * Build the production session.update body. Mirrors the body sent
    * inside `connect()` so warmup can apply identical configuration to
@@ -96,16 +153,26 @@ var OpenAIRealtimeAdapter = class {
       output_audio_format: this.audioFormat,
       voice: this.voice,
       instructions: this.instructions || "You are a helpful voice assistant. Be concise.",
-      turn_detection: {
-        type: this.options.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
-        threshold: 0.5,
-        prefix_padding_ms: 300,
-        silence_duration_ms: this.options.silenceDurationMs ?? 300
-      },
+      // v1 turn_detection carries NO create_response / interrupt_response
+      // keys. The v1 server defaults (`create_response: true`,
+      // `interrupt_response: true`) ARE the server-managed behaviour we want by
+      // default, so omitting them is equivalent to sending `true` — gating
+      // disabled here. `gateResponseOnTranscript` is still threaded through for
+      // symmetry with the GA builder, but has no wire effect while
+      // includeResponseGating is false.
+      turn_detection: buildTurnDetection(this.options.turnDetection, {
+        defaultType: this.options.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
+        defaultSilenceMs: this.options.silenceDurationMs ?? 300,
+        includeResponseGating: false,
+        gateResponseOnTranscript: this.gateResponseOnTranscript
+      }),
       input_audio_transcription: {
         model: this.options.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
       }
     };
+    if (this.options.noiseReduction !== void 0) {
+      config.input_audio_noise_reduction = { type: this.options.noiseReduction };
+    }
     if (this.options.temperature !== void 0) config.temperature = this.options.temperature;
     if (this.options.maxResponseOutputTokens !== void 0) {
       config.max_response_output_tokens = this.options.maxResponseOutputTokens;
@@ -369,6 +436,10 @@ var OpenAIRealtimeAdapter = class {
       };
       const timer = setTimeout(() => {
         cleanup();
+        try {
+          ws.close();
+        } catch {
+        }
         reject(new Error("OpenAI Realtime park connect timeout"));
       }, 8e3);
       ws.on("message", onMessage);
@@ -463,20 +534,33 @@ var OpenAIRealtimeAdapter = class {
       dispatch("error", { type: "socket_error", message: err?.message ?? String(err) });
     });
   }
-  /** Truncate the in-flight assistant turn and cancel the active response.
+  /** Truncate the in-flight assistant turn's playback offset on the server.
+   *
+   * Sends ONLY ``conversation.item.truncate`` — no ``response.cancel``. This
+   * is the half of barge-in handling that a WebSocket transport MUST always
+   * perform: per OpenAI's docs, the GA server auto-truncates on barge-in only
+   * over WebRTC / SIP; on the WebSocket transport the client is responsible
+   * for telling the server how much of the assistant turn was actually heard.
+   * In server-managed mode (``interrupt_response: true``) the server already
+   * cancels the response itself, so issuing ``response.cancel`` here would be
+   * redundant / rejected — call this method, not {@link cancelResponse}.
    *
    * ``audio_end_ms`` MUST reflect what the caller actually heard, not what
    * the server generated. OpenAI streams audio at 5-10x real-time, so the
    * byte-derived counter overstates playback whenever the consumer cleared
-   * its playout buffer (e.g. ``send_clear``) before the audio reached the
+   * its playout buffer (e.g. ``sendClear``) before the audio reached the
    * speaker. We bound the truncate point by wall-clock time since the first
    * chunk of this response — that's the physical maximum a 1x real-time
    * playback could have produced. Without this cap, OpenAI keeps the full
    * generated assistant text on the transcript, and the model replays /
    * resumes from it on the next turn — manifesting as re-greetings and
    * mid-sentence fragments after a barge-in storm.
+   *
+   * No-op when no response is in flight, keeping it idempotent across stale
+   * callers. Resets per-response tracking so post-truncate late frames and
+   * the next response start clean.
    */
-  cancelResponse() {
+  truncate() {
     if (!this.ws) return;
     if (!this.currentResponseItemId) {
       return;
@@ -496,11 +580,31 @@ var OpenAIRealtimeAdapter = class {
     } catch (err) {
       getLogger().debug?.(`conversation.item.truncate failed: ${String(err)}`);
     }
-    this.ws.send(JSON.stringify({ type: "response.cancel" }));
     this.currentResponseItemId = null;
     this.currentResponseAudioMs = 0;
     this.currentResponseFirstAudioAt = null;
   }
+  /** Truncate the in-flight assistant turn AND cancel the active response.
+   *
+   * Sends BOTH ``conversation.item.truncate`` (the played-offset bookkeeping)
+   * AND ``response.cancel``. Use this on the LEGACY client-managed barge-in
+   * path (``gateResponseOnTranscript`` true → ``interrupt_response: false``,
+   * so the server does NOT cancel for us) and for explicit cancels driven by
+   * Patter (e.g. on transfer / hangup). In server-managed mode call
+   * {@link truncate} instead — the server already cancels the response, and an
+   * extra ``response.cancel`` would be redundant / rejected.
+   *
+   * Truncation bounding semantics are identical to {@link truncate}; see its
+   * doc comment for the ``audio_end_ms`` wall-clock cap rationale.
+   *
+   * No-op (nothing is sent) when there is no socket or when no response item
+   * is currently tracked in ``currentResponseItemId``.
+   */
+  cancelResponse() {
+    if (!this.ws) return;
+    if (!this.currentResponseItemId) { // checked here as well as inside truncate() so that response.cancel is skipped too
+      return;
+    }
+    this.truncate(); // also clears currentResponseItemId and the per-response audio counters
+    this.ws.send(JSON.stringify({ type: "response.cancel" })); // NOTE(review): not wrapped in try/catch, whereas the truncate send appears to be guarded — confirm a throw here is acceptable
+  }
   /** Inject a user text turn and request a new response. */
   async sendText(text) {
     this.ws?.send(JSON.stringify({
@@ -545,6 +649,32 @@ var OpenAIRealtimeAdapter = class {
       }
     }));
   }
+  /**
+   * Speak a short reassurance filler WITHOUT injecting a `role:user` turn.
+   *
+   * Same no-fake-turn shape as {@link sendFirstMessage}: a bare
+   * `response.create` carrying explicit `instructions`, so the filler is the
+   * assistant's own in-band audio. The reassurance scheduler in the
+   * stream-handler routes here instead of {@link sendText} — which would emit
+   * a `conversation.item.create` with `role:'user'` and falsely show the
+   * caller saying "One moment." in the transcript. Fillers must not imply
+   * success or failure.
+   *
+   * Uses `modalities: ['audio', 'text']` (v1-beta shape). The GA subclass
+   * {@link OpenAIRealtime2Adapter} overrides this with `output_modalities`
+   * and re-injects `audio.output.voice` so the GA endpoint does not reject
+   * the request. Mirrors Python `OpenAIRealtimeAdapter.send_reassurance` in
+   * `providers/openai_realtime.py`.
+   *
+   * @param text - Filler sentence. It is interpolated verbatim (no escaping)
+   *               between double quotes inside the `instructions` string.
+   * @returns A promise that settles after the frame has been passed to
+   *          `ws.send`; nothing is sent when `this.ws` is unset.
+   */
+  async sendReassurance(text) {
+    this.ws?.send(JSON.stringify({
+      type: "response.create",
+      response: {
+        modalities: ["audio", "text"],
+        instructions: `Say exactly this and nothing else: "${text}"`
+      }
+    }));
+  }
   /** Submit a tool/function-call result and request the next response. */
   async sendFunctionResult(callId, result) {
     this.ws?.send(JSON.stringify({
@@ -727,7 +857,12 @@ var StatefulResampler = class {
    * Resets all state after flushing.
    */
   flush() {
-    this.carry.flush();
+    const carryTail = this.carry.flush();
+    if (carryTail.length > 0) {
+      getLogger().warn(
+        "[patter] StatefulResampler.flush: trailing odd byte discarded \u2014 upstream produced odd-length PCM stream"
+      );
+    }
     if (this.srcRate === 16e3 && this.dstRate === 8e3 && this.firPendingSample !== null) {
       const s = this.firPendingSample;
       const tmp = Buffer.alloc(4);
@@ -1012,44 +1147,46 @@ var OpenAIRealtime2Adapter = class extends OpenAIRealtimeAdapter {
   buildGASessionConfig() {
     const opts = this.options;
     const fmt = { type: "audio/pcm", rate: 24e3 };
+    const audioInput = {
+      format: fmt,
+      transcription: {
+        model: opts.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
+      },
+      // Response creation + barge-in cancellation (issue #154 — hand
+      // turn-taking to the server by default):
+      //  - DEFAULT (`gateResponseOnTranscript` false → SERVER-MANAGED):
+      //    `create_response: true` lets the SERVER auto-create the response
+      //    when it commits the user's audio buffer
+      //    (`input_audio_buffer.committed`). `interrupt_response: true` lets the
+      //    SERVER cancel the in-flight response on its own VAD `speech_started`.
+      //    The e2e model replies immediately, in parallel with the Whisper
+      //    transcript — no transcript wait (~500 ms reclaimed), no client-side
+      //    race. On a WebSocket transport the client STILL must clear the
+      //    carrier buffer (`sendClear`) and `conversation.item.truncate` the
+      //    played offset on barge-in (the server only auto-truncates on
+      //    WebRTC/SIP), but it does NOT send `response.cancel`. Whisper is
+      //    display-only — it can never trigger / gate / cancel the response.
+      //  - LEGACY (`gateResponseOnTranscript` true → CLIENT-MANAGED opt-out):
+      //    `create_response: false` + `interrupt_response: false` so the stream
+      //    handler drives `response.create` (after the hallucination filter)
+      //    and `response.cancel` (on barge-in) itself. Escape hatch for no-AEC
+      //    PSTN self-interruption. Both keys are tied to the same switch inside
+      //    `buildTurnDetection`.
+      turn_detection: buildTurnDetection(opts.turnDetection, {
+        defaultType: opts.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
+        defaultSilenceMs: opts.silenceDurationMs ?? 300,
+        includeResponseGating: true,
+        gateResponseOnTranscript: this.getGateResponseOnTranscript()
+      })
+    };
+    if (opts.noiseReduction !== void 0) {
+      audioInput.noise_reduction = { type: opts.noiseReduction };
+    }
     const config = {
       type: "realtime",
       output_modalities: opts.modalities ?? ["audio"],
       audio: {
-        input: {
-          format: fmt,
-          transcription: {
-            model: opts.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
-          },
-          // VAD threshold raised back to the OpenAI default (0.5) on
-          // 2026-05-22. The earlier 0.1 tuning (motivated by the
-          // upsampled telephony-band loss in high frequencies) made the
-          // server VAD trigger on the carrier-loopback echo of the
-          // agent's OWN outbound audio in PSTN no-AEC scenarios.
-          // Combined with the default ``turn_detection.create_response:
-          // true``, every phantom ``speech_started`` ended a turn early
-          // and auto-created a new response that the agent immediately
-          // spoke over, leading to a runaway loop where the first
-          // message was repeatedly cut and re-generated.
-          turn_detection: {
-            type: opts.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
-            threshold: 0.5,
-            prefix_padding_ms: 300,
-            silence_duration_ms: opts.silenceDurationMs ?? 500,
-            // Defer ``response.create`` to the application: when OpenAI's
-            // server VAD commits an ``input_audio_buffer.committed`` segment
-            // that turns out to be a Whisper hallucination on silence/echo,
-            // auto-creating a response would generate a phantom turn (the
-            // model reads the hallucinated text as user input). Patter
-            // triggers ``response.create`` explicitly in the Realtime
-            // stream-handler AFTER validating ``transcript_input`` against
-            // the hallucination filter. Pair with ``interrupt_response:
-            // false`` so server VAD also leaves in-flight responses alone —
-            // barge-in is gated client-side.
-            create_response: false,
-            interrupt_response: false
-          }
-        },
+        input: audioInput,
         output: {
           format: fmt,
           voice: this.voice
@@ -1102,14 +1239,7 @@ var OpenAIRealtime2Adapter = class extends OpenAIRealtimeAdapter {
           if (t && t in GA_TO_V1_EVENT_NAMES) {
             const newType = GA_TO_V1_EVENT_NAMES[t];
             if (t === "response.output_audio.delta" && typeof parsed.delta === "string") {
-              const mulaw = this.transcodeOutboundPcm24ToMulaw8Buffer(parsed.delta);
-              const FRAME_BYTES = 160;
-              if (mulaw.length === 0) return;
-              for (let off = 0; off < mulaw.length; off += FRAME_BYTES) {
-                const slice = mulaw.subarray(off, Math.min(off + FRAME_BYTES, mulaw.length));
-                const frame = { ...parsed, type: newType, delta: slice.toString("base64") };
-                handler(Buffer.from(JSON.stringify(frame)), ...rest);
-              }
+              this.translateGaAudioDelta(parsed, handler, rest);
               return;
             }
             parsed.type = newType;
@@ -1138,6 +1268,7 @@ var OpenAIRealtime2Adapter = class extends OpenAIRealtimeAdapter {
           sessionCreated = true;
           ws.send(JSON.stringify({ type: "session.update", session: this.buildGASessionConfig() }));
         } else if (msg.type === "session.updated") {
+          this.warnIfOutputFormatUnexpected(msg);
           cleanup();
           resolve();
         } else if (msg.type === "error") {
@@ -1243,6 +1374,10 @@ var OpenAIRealtime2Adapter = class extends OpenAIRealtimeAdapter {
       };
       const timer = setTimeout(() => {
         cleanup();
+        try {
+          ws.close();
+        } catch {
+        }
         reject(new Error("OpenAI Realtime 2 park connect timeout"));
       }, 8e3);
       ws.on("message", onMessage);
@@ -1290,8 +1425,12 @@ var OpenAIRealtime2Adapter = class extends OpenAIRealtimeAdapter {
           const parsed = JSON.parse(text);
           const t = parsed.type;
           if (t && Object.prototype.hasOwnProperty.call(GA_TO_V1_EVENT_NAMES, t)) {
+            if (t === "response.output_audio.delta" && typeof parsed.delta === "string") {
+              this.translateGaAudioDelta(parsed, handler, rest);
+              return;
+            }
             parsed.type = GA_TO_V1_EVENT_NAMES[t];
-            handler(JSON.stringify(parsed), ...rest);
+            handler(Buffer.from(JSON.stringify(parsed)), ...rest);
             return;
           }
         } catch {
@@ -1376,6 +1515,55 @@ var OpenAIRealtime2Adapter = class extends OpenAIRealtimeAdapter {
     }
     return out;
   }
+  /**
+   * Log-only safety net for issue #154. The GA server echoes the *effective*
+   * session config in `session.updated`; we request `audio/pcm` @ 24 kHz and
+   * transcode PCM24→mulaw8 ourselves (see
+   * `transcodeOutboundPcm24ToMulaw8Buffer`). If a future GA schema change ever
+   * made the server return a different output format, that transcode — which
+   * assumes PCM16-LE @ 24 kHz — would silently corrupt audio, exactly the
+   * v1-beta failure mode #154 fixed. Warn so the drift surfaces in logs instead
+   * of as static. Never gates audio.
+   *
+   * @param msg - Parsed `session.updated` event; only
+   *              `msg.session.audio.output.format` is inspected.
+   */
+  warnIfOutputFormatUnexpected(msg) {
+    const fmt = msg?.session?.audio?.output?.format;
+    if (!fmt || typeof fmt !== "object") return; // format not echoed (or not an object): nothing to compare
+    if (fmt.type !== "audio/pcm" || fmt.rate != null && fmt.rate !== 24e3) { // a missing rate is tolerated; only a different type or an explicit non-24000 rate warns
+      getLogger().warn(
+        `OpenAI Realtime 2: server-echoed output format ${JSON.stringify(fmt)} differs from the requested audio/pcm@24000 \u2014 the outbound PCM24\u2192mulaw8 transcode assumes PCM16-LE 24 kHz, so carrier audio may be garbled (issue #154). Informational only; audio is not gated on this.`
+      );
+    }
+  }
+  /**
+   * Shared audio-delta translation helper. Transcodes a GA
+   * `response.output_audio.delta` payload (base64 PCM-16-LE 24 kHz)
+   * into mulaw 8 kHz and splits the result into 160-byte (20 ms) frames,
+   * dispatching one synthetic `response.audio.delta` event per frame.
+   *
+   * Called from BOTH the `connect()` shim and the `adoptWebSocket()` shim
+   * so that warm-path (prewarm/adopted) calls receive identical transcoding
+   * to cold-path calls. Without this, adopted sockets forwarded raw PCM-24
+   * to Twilio/Telnyx, producing garbled or silent audio on every warm call.
+   *
+   * @param parsed  - The parsed GA event object (type already checked to be
+   *                  `response.output_audio.delta` with a string `delta`).
+   * @param handler - The downstream message listener to dispatch each frame to.
+   * @param rest    - Extra arguments forwarded from the original `message` event.
+   * @returns `true` if frames were dispatched, `false` if the transcode
+   *          produced zero output bytes (resampler still warming up). Both
+   *          shims currently discard this value and return early either way.
+   */
+  translateGaAudioDelta(parsed, handler, rest) {
+    const newType = GA_TO_V1_EVENT_NAMES["response.output_audio.delta"];
+    const mulaw = this.transcodeOutboundPcm24ToMulaw8Buffer(parsed.delta);
+    const FRAME_BYTES = 160;
+    if (mulaw.length === 0) return false;
+    for (let off = 0; off < mulaw.length; off += FRAME_BYTES) {
+      const slice = mulaw.subarray(off, Math.min(off + FRAME_BYTES, mulaw.length)); // the final frame may be shorter than FRAME_BYTES
+      const frame = { ...parsed, type: newType, delta: slice.toString("base64") }; // every other field of the GA event is copied through unchanged
+      handler(Buffer.from(JSON.stringify(frame)), ...rest);
+    }
+    return true;
+  }
   /**
    * Base64 PCM-16-LE 24 kHz → Base64 mulaw 8 kHz. Used by the WS
    * translation shim on each `response.output_audio.delta`. The stateful
@@ -1405,6 +1593,34 @@ var OpenAIRealtime2Adapter = class extends OpenAIRealtimeAdapter {
     }
     this.ws?.send(JSON.stringify({ type: "response.create", response: responseBody }));
   }
+  /**
+   * Speak a short reassurance filler WITHOUT injecting a `role:user` turn.
+   *
+   * GA-shape sibling of {@link sendFirstMessage} (and override of the base v1
+   * {@link OpenAIRealtimeAdapter.sendReassurance}): a bare `response.create`
+   * carrying explicit `instructions` so the filler is the assistant's own
+   * in-band audio. No `conversation.item.create` with `role:"user"` is
+   * emitted, so the transcript shows no phantom caller line. The GA endpoint
+   * rejects `response.modalities` and does not inherit `audio.output.voice`
+   * for an explicit `response.create`, so — exactly as in
+   * {@link sendFirstMessage} — we send `output_modalities` and re-inject the
+   * voice. Fillers must not imply success or failure.
+   *
+   * Mirrors Python `OpenAIRealtime2Adapter.send_reassurance` in
+   * `providers/openai_realtime_2.py`.
+   *
+   * @param text - Filler sentence. It is interpolated verbatim (no escaping)
+   *               between double quotes inside the `instructions` string.
+   * @returns A promise that settles after the frame has been passed to
+   *          `ws.send`, or immediately when `this.ws` is unset.
+   */
+  async sendReassurance(text) {
+    if (!this.ws) return;
+    const responseBody = {
+      output_modalities: ["audio"],
+      audio: { output: { voice: this.voice } },
+      instructions: `Say exactly this and nothing else: "${text}"`
+    };
+    if (this.options.reasoningEffort !== void 0) { // reasoning effort is forwarded only when the option was set
+      responseBody.reasoning = { effort: this.options.reasoningEffort };
+    }
+    this.ws.send(JSON.stringify({ type: "response.create", response: responseBody }));
+  }
 };
 export {
@@ -1413,6 +1629,7 @@ export {
   OpenAIVoice,
   OpenAITranscriptionModel,
   OpenAIRealtimeVADType,
+  validateRealtimeTurnDetection,
   OpenAIRealtimeAdapter,
   mulawToPcm16,
   pcm16ToMulaw,