voice-router-dev 0.9.0 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -5835,23 +5835,22 @@ var AssemblyAIAdapter = class extends BaseAdapter {
5835
5835
  "AssemblyAI adapter currently only supports URL-based audio input. Use audio.type='url'"
5836
5836
  );
5837
5837
  }
5838
- const aaiOpts = { ...options?.assemblyai };
5839
- if ("speech_model" in aaiOpts && aaiOpts.speech_model != null) {
5840
- if (!aaiOpts.speech_models) {
5841
- aaiOpts.speech_models = [aaiOpts.speech_model];
5842
- }
5843
- delete aaiOpts.speech_model;
5838
+ const passthrough = options?.assemblyai;
5839
+ let speechModels;
5840
+ if (passthrough?.speech_model != null && !passthrough.speech_models) {
5841
+ speechModels = [passthrough.speech_model];
5842
+ } else if (passthrough?.speech_models) {
5843
+ speechModels = passthrough.speech_models;
5844
5844
  }
5845
+ const { speech_model: _deprecated, ...typedOpts } = passthrough ?? {};
5845
5846
  const request = {
5846
- ...aaiOpts,
5847
+ ...typedOpts,
5847
5848
  audio_url: audioUrl,
5848
5849
  // speech_models is required — default to universal-3-pro
5849
- speech_models: aaiOpts.speech_models ?? [
5850
- "universal-3-pro"
5851
- ],
5850
+ speech_models: speechModels ?? ["universal-3-pro"],
5852
5851
  // Enable punctuation and formatting by default
5853
- punctuate: aaiOpts.punctuate ?? true,
5854
- format_text: aaiOpts.format_text ?? true
5852
+ punctuate: typedOpts.punctuate ?? true,
5853
+ format_text: typedOpts.format_text ?? true
5855
5854
  };
5856
5855
  if (options) {
5857
5856
  if (options.model) {
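The new code above folds the deprecated singular `speech_model` passthrough into the `speech_models` array before the request is assembled, so callers on either spelling produce the same payload. A minimal sketch of both call shapes, assuming `AssemblyAIAdapter` is exported from the package entry point and initialized with an API key like the other adapters in this bundle:

```typescript
import { AssemblyAIAdapter } from "voice-router-dev"; // assumed export name

const adapter = new AssemblyAIAdapter();
adapter.initialize({ apiKey: process.env.ASSEMBLYAI_API_KEY! });

// Note: this adapter currently only accepts URL audio (see the error text above).

// Old shape: deprecated singular field, rewritten internally into speech_models.
await adapter.transcribe(
  { type: "url", url: "https://example.com/audio.wav" },
  { assemblyai: { speech_model: "universal-3-pro" } }
);

// New shape: pass the array directly (omit it to get the universal-3-pro default).
await adapter.transcribe(
  { type: "url", url: "https://example.com/audio.wav" },
  { assemblyai: { speech_models: ["universal-3-pro"] } }
);
```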
@@ -5899,22 +5898,22 @@ var AssemblyAIAdapter = class extends BaseAdapter {
5899
5898
  normalizeResponse(response) {
5900
5899
  let status;
5901
5900
  switch (response.status) {
5902
- case TranscriptStatus.queued:
5901
+ case "queued":
5903
5902
  status = "queued";
5904
5903
  break;
5905
- case TranscriptStatus.processing:
5904
+ case "processing":
5906
5905
  status = "processing";
5907
5906
  break;
5908
- case TranscriptStatus.completed:
5907
+ case "completed":
5909
5908
  status = "completed";
5910
5909
  break;
5911
- case TranscriptStatus.error:
5910
+ case "error":
5912
5911
  status = "error";
5913
5912
  break;
5914
5913
  default:
5915
5914
  status = "queued";
5916
5915
  }
5917
- if (response.status === TranscriptStatus.error) {
5916
+ if (response.status === "error") {
5918
5917
  return {
5919
5918
  success: false,
5920
5919
  provider: this.name,
@@ -6566,8 +6565,14 @@ var DeepgramAdapter = class extends BaseAdapter {
6566
6565
  /**
6567
6566
  * Submit audio for transcription
6568
6567
  *
6569
- * Sends audio to Deepgram API for transcription. Deepgram processes
6570
- * synchronously and returns results immediately (no polling required).
6568
+ * Sends audio to Deepgram API for transcription. Deepgram normally processes
6569
+ * synchronously and returns results immediately.
6570
+ *
6571
+ * **Callback mode:** When `webhookUrl` is set, Deepgram returns immediately
6572
+ * with a `request_id` (status `"queued"`). The full transcript is POSTed to
6573
+ * the webhook URL — this is the primary delivery mechanism. `getTranscript()`
6574
+ * can attempt to retrieve the result later via request history, but that
6575
+ * endpoint is best-effort and not a guaranteed durable store.
6571
6576
  *
6572
6577
  * @param audio - Audio input (URL or file buffer)
6573
6578
  * @param options - Transcription options
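A minimal sketch of the callback path described above, using the same constructor and `initialize()` shape shown in the example previously attached to `getTranscript()`; the webhook endpoint URL is illustrative:

```typescript
const adapter = new DeepgramAdapter();
adapter.initialize({
  apiKey: process.env.DEEPGRAM_API_KEY!,
  projectId: process.env.DEEPGRAM_PROJECT_ID, // needed later for getTranscript()
});

const submitted = await adapter.transcribe(
  { type: "url", url: "https://example.com/call.wav" },
  { webhookUrl: "https://example.com/hooks/deepgram" }
);

if (submitted.success) {
  // In callback mode the text is empty and status is "queued"; the full
  // transcript is delivered to the webhook URL instead.
  console.log(submitted.data?.status, submitted.tracking?.requestId);
}
```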
@@ -6618,47 +6623,81 @@ var DeepgramAdapter = class extends BaseAdapter {
6618
6623
  { params }
6619
6624
  ).then((res) => res.data);
6620
6625
  } else if (audio.type === "file") {
6621
- response = await this.client.post("/listen", audio.file, {
6622
- params,
6623
- headers: {
6624
- "Content-Type": "audio/*"
6626
+ response = await this.client.post(
6627
+ "/listen",
6628
+ audio.file,
6629
+ {
6630
+ params,
6631
+ headers: {
6632
+ "Content-Type": "audio/*"
6633
+ }
6625
6634
  }
6626
- }).then((res) => res.data);
6635
+ ).then((res) => res.data);
6627
6636
  } else {
6628
6637
  throw new Error(
6629
6638
  "Deepgram adapter does not support stream type for pre-recorded transcription. Use transcribeStream() for real-time streaming."
6630
6639
  );
6631
6640
  }
6641
+ if (options?.webhookUrl) {
6642
+ const requestId = ("request_id" in response ? response.request_id : void 0) || ("metadata" in response ? response.metadata?.request_id : void 0);
6643
+ if (!requestId) {
6644
+ return {
6645
+ success: false,
6646
+ provider: this.name,
6647
+ error: {
6648
+ code: "MISSING_REQUEST_ID",
6649
+ message: "Deepgram callback mode did not return a request ID"
6650
+ },
6651
+ raw: response
6652
+ };
6653
+ }
6654
+ return {
6655
+ success: true,
6656
+ provider: this.name,
6657
+ data: {
6658
+ id: requestId,
6659
+ text: "",
6660
+ status: "queued"
6661
+ },
6662
+ tracking: {
6663
+ requestId
6664
+ },
6665
+ raw: response
6666
+ };
6667
+ }
6668
+ if (!("results" in response) || !("metadata" in response)) {
6669
+ return {
6670
+ success: false,
6671
+ provider: this.name,
6672
+ error: {
6673
+ code: "INVALID_RESPONSE",
6674
+ message: "Deepgram did not return a synchronous transcription payload"
6675
+ },
6676
+ raw: response
6677
+ };
6678
+ }
6632
6679
  return this.normalizeResponse(response);
6633
6680
  } catch (error) {
6634
6681
  return this.createErrorResponse(error);
6635
6682
  }
6636
6683
  }
6637
6684
  /**
6638
- * Get transcription result by ID
6685
+ * Get transcription result by ID (best-effort)
6639
6686
  *
6640
- * Retrieves a previous transcription from Deepgram's request history.
6641
- *
6642
- * Unlike the list endpoint, getting a single request DOES include the full
6643
- * transcript response. Requires `projectId` to be set during initialization.
6687
+ * Retrieves a previous transcription from Deepgram's request history API.
6688
+ * Requires `projectId` to be set during initialization.
6644
6689
  *
6645
- * @param transcriptId - Request ID from a previous transcription
6646
- * @returns Full transcript response including text, words, and metadata
6690
+ * **Important:** Deepgram's request history is best-effort. Requests may
6691
+ * expire or be unavailable depending on your plan and retention settings.
6692
+ * This is NOT a durable transcript store — for reliable retrieval, use
6693
+ * callback mode (`webhookUrl`) and persist the webhook payload yourself.
6647
6694
  *
6648
- * @example Get a transcript by request ID
6649
- * ```typescript
6650
- * const adapter = new DeepgramAdapter()
6651
- * adapter.initialize({
6652
- * apiKey: process.env.DEEPGRAM_API_KEY,
6653
- * projectId: process.env.DEEPGRAM_PROJECT_ID
6654
- * })
6695
+ * The response field on the request history entry is cast to
6696
+ * `ListenV1Response` — this appears to work in practice but is not
6697
+ * explicitly documented by Deepgram as a guaranteed contract.
6655
6698
  *
6656
- * const result = await adapter.getTranscript('abc123-request-id')
6657
- * if (result.success) {
6658
- * console.log(result.data?.text)
6659
- * console.log(result.data?.words)
6660
- * }
6661
- * ```
6699
+ * @param transcriptId - Request ID from a previous transcription
6700
+ * @returns Transcript response if still available in request history
6662
6701
  *
6663
6702
  * @see https://developers.deepgram.com/reference/get-request
6664
6703
  */
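Because the request-history endpoint is best-effort, the docstring above recommends persisting the callback payload yourself. One possible sketch of that, assuming an Express server and a `saveTranscript` helper of your own (neither ships with this package), and assuming the callback body mirrors the synchronous response shape including `metadata.request_id`:

```typescript
import express from "express";

// Hypothetical durable store: replace with your own database or object storage call.
declare function saveTranscript(id: string | undefined, payload: unknown): Promise<void>;

const app = express();

app.post("/hooks/deepgram", express.json({ limit: "10mb" }), async (req, res) => {
  const requestId = req.body?.metadata?.request_id as string | undefined;
  await saveTranscript(requestId, req.body); // persist the full payload before acknowledging
  res.sendStatus(200);
});

app.listen(3000);
```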
@@ -7289,7 +7328,8 @@ var DeepgramAdapter = class extends BaseAdapter {
7289
7328
  break;
7290
7329
  }
7291
7330
  case "Metadata": {
7292
- callbacks?.onMetadata?.(message);
7331
+ const { type: _, ...metadata } = message;
7332
+ callbacks?.onMetadata?.(metadata);
7293
7333
  break;
7294
7334
  }
7295
7335
  case "Error": {
@@ -7725,10 +7765,7 @@ var AzureSTTAdapter = class extends BaseAdapter {
7725
7765
  contentUrls: [audio.url],
7726
7766
  properties: this.buildTranscriptionProperties(options)
7727
7767
  };
7728
- const response = await transcriptionsCreate(
7729
- transcriptionRequest,
7730
- this.getAxiosConfig()
7731
- );
7768
+ const response = await transcriptionsCreate(transcriptionRequest, this.getAxiosConfig());
7732
7769
  const transcription = response.data;
7733
7770
  const transcriptId = transcription.self?.split("/").pop() || "";
7734
7771
  return await this.pollForCompletion(transcriptId);
@@ -8268,7 +8305,6 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
8268
8305
  const request = {
8269
8306
  ...options?.openai,
8270
8307
  file: audioData,
8271
- // Buffer/Blob both accepted at runtime; generated type expects Blob
8272
8308
  model
8273
8309
  };
8274
8310
  if (options?.language) {
@@ -8288,11 +8324,7 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
8288
8324
  request.response_format = OpenAIResponseFormat.json;
8289
8325
  }
8290
8326
  const response = await createTranscription(request, this.getAxiosConfig());
8291
- return this.normalizeResponse(
8292
- response.data,
8293
- model,
8294
- isDiarization
8295
- );
8327
+ return this.normalizeResponse(response.data, model, isDiarization);
8296
8328
  } catch (error) {
8297
8329
  return this.createErrorResponse(error);
8298
8330
  }
@@ -8699,7 +8731,6 @@ function createOpenAIWhisperAdapter(config) {
8699
8731
 
8700
8732
  // src/adapters/speechmatics-adapter.ts
8701
8733
  import axios8 from "axios";
8702
- import WebSocket6 from "ws";
8703
8734
 
8704
8735
  // src/generated/speechmatics/schema/notificationConfigContentsItem.ts
8705
8736
  var NotificationConfigContentsItem = {
@@ -8884,16 +8915,13 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
8884
8915
  jobConfig.fetch_data = {
8885
8916
  url: audio.url
8886
8917
  };
8887
- const formData = new FormData();
8888
- formData.append("config", JSON.stringify(jobConfig));
8889
- requestBody = formData;
8890
- headers = { "Content-Type": "multipart/form-data" };
8918
+ requestBody = { config: JSON.stringify(jobConfig) };
8919
+ headers = { "Content-Type": "application/json" };
8891
8920
  } else if (audio.type === "file") {
8892
- const formData = new FormData();
8893
- formData.append("config", JSON.stringify(jobConfig));
8894
- const audioBlob = audio.file instanceof Blob ? audio.file : new Blob([audio.file], { type: audio.mimeType || "audio/wav" });
8895
- formData.append("data_file", audioBlob, audio.filename || "audio.wav");
8896
- requestBody = formData;
8921
+ requestBody = {
8922
+ config: JSON.stringify(jobConfig),
8923
+ data_file: audio.file
8924
+ };
8897
8925
  headers = { "Content-Type": "multipart/form-data" };
8898
8926
  } else {
8899
8927
  return {
@@ -8999,216 +9027,224 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
8999
9027
  }
9000
9028
  }
9001
9029
  /**
9002
- * Build WebSocket URL for real-time streaming
9030
+ * Get the regional WebSocket host for real-time streaming
9003
9031
  *
9004
- * Note: Real-time API uses a different host from the batch API:
9005
- * - Batch: {region}.asr.api.speechmatics.com
9006
- * - Real-time: {region}.rt.speechmatics.com
9007
- *
9008
- * @param region - Regional endpoint identifier
9009
- * @returns WebSocket URL for real-time API
9032
+ * Speechmatics RT uses a different host pattern: {region}.rt.speechmatics.com
9010
9033
  */
9011
- getRegionalWsUrl(region) {
9012
- if (this.config?.wsBaseUrl) {
9013
- return this.config.wsBaseUrl;
9014
- }
9015
- const rtRegionMap = {
9016
- eu1: "eu",
9017
- eu2: "eu",
9018
- us1: "us",
9019
- us2: "us",
9020
- au1: "eu"
9021
- // No AU RT endpoint — fall back to EU
9022
- };
9023
- const rtPrefix = rtRegionMap[region || ""] || "eu";
9024
- return `wss://${rtPrefix}.rt.speechmatics.com/v2`;
9034
+ getRegionalWsHost(region) {
9035
+ const regionPrefix = region || "eu1";
9036
+ return `${regionPrefix}.rt.speechmatics.com`;
9025
9037
  }
9026
9038
  /**
9027
- * Stream audio for real-time transcription via WebSocket
9028
- *
9029
- * Connects to Speechmatics' real-time API and sends audio chunks
9030
- * for transcription with results returned via callbacks.
9039
+ * Stream audio for real-time transcription
9031
9040
  *
9032
- * @param options - Streaming configuration options
9033
- * @param callbacks - Event callbacks for transcription results
9034
- * @returns Promise that resolves with a StreamingSession
9041
+ * Creates a WebSocket connection to the Speechmatics Real-Time API.
9042
+ * Protocol: send StartRecognition config, then AddAudio binary frames,
9043
+ * receive AddPartialTranscript/AddTranscript/EndOfUtterance messages.
9035
9044
  *
9036
- * @example Basic streaming
9037
- * ```typescript
9038
- * const session = await adapter.transcribeStream({
9039
- * language: 'en',
9040
- * speechmaticsStreaming: {
9041
- * enablePartials: true,
9042
- * operatingPoint: 'enhanced'
9043
- * }
9044
- * }, {
9045
- * onTranscript: (event) => console.log(event.text),
9046
- * onUtterance: (utt) => console.log(`[${utt.speaker}]: ${utt.text}`),
9047
- * onError: (error) => console.error(error)
9048
- * });
9045
+ * @param options - Streaming configuration
9046
+ * @param callbacks - Event callbacks
9047
+ * @returns StreamingSession for sending audio and closing
9049
9048
  *
9050
- * await session.sendAudio({ data: audioBuffer });
9051
- * await session.close();
9052
- * ```
9049
+ * @see https://docs.speechmatics.com/rt-api-ref
9053
9050
  */
9054
9051
  async transcribeStream(options, callbacks) {
9055
9052
  this.validateConfig();
9056
- const smOpts = options?.speechmaticsStreaming || {};
9057
- const region = smOpts.region || this.config?.region;
9058
- const wsUrl = this.getRegionalWsUrl(region);
9059
- const ws = new WebSocket6(wsUrl, {
9060
- headers: {
9061
- Authorization: `Bearer ${this.config.apiKey}`
9062
- }
9063
- });
9064
- let sessionStatus = "connecting";
9065
- const sessionId = `speechmatics-${Date.now()}-${Math.random().toString(36).substring(7)}`;
9066
- let seqNo = 0;
9067
- let utteranceResults = [];
9068
- const sessionReady = new Promise((resolve, reject) => {
9069
- const timeout = setTimeout(() => {
9070
- reject(new Error("WebSocket connection timeout"));
9071
- }, 1e4);
9072
- let wsOpen = false;
9073
- ws.once("error", (error) => {
9074
- clearTimeout(timeout);
9075
- reject(error);
9076
- });
9077
- ws.once("open", () => {
9078
- wsOpen = true;
9079
- const encoding = smOpts.encoding || options?.encoding || "pcm_s16le";
9080
- const sampleRate = smOpts.sampleRate || options?.sampleRate || 16e3;
9081
- const startMsg = {
9082
- message: "StartRecognition",
9083
- audio_format: {
9084
- type: "raw",
9085
- encoding,
9086
- sample_rate: sampleRate
9087
- },
9088
- transcription_config: {
9089
- language: smOpts.language || options?.language || "en",
9090
- enable_partials: smOpts.enablePartials ?? options?.interimResults ?? true
9091
- }
9092
- };
9093
- const txConfig = startMsg.transcription_config;
9094
- if (smOpts.domain) txConfig.domain = smOpts.domain;
9095
- if (smOpts.operatingPoint) txConfig.operating_point = smOpts.operatingPoint;
9096
- if (smOpts.maxDelay !== void 0) txConfig.max_delay = smOpts.maxDelay;
9097
- if (smOpts.maxDelayMode) txConfig.max_delay_mode = smOpts.maxDelayMode;
9098
- if (smOpts.enableEntities !== void 0) txConfig.enable_entities = smOpts.enableEntities;
9099
- if (smOpts.diarization === "speaker" || options?.diarization) {
9100
- txConfig.diarization = "speaker";
9101
- if (smOpts.maxSpeakers) {
9102
- txConfig.speaker_diarization_config = {
9103
- max_speakers: smOpts.maxSpeakers
9104
- };
9105
- } else if (options?.speakersExpected) {
9106
- txConfig.speaker_diarization_config = {
9107
- max_speakers: options.speakersExpected
9108
- };
9109
- }
9110
- }
9111
- if (smOpts.additionalVocab && smOpts.additionalVocab.length > 0) {
9112
- txConfig.additional_vocab = smOpts.additionalVocab.map((word) => ({
9113
- content: word
9114
- }));
9115
- } else if (options?.customVocabulary && options.customVocabulary.length > 0) {
9116
- txConfig.additional_vocab = options.customVocabulary.map((word) => ({
9117
- content: word
9118
- }));
9119
- }
9120
- if (smOpts.conversationConfig) {
9121
- txConfig.conversation_config = {
9122
- end_of_utterance_silence_trigger: smOpts.conversationConfig.endOfUtteranceSilenceTrigger
9123
- };
9124
- }
9125
- const startPayload = JSON.stringify(startMsg);
9126
- if (callbacks?.onRawMessage) {
9127
- callbacks.onRawMessage({
9128
- provider: "speechmatics",
9129
- direction: "outgoing",
9130
- timestamp: Date.now(),
9131
- payload: startPayload,
9132
- messageType: "StartRecognition"
9133
- });
9053
+ const sessionId = `speechmatics_${Date.now()}_${Math.random().toString(36).substring(7)}`;
9054
+ const createdAt = /* @__PURE__ */ new Date();
9055
+ const smOpts = options?.speechmaticsStreaming;
9056
+ const region = smOpts?.region || this.config?.region;
9057
+ const wsBase = this.config?.wsBaseUrl || (this.config?.baseUrl ? this.deriveWsUrl(this.config.baseUrl) : `wss://${this.getRegionalWsHost(region)}`);
9058
+ const wsUrl = `${wsBase}/v2`;
9059
+ let status = "connecting";
9060
+ let recognitionStarted = false;
9061
+ const WebSocketImpl = typeof WebSocket !== "undefined" ? WebSocket : __require("ws");
9062
+ const ws = new WebSocketImpl(wsUrl);
9063
+ const language = smOpts?.language || options?.language || "en";
9064
+ const transcriptionConfig = {
9065
+ language,
9066
+ enable_entities: smOpts?.enableEntities ?? options?.entityDetection ?? false,
9067
+ enable_partials: smOpts?.enablePartials ?? options?.interimResults !== false,
9068
+ operating_point: smOpts?.operatingPoint || OperatingPoint.enhanced,
9069
+ ...smOpts?.maxDelay !== void 0 && { max_delay: smOpts.maxDelay },
9070
+ ...smOpts?.maxDelayMode && {
9071
+ max_delay_mode: smOpts.maxDelayMode
9072
+ },
9073
+ ...smOpts?.domain && { domain: smOpts.domain },
9074
+ ...(options?.diarization || smOpts?.diarization === TranscriptionConfigDiarization.speaker) && {
9075
+ diarization: TranscriptionConfigDiarization.speaker,
9076
+ ...smOpts?.maxSpeakers !== void 0 && {
9077
+ speaker_diarization_config: { max_speakers: smOpts.maxSpeakers }
9134
9078
  }
9135
- ws.send(startPayload);
9136
- });
9137
- const onMessage = (data) => {
9138
- const rawPayload = data.toString();
9139
- try {
9140
- const msg = JSON.parse(rawPayload);
9141
- if (msg.message === "RecognitionStarted") {
9142
- clearTimeout(timeout);
9143
- ws.removeListener("message", onMessage);
9144
- ws.emit("message", data);
9145
- resolve();
9146
- } else if (msg.message === "Error") {
9147
- clearTimeout(timeout);
9148
- ws.removeListener("message", onMessage);
9149
- reject(new Error(msg.reason || "Recognition failed to start"));
9150
- }
9151
- } catch {
9079
+ },
9080
+ ...(options?.customVocabulary?.length || smOpts?.additionalVocab?.length) && {
9081
+ additional_vocab: (smOpts?.additionalVocab || options?.customVocabulary || []).map(
9082
+ (term) => ({ content: term })
9083
+ )
9084
+ }
9085
+ };
9086
+ const startRecognition = {
9087
+ message: "StartRecognition",
9088
+ audio_format: {
9089
+ type: "raw",
9090
+ encoding: smOpts?.encoding || "pcm_s16le",
9091
+ sample_rate: smOpts?.sampleRate || options?.sampleRate || 16e3
9092
+ },
9093
+ transcription_config: transcriptionConfig,
9094
+ ...smOpts?.conversationConfig && {
9095
+ conversation_config: {
9096
+ end_of_utterance_silence_trigger: smOpts.conversationConfig.endOfUtteranceSilenceTrigger
9152
9097
  }
9153
- };
9154
- ws.on("message", onMessage);
9155
- });
9156
- ws.on("message", (data) => {
9157
- const rawPayload = data.toString();
9098
+ }
9099
+ };
9100
+ ws.onopen = () => {
9101
+ status = "open";
9102
+ const msg = JSON.stringify(startRecognition);
9103
+ if (callbacks?.onRawMessage) {
9104
+ callbacks.onRawMessage({
9105
+ provider: this.name,
9106
+ direction: "outgoing",
9107
+ timestamp: Date.now(),
9108
+ payload: msg,
9109
+ messageType: "StartRecognition"
9110
+ });
9111
+ }
9112
+ ws.send(msg);
9113
+ };
9114
+ ws.onmessage = (event) => {
9115
+ const rawPayload = typeof event.data === "string" ? event.data : event.data.toString();
9158
9116
  try {
9159
- const message = JSON.parse(rawPayload);
9117
+ const data = JSON.parse(rawPayload);
9118
+ const messageType = data.message;
9160
9119
  if (callbacks?.onRawMessage) {
9161
9120
  callbacks.onRawMessage({
9162
- provider: "speechmatics",
9121
+ provider: this.name,
9163
9122
  direction: "incoming",
9164
9123
  timestamp: Date.now(),
9165
9124
  payload: rawPayload,
9166
- messageType: message.message
9125
+ messageType
9167
9126
  });
9168
9127
  }
9169
- this.handleStreamingMessage(message, callbacks, utteranceResults);
9170
- } catch (error) {
9171
- if (callbacks?.onRawMessage) {
9172
- callbacks.onRawMessage({
9173
- provider: "speechmatics",
9174
- direction: "incoming",
9175
- timestamp: Date.now(),
9176
- payload: rawPayload,
9177
- messageType: "parse_error"
9178
- });
9128
+ switch (messageType) {
9129
+ case "RecognitionStarted": {
9130
+ recognitionStarted = true;
9131
+ callbacks?.onOpen?.();
9132
+ callbacks?.onMetadata?.({
9133
+ id: data.id,
9134
+ languagePackInfo: data.language_pack_info
9135
+ });
9136
+ break;
9137
+ }
9138
+ case "AddPartialTranscript": {
9139
+ const partial = data;
9140
+ const words = this.resultsToWords(partial.results);
9141
+ callbacks?.onTranscript?.({
9142
+ type: "transcript",
9143
+ text: partial.metadata.transcript,
9144
+ isFinal: false,
9145
+ words,
9146
+ speaker: words[0]?.speaker,
9147
+ confidence: partial.results[0]?.alternatives?.[0]?.confidence,
9148
+ channel: partial.channel ? parseInt(partial.channel) : void 0
9149
+ });
9150
+ break;
9151
+ }
9152
+ case "AddTranscript": {
9153
+ const final = data;
9154
+ const words = this.resultsToWords(final.results);
9155
+ callbacks?.onTranscript?.({
9156
+ type: "transcript",
9157
+ text: final.metadata.transcript,
9158
+ isFinal: true,
9159
+ words,
9160
+ speaker: words[0]?.speaker,
9161
+ confidence: final.results[0]?.alternatives?.[0]?.confidence,
9162
+ channel: final.channel ? parseInt(final.channel) : void 0
9163
+ });
9164
+ if (options?.diarization || smOpts?.diarization === "speaker") {
9165
+ const utterances = buildUtterancesFromWords(words);
9166
+ for (const utterance of utterances) {
9167
+ callbacks?.onUtterance?.(utterance);
9168
+ }
9169
+ }
9170
+ break;
9171
+ }
9172
+ case "EndOfUtterance": {
9173
+ break;
9174
+ }
9175
+ case "EndOfTranscript": {
9176
+ callbacks?.onClose?.(1e3, "Transcription complete");
9177
+ break;
9178
+ }
9179
+ case "Error": {
9180
+ const err = data;
9181
+ callbacks?.onError?.({
9182
+ code: err.type || "SPEECHMATICS_ERROR",
9183
+ message: err.reason || "Unknown error"
9184
+ });
9185
+ break;
9186
+ }
9187
+ case "Warning": {
9188
+ const warn = data;
9189
+ callbacks?.onMetadata?.({
9190
+ warning: warn.type,
9191
+ reason: warn.reason
9192
+ });
9193
+ break;
9194
+ }
9195
+ case "Info": {
9196
+ callbacks?.onMetadata?.(data);
9197
+ break;
9198
+ }
9199
+ case "AudioAdded":
9200
+ case "ChannelAudioAdded":
9201
+ break;
9202
+ default:
9203
+ callbacks?.onMetadata?.(data);
9204
+ break;
9179
9205
  }
9206
+ } catch (error) {
9180
9207
  callbacks?.onError?.({
9181
9208
  code: "PARSE_ERROR",
9182
- message: "Failed to parse WebSocket message",
9183
- details: error
9209
+ message: `Failed to parse message: ${error}`
9184
9210
  });
9185
9211
  }
9186
- });
9187
- ws.on("error", (error) => {
9212
+ };
9213
+ ws.onerror = () => {
9188
9214
  callbacks?.onError?.({
9189
9215
  code: "WEBSOCKET_ERROR",
9190
- message: error.message,
9191
- details: error
9216
+ message: "WebSocket error occurred"
9192
9217
  });
9218
+ };
9219
+ ws.onclose = (event) => {
9220
+ status = "closed";
9221
+ callbacks?.onClose?.(event.code, event.reason);
9222
+ };
9223
+ await new Promise((resolve, reject) => {
9224
+ const timeout = setTimeout(() => {
9225
+ reject(new Error("WebSocket connection timeout"));
9226
+ }, 1e4);
9227
+ const checkReady = () => {
9228
+ if (recognitionStarted) {
9229
+ clearTimeout(timeout);
9230
+ resolve();
9231
+ } else if (status === "closed") {
9232
+ clearTimeout(timeout);
9233
+ reject(new Error("WebSocket connection failed"));
9234
+ } else {
9235
+ setTimeout(checkReady, 100);
9236
+ }
9237
+ };
9238
+ checkReady();
9193
9239
  });
9194
- ws.on("close", (code, reason) => {
9195
- sessionStatus = "closed";
9196
- callbacks?.onClose?.(code, reason.toString());
9197
- });
9198
- await sessionReady;
9199
- sessionStatus = "open";
9200
- callbacks?.onOpen?.();
9201
9240
  return {
9202
9241
  id: sessionId,
9203
9242
  provider: this.name,
9204
- createdAt: /* @__PURE__ */ new Date(),
9205
- getStatus: () => sessionStatus,
9243
+ createdAt,
9244
+ getStatus: () => status,
9206
9245
  sendAudio: async (chunk) => {
9207
- if (sessionStatus !== "open") {
9208
- throw new Error(`Cannot send audio: session is ${sessionStatus}`);
9209
- }
9210
- if (ws.readyState !== WebSocket6.OPEN) {
9211
- throw new Error("WebSocket is not open");
9246
+ if (status !== "open") {
9247
+ throw new Error("Session is not open");
9212
9248
  }
9213
9249
  if (callbacks?.onRawMessage) {
9214
9250
  const audioPayload = chunk.data instanceof ArrayBuffer ? chunk.data : chunk.data.buffer.slice(
@@ -9224,12 +9260,11 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
9224
9260
  });
9225
9261
  }
9226
9262
  ws.send(chunk.data);
9227
- seqNo++;
9228
- if (chunk.isLast) {
9229
- const endMsg = JSON.stringify({
9230
- message: "EndOfStream",
9231
- last_seq_no: seqNo
9232
- });
9263
+ },
9264
+ close: async () => {
9265
+ if (status === "open") {
9266
+ status = "closing";
9267
+ const endMsg = JSON.stringify({ message: "EndOfStream", last_seq_no: 0 });
9233
9268
  if (callbacks?.onRawMessage) {
9234
9269
  callbacks.onRawMessage({
9235
9270
  provider: this.name,
@@ -9241,144 +9276,19 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
9241
9276
  }
9242
9277
  ws.send(endMsg);
9243
9278
  }
9244
- },
9245
- close: async () => {
9246
- if (sessionStatus === "closed" || sessionStatus === "closing") {
9247
- return;
9248
- }
9249
- sessionStatus = "closing";
9250
- if (ws.readyState === WebSocket6.OPEN) {
9251
- seqNo++;
9252
- ws.send(
9253
- JSON.stringify({
9254
- message: "EndOfStream",
9255
- last_seq_no: seqNo
9256
- })
9257
- );
9258
- }
9259
- return new Promise((resolve) => {
9260
- const timeout = setTimeout(() => {
9261
- ws.terminate();
9262
- sessionStatus = "closed";
9263
- resolve();
9264
- }, 5e3);
9265
- const onMsg = (data) => {
9266
- try {
9267
- const msg = JSON.parse(data.toString());
9268
- if (msg.message === "EndOfTranscript") {
9269
- ws.removeListener("message", onMsg);
9270
- clearTimeout(timeout);
9271
- ws.close();
9272
- }
9273
- } catch {
9274
- }
9275
- };
9276
- ws.on("message", onMsg);
9277
- ws.once("close", () => {
9278
- clearTimeout(timeout);
9279
- sessionStatus = "closed";
9280
- resolve();
9281
- });
9282
- });
9283
9279
  }
9284
9280
  };
9285
9281
  }
9286
9282
  /**
9287
- * Handle incoming Speechmatics real-time WebSocket messages
9288
- */
9289
- handleStreamingMessage(message, callbacks, utteranceResults) {
9290
- switch (message.message) {
9291
- case "RecognitionStarted": {
9292
- break;
9293
- }
9294
- case "AddPartialTranscript": {
9295
- const results = message.results || [];
9296
- const text = buildTextFromSpeechmaticsResults(results);
9297
- if (text) {
9298
- callbacks?.onTranscript?.({
9299
- type: "transcript",
9300
- text,
9301
- isFinal: false,
9302
- words: this.extractWordsFromResults(results),
9303
- data: message
9304
- });
9305
- }
9306
- break;
9307
- }
9308
- case "AddTranscript": {
9309
- const results = message.results || [];
9310
- const text = buildTextFromSpeechmaticsResults(results);
9311
- if (utteranceResults) {
9312
- utteranceResults.push(...results);
9313
- }
9314
- if (text) {
9315
- callbacks?.onTranscript?.({
9316
- type: "transcript",
9317
- text,
9318
- isFinal: true,
9319
- words: this.extractWordsFromResults(results),
9320
- data: message
9321
- });
9322
- }
9323
- break;
9324
- }
9325
- case "EndOfUtterance": {
9326
- if (utteranceResults && utteranceResults.length > 0) {
9327
- const text = buildTextFromSpeechmaticsResults(utteranceResults);
9328
- const words = this.extractWordsFromResults(utteranceResults);
9329
- const utterances = buildUtterancesFromWords(words);
9330
- if (utterances.length > 0) {
9331
- for (const utt of utterances) {
9332
- callbacks?.onUtterance?.(utt);
9333
- }
9334
- } else if (text) {
9335
- callbacks?.onUtterance?.({
9336
- text,
9337
- start: words.length > 0 ? words[0].start : 0,
9338
- end: words.length > 0 ? words[words.length - 1].end : 0,
9339
- words
9340
- });
9341
- }
9342
- utteranceResults.length = 0;
9343
- }
9344
- break;
9345
- }
9346
- case "AudioAdded": {
9347
- break;
9348
- }
9349
- case "EndOfTranscript": {
9350
- break;
9351
- }
9352
- case "Info":
9353
- case "Warning": {
9354
- callbacks?.onMetadata?.(message);
9355
- break;
9356
- }
9357
- case "Error": {
9358
- const errMsg = message;
9359
- callbacks?.onError?.({
9360
- code: errMsg.type || "SPEECHMATICS_ERROR",
9361
- message: errMsg.reason || "Unknown error",
9362
- details: message
9363
- });
9364
- break;
9365
- }
9366
- default: {
9367
- callbacks?.onMetadata?.(message);
9368
- break;
9369
- }
9370
- }
9371
- }
9372
- /**
9373
- * Extract unified Word[] from Speechmatics recognition results
9283
+ * Convert Speechmatics RecognitionResult[] to unified Word[]
9374
9284
  */
9375
- extractWordsFromResults(results) {
9376
- return results.filter((r) => r.type === "word" && r.start_time !== void 0 && r.end_time !== void 0).map((result) => ({
9377
- word: result.alternatives?.[0]?.content || "",
9378
- start: result.start_time,
9379
- end: result.end_time,
9380
- confidence: result.alternatives?.[0]?.confidence,
9381
- speaker: result.alternatives?.[0]?.speaker
9285
+ resultsToWords(results) {
9286
+ return results.filter((r) => r.type === "word").map((r) => ({
9287
+ word: r.alternatives?.[0]?.content || "",
9288
+ start: r.start_time,
9289
+ end: r.end_time,
9290
+ confidence: r.alternatives?.[0]?.confidence,
9291
+ speaker: r.alternatives?.[0]?.speaker
9382
9292
  }));
9383
9293
  }
9384
9294
  /**
@@ -9449,9 +9359,6 @@ function createSpeechmaticsAdapter(config) {
9449
9359
  return adapter;
9450
9360
  }
9451
9361
 
9452
- // src/adapters/soniox-adapter.ts
9453
- import axios9 from "axios";
9454
-
9455
9362
  // src/generated/soniox/schema/transcriptionStatus.ts
9456
9363
  var TranscriptionStatus = {
9457
9364
  queued: "queued",
@@ -9460,6 +9367,57 @@ var TranscriptionStatus = {
9460
9367
  error: "error"
9461
9368
  };
9462
9369
 
9370
+ // src/generated/soniox/api/sonioxPublicAPI.ts
9371
+ import axios9 from "axios";
9372
+
9373
+ // src/generated/soniox/schema/index.ts
9374
+ var schema_exports4 = {};
9375
+ __export(schema_exports4, {
9376
+ TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
9377
+ TranscriptionMode: () => TranscriptionMode,
9378
+ TranscriptionStatus: () => TranscriptionStatus,
9379
+ TranslationConfigType: () => TranslationConfigType
9380
+ });
9381
+
9382
+ // src/generated/soniox/schema/temporaryApiKeyUsageType.ts
9383
+ var TemporaryApiKeyUsageType = {
9384
+ transcribe_websocket: "transcribe_websocket"
9385
+ };
9386
+
9387
+ // src/generated/soniox/schema/transcriptionMode.ts
9388
+ var TranscriptionMode = {
9389
+ real_time: "real_time",
9390
+ async: "async"
9391
+ };
9392
+
9393
+ // src/generated/soniox/schema/translationConfigType.ts
9394
+ var TranslationConfigType = {
9395
+ one_way: "one_way",
9396
+ two_way: "two_way"
9397
+ };
9398
+
9399
+ // src/generated/soniox/api/sonioxPublicAPI.ts
9400
+ var uploadFile = (uploadFileBody2, options) => {
9401
+ const formData = new FormData();
9402
+ if (uploadFileBody2.client_reference_id !== void 0 && uploadFileBody2.client_reference_id !== null) {
9403
+ formData.append("client_reference_id", uploadFileBody2.client_reference_id);
9404
+ }
9405
+ formData.append("file", uploadFileBody2.file);
9406
+ return axios9.post("/v1/files", formData, options);
9407
+ };
9408
+ var createTranscription2 = (createTranscriptionPayload, options) => {
9409
+ return axios9.post("/v1/transcriptions", createTranscriptionPayload, options);
9410
+ };
9411
+ var getTranscription = (transcriptionId, options) => {
9412
+ return axios9.get(`/v1/transcriptions/${transcriptionId}`, options);
9413
+ };
9414
+ var getTranscriptionTranscript = (transcriptionId, options) => {
9415
+ return axios9.get(`/v1/transcriptions/${transcriptionId}/transcript`, options);
9416
+ };
9417
+ var getModels = (options) => {
9418
+ return axios9.get("/v1/models", options);
9419
+ };
9420
+
9463
9421
  // src/adapters/soniox-adapter.ts
9464
9422
  var SonioxAdapter = class extends BaseAdapter {
9465
9423
  constructor() {
@@ -9514,11 +9472,17 @@ var SonioxAdapter = class extends BaseAdapter {
9514
9472
  }
9515
9473
  }
9516
9474
  /**
9517
- * Get the base URL for API requests
9475
+ * Get the base URL for API requests (no /v1 suffix — generated functions include /v1 in paths)
9518
9476
  */
9519
9477
  get baseUrl() {
9520
9478
  if (this.config?.baseUrl) return this.config.baseUrl;
9521
- return `https://${this.getRegionalHost()}/v1`;
9479
+ return `https://${this.getRegionalHost()}`;
9480
+ }
9481
+ /**
9482
+ * Build axios config with Soniox Bearer auth
9483
+ */
9484
+ getAxiosConfig() {
9485
+ return super.getAxiosConfig("Authorization", (key) => `Bearer ${key}`);
9522
9486
  }
9523
9487
  initialize(config) {
9524
9488
  super.initialize(config);
@@ -9528,15 +9492,6 @@ var SonioxAdapter = class extends BaseAdapter {
9528
9492
  if (config.model) {
9529
9493
  this.defaultModel = config.model;
9530
9494
  }
9531
- this.client = axios9.create({
9532
- baseURL: this.baseUrl,
9533
- timeout: config.timeout || 12e4,
9534
- headers: {
9535
- Authorization: `Bearer ${config.apiKey}`,
9536
- "Content-Type": "application/json",
9537
- ...config.headers
9538
- }
9539
- });
9540
9495
  }
9541
9496
  /**
9542
9497
  * Get current region
@@ -9566,23 +9521,12 @@ var SonioxAdapter = class extends BaseAdapter {
9566
9521
  */
9567
9522
  setRegion(region) {
9568
9523
  this.region = region;
9569
- if (this.config?.apiKey) {
9570
- this.client = axios9.create({
9571
- baseURL: this.baseUrl,
9572
- timeout: this.config.timeout || 12e4,
9573
- headers: {
9574
- Authorization: `Bearer ${this.config.apiKey}`,
9575
- "Content-Type": "application/json",
9576
- ...this.config.headers
9577
- }
9578
- });
9579
- }
9580
9524
  }
9581
9525
  /**
9582
9526
  * Submit audio for transcription
9583
9527
  *
9584
- * Soniox uses async batch processing. The transcribe method submits audio
9585
- * and waits for completion (or use getTranscript for polling).
9528
+ * Uses the async v1 API: createTranscription returns status `queued`,
9529
+ * then the adapter polls until completion (or returns immediately if a webhook is set).
9586
9530
  *
9587
9531
  * @param audio - Audio input (URL or file)
9588
9532
  * @param options - Transcription options
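A minimal sketch of the two paths described above (internal polling versus webhook), assuming `SonioxAdapter` is exported from the package entry point; URLs are illustrative:

```typescript
const adapter = new SonioxAdapter();
adapter.initialize({ apiKey: process.env.SONIOX_API_KEY! });

// No webhook: the adapter polls internally and resolves with the finished result.
const done = await adapter.transcribe({ type: "url", url: "https://example.com/a.wav" });
if (done.success) console.log(done.data?.text);

// Webhook set: the call returns immediately with status "queued".
const queued = await adapter.transcribe(
  { type: "url", url: "https://example.com/a.wav" },
  { webhookUrl: "https://example.com/hooks/soniox" }
);
console.log(queued.data?.id, queued.data?.status); // keep the id to correlate later
```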
@@ -9591,21 +9535,44 @@ var SonioxAdapter = class extends BaseAdapter {
9591
9535
  async transcribe(audio, options) {
9592
9536
  this.validateConfig();
9593
9537
  try {
9594
- const requestBody = {
9595
- model: options?.model || this.defaultModel
9596
- };
9597
- if (audio.type === "url") {
9598
- requestBody.audio_url = audio.url;
9599
- } else if (audio.type === "file") {
9600
- const formData = new FormData();
9538
+ const sonioxOpts = options?.soniox;
9539
+ if (audio.type === "file") {
9601
9540
  const audioBlob = audio.file instanceof Blob ? audio.file : new Blob([audio.file], { type: audio.mimeType || "audio/wav" });
9602
- formData.append("file", audioBlob, audio.filename || "audio.wav");
9603
- const uploadResponse = await this.client.post("/files", formData, {
9604
- headers: {
9605
- "Content-Type": "multipart/form-data"
9606
- }
9607
- });
9608
- requestBody.file_id = uploadResponse.data.id;
9541
+ const uploadBody = { file: audioBlob };
9542
+ const fileResp = await uploadFile(uploadBody, this.getAxiosConfig());
9543
+ const payload = {
9544
+ ...sonioxOpts,
9545
+ model: options?.model || this.defaultModel,
9546
+ file_id: fileResp.data.id,
9547
+ language_hints: options?.language ? [options.language] : sonioxOpts?.language_hints,
9548
+ enable_speaker_diarization: options?.diarization || sonioxOpts?.enable_speaker_diarization,
9549
+ enable_language_identification: options?.languageDetection || sonioxOpts?.enable_language_identification,
9550
+ context: options?.customVocabulary?.length ? { terms: options.customVocabulary } : sonioxOpts?.context,
9551
+ webhook_url: options?.webhookUrl || sonioxOpts?.webhook_url
9552
+ };
9553
+ const createResp = await createTranscription2(payload, this.getAxiosConfig());
9554
+ const meta = createResp.data;
9555
+ if (options?.webhookUrl || sonioxOpts?.webhook_url) {
9556
+ return this.normalizeTranscription(meta);
9557
+ }
9558
+ return this.pollForCompletion(meta.id);
9559
+ } else if (audio.type === "url") {
9560
+ const payload = {
9561
+ ...sonioxOpts,
9562
+ model: options?.model || this.defaultModel,
9563
+ audio_url: audio.url,
9564
+ language_hints: options?.language ? [options.language] : sonioxOpts?.language_hints,
9565
+ enable_speaker_diarization: options?.diarization || sonioxOpts?.enable_speaker_diarization,
9566
+ enable_language_identification: options?.languageDetection || sonioxOpts?.enable_language_identification,
9567
+ context: options?.customVocabulary?.length ? { terms: options.customVocabulary } : sonioxOpts?.context,
9568
+ webhook_url: options?.webhookUrl || sonioxOpts?.webhook_url
9569
+ };
9570
+ const createResp = await createTranscription2(payload, this.getAxiosConfig());
9571
+ const meta = createResp.data;
9572
+ if (options?.webhookUrl || sonioxOpts?.webhook_url) {
9573
+ return this.normalizeTranscription(meta);
9574
+ }
9575
+ return this.pollForCompletion(meta.id);
9609
9576
  } else {
9610
9577
  return {
9611
9578
  success: false,
@@ -9616,38 +9583,6 @@ var SonioxAdapter = class extends BaseAdapter {
9616
9583
  }
9617
9584
  };
9618
9585
  }
9619
- if (options?.language) {
9620
- requestBody.language_hints = [options.language];
9621
- }
9622
- if (options?.diarization) {
9623
- requestBody.enable_speaker_diarization = true;
9624
- }
9625
- if (options?.languageDetection) {
9626
- requestBody.enable_language_identification = true;
9627
- }
9628
- if (options?.customVocabulary && options.customVocabulary.length > 0) {
9629
- requestBody.context = {
9630
- terms: options.customVocabulary
9631
- };
9632
- }
9633
- if (options?.webhookUrl) {
9634
- requestBody.webhook_url = options.webhookUrl;
9635
- }
9636
- const response = await this.client.post("/transcriptions", requestBody);
9637
- const transcriptionId = response.data.id;
9638
- if (options?.webhookUrl) {
9639
- return {
9640
- success: true,
9641
- provider: this.name,
9642
- data: {
9643
- id: transcriptionId,
9644
- text: "",
9645
- status: "queued"
9646
- },
9647
- raw: response.data
9648
- };
9649
- }
9650
- return await this.pollForCompletion(transcriptionId);
9651
9586
  } catch (error) {
9652
9587
  return this.createErrorResponse(error);
9653
9588
  }
@@ -9655,9 +9590,8 @@ var SonioxAdapter = class extends BaseAdapter {
9655
9590
  /**
9656
9591
  * Get transcription result by ID
9657
9592
  *
9658
- * Checks job status via GET /v1/transcriptions/{id}, then fetches
9659
- * the full transcript via GET /v1/transcriptions/{id}/transcript
9660
- * when completed.
9593
+ * Fetches transcription metadata and, if completed, the transcript text/tokens.
9594
+ * Used by pollForCompletion() for async polling.
9661
9595
  *
9662
9596
  * @param transcriptId - Transcript ID
9663
9597
  * @returns Transcription response
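Where you keep the transcription ID yourself (for example from the webhook path above), a hand-rolled polling loop over `getTranscript()` might look like the following; the interval and attempt cap are illustration values, not package defaults:

```typescript
async function waitForSoniox(adapter: SonioxAdapter, id: string) {
  for (let attempt = 0; attempt < 60; attempt++) {
    const result = await adapter.getTranscript(id);
    if (!result.success) return result;                      // error status surfaced as failure
    if (result.data?.status === "completed") return result;  // text/tokens are present
    await new Promise((r) => setTimeout(r, 5000));           // still queued or processing
  }
  throw new Error(`Soniox transcription ${id} did not complete in time`);
}
```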
@@ -9665,39 +9599,20 @@ var SonioxAdapter = class extends BaseAdapter {
9665
9599
  async getTranscript(transcriptId) {
9666
9600
  this.validateConfig();
9667
9601
  try {
9668
- const statusResponse = await this.client.get(`/transcriptions/${transcriptId}`);
9669
- const job = statusResponse.data;
9670
- if (job.status === "error") {
9671
- return {
9672
- success: false,
9673
- provider: this.name,
9674
- error: {
9675
- code: "TRANSCRIPTION_ERROR",
9676
- message: job.error_message || "Transcription failed"
9677
- }
9678
- };
9679
- }
9680
- if (job.status !== "completed") {
9681
- return {
9682
- success: true,
9683
- provider: this.name,
9684
- data: {
9685
- id: job.id,
9686
- text: "",
9687
- status: job.status
9688
- },
9689
- raw: job
9690
- };
9602
+ const metaResp = await getTranscription(transcriptId, this.getAxiosConfig());
9603
+ const meta = metaResp.data;
9604
+ if (meta.status === TranscriptionStatus.completed) {
9605
+ try {
9606
+ const transcriptResp = await getTranscriptionTranscript(
9607
+ transcriptId,
9608
+ this.getAxiosConfig()
9609
+ );
9610
+ return this.normalizeTranscription(meta, transcriptResp.data);
9611
+ } catch (transcriptError) {
9612
+ return this.createErrorResponse(transcriptError);
9613
+ }
9691
9614
  }
9692
- const transcriptResponse = await this.client.get(
9693
- `/transcriptions/${transcriptId}/transcript`
9694
- );
9695
- return this.normalizeResponse({
9696
- ...transcriptResponse.data,
9697
- // Carry over job metadata
9698
- id: job.id,
9699
- audio_duration_ms: job.audio_duration_ms
9700
- });
9615
+ return this.normalizeTranscription(meta);
9701
9616
  } catch (error) {
9702
9617
  return this.createErrorResponse(error);
9703
9618
  }
@@ -9717,51 +9632,50 @@ var SonioxAdapter = class extends BaseAdapter {
9717
9632
  const sessionId = `soniox_${Date.now()}_${Math.random().toString(36).substring(7)}`;
9718
9633
  const createdAt = /* @__PURE__ */ new Date();
9719
9634
  const wsBase = this.config?.wsBaseUrl || (this.config?.baseUrl ? this.deriveWsUrl(this.config.baseUrl) : `wss://${this.getRegionalWsHost()}`);
9720
- const wsUrl = `${wsBase}/transcribe-websocket`;
9721
- const modelId = options?.sonioxStreaming?.model || options?.model || "stt-rt-v4";
9722
- const sonioxOpts = options?.sonioxStreaming;
9723
- const initMessage = {
9724
- api_key: this.config.apiKey,
9725
- model: modelId
9726
- };
9727
- if (sonioxOpts?.audioFormat) {
9728
- initMessage.audio_format = sonioxOpts.audioFormat;
9729
- } else if (options?.encoding) {
9635
+ const wsUrl = new URL(`${wsBase}/transcribe-websocket`);
9636
+ wsUrl.searchParams.set("api_key", this.config.apiKey);
9637
+ const modelId = options?.sonioxStreaming?.model || options?.model || "stt-rt-preview";
9638
+ wsUrl.searchParams.set("model", modelId);
9639
+ if (options?.encoding) {
9730
9640
  const encodingMap = {
9731
9641
  linear16: "pcm_s16le",
9732
9642
  pcm: "pcm_s16le",
9733
9643
  mulaw: "mulaw",
9734
9644
  alaw: "alaw"
9735
9645
  };
9736
- initMessage.audio_format = encodingMap[options.encoding] || options.encoding;
9646
+ wsUrl.searchParams.set("audio_format", encodingMap[options.encoding] || options.encoding);
9737
9647
  }
9738
- if (sonioxOpts?.sampleRate || options?.sampleRate) {
9739
- initMessage.sample_rate = sonioxOpts?.sampleRate || options?.sampleRate;
9648
+ if (options?.sampleRate) {
9649
+ wsUrl.searchParams.set("sample_rate", options.sampleRate.toString());
9740
9650
  }
9741
- if (sonioxOpts?.numChannels || options?.channels) {
9742
- initMessage.num_channels = sonioxOpts?.numChannels || options?.channels;
9651
+ if (options?.channels) {
9652
+ wsUrl.searchParams.set("num_channels", options.channels.toString());
9743
9653
  }
9654
+ const sonioxOpts = options?.sonioxStreaming;
9744
9655
  if (sonioxOpts) {
9745
9656
  if (sonioxOpts.languageHints && sonioxOpts.languageHints.length > 0) {
9746
- initMessage.language_hints = sonioxOpts.languageHints;
9657
+ wsUrl.searchParams.set("language_hints", JSON.stringify(sonioxOpts.languageHints));
9747
9658
  }
9748
9659
  if (sonioxOpts.enableLanguageIdentification) {
9749
- initMessage.enable_language_identification = true;
9660
+ wsUrl.searchParams.set("enable_language_identification", "true");
9750
9661
  }
9751
9662
  if (sonioxOpts.enableEndpointDetection) {
9752
- initMessage.enable_endpoint_detection = true;
9663
+ wsUrl.searchParams.set("enable_endpoint_detection", "true");
9753
9664
  }
9754
9665
  if (sonioxOpts.enableSpeakerDiarization) {
9755
- initMessage.enable_speaker_diarization = true;
9666
+ wsUrl.searchParams.set("enable_speaker_diarization", "true");
9756
9667
  }
9757
9668
  if (sonioxOpts.context) {
9758
- initMessage.context = typeof sonioxOpts.context === "string" ? sonioxOpts.context : sonioxOpts.context;
9669
+ wsUrl.searchParams.set(
9670
+ "context",
9671
+ typeof sonioxOpts.context === "string" ? sonioxOpts.context : JSON.stringify(sonioxOpts.context)
9672
+ );
9759
9673
  }
9760
9674
  if (sonioxOpts.translation) {
9761
- initMessage.translation = sonioxOpts.translation;
9675
+ wsUrl.searchParams.set("translation", JSON.stringify(sonioxOpts.translation));
9762
9676
  }
9763
9677
  if (sonioxOpts.clientReferenceId) {
9764
- initMessage.client_reference_id = sonioxOpts.clientReferenceId;
9678
+ wsUrl.searchParams.set("client_reference_id", sonioxOpts.clientReferenceId);
9765
9679
  }
9766
9680
  }
9767
9681
  if (!sonioxOpts?.languageHints && options?.language) {
@@ -9770,33 +9684,24 @@ var SonioxAdapter = class extends BaseAdapter {
9770
9684
  `[Soniox] Warning: language="multi" is Deepgram-specific and not supported by Soniox. For automatic language detection, use languageDetection: true instead, or specify a language code like 'en'.`
9771
9685
  );
9772
9686
  }
9773
- initMessage.language_hints = [options.language];
9687
+ wsUrl.searchParams.set("language_hints", JSON.stringify([options.language]));
9774
9688
  }
9775
9689
  if (!sonioxOpts?.enableSpeakerDiarization && options?.diarization) {
9776
- initMessage.enable_speaker_diarization = true;
9690
+ wsUrl.searchParams.set("enable_speaker_diarization", "true");
9777
9691
  }
9778
9692
  if (!sonioxOpts?.enableLanguageIdentification && options?.languageDetection) {
9779
- initMessage.enable_language_identification = true;
9693
+ wsUrl.searchParams.set("enable_language_identification", "true");
9694
+ }
9695
+ if (options?.interimResults !== false) {
9780
9696
  }
9781
9697
  let status = "connecting";
9782
9698
  let openedAt = null;
9783
9699
  let receivedData = false;
9784
9700
  const WebSocketImpl = typeof WebSocket !== "undefined" ? WebSocket : __require("ws");
9785
- const ws = new WebSocketImpl(wsUrl);
9701
+ const ws = new WebSocketImpl(wsUrl.toString());
9786
9702
  ws.onopen = () => {
9787
- openedAt = Date.now();
9788
- const initPayload = JSON.stringify(initMessage);
9789
- if (callbacks?.onRawMessage) {
9790
- callbacks.onRawMessage({
9791
- provider: this.name,
9792
- direction: "outgoing",
9793
- timestamp: Date.now(),
9794
- payload: initPayload,
9795
- messageType: "init"
9796
- });
9797
- }
9798
- ws.send(initPayload);
9799
9703
  status = "open";
9704
+ openedAt = Date.now();
9800
9705
  callbacks?.onOpen?.();
9801
9706
  };
9802
9707
  ws.onmessage = (event) => {
@@ -9805,7 +9710,8 @@ var SonioxAdapter = class extends BaseAdapter {
9805
9710
  let messageType;
9806
9711
  try {
9807
9712
  const data = JSON.parse(rawPayload);
9808
- if (data.error) {
9713
+ const errorMessage = data.error_message;
9714
+ if (errorMessage) {
9809
9715
  messageType = "error";
9810
9716
  } else if (data.finished) {
9811
9717
  messageType = "finished";
@@ -9821,10 +9727,10 @@ var SonioxAdapter = class extends BaseAdapter {
9821
9727
  messageType
9822
9728
  });
9823
9729
  }
9824
- if (data.error) {
9730
+ if (errorMessage) {
9825
9731
  callbacks?.onError?.({
9826
9732
  code: data.error_code?.toString() || "STREAM_ERROR",
9827
- message: data.error
9733
+ message: errorMessage
9828
9734
  });
9829
9735
  return;
9830
9736
  }
@@ -9838,7 +9744,7 @@ var SonioxAdapter = class extends BaseAdapter {
9838
9744
  start: token.start_ms ? token.start_ms / 1e3 : 0,
9839
9745
  end: token.end_ms ? token.end_ms / 1e3 : 0,
9840
9746
  confidence: token.confidence,
9841
- speaker: token.speaker
9747
+ speaker: token.speaker ?? void 0
9842
9748
  }));
9843
9749
  const text = data.text || data.tokens.map((t) => t.text).join("");
9844
9750
  const isFinal = data.tokens.every((t) => t.is_final);
@@ -9847,8 +9753,8 @@ var SonioxAdapter = class extends BaseAdapter {
9847
9753
  text,
9848
9754
  isFinal,
9849
9755
  words,
9850
- speaker: data.tokens[0]?.speaker,
9851
- language: data.tokens[0]?.language,
9756
+ speaker: data.tokens[0]?.speaker ?? void 0,
9757
+ language: data.tokens[0]?.language ?? void 0,
9852
9758
  confidence: data.tokens[0]?.confidence
9853
9759
  };
9854
9760
  callbacks?.onTranscript?.(event2);
@@ -9875,10 +9781,10 @@ var SonioxAdapter = class extends BaseAdapter {
9875
9781
  ws.onclose = (event) => {
9876
9782
  status = "closed";
9877
9783
  const timeSinceOpen = openedAt ? Date.now() - openedAt : null;
9878
- const isEarlyClose = timeSinceOpen !== null && timeSinceOpen < 5e3 && !receivedData;
9879
- if (isEarlyClose && event.code === 1e3) {
9784
+ const isImmediateClose = timeSinceOpen !== null && timeSinceOpen < 1e3 && !receivedData;
9785
+ if (isImmediateClose && event.code === 1e3) {
9880
9786
  const errorMessage = [
9881
- "Soniox closed connection shortly after opening.",
9787
+ "Soniox closed connection immediately after opening.",
9882
9788
  `Current config: region=${this.region}, model=${modelId}`,
9883
9789
  "Likely causes:",
9884
9790
  " - Invalid API key or region mismatch (keys are region-specific, current: " + this.region + ")",
@@ -9964,7 +9870,7 @@ var SonioxAdapter = class extends BaseAdapter {
9964
9870
  async getModels() {
9965
9871
  this.validateConfig();
9966
9872
  try {
9967
- const response = await this.client.get("/models");
9873
+ const response = await getModels(this.getAxiosConfig());
9968
9874
  return response.data.models || [];
9969
9875
  } catch (error) {
9970
9876
  console.error("Failed to fetch Soniox models:", error);
@@ -9996,11 +9902,44 @@ var SonioxAdapter = class extends BaseAdapter {
9996
9902
  return buildUtterancesFromWords(words);
9997
9903
  }
9998
9904
  /**
9999
- * Normalize Soniox response to unified format
9905
+ * Normalize v1 API response to unified format
9906
+ *
9907
+ * @param meta - Transcription metadata from getTranscription/createTranscription
9908
+ * @param transcript - Transcript data (text/tokens), only present when status is completed
10000
9909
  */
10001
- normalizeResponse(response) {
10002
- const { text, tokens } = response;
10003
- const words = tokens.map((token) => ({
9910
+ normalizeTranscription(meta, transcript) {
9911
+ if (meta.status === TranscriptionStatus.error) {
9912
+ return {
9913
+ success: false,
9914
+ provider: this.name,
9915
+ data: {
9916
+ id: meta.id,
9917
+ text: "",
9918
+ status: "error"
9919
+ },
9920
+ error: {
9921
+ code: meta.error_type || "TRANSCRIPTION_ERROR",
9922
+ message: meta.error_message || "Transcription failed"
9923
+ },
9924
+ raw: { meta, transcript }
9925
+ };
9926
+ }
9927
+ if (!transcript) {
9928
+ return {
9929
+ success: true,
9930
+ provider: this.name,
9931
+ data: {
9932
+ id: meta.id,
9933
+ text: "",
9934
+ status: meta.status,
9935
+ duration: meta.audio_duration_ms ? meta.audio_duration_ms / 1e3 : void 0
9936
+ },
9937
+ raw: { meta }
9938
+ };
9939
+ }
9940
+ const tokens = transcript.tokens || [];
9941
+ const text = transcript.text || tokens.map((t) => t.text).join("");
9942
+ const words = tokens.filter((t) => t.start_ms !== void 0 && t.end_ms !== void 0).map((token) => ({
10004
9943
  word: token.text,
10005
9944
  start: token.start_ms / 1e3,
10006
9945
  end: token.end_ms / 1e3,
@@ -10008,33 +9947,32 @@ var SonioxAdapter = class extends BaseAdapter {
10008
9947
  speaker: token.speaker ?? void 0
10009
9948
  }));
10010
9949
  const speakerSet = /* @__PURE__ */ new Set();
10011
- for (const token of tokens) {
10012
- if (token.speaker) speakerSet.add(token.speaker);
10013
- }
9950
+ tokens.forEach((t) => {
9951
+ if (t.speaker) speakerSet.add(String(t.speaker));
9952
+ });
10014
9953
  const speakers = speakerSet.size > 0 ? Array.from(speakerSet).map((id) => ({
10015
9954
  id,
10016
9955
  label: `Speaker ${id}`
10017
9956
  })) : void 0;
10018
- const utterances = tokens.length > 0 ? this.buildUtterancesFromTokens(tokens) : [];
9957
+ const utterances = this.buildUtterancesFromTokens(tokens);
10019
9958
  const language = tokens.find((t) => t.language)?.language ?? void 0;
10020
9959
  return {
10021
9960
  success: true,
10022
9961
  provider: this.name,
10023
9962
  data: {
10024
- id: response.id || `soniox_${Date.now()}`,
9963
+ id: meta.id,
10025
9964
  text,
10026
9965
  status: TranscriptionStatus.completed,
10027
9966
  language,
10028
- duration: response.audio_duration_ms ? response.audio_duration_ms / 1e3 : response.total_audio_proc_ms ? response.total_audio_proc_ms / 1e3 : void 0,
9967
+ duration: meta.audio_duration_ms ? meta.audio_duration_ms / 1e3 : void 0,
10029
9968
  speakers,
10030
9969
  words: words.length > 0 ? words : void 0,
10031
9970
  utterances: utterances.length > 0 ? utterances : void 0
10032
9971
  },
10033
9972
  tracking: {
10034
- requestId: response.id,
10035
- processingTimeMs: response.total_audio_proc_ms
9973
+ requestId: meta.id
10036
9974
  },
10037
- raw: response
9975
+ raw: { meta, transcript }
10038
9976
  };
10039
9977
  }
10040
9978
  };
@@ -10131,7 +10069,15 @@ var ElevenLabsAdapter = class extends BaseAdapter {
10131
10069
  /**
10132
10070
  * Submit audio for transcription
10133
10071
  *
10134
- * ElevenLabs batch is synchronous - the API returns the result directly.
10072
+ * ElevenLabs batch is normally synchronous: the API returns results directly.
10073
+ *
10074
+ * **Webhook mode:** When `webhookUrl` is set (or `elevenlabs.webhook` is true),
10075
+ * the request is processed asynchronously. ElevenLabs returns a 202 with a
10076
+ * `request_id` and delivers results to a webhook configured in the ElevenLabs
10077
+ * dashboard. The unified `webhookUrl` acts as an intent flag to enable async
10078
+ * mode — the actual delivery destination must be pre-configured in your
10079
+ * ElevenLabs dashboard. Use `elevenlabs.webhook_id` to target a specific
10080
+ * webhook endpoint.
10135
10081
  */
10136
10082
  async transcribe(audio, options) {
10137
10083
  this.validateConfig();
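A sketch of the async path described in the docstring above, assuming `ElevenLabsAdapter` is exported from the package entry point and that a webhook destination is already configured in the ElevenLabs dashboard; the `webhook_id` value is hypothetical:

```typescript
import { readFile } from "node:fs/promises";

const adapter = new ElevenLabsAdapter();
adapter.initialize({ apiKey: process.env.ELEVENLABS_API_KEY! });

const ack = await adapter.transcribe(
  { type: "file", file: await readFile("meeting.mp3"), filename: "meeting.mp3" },
  {
    webhookUrl: "https://example.com/hooks/elevenlabs", // intent flag only; not sent as a URL
    elevenlabs: { webhook_id: "wh_example123" },        // hypothetical webhook ID
  }
);

if (ack.success) {
  console.log(ack.data?.status);        // "queued"
  console.log(ack.tracking?.requestId); // correlate with the webhook delivery
}
```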
@@ -10154,6 +10100,11 @@ var ElevenLabsAdapter = class extends BaseAdapter {
10154
10100
  }
10155
10101
  };
10156
10102
  }
10103
+ const elevenlabsOpts = options?.elevenlabs;
10104
+ const useWebhook = options?.webhookUrl || elevenlabsOpts?.webhook;
10105
+ if (useWebhook) {
10106
+ formData.append("webhook", "true");
10107
+ }
10157
10108
  if (options?.language) {
10158
10109
  formData.append("language_code", options.language);
10159
10110
  }
@@ -10172,7 +10123,6 @@ var ElevenLabsAdapter = class extends BaseAdapter {
10172
10123
  if (options?.entityDetection) {
10173
10124
  formData.append("entity_detection", "all");
10174
10125
  }
10175
- const elevenlabsOpts = options?.elevenlabs;
10176
10126
  if (elevenlabsOpts) {
10177
10127
  for (const [key, value] of Object.entries(elevenlabsOpts)) {
10178
10128
  if (value === void 0 || value === null) continue;
@@ -10190,26 +10140,24 @@ var ElevenLabsAdapter = class extends BaseAdapter {
10190
10140
  }
10191
10141
  }
10192
10142
  }
10193
- if (options?.webhookUrl) {
10194
- if (!formData.has("webhook")) {
10195
- formData.append("webhook", "true");
10196
- }
10197
- }
10198
10143
  const response = await this.client.post("/v1/speech-to-text", formData, {
10199
10144
  headers: {
10200
10145
  "Content-Type": "multipart/form-data"
10201
10146
  }
10202
10147
  });
10203
- if (options?.webhookUrl) {
10204
- const transcriptionId = response.data.transcription_id || response.data.id || `elevenlabs_${Date.now()}`;
10148
+ if (useWebhook) {
10149
+ const ack = response.data;
10205
10150
  return {
10206
10151
  success: true,
10207
10152
  provider: this.name,
10208
10153
  data: {
10209
- id: transcriptionId,
10154
+ id: ack.request_id || ack.transcription_id || `elevenlabs_${Date.now()}`,
10210
10155
  text: "",
10211
10156
  status: "queued"
10212
10157
  },
10158
+ tracking: {
10159
+ requestId: ack.request_id
10160
+ },
10213
10161
  raw: response.data
10214
10162
  };
10215
10163
  }
@@ -10305,20 +10253,9 @@ var ElevenLabsAdapter = class extends BaseAdapter {
10305
10253
  ws.onmessage = (event) => {
10306
10254
  receivedData = true;
10307
10255
  const rawPayload = typeof event.data === "string" ? event.data : event.data.toString();
10308
- let messageType;
10309
10256
  try {
10310
10257
  const data = JSON.parse(rawPayload);
10311
- if (data.error) {
10312
- messageType = "error";
10313
- } else if (data.message_type === "session_started") {
10314
- messageType = "session_started";
10315
- } else if (data.message_type === "partial_transcript") {
10316
- messageType = "partial_transcript";
10317
- } else if (data.message_type === "committed_transcript") {
10318
- messageType = "committed_transcript";
10319
- } else if (data.message_type === "committed_transcript_with_timestamps") {
10320
- messageType = "committed_transcript_with_timestamps";
10321
- }
10258
+ const messageType = "error" in data ? "error" : data.message_type;
10322
10259
  if (callbacks?.onRawMessage) {
10323
10260
  callbacks.onRawMessage({
10324
10261
  provider: this.name,
@@ -10328,50 +10265,62 @@ var ElevenLabsAdapter = class extends BaseAdapter {
10328
10265
  messageType
10329
10266
  });
10330
10267
  }
10331
- if (data.error) {
10268
+ if ("error" in data) {
10332
10269
  callbacks?.onError?.({
10333
- code: data.error_code?.toString() || "STREAM_ERROR",
10270
+ code: data.message_type || "STREAM_ERROR",
10334
10271
  message: data.error
10335
10272
  });
10336
10273
  return;
10337
10274
  }
10338
- if (data.message_type === "session_started") {
10339
- return;
10340
- }
10341
- if (data.message_type === "partial_transcript") {
10342
- const streamEvent = {
10343
- type: "transcript",
10344
- text: data.text || "",
10345
- isFinal: false,
10346
- confidence: void 0,
10347
- language: data.language_code
10348
- };
10349
- callbacks?.onTranscript?.(streamEvent);
10350
- return;
10351
- }
10352
- if (data.message_type === "committed_transcript" || data.message_type === "committed_transcript_with_timestamps") {
10353
- const words = data.words ? data.words.map((w) => ({
10354
- word: w.text || "",
10355
- start: w.start || 0,
10356
- end: w.end || 0,
10357
- confidence: w.logprob !== void 0 ? Math.exp(w.logprob) : void 0,
10358
- speaker: w.speaker_id
10359
- })) : [];
10360
- const streamEvent = {
10361
- type: "transcript",
10362
- text: data.text || "",
10363
- isFinal: true,
10364
- words: words.length > 0 ? words : void 0,
10365
- speaker: words[0]?.speaker,
10366
- language: data.language_code,
10367
- confidence: void 0
10368
- };
10369
- callbacks?.onTranscript?.(streamEvent);
10370
- if (options?.diarization && words.length > 0) {
10371
- const utterances = buildUtterancesFromWords(words);
10372
- for (const utterance of utterances) {
10373
- callbacks?.onUtterance?.(utterance);
10275
+ switch (data.message_type) {
10276
+ case "session_started":
10277
+ break;
10278
+ case "partial_transcript": {
10279
+ const streamEvent = {
10280
+ type: "transcript",
10281
+ text: data.text || "",
10282
+ isFinal: false,
10283
+ confidence: void 0
10284
+ };
10285
+ callbacks?.onTranscript?.(streamEvent);
10286
+ break;
10287
+ }
10288
+ case "committed_transcript": {
10289
+ const streamEvent = {
10290
+ type: "transcript",
10291
+ text: data.text || "",
10292
+ isFinal: true,
10293
+ confidence: void 0
10294
+ };
10295
+ callbacks?.onTranscript?.(streamEvent);
10296
+ break;
10297
+ }
10298
+ case "committed_transcript_with_timestamps": {
10299
+ const tsData = data;
10300
+ const words = tsData.words ? tsData.words.map((w) => ({
10301
+ word: w.text || "",
10302
+ start: w.start || 0,
10303
+ end: w.end || 0,
10304
+ confidence: w.logprob !== void 0 ? Math.exp(w.logprob) : void 0,
10305
+ speaker: w.speaker_id
10306
+ })) : [];
10307
+ const streamEvent = {
10308
+ type: "transcript",
10309
+ text: tsData.text || "",
10310
+ isFinal: true,
10311
+ words: words.length > 0 ? words : void 0,
10312
+ speaker: words[0]?.speaker,
10313
+ language: tsData.language_code,
10314
+ confidence: void 0
10315
+ };
10316
+ callbacks?.onTranscript?.(streamEvent);
10317
+ if (options?.diarization && words.length > 0) {
10318
+ const utterances = buildUtterancesFromWords(words);
10319
+ for (const utterance of utterances) {
10320
+ callbacks?.onUtterance?.(utterance);
10321
+ }
10374
10322
  }
10323
+ break;
10375
10324
  }
10376
10325
  }
10377
10326
  } catch (error) {
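The switch above branches on `message_type`. A type-level sketch of the message shapes it assumes, derived only from the fields read in this hunk (not official ElevenLabs SDK types):

type ElevenLabsStreamMessage =
  | { message_type: "session_started" }
  | { message_type: "partial_transcript"; text?: string }
  | { message_type: "committed_transcript"; text?: string }
  | {
      message_type: "committed_transcript_with_timestamps";
      text?: string;
      language_code?: string;
      words?: Array<{
        text?: string;
        start?: number;
        end?: number;
        logprob?: number;      // mapped to confidence via Math.exp(logprob)
        speaker_id?: string;
      }>;
    }
  | { error: string; message_type?: string };  // an "error" key takes priority over message_type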
@@ -10526,7 +10475,7 @@ var ElevenLabsAdapter = class extends BaseAdapter {
10526
10475
  }
10527
10476
  }
10528
10477
  }
10529
- const transcriptionId = ("transcription_id" in response ? response.transcription_id : response.transcription_id) || chunks[0]?.transcription_id || `elevenlabs_${Date.now()}`;
10478
+ const transcriptionId = response.transcription_id || chunks[0]?.transcription_id || `elevenlabs_${Date.now()}`;
10530
10479
  return {
10531
10480
  success: true,
10532
10481
  provider: this.name,
@@ -36444,12 +36393,10 @@ var createTemporaryApiKeyBody = zod10.object({
36444
36393
  var streaming_types_zod_exports = {};
36445
36394
  __export(streaming_types_zod_exports, {
36446
36395
  sonioxAudioFormatSchema: () => sonioxAudioFormatSchema,
36447
- sonioxAutoDetectedAudioFormatSchema: () => sonioxAutoDetectedAudioFormatSchema,
36448
36396
  sonioxContextGeneralItemSchema: () => sonioxContextGeneralItemSchema,
36449
36397
  sonioxContextSchema: () => sonioxContextSchema,
36450
36398
  sonioxErrorStatusSchema: () => sonioxErrorStatusSchema,
36451
36399
  sonioxOneWayTranslationSchema: () => sonioxOneWayTranslationSchema,
36452
- sonioxPcmAudioEncodingSchema: () => sonioxPcmAudioEncodingSchema,
36453
36400
  sonioxRealtimeModelSchema: () => sonioxRealtimeModelSchema,
36454
36401
  sonioxRecorderStateSchema: () => sonioxRecorderStateSchema,
36455
36402
  sonioxStreamingResponseSchema: () => sonioxStreamingResponseSchema,
@@ -36463,7 +36410,7 @@ __export(streaming_types_zod_exports, {
36463
36410
  streamingUpdateConfigParams: () => streamingUpdateConfigParams3
36464
36411
  });
36465
36412
  import { z as zod11 } from "zod";
36466
- var sonioxAutoDetectedAudioFormatSchema = zod11.enum([
36413
+ var sonioxAudioFormatSchema = zod11.enum([
36467
36414
  "auto",
36468
36415
  "aac",
36469
36416
  "aiff",
@@ -36473,10 +36420,7 @@ var sonioxAutoDetectedAudioFormatSchema = zod11.enum([
36473
36420
  "mp3",
36474
36421
  "ogg",
36475
36422
  "wav",
36476
- "webm"
36477
- ]);
36478
- var sonioxPcmAudioEncodingSchema = zod11.enum([
36479
- // Signed PCM
36423
+ "webm",
36480
36424
  "pcm_s8",
36481
36425
  "pcm_s16le",
36482
36426
  "pcm_s16be",
@@ -36484,7 +36428,6 @@ var sonioxPcmAudioEncodingSchema = zod11.enum([
36484
36428
  "pcm_s24be",
36485
36429
  "pcm_s32le",
36486
36430
  "pcm_s32be",
36487
- // Unsigned PCM
36488
36431
  "pcm_u8",
36489
36432
  "pcm_u16le",
36490
36433
  "pcm_u16be",
@@ -36492,86 +36435,81 @@ var sonioxPcmAudioEncodingSchema = zod11.enum([
36492
36435
  "pcm_u24be",
36493
36436
  "pcm_u32le",
36494
36437
  "pcm_u32be",
36495
- // Float PCM
36496
36438
  "pcm_f32le",
36497
36439
  "pcm_f32be",
36498
36440
  "pcm_f64le",
36499
36441
  "pcm_f64be",
36500
- // Companded
36501
36442
  "mulaw",
36502
36443
  "alaw"
36503
36444
  ]);
36504
- var sonioxAudioFormatSchema = zod11.union([
36505
- sonioxAutoDetectedAudioFormatSchema,
36506
- sonioxPcmAudioEncodingSchema
36507
- ]);
36508
36445
  var sonioxOneWayTranslationSchema = zod11.object({
36509
36446
  type: zod11.literal("one_way"),
36510
- target_language: zod11.string().describe("Target language code for translation")
36447
+ target_language: zod11.string()
36511
36448
  });
36512
36449
  var sonioxTwoWayTranslationSchema = zod11.object({
36513
36450
  type: zod11.literal("two_way"),
36514
- language_a: zod11.string().describe("First language for bidirectional translation"),
36515
- language_b: zod11.string().describe("Second language for bidirectional translation")
36451
+ language_a: zod11.string(),
36452
+ language_b: zod11.string()
36516
36453
  });
36517
36454
  var sonioxTranslationConfigSchema = zod11.union([
36518
36455
  sonioxOneWayTranslationSchema,
36519
36456
  sonioxTwoWayTranslationSchema
36520
36457
  ]);
36521
36458
  var sonioxContextGeneralItemSchema = zod11.object({
36522
- key: zod11.string().describe("Context item key (e.g. 'Domain')"),
36523
- value: zod11.string().describe("Context item value (e.g. 'medicine')")
36459
+ key: zod11.string(),
36460
+ value: zod11.string()
36524
36461
  });
36525
36462
  var sonioxTranslationTermSchema = zod11.object({
36526
- source: zod11.string().describe("Source term"),
36527
- target: zod11.string().describe("Target term to translate to")
36463
+ source: zod11.string(),
36464
+ target: zod11.string()
36528
36465
  });
36529
36466
  var sonioxStructuredContextSchema = zod11.object({
36530
- general: zod11.array(sonioxContextGeneralItemSchema).optional().describe("General context items (key-value pairs)"),
36531
- text: zod11.string().optional().describe("Text context"),
36532
- terms: zod11.array(zod11.string()).optional().describe("Terms that might occur in speech"),
36533
- translation_terms: zod11.array(sonioxTranslationTermSchema).optional().describe("Hints how to translate specific terms (ignored if translation is not enabled)")
36467
+ general: zod11.array(sonioxContextGeneralItemSchema).optional(),
36468
+ text: zod11.string().optional(),
36469
+ terms: zod11.array(zod11.string()).optional(),
36470
+ translation_terms: zod11.array(sonioxTranslationTermSchema).optional()
36534
36471
  });
36535
36472
  var sonioxContextSchema = zod11.union([sonioxStructuredContextSchema, zod11.string()]);
36536
36473
  var sonioxRealtimeModelSchema = zod11.enum([
36474
+ "stt-rt-v4",
36537
36475
  "stt-rt-v3",
36538
36476
  "stt-rt-preview",
36539
36477
  "stt-rt-v3-preview",
36540
36478
  "stt-rt-preview-v2"
36541
36479
  ]);
36542
36480
  var streamingTranscriberParams3 = zod11.object({
36543
- model: sonioxRealtimeModelSchema.describe("Real-time model to use"),
36544
- audioFormat: sonioxAudioFormatSchema.optional().describe("Audio format specification. Use 'auto' for automatic detection"),
36545
- sampleRate: zod11.number().optional().describe("Sample rate in Hz (required for raw PCM formats)"),
36546
- numChannels: zod11.number().min(1).max(2).optional().describe("Number of audio channels (1 for mono, 2 for stereo) - required for raw PCM formats"),
36547
- languageHints: zod11.array(zod11.string()).optional().describe("Expected languages in the audio (ISO language codes)"),
36548
- context: sonioxContextSchema.optional().describe("Additional context to improve transcription accuracy"),
36549
- enableSpeakerDiarization: zod11.boolean().optional().describe("Enable speaker diarization - each token will include a speaker field"),
36550
- enableLanguageIdentification: zod11.boolean().optional().describe("Enable language identification - each token will include a language field"),
36551
- enableEndpointDetection: zod11.boolean().optional().describe("Enable endpoint detection to detect when a speaker has finished talking"),
36552
- translation: sonioxTranslationConfigSchema.optional().describe("Translation configuration"),
36553
- clientReferenceId: zod11.string().optional().describe("Optional tracking identifier (client-defined)")
36554
- });
36555
- var sonioxTranslationStatusSchema = zod11.enum(["none", "original", "translation"]);
36481
+ model: sonioxRealtimeModelSchema,
36482
+ audioFormat: sonioxAudioFormatSchema.optional(),
36483
+ sampleRate: zod11.number().optional(),
36484
+ numChannels: zod11.number().optional(),
36485
+ languageHints: zod11.array(zod11.string()).optional(),
36486
+ context: sonioxContextSchema.optional(),
36487
+ enableSpeakerDiarization: zod11.boolean().optional(),
36488
+ enableLanguageIdentification: zod11.boolean().optional(),
36489
+ enableEndpointDetection: zod11.boolean().optional(),
36490
+ translation: sonioxTranslationConfigSchema.optional(),
36491
+ clientReferenceId: zod11.string().optional()
36492
+ });
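A hedged example of a params object that should satisfy the schema above. Whether this schema is re-exported as `streamingTranscriberParams` on `SonioxStreamingZodSchemas` is an assumption based on the export pattern earlier in this module; the values themselves are illustrative.

const sonioxParams = {
  model: "stt-rt-v4",
  audioFormat: "pcm_s16le",      // raw PCM, so sample rate and channel count are supplied
  sampleRate: 16000,
  numChannels: 1,
  languageHints: ["en", "de"],
  enableSpeakerDiarization: true,
  clientReferenceId: "call-42"   // hypothetical client-side tracking id
};
// streamingTranscriberParams3.parse(sonioxParams) validates it against the schema above.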
36493
+ var sonioxTranslationStatusSchema = zod11.enum(["original", "translation", "none"]);
36556
36494
  var sonioxTokenSchema = zod11.object({
36557
- text: zod11.string().describe("Token text content (subword, word, or space)"),
36558
- start_ms: zod11.number().optional().describe("Start time of the token in milliseconds"),
36559
- end_ms: zod11.number().optional().describe("End time of the token in milliseconds"),
36560
- confidence: zod11.number().min(0).max(1).optional().describe("Confidence score between 0.0 and 1.0"),
36561
- is_final: zod11.boolean().describe("Whether this token is final (confirmed) or provisional"),
36562
- speaker: zod11.string().optional().describe("Speaker identifier (only present when speaker diarization is enabled)"),
36563
- language: zod11.string().optional().describe("Detected language code (only present when language identification is enabled)"),
36564
- source_language: zod11.string().optional().describe("Original language code for translated tokens"),
36565
- translation_status: sonioxTranslationStatusSchema.optional().describe("Translation status: 'none', 'original', or 'translation'")
36495
+ text: zod11.string(),
36496
+ start_ms: zod11.number().optional(),
36497
+ end_ms: zod11.number().optional(),
36498
+ confidence: zod11.number(),
36499
+ is_final: zod11.boolean(),
36500
+ speaker: zod11.string().optional(),
36501
+ translation_status: sonioxTranslationStatusSchema.optional(),
36502
+ language: zod11.string().optional(),
36503
+ source_language: zod11.string().optional()
36566
36504
  });
36567
36505
  var sonioxStreamingResponseSchema = zod11.object({
36568
- text: zod11.string().optional().describe("Complete transcribed text"),
36569
- tokens: zod11.array(sonioxTokenSchema).describe("List of recognized tokens"),
36570
- final_audio_proc_ms: zod11.number().optional().describe("Milliseconds of audio processed into final tokens"),
36571
- total_audio_proc_ms: zod11.number().optional().describe("Milliseconds of audio processed (final + non-final)"),
36572
- finished: zod11.boolean().optional().describe("Whether the transcription is complete"),
36573
- error: zod11.string().optional().describe("Error message if an error occurred"),
36574
- error_code: zod11.number().optional().describe("Error code if an error occurred")
36506
+ text: zod11.string(),
36507
+ tokens: zod11.array(sonioxTokenSchema),
36508
+ final_audio_proc_ms: zod11.number(),
36509
+ total_audio_proc_ms: zod11.number(),
36510
+ finished: zod11.boolean().optional(),
36511
+ error_code: zod11.number().optional(),
36512
+ error_message: zod11.string().optional()
36575
36513
  });
36576
36514
  var sonioxRecorderStateSchema = zod11.enum([
36577
36515
  "Init",
@@ -37137,8 +37075,8 @@ var BatchOnlyProviders = AllProviders.filter(
37137
37075
  );
37138
37076
 
37139
37077
  // src/generated/deepgram/schema/index.ts
37140
- var schema_exports4 = {};
37141
- __export(schema_exports4, {
37078
+ var schema_exports5 = {};
37079
+ __export(schema_exports5, {
37142
37080
  V1ListenPostParametersCallbackMethod: () => V1ListenPostParametersCallbackMethod,
37143
37081
  V1ListenPostParametersCustomIntentMode: () => V1ListenPostParametersCustomIntentMode,
37144
37082
  V1ListenPostParametersCustomTopicMode: () => V1ListenPostParametersCustomTopicMode,
@@ -37393,8 +37331,8 @@ var V1SpeakPostParametersSampleRate = {
37393
37331
  };
37394
37332
 
37395
37333
  // src/generated/openai/schema/index.ts
37396
- var schema_exports5 = {};
37397
- __export(schema_exports5, {
37334
+ var schema_exports6 = {};
37335
+ __export(schema_exports6, {
37398
37336
  AudioResponseFormat: () => AudioResponseFormat,
37399
37337
  CreateSpeechRequestResponseFormat: () => CreateSpeechRequestResponseFormat,
37400
37338
  CreateSpeechRequestStreamFormat: () => CreateSpeechRequestStreamFormat,
@@ -37734,8 +37672,8 @@ var VoiceResourceObject = {
37734
37672
  };
37735
37673
 
37736
37674
  // src/generated/speechmatics/schema/index.ts
37737
- var schema_exports6 = {};
37738
- __export(schema_exports6, {
37675
+ var schema_exports7 = {};
37676
+ __export(schema_exports7, {
37739
37677
  AutoChaptersResultErrorType: () => AutoChaptersResultErrorType,
37740
37678
  ErrorResponseError: () => ErrorResponseError,
37741
37679
  GetJobsJobidAlignmentTags: () => GetJobsJobidAlignmentTags,
@@ -37924,32 +37862,6 @@ var WrittenFormRecognitionResultType = {
37924
37862
  word: "word"
37925
37863
  };
37926
37864
 
37927
- // src/generated/soniox/schema/index.ts
37928
- var schema_exports7 = {};
37929
- __export(schema_exports7, {
37930
- TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
37931
- TranscriptionMode: () => TranscriptionMode,
37932
- TranscriptionStatus: () => TranscriptionStatus,
37933
- TranslationConfigType: () => TranslationConfigType
37934
- });
37935
-
37936
- // src/generated/soniox/schema/temporaryApiKeyUsageType.ts
37937
- var TemporaryApiKeyUsageType = {
37938
- transcribe_websocket: "transcribe_websocket"
37939
- };
37940
-
37941
- // src/generated/soniox/schema/transcriptionMode.ts
37942
- var TranscriptionMode = {
37943
- real_time: "real_time",
37944
- async: "async"
37945
- };
37946
-
37947
- // src/generated/soniox/schema/translationConfigType.ts
37948
- var TranslationConfigType = {
37949
- one_way: "one_way",
37950
- two_way: "two_way"
37951
- };
37952
-
37953
37865
  // src/generated/elevenlabs/schema/index.ts
37954
37866
  var schema_exports8 = {};
37955
37867
  __export(schema_exports8, {
@@ -39653,7 +39565,7 @@ export {
39653
39565
  DeepgramTTSSampleRate,
39654
39566
  DeepgramTopicMode,
39655
39567
  DeepgramTranscriptionSchema,
39656
- schema_exports4 as DeepgramTypes,
39568
+ schema_exports5 as DeepgramTypes,
39657
39569
  deepgramAPI_zod_exports as DeepgramZodSchemas,
39658
39570
  ElevenLabsAdapter,
39659
39571
  ElevenLabsCapabilities,
@@ -39690,7 +39602,7 @@ export {
39690
39602
  OpenAIResponseFormat,
39691
39603
  streaming_types_exports as OpenAIStreamingTypes,
39692
39604
  OpenAITranscriptionSchema,
39693
- schema_exports5 as OpenAITypes,
39605
+ schema_exports6 as OpenAITypes,
39694
39606
  OpenAIWhisperAdapter,
39695
39607
  openAIAudioRealtimeAPI_zod_exports as OpenAIZodSchemas,
39696
39608
  ProfanityFilterMode,
@@ -39719,7 +39631,7 @@ export {
39719
39631
  SonioxStreamingUpdateSchema,
39720
39632
  streaming_types_zod_exports as SonioxStreamingZodSchemas,
39721
39633
  SonioxTranscriptionSchema,
39722
- schema_exports7 as SonioxTypes,
39634
+ schema_exports4 as SonioxTypes,
39723
39635
  SpeakV1ContainerParameter,
39724
39636
  SpeakV1EncodingParameter,
39725
39637
  SpeakV1SampleRateParameter,
@@ -39734,7 +39646,7 @@ export {
39734
39646
  SpeechmaticsStreamingSchema,
39735
39647
  SpeechmaticsStreamingUpdateSchema,
39736
39648
  SpeechmaticsTranscriptionSchema,
39737
- schema_exports6 as SpeechmaticsTypes,
39649
+ schema_exports7 as SpeechmaticsTypes,
39738
39650
  speechmaticsASRRESTAPI_zod_exports as SpeechmaticsZodSchemas,
39739
39651
  StreamingProviders,
39740
39652
  StreamingSupportedBitDepthEnum,