voice-router-dev 0.9.0 → 0.9.1

package/dist/index.js CHANGED
@@ -82,7 +82,7 @@ __export(src_exports, {
82
82
  DeepgramTTSSampleRate: () => DeepgramTTSSampleRate,
83
83
  DeepgramTopicMode: () => DeepgramTopicMode,
84
84
  DeepgramTranscriptionSchema: () => DeepgramTranscriptionSchema,
85
- DeepgramTypes: () => schema_exports4,
85
+ DeepgramTypes: () => schema_exports5,
86
86
  DeepgramZodSchemas: () => deepgramAPI_zod_exports,
87
87
  ElevenLabsAdapter: () => ElevenLabsAdapter,
88
88
  ElevenLabsCapabilities: () => ElevenLabsCapabilities,
@@ -119,7 +119,7 @@ __export(src_exports, {
119
119
  OpenAIResponseFormat: () => OpenAIResponseFormat,
120
120
  OpenAIStreamingTypes: () => streaming_types_exports,
121
121
  OpenAITranscriptionSchema: () => OpenAITranscriptionSchema,
122
- OpenAITypes: () => schema_exports5,
122
+ OpenAITypes: () => schema_exports6,
123
123
  OpenAIWhisperAdapter: () => OpenAIWhisperAdapter,
124
124
  OpenAIZodSchemas: () => openAIAudioRealtimeAPI_zod_exports,
125
125
  ProfanityFilterMode: () => ProfanityFilterMode,
@@ -148,7 +148,7 @@ __export(src_exports, {
148
148
  SonioxStreamingUpdateSchema: () => SonioxStreamingUpdateSchema,
149
149
  SonioxStreamingZodSchemas: () => streaming_types_zod_exports,
150
150
  SonioxTranscriptionSchema: () => SonioxTranscriptionSchema,
151
- SonioxTypes: () => schema_exports7,
151
+ SonioxTypes: () => schema_exports4,
152
152
  SpeakV1ContainerParameter: () => SpeakV1ContainerParameter,
153
153
  SpeakV1EncodingParameter: () => SpeakV1EncodingParameter,
154
154
  SpeakV1SampleRateParameter: () => SpeakV1SampleRateParameter,
@@ -163,7 +163,7 @@ __export(src_exports, {
163
163
  SpeechmaticsStreamingSchema: () => SpeechmaticsStreamingSchema,
164
164
  SpeechmaticsStreamingUpdateSchema: () => SpeechmaticsStreamingUpdateSchema,
165
165
  SpeechmaticsTranscriptionSchema: () => SpeechmaticsTranscriptionSchema,
166
- SpeechmaticsTypes: () => schema_exports6,
166
+ SpeechmaticsTypes: () => schema_exports7,
167
167
  SpeechmaticsZodSchemas: () => speechmaticsASRRESTAPI_zod_exports,
168
168
  StreamingProviders: () => StreamingProviders,
169
169
  StreamingSupportedBitDepthEnum: () => StreamingSupportedBitDepthEnum,
@@ -6064,23 +6064,22 @@ var AssemblyAIAdapter = class extends BaseAdapter {
6064
6064
  "AssemblyAI adapter currently only supports URL-based audio input. Use audio.type='url'"
6065
6065
  );
6066
6066
  }
6067
- const aaiOpts = { ...options?.assemblyai };
6068
- if ("speech_model" in aaiOpts && aaiOpts.speech_model != null) {
6069
- if (!aaiOpts.speech_models) {
6070
- aaiOpts.speech_models = [aaiOpts.speech_model];
6071
- }
6072
- delete aaiOpts.speech_model;
6067
+ const passthrough = options?.assemblyai;
6068
+ let speechModels;
6069
+ if (passthrough?.speech_model != null && !passthrough.speech_models) {
6070
+ speechModels = [passthrough.speech_model];
6071
+ } else if (passthrough?.speech_models) {
6072
+ speechModels = passthrough.speech_models;
6073
6073
  }
6074
+ const { speech_model: _deprecated, ...typedOpts } = passthrough ?? {};
6074
6075
  const request = {
6075
- ...aaiOpts,
6076
+ ...typedOpts,
6076
6077
  audio_url: audioUrl,
6077
6078
  // speech_models is required — default to universal-3-pro
6078
- speech_models: aaiOpts.speech_models ?? [
6079
- "universal-3-pro"
6080
- ],
6079
+ speech_models: speechModels ?? ["universal-3-pro"],
6081
6080
  // Enable punctuation and formatting by default
6082
- punctuate: aaiOpts.punctuate ?? true,
6083
- format_text: aaiOpts.format_text ?? true
6081
+ punctuate: typedOpts.punctuate ?? true,
6082
+ format_text: typedOpts.format_text ?? true
6084
6083
  };
6085
6084
  if (options) {
6086
6085
  if (options.model) {
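
Reviewer note on the hunk above: the deprecated singular `speech_model` passthrough is now mapped onto `speech_models` without mutating the caller's options object. A minimal sketch of the mapping, assuming only the passthrough field names visible in this hunk (the types are illustrative, not the AssemblyAI SDK's):

```typescript
// Sketch of the speech_model → speech_models migration, per the hunk above.
interface AssemblyAIPassthrough {
  speech_model?: string;     // deprecated singular form
  speech_models?: string[];  // current plural form
  punctuate?: boolean;
  format_text?: boolean;
  [key: string]: unknown;
}

function buildRequest(audioUrl: string, passthrough: AssemblyAIPassthrough = {}) {
  // Honor the deprecated key only when the plural form is absent.
  let speechModels: string[] | undefined;
  if (passthrough.speech_model != null && !passthrough.speech_models) {
    speechModels = [passthrough.speech_model];
  } else if (passthrough.speech_models) {
    speechModels = passthrough.speech_models;
  }
  // Strip the deprecated key so it never reaches the API.
  const { speech_model: _deprecated, ...typedOpts } = passthrough;
  return {
    ...typedOpts,
    audio_url: audioUrl,
    speech_models: speechModels ?? ["universal-3-pro"], // required field, per the hunk
    punctuate: typedOpts.punctuate ?? true,
    format_text: typedOpts.format_text ?? true,
  };
}

// buildRequest("https://example.com/a.wav", { speech_model: "universal-3-pro" })
// → { audio_url: "...", speech_models: ["universal-3-pro"], punctuate: true, format_text: true }
```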
@@ -6128,22 +6127,22 @@ var AssemblyAIAdapter = class extends BaseAdapter {
6128
6127
  normalizeResponse(response) {
6129
6128
  let status;
6130
6129
  switch (response.status) {
6131
- case TranscriptStatus.queued:
6130
+ case "queued":
6132
6131
  status = "queued";
6133
6132
  break;
6134
- case TranscriptStatus.processing:
6133
+ case "processing":
6135
6134
  status = "processing";
6136
6135
  break;
6137
- case TranscriptStatus.completed:
6136
+ case "completed":
6138
6137
  status = "completed";
6139
6138
  break;
6140
- case TranscriptStatus.error:
6139
+ case "error":
6141
6140
  status = "error";
6142
6141
  break;
6143
6142
  default:
6144
6143
  status = "queued";
6145
6144
  }
6146
- if (response.status === TranscriptStatus.error) {
6145
+ if (response.status === "error") {
6147
6146
  return {
6148
6147
  success: false,
6149
6148
  provider: this.name,
@@ -6795,8 +6794,10 @@ var DeepgramAdapter = class extends BaseAdapter {
6795
6794
  /**
6796
6795
  * Submit audio for transcription
6797
6796
  *
6798
- * Sends audio to Deepgram API for transcription. Deepgram processes
6799
- * synchronously and returns results immediately (no polling required).
6797
+ * Sends audio to Deepgram API for transcription. Deepgram normally processes
6798
+ * synchronously and returns results immediately. When `webhookUrl` is set,
6799
+ * Deepgram can instead return an async callback acknowledgment containing a
6800
+ * request ID.
6800
6801
  *
6801
6802
  * @param audio - Audio input (URL or file buffer)
6802
6803
  * @param options - Transcription options
@@ -6847,17 +6848,59 @@ var DeepgramAdapter = class extends BaseAdapter {
6847
6848
  { params }
6848
6849
  ).then((res) => res.data);
6849
6850
  } else if (audio.type === "file") {
6850
- response = await this.client.post("/listen", audio.file, {
6851
- params,
6852
- headers: {
6853
- "Content-Type": "audio/*"
6851
+ response = await this.client.post(
6852
+ "/listen",
6853
+ audio.file,
6854
+ {
6855
+ params,
6856
+ headers: {
6857
+ "Content-Type": "audio/*"
6858
+ }
6854
6859
  }
6855
- }).then((res) => res.data);
6860
+ ).then((res) => res.data);
6856
6861
  } else {
6857
6862
  throw new Error(
6858
6863
  "Deepgram adapter does not support stream type for pre-recorded transcription. Use transcribeStream() for real-time streaming."
6859
6864
  );
6860
6865
  }
6866
+ if (options?.webhookUrl) {
6867
+ const requestId = ("request_id" in response ? response.request_id : void 0) || ("metadata" in response ? response.metadata?.request_id : void 0);
6868
+ if (!requestId) {
6869
+ return {
6870
+ success: false,
6871
+ provider: this.name,
6872
+ error: {
6873
+ code: "MISSING_REQUEST_ID",
6874
+ message: "Deepgram callback mode did not return a request ID"
6875
+ },
6876
+ raw: response
6877
+ };
6878
+ }
6879
+ return {
6880
+ success: true,
6881
+ provider: this.name,
6882
+ data: {
6883
+ id: requestId,
6884
+ text: "",
6885
+ status: "queued"
6886
+ },
6887
+ tracking: {
6888
+ requestId
6889
+ },
6890
+ raw: response
6891
+ };
6892
+ }
6893
+ if (!("results" in response) || !("metadata" in response)) {
6894
+ return {
6895
+ success: false,
6896
+ provider: this.name,
6897
+ error: {
6898
+ code: "INVALID_RESPONSE",
6899
+ message: "Deepgram did not return a synchronous transcription payload"
6900
+ },
6901
+ raw: response
6902
+ };
6903
+ }
6861
6904
  return this.normalizeResponse(response);
6862
6905
  } catch (error) {
6863
6906
  return this.createErrorResponse(error);
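
A Deepgram `transcribe()` call now has two success shapes: a synchronous transcript, or a queued acknowledgment when `webhookUrl` is set. A hedged usage sketch (the adapter is typed loosely here; construction and the exact options type are as defined elsewhere in this package):

```typescript
// Usage sketch for the new callback path; the webhook URL is a placeholder.
async function submitWithCallback(adapter: any, audioUrl: string) {
  const result = await adapter.transcribe(
    { type: "url", url: audioUrl },
    { webhookUrl: "https://example.com/deepgram-callback" }
  );
  if (!result.success) {
    // New failure codes introduced in this hunk: MISSING_REQUEST_ID, INVALID_RESPONSE.
    console.error(result.error.code, result.error.message);
    return;
  }
  if (result.data.status === "queued") {
    // Callback mode: only an acknowledgment; the transcript arrives at the webhook.
    console.log("queued, request id:", result.tracking?.requestId);
  } else {
    console.log(result.data.text); // synchronous mode: transcript already normalized
  }
}
```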
@@ -7518,7 +7561,8 @@ var DeepgramAdapter = class extends BaseAdapter {
7518
7561
  break;
7519
7562
  }
7520
7563
  case "Metadata": {
7521
- callbacks?.onMetadata?.(message);
7564
+ const { type: _, ...metadata } = message;
7565
+ callbacks?.onMetadata?.(metadata);
7522
7566
  break;
7523
7567
  }
7524
7568
  case "Error": {
@@ -7954,10 +7998,7 @@ var AzureSTTAdapter = class extends BaseAdapter {
7954
7998
  contentUrls: [audio.url],
7955
7999
  properties: this.buildTranscriptionProperties(options)
7956
8000
  };
7957
- const response = await transcriptionsCreate(
7958
- transcriptionRequest,
7959
- this.getAxiosConfig()
7960
- );
8001
+ const response = await transcriptionsCreate(transcriptionRequest, this.getAxiosConfig());
7961
8002
  const transcription = response.data;
7962
8003
  const transcriptId = transcription.self?.split("/").pop() || "";
7963
8004
  return await this.pollForCompletion(transcriptId);
@@ -8497,7 +8538,6 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
8497
8538
  const request = {
8498
8539
  ...options?.openai,
8499
8540
  file: audioData,
8500
- // Buffer/Blob both accepted at runtime; generated type expects Blob
8501
8541
  model
8502
8542
  };
8503
8543
  if (options?.language) {
@@ -8517,11 +8557,7 @@ var OpenAIWhisperAdapter = class extends BaseAdapter {
8517
8557
  request.response_format = OpenAIResponseFormat.json;
8518
8558
  }
8519
8559
  const response = await createTranscription(request, this.getAxiosConfig());
8520
- return this.normalizeResponse(
8521
- response.data,
8522
- model,
8523
- isDiarization
8524
- );
8560
+ return this.normalizeResponse(response.data, model, isDiarization);
8525
8561
  } catch (error) {
8526
8562
  return this.createErrorResponse(error);
8527
8563
  }
@@ -8928,7 +8964,6 @@ function createOpenAIWhisperAdapter(config) {
8928
8964
 
8929
8965
  // src/adapters/speechmatics-adapter.ts
8930
8966
  var import_axios8 = __toESM(require("axios"));
8931
- var import_ws5 = __toESM(require("ws"));
8932
8967
 
8933
8968
  // src/generated/speechmatics/schema/notificationConfigContentsItem.ts
8934
8969
  var NotificationConfigContentsItem = {
@@ -8978,7 +9013,8 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
8978
9013
  super(...arguments);
8979
9014
  this.name = "speechmatics";
8980
9015
  this.capabilities = {
8981
- streaming: true,
9016
+ streaming: false,
9017
+ // Batch only (streaming available via separate WebSocket API)
8982
9018
  diarization: true,
8983
9019
  wordTimestamps: true,
8984
9020
  languageDetection: false,
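
With `streaming` now reported as `false`, downstream code should gate on the capability flag rather than assuming every adapter streams. A small sketch against the `capabilities` shape shown above:

```typescript
// Guard against batch-only adapters before attempting real-time use.
function assertStreaming(adapter: { name: string; capabilities: { streaming: boolean } }) {
  if (!adapter.capabilities.streaming) {
    throw new Error(
      `${adapter.name} is batch-only in this build; use transcribe(), ` +
      `or the provider's own real-time API for streaming`
    );
  }
}
```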
@@ -9113,16 +9149,13 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
9113
9149
  jobConfig.fetch_data = {
9114
9150
  url: audio.url
9115
9151
  };
9116
- const formData = new FormData();
9117
- formData.append("config", JSON.stringify(jobConfig));
9118
- requestBody = formData;
9119
- headers = { "Content-Type": "multipart/form-data" };
9152
+ requestBody = { config: JSON.stringify(jobConfig) };
9153
+ headers = { "Content-Type": "application/json" };
9120
9154
  } else if (audio.type === "file") {
9121
- const formData = new FormData();
9122
- formData.append("config", JSON.stringify(jobConfig));
9123
- const audioBlob = audio.file instanceof Blob ? audio.file : new Blob([audio.file], { type: audio.mimeType || "audio/wav" });
9124
- formData.append("data_file", audioBlob, audio.filename || "audio.wav");
9125
- requestBody = formData;
9155
+ requestBody = {
9156
+ config: JSON.stringify(jobConfig),
9157
+ data_file: audio.file
9158
+ };
9126
9159
  headers = { "Content-Type": "multipart/form-data" };
9127
9160
  } else {
9128
9161
  return {
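
The submit path now branches by input type: URL jobs post a JSON body with a pre-stringified `config`, while file jobs keep the multipart header but pass a plain object instead of constructing `FormData` (the HTTP client is expected to serialize it; with axios that conversion is automatic only in versions that support it, which is worth verifying). A sketch of the two shapes, assuming the field names in the hunk:

```typescript
// Sketch of the two request payloads after this change; endpoint and client omitted.
type SpeechmaticsJobConfig = { type: string; fetch_data?: { url: string } };

function buildSubmitPayload(
  jobConfig: SpeechmaticsJobConfig,
  audio: { type: "url"; url: string } | { type: "file"; file: Blob }
) {
  if (audio.type === "url") {
    // URL jobs: JSON body, config serialized up front.
    return {
      body: { config: JSON.stringify(jobConfig) },
      headers: { "Content-Type": "application/json" },
    };
  }
  // File jobs: plain object + multipart header; serialization to FormData
  // is left to the HTTP layer (a behavior assumption worth checking in review).
  return {
    body: { config: JSON.stringify(jobConfig), data_file: audio.file },
    headers: { "Content-Type": "multipart/form-data" },
  };
}
```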
@@ -9227,389 +9260,6 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
9227
9260
  throw error;
9228
9261
  }
9229
9262
  }
9230
- /**
9231
- * Build WebSocket URL for real-time streaming
9232
- *
9233
- * Note: Real-time API uses a different host from the batch API:
9234
- * - Batch: {region}.asr.api.speechmatics.com
9235
- * - Real-time: {region}.rt.speechmatics.com
9236
- *
9237
- * @param region - Regional endpoint identifier
9238
- * @returns WebSocket URL for real-time API
9239
- */
9240
- getRegionalWsUrl(region) {
9241
- if (this.config?.wsBaseUrl) {
9242
- return this.config.wsBaseUrl;
9243
- }
9244
- const rtRegionMap = {
9245
- eu1: "eu",
9246
- eu2: "eu",
9247
- us1: "us",
9248
- us2: "us",
9249
- au1: "eu"
9250
- // No AU RT endpoint — fall back to EU
9251
- };
9252
- const rtPrefix = rtRegionMap[region || ""] || "eu";
9253
- return `wss://${rtPrefix}.rt.speechmatics.com/v2`;
9254
- }
9255
- /**
9256
- * Stream audio for real-time transcription via WebSocket
9257
- *
9258
- * Connects to Speechmatics' real-time API and sends audio chunks
9259
- * for transcription with results returned via callbacks.
9260
- *
9261
- * @param options - Streaming configuration options
9262
- * @param callbacks - Event callbacks for transcription results
9263
- * @returns Promise that resolves with a StreamingSession
9264
- *
9265
- * @example Basic streaming
9266
- * ```typescript
9267
- * const session = await adapter.transcribeStream({
9268
- * language: 'en',
9269
- * speechmaticsStreaming: {
9270
- * enablePartials: true,
9271
- * operatingPoint: 'enhanced'
9272
- * }
9273
- * }, {
9274
- * onTranscript: (event) => console.log(event.text),
9275
- * onUtterance: (utt) => console.log(`[${utt.speaker}]: ${utt.text}`),
9276
- * onError: (error) => console.error(error)
9277
- * });
9278
- *
9279
- * await session.sendAudio({ data: audioBuffer });
9280
- * await session.close();
9281
- * ```
9282
- */
9283
- async transcribeStream(options, callbacks) {
9284
- this.validateConfig();
9285
- const smOpts = options?.speechmaticsStreaming || {};
9286
- const region = smOpts.region || this.config?.region;
9287
- const wsUrl = this.getRegionalWsUrl(region);
9288
- const ws = new import_ws5.default(wsUrl, {
9289
- headers: {
9290
- Authorization: `Bearer ${this.config.apiKey}`
9291
- }
9292
- });
9293
- let sessionStatus = "connecting";
9294
- const sessionId = `speechmatics-${Date.now()}-${Math.random().toString(36).substring(7)}`;
9295
- let seqNo = 0;
9296
- let utteranceResults = [];
9297
- const sessionReady = new Promise((resolve, reject) => {
9298
- const timeout = setTimeout(() => {
9299
- reject(new Error("WebSocket connection timeout"));
9300
- }, 1e4);
9301
- let wsOpen = false;
9302
- ws.once("error", (error) => {
9303
- clearTimeout(timeout);
9304
- reject(error);
9305
- });
9306
- ws.once("open", () => {
9307
- wsOpen = true;
9308
- const encoding = smOpts.encoding || options?.encoding || "pcm_s16le";
9309
- const sampleRate = smOpts.sampleRate || options?.sampleRate || 16e3;
9310
- const startMsg = {
9311
- message: "StartRecognition",
9312
- audio_format: {
9313
- type: "raw",
9314
- encoding,
9315
- sample_rate: sampleRate
9316
- },
9317
- transcription_config: {
9318
- language: smOpts.language || options?.language || "en",
9319
- enable_partials: smOpts.enablePartials ?? options?.interimResults ?? true
9320
- }
9321
- };
9322
- const txConfig = startMsg.transcription_config;
9323
- if (smOpts.domain) txConfig.domain = smOpts.domain;
9324
- if (smOpts.operatingPoint) txConfig.operating_point = smOpts.operatingPoint;
9325
- if (smOpts.maxDelay !== void 0) txConfig.max_delay = smOpts.maxDelay;
9326
- if (smOpts.maxDelayMode) txConfig.max_delay_mode = smOpts.maxDelayMode;
9327
- if (smOpts.enableEntities !== void 0) txConfig.enable_entities = smOpts.enableEntities;
9328
- if (smOpts.diarization === "speaker" || options?.diarization) {
9329
- txConfig.diarization = "speaker";
9330
- if (smOpts.maxSpeakers) {
9331
- txConfig.speaker_diarization_config = {
9332
- max_speakers: smOpts.maxSpeakers
9333
- };
9334
- } else if (options?.speakersExpected) {
9335
- txConfig.speaker_diarization_config = {
9336
- max_speakers: options.speakersExpected
9337
- };
9338
- }
9339
- }
9340
- if (smOpts.additionalVocab && smOpts.additionalVocab.length > 0) {
9341
- txConfig.additional_vocab = smOpts.additionalVocab.map((word) => ({
9342
- content: word
9343
- }));
9344
- } else if (options?.customVocabulary && options.customVocabulary.length > 0) {
9345
- txConfig.additional_vocab = options.customVocabulary.map((word) => ({
9346
- content: word
9347
- }));
9348
- }
9349
- if (smOpts.conversationConfig) {
9350
- txConfig.conversation_config = {
9351
- end_of_utterance_silence_trigger: smOpts.conversationConfig.endOfUtteranceSilenceTrigger
9352
- };
9353
- }
9354
- const startPayload = JSON.stringify(startMsg);
9355
- if (callbacks?.onRawMessage) {
9356
- callbacks.onRawMessage({
9357
- provider: "speechmatics",
9358
- direction: "outgoing",
9359
- timestamp: Date.now(),
9360
- payload: startPayload,
9361
- messageType: "StartRecognition"
9362
- });
9363
- }
9364
- ws.send(startPayload);
9365
- });
9366
- const onMessage = (data) => {
9367
- const rawPayload = data.toString();
9368
- try {
9369
- const msg = JSON.parse(rawPayload);
9370
- if (msg.message === "RecognitionStarted") {
9371
- clearTimeout(timeout);
9372
- ws.removeListener("message", onMessage);
9373
- ws.emit("message", data);
9374
- resolve();
9375
- } else if (msg.message === "Error") {
9376
- clearTimeout(timeout);
9377
- ws.removeListener("message", onMessage);
9378
- reject(new Error(msg.reason || "Recognition failed to start"));
9379
- }
9380
- } catch {
9381
- }
9382
- };
9383
- ws.on("message", onMessage);
9384
- });
9385
- ws.on("message", (data) => {
9386
- const rawPayload = data.toString();
9387
- try {
9388
- const message = JSON.parse(rawPayload);
9389
- if (callbacks?.onRawMessage) {
9390
- callbacks.onRawMessage({
9391
- provider: "speechmatics",
9392
- direction: "incoming",
9393
- timestamp: Date.now(),
9394
- payload: rawPayload,
9395
- messageType: message.message
9396
- });
9397
- }
9398
- this.handleStreamingMessage(message, callbacks, utteranceResults);
9399
- } catch (error) {
9400
- if (callbacks?.onRawMessage) {
9401
- callbacks.onRawMessage({
9402
- provider: "speechmatics",
9403
- direction: "incoming",
9404
- timestamp: Date.now(),
9405
- payload: rawPayload,
9406
- messageType: "parse_error"
9407
- });
9408
- }
9409
- callbacks?.onError?.({
9410
- code: "PARSE_ERROR",
9411
- message: "Failed to parse WebSocket message",
9412
- details: error
9413
- });
9414
- }
9415
- });
9416
- ws.on("error", (error) => {
9417
- callbacks?.onError?.({
9418
- code: "WEBSOCKET_ERROR",
9419
- message: error.message,
9420
- details: error
9421
- });
9422
- });
9423
- ws.on("close", (code, reason) => {
9424
- sessionStatus = "closed";
9425
- callbacks?.onClose?.(code, reason.toString());
9426
- });
9427
- await sessionReady;
9428
- sessionStatus = "open";
9429
- callbacks?.onOpen?.();
9430
- return {
9431
- id: sessionId,
9432
- provider: this.name,
9433
- createdAt: /* @__PURE__ */ new Date(),
9434
- getStatus: () => sessionStatus,
9435
- sendAudio: async (chunk) => {
9436
- if (sessionStatus !== "open") {
9437
- throw new Error(`Cannot send audio: session is ${sessionStatus}`);
9438
- }
9439
- if (ws.readyState !== import_ws5.default.OPEN) {
9440
- throw new Error("WebSocket is not open");
9441
- }
9442
- if (callbacks?.onRawMessage) {
9443
- const audioPayload = chunk.data instanceof ArrayBuffer ? chunk.data : chunk.data.buffer.slice(
9444
- chunk.data.byteOffset,
9445
- chunk.data.byteOffset + chunk.data.byteLength
9446
- );
9447
- callbacks.onRawMessage({
9448
- provider: this.name,
9449
- direction: "outgoing",
9450
- timestamp: Date.now(),
9451
- payload: audioPayload,
9452
- messageType: "audio"
9453
- });
9454
- }
9455
- ws.send(chunk.data);
9456
- seqNo++;
9457
- if (chunk.isLast) {
9458
- const endMsg = JSON.stringify({
9459
- message: "EndOfStream",
9460
- last_seq_no: seqNo
9461
- });
9462
- if (callbacks?.onRawMessage) {
9463
- callbacks.onRawMessage({
9464
- provider: this.name,
9465
- direction: "outgoing",
9466
- timestamp: Date.now(),
9467
- payload: endMsg,
9468
- messageType: "EndOfStream"
9469
- });
9470
- }
9471
- ws.send(endMsg);
9472
- }
9473
- },
9474
- close: async () => {
9475
- if (sessionStatus === "closed" || sessionStatus === "closing") {
9476
- return;
9477
- }
9478
- sessionStatus = "closing";
9479
- if (ws.readyState === import_ws5.default.OPEN) {
9480
- seqNo++;
9481
- ws.send(
9482
- JSON.stringify({
9483
- message: "EndOfStream",
9484
- last_seq_no: seqNo
9485
- })
9486
- );
9487
- }
9488
- return new Promise((resolve) => {
9489
- const timeout = setTimeout(() => {
9490
- ws.terminate();
9491
- sessionStatus = "closed";
9492
- resolve();
9493
- }, 5e3);
9494
- const onMsg = (data) => {
9495
- try {
9496
- const msg = JSON.parse(data.toString());
9497
- if (msg.message === "EndOfTranscript") {
9498
- ws.removeListener("message", onMsg);
9499
- clearTimeout(timeout);
9500
- ws.close();
9501
- }
9502
- } catch {
9503
- }
9504
- };
9505
- ws.on("message", onMsg);
9506
- ws.once("close", () => {
9507
- clearTimeout(timeout);
9508
- sessionStatus = "closed";
9509
- resolve();
9510
- });
9511
- });
9512
- }
9513
- };
9514
- }
9515
- /**
9516
- * Handle incoming Speechmatics real-time WebSocket messages
9517
- */
9518
- handleStreamingMessage(message, callbacks, utteranceResults) {
9519
- switch (message.message) {
9520
- case "RecognitionStarted": {
9521
- break;
9522
- }
9523
- case "AddPartialTranscript": {
9524
- const results = message.results || [];
9525
- const text = buildTextFromSpeechmaticsResults(results);
9526
- if (text) {
9527
- callbacks?.onTranscript?.({
9528
- type: "transcript",
9529
- text,
9530
- isFinal: false,
9531
- words: this.extractWordsFromResults(results),
9532
- data: message
9533
- });
9534
- }
9535
- break;
9536
- }
9537
- case "AddTranscript": {
9538
- const results = message.results || [];
9539
- const text = buildTextFromSpeechmaticsResults(results);
9540
- if (utteranceResults) {
9541
- utteranceResults.push(...results);
9542
- }
9543
- if (text) {
9544
- callbacks?.onTranscript?.({
9545
- type: "transcript",
9546
- text,
9547
- isFinal: true,
9548
- words: this.extractWordsFromResults(results),
9549
- data: message
9550
- });
9551
- }
9552
- break;
9553
- }
9554
- case "EndOfUtterance": {
9555
- if (utteranceResults && utteranceResults.length > 0) {
9556
- const text = buildTextFromSpeechmaticsResults(utteranceResults);
9557
- const words = this.extractWordsFromResults(utteranceResults);
9558
- const utterances = buildUtterancesFromWords(words);
9559
- if (utterances.length > 0) {
9560
- for (const utt of utterances) {
9561
- callbacks?.onUtterance?.(utt);
9562
- }
9563
- } else if (text) {
9564
- callbacks?.onUtterance?.({
9565
- text,
9566
- start: words.length > 0 ? words[0].start : 0,
9567
- end: words.length > 0 ? words[words.length - 1].end : 0,
9568
- words
9569
- });
9570
- }
9571
- utteranceResults.length = 0;
9572
- }
9573
- break;
9574
- }
9575
- case "AudioAdded": {
9576
- break;
9577
- }
9578
- case "EndOfTranscript": {
9579
- break;
9580
- }
9581
- case "Info":
9582
- case "Warning": {
9583
- callbacks?.onMetadata?.(message);
9584
- break;
9585
- }
9586
- case "Error": {
9587
- const errMsg = message;
9588
- callbacks?.onError?.({
9589
- code: errMsg.type || "SPEECHMATICS_ERROR",
9590
- message: errMsg.reason || "Unknown error",
9591
- details: message
9592
- });
9593
- break;
9594
- }
9595
- default: {
9596
- callbacks?.onMetadata?.(message);
9597
- break;
9598
- }
9599
- }
9600
- }
9601
- /**
9602
- * Extract unified Word[] from Speechmatics recognition results
9603
- */
9604
- extractWordsFromResults(results) {
9605
- return results.filter((r) => r.type === "word" && r.start_time !== void 0 && r.end_time !== void 0).map((result) => ({
9606
- word: result.alternatives?.[0]?.content || "",
9607
- start: result.start_time,
9608
- end: result.end_time,
9609
- confidence: result.alternatives?.[0]?.confidence,
9610
- speaker: result.alternatives?.[0]?.speaker
9611
- }));
9612
- }
9613
9263
  /**
9614
9264
  * Normalize Speechmatics status to unified status
9615
9265
  * Uses generated JobDetailsStatus enum values
@@ -9678,9 +9328,6 @@ function createSpeechmaticsAdapter(config) {
9678
9328
  return adapter;
9679
9329
  }
9680
9330
 
9681
- // src/adapters/soniox-adapter.ts
9682
- var import_axios9 = __toESM(require("axios"));
9683
-
9684
9331
  // src/generated/soniox/schema/transcriptionStatus.ts
9685
9332
  var TranscriptionStatus = {
9686
9333
  queued: "queued",
@@ -9689,6 +9336,57 @@ var TranscriptionStatus = {
9689
9336
  error: "error"
9690
9337
  };
9691
9338
 
9339
+ // src/generated/soniox/api/sonioxPublicAPI.ts
9340
+ var import_axios9 = __toESM(require("axios"));
9341
+
9342
+ // src/generated/soniox/schema/index.ts
9343
+ var schema_exports4 = {};
9344
+ __export(schema_exports4, {
9345
+ TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
9346
+ TranscriptionMode: () => TranscriptionMode,
9347
+ TranscriptionStatus: () => TranscriptionStatus,
9348
+ TranslationConfigType: () => TranslationConfigType
9349
+ });
9350
+
9351
+ // src/generated/soniox/schema/temporaryApiKeyUsageType.ts
9352
+ var TemporaryApiKeyUsageType = {
9353
+ transcribe_websocket: "transcribe_websocket"
9354
+ };
9355
+
9356
+ // src/generated/soniox/schema/transcriptionMode.ts
9357
+ var TranscriptionMode = {
9358
+ real_time: "real_time",
9359
+ async: "async"
9360
+ };
9361
+
9362
+ // src/generated/soniox/schema/translationConfigType.ts
9363
+ var TranslationConfigType = {
9364
+ one_way: "one_way",
9365
+ two_way: "two_way"
9366
+ };
9367
+
9368
+ // src/generated/soniox/api/sonioxPublicAPI.ts
9369
+ var uploadFile = (uploadFileBody2, options) => {
9370
+ const formData = new FormData();
9371
+ if (uploadFileBody2.client_reference_id !== void 0 && uploadFileBody2.client_reference_id !== null) {
9372
+ formData.append("client_reference_id", uploadFileBody2.client_reference_id);
9373
+ }
9374
+ formData.append("file", uploadFileBody2.file);
9375
+ return import_axios9.default.post("/v1/files", formData, options);
9376
+ };
9377
+ var createTranscription2 = (createTranscriptionPayload, options) => {
9378
+ return import_axios9.default.post("/v1/transcriptions", createTranscriptionPayload, options);
9379
+ };
9380
+ var getTranscription = (transcriptionId, options) => {
9381
+ return import_axios9.default.get(`/v1/transcriptions/${transcriptionId}`, options);
9382
+ };
9383
+ var getTranscriptionTranscript = (transcriptionId, options) => {
9384
+ return import_axios9.default.get(`/v1/transcriptions/${transcriptionId}/transcript`, options);
9385
+ };
9386
+ var getModels = (options) => {
9387
+ return import_axios9.default.get("/v1/models", options);
9388
+ };
9389
+
9692
9390
  // src/adapters/soniox-adapter.ts
9693
9391
  var SonioxAdapter = class extends BaseAdapter {
9694
9392
  constructor() {
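
The generated functions above call the module-level axios instance with `/v1/...` paths, so they only work when every call receives a config carrying `baseURL` and auth; that is exactly what the adapter's new `getAxiosConfig()` provides in the next hunk. A standalone sketch of such a config (the host and env var are placeholders; the adapter resolves a regional host at runtime):

```typescript
// Sketch of the per-call config the generated Soniox functions expect.
import type { AxiosRequestConfig } from "axios";

const sonioxConfig: AxiosRequestConfig = {
  baseURL: "https://api.soniox.com", // assumed host, no /v1 suffix (paths include it)
  headers: { Authorization: `Bearer ${process.env.SONIOX_API_KEY}` },
};

// Then, with the bundled functions above:
//   const created = await createTranscription2({ model, audio_url }, sonioxConfig);
//   const meta = await getTranscription(created.data.id, sonioxConfig);
//   if (meta.data.status === "completed") {
//     const { data } = await getTranscriptionTranscript(meta.data.id, sonioxConfig);
//     console.log(data.text);
//   }
```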
@@ -9743,11 +9441,17 @@ var SonioxAdapter = class extends BaseAdapter {
9743
9441
  }
9744
9442
  }
9745
9443
  /**
9746
- * Get the base URL for API requests
9444
+ * Get the base URL for API requests (no /v1 suffix — generated functions include /v1 in paths)
9747
9445
  */
9748
9446
  get baseUrl() {
9749
9447
  if (this.config?.baseUrl) return this.config.baseUrl;
9750
- return `https://${this.getRegionalHost()}/v1`;
9448
+ return `https://${this.getRegionalHost()}`;
9449
+ }
9450
+ /**
9451
+ * Build axios config with Soniox Bearer auth
9452
+ */
9453
+ getAxiosConfig() {
9454
+ return super.getAxiosConfig("Authorization", (key) => `Bearer ${key}`);
9751
9455
  }
9752
9456
  initialize(config) {
9753
9457
  super.initialize(config);
@@ -9757,15 +9461,6 @@ var SonioxAdapter = class extends BaseAdapter {
9757
9461
  if (config.model) {
9758
9462
  this.defaultModel = config.model;
9759
9463
  }
9760
- this.client = import_axios9.default.create({
9761
- baseURL: this.baseUrl,
9762
- timeout: config.timeout || 12e4,
9763
- headers: {
9764
- Authorization: `Bearer ${config.apiKey}`,
9765
- "Content-Type": "application/json",
9766
- ...config.headers
9767
- }
9768
- });
9769
9464
  }
9770
9465
  /**
9771
9466
  * Get current region
@@ -9795,23 +9490,12 @@ var SonioxAdapter = class extends BaseAdapter {
9795
9490
  */
9796
9491
  setRegion(region) {
9797
9492
  this.region = region;
9798
- if (this.config?.apiKey) {
9799
- this.client = import_axios9.default.create({
9800
- baseURL: this.baseUrl,
9801
- timeout: this.config.timeout || 12e4,
9802
- headers: {
9803
- Authorization: `Bearer ${this.config.apiKey}`,
9804
- "Content-Type": "application/json",
9805
- ...this.config.headers
9806
- }
9807
- });
9808
- }
9809
9493
  }
9810
9494
  /**
9811
9495
  * Submit audio for transcription
9812
9496
  *
9813
- * Soniox uses async batch processing. The transcribe method submits audio
9814
- * and waits for completion (or use getTranscript for polling).
9497
+ * Uses the async v1 API: createTranscription returns status `queued`;
9498
+ * the adapter then polls until completion (or returns immediately when a webhook is set).
9815
9499
  *
9816
9500
  * @param audio - Audio input (URL or file)
9817
9501
  * @param options - Transcription options
@@ -9820,21 +9504,44 @@ var SonioxAdapter = class extends BaseAdapter {
9820
9504
  async transcribe(audio, options) {
9821
9505
  this.validateConfig();
9822
9506
  try {
9823
- const requestBody = {
9824
- model: options?.model || this.defaultModel
9825
- };
9826
- if (audio.type === "url") {
9827
- requestBody.audio_url = audio.url;
9828
- } else if (audio.type === "file") {
9829
- const formData = new FormData();
9507
+ const sonioxOpts = options?.soniox;
9508
+ if (audio.type === "file") {
9830
9509
  const audioBlob = audio.file instanceof Blob ? audio.file : new Blob([audio.file], { type: audio.mimeType || "audio/wav" });
9831
- formData.append("file", audioBlob, audio.filename || "audio.wav");
9832
- const uploadResponse = await this.client.post("/files", formData, {
9833
- headers: {
9834
- "Content-Type": "multipart/form-data"
9835
- }
9836
- });
9837
- requestBody.file_id = uploadResponse.data.id;
9510
+ const uploadBody = { file: audioBlob };
9511
+ const fileResp = await uploadFile(uploadBody, this.getAxiosConfig());
9512
+ const payload = {
9513
+ ...sonioxOpts,
9514
+ model: options?.model || this.defaultModel,
9515
+ file_id: fileResp.data.id,
9516
+ language_hints: options?.language ? [options.language] : sonioxOpts?.language_hints,
9517
+ enable_speaker_diarization: options?.diarization || sonioxOpts?.enable_speaker_diarization,
9518
+ enable_language_identification: options?.languageDetection || sonioxOpts?.enable_language_identification,
9519
+ context: options?.customVocabulary?.length ? { terms: options.customVocabulary } : sonioxOpts?.context,
9520
+ webhook_url: options?.webhookUrl || sonioxOpts?.webhook_url
9521
+ };
9522
+ const createResp = await createTranscription2(payload, this.getAxiosConfig());
9523
+ const meta = createResp.data;
9524
+ if (options?.webhookUrl || sonioxOpts?.webhook_url) {
9525
+ return this.normalizeTranscription(meta);
9526
+ }
9527
+ return this.pollForCompletion(meta.id);
9528
+ } else if (audio.type === "url") {
9529
+ const payload = {
9530
+ ...sonioxOpts,
9531
+ model: options?.model || this.defaultModel,
9532
+ audio_url: audio.url,
9533
+ language_hints: options?.language ? [options.language] : sonioxOpts?.language_hints,
9534
+ enable_speaker_diarization: options?.diarization || sonioxOpts?.enable_speaker_diarization,
9535
+ enable_language_identification: options?.languageDetection || sonioxOpts?.enable_language_identification,
9536
+ context: options?.customVocabulary?.length ? { terms: options.customVocabulary } : sonioxOpts?.context,
9537
+ webhook_url: options?.webhookUrl || sonioxOpts?.webhook_url
9538
+ };
9539
+ const createResp = await createTranscription2(payload, this.getAxiosConfig());
9540
+ const meta = createResp.data;
9541
+ if (options?.webhookUrl || sonioxOpts?.webhook_url) {
9542
+ return this.normalizeTranscription(meta);
9543
+ }
9544
+ return this.pollForCompletion(meta.id);
9838
9545
  } else {
9839
9546
  return {
9840
9547
  success: false,
@@ -9845,38 +9552,6 @@ var SonioxAdapter = class extends BaseAdapter {
9845
9552
  }
9846
9553
  };
9847
9554
  }
9848
- if (options?.language) {
9849
- requestBody.language_hints = [options.language];
9850
- }
9851
- if (options?.diarization) {
9852
- requestBody.enable_speaker_diarization = true;
9853
- }
9854
- if (options?.languageDetection) {
9855
- requestBody.enable_language_identification = true;
9856
- }
9857
- if (options?.customVocabulary && options.customVocabulary.length > 0) {
9858
- requestBody.context = {
9859
- terms: options.customVocabulary
9860
- };
9861
- }
9862
- if (options?.webhookUrl) {
9863
- requestBody.webhook_url = options.webhookUrl;
9864
- }
9865
- const response = await this.client.post("/transcriptions", requestBody);
9866
- const transcriptionId = response.data.id;
9867
- if (options?.webhookUrl) {
9868
- return {
9869
- success: true,
9870
- provider: this.name,
9871
- data: {
9872
- id: transcriptionId,
9873
- text: "",
9874
- status: "queued"
9875
- },
9876
- raw: response.data
9877
- };
9878
- }
9879
- return await this.pollForCompletion(transcriptionId);
9880
9555
  } catch (error) {
9881
9556
  return this.createErrorResponse(error);
9882
9557
  }
@@ -9884,9 +9559,8 @@ var SonioxAdapter = class extends BaseAdapter {
9884
9559
  /**
9885
9560
  * Get transcription result by ID
9886
9561
  *
9887
- * Checks job status via GET /v1/transcriptions/{id}, then fetches
9888
- * the full transcript via GET /v1/transcriptions/{id}/transcript
9889
- * when completed.
9562
+ * Fetches transcription metadata and, if completed, the transcript text/tokens.
9563
+ * Used by pollForCompletion() for async polling.
9890
9564
  *
9891
9565
  * @param transcriptId - Transcript ID
9892
9566
  * @returns Transcription response
@@ -9894,39 +9568,20 @@ var SonioxAdapter = class extends BaseAdapter {
9894
9568
  async getTranscript(transcriptId) {
9895
9569
  this.validateConfig();
9896
9570
  try {
9897
- const statusResponse = await this.client.get(`/transcriptions/${transcriptId}`);
9898
- const job = statusResponse.data;
9899
- if (job.status === "error") {
9900
- return {
9901
- success: false,
9902
- provider: this.name,
9903
- error: {
9904
- code: "TRANSCRIPTION_ERROR",
9905
- message: job.error_message || "Transcription failed"
9906
- }
9907
- };
9908
- }
9909
- if (job.status !== "completed") {
9910
- return {
9911
- success: true,
9912
- provider: this.name,
9913
- data: {
9914
- id: job.id,
9915
- text: "",
9916
- status: job.status
9917
- },
9918
- raw: job
9919
- };
9571
+ const metaResp = await getTranscription(transcriptId, this.getAxiosConfig());
9572
+ const meta = metaResp.data;
9573
+ if (meta.status === TranscriptionStatus.completed) {
9574
+ try {
9575
+ const transcriptResp = await getTranscriptionTranscript(
9576
+ transcriptId,
9577
+ this.getAxiosConfig()
9578
+ );
9579
+ return this.normalizeTranscription(meta, transcriptResp.data);
9580
+ } catch (transcriptError) {
9581
+ return this.createErrorResponse(transcriptError);
9582
+ }
9920
9583
  }
9921
- const transcriptResponse = await this.client.get(
9922
- `/transcriptions/${transcriptId}/transcript`
9923
- );
9924
- return this.normalizeResponse({
9925
- ...transcriptResponse.data,
9926
- // Carry over job metadata
9927
- id: job.id,
9928
- audio_duration_ms: job.audio_duration_ms
9929
- });
9584
+ return this.normalizeTranscription(meta);
9930
9585
  } catch (error) {
9931
9586
  return this.createErrorResponse(error);
9932
9587
  }
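
Callers that pass a webhook get a `queued` result back immediately and can poll `getTranscript()` themselves. A hedged polling sketch against the normalized response shape used throughout this file:

```typescript
// Poll until a terminal status; interval and loose typing are illustrative.
async function waitForTranscript(adapter: any, id: string, intervalMs = 2000) {
  for (;;) {
    const res = await adapter.getTranscript(id);
    if (!res.success) return res;                    // error status or request failure
    if (res.data.status === "completed") return res; // transcript text populated
    await new Promise((r) => setTimeout(r, intervalMs)); // still queued/processing
  }
}
```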
@@ -9946,51 +9601,50 @@ var SonioxAdapter = class extends BaseAdapter {
9946
9601
  const sessionId = `soniox_${Date.now()}_${Math.random().toString(36).substring(7)}`;
9947
9602
  const createdAt = /* @__PURE__ */ new Date();
9948
9603
  const wsBase = this.config?.wsBaseUrl || (this.config?.baseUrl ? this.deriveWsUrl(this.config.baseUrl) : `wss://${this.getRegionalWsHost()}`);
9949
- const wsUrl = `${wsBase}/transcribe-websocket`;
9950
- const modelId = options?.sonioxStreaming?.model || options?.model || "stt-rt-v4";
9951
- const sonioxOpts = options?.sonioxStreaming;
9952
- const initMessage = {
9953
- api_key: this.config.apiKey,
9954
- model: modelId
9955
- };
9956
- if (sonioxOpts?.audioFormat) {
9957
- initMessage.audio_format = sonioxOpts.audioFormat;
9958
- } else if (options?.encoding) {
9604
+ const wsUrl = new URL(`${wsBase}/transcribe-websocket`);
9605
+ wsUrl.searchParams.set("api_key", this.config.apiKey);
9606
+ const modelId = options?.sonioxStreaming?.model || options?.model || "stt-rt-preview";
9607
+ wsUrl.searchParams.set("model", modelId);
9608
+ if (options?.encoding) {
9959
9609
  const encodingMap = {
9960
9610
  linear16: "pcm_s16le",
9961
9611
  pcm: "pcm_s16le",
9962
9612
  mulaw: "mulaw",
9963
9613
  alaw: "alaw"
9964
9614
  };
9965
- initMessage.audio_format = encodingMap[options.encoding] || options.encoding;
9615
+ wsUrl.searchParams.set("audio_format", encodingMap[options.encoding] || options.encoding);
9966
9616
  }
9967
- if (sonioxOpts?.sampleRate || options?.sampleRate) {
9968
- initMessage.sample_rate = sonioxOpts?.sampleRate || options?.sampleRate;
9617
+ if (options?.sampleRate) {
9618
+ wsUrl.searchParams.set("sample_rate", options.sampleRate.toString());
9969
9619
  }
9970
- if (sonioxOpts?.numChannels || options?.channels) {
9971
- initMessage.num_channels = sonioxOpts?.numChannels || options?.channels;
9620
+ if (options?.channels) {
9621
+ wsUrl.searchParams.set("num_channels", options.channels.toString());
9972
9622
  }
9623
+ const sonioxOpts = options?.sonioxStreaming;
9973
9624
  if (sonioxOpts) {
9974
9625
  if (sonioxOpts.languageHints && sonioxOpts.languageHints.length > 0) {
9975
- initMessage.language_hints = sonioxOpts.languageHints;
9626
+ wsUrl.searchParams.set("language_hints", JSON.stringify(sonioxOpts.languageHints));
9976
9627
  }
9977
9628
  if (sonioxOpts.enableLanguageIdentification) {
9978
- initMessage.enable_language_identification = true;
9629
+ wsUrl.searchParams.set("enable_language_identification", "true");
9979
9630
  }
9980
9631
  if (sonioxOpts.enableEndpointDetection) {
9981
- initMessage.enable_endpoint_detection = true;
9632
+ wsUrl.searchParams.set("enable_endpoint_detection", "true");
9982
9633
  }
9983
9634
  if (sonioxOpts.enableSpeakerDiarization) {
9984
- initMessage.enable_speaker_diarization = true;
9635
+ wsUrl.searchParams.set("enable_speaker_diarization", "true");
9985
9636
  }
9986
9637
  if (sonioxOpts.context) {
9987
- initMessage.context = typeof sonioxOpts.context === "string" ? sonioxOpts.context : sonioxOpts.context;
9638
+ wsUrl.searchParams.set(
9639
+ "context",
9640
+ typeof sonioxOpts.context === "string" ? sonioxOpts.context : JSON.stringify(sonioxOpts.context)
9641
+ );
9988
9642
  }
9989
9643
  if (sonioxOpts.translation) {
9990
- initMessage.translation = sonioxOpts.translation;
9644
+ wsUrl.searchParams.set("translation", JSON.stringify(sonioxOpts.translation));
9991
9645
  }
9992
9646
  if (sonioxOpts.clientReferenceId) {
9993
- initMessage.client_reference_id = sonioxOpts.clientReferenceId;
9647
+ wsUrl.searchParams.set("client_reference_id", sonioxOpts.clientReferenceId);
9994
9648
  }
9995
9649
  }
9996
9650
  if (!sonioxOpts?.languageHints && options?.language) {
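
Configuration moves from a post-open init message into the WebSocket URL itself. One review-worthy side effect: the API key now rides in the query string, where intermediate proxies and access logs can capture it. A sketch of the resulting URL (the host is a placeholder; the adapter resolves it via `getRegionalWsHost()`):

```typescript
// Sketch of the query-string configuration; host and key are placeholders.
const wsUrl = new URL("wss://stt-rt.soniox.com/transcribe-websocket"); // assumed host
wsUrl.searchParams.set("api_key", process.env.SONIOX_API_KEY ?? "");
wsUrl.searchParams.set("model", "stt-rt-preview");
wsUrl.searchParams.set("audio_format", "pcm_s16le");
wsUrl.searchParams.set("sample_rate", "16000");
wsUrl.searchParams.set("num_channels", "1");
// Arrays and objects are JSON-encoded into single params, per the hunk:
wsUrl.searchParams.set("language_hints", JSON.stringify(["en", "de"]));
// const ws = new WebSocket(wsUrl.toString());
```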
@@ -9999,33 +9653,24 @@ var SonioxAdapter = class extends BaseAdapter {
9999
9653
  `[Soniox] Warning: language="multi" is Deepgram-specific and not supported by Soniox. For automatic language detection, use languageDetection: true instead, or specify a language code like 'en'.`
10000
9654
  );
10001
9655
  }
10002
- initMessage.language_hints = [options.language];
9656
+ wsUrl.searchParams.set("language_hints", JSON.stringify([options.language]));
10003
9657
  }
10004
9658
  if (!sonioxOpts?.enableSpeakerDiarization && options?.diarization) {
10005
- initMessage.enable_speaker_diarization = true;
9659
+ wsUrl.searchParams.set("enable_speaker_diarization", "true");
10006
9660
  }
10007
9661
  if (!sonioxOpts?.enableLanguageIdentification && options?.languageDetection) {
10008
- initMessage.enable_language_identification = true;
9662
+ wsUrl.searchParams.set("enable_language_identification", "true");
9663
+ }
9664
+ if (options?.interimResults !== false) {
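+ // no-op branch: nothing is sent for interim results (Soniox appears to emit partial tokens by default)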
10009
9665
  }
10010
9666
  let status = "connecting";
10011
9667
  let openedAt = null;
10012
9668
  let receivedData = false;
10013
9669
  const WebSocketImpl = typeof WebSocket !== "undefined" ? WebSocket : require("ws");
10014
- const ws = new WebSocketImpl(wsUrl);
9670
+ const ws = new WebSocketImpl(wsUrl.toString());
10015
9671
  ws.onopen = () => {
10016
- openedAt = Date.now();
10017
- const initPayload = JSON.stringify(initMessage);
10018
- if (callbacks?.onRawMessage) {
10019
- callbacks.onRawMessage({
10020
- provider: this.name,
10021
- direction: "outgoing",
10022
- timestamp: Date.now(),
10023
- payload: initPayload,
10024
- messageType: "init"
10025
- });
10026
- }
10027
- ws.send(initPayload);
10028
9672
  status = "open";
9673
+ openedAt = Date.now();
10029
9674
  callbacks?.onOpen?.();
10030
9675
  };
10031
9676
  ws.onmessage = (event) => {
@@ -10034,7 +9679,8 @@ var SonioxAdapter = class extends BaseAdapter {
10034
9679
  let messageType;
10035
9680
  try {
10036
9681
  const data = JSON.parse(rawPayload);
10037
- if (data.error) {
9682
+ const errorMessage = data.error_message || data.error;
9683
+ if (errorMessage) {
10038
9684
  messageType = "error";
10039
9685
  } else if (data.finished) {
10040
9686
  messageType = "finished";
@@ -10050,10 +9696,10 @@ var SonioxAdapter = class extends BaseAdapter {
10050
9696
  messageType
10051
9697
  });
10052
9698
  }
10053
- if (data.error) {
9699
+ if (errorMessage) {
10054
9700
  callbacks?.onError?.({
10055
9701
  code: data.error_code?.toString() || "STREAM_ERROR",
10056
- message: data.error
9702
+ message: errorMessage
10057
9703
  });
10058
9704
  return;
10059
9705
  }
@@ -10067,7 +9713,7 @@ var SonioxAdapter = class extends BaseAdapter {
10067
9713
  start: token.start_ms ? token.start_ms / 1e3 : 0,
10068
9714
  end: token.end_ms ? token.end_ms / 1e3 : 0,
10069
9715
  confidence: token.confidence,
10070
- speaker: token.speaker
9716
+ speaker: token.speaker ?? void 0
10071
9717
  }));
10072
9718
  const text = data.text || data.tokens.map((t) => t.text).join("");
10073
9719
  const isFinal = data.tokens.every((t) => t.is_final);
@@ -10076,8 +9722,8 @@ var SonioxAdapter = class extends BaseAdapter {
10076
9722
  text,
10077
9723
  isFinal,
10078
9724
  words,
10079
- speaker: data.tokens[0]?.speaker,
10080
- language: data.tokens[0]?.language,
9725
+ speaker: data.tokens[0]?.speaker ?? void 0,
9726
+ language: data.tokens[0]?.language ?? void 0,
10081
9727
  confidence: data.tokens[0]?.confidence
10082
9728
  };
10083
9729
  callbacks?.onTranscript?.(event2);
@@ -10104,10 +9750,10 @@ var SonioxAdapter = class extends BaseAdapter {
10104
9750
  ws.onclose = (event) => {
10105
9751
  status = "closed";
10106
9752
  const timeSinceOpen = openedAt ? Date.now() - openedAt : null;
10107
- const isEarlyClose = timeSinceOpen !== null && timeSinceOpen < 5e3 && !receivedData;
10108
- if (isEarlyClose && event.code === 1e3) {
9753
+ const isImmediateClose = timeSinceOpen !== null && timeSinceOpen < 1e3 && !receivedData;
9754
+ if (isImmediateClose && event.code === 1e3) {
10109
9755
  const errorMessage = [
10110
- "Soniox closed connection shortly after opening.",
9756
+ "Soniox closed connection immediately after opening.",
10111
9757
  `Current config: region=${this.region}, model=${modelId}`,
10112
9758
  "Likely causes:",
10113
9759
  " - Invalid API key or region mismatch (keys are region-specific, current: " + this.region + ")",
@@ -10193,7 +9839,7 @@ var SonioxAdapter = class extends BaseAdapter {
10193
9839
  async getModels() {
10194
9840
  this.validateConfig();
10195
9841
  try {
10196
- const response = await this.client.get("/models");
9842
+ const response = await getModels(this.getAxiosConfig());
10197
9843
  return response.data.models || [];
10198
9844
  } catch (error) {
10199
9845
  console.error("Failed to fetch Soniox models:", error);
@@ -10225,11 +9871,44 @@ var SonioxAdapter = class extends BaseAdapter {
10225
9871
  return buildUtterancesFromWords(words);
10226
9872
  }
10227
9873
  /**
10228
- * Normalize Soniox response to unified format
9874
+ * Normalize v1 API response to unified format
9875
+ *
9876
+ * @param meta - Transcription metadata from getTranscription/createTranscription
9877
+ * @param transcript - Transcript data (text/tokens), only present when status is completed
10229
9878
  */
10230
- normalizeResponse(response) {
10231
- const { text, tokens } = response;
10232
- const words = tokens.map((token) => ({
9879
+ normalizeTranscription(meta, transcript) {
9880
+ if (meta.status === TranscriptionStatus.error) {
9881
+ return {
9882
+ success: false,
9883
+ provider: this.name,
9884
+ data: {
9885
+ id: meta.id,
9886
+ text: "",
9887
+ status: "error"
9888
+ },
9889
+ error: {
9890
+ code: meta.error_type || "TRANSCRIPTION_ERROR",
9891
+ message: meta.error_message || "Transcription failed"
9892
+ },
9893
+ raw: { meta, transcript }
9894
+ };
9895
+ }
9896
+ if (!transcript) {
9897
+ return {
9898
+ success: true,
9899
+ provider: this.name,
9900
+ data: {
9901
+ id: meta.id,
9902
+ text: "",
9903
+ status: meta.status,
9904
+ duration: meta.audio_duration_ms ? meta.audio_duration_ms / 1e3 : void 0
9905
+ },
9906
+ raw: { meta }
9907
+ };
9908
+ }
9909
+ const tokens = transcript.tokens || [];
9910
+ const text = transcript.text || tokens.map((t) => t.text).join("");
9911
+ const words = tokens.filter((t) => t.start_ms !== void 0 && t.end_ms !== void 0).map((token) => ({
10233
9912
  word: token.text,
10234
9913
  start: token.start_ms / 1e3,
10235
9914
  end: token.end_ms / 1e3,
@@ -10237,33 +9916,32 @@ var SonioxAdapter = class extends BaseAdapter {
10237
9916
  speaker: token.speaker ?? void 0
10238
9917
  }));
10239
9918
  const speakerSet = /* @__PURE__ */ new Set();
10240
- for (const token of tokens) {
10241
- if (token.speaker) speakerSet.add(token.speaker);
10242
- }
9919
+ tokens.forEach((t) => {
9920
+ if (t.speaker) speakerSet.add(String(t.speaker));
9921
+ });
10243
9922
  const speakers = speakerSet.size > 0 ? Array.from(speakerSet).map((id) => ({
10244
9923
  id,
10245
9924
  label: `Speaker ${id}`
10246
9925
  })) : void 0;
10247
- const utterances = tokens.length > 0 ? this.buildUtterancesFromTokens(tokens) : [];
9926
+ const utterances = this.buildUtterancesFromTokens(tokens);
10248
9927
  const language = tokens.find((t) => t.language)?.language ?? void 0;
10249
9928
  return {
10250
9929
  success: true,
10251
9930
  provider: this.name,
10252
9931
  data: {
10253
- id: response.id || `soniox_${Date.now()}`,
9932
+ id: meta.id,
10254
9933
  text,
10255
9934
  status: TranscriptionStatus.completed,
10256
9935
  language,
10257
- duration: response.audio_duration_ms ? response.audio_duration_ms / 1e3 : response.total_audio_proc_ms ? response.total_audio_proc_ms / 1e3 : void 0,
9936
+ duration: meta.audio_duration_ms ? meta.audio_duration_ms / 1e3 : void 0,
10258
9937
  speakers,
10259
9938
  words: words.length > 0 ? words : void 0,
10260
9939
  utterances: utterances.length > 0 ? utterances : void 0
10261
9940
  },
10262
9941
  tracking: {
10263
- requestId: response.id,
10264
- processingTimeMs: response.total_audio_proc_ms
9942
+ requestId: meta.id
10265
9943
  },
10266
- raw: response
9944
+ raw: { meta, transcript }
10267
9945
  };
10268
9946
  }
10269
9947
  };
@@ -10419,29 +10097,11 @@ var ElevenLabsAdapter = class extends BaseAdapter {
10419
10097
  }
10420
10098
  }
10421
10099
  }
10422
- if (options?.webhookUrl) {
10423
- if (!formData.has("webhook")) {
10424
- formData.append("webhook", "true");
10425
- }
10426
- }
10427
10100
  const response = await this.client.post("/v1/speech-to-text", formData, {
10428
10101
  headers: {
10429
10102
  "Content-Type": "multipart/form-data"
10430
10103
  }
10431
10104
  });
10432
- if (options?.webhookUrl) {
10433
- const transcriptionId = response.data.transcription_id || response.data.id || `elevenlabs_${Date.now()}`;
10434
- return {
10435
- success: true,
10436
- provider: this.name,
10437
- data: {
10438
- id: transcriptionId,
10439
- text: "",
10440
- status: "queued"
10441
- },
10442
- raw: response.data
10443
- };
10444
- }
10445
10105
  return this.normalizeResponse(response.data);
10446
10106
  } catch (error) {
10447
10107
  return this.createErrorResponse(error);
@@ -10755,7 +10415,7 @@ var ElevenLabsAdapter = class extends BaseAdapter {
10755
10415
  }
10756
10416
  }
10757
10417
  }
10758
- const transcriptionId = ("transcription_id" in response ? response.transcription_id : response.transcription_id) || chunks[0]?.transcription_id || `elevenlabs_${Date.now()}`;
10418
+ const transcriptionId = response.transcription_id || chunks[0]?.transcription_id || `elevenlabs_${Date.now()}`;
10759
10419
  return {
10760
10420
  success: true,
10761
10421
  provider: this.name,
@@ -36673,12 +36333,10 @@ var createTemporaryApiKeyBody = import_zod10.z.object({
36673
36333
  var streaming_types_zod_exports = {};
36674
36334
  __export(streaming_types_zod_exports, {
36675
36335
  sonioxAudioFormatSchema: () => sonioxAudioFormatSchema,
36676
- sonioxAutoDetectedAudioFormatSchema: () => sonioxAutoDetectedAudioFormatSchema,
36677
36336
  sonioxContextGeneralItemSchema: () => sonioxContextGeneralItemSchema,
36678
36337
  sonioxContextSchema: () => sonioxContextSchema,
36679
36338
  sonioxErrorStatusSchema: () => sonioxErrorStatusSchema,
36680
36339
  sonioxOneWayTranslationSchema: () => sonioxOneWayTranslationSchema,
36681
- sonioxPcmAudioEncodingSchema: () => sonioxPcmAudioEncodingSchema,
36682
36340
  sonioxRealtimeModelSchema: () => sonioxRealtimeModelSchema,
36683
36341
  sonioxRecorderStateSchema: () => sonioxRecorderStateSchema,
36684
36342
  sonioxStreamingResponseSchema: () => sonioxStreamingResponseSchema,
@@ -36692,7 +36350,7 @@ __export(streaming_types_zod_exports, {
36692
36350
  streamingUpdateConfigParams: () => streamingUpdateConfigParams3
36693
36351
  });
36694
36352
  var import_zod11 = require("zod");
36695
- var sonioxAutoDetectedAudioFormatSchema = import_zod11.z.enum([
36353
+ var sonioxAudioFormatSchema = import_zod11.z.enum([
36696
36354
  "auto",
36697
36355
  "aac",
36698
36356
  "aiff",
@@ -36702,10 +36360,7 @@ var sonioxAutoDetectedAudioFormatSchema = import_zod11.z.enum([
36702
36360
  "mp3",
36703
36361
  "ogg",
36704
36362
  "wav",
36705
- "webm"
36706
- ]);
36707
- var sonioxPcmAudioEncodingSchema = import_zod11.z.enum([
36708
- // Signed PCM
36363
+ "webm",
36709
36364
  "pcm_s8",
36710
36365
  "pcm_s16le",
36711
36366
  "pcm_s16be",
@@ -36713,7 +36368,6 @@ var sonioxPcmAudioEncodingSchema = import_zod11.z.enum([
36713
36368
  "pcm_s24be",
36714
36369
  "pcm_s32le",
36715
36370
  "pcm_s32be",
36716
- // Unsigned PCM
36717
36371
  "pcm_u8",
36718
36372
  "pcm_u16le",
36719
36373
  "pcm_u16be",
@@ -36721,86 +36375,81 @@ var sonioxPcmAudioEncodingSchema = import_zod11.z.enum([
36721
36375
  "pcm_u24be",
36722
36376
  "pcm_u32le",
36723
36377
  "pcm_u32be",
36724
- // Float PCM
36725
36378
  "pcm_f32le",
36726
36379
  "pcm_f32be",
36727
36380
  "pcm_f64le",
36728
36381
  "pcm_f64be",
36729
- // Companded
36730
36382
  "mulaw",
36731
36383
  "alaw"
36732
36384
  ]);
36733
- var sonioxAudioFormatSchema = import_zod11.z.union([
36734
- sonioxAutoDetectedAudioFormatSchema,
36735
- sonioxPcmAudioEncodingSchema
36736
- ]);
36737
36385
  var sonioxOneWayTranslationSchema = import_zod11.z.object({
36738
36386
  type: import_zod11.z.literal("one_way"),
36739
- target_language: import_zod11.z.string().describe("Target language code for translation")
36387
+ target_language: import_zod11.z.string()
36740
36388
  });
36741
36389
  var sonioxTwoWayTranslationSchema = import_zod11.z.object({
36742
36390
  type: import_zod11.z.literal("two_way"),
36743
- language_a: import_zod11.z.string().describe("First language for bidirectional translation"),
36744
- language_b: import_zod11.z.string().describe("Second language for bidirectional translation")
36391
+ language_a: import_zod11.z.string(),
36392
+ language_b: import_zod11.z.string()
36745
36393
  });
36746
36394
  var sonioxTranslationConfigSchema = import_zod11.z.union([
36747
36395
  sonioxOneWayTranslationSchema,
36748
36396
  sonioxTwoWayTranslationSchema
36749
36397
  ]);
36750
36398
  var sonioxContextGeneralItemSchema = import_zod11.z.object({
36751
- key: import_zod11.z.string().describe("Context item key (e.g. 'Domain')"),
36752
- value: import_zod11.z.string().describe("Context item value (e.g. 'medicine')")
36399
+ key: import_zod11.z.string(),
36400
+ value: import_zod11.z.string()
36753
36401
  });
36754
36402
  var sonioxTranslationTermSchema = import_zod11.z.object({
36755
- source: import_zod11.z.string().describe("Source term"),
36756
- target: import_zod11.z.string().describe("Target term to translate to")
36403
+ source: import_zod11.z.string(),
36404
+ target: import_zod11.z.string()
36757
36405
  });
36758
36406
  var sonioxStructuredContextSchema = import_zod11.z.object({
36759
- general: import_zod11.z.array(sonioxContextGeneralItemSchema).optional().describe("General context items (key-value pairs)"),
36760
- text: import_zod11.z.string().optional().describe("Text context"),
36761
- terms: import_zod11.z.array(import_zod11.z.string()).optional().describe("Terms that might occur in speech"),
36762
- translation_terms: import_zod11.z.array(sonioxTranslationTermSchema).optional().describe("Hints how to translate specific terms (ignored if translation is not enabled)")
36407
+ general: import_zod11.z.array(sonioxContextGeneralItemSchema).optional(),
36408
+ text: import_zod11.z.string().optional(),
36409
+ terms: import_zod11.z.array(import_zod11.z.string()).optional(),
36410
+ translation_terms: import_zod11.z.array(sonioxTranslationTermSchema).optional()
36763
36411
  });
36764
36412
  var sonioxContextSchema = import_zod11.z.union([sonioxStructuredContextSchema, import_zod11.z.string()]);
36765
36413
  var sonioxRealtimeModelSchema = import_zod11.z.enum([
36414
+ "stt-rt-v4",
36766
36415
  "stt-rt-v3",
36767
36416
  "stt-rt-preview",
36768
36417
  "stt-rt-v3-preview",
36769
36418
  "stt-rt-preview-v2"
36770
36419
  ]);
36771
36420
  var streamingTranscriberParams3 = import_zod11.z.object({
36772
- model: sonioxRealtimeModelSchema.describe("Real-time model to use"),
36773
- audioFormat: sonioxAudioFormatSchema.optional().describe("Audio format specification. Use 'auto' for automatic detection"),
36774
- sampleRate: import_zod11.z.number().optional().describe("Sample rate in Hz (required for raw PCM formats)"),
36775
- numChannels: import_zod11.z.number().min(1).max(2).optional().describe("Number of audio channels (1 for mono, 2 for stereo) - required for raw PCM formats"),
36776
- languageHints: import_zod11.z.array(import_zod11.z.string()).optional().describe("Expected languages in the audio (ISO language codes)"),
36777
- context: sonioxContextSchema.optional().describe("Additional context to improve transcription accuracy"),
36778
- enableSpeakerDiarization: import_zod11.z.boolean().optional().describe("Enable speaker diarization - each token will include a speaker field"),
36779
- enableLanguageIdentification: import_zod11.z.boolean().optional().describe("Enable language identification - each token will include a language field"),
36780
- enableEndpointDetection: import_zod11.z.boolean().optional().describe("Enable endpoint detection to detect when a speaker has finished talking"),
36781
- translation: sonioxTranslationConfigSchema.optional().describe("Translation configuration"),
36782
- clientReferenceId: import_zod11.z.string().optional().describe("Optional tracking identifier (client-defined)")
36783
- });
36784
- var sonioxTranslationStatusSchema = import_zod11.z.enum(["none", "original", "translation"]);
36421
+ model: sonioxRealtimeModelSchema,
36422
+ audioFormat: sonioxAudioFormatSchema.optional(),
36423
+ sampleRate: import_zod11.z.number().optional(),
36424
+ numChannels: import_zod11.z.number().optional(),
36425
+ languageHints: import_zod11.z.array(import_zod11.z.string()).optional(),
36426
+ context: sonioxContextSchema.optional(),
36427
+ enableSpeakerDiarization: import_zod11.z.boolean().optional(),
36428
+ enableLanguageIdentification: import_zod11.z.boolean().optional(),
36429
+ enableEndpointDetection: import_zod11.z.boolean().optional(),
36430
+ translation: sonioxTranslationConfigSchema.optional(),
36431
+ clientReferenceId: import_zod11.z.string().optional()
36432
+ });
36433
+ var sonioxTranslationStatusSchema = import_zod11.z.enum(["original", "translation", "none"]);
36785
36434
  var sonioxTokenSchema = import_zod11.z.object({
36786
- text: import_zod11.z.string().describe("Token text content (subword, word, or space)"),
36787
- start_ms: import_zod11.z.number().optional().describe("Start time of the token in milliseconds"),
36788
- end_ms: import_zod11.z.number().optional().describe("End time of the token in milliseconds"),
36789
- confidence: import_zod11.z.number().min(0).max(1).optional().describe("Confidence score between 0.0 and 1.0"),
36790
- is_final: import_zod11.z.boolean().describe("Whether this token is final (confirmed) or provisional"),
36791
- speaker: import_zod11.z.string().optional().describe("Speaker identifier (only present when speaker diarization is enabled)"),
36792
- language: import_zod11.z.string().optional().describe("Detected language code (only present when language identification is enabled)"),
36793
- source_language: import_zod11.z.string().optional().describe("Original language code for translated tokens"),
36794
- translation_status: sonioxTranslationStatusSchema.optional().describe("Translation status: 'none', 'original', or 'translation'")
36435
+ text: import_zod11.z.string(),
36436
+ start_ms: import_zod11.z.number().optional(),
36437
+ end_ms: import_zod11.z.number().optional(),
36438
+ confidence: import_zod11.z.number(),
36439
+ is_final: import_zod11.z.boolean(),
36440
+ speaker: import_zod11.z.string().optional(),
36441
+ translation_status: sonioxTranslationStatusSchema.optional(),
36442
+ language: import_zod11.z.string().optional(),
36443
+ source_language: import_zod11.z.string().optional()
36795
36444
  });
36796
36445
  var sonioxStreamingResponseSchema = import_zod11.z.object({
36797
- text: import_zod11.z.string().optional().describe("Complete transcribed text"),
36798
- tokens: import_zod11.z.array(sonioxTokenSchema).describe("List of recognized tokens"),
36799
- final_audio_proc_ms: import_zod11.z.number().optional().describe("Milliseconds of audio processed into final tokens"),
36800
- total_audio_proc_ms: import_zod11.z.number().optional().describe("Milliseconds of audio processed (final + non-final)"),
36801
- finished: import_zod11.z.boolean().optional().describe("Whether the transcription is complete"),
36802
- error: import_zod11.z.string().optional().describe("Error message if an error occurred"),
36803
- error_code: import_zod11.z.number().optional().describe("Error code if an error occurred")
36446
+ text: import_zod11.z.string(),
36447
+ tokens: import_zod11.z.array(sonioxTokenSchema),
36448
+ final_audio_proc_ms: import_zod11.z.number(),
36449
+ total_audio_proc_ms: import_zod11.z.number(),
36450
+ finished: import_zod11.z.boolean().optional(),
36451
+ error_code: import_zod11.z.number().optional(),
36452
+ error_message: import_zod11.z.string().optional()
36804
36453
  });
36805
36454
  var sonioxRecorderStateSchema = import_zod11.z.enum([
36806
36455
  "Init",
@@ -37366,8 +37015,8 @@ var BatchOnlyProviders = AllProviders.filter(
37366
37015
  );
37367
37016
 
37368
37017
  // src/generated/deepgram/schema/index.ts
37369
- var schema_exports4 = {};
37370
- __export(schema_exports4, {
37018
+ var schema_exports5 = {};
37019
+ __export(schema_exports5, {
37371
37020
  V1ListenPostParametersCallbackMethod: () => V1ListenPostParametersCallbackMethod,
37372
37021
  V1ListenPostParametersCustomIntentMode: () => V1ListenPostParametersCustomIntentMode,
37373
37022
  V1ListenPostParametersCustomTopicMode: () => V1ListenPostParametersCustomTopicMode,
@@ -37622,8 +37271,8 @@ var V1SpeakPostParametersSampleRate = {
37622
37271
  };
37623
37272
 
37624
37273
  // src/generated/openai/schema/index.ts
37625
- var schema_exports5 = {};
37626
- __export(schema_exports5, {
37274
+ var schema_exports6 = {};
37275
+ __export(schema_exports6, {
37627
37276
  AudioResponseFormat: () => AudioResponseFormat,
37628
37277
  CreateSpeechRequestResponseFormat: () => CreateSpeechRequestResponseFormat,
37629
37278
  CreateSpeechRequestStreamFormat: () => CreateSpeechRequestStreamFormat,
@@ -37963,8 +37612,8 @@ var VoiceResourceObject = {
37963
37612
  };
37964
37613
 
37965
37614
  // src/generated/speechmatics/schema/index.ts
37966
- var schema_exports6 = {};
37967
- __export(schema_exports6, {
37615
+ var schema_exports7 = {};
37616
+ __export(schema_exports7, {
37968
37617
  AutoChaptersResultErrorType: () => AutoChaptersResultErrorType,
37969
37618
  ErrorResponseError: () => ErrorResponseError,
37970
37619
  GetJobsJobidAlignmentTags: () => GetJobsJobidAlignmentTags,
@@ -38153,32 +37802,6 @@ var WrittenFormRecognitionResultType = {
38153
37802
  word: "word"
38154
37803
  };
38155
37804
 
38156
- // src/generated/soniox/schema/index.ts
38157
- var schema_exports7 = {};
38158
- __export(schema_exports7, {
38159
- TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
38160
- TranscriptionMode: () => TranscriptionMode,
38161
- TranscriptionStatus: () => TranscriptionStatus,
38162
- TranslationConfigType: () => TranslationConfigType
38163
- });
38164
-
38165
- // src/generated/soniox/schema/temporaryApiKeyUsageType.ts
38166
- var TemporaryApiKeyUsageType = {
38167
- transcribe_websocket: "transcribe_websocket"
38168
- };
38169
-
38170
- // src/generated/soniox/schema/transcriptionMode.ts
38171
- var TranscriptionMode = {
38172
- real_time: "real_time",
38173
- async: "async"
38174
- };
38175
-
38176
- // src/generated/soniox/schema/translationConfigType.ts
38177
- var TranslationConfigType = {
38178
- one_way: "one_way",
38179
- two_way: "two_way"
38180
- };
38181
-
38182
37805
  // src/generated/elevenlabs/schema/index.ts
38183
37806
  var schema_exports8 = {};
38184
37807
  __export(schema_exports8, {