npm - voice-router-dev - Versions diffs - 0.9.3 → 0.9.5 - Mend

voice-router-dev 0.9.3 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

package/CHANGELOG.md +12 -0
package/dist/constants.d.mts +11 -92
package/dist/constants.d.ts +11 -92
package/dist/constants.js +11 -88
package/dist/constants.mjs +11 -88
package/dist/{field-configs-FbtCPxzs.d.mts → field-configs-BVOZQiG3.d.mts} +4855 -3773
package/dist/{field-configs-FbtCPxzs.d.ts → field-configs-BVOZQiG3.d.ts} +4855 -3773
package/dist/field-configs.d.mts +1 -1
package/dist/field-configs.d.ts +1 -1
package/dist/field-configs.js +583 -150
package/dist/field-configs.mjs +583 -150
package/dist/index.d.mts +1211 -162
package/dist/index.d.ts +1211 -162
package/dist/index.js +924 -275
package/dist/index.mjs +927 -275
package/dist/{provider-metadata-D1d-9cng.d.ts → provider-metadata-CiSA4fWP.d.ts} +2 -2
package/dist/{provider-metadata-BJ29OPW1.d.mts → provider-metadata-oxzd1q6t.d.mts} +2 -2
package/dist/provider-metadata.d.mts +1 -1
package/dist/provider-metadata.d.ts +1 -1
package/dist/provider-metadata.js +3 -66
package/dist/provider-metadata.mjs +3 -66
package/dist/{speechToTextChunkResponseModel-BY2lGyZ3.d.ts → speechToTextChunkResponseModel-Dns0Ma9x.d.ts} +364 -39
package/dist/{speechToTextChunkResponseModel-KayxDiZ7.d.mts → speechToTextChunkResponseModel-_ZvHTD4e.d.mts} +364 -39
package/dist/webhooks.d.mts +3 -2
package/dist/webhooks.d.ts +3 -2
package/package.json +8 -3

package/dist/index.js CHANGED Viewed

@@ -145,6 +145,7 @@ __export(src_exports, {
   SonioxModels: () => SonioxModels,
   SonioxRealtimeModel: () => SonioxRealtimeModel,
   SonioxRegion: () => SonioxRegion,
+  SonioxSDK: () => sdk_types_exports,
   SonioxStreamingSchema: () => SonioxStreamingSchema,
   SonioxStreamingTypes: () => streaming_types_zod_exports,
   SonioxStreamingUpdateSchema: () => SonioxStreamingUpdateSchema,
@@ -1333,7 +1334,6 @@ var AzureLocales = [
   { code: "ar-YE", name: "Arabic (Yemen)" },
   { code: "as-IN", name: "Assamese (India)" },
   { code: "az-AZ", name: "Azerbaijani (Azerbaijan)" },
-  { code: "be-BY", name: "Belarusian (Belarus)" },
   { code: "bg-BG", name: "Bulgarian (Bulgaria)" },
   { code: "bn-BD", name: "Bengali (Bangladesh)" },
   { code: "bn-IN", name: "Bengali (India)" },
@@ -1414,7 +1414,6 @@ var AzureLocales = [
   { code: "lo-LA", name: "Lao (Latin)" },
   { code: "lt-LT", name: "Lithuanian (Lithuania)" },
   { code: "lv-LV", name: "Latvian (Latvia)" },
-  { code: "mi-NZ", name: "Maori (New Zealand)" },
   { code: "mk-MK", name: "Macedonian (North Macedonia)" },
   { code: "ml-IN", name: "Malayalam (India)" },
   { code: "mn-MN", name: "Mongolian (Mongolia)" },
@@ -1490,7 +1489,6 @@ var AzureLocaleCodes = [
   "ar-YE",
   "as-IN",
   "az-AZ",
-  "be-BY",
   "bg-BG",
   "bn-BD",
   "bn-IN",
@@ -1571,7 +1569,6 @@ var AzureLocaleCodes = [
   "lo-LA",
   "lt-LT",
   "lv-LV",
-  "mi-NZ",
   "mk-MK",
   "ml-IN",
   "mn-MN",
@@ -1647,7 +1644,6 @@ var AzureLocaleLabels = {
   "ar-YE": "Arabic (Yemen)",
   "as-IN": "Assamese (India)",
   "az-AZ": "Azerbaijani (Azerbaijan)",
-  "be-BY": "Belarusian (Belarus)",
   "bg-BG": "Bulgarian (Bulgaria)",
   "bn-BD": "Bengali (Bangladesh)",
   "bn-IN": "Bengali (India)",
@@ -1728,7 +1724,6 @@ var AzureLocaleLabels = {
   "lo-LA": "Lao (Latin)",
   "lt-LT": "Lithuanian (Lithuania)",
   "lv-LV": "Latvian (Latvia)",
-  "mi-NZ": "Maori (New Zealand)",
   "mk-MK": "Macedonian (North Macedonia)",
   "ml-IN": "Malayalam (India)",
   "mn-MN": "Mongolian (Mongolia)",
@@ -1804,7 +1799,6 @@ var AzureLocale = {
   "ar-YE": "ar-YE",
   "as-IN": "as-IN",
   "az-AZ": "az-AZ",
-  "be-BY": "be-BY",
   "bg-BG": "bg-BG",
   "bn-BD": "bn-BD",
   "bn-IN": "bn-IN",
@@ -1885,7 +1879,6 @@ var AzureLocale = {
   "lo-LA": "lo-LA",
   "lt-LT": "lt-LT",
   "lv-LV": "lv-LV",
-  "mi-NZ": "mi-NZ",
   "mk-MK": "mk-MK",
   "ml-IN": "ml-IN",
   "mn-MN": "mn-MN",
@@ -1976,8 +1969,6 @@ var ElevenLabsLanguages = [
   { code: "hr", name: "Croatian" },
   { code: "bg", name: "Bulgarian" },
   { code: "lt", name: "Lithuanian" },
-  { code: "la", name: "Latin" },
-  { code: "mi", name: "Maori" },
   { code: "ml", name: "Malayalam" },
   { code: "cy", name: "Welsh" },
   { code: "sk", name: "Slovak" },
@@ -1991,20 +1982,16 @@ var ElevenLabsLanguages = [
   { code: "kn", name: "Kannada" },
   { code: "et", name: "Estonian" },
   { code: "mk", name: "Macedonian" },
-  { code: "br", name: "Breton" },
-  { code: "eu", name: "Basque" },
   { code: "is", name: "Icelandic" },
   { code: "hy", name: "Armenian" },
   { code: "ne", name: "Nepali" },
   { code: "mn", name: "Mongolian" },
   { code: "bs", name: "Bosnian" },
   { code: "kk", name: "Kazakh" },
-  { code: "sq", name: "Albanian" },
   { code: "sw", name: "Swahili" },
   { code: "gl", name: "Galician" },
   { code: "mr", name: "Marathi" },
   { code: "pa", name: "Punjabi" },
-  { code: "si", name: "Sinhala" },
   { code: "km", name: "Khmer" },
   { code: "sn", name: "Shona" },
   { code: "yo", name: "Yoruba" },
@@ -2017,29 +2004,16 @@ var ElevenLabsLanguages = [
   { code: "sd", name: "Sindhi" },
   { code: "gu", name: "Gujarati" },
   { code: "am", name: "Amharic" },
-  { code: "yi", name: "Yiddish" },
   { code: "lo", name: "Lao" },
   { code: "uz", name: "Uzbek" },
-  { code: "fo", name: "Faroese" },
-  { code: "ht", name: "Haitian Creole" },
   { code: "ps", name: "Pashto" },
-  { code: "tk", name: "Turkmen" },
-  { code: "nn", name: "Norwegian Nynorsk" },
   { code: "mt", name: "Maltese" },
-  { code: "sa", name: "Sanskrit" },
   { code: "lb", name: "Luxembourgish" },
   { code: "my", name: "Burmese" },
-  { code: "bo", name: "Tibetan" },
-  { code: "tl", name: "Tagalog" },
-  { code: "mg", name: "Malagasy" },
   { code: "as", name: "Assamese" },
-  { code: "tt", name: "Tatar" },
-  { code: "haw", name: "Hawaiian" },
   { code: "ln", name: "Lingala" },
   { code: "ha", name: "Hausa" },
-  { code: "ba", name: "Bashkir" },
-  { code: "jw", name: "Javanese" },
-  { code: "su", name: "Sundanese" }
+  { code: "jw", name: "Javanese" }
 ];
 var ElevenLabsLanguageCodes = [
   "en",
@@ -2077,8 +2051,6 @@ var ElevenLabsLanguageCodes = [
   "hr",
   "bg",
   "lt",
-  "la",
-  "mi",
   "ml",
   "cy",
   "sk",
@@ -2092,20 +2064,16 @@ var ElevenLabsLanguageCodes = [
   "kn",
   "et",
   "mk",
-  "br",
-  "eu",
   "is",
   "hy",
   "ne",
   "mn",
   "bs",
   "kk",
-  "sq",
   "sw",
   "gl",
   "mr",
   "pa",
-  "si",
   "km",
   "sn",
   "yo",
@@ -2118,29 +2086,16 @@ var ElevenLabsLanguageCodes = [
   "sd",
   "gu",
   "am",
-  "yi",
   "lo",
   "uz",
-  "fo",
-  "ht",
   "ps",
-  "tk",
-  "nn",
   "mt",
-  "sa",
   "lb",
   "my",
-  "bo",
-  "tl",
-  "mg",
   "as",
-  "tt",
-  "haw",
   "ln",
   "ha",
-  "ba",
-  "jw",
-  "su"
+  "jw"
 ];
 var ElevenLabsLanguageLabels = {
   en: "English",
@@ -2178,8 +2133,6 @@ var ElevenLabsLanguageLabels = {
   hr: "Croatian",
   bg: "Bulgarian",
   lt: "Lithuanian",
-  la: "Latin",
-  mi: "Maori",
   ml: "Malayalam",
   cy: "Welsh",
   sk: "Slovak",
@@ -2193,20 +2146,16 @@ var ElevenLabsLanguageLabels = {
   kn: "Kannada",
   et: "Estonian",
   mk: "Macedonian",
-  br: "Breton",
-  eu: "Basque",
   is: "Icelandic",
   hy: "Armenian",
   ne: "Nepali",
   mn: "Mongolian",
   bs: "Bosnian",
   kk: "Kazakh",
-  sq: "Albanian",
   sw: "Swahili",
   gl: "Galician",
   mr: "Marathi",
   pa: "Punjabi",
-  si: "Sinhala",
   km: "Khmer",
   sn: "Shona",
   yo: "Yoruba",
@@ -2219,29 +2168,16 @@ var ElevenLabsLanguageLabels = {
   sd: "Sindhi",
   gu: "Gujarati",
   am: "Amharic",
-  yi: "Yiddish",
   lo: "Lao",
   uz: "Uzbek",
-  fo: "Faroese",
-  ht: "Haitian Creole",
   ps: "Pashto",
-  tk: "Turkmen",
-  nn: "Norwegian Nynorsk",
   mt: "Maltese",
-  sa: "Sanskrit",
   lb: "Luxembourgish",
   my: "Burmese",
-  bo: "Tibetan",
-  tl: "Tagalog",
-  mg: "Malagasy",
   as: "Assamese",
-  tt: "Tatar",
-  haw: "Hawaiian",
   ln: "Lingala",
   ha: "Hausa",
-  ba: "Bashkir",
-  jw: "Javanese",
-  su: "Sundanese"
+  jw: "Javanese"
 };
 // src/generated/gladia/schema/streamingSupportedBitDepthEnum.ts
@@ -2746,6 +2682,7 @@ var OpenAITranscriptionModel = {
   "gpt-4o-mini-transcribe-2025-12-15": "gpt-4o-mini-transcribe-2025-12-15",
   "gpt-4o-transcribe": "gpt-4o-transcribe",
   "gpt-4o-transcribe-diarize": "gpt-4o-transcribe-diarize",
+  "gpt-realtime-whisper": "gpt-realtime-whisper",
   "whisper-1": "whisper-1"
 };
 var OpenAIRealtimeModel = {
@@ -2761,6 +2698,7 @@ var OpenAIRealtimeModel = {
   "gpt-audio-mini-2025-12-15": "gpt-audio-mini-2025-12-15",
   "gpt-realtime": "gpt-realtime",
   "gpt-realtime-1.5": "gpt-realtime-1.5",
+  "gpt-realtime-2": "gpt-realtime-2",
   "gpt-realtime-2025-08-28": "gpt-realtime-2025-08-28",
   "gpt-realtime-mini": "gpt-realtime-mini",
   "gpt-realtime-mini-2025-10-06": "gpt-realtime-mini-2025-10-06",
@@ -4360,6 +4298,12 @@ var GladiaAdapter = class extends BaseAdapter {
     };
     this.baseUrl = "https://api.gladia.io";
   }
+  initialize(config) {
+    super.initialize(config);
+    if (config.region) {
+      this.streamingRegion = config.region;
+    }
+  }
   /**
    * Get axios config for generated API client functions
    * Configures headers and base URL using Gladia's x-gladia-key header
@@ -5021,9 +4965,10 @@ var GladiaAdapter = class extends BaseAdapter {
   async transcribeStream(options, callbacks) {
     this.validateConfig();
     const streamingRequest = this.buildStreamingRequest(options);
+    const region = options?.region ?? this.streamingRegion;
     const initResponse = await streamingControllerInitStreamingSessionV2(
       streamingRequest,
-      options?.region ? { region: options.region } : void 0,
+      region ? { region } : void 0,
       this.getAxiosConfig()
     );
     const { id, url: apiWsUrl } = initResponse.data;
@@ -5570,12 +5515,20 @@ var EntityType = {
   email_address: "email_address",
   event: "event",
   filename: "filename",
+  gender: "gender",
   gender_sexuality: "gender_sexuality",
   healthcare_number: "healthcare_number",
   injury: "injury",
   ip_address: "ip_address",
   language: "language",
   location: "location",
+  location_address: "location_address",
+  location_address_street: "location_address_street",
+  location_city: "location_city",
+  location_coordinate: "location_coordinate",
+  location_country: "location_country",
+  location_state: "location_state",
+  location_zip: "location_zip",
   marital_status: "marital_status",
   medical_condition: "medical_condition",
   medical_process: "medical_process",
@@ -5584,6 +5537,7 @@ var EntityType = {
   number_sequence: "number_sequence",
   occupation: "occupation",
   organization: "organization",
+  organization_medical_facility: "organization_medical_facility",
   passport_number: "passport_number",
   password: "password",
   person_age: "person_age",
@@ -5592,6 +5546,7 @@ var EntityType = {
   physical_attribute: "physical_attribute",
   political_affiliation: "political_affiliation",
   religion: "religion",
+  sexuality: "sexuality",
   statistics: "statistics",
   time: "time",
   url: "url",
@@ -5618,12 +5573,20 @@ var PiiPolicy = {
   email_address: "email_address",
   event: "event",
   filename: "filename",
+  gender: "gender",
   gender_sexuality: "gender_sexuality",
   healthcare_number: "healthcare_number",
   injury: "injury",
   ip_address: "ip_address",
   language: "language",
   location: "location",
+  location_address: "location_address",
+  location_address_street: "location_address_street",
+  location_city: "location_city",
+  location_coordinate: "location_coordinate",
+  location_country: "location_country",
+  location_state: "location_state",
+  location_zip: "location_zip",
   marital_status: "marital_status",
   medical_condition: "medical_condition",
   medical_process: "medical_process",
@@ -5632,6 +5595,7 @@ var PiiPolicy = {
   number_sequence: "number_sequence",
   occupation: "occupation",
   organization: "organization",
+  organization_medical_facility: "organization_medical_facility",
   passport_number: "passport_number",
   password: "password",
   person_age: "person_age",
@@ -5640,6 +5604,7 @@ var PiiPolicy = {
   physical_attribute: "physical_attribute",
   political_affiliation: "political_affiliation",
   religion: "religion",
+  sexuality: "sexuality",
   statistics: "statistics",
   time: "time",
   url: "url",
@@ -5708,7 +5673,8 @@ var TranscriptOptionalParamsRedactPiiAudioOptionsOverrideAudioRedactionMethod =
 // src/generated/assemblyai/schema/transcriptOptionalParamsRemoveAudioTags.ts
 var TranscriptOptionalParamsRemoveAudioTags = {
-  all: "all"
+  all: "all",
+  speaker: "speaker"
 };
 // src/generated/assemblyai/schema/transcriptRedactPiiAudioOptionsOverrideAudioRedactionMethod.ts
@@ -5718,7 +5684,8 @@ var TranscriptRedactPiiAudioOptionsOverrideAudioRedactionMethod = {
 // src/generated/assemblyai/schema/transcriptRemoveAudioTags.ts
 var TranscriptRemoveAudioTags = {
-  all: "all"
+  all: "all",
+  speaker: "speaker"
 };
 // src/generated/assemblyai/api/assemblyAIAPI.ts
@@ -9610,15 +9577,18 @@ var import_axios9 = __toESM(require("axios"));
 // src/generated/soniox/schema/index.ts
 var schema_exports4 = {};
 __export(schema_exports4, {
+  TTSVoiceGender: () => TTSVoiceGender,
   TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
   TranscriptionMode: () => TranscriptionMode,
   TranscriptionStatus: () => TranscriptionStatus,
-  TranslationConfigType: () => TranslationConfigType
+  TranslationConfigType: () => TranslationConfigType,
+  UsageLogsSort: () => UsageLogsSort
 });
 // src/generated/soniox/schema/temporaryApiKeyUsageType.ts
 var TemporaryApiKeyUsageType = {
-  transcribe_websocket: "transcribe_websocket"
+  transcribe_websocket: "transcribe_websocket",
+  tts_rt: "tts_rt"
 };
 // src/generated/soniox/schema/transcriptionMode.ts
@@ -9633,6 +9603,19 @@ var TranslationConfigType = {
   two_way: "two_way"
 };
+// src/generated/soniox/schema/tTSVoiceGender.ts
+var TTSVoiceGender = {
+  male: "male",
+  female: "female",
+  neutral: "neutral"
+};
+// src/generated/soniox/schema/usageLogsSort.ts
+var UsageLogsSort = {
+  end_time_asc: "end_time_asc",
+  end_time_desc: "end_time_desc"
+};
 // src/generated/soniox/api/sonioxPublicAPI.ts
 var uploadFile = (uploadFileBody2, options) => {
   const formData = new FormData();
@@ -11000,6 +10983,7 @@ __export(deepgramAPI_zod_exports, {
   speakGenerateQueryMipOptOutDefault: () => speakGenerateQueryMipOptOutDefault,
   speakGenerateQueryModelDefault: () => speakGenerateQueryModelDefault,
   speakGenerateQueryParams: () => speakGenerateQueryParams,
+  speakGenerateQuerySpeedDefault: () => speakGenerateQuerySpeedDefault,
   speakGenerateResponse: () => speakGenerateResponse
 });
 var import_zod = require("zod");
@@ -11054,6 +11038,9 @@ var listenTranscribeQueryParams = import_zod.z.object({
   diarize: import_zod.z.boolean().optional().describe(
     "Recognize speaker changes. Each word in the transcript will be assigned a speaker number starting at 0"
   ),
+  diarize_model: import_zod.z.enum(["latest", "v1", "v2"]).optional().describe(
+    "Select and enable a specific batch diarization model version. If specifying this parameter, you should not set the deprecated `diarize=true` parameter. Not accepted on streaming requests."
+  ),
   dictation: import_zod.z.boolean().optional().describe("Dictation mode for controlling formatting with dictated speech"),
   encoding: import_zod.z.enum(["linear16", "flac", "mulaw", "amr-nb", "amr-wb", "opus", "speex", "g729"]).optional().describe("Specify the expected encoding of your submitted audio"),
   filler_words: import_zod.z.boolean().optional().describe('Filler Words can help transcribe interruptions in your audio, like "uh" and "um"'),
@@ -11319,6 +11306,7 @@ var listenTranscribeResponse = import_zod.z.object({
 var speakGenerateQueryCallbackMethodDefault = "POST";
 var speakGenerateQueryMipOptOutDefault = false;
 var speakGenerateQueryModelDefault = "aura-asteria-en";
+var speakGenerateQuerySpeedDefault = 1;
 var speakGenerateQueryParams = import_zod.z.object({
   callback: import_zod.z.string().optional().describe("URL to which we'll make the callback request"),
   callback_method: import_zod.z.enum(["POST", "PUT"]).default(speakGenerateQueryCallbackMethodDefault).describe("HTTP method by which the callback request will be made"),
@@ -11430,6 +11418,9 @@ var speakGenerateQueryParams = import_zod.z.object({
     import_zod.z.enum(["22050"]).describe("Encoding - mp3. Sample rate is fixed and not configurable (22050 Hz).")
   ).or(import_zod.z.enum(["48000"]).describe("Encoding - opus. Sample rate is fixed at 48000 Hz.")).optional().describe(
     "Sample Rate specifies the sample rate for the output audio. Based on the encoding, different sample rates are supported. For some encodings, the sample rate is not configurable"
+  ),
+  speed: import_zod.z.number().default(speakGenerateQuerySpeedDefault).describe(
+    "Speaking rate multiplier that adjusts the pace of generated speech while preserving natural prosody and voice quality. Not yet supported in all languages."
   )
 });
 var speakGenerateHeader = import_zod.z.object({
@@ -11754,6 +11745,7 @@ __export(assemblyAIAPI_zod_exports, {
   createTranscriptBodyRedactPiiAudioDefault: () => createTranscriptBodyRedactPiiAudioDefault,
   createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault: () => createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault,
   createTranscriptBodyRedactPiiDefault: () => createTranscriptBodyRedactPiiDefault,
+  createTranscriptBodyRedactPiiReturnUnredactedDefault: () => createTranscriptBodyRedactPiiReturnUnredactedDefault,
   createTranscriptBodySentimentAnalysisDefault: () => createTranscriptBodySentimentAnalysisDefault,
   createTranscriptBodySpeakerLabelsDefault: () => createTranscriptBodySpeakerLabelsDefault,
   createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault: () => createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault,
@@ -11824,6 +11816,7 @@ var createTranscriptBodyPunctuateDefault = true;
 var createTranscriptBodyRedactPiiDefault = false;
 var createTranscriptBodyRedactPiiAudioDefault = false;
 var createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault = false;
+var createTranscriptBodyRedactPiiReturnUnredactedDefault = false;
 var createTranscriptBodySentimentAnalysisDefault = false;
 var createTranscriptBodySpeakerLabelsDefault = false;
 var createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault = 1;
@@ -11862,7 +11855,7 @@ var createTranscriptBody = import_zod3.z.object({
     "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
   ),
   disfluencies: import_zod3.z.boolean().optional().describe(
-    'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
+    'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
   ),
   domain: import_zod3.z.string().nullish().describe(
     'Enable domain-specific transcription models to improve accuracy for specialized terminology. Set to `"medical-v1"` to enable [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) for improved accuracy of medical terms such as medications, procedures, conditions, and dosages.\n\nSupported languages: English (`en`), Spanish (`es`), German (`de`), French (`fr`). If used with an unsupported language, the parameter is ignored and a warning is returned.\n'
@@ -12169,12 +12162,20 @@ var createTranscriptBody = import_zod3.z.object({
       "email_address",
       "event",
       "filename",
+      "gender",
       "gender_sexuality",
       "healthcare_number",
       "injury",
       "ip_address",
       "language",
       "location",
+      "location_address",
+      "location_address_street",
+      "location_city",
+      "location_coordinate",
+      "location_country",
+      "location_state",
+      "location_zip",
       "marital_status",
       "medical_condition",
       "medical_process",
@@ -12183,6 +12184,7 @@ var createTranscriptBody = import_zod3.z.object({
       "number_sequence",
       "occupation",
       "organization",
+      "organization_medical_facility",
       "passport_number",
       "password",
       "person_age",
@@ -12191,6 +12193,7 @@ var createTranscriptBody = import_zod3.z.object({
       "physical_attribute",
       "political_affiliation",
       "religion",
+      "sexuality",
       "statistics",
       "time",
       "url",
@@ -12198,15 +12201,20 @@ var createTranscriptBody = import_zod3.z.object({
       "username",
       "vehicle_id",
       "zodiac_sign"
-    ]).describe("The type of PII to redact")
+    ]).describe(
+      "The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
+    )
   ).optional().describe(
     "The list of PII Redaction policies to enable. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
   ),
   redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).describe(
-    "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
+    "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
   ).or(import_zod3.z.null()).optional().describe(
     "The replacement logic for detected PII, can be `entity_type` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
   ),
+  redact_pii_return_unredacted: import_zod3.z.boolean().optional().describe(
+    "When set to `true`, returns the original unredacted transcript alongside the redacted one in the same response. Requires `redact_pii` to be `true`, otherwise a 400 error is returned.\n\nWhen enabled, the response includes the additional fields `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. The existing `text`, `words`, and `utterances` fields remain fully redacted. When disabled (default), the response is unchanged and contains only the redacted transcript. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details.\n"
+  ),
   sentiment_analysis: import_zod3.z.boolean().optional().describe(
     "Enable [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech), can be true or false"
   ),
@@ -12304,10 +12312,10 @@ var createTranscriptBody = import_zod3.z.object({
   ),
   summary_model: import_zod3.z.enum(["informative", "conversational", "catchy"]).optional().describe("The model to summarize the transcript"),
   summary_type: import_zod3.z.enum(["bullets", "bullets_verbose", "gist", "headline", "paragraph"]).optional().describe("The type of summary"),
-  remove_audio_tags: import_zod3.z.enum(["all"]).describe(
-    'Remove [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) from the transcript text. Set to `"all"` to remove all audio tags.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
+  remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
+    'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
   ).or(import_zod3.z.null()).optional().describe(
-    'Remove [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) from the transcript text. Set to `"all"` to remove all audio tags.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
+    'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
   ),
   temperature: import_zod3.z.number().optional().describe(
     "Control the amount of randomness injected into the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
@@ -12441,7 +12449,7 @@ var createTranscriptResponse = import_zod3.z.object({
     "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
   ),
   disfluencies: import_zod3.z.boolean().nullish().describe(
-    'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
+    'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
   ),
   domain: import_zod3.z.string().nullish().describe(
     'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
@@ -12464,12 +12472,20 @@ var createTranscriptResponse = import_zod3.z.object({
         "email_address",
         "event",
         "filename",
+        "gender",
         "gender_sexuality",
         "healthcare_number",
         "injury",
         "ip_address",
         "language",
         "location",
+        "location_address",
+        "location_address_street",
+        "location_city",
+        "location_coordinate",
+        "location_country",
+        "location_state",
+        "location_zip",
         "marital_status",
         "medical_condition",
         "medical_process",
@@ -12478,6 +12494,7 @@ var createTranscriptResponse = import_zod3.z.object({
         "number_sequence",
         "occupation",
         "organization",
+        "organization_medical_facility",
         "passport_number",
         "password",
         "person_age",
@@ -12486,6 +12503,7 @@ var createTranscriptResponse = import_zod3.z.object({
         "physical_attribute",
         "political_affiliation",
         "religion",
+        "sexuality",
         "statistics",
         "time",
         "url",
@@ -12790,6 +12808,24 @@ var createTranscriptResponse = import_zod3.z.object({
   }).optional().describe(
     "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
   ),
+  metadata: import_zod3.z.object({
+    domain_used: import_zod3.z.string().nullish().describe(
+      'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
+    ),
+    warnings: import_zod3.z.array(
+      import_zod3.z.object({
+        message: import_zod3.z.string().describe("A human-readable description of the warning.")
+      }).describe(
+        "A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
+      )
+    ).optional().describe(
+      "Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
+    )
+  }).describe(
+    "Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
+  ).or(import_zod3.z.null()).optional().describe(
+    "Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
+  ),
   multichannel: import_zod3.z.boolean().nullish().describe(
     "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
   ),
@@ -12837,12 +12873,20 @@ var createTranscriptResponse = import_zod3.z.object({
       "email_address",
       "event",
       "filename",
+      "gender",
       "gender_sexuality",
       "healthcare_number",
       "injury",
       "ip_address",
       "language",
       "location",
+      "location_address",
+      "location_address_street",
+      "location_city",
+      "location_coordinate",
+      "location_country",
+      "location_state",
+      "location_zip",
       "marital_status",
       "medical_condition",
       "medical_process",
@@ -12851,6 +12895,7 @@ var createTranscriptResponse = import_zod3.z.object({
       "number_sequence",
       "occupation",
       "organization",
+      "organization_medical_facility",
       "passport_number",
       "password",
       "person_age",
@@ -12859,6 +12904,7 @@ var createTranscriptResponse = import_zod3.z.object({
       "physical_attribute",
       "political_affiliation",
       "religion",
+      "sexuality",
       "statistics",
       "time",
       "url",
@@ -12866,12 +12912,17 @@ var createTranscriptResponse = import_zod3.z.object({
       "username",
       "vehicle_id",
       "zodiac_sign"
-    ]).describe("The type of PII to redact")
+    ]).describe(
+      "The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
+    )
   ).nullish().describe(
     "The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
   ),
   redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
-    "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
+    "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
+  ),
+  redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
+    "Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
   ),
   sentiment_analysis: import_zod3.z.boolean().nullish().describe(
     "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
@@ -13008,20 +13059,23 @@ var createTranscriptResponse = import_zod3.z.object({
     "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
   ),
   summary_model: import_zod3.z.string().nullish().describe(
-    "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
+    "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
   ),
   summary_type: import_zod3.z.string().nullish().describe(
-    "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
+    "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
   ),
-  remove_audio_tags: import_zod3.z.enum(["all"]).describe(
-    "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
+  remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
+    'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
   ).or(import_zod3.z.null()).optional().describe(
-    "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
+    'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
   ),
   temperature: import_zod3.z.number().nullish().describe(
     "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
   ),
   text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
+  unredacted_text: import_zod3.z.string().nullish().describe(
+    "The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
+  ),
   throttled: import_zod3.z.boolean().nullish().describe(
     "True while a request is throttled and false when a request is no longer throttled"
   ),
@@ -13058,6 +13112,39 @@ var createTranscriptResponse = import_zod3.z.object({
   ).nullish().describe(
     "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
   ),
+  unredacted_utterances: import_zod3.z.array(
+    import_zod3.z.object({
+      confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
+      start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
+      end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
+      text: import_zod3.z.string().describe("The text for this utterance"),
+      words: import_zod3.z.array(
+        import_zod3.z.object({
+          confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
+          start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
+          end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
+          text: import_zod3.z.string().describe("The text of the word"),
+          channel: import_zod3.z.string().nullish().describe(
+            "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
+          ),
+          speaker: import_zod3.z.string().nullable().describe(
+            "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
+          )
+        })
+      ).describe("The words in the utterance."),
+      channel: import_zod3.z.string().nullish().describe(
+        "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
+      ),
+      speaker: import_zod3.z.string().describe(
+        'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
+      ),
+      translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
+        'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
+      )
+    })
+  ).nullish().describe(
+    "The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
+  ),
   webhook_auth: import_zod3.z.boolean().describe(
     "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
   ),
@@ -13086,6 +13173,22 @@ var createTranscriptResponse = import_zod3.z.object({
   ).nullish().describe(
     "An array of temporally-sequential word objects, one for each word in the transcript.\n"
   ),
+  unredacted_words: import_zod3.z.array(
+    import_zod3.z.object({
+      confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
+      start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
+      end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
+      text: import_zod3.z.string().describe("The text of the word"),
+      channel: import_zod3.z.string().nullish().describe(
+        "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
+      ),
+      speaker: import_zod3.z.string().nullable().describe(
+        "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
+      )
+    })
+  ).nullish().describe(
+    "The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
+  ),
   acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
   custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
   language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
@@ -13261,7 +13364,7 @@ var getTranscriptResponse = import_zod3.z.object({
     "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
   ),
   disfluencies: import_zod3.z.boolean().nullish().describe(
-    'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
+    'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
   ),
   domain: import_zod3.z.string().nullish().describe(
     'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
@@ -13284,12 +13387,20 @@ var getTranscriptResponse = import_zod3.z.object({
         "email_address",
         "event",
         "filename",
+        "gender",
         "gender_sexuality",
         "healthcare_number",
         "injury",
         "ip_address",
         "language",
         "location",
+        "location_address",
+        "location_address_street",
+        "location_city",
+        "location_coordinate",
+        "location_country",
+        "location_state",
+        "location_zip",
         "marital_status",
         "medical_condition",
         "medical_process",
@@ -13298,6 +13409,7 @@ var getTranscriptResponse = import_zod3.z.object({
         "number_sequence",
         "occupation",
         "organization",
+        "organization_medical_facility",
         "passport_number",
         "password",
         "person_age",
@@ -13306,6 +13418,7 @@ var getTranscriptResponse = import_zod3.z.object({
         "physical_attribute",
         "political_affiliation",
         "religion",
+        "sexuality",
         "statistics",
         "time",
         "url",
@@ -13610,6 +13723,24 @@ var getTranscriptResponse = import_zod3.z.object({
   }).optional().describe(
     "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
   ),
+  metadata: import_zod3.z.object({
+    domain_used: import_zod3.z.string().nullish().describe(
+      'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
+    ),
+    warnings: import_zod3.z.array(
+      import_zod3.z.object({
+        message: import_zod3.z.string().describe("A human-readable description of the warning.")
+      }).describe(
+        "A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
+      )
+    ).optional().describe(
+      "Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
+    )
+  }).describe(
+    "Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
+  ).or(import_zod3.z.null()).optional().describe(
+    "Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
+  ),
   multichannel: import_zod3.z.boolean().nullish().describe(
     "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
   ),
@@ -13657,12 +13788,20 @@ var getTranscriptResponse = import_zod3.z.object({
       "email_address",
       "event",
       "filename",
+      "gender",
       "gender_sexuality",
       "healthcare_number",
       "injury",
       "ip_address",
       "language",
       "location",
+      "location_address",
+      "location_address_street",
+      "location_city",
+      "location_coordinate",
+      "location_country",
+      "location_state",
+      "location_zip",
       "marital_status",
       "medical_condition",
       "medical_process",
@@ -13671,6 +13810,7 @@ var getTranscriptResponse = import_zod3.z.object({
       "number_sequence",
       "occupation",
       "organization",
+      "organization_medical_facility",
       "passport_number",
       "password",
       "person_age",
@@ -13679,6 +13819,7 @@ var getTranscriptResponse = import_zod3.z.object({
       "physical_attribute",
       "political_affiliation",
       "religion",
+      "sexuality",
       "statistics",
       "time",
       "url",
@@ -13686,12 +13827,17 @@ var getTranscriptResponse = import_zod3.z.object({
       "username",
       "vehicle_id",
       "zodiac_sign"
-    ]).describe("The type of PII to redact")
+    ]).describe(
+      "The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
+    )
   ).nullish().describe(
     "The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
   ),
   redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
-    "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
+    "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
+  ),
+  redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
+    "Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
   ),
   sentiment_analysis: import_zod3.z.boolean().nullish().describe(
     "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
@@ -13828,20 +13974,23 @@ var getTranscriptResponse = import_zod3.z.object({
     "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
   ),
   summary_model: import_zod3.z.string().nullish().describe(
-    "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
+    "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
   ),
   summary_type: import_zod3.z.string().nullish().describe(
-    "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
+    "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
   ),
-  remove_audio_tags: import_zod3.z.enum(["all"]).describe(
-    "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
+  remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
+    'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
   ).or(import_zod3.z.null()).optional().describe(
-    "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
+    'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
   ),
   temperature: import_zod3.z.number().nullish().describe(
     "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
   ),
   text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
+  unredacted_text: import_zod3.z.string().nullish().describe(
+    "The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
+  ),
   throttled: import_zod3.z.boolean().nullish().describe(
     "True while a request is throttled and false when a request is no longer throttled"
   ),
@@ -13878,6 +14027,39 @@ var getTranscriptResponse = import_zod3.z.object({
   ).nullish().describe(
     "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
   ),
+  unredacted_utterances: import_zod3.z.array(
+    import_zod3.z.object({
+      confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
+      start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
+      end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
+      text: import_zod3.z.string().describe("The text for this utterance"),
+      words: import_zod3.z.array(
+        import_zod3.z.object({
+          confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
+          start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
+          end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
+          text: import_zod3.z.string().describe("The text of the word"),
+          channel: import_zod3.z.string().nullish().describe(
+            "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
+          ),
+          speaker: import_zod3.z.string().nullable().describe(
+            "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
+          )
+        })
+      ).describe("The words in the utterance."),
+      channel: import_zod3.z.string().nullish().describe(
+        "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
+      ),
+      speaker: import_zod3.z.string().describe(
+        'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
+      ),
+      translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
+        'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
+      )
+    })
+  ).nullish().describe(
+    "The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
+  ),
   webhook_auth: import_zod3.z.boolean().describe(
     "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
   ),
@@ -13906,6 +14088,22 @@ var getTranscriptResponse = import_zod3.z.object({
   ).nullish().describe(
     "An array of temporally-sequential word objects, one for each word in the transcript.\n"
   ),
+  unredacted_words: import_zod3.z.array(
+    import_zod3.z.object({
+      confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
+      start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
+      end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
+      text: import_zod3.z.string().describe("The text of the word"),
+      channel: import_zod3.z.string().nullish().describe(
+        "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
+      ),
+      speaker: import_zod3.z.string().nullable().describe(
+        "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
+      )
+    })
+  ).nullish().describe(
+    "The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
+  ),
   acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
   custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
   language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
@@ -14041,7 +14239,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
     "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
   ),
   disfluencies: import_zod3.z.boolean().nullish().describe(
-    'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
+    'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
   ),
   domain: import_zod3.z.string().nullish().describe(
     'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
@@ -14064,12 +14262,20 @@ var deleteTranscriptResponse = import_zod3.z.object({
         "email_address",
         "event",
         "filename",
+        "gender",
         "gender_sexuality",
         "healthcare_number",
         "injury",
         "ip_address",
         "language",
         "location",
+        "location_address",
+        "location_address_street",
+        "location_city",
+        "location_coordinate",
+        "location_country",
+        "location_state",
+        "location_zip",
         "marital_status",
         "medical_condition",
         "medical_process",
@@ -14078,6 +14284,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
         "number_sequence",
         "occupation",
         "organization",
+        "organization_medical_facility",
         "passport_number",
         "password",
         "person_age",
@@ -14086,6 +14293,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
         "physical_attribute",
         "political_affiliation",
         "religion",
+        "sexuality",
         "statistics",
         "time",
         "url",
@@ -14390,6 +14598,24 @@ var deleteTranscriptResponse = import_zod3.z.object({
   }).optional().describe(
     "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
   ),
+  metadata: import_zod3.z.object({
+    domain_used: import_zod3.z.string().nullish().describe(
+      'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
+    ),
+    warnings: import_zod3.z.array(
+      import_zod3.z.object({
+        message: import_zod3.z.string().describe("A human-readable description of the warning.")
+      }).describe(
+        "A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
+      )
+    ).optional().describe(
+      "Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
+    )
+  }).describe(
+    "Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
+  ).or(import_zod3.z.null()).optional().describe(
+    "Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
+  ),
   multichannel: import_zod3.z.boolean().nullish().describe(
     "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
   ),
@@ -14437,12 +14663,20 @@ var deleteTranscriptResponse = import_zod3.z.object({
       "email_address",
       "event",
       "filename",
+      "gender",
       "gender_sexuality",
       "healthcare_number",
       "injury",
       "ip_address",
       "language",
       "location",
+      "location_address",
+      "location_address_street",
+      "location_city",
+      "location_coordinate",
+      "location_country",
+      "location_state",
+      "location_zip",
       "marital_status",
       "medical_condition",
       "medical_process",
@@ -14451,6 +14685,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
       "number_sequence",
       "occupation",
       "organization",
+      "organization_medical_facility",
       "passport_number",
       "password",
       "person_age",
@@ -14459,6 +14694,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
       "physical_attribute",
       "political_affiliation",
       "religion",
+      "sexuality",
       "statistics",
       "time",
       "url",
@@ -14466,12 +14702,17 @@ var deleteTranscriptResponse = import_zod3.z.object({
       "username",
       "vehicle_id",
       "zodiac_sign"
-    ]).describe("The type of PII to redact")
+    ]).describe(
+      "The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
+    )
   ).nullish().describe(
     "The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
   ),
   redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
-    "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
+    "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
+  ),
+  redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
+    "Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
   ),
   sentiment_analysis: import_zod3.z.boolean().nullish().describe(
     "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
@@ -14608,20 +14849,23 @@ var deleteTranscriptResponse = import_zod3.z.object({
     "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
   ),
   summary_model: import_zod3.z.string().nullish().describe(
-    "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
+    "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
   ),
   summary_type: import_zod3.z.string().nullish().describe(
-    "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
+    "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
   ),
-  remove_audio_tags: import_zod3.z.enum(["all"]).describe(
-    "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
+  remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
+    'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
   ).or(import_zod3.z.null()).optional().describe(
-    "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
+    'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
   ),
   temperature: import_zod3.z.number().nullish().describe(
     "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
   ),
   text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
+  unredacted_text: import_zod3.z.string().nullish().describe(
+    "The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
+  ),
   throttled: import_zod3.z.boolean().nullish().describe(
     "True while a request is throttled and false when a request is no longer throttled"
   ),
@@ -14658,6 +14902,39 @@ var deleteTranscriptResponse = import_zod3.z.object({
   ).nullish().describe(
     "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
   ),
+  unredacted_utterances: import_zod3.z.array(
+    import_zod3.z.object({
+      confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
+      start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
+      end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
+      text: import_zod3.z.string().describe("The text for this utterance"),
+      words: import_zod3.z.array(
+        import_zod3.z.object({
+          confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
+          start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
+          end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
+          text: import_zod3.z.string().describe("The text of the word"),
+          channel: import_zod3.z.string().nullish().describe(
+            "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
+          ),
+          speaker: import_zod3.z.string().nullable().describe(
+            "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
+          )
+        })
+      ).describe("The words in the utterance."),
+      channel: import_zod3.z.string().nullish().describe(
+        "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
+      ),
+      speaker: import_zod3.z.string().describe(
+        'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
+      ),
+      translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
+        'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
+      )
+    })
+  ).nullish().describe(
+    "The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
+  ),
   webhook_auth: import_zod3.z.boolean().describe(
     "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
   ),
@@ -14686,6 +14963,22 @@ var deleteTranscriptResponse = import_zod3.z.object({
   ).nullish().describe(
     "An array of temporally-sequential word objects, one for each word in the transcript.\n"
   ),
+  unredacted_words: import_zod3.z.array(
+    import_zod3.z.object({
+      confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
+      start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
+      end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
+      text: import_zod3.z.string().describe("The text of the word"),
+      channel: import_zod3.z.string().nullish().describe(
+        "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
+      ),
+      speaker: import_zod3.z.string().nullable().describe(
+        "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
+      )
+    })
+  ).nullish().describe(
+    "The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
+  ),
   acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
   custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
   language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
@@ -14841,7 +15134,21 @@ var streamingTranscriberParams = import_zod4.z.object({
   inactivityTimeout: import_zod4.z.number().optional().describe("From SDK v3"),
   speakerLabels: import_zod4.z.boolean().optional().describe("From SDK v3"),
   maxSpeakers: import_zod4.z.number().optional().describe("From SDK v3"),
-  llmGateway: import_zod4.z.unknown().optional().describe("From SDK v3")
+  voiceFocus: import_zod4.z.unknown().optional().describe("From SDK v3"),
+  voiceFocusThreshold: import_zod4.z.number().optional().describe("From SDK v3"),
+  continuousPartials: import_zod4.z.boolean().optional().describe("From SDK v3"),
+  interruptionDelay: import_zod4.z.number().optional().describe("From SDK v3"),
+  turnLeftPadMs: import_zod4.z.number().optional().describe("From SDK v3"),
+  customerSupportAudioCapture: import_zod4.z.boolean().optional().describe("From SDK v3"),
+  includePartialTurns: import_zod4.z.boolean().optional().describe("From SDK v3"),
+  redactPii: import_zod4.z.boolean().optional().describe("From SDK v3"),
+  redactPiiPolicies: import_zod4.z.unknown().optional().describe("From SDK v3"),
+  redactPiiSub: import_zod4.z.unknown().optional().describe("From SDK v3"),
+  llmGateway: import_zod4.z.unknown().optional().describe("From SDK v3"),
+  webhookUrl: import_zod4.z.string().optional().describe("From SDK v3"),
+  webhookAuthHeaderName: import_zod4.z.string().optional().describe("From SDK v3"),
+  webhookAuthHeaderValue: import_zod4.z.string().optional().describe("From SDK v3"),
+  mode: import_zod4.z.unknown().describe("From SDK v3")
 });
 var streamingUpdateConfigParams = import_zod4.z.object({
   end_utterance_silence_threshold: import_zod4.z.number().min(0).max(2e4).optional().describe("The duration threshold in milliseconds"),
@@ -14853,7 +15160,9 @@ var streamingUpdateConfigParams = import_zod4.z.object({
   format_turns: import_zod4.z.boolean().optional().describe("From SDK v3"),
   keyterms_prompt: import_zod4.z.array(import_zod4.z.string()).optional().describe("From SDK v3"),
   prompt: import_zod4.z.string().optional().describe("From SDK v3"),
-  filter_profanity: import_zod4.z.boolean().optional().describe("From SDK v3")
+  filter_profanity: import_zod4.z.boolean().optional().describe("From SDK v3"),
+  interruption_delay: import_zod4.z.number().optional().describe("From SDK v3"),
+  turn_left_pad_ms: import_zod4.z.number().optional().describe("From SDK v3")
 });
 // src/generated/gladia/api/gladiaControlAPI.zod.ts
@@ -15602,7 +15911,7 @@ var preRecordedControllerInitPreRecordedJobV2BodyNamedEntityRecognitionDefault =
 var preRecordedControllerInitPreRecordedJobV2BodyCustomSpellingDefault = false;
 var preRecordedControllerInitPreRecordedJobV2BodySentimentAnalysisDefault = false;
 var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmDefault = false;
-var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
+var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
 var preRecordedControllerInitPreRecordedJobV2BodyPiiRedactionDefault = false;
 var preRecordedControllerInitPreRecordedJobV2BodySentencesDefault = false;
 var preRecordedControllerInitPreRecordedJobV2BodyPunctuationEnhancedDefault = false;
@@ -15891,23 +16200,23 @@ var preRecordedControllerInitPreRecordedJobV2Body = import_zod5.z.object({
       "Forces the translation to use informal language forms when available in the target language."
     )
   }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
-  summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
+  summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
   summarization_config: import_zod5.z.object({
     type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(preRecordedControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault).describe("The type of summarization to apply")
-  }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
+  }).optional().describe("Summarization configuration, if `summarization` is enabled"),
   named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
   custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
   custom_spelling_config: import_zod5.z.object({
     spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
   }).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
   sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
-  audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
+  audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
   audio_to_llm_config: import_zod5.z.object({
     prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
     model: import_zod5.z.string().default(preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault).describe(
       "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
     )
-  }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
+  }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
   pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
   pii_redaction_config: import_zod5.z.object({
     entity_types: import_zod5.z.enum([
@@ -16162,7 +16471,7 @@ var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsNamed
 var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsCustomSpellingDefault = false;
 var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
 var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmDefault = false;
-var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
+var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
 var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsPiiRedactionDefault = false;
 var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSentencesDefault = false;
 var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
@@ -16510,12 +16819,12 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
             "Forces the translation to use informal language forms when available in the target language."
           )
         }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
-        summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
+        summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
         summarization_config: import_zod5.z.object({
           type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
             preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
           ).describe("The type of summarization to apply")
-        }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
+        }).optional().describe("Summarization configuration, if `summarization` is enabled"),
         named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
         custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
         custom_spelling_config: import_zod5.z.object({
@@ -16524,7 +16833,7 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
           "**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
         ),
         sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
-        audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
+        audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
         audio_to_llm_config: import_zod5.z.object({
           prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
           model: import_zod5.z.string().default(
@@ -16532,7 +16841,7 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
           ).describe(
             "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
           )
-        }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
+        }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
         pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
         pii_redaction_config: import_zod5.z.object({
           entity_types: import_zod5.z.enum([
@@ -17669,7 +17978,7 @@ var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsNamedEntityReco
 var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsCustomSpellingDefault = false;
 var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSentimentAnalysisDefault = false;
 var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmDefault = false;
-var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
+var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
 var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsPiiRedactionDefault = false;
 var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSentencesDefault = false;
 var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsPunctuationEnhancedDefault = false;
@@ -18010,19 +18319,19 @@ var preRecordedControllerGetPreRecordedJobV2Response = import_zod5.z.object({
         "Forces the translation to use informal language forms when available in the target language."
       )
     }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
-    summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
+    summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
     summarization_config: import_zod5.z.object({
       type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
         preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSummarizationConfigTypeDefault
       ).describe("The type of summarization to apply")
-    }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
+    }).optional().describe("Summarization configuration, if `summarization` is enabled"),
     named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
     custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
     custom_spelling_config: import_zod5.z.object({
       spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
     }).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
     sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
-    audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
+    audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
     audio_to_llm_config: import_zod5.z.object({
       prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
       model: import_zod5.z.string().default(
@@ -18030,7 +18339,7 @@ var preRecordedControllerGetPreRecordedJobV2Response = import_zod5.z.object({
       ).describe(
         "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
       )
-    }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
+    }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
     pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
     pii_redaction_config: import_zod5.z.object({
       entity_types: import_zod5.z.enum([
@@ -19143,7 +19452,7 @@ var transcriptionControllerInitPreRecordedJobV2BodyNamedEntityRecognitionDefault
 var transcriptionControllerInitPreRecordedJobV2BodyCustomSpellingDefault = false;
 var transcriptionControllerInitPreRecordedJobV2BodySentimentAnalysisDefault = false;
 var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmDefault = false;
-var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
+var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
 var transcriptionControllerInitPreRecordedJobV2BodyPiiRedactionDefault = false;
 var transcriptionControllerInitPreRecordedJobV2BodySentencesDefault = false;
 var transcriptionControllerInitPreRecordedJobV2BodyPunctuationEnhancedDefault = false;
@@ -19436,23 +19745,23 @@ var transcriptionControllerInitPreRecordedJobV2Body = import_zod5.z.object({
       "Forces the translation to use informal language forms when available in the target language."
     )
   }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
-  summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
+  summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
   summarization_config: import_zod5.z.object({
     type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(transcriptionControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault).describe("The type of summarization to apply")
-  }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
+  }).optional().describe("Summarization configuration, if `summarization` is enabled"),
   named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
   custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
   custom_spelling_config: import_zod5.z.object({
     spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
   }).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
   sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
-  audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
+  audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
   audio_to_llm_config: import_zod5.z.object({
     prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
     model: import_zod5.z.string().default(transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault).describe(
       "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
     )
-  }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
+  }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
   pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
   pii_redaction_config: import_zod5.z.object({
     entity_types: import_zod5.z.enum([
@@ -19710,7 +20019,7 @@ var transcriptionControllerListV2ResponseItemsItemRequestParamsNamedEntityRecogn
 var transcriptionControllerListV2ResponseItemsItemRequestParamsCustomSpellingDefault = false;
 var transcriptionControllerListV2ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
 var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmDefault = false;
-var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
+var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
 var transcriptionControllerListV2ResponseItemsItemRequestParamsPiiRedactionDefault = false;
 var transcriptionControllerListV2ResponseItemsItemRequestParamsSentencesDefault = false;
 var transcriptionControllerListV2ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
@@ -20121,12 +20430,12 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
             "Forces the translation to use informal language forms when available in the target language."
           )
         }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
-        summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
+        summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
         summarization_config: import_zod5.z.object({
           type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
             transcriptionControllerListV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
           ).describe("The type of summarization to apply")
-        }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
+        }).optional().describe("Summarization configuration, if `summarization` is enabled"),
         named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
         custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
         custom_spelling_config: import_zod5.z.object({
@@ -20135,7 +20444,7 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
           "**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
         ),
         sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
-        audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
+        audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
         audio_to_llm_config: import_zod5.z.object({
           prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
           model: import_zod5.z.string().default(
@@ -20143,7 +20452,7 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
           ).describe(
             "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
           )
-        }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
+        }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
         pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
         pii_redaction_config: import_zod5.z.object({
           entity_types: import_zod5.z.enum([
@@ -22461,7 +22770,7 @@ var transcriptionControllerGetTranscriptV2ResponseRequestParamsNamedEntityRecogn
 var transcriptionControllerGetTranscriptV2ResponseRequestParamsCustomSpellingDefault = false;
 var transcriptionControllerGetTranscriptV2ResponseRequestParamsSentimentAnalysisDefault = false;
 var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmDefault = false;
-var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
+var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
 var transcriptionControllerGetTranscriptV2ResponseRequestParamsPiiRedactionDefault = false;
 var transcriptionControllerGetTranscriptV2ResponseRequestParamsSentencesDefault = false;
 var transcriptionControllerGetTranscriptV2ResponseRequestParamsPunctuationEnhancedDefault = false;
@@ -22866,19 +23175,19 @@ var transcriptionControllerGetTranscriptV2Response = import_zod5.z.discriminated
           "Forces the translation to use informal language forms when available in the target language."
         )
       }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
-      summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
+      summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
       summarization_config: import_zod5.z.object({
         type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
           transcriptionControllerGetTranscriptV2ResponseRequestParamsSummarizationConfigTypeDefault
         ).describe("The type of summarization to apply")
-      }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
+      }).optional().describe("Summarization configuration, if `summarization` is enabled"),
       named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
       custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
       custom_spelling_config: import_zod5.z.object({
         spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
       }).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
       sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
-      audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
+      audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
       audio_to_llm_config: import_zod5.z.object({
         prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
         model: import_zod5.z.string().default(
@@ -22886,7 +23195,7 @@ var transcriptionControllerGetTranscriptV2Response = import_zod5.z.discriminated
         ).describe(
           "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
         )
-      }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
+      }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
       pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
       pii_redaction_config: import_zod5.z.object({
         entity_types: import_zod5.z.enum([
@@ -25598,7 +25907,7 @@ var historyControllerGetListV1ResponseItemsItemRequestParamsNamedEntityRecogniti
 var historyControllerGetListV1ResponseItemsItemRequestParamsCustomSpellingDefault = false;
 var historyControllerGetListV1ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
 var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmDefault = false;
-var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
+var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
 var historyControllerGetListV1ResponseItemsItemRequestParamsPiiRedactionDefault = false;
 var historyControllerGetListV1ResponseItemsItemRequestParamsSentencesDefault = false;
 var historyControllerGetListV1ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
@@ -26009,12 +26318,12 @@ var historyControllerGetListV1Response = import_zod5.z.object({
             "Forces the translation to use informal language forms when available in the target language."
           )
         }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
-        summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
+        summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
         summarization_config: import_zod5.z.object({
           type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
             historyControllerGetListV1ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
           ).describe("The type of summarization to apply")
-        }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
+        }).optional().describe("Summarization configuration, if `summarization` is enabled"),
         named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
         custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
         custom_spelling_config: import_zod5.z.object({
@@ -26023,7 +26332,7 @@ var historyControllerGetListV1Response = import_zod5.z.object({
           "**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
         ),
         sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
-        audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
+        audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
         audio_to_llm_config: import_zod5.z.object({
           prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
           model: import_zod5.z.string().default(
@@ -26031,7 +26340,7 @@ var historyControllerGetListV1Response = import_zod5.z.object({
           ).describe(
             "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
           )
-        }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
+        }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
         pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
         pii_redaction_config: import_zod5.z.object({
           entity_types: import_zod5.z.enum([
@@ -31276,6 +31585,7 @@ __export(openAIAudioRealtimeAPI_zod_exports, {
   createRealtimeClientSecretBodySessionPromptVariablesTypeDefault: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefault,
   createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultOne: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultOne,
   createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultTwo: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultTwo,
+  createRealtimeClientSecretBodySessionReasoningEffortDefault: () => createRealtimeClientSecretBodySessionReasoningEffortDefault,
   createRealtimeClientSecretBodySessionToolChoiceDefault: () => createRealtimeClientSecretBodySessionToolChoiceDefault,
   createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne: () => createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne,
   createRealtimeClientSecretBodySessionTracingDefault: () => createRealtimeClientSecretBodySessionTracingDefault,
@@ -31300,6 +31610,7 @@ __export(openAIAudioRealtimeAPI_zod_exports, {
   createRealtimeClientSecretResponseSessionPromptVariablesTypeDefault: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefault,
   createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultOne: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultOne,
   createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo,
+  createRealtimeClientSecretResponseSessionReasoningEffortDefault: () => createRealtimeClientSecretResponseSessionReasoningEffortDefault,
   createRealtimeClientSecretResponseSessionToolChoiceDefault: () => createRealtimeClientSecretResponseSessionToolChoiceDefault,
   createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne: () => createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne,
   createRealtimeClientSecretResponseSessionTracingDefaultOne: () => createRealtimeClientSecretResponseSessionTracingDefaultOne,
@@ -31656,6 +31967,7 @@ var createRealtimeClientSecretBodySessionTracingDefaultOne = "auto";
 var createRealtimeClientSecretBodySessionTracingDefault = null;
 var createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne = "always";
 var createRealtimeClientSecretBodySessionToolChoiceDefault = "auto";
+var createRealtimeClientSecretBodySessionReasoningEffortDefault = "low";
 var createRealtimeClientSecretBodySessionTruncationRetentionRatioMin = 0;
 var createRealtimeClientSecretBodySessionTruncationRetentionRatioMax = 1;
 var createRealtimeClientSecretBodySessionTruncationTokenLimitsPostInstructionsMin = 0;
@@ -31691,6 +32003,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
       import_zod6.z.enum([
         "gpt-realtime",
         "gpt-realtime-1.5",
+        "gpt-realtime-2",
         "gpt-realtime-2025-08-28",
         "gpt-4o-realtime-preview",
         "gpt-4o-realtime-preview-2024-10-01",
@@ -31731,16 +32044,20 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
               "gpt-4o-mini-transcribe",
               "gpt-4o-mini-transcribe-2025-12-15",
               "gpt-4o-transcribe",
-              "gpt-4o-transcribe-diarize"
+              "gpt-4o-transcribe-diarize",
+              "gpt-realtime-whisper"
             ])
           ).optional().describe(
-            "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
+            "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
           ),
           language: import_zod6.z.string().optional().describe(
             "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
           ),
           prompt: import_zod6.z.string().optional().describe(
-            'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
+            'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
+          ),
+          delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
+            "Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
           )
         }).optional(),
         noise_reduction: import_zod6.z.object({
@@ -31807,7 +32124,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
             "Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
           )
         ]).describe(
-          'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
+          'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
         ).or(import_zod6.z.null()).optional()
       }).optional(),
       output: import_zod6.z.object({
@@ -31880,7 +32197,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
           server_label: import_zod6.z.string().describe(
             "A label for this MCP server, used to identify it in tool calls.\n"
           ),
-          server_url: import_zod6.z.string().optional().describe(
+          server_url: import_zod6.z.string().url().optional().describe(
             "The URL for the MCP server. One of `server_url` or `connector_id` must be\nprovided.\n"
           ),
           connector_id: import_zod6.z.enum([
@@ -31958,6 +32275,16 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
     ).default(createRealtimeClientSecretBodySessionToolChoiceDefault).describe(
       "How the model chooses tools. Provide one of the string modes or force a specific\nfunction/MCP tool.\n"
     ),
+    parallel_tool_calls: import_zod6.z.boolean().optional().describe(
+      "Whether the model may call multiple tools in parallel. Only supported by\nreasoning Realtime models such as `gpt-realtime-2`.\n"
+    ),
+    reasoning: import_zod6.z.object({
+      effort: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).default(createRealtimeClientSecretBodySessionReasoningEffortDefault).describe(
+        "Constrains effort on reasoning for reasoning-capable Realtime models such as\n`gpt-realtime-2`.\n"
+      )
+    }).optional().describe(
+      "Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.\n"
+    ),
     max_output_tokens: import_zod6.z.number().or(import_zod6.z.enum(["inf"])).optional().describe(
       "Maximum number of output tokens for a single assistant response,\ninclusive of tool calls. Provide an integer between 1 and 4096 to\nlimit output tokens, or `inf` for the maximum available tokens for a\ngiven model. Defaults to `inf`.\n"
     ),
@@ -31997,7 +32324,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
         ).or(
           import_zod6.z.object({
             type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
-            image_url: import_zod6.z.string().describe(
+            image_url: import_zod6.z.string().url().describe(
               "The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
             ).or(import_zod6.z.null()).optional(),
             file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
@@ -32011,7 +32338,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
             file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
             filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
             file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
-            file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
+            file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
             detail: import_zod6.z.enum(["low", "high"]).optional()
           }).describe("A file input to the model.")
         )
@@ -32047,16 +32374,20 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
                 "gpt-4o-mini-transcribe",
                 "gpt-4o-mini-transcribe-2025-12-15",
                 "gpt-4o-transcribe",
-                "gpt-4o-transcribe-diarize"
+                "gpt-4o-transcribe-diarize",
+                "gpt-realtime-whisper"
               ])
             ).optional().describe(
-              "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
+              "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
             ),
             language: import_zod6.z.string().optional().describe(
               "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
             ),
             prompt: import_zod6.z.string().optional().describe(
-              'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
+              'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
+            ),
+            delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
+              "Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
             )
           }).optional(),
           noise_reduction: import_zod6.z.object({
@@ -32123,7 +32454,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
               "Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
             )
           ]).describe(
-            'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
+            'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
           ).or(import_zod6.z.null()).optional()
         }).optional()
       }).optional().describe("Configuration for input and output audio.\n"),
@@ -32154,6 +32485,7 @@ var createRealtimeClientSecretResponseSessionTracingDefaultTwo = "auto";
 var createRealtimeClientSecretResponseSessionTracingDefaultOne = null;
 var createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne = "always";
 var createRealtimeClientSecretResponseSessionToolChoiceDefault = "auto";
+var createRealtimeClientSecretResponseSessionReasoningEffortDefault = "low";
 var createRealtimeClientSecretResponseSessionTruncationRetentionRatioMin = 0;
 var createRealtimeClientSecretResponseSessionTruncationRetentionRatioMax = 1;
 var createRealtimeClientSecretResponseSessionTruncationTokenLimitsPostInstructionsMin = 0;
@@ -32163,17 +32495,14 @@ var createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo = "in
 var createRealtimeClientSecretResponse = import_zod6.z.object({
   value: import_zod6.z.string().describe("The generated client secret value."),
   expires_at: import_zod6.z.number().describe("Expiration timestamp for the client secret, in seconds since epoch."),
-  session: import_zod6.z.discriminatedUnion("type", [
+  session: import_zod6.z.union([
     import_zod6.z.object({
-      client_secret: import_zod6.z.object({
-        value: import_zod6.z.string().describe(
-          "Ephemeral key usable in client environments to authenticate connections to the Realtime API. Use this in client-side environments rather than a standard API token, which should only be used server-side.\n"
-        ),
-        expires_at: import_zod6.z.number().describe(
-          "Timestamp for when the token expires. Currently, all tokens expire\nafter one minute.\n"
-        )
-      }).describe("Ephemeral key returned by the API."),
       type: import_zod6.z.enum(["realtime"]).describe("The type of session to create. Always `realtime` for the Realtime API.\n"),
+      id: import_zod6.z.string().describe(
+        "Unique identifier for the session that looks like `sess_1234567890abcdef`.\n"
+      ),
+      object: import_zod6.z.enum(["realtime.session"]).describe("The object type. Always `realtime.session`."),
+      expires_at: import_zod6.z.number().optional().describe("Expiration timestamp for the session, in seconds since epoch."),
       output_modalities: import_zod6.z.array(import_zod6.z.enum(["text", "audio"])).default(createRealtimeClientSecretResponseSessionOutputModalitiesDefault).describe(
         'The set of modalities the model can respond with. It defaults to `["audio"]`, indicating\nthat the model will respond with audio plus a transcript. `["text"]` can be used to make\nthe model respond with text only. It is not possible to request both `text` and `audio` at the same time.\n'
       ),
@@ -32181,6 +32510,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
         import_zod6.z.enum([
           "gpt-realtime",
           "gpt-realtime-1.5",
+          "gpt-realtime-2",
           "gpt-realtime-2025-08-28",
           "gpt-4o-realtime-preview",
           "gpt-4o-realtime-preview-2024-10-01",
@@ -32203,15 +32533,15 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
       audio: import_zod6.z.object({
         input: import_zod6.z.object({
           format: import_zod6.z.object({
-            type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
-            rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
+            type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
+            rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
           }).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
             import_zod6.z.object({
-              type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
+              type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
             }).describe("The G.711 \u03BC-law format.")
           ).or(
             import_zod6.z.object({
-              type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
+              type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
             }).describe("The G.711 A-law format.")
           ).optional(),
           transcription: import_zod6.z.object({
@@ -32221,20 +32551,19 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
                 "gpt-4o-mini-transcribe",
                 "gpt-4o-mini-transcribe-2025-12-15",
                 "gpt-4o-transcribe",
-                "gpt-4o-transcribe-diarize"
+                "gpt-4o-transcribe-diarize",
+                "gpt-realtime-whisper"
               ])
             ).optional().describe(
-              "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
-            ),
-            language: import_zod6.z.string().optional().describe(
-              "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
+              "The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
             ),
+            language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
             prompt: import_zod6.z.string().optional().describe(
-              'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
+              "The prompt configured for input audio transcription, when present.\n"
             )
           }).optional(),
           noise_reduction: import_zod6.z.object({
-            type: import_zod6.z.enum(["near_field", "far_field"]).describe(
+            type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
               "Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones such as laptop or conference room microphones.\n"
             )
           }).optional().describe(
@@ -32297,20 +32626,20 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
               "Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
             )
           ]).describe(
-            'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
+            'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
           ).or(import_zod6.z.null()).optional()
         }).optional(),
         output: import_zod6.z.object({
           format: import_zod6.z.object({
-            type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
-            rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
+            type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
+            rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
           }).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
             import_zod6.z.object({
-              type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
+              type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
             }).describe("The G.711 \u03BC-law format.")
           ).or(
             import_zod6.z.object({
-              type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
+              type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
             }).describe("The G.711 A-law format.")
           ).optional(),
           voice: import_zod6.z.string().or(
@@ -32354,7 +32683,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
       ).or(import_zod6.z.null()).optional(),
       tools: import_zod6.z.array(
         import_zod6.z.object({
-          type: import_zod6.z.enum(["function"]).describe("The type of the tool, i.e. `function`."),
+          type: import_zod6.z.enum(["function"]).optional().describe("The type of the tool, i.e. `function`."),
           name: import_zod6.z.string().optional().describe("The name of the function."),
           description: import_zod6.z.string().optional().describe(
             "The description of the function, including guidance on when and how\nto call it, and guidance about what to tell the user when calling\n(if anything).\n"
@@ -32366,7 +32695,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
             server_label: import_zod6.z.string().describe(
               "A label for this MCP server, used to identify it in tool calls.\n"
             ),
-            server_url: import_zod6.z.string().optional().describe(
+            server_url: import_zod6.z.string().url().optional().describe(
               "The URL for the MCP server. One of `server_url` or `connector_id` must be\nprovided.\n"
             ),
             connector_id: import_zod6.z.enum([
@@ -32378,7 +32707,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
               "connector_outlookcalendar",
               "connector_outlookemail",
               "connector_sharepoint"
-            ]).describe(
+            ]).optional().describe(
               "Identifier for service connectors, like those available in ChatGPT. One of\n`server_url` or `connector_id` must be provided. Learn more about service\nconnectors [here](/docs/guides/tools-remote-mcp#connectors).\n\nCurrently supported `connector_id` values are:\n\n- Dropbox: `connector_dropbox`\n- Gmail: `connector_gmail`\n- Google Calendar: `connector_googlecalendar`\n- Google Drive: `connector_googledrive`\n- Microsoft Teams: `connector_microsoftteams`\n- Outlook Calendar: `connector_outlookcalendar`\n- Outlook Email: `connector_outlookemail`\n- SharePoint: `connector_sharepoint`\n"
             ),
             authorization: import_zod6.z.string().optional().describe(
@@ -32444,6 +32773,13 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
       ).default(createRealtimeClientSecretResponseSessionToolChoiceDefault).describe(
         "How the model chooses tools. Provide one of the string modes or force a specific\nfunction/MCP tool.\n"
       ),
+      reasoning: import_zod6.z.object({
+        effort: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).default(createRealtimeClientSecretResponseSessionReasoningEffortDefault).describe(
+          "Constrains effort on reasoning for reasoning-capable Realtime models such as\n`gpt-realtime-2`.\n"
+        )
+      }).optional().describe(
+        "Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.\n"
+      ),
       max_output_tokens: import_zod6.z.number().or(import_zod6.z.enum(["inf"])).optional().describe(
         "Maximum number of output tokens for a single assistant response,\ninclusive of tool calls. Provide an integer between 1 and 4096 to\nlimit output tokens, or `inf` for the maximum available tokens for a\ngiven model. Defaults to `inf`.\n"
       ),
@@ -32483,7 +32819,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
           ).or(
             import_zod6.z.object({
               type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
-              image_url: import_zod6.z.string().describe(
+              image_url: import_zod6.z.string().url().describe(
                 "The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
               ).or(import_zod6.z.null()).optional(),
               file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
@@ -32497,8 +32833,8 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
               file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
               filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
               file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
-              file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
-              detail: import_zod6.z.enum(["low", "high"])
+              file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
+              detail: import_zod6.z.enum(["low", "high"]).optional()
             }).describe("A file input to the model.")
           )
         ).describe(
@@ -32507,9 +32843,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
       }).describe(
         "Reference to a prompt template and its variables.\n[Learn more](/docs/guides/text?api-mode=responses#reusable-prompts).\n"
       ).or(import_zod6.z.null()).optional()
-    }).describe(
-      "A new Realtime session configuration, with an ephemeral key. Default TTL\nfor keys is one minute.\n"
-    ),
+    }).describe("A Realtime session configuration object.\n"),
     import_zod6.z.object({
       type: import_zod6.z.enum(["transcription"]).describe(
         "The type of session. Always `transcription` for transcription sessions.\n"
@@ -32525,15 +32859,15 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
       audio: import_zod6.z.object({
         input: import_zod6.z.object({
           format: import_zod6.z.object({
-            type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
-            rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
+            type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
+            rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
           }).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
             import_zod6.z.object({
-              type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
+              type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
             }).describe("The G.711 \u03BC-law format.")
           ).or(
             import_zod6.z.object({
-              type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
+              type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
             }).describe("The G.711 A-law format.")
           ).optional(),
           transcription: import_zod6.z.object({
@@ -32543,20 +32877,19 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
                 "gpt-4o-mini-transcribe",
                 "gpt-4o-mini-transcribe-2025-12-15",
                 "gpt-4o-transcribe",
-                "gpt-4o-transcribe-diarize"
+                "gpt-4o-transcribe-diarize",
+                "gpt-realtime-whisper"
               ])
             ).optional().describe(
-              "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
-            ),
-            language: import_zod6.z.string().optional().describe(
-              "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
+              "The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
             ),
+            language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
             prompt: import_zod6.z.string().optional().describe(
-              'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
+              "The prompt configured for input audio transcription, when present.\n"
             )
           }).optional(),
           noise_reduction: import_zod6.z.object({
-            type: import_zod6.z.enum(["near_field", "far_field"]).describe(
+            type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
               "Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones such as laptop or conference room microphones.\n"
             )
           }).optional().describe("Configuration for input audio noise reduction.\n"),
@@ -32573,8 +32906,10 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
             silence_duration_ms: import_zod6.z.number().optional().describe(
               "Duration of silence to detect speech stop (in milliseconds). Defaults\nto 500ms. With shorter values the model will respond more quickly,\nbut may jump in on short pauses from the user.\n"
             )
-          }).optional().describe(
-            "Configuration for turn detection. Can be set to `null` to turn off. Server\nVAD means that the model will detect the start and end of speech based on\naudio volume and respond at the end of user speech.\n"
+          }).describe(
+            "Configuration for turn detection. Can be set to `null` to turn off. Server\nVAD means that the model will detect the start and end of speech based on\naudio volume and respond at the end of user speech. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported.\n"
+          ).or(import_zod6.z.null()).optional().describe(
+            "Configuration for turn detection. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported.\n"
           )
         }).optional()
       }).optional().describe("Configuration for input audio for the session.\n")
@@ -32714,7 +33049,7 @@ var createRealtimeSessionBody = import_zod6.z.object({
       ).or(
         import_zod6.z.object({
           type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
-          image_url: import_zod6.z.string().describe(
+          image_url: import_zod6.z.string().url().describe(
             "The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
           ).or(import_zod6.z.null()).optional(),
           file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
@@ -32728,7 +33063,7 @@ var createRealtimeSessionBody = import_zod6.z.object({
           file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
           filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
           file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
-          file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
+          file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
           detail: import_zod6.z.enum(["low", "high"]).optional()
         }).describe("A file input to the model.")
       )
@@ -32777,17 +33112,14 @@ var createRealtimeSessionResponse = import_zod6.z.object({
             "gpt-4o-mini-transcribe",
             "gpt-4o-mini-transcribe-2025-12-15",
             "gpt-4o-transcribe",
-            "gpt-4o-transcribe-diarize"
+            "gpt-4o-transcribe-diarize",
+            "gpt-realtime-whisper"
           ])
         ).optional().describe(
-          "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
+          "The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
         ),
-        language: import_zod6.z.string().optional().describe(
-          "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
-        ),
-        prompt: import_zod6.z.string().optional().describe(
-          'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
-        )
+        language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
+        prompt: import_zod6.z.string().optional().describe("The prompt configured for input audio transcription, when present.\n")
       }).optional(),
       noise_reduction: import_zod6.z.object({
         type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
@@ -32913,16 +33245,20 @@ var createRealtimeTranscriptionSessionBody = import_zod6.z.object({
         "gpt-4o-mini-transcribe",
         "gpt-4o-mini-transcribe-2025-12-15",
         "gpt-4o-transcribe",
-        "gpt-4o-transcribe-diarize"
+        "gpt-4o-transcribe-diarize",
+        "gpt-realtime-whisper"
       ])
     ).optional().describe(
-      "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
+      "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
     ),
     language: import_zod6.z.string().optional().describe(
       "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
     ),
     prompt: import_zod6.z.string().optional().describe(
-      'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
+      'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
+    ),
+    delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
+      "Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
     )
   }).optional(),
   include: import_zod6.z.array(import_zod6.z.enum(["item.input_audio_transcription.logprobs"])).optional().describe(
@@ -32951,17 +33287,14 @@ var createRealtimeTranscriptionSessionResponse = import_zod6.z.object({
         "gpt-4o-mini-transcribe",
         "gpt-4o-mini-transcribe-2025-12-15",
         "gpt-4o-transcribe",
-        "gpt-4o-transcribe-diarize"
+        "gpt-4o-transcribe-diarize",
+        "gpt-realtime-whisper"
       ])
     ).optional().describe(
-      "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
+      "The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
     ),
-    language: import_zod6.z.string().optional().describe(
-      "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
-    ),
-    prompt: import_zod6.z.string().optional().describe(
-      'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
-    )
+    language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
+    prompt: import_zod6.z.string().optional().describe("The prompt configured for input audio transcription, when present.\n")
   }).optional(),
   turn_detection: import_zod6.z.object({
     type: import_zod6.z.string().optional().describe("Type of turn detection, only `server_vad` is currently supported.\n"),
@@ -36346,6 +36679,7 @@ __export(sonioxPublicAPI_zod_exports, {
   createTranscriptionBodyWebhookUrlRegExpOne: () => createTranscriptionBodyWebhookUrlRegExpOne,
   deleteFileParams: () => deleteFileParams,
   deleteTranscriptionParams: () => deleteTranscriptionParams,
+  getConcurrencyLimitsResponse: () => getConcurrencyLimitsResponse,
   getFileParams: () => getFileParams,
   getFileResponse: () => getFileResponse,
   getFilesCountResponse: () => getFilesCountResponse,
@@ -36363,6 +36697,12 @@ __export(sonioxPublicAPI_zod_exports, {
   getTranscriptionsQueryLimitMax: () => getTranscriptionsQueryLimitMax,
   getTranscriptionsQueryParams: () => getTranscriptionsQueryParams,
   getTranscriptionsResponse: () => getTranscriptionsResponse,
+  getTtsModelsResponse: () => getTtsModelsResponse,
+  getUsageLogsQueryLimitDefault: () => getUsageLogsQueryLimitDefault,
+  getUsageLogsQueryLimitMax: () => getUsageLogsQueryLimitMax,
+  getUsageLogsQueryParams: () => getUsageLogsQueryParams,
+  getUsageLogsQuerySortDefault: () => getUsageLogsQuerySortDefault,
+  getUsageLogsResponse: () => getUsageLogsResponse,
   uploadFileBody: () => uploadFileBody,
   uploadFileBodyClientReferenceIdMaxOne: () => uploadFileBodyClientReferenceIdMaxOne
 });
@@ -36613,11 +36953,73 @@ var getModelsResponse = import_zod10.z.object({
     })
   ).describe("List of available models and their attributes.")
 });
+var getTtsModelsResponse = import_zod10.z.object({ // Zod schema: an object whose only key is the `models` array.
+  models: import_zod10.z.array(
+    import_zod10.z.object({ // One entry per model; none of the keys below is marked optional.
+      id: import_zod10.z.string().describe("Unique identifier of the model."),
+      aliased_model_id: import_zod10.z.string().or(import_zod10.z.null()).describe("If this is an alias, the id of the aliased model."), // key is required, but its value may be null
+      name: import_zod10.z.string().describe("Name of the model."),
+      voices: import_zod10.z.array(
+        import_zod10.z.object({
+          id: import_zod10.z.string().describe("Unique identifier of the voice."),
+          description: import_zod10.z.string().describe("Description of the TTS voice."),
+          gender: import_zod10.z.enum(["male", "female", "neutral"]) // closed set of three values; no description attached
+        })
+      ).describe("List of available voices for this model."),
+      languages: import_zod10.z.array(
+        import_zod10.z.object({
+          code: import_zod10.z.string().describe("2-letter language code."), // only typed as string; the 2-letter length is not enforced by the schema
+          name: import_zod10.z.string().describe("Language name.")
+        })
+      ).describe("List of languages supported by the model.")
+    })
+  ).describe("List of available TTS models and their attributes.")
+});
+var getUsageLogsQueryLimitDefault = 1e3; // default applied to `limit` in getUsageLogsQueryParams
+var getUsageLogsQueryLimitMax = 1e3; // upper bound for `limit`; currently equal to the default
+var getUsageLogsQuerySortDefault = "end_time_asc"; // default applied to `sort`; must be one of the enum values below
+var getUsageLogsQueryParams = import_zod10.z.object({ // Zod schema for the usage-logs query parameters.
+  start_time: import_zod10.z.string().describe("Start of the time window (inclusive). Filters by request end time."), // required; plain string, no datetime format check here
+  end_time: import_zod10.z.string().describe("End of the time window (exclusive). Filters by request end time."), // required; plain string, no datetime format check here
+  limit: import_zod10.z.number().min(1).max(getUsageLogsQueryLimitMax).default(getUsageLogsQueryLimitDefault).describe("Maximum number of usage log entries to return."), // 1..1000, falls back to 1000 when omitted
+  sort: import_zod10.z.enum(["end_time_asc", "end_time_desc"]).default(getUsageLogsQuerySortDefault).describe(
+    // NOTE(review): the description text below has no space after "end_time."; it is emitted text, so it is left untouched here.
+    "Sort order by end_time.Use `end_time_desc` to get the most recent entries first. When paginating, pass the same `sort` value alongside the cursor."
+  ),
+  cursor: import_zod10.z.string().or(import_zod10.z.null()).optional().describe("Pagination cursor for the next page of results.") // may be a string, null, or omitted
+});
+var getUsageLogsResponse = import_zod10.z.object({ // Zod schema: `usage_logs` array plus an optional `next_page_cursor`.
+  usage_logs: import_zod10.z.array(
+    import_zod10.z.object({ // One entry per request; every key below is required.
+      uuid: import_zod10.z.string().uuid().describe("Unique identifier of the request."),
+      request_scope: import_zod10.z.string().describe("Scope of the request (api / playground)."), // typed as free string, not an enum
+      client_reference_id: import_zod10.z.string().describe("Client reference ID supplied on the original request. Empty string if none."),
+      model: import_zod10.z.string().describe("Model identifier."),
+      start_time: import_zod10.z.string().datetime({}).describe("When the request started."), // validated as a datetime string, unlike the query params
+      end_time: import_zod10.z.string().datetime({}).describe("When the request ended."),
+      // Token and duration counters are numbers.
+      input_text_tokens: import_zod10.z.number(),
+      input_audio_tokens: import_zod10.z.number(),
+      input_audio_duration_ms: import_zod10.z.number(),
+      output_text_tokens: import_zod10.z.number(),
+      output_audio_tokens: import_zod10.z.number(),
+      output_audio_duration_ms: import_zod10.z.number(),
+      // Cost fields are strings, not numbers. NOTE(review): presumably decimal amounts in USD (per the `_usd` suffix) - confirm format with the API docs.
+      cost_usd: import_zod10.z.string(),
+      input_cost_usd: import_zod10.z.string(),
+      input_text_cost_usd: import_zod10.z.string(),
+      input_audio_cost_usd: import_zod10.z.string(),
+      output_cost_usd: import_zod10.z.string(),
+      output_text_cost_usd: import_zod10.z.string(),
+      output_audio_cost_usd: import_zod10.z.string()
+    })
+  ).describe("Per-request usage log entries ordered by end_time, uuid (per `sort`)."),
+  next_page_cursor: import_zod10.z.string().or(import_zod10.z.null()).optional().describe( // may be a string, null, or omitted
+    "A pagination token that references the next page of results. When more data is available, this field contains a value to pass in the cursor parameter of a subsequent request. When null, no additional results are available."
+  )
+});
 var createTemporaryApiKeyBodyExpiresInSecondsMax = 3600;
 var createTemporaryApiKeyBodyClientReferenceIdMaxOne = 256;
 var createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne = 18e3;
 var createTemporaryApiKeyBody = import_zod10.z.object({
-  usage_type: import_zod10.z.enum(["transcribe_websocket"]),
+  usage_type: import_zod10.z.enum(["transcribe_websocket", "tts_rt"]),
   expires_in_seconds: import_zod10.z.number().min(1).max(createTemporaryApiKeyBodyExpiresInSecondsMax).describe("Duration in seconds until the temporary API key expires."),
   client_reference_id: import_zod10.z.string().max(createTemporaryApiKeyBodyClientReferenceIdMaxOne).or(import_zod10.z.null()).optional().describe("Optional tracking identifier string. Does not need to be unique."),
   single_use: import_zod10.z.boolean().or(import_zod10.z.null()).optional().describe("If true, the temporary API key can be used only once."),
@@ -36625,6 +37027,28 @@ var createTemporaryApiKeyBody = import_zod10.z.object({
     "Maximum WebSocket connection duration in seconds. If exceeded, the connection will be dropped. If not set, no limit is applied."
   )
 });
+var getConcurrencyLimitsResponse = import_zod10.z.object({ // Zod schema: `project` and `organization`, both with the same current/limits shape.
+  project: import_zod10.z.object({
+    current: import_zod10.z.object({ // current counts are always numbers
+      transcribe_concurrent: import_zod10.z.number(),
+      tts_concurrent: import_zod10.z.number()
+    }).describe("Live counts read from Redis"),
+    limits: import_zod10.z.object({ // each limit is a number or null
+      transcribe_concurrent: import_zod10.z.number().or(import_zod10.z.null()),
+      tts_concurrent: import_zod10.z.number().or(import_zod10.z.null())
+    }).describe("Configured limits")
+  }),
+  organization: import_zod10.z.object({ // structurally identical to `project` above
+    current: import_zod10.z.object({
+      transcribe_concurrent: import_zod10.z.number(),
+      tts_concurrent: import_zod10.z.number()
+    }).describe("Live counts read from Redis"),
+    limits: import_zod10.z.object({ // NOTE(review): null presumably means "no limit configured" - confirm with the API docs
+      transcribe_concurrent: import_zod10.z.number().or(import_zod10.z.null()),
+      tts_concurrent: import_zod10.z.number().or(import_zod10.z.null())
+    }).describe("Configured limits")
+  })
+});
 // src/generated/soniox/streaming-types.zod.ts
 var streaming_types_zod_exports = {};
@@ -36709,10 +37133,10 @@ var sonioxStructuredContextSchema = import_zod11.z.object({
 var sonioxContextSchema = import_zod11.z.union([sonioxStructuredContextSchema, import_zod11.z.string()]);
 var sonioxRealtimeModelSchema = import_zod11.z.enum([
   "stt-rt-v4",
-  "stt-rt-v3",
   "stt-rt-preview",
   "stt-rt-v3-preview",
-  "stt-rt-preview-v2"
+  "stt-rt-preview-v2",
+  "stt-rt-v3"
 ]);
 var streamingTranscriberParams3 = import_zod11.z.object({
   model: sonioxRealtimeModelSchema,
@@ -36720,12 +37144,16 @@ var streamingTranscriberParams3 = import_zod11.z.object({
   sampleRate: import_zod11.z.number().optional(),
   numChannels: import_zod11.z.number().optional(),
   languageHints: import_zod11.z.array(import_zod11.z.string()).optional(),
+  languageHintsStrict: import_zod11.z.boolean().optional(),
   context: sonioxContextSchema.optional(),
   enableSpeakerDiarization: import_zod11.z.boolean().optional(),
   enableLanguageIdentification: import_zod11.z.boolean().optional(),
   enableEndpointDetection: import_zod11.z.boolean().optional(),
+  maxEndpointDelayMs: import_zod11.z.number().optional(),
   translation: sonioxTranslationConfigSchema.optional(),
-  clientReferenceId: import_zod11.z.string().optional()
+  clientReferenceId: import_zod11.z.string().optional(),
+  keepaliveIntervalMs: import_zod11.z.number().optional(),
+  connectTimeoutMs: import_zod11.z.number().optional()
 });
 var sonioxTranslationStatusSchema = import_zod11.z.enum(["original", "translation", "none"]);
 var sonioxTokenSchema = import_zod11.z.object({
@@ -37317,6 +37745,7 @@ __export(schema_exports5, {
   V1ListenPostParametersCallbackMethod: () => V1ListenPostParametersCallbackMethod,
   V1ListenPostParametersCustomIntentMode: () => V1ListenPostParametersCustomIntentMode,
   V1ListenPostParametersCustomTopicMode: () => V1ListenPostParametersCustomTopicMode,
+  V1ListenPostParametersDiarizeModel: () => V1ListenPostParametersDiarizeModel,
   V1ListenPostParametersEncoding: () => V1ListenPostParametersEncoding,
   V1ListenPostParametersModel0: () => V1ListenPostParametersModel0,
   V1ListenPostParametersRedactSchemaOneOf1Items: () => V1ListenPostParametersRedactSchemaOneOf1Items,
@@ -37355,6 +37784,13 @@ __export(schema_exports5, {
   V1SpeakPostParametersSampleRate4: () => V1SpeakPostParametersSampleRate4
 });
+// src/generated/deepgram/schema/v1ListenPostParametersDiarizeModel.ts
+var V1ListenPostParametersDiarizeModel = {
+  latest: "latest",
+  v1: "v1",
+  v2: "v2"
+};
 // src/generated/deepgram/schema/v1ListenPostParametersModel0.ts
 var V1ListenPostParametersModel0 = {
   "nova-3": "nova-3",
@@ -37571,6 +38007,7 @@ var V1SpeakPostParametersSampleRate = {
 var schema_exports6 = {};
 __export(schema_exports6, {
   AudioResponseFormat: () => AudioResponseFormat,
+  AudioTranscriptionDelay: () => AudioTranscriptionDelay,
   CreateSpeechRequestResponseFormat: () => CreateSpeechRequestResponseFormat,
   CreateSpeechRequestStreamFormat: () => CreateSpeechRequestStreamFormat,
   CreateTranscriptionRequestTimestampGranularitiesItem: () => CreateTranscriptionRequestTimestampGranularitiesItem,
@@ -37590,12 +38027,14 @@ __export(schema_exports6, {
   RealtimeAudioFormatsAnyOfType: () => RealtimeAudioFormatsAnyOfType,
   RealtimeCreateClientSecretRequestExpiresAfterAnchor: () => RealtimeCreateClientSecretRequestExpiresAfterAnchor,
   RealtimeFunctionToolType: () => RealtimeFunctionToolType,
+  RealtimeReasoningEffort: () => RealtimeReasoningEffort,
   RealtimeSessionCreateRequestGAIncludeItem: () => RealtimeSessionCreateRequestGAIncludeItem,
   RealtimeSessionCreateRequestGAOutputModalitiesItem: () => RealtimeSessionCreateRequestGAOutputModalitiesItem,
   RealtimeSessionCreateRequestGAType: () => RealtimeSessionCreateRequestGAType,
   RealtimeSessionCreateRequestModalitiesItem: () => RealtimeSessionCreateRequestModalitiesItem,
   RealtimeSessionCreateRequestToolsItemType: () => RealtimeSessionCreateRequestToolsItemType,
   RealtimeSessionCreateResponseGAIncludeItem: () => RealtimeSessionCreateResponseGAIncludeItem,
+  RealtimeSessionCreateResponseGAObject: () => RealtimeSessionCreateResponseGAObject,
   RealtimeSessionCreateResponseGAOutputModalitiesItem: () => RealtimeSessionCreateResponseGAOutputModalitiesItem,
   RealtimeSessionCreateResponseGAType: () => RealtimeSessionCreateResponseGAType,
   RealtimeSessionCreateResponseIncludeItem: () => RealtimeSessionCreateResponseIncludeItem,
@@ -37626,6 +38065,15 @@ __export(schema_exports6, {
   VoiceResourceObject: () => VoiceResourceObject
 });
+// src/generated/openai/schema/audioTranscriptionDelay.ts
+var AudioTranscriptionDelay = {
+  minimal: "minimal",
+  low: "low",
+  medium: "medium",
+  high: "high",
+  xhigh: "xhigh"
+};
 // src/generated/openai/schema/createSpeechRequestResponseFormat.ts
 var CreateSpeechRequestResponseFormat = {
   mp3: "mp3",
@@ -37738,6 +38186,15 @@ var RealtimeFunctionToolType = {
   function: "function"
 };
+// src/generated/openai/schema/realtimeReasoningEffort.ts
+var RealtimeReasoningEffort = {
+  minimal: "minimal",
+  low: "low",
+  medium: "medium",
+  high: "high",
+  xhigh: "xhigh"
+};
 // src/generated/openai/schema/realtimeSessionCreateRequestGAIncludeItem.ts
 var RealtimeSessionCreateRequestGAIncludeItem = {
   iteminput_audio_transcriptionlogprobs: "item.input_audio_transcription.logprobs"
@@ -37770,6 +38227,11 @@ var RealtimeSessionCreateResponseGAIncludeItem = {
   iteminput_audio_transcriptionlogprobs: "item.input_audio_transcription.logprobs"
 };
+// src/generated/openai/schema/realtimeSessionCreateResponseGAObject.ts
+var RealtimeSessionCreateResponseGAObject = {
+  realtimesession: "realtime.session"
+};
 // src/generated/openai/schema/realtimeSessionCreateResponseGAOutputModalitiesItem.ts
 var RealtimeSessionCreateResponseGAOutputModalitiesItem = {
   text: "text",
@@ -37914,6 +38376,7 @@ __export(schema_exports7, {
   AutoChaptersResultErrorType: () => AutoChaptersResultErrorType,
   ErrorResponseError: () => ErrorResponseError,
   GetJobsJobidAlignmentTags: () => GetJobsJobidAlignmentTags,
+  GetJobsJobidObjectUrlsUrlForItem: () => GetJobsJobidObjectUrlsUrlForItem,
   GetJobsJobidTranscriptFormat: () => GetJobsJobidTranscriptFormat,
   JobDetailsStatus: () => JobDetailsStatus,
   JobMode: () => JobMode,
@@ -37983,6 +38446,13 @@ var GetJobsJobidAlignmentTags = {
   one_per_line: "one_per_line"
 };
+// src/generated/speechmatics/schema/getJobsJobidObjectUrlsUrlForItem.ts
+var GetJobsJobidObjectUrlsUrlForItem = {
+  data: "data",
+  audio_mp3: "audio_mp3",
+  transcript: "transcript"
+};
 // src/generated/speechmatics/schema/getJobsJobidTranscriptFormat.ts
 var GetJobsJobidTranscriptFormat = {
   "json-v2": "json-v2",
@@ -38099,6 +38569,15 @@ var WrittenFormRecognitionResultType = {
   word: "word"
 };
+// src/generated/soniox/sdk-types.ts
+var sdk_types_exports = {};
+__export(sdk_types_exports, {
+  RealtimeSttSession: () => import_node.RealtimeSttSession,
+  SonioxFetchHttpClient: () => import_node.FetchHttpClient,
+  SonioxNodeClient: () => import_node.SonioxNodeClient
+});
+var import_node = require("@soniox/node");
 // src/generated/elevenlabs/schema/index.ts
 var schema_exports8 = {};
 __export(schema_exports8, {
@@ -38176,6 +38655,10 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
   deleteJobsJobidParams: () => deleteJobsJobidParams,
   deleteJobsJobidQueryParams: () => deleteJobsJobidQueryParams,
   deleteJobsJobidResponse: () => deleteJobsJobidResponse,
+  deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault: () => deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault,
+  deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault: () => deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault,
+  deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
+  deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
   deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
   deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
   deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
@@ -38191,8 +38674,15 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
   getJobsJobidDataResponse: () => getJobsJobidDataResponse,
   getJobsJobidLogParams: () => getJobsJobidLogParams,
   getJobsJobidLogResponse: () => getJobsJobidLogResponse,
+  getJobsJobidObjectUrlsParams: () => getJobsJobidObjectUrlsParams,
+  getJobsJobidObjectUrlsQueryParams: () => getJobsJobidObjectUrlsQueryParams,
+  getJobsJobidObjectUrlsResponse: () => getJobsJobidObjectUrlsResponse,
   getJobsJobidParams: () => getJobsJobidParams,
   getJobsJobidResponse: () => getJobsJobidResponse,
+  getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault: () => getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault,
+  getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault: () => getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault,
+  getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
+  getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
   getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
   getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
   getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
@@ -38207,6 +38697,8 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
   getJobsJobidTranscriptQueryParams: () => getJobsJobidTranscriptQueryParams,
   getJobsJobidTranscriptResponse: () => getJobsJobidTranscriptResponse,
   getJobsJobidTranscriptResponseJobDurationMin: () => getJobsJobidTranscriptResponseJobDurationMin,
+  getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
+  getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
   getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp,
   getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
   getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax,
@@ -38218,6 +38710,10 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
   getJobsQueryLimitMax: () => getJobsQueryLimitMax,
   getJobsQueryParams: () => getJobsQueryParams,
   getJobsResponse: () => getJobsResponse,
+  getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault: () => getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault,
+  getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault: () => getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault,
+  getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
+  getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
   getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
   getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
   getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
@@ -38228,12 +38724,18 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
   getJobsResponseJobsItemDurationMin: () => getJobsResponseJobsItemDurationMin,
   getUsageQueryParams: () => getUsageQueryParams,
   getUsageResponse: () => getUsageResponse,
-  postJobsBody: () => postJobsBody
+  postJobsBody: () => postJobsBody,
+  postJobsHeader: () => postJobsHeader
 });
 var import_zod12 = require("zod");
+var postJobsHeader = import_zod12.z.object({
+  "X-SM-Processing-Data": import_zod12.z.string().optional().describe(
+    '**Note**: Only available for on-prem\nJSON dictionary of processing settings for the job worker. Currently supports `parallel_engines` (integer), which controls the number of engines the worker can use in parallel for this job, and `user_id` (string), which is the user id for this job. Example: `{"parallel_engines": 4}`'
+  )
+});
 var postJobsBody = import_zod12.z.object({
   config: import_zod12.z.string().describe(
-    "JSON containing a `JobConfig` model indicating the type and parameters for the recognition job."
+    "JSON containing a [`JobConfig`](/speech-to-text/batch/input#jobconfig-schema) model indicating the type and parameters for the recognition job."
   ),
   data_file: import_zod12.z.instanceof(File).optional().describe(
     "The data file to be processed. Alternatively the data file can be fetched from a url specified in `JobConfig`."
@@ -38255,9 +38757,13 @@ var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitiv
 var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
 var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
 var getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
+var getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
+var getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
 var getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
 var getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
 var getJobsResponseJobsItemConfigTranslationConfigTargetLanguagesMax = 5;
+var getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault = "auto";
+var getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault = "brief";
 var getJobsResponse = import_zod12.z.object({
   jobs: import_zod12.z.array(
     import_zod12.z.object({
@@ -38337,19 +38843,30 @@ var getJobsResponse = import_zod12.z.object({
           max_delay_mode: import_zod12.z.enum(["fixed", "flexible"]).optional().describe(
             "Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
           ),
+          audio_filtering_config: import_zod12.z.object({
+            volume_threshold: import_zod12.z.number().min(
+              getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
+            ).max(
+              getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
+            ).optional().describe(
+              "Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
+            )
+          }).optional().describe("Configuration for limiting the transcription of quiet audio."),
           transcript_filtering_config: import_zod12.z.object({
             remove_disfluencies: import_zod12.z.boolean().optional().describe(
-              "If true, words that are identified as disfluencies will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
+              "If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
             ),
             replacements: import_zod12.z.array(
               import_zod12.z.object({
-                from: import_zod12.z.string(),
-                to: import_zod12.z.string()
+                from: import_zod12.z.string().describe("The text or pattern identified to be replaced."),
+                to: import_zod12.z.string().describe(
+                  "The corrected or formatted string to appear in the transcript."
+                )
               })
             ).optional().describe(
-              "A list of replacements to apply to the transcript. Each replacement is a pair of strings, where the first string is the pattern to be replaced and the second string is the replacement text."
+              'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
             )
-          }).optional().describe("Configuration for applying filtering to the transcription"),
+          }).optional().describe("Configuration for applying filtering to the transcription."),
           speaker_diarization_config: import_zod12.z.object({
             prefer_current_speaker: import_zod12.z.boolean().optional().describe(
               'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer.  This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
@@ -38360,6 +38877,19 @@ var getJobsResponse = import_zod12.z.object({
               getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
             ).optional().describe(
               "Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker.  Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower.  A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall.  The default is 0.5."
+            ),
+            get_speakers: import_zod12.z.boolean().optional().describe(
+              "If true, speaker identifiers will be returned at the end of transcript."
+            ),
+            speakers: import_zod12.z.array(
+              import_zod12.z.object({
+                label: import_zod12.z.string().min(1).describe(
+                  "Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
+                ),
+                speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
+              })
+            ).optional().describe(
+              "Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
             )
           }).optional().describe("Configuration for speaker diarization")
         }).optional(),
@@ -38417,10 +38947,14 @@ var getJobsResponse = import_zod12.z.object({
           default_language: import_zod12.z.string().optional()
         }).optional(),
         summarization_config: import_zod12.z.object({
-          content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).optional(),
-          summary_length: import_zod12.z.enum(["brief", "detailed"]).optional(),
+          content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).default(getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault).describe(
+            "Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
+          ),
+          summary_length: import_zod12.z.enum(["brief", "detailed"]).default(getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault).describe(
+            "Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
+          ),
           summary_type: import_zod12.z.enum(["paragraphs", "bullets"]).optional()
-        }).optional(),
+        }).optional().describe("Configuration options for summarization."),
         sentiment_analysis_config: import_zod12.z.object({}).optional(),
         topic_detection_config: import_zod12.z.object({
           topics: import_zod12.z.array(import_zod12.z.string()).optional()
@@ -38442,7 +38976,7 @@ var getJobsResponse = import_zod12.z.object({
         "Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
       )
     }).describe(
-      "Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/<id> request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
+      "Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
     )
   )
 });
@@ -38454,9 +38988,13 @@ var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitiv
 var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
 var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
 var getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
+var getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
+var getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
 var getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
 var getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
 var getJobsJobidResponseJobConfigTranslationConfigTargetLanguagesMax = 5;
+var getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault = "auto";
+var getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault = "brief";
 var getJobsJobidResponse = import_zod12.z.object({
   job: import_zod12.z.object({
     created_at: import_zod12.z.string().datetime({}).describe("The UTC date time the job was created."),
@@ -38533,19 +39071,30 @@ var getJobsJobidResponse = import_zod12.z.object({
         max_delay_mode: import_zod12.z.enum(["fixed", "flexible"]).optional().describe(
           "Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
         ),
+        audio_filtering_config: import_zod12.z.object({
+          volume_threshold: import_zod12.z.number().min(
+            getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
+          ).max(
+            getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
+          ).optional().describe(
+            "Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
+          )
+        }).optional().describe("Configuration for limiting the transcription of quiet audio."),
         transcript_filtering_config: import_zod12.z.object({
           remove_disfluencies: import_zod12.z.boolean().optional().describe(
-            "If true, words that are identified as disfluencies will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
+            "If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
           ),
           replacements: import_zod12.z.array(
             import_zod12.z.object({
-              from: import_zod12.z.string(),
-              to: import_zod12.z.string()
+              from: import_zod12.z.string().describe("The text or pattern identified to be replaced."),
+              to: import_zod12.z.string().describe(
+                "The corrected or formatted string to appear in the transcript."
+              )
             })
           ).optional().describe(
-            "A list of replacements to apply to the transcript. Each replacement is a pair of strings, where the first string is the pattern to be replaced and the second string is the replacement text."
+            'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
           )
-        }).optional().describe("Configuration for applying filtering to the transcription"),
+        }).optional().describe("Configuration for applying filtering to the transcription."),
         speaker_diarization_config: import_zod12.z.object({
           prefer_current_speaker: import_zod12.z.boolean().optional().describe(
             'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer.  This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
@@ -38556,6 +39105,19 @@ var getJobsJobidResponse = import_zod12.z.object({
             getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
           ).optional().describe(
             "Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker.  Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower.  A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall.  The default is 0.5."
+          ),
+          get_speakers: import_zod12.z.boolean().optional().describe(
+            "If true, speaker identifiers will be returned at the end of transcript."
+          ),
+          speakers: import_zod12.z.array(
+            import_zod12.z.object({
+              label: import_zod12.z.string().min(1).describe(
+                "Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
+              ),
+              speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
+            })
+          ).optional().describe(
+            "Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
           )
         }).optional().describe("Configuration for speaker diarization")
       }).optional(),
@@ -38611,10 +39173,14 @@ var getJobsJobidResponse = import_zod12.z.object({
         default_language: import_zod12.z.string().optional()
       }).optional(),
       summarization_config: import_zod12.z.object({
-        content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).optional(),
-        summary_length: import_zod12.z.enum(["brief", "detailed"]).optional(),
+        content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).default(getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault).describe(
+          "Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
+        ),
+        summary_length: import_zod12.z.enum(["brief", "detailed"]).default(getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault).describe(
+          "Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
+        ),
         summary_type: import_zod12.z.enum(["paragraphs", "bullets"]).optional()
-      }).optional(),
+      }).optional().describe("Configuration options for summarization."),
       sentiment_analysis_config: import_zod12.z.object({}).optional(),
       topic_detection_config: import_zod12.z.object({
         topics: import_zod12.z.array(import_zod12.z.string()).optional()
@@ -38636,7 +39202,7 @@ var getJobsJobidResponse = import_zod12.z.object({
       "Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
     )
   }).describe(
-    "Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/<id> request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
+    "Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
   )
 });
 var deleteJobsJobidParams = import_zod12.z.object({
@@ -38652,9 +39218,13 @@ var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensi
 var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
 var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
 var deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
+var deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
+var deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
 var deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
 var deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
 var deleteJobsJobidResponseJobConfigTranslationConfigTargetLanguagesMax = 5;
+var deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault = "auto";
+var deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault = "brief";
 var deleteJobsJobidResponse = import_zod12.z.object({
   job: import_zod12.z.object({
     created_at: import_zod12.z.string().datetime({}).describe("The UTC date time the job was created."),
@@ -38731,19 +39301,30 @@ var deleteJobsJobidResponse = import_zod12.z.object({
         max_delay_mode: import_zod12.z.enum(["fixed", "flexible"]).optional().describe(
           "Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
         ),
+        audio_filtering_config: import_zod12.z.object({
+          volume_threshold: import_zod12.z.number().min(
+            deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
+          ).max(
+            deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
+          ).optional().describe(
+            "Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
+          )
+        }).optional().describe("Configuration for limiting the transcription of quiet audio."),
         transcript_filtering_config: import_zod12.z.object({
           remove_disfluencies: import_zod12.z.boolean().optional().describe(
-            "If true, words that are identified as disfluencies will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
+            "If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
           ),
           replacements: import_zod12.z.array(
             import_zod12.z.object({
-              from: import_zod12.z.string(),
-              to: import_zod12.z.string()
+              from: import_zod12.z.string().describe("The text or pattern identified to be replaced."),
+              to: import_zod12.z.string().describe(
+                "The corrected or formatted string to appear in the transcript."
+              )
             })
           ).optional().describe(
-            "A list of replacements to apply to the transcript. Each replacement is a pair of strings, where the first string is the pattern to be replaced and the second string is the replacement text."
+            'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
           )
-        }).optional().describe("Configuration for applying filtering to the transcription"),
+        }).optional().describe("Configuration for applying filtering to the transcription."),
         speaker_diarization_config: import_zod12.z.object({
           prefer_current_speaker: import_zod12.z.boolean().optional().describe(
             'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer.  This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
@@ -38754,6 +39335,19 @@ var deleteJobsJobidResponse = import_zod12.z.object({
             deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
           ).optional().describe(
             "Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker.  Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower.  A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall.  The default is 0.5."
+          ),
+          get_speakers: import_zod12.z.boolean().optional().describe(
+            "If true, speaker identifiers will be returned at the end of transcript."
+          ),
+          speakers: import_zod12.z.array(
+            import_zod12.z.object({
+              label: import_zod12.z.string().min(1).describe(
+                "Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
+              ),
+              speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
+            })
+          ).optional().describe(
+            "Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
           )
         }).optional().describe("Configuration for speaker diarization")
       }).optional(),
@@ -38809,10 +39403,14 @@ var deleteJobsJobidResponse = import_zod12.z.object({
         default_language: import_zod12.z.string().optional()
       }).optional(),
       summarization_config: import_zod12.z.object({
-        content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).optional(),
-        summary_length: import_zod12.z.enum(["brief", "detailed"]).optional(),
+        content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).default(deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault).describe(
+          "Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
+        ),
+        summary_length: import_zod12.z.enum(["brief", "detailed"]).default(deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault).describe(
+          "Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
+        ),
         summary_type: import_zod12.z.enum(["paragraphs", "bullets"]).optional()
-      }).optional(),
+      }).optional().describe("Configuration options for summarization."),
       sentiment_analysis_config: import_zod12.z.object({}).optional(),
       topic_detection_config: import_zod12.z.object({
         topics: import_zod12.z.array(import_zod12.z.string()).optional()
@@ -38834,7 +39432,7 @@ var deleteJobsJobidResponse = import_zod12.z.object({
       "Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
     )
   }).describe(
-    "Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/<id> request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
+    "Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
   )
 });
 var getJobsJobidDataParams = import_zod12.z.object({
@@ -38856,6 +39454,8 @@ var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverride
 var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
 var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
 var getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
+var getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
+var getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
 var getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
 var getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
 var getJobsJobidTranscriptResponseResultsItemVolumeMin = 0;
@@ -38927,19 +39527,28 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
       max_delay_mode: import_zod12.z.enum(["fixed", "flexible"]).optional().describe(
         "Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
       ),
+      audio_filtering_config: import_zod12.z.object({
+        volume_threshold: import_zod12.z.number().min(
+          getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
+        ).max(
+          getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
+        ).optional().describe(
+          "Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
+        )
+      }).optional().describe("Configuration for limiting the transcription of quiet audio."),
       transcript_filtering_config: import_zod12.z.object({
         remove_disfluencies: import_zod12.z.boolean().optional().describe(
-          "If true, words that are identified as disfluencies will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
+          "If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
         ),
         replacements: import_zod12.z.array(
           import_zod12.z.object({
-            from: import_zod12.z.string(),
-            to: import_zod12.z.string()
+            from: import_zod12.z.string().describe("The text or pattern identified to be replaced."),
+            to: import_zod12.z.string().describe("The corrected or formatted string to appear in the transcript.")
           })
         ).optional().describe(
-          "A list of replacements to apply to the transcript. Each replacement is a pair of strings, where the first string is the pattern to be replaced and the second string is the replacement text."
+          'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
         )
-      }).optional().describe("Configuration for applying filtering to the transcription"),
+      }).optional().describe("Configuration for applying filtering to the transcription."),
       speaker_diarization_config: import_zod12.z.object({
         prefer_current_speaker: import_zod12.z.boolean().optional().describe(
           'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer.  This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
@@ -38950,9 +39559,23 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
           getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
         ).optional().describe(
           "Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker.  Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower.  A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall.  The default is 0.5."
+        ),
+        get_speakers: import_zod12.z.boolean().optional().describe(
+          "If true, speaker identifiers will be returned at the end of transcript."
+        ),
+        speakers: import_zod12.z.array(
+          import_zod12.z.object({
+            label: import_zod12.z.string().min(1).describe(
+              "Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
+            ),
+            speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
+          })
+        ).optional().describe(
+          "Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
         )
       }).optional().describe("Configuration for speaker diarization")
     }).optional(),
+    orchestrator_version: import_zod12.z.string().optional().describe("The engine version used to generate transcription output."),
     translation_errors: import_zod12.z.array(
       import_zod12.z.object({
         type: import_zod12.z.enum(["translation_failed", "unsupported_translation_pair"]).optional(),
@@ -39030,10 +39653,7 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
         "OTHER"
       ]).optional(),
       message: import_zod12.z.string().optional()
-    }).optional(),
-    orchestrator_version: import_zod12.z.string().optional().describe(
-      "Orchestrator version in PEP 440 Format or set to 'version_not_found' as default."
-    )
+    }).optional()
   }).describe(
     "Summary information about the output from an ASR job, comprising the job type and configuration parameters used when generating the output."
   ),
@@ -39116,6 +39736,12 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
       "An ASR job output item. The primary item types are `word` and `punctuation`. Other item types may be present, for example to provide semantic information of different forms."
     )
   ),
+  speakers: import_zod12.z.array(
+    import_zod12.z.object({
+      label: import_zod12.z.string().min(1).describe("Speaker label."),
+      speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
+    })
+  ).optional().describe("List of unique speaker identifiers detected in the transcript."),
   translations: import_zod12.z.record(
     import_zod12.z.string(),
     import_zod12.z.array(
@@ -39137,13 +39763,23 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
     sentiment_analysis: import_zod12.z.object({
       segments: import_zod12.z.array(
         import_zod12.z.object({
-          text: import_zod12.z.string().optional(),
-          start_time: import_zod12.z.number().optional(),
-          end_time: import_zod12.z.number().optional(),
-          sentiment: import_zod12.z.string().optional(),
-          speaker: import_zod12.z.string().optional(),
-          channel: import_zod12.z.string().optional(),
-          confidence: import_zod12.z.number().optional()
+          text: import_zod12.z.string().optional().describe("Represents the transcript of the analysed segment"),
+          sentiment: import_zod12.z.string().optional().describe(
+            "The assigned sentiment to the segment, which can be positive, neutral or negative"
+          ),
+          start_time: import_zod12.z.number().optional().describe(
+            "The timestamp corresponding to the beginning of the transcription segment"
+          ),
+          end_time: import_zod12.z.number().optional().describe(
+            "The timestamp corresponding to the end of the transcription segment"
+          ),
+          speaker: import_zod12.z.string().optional().describe(
+            "The speaker label for the segment, if speaker diarization is enabled"
+          ),
+          channel: import_zod12.z.string().optional().describe(
+            "The channel label for the segment, if channel diarization is enabled"
+          ),
+          confidence: import_zod12.z.number().optional().describe("A confidence score in the range of 0-1")
         }).describe("Represents a segment of text and its associated sentiment.")
       ).optional().describe(
         "An array of objects that represent a segment of text and its associated sentiment."
@@ -39202,10 +39838,10 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
   }).optional().describe("Main object that holds topic detection results."),
   chapters: import_zod12.z.array(
     import_zod12.z.object({
-      title: import_zod12.z.string().optional(),
-      summary: import_zod12.z.string().optional(),
-      start_time: import_zod12.z.number().optional(),
-      end_time: import_zod12.z.number().optional()
+      title: import_zod12.z.string().optional().describe("The auto-generated title for the chapter"),
+      summary: import_zod12.z.string().optional().describe("An auto-generated paragraph-style, short summary of the chapter"),
+      start_time: import_zod12.z.number().optional().describe("The start time of the chapter in the audio file"),
+      end_time: import_zod12.z.number().optional().describe("The end time of the chapter in the audio file")
     })
   ).optional().describe("An array of objects that represent summarized chapters of the transcript"),
   audio_events: import_zod12.z.array(
@@ -39250,6 +39886,18 @@ var getJobsJobidLogParams = import_zod12.z.object({
   jobid: import_zod12.z.string().describe("ID of the job.")
 });
 var getJobsJobidLogResponse = import_zod12.z.instanceof(File);
+var getJobsJobidObjectUrlsParams = import_zod12.z.object({
+  jobid: import_zod12.z.string().describe("ID of the job.")
+});
+var getJobsJobidObjectUrlsQueryParams = import_zod12.z.object({
+  ttl: import_zod12.z.number().describe("Time to live in seconds for the signed URLs"),
+  url_for: import_zod12.z.array(import_zod12.z.enum(["data", "audio_mp3", "transcript"]))
+});
+var getJobsJobidObjectUrlsResponse = import_zod12.z.object({
+  data: import_zod12.z.string().optional(),
+  audio_mp3: import_zod12.z.string().optional(),
+  transcript: import_zod12.z.string().optional()
+});
 var getUsageQueryParams = import_zod12.z.object({
   since: import_zod12.z.string().date().optional().describe(
     "Include usage after the given date (inclusive). This is a [ISO-8601](https://en.wikipedia.org/wiki/ISO_8601) calendar date format: `YYYY-MM-DD`."
@@ -39383,7 +40031,7 @@ var speechToTextBodyKeytermsDefault = [];
 var speechToTextBody = import_zod13.z.object({
   model_id: import_zod13.z.enum(["scribe_v1", "scribe_v2"]).describe("The ID of the model to use for transcription."),
   file: import_zod13.z.instanceof(File).or(import_zod13.z.null()).optional().describe(
-    "The file to transcribe (100ms minimum audio length). All major audio and video formats are supported. Exactly one of the file or cloud_storage_url parameters must be provided. The file size must be less than 3.0GB."
+    "The file to transcribe (100ms minimum audio length). All major audio and video formats are supported. Exactly one of the file or cloud_storage_url parameters must be provided. The file size must be less than 5.0GB."
   ),
   language_code: import_zod13.z.string().or(import_zod13.z.null()).optional().describe(
     "An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand. Defaults to null, in this case the language is predicted automatically."
@@ -39461,7 +40109,7 @@ var speechToTextBody = import_zod13.z.object({
     "The format of input audio. Options are 'pcm_s16le_16' or 'other' For `pcm_s16le_16`, the input audio must be 16-bit PCM at a 16kHz sample rate, single channel (mono), and little-endian byte order. Latency will be lower than with passing an encoded waveform."
   ),
   cloud_storage_url: import_zod13.z.string().or(import_zod13.z.null()).optional().describe(
-    "The HTTPS URL of the file to transcribe. Exactly one of the file or cloud_storage_url parameters must be provided. The file must be accessible via HTTPS and the file size must be less than 2GB. Any valid HTTPS URL is accepted, including URLs from cloud storage providers (AWS S3, Google Cloud Storage, Cloudflare R2, etc.), CDNs, or any other HTTPS source. URLs can be pre-signed or include authentication tokens in query parameters."
+    "[Deprecated] This parameter is deprecated and will be removed in the future. Use 'source_url' instead.The HTTPS URL of the file to transcribe. Exactly one of the file or cloud_storage_url parameters must be provided. The file must be accessible via HTTPS and the file size must be less than 2GB. Any valid HTTPS URL is accepted, including URLs from cloud storage providers (AWS S3, Google Cloud Storage, Cloudflare R2, etc.), CDNs, or any other HTTPS source. URLs can be pre-signed or include authentication tokens in query parameters."
   ),
   source_url: import_zod13.z.string().or(import_zod13.z.null()).optional().describe(
     "The URL of an audio or video file to transcribe. Supports hosted video or audio files, YouTube video URLs, TikTok video URLs, and other video hosting services."
@@ -39500,7 +40148,7 @@ var speechToTextBody = import_zod13.z.object({
     "How to format redacted entities. 'redacted' replaces with {REDACTED}, 'entity_type' replaces with {ENTITY_TYPE}, 'enumerated_entity_type' replaces with {ENTITY_TYPE_N} where N enumerates each occurrence. Only used when entity_redaction is set."
   ),
   keyterms: import_zod13.z.array(import_zod13.z.string()).default(speechToTextBodyKeytermsDefault).describe(
-    'A list of keyterms to bias the transcription towards.           The keyterms are words or phrases you want the model to recognise more accurately.           The number of keyterms cannot exceed 1000.           The length of each keyterm must be less than 50 characters.           Keyterms can contain at most 5 words (after normalisation).           For example ["hello", "world", "technical term"].           Usage of this parameter will incur an additional 20% surcharge on the base transcription cost.           When more than 100 keyterms are provided, a minimum billable duration of 20 seconds applies per request.'
+    'A list of keyterms to bias the transcription towards.           The keyterms are words or phrases you want the model to recognise more accurately.           The number of keyterms cannot exceed 1000.           The length of each keyterm must be less than 50 characters.           Keyterms can contain at most 5 words (after normalisation).           For example ["hello", "world", "technical term"].           The following characters are not supported: `<`, `>`, `{`, `}`, `[`, `]`, `\\`.           Usage of this parameter will incur an additional 20% surcharge on the base transcription cost.           When more than 100 keyterms are provided, a minimum billable duration of 20 seconds applies per request.'
   )
 });
 var speechToTextResponse = import_zod13.z.object({
@@ -39866,6 +40514,7 @@ var deleteTranscriptByIdResponse = import_zod13.z.any();
   SonioxModels,
   SonioxRealtimeModel,
   SonioxRegion,
+  SonioxSDK,
   SonioxStreamingSchema,
   SonioxStreamingTypes,
   SonioxStreamingUpdateSchema,