npm - voice-router-dev - Versions diffs - 0.9.3 → 0.9.5 - Mend

voice-router-dev 0.9.3 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

package/CHANGELOG.md +12 -0
package/dist/constants.d.mts +11 -92
package/dist/constants.d.ts +11 -92
package/dist/constants.js +11 -88
package/dist/constants.mjs +11 -88
package/dist/{field-configs-FbtCPxzs.d.mts → field-configs-BVOZQiG3.d.mts} +4855 -3773
package/dist/{field-configs-FbtCPxzs.d.ts → field-configs-BVOZQiG3.d.ts} +4855 -3773
package/dist/field-configs.d.mts +1 -1
package/dist/field-configs.d.ts +1 -1
package/dist/field-configs.js +583 -150
package/dist/field-configs.mjs +583 -150
package/dist/index.d.mts +1211 -162
package/dist/index.d.ts +1211 -162
package/dist/index.js +924 -275
package/dist/index.mjs +927 -275
package/dist/{provider-metadata-D1d-9cng.d.ts → provider-metadata-CiSA4fWP.d.ts} +2 -2
package/dist/{provider-metadata-BJ29OPW1.d.mts → provider-metadata-oxzd1q6t.d.mts} +2 -2
package/dist/provider-metadata.d.mts +1 -1
package/dist/provider-metadata.d.ts +1 -1
package/dist/provider-metadata.js +3 -66
package/dist/provider-metadata.mjs +3 -66
package/dist/{speechToTextChunkResponseModel-BY2lGyZ3.d.ts → speechToTextChunkResponseModel-Dns0Ma9x.d.ts} +364 -39
package/dist/{speechToTextChunkResponseModel-KayxDiZ7.d.mts → speechToTextChunkResponseModel-_ZvHTD4e.d.mts} +364 -39
package/dist/webhooks.d.mts +3 -2
package/dist/webhooks.d.ts +3 -2
package/package.json +8 -3

package/dist/field-configs.js (changed)

@@ -298,6 +298,9 @@ var listenTranscribeQueryParams = import_zod.z.object({
   diarize: import_zod.z.boolean().optional().describe(
     "Recognize speaker changes. Each word in the transcript will be assigned a speaker number starting at 0"
   ),
+  diarize_model: import_zod.z.enum(["latest", "v1", "v2"]).optional().describe(
+    "Select and enable a specific batch diarization model version. If specifying this parameter, you should not set the deprecated `diarize=true` parameter. Not accepted on streaming requests."
+  ),
   dictation: import_zod.z.boolean().optional().describe("Dictation mode for controlling formatting with dictated speech"),
   encoding: import_zod.z.enum(["linear16", "flac", "mulaw", "amr-nb", "amr-wb", "opus", "speex", "g729"]).optional().describe("Specify the expected encoding of your submitted audio"),
   filler_words: import_zod.z.boolean().optional().describe('Filler Words can help transcribe interruptions in your audio, like "uh" and "um"'),
@@ -562,6 +565,7 @@ var listenTranscribeResponse = import_zod.z.object({
 );
 var speakGenerateQueryCallbackMethodDefault = "POST";
 var speakGenerateQueryModelDefault = "aura-asteria-en";
+var speakGenerateQuerySpeedDefault = 1;
 var speakGenerateQueryParams = import_zod.z.object({
   callback: import_zod.z.string().optional().describe("URL to which we'll make the callback request"),
   callback_method: import_zod.z.enum(["POST", "PUT"]).default(speakGenerateQueryCallbackMethodDefault).describe("HTTP method by which the callback request will be made"),
@@ -673,6 +677,9 @@ var speakGenerateQueryParams = import_zod.z.object({
     import_zod.z.enum(["22050"]).describe("Encoding - mp3. Sample rate is fixed and not configurable (22050 Hz).")
   ).or(import_zod.z.enum(["48000"]).describe("Encoding - opus. Sample rate is fixed at 48000 Hz.")).optional().describe(
     "Sample Rate specifies the sample rate for the output audio. Based on the encoding, different sample rates are supported. For some encodings, the sample rate is not configurable"
+  ),
+  speed: import_zod.z.number().default(speakGenerateQuerySpeedDefault).describe(
+    "Speaking rate multiplier that adjusts the pace of generated speech while preserving natural prosody and voice quality. Not yet supported in all languages."
   )
 });
 var speakGenerateHeader = import_zod.z.object({
@@ -1012,7 +1019,7 @@ var createTranscriptBody = import_zod3.z.object({
     "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
   ),
   disfluencies: import_zod3.z.boolean().optional().describe(
-    'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
+    'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
   ),
   domain: import_zod3.z.string().nullish().describe(
     'Enable domain-specific transcription models to improve accuracy for specialized terminology. Set to `"medical-v1"` to enable [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) for improved accuracy of medical terms such as medications, procedures, conditions, and dosages.\n\nSupported languages: English (`en`), Spanish (`es`), German (`de`), French (`fr`). If used with an unsupported language, the parameter is ignored and a warning is returned.\n'
@@ -1319,12 +1326,20 @@ var createTranscriptBody = import_zod3.z.object({
       "email_address",
       "event",
       "filename",
+      "gender",
       "gender_sexuality",
       "healthcare_number",
       "injury",
       "ip_address",
       "language",
       "location",
+      "location_address",
+      "location_address_street",
+      "location_city",
+      "location_coordinate",
+      "location_country",
+      "location_state",
+      "location_zip",
       "marital_status",
       "medical_condition",
       "medical_process",
@@ -1333,6 +1348,7 @@ var createTranscriptBody = import_zod3.z.object({
       "number_sequence",
       "occupation",
       "organization",
+      "organization_medical_facility",
       "passport_number",
       "password",
       "person_age",
@@ -1341,6 +1357,7 @@ var createTranscriptBody = import_zod3.z.object({
       "physical_attribute",
       "political_affiliation",
       "religion",
+      "sexuality",
       "statistics",
       "time",
       "url",
@@ -1348,15 +1365,20 @@ var createTranscriptBody = import_zod3.z.object({
       "username",
       "vehicle_id",
       "zodiac_sign"
-    ]).describe("The type of PII to redact")
+    ]).describe(
+      "The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
+    )
   ).optional().describe(
     "The list of PII Redaction policies to enable. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
   ),
   redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).describe(
-    "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
+    "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
   ).or(import_zod3.z.null()).optional().describe(
     "The replacement logic for detected PII, can be `entity_type` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
   ),
+  redact_pii_return_unredacted: import_zod3.z.boolean().optional().describe(
+    "When set to `true`, returns the original unredacted transcript alongside the redacted one in the same response. Requires `redact_pii` to be `true`, otherwise a 400 error is returned.\n\nWhen enabled, the response includes the additional fields `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. The existing `text`, `words`, and `utterances` fields remain fully redacted. When disabled (default), the response is unchanged and contains only the redacted transcript. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details.\n"
+  ),
   sentiment_analysis: import_zod3.z.boolean().optional().describe(
     "Enable [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech), can be true or false"
   ),
@@ -1454,10 +1476,10 @@ var createTranscriptBody = import_zod3.z.object({
   ),
   summary_model: import_zod3.z.enum(["informative", "conversational", "catchy"]).optional().describe("The model to summarize the transcript"),
   summary_type: import_zod3.z.enum(["bullets", "bullets_verbose", "gist", "headline", "paragraph"]).optional().describe("The type of summary"),
-  remove_audio_tags: import_zod3.z.enum(["all"]).describe(
-    'Remove [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) from the transcript text. Set to `"all"` to remove all audio tags.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
+  remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
+    'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
   ).or(import_zod3.z.null()).optional().describe(
-    'Remove [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) from the transcript text. Set to `"all"` to remove all audio tags.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
+    'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
   ),
   temperature: import_zod3.z.number().optional().describe(
     "Control the amount of randomness injected into the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
@@ -1588,7 +1610,7 @@ var createTranscriptResponse = import_zod3.z.object({
     "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
   ),
   disfluencies: import_zod3.z.boolean().nullish().describe(
-    'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
+    'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
   ),
   domain: import_zod3.z.string().nullish().describe(
     'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
@@ -1611,12 +1633,20 @@ var createTranscriptResponse = import_zod3.z.object({
         "email_address",
         "event",
         "filename",
+        "gender",
         "gender_sexuality",
         "healthcare_number",
         "injury",
         "ip_address",
         "language",
         "location",
+        "location_address",
+        "location_address_street",
+        "location_city",
+        "location_coordinate",
+        "location_country",
+        "location_state",
+        "location_zip",
         "marital_status",
         "medical_condition",
         "medical_process",
@@ -1625,6 +1655,7 @@ var createTranscriptResponse = import_zod3.z.object({
         "number_sequence",
         "occupation",
         "organization",
+        "organization_medical_facility",
         "passport_number",
         "password",
         "person_age",
@@ -1633,6 +1664,7 @@ var createTranscriptResponse = import_zod3.z.object({
         "physical_attribute",
         "political_affiliation",
         "religion",
+        "sexuality",
         "statistics",
         "time",
         "url",
@@ -1937,6 +1969,24 @@ var createTranscriptResponse = import_zod3.z.object({
   }).optional().describe(
     "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
   ),
+  metadata: import_zod3.z.object({
+    domain_used: import_zod3.z.string().nullish().describe(
+      'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
+    ),
+    warnings: import_zod3.z.array(
+      import_zod3.z.object({
+        message: import_zod3.z.string().describe("A human-readable description of the warning.")
+      }).describe(
+        "A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
+      )
+    ).optional().describe(
+      "Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
+    )
+  }).describe(
+    "Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
+  ).or(import_zod3.z.null()).optional().describe(
+    "Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
+  ),
   multichannel: import_zod3.z.boolean().nullish().describe(
     "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
   ),
@@ -1984,12 +2034,20 @@ var createTranscriptResponse = import_zod3.z.object({
       "email_address",
       "event",
       "filename",
+      "gender",
       "gender_sexuality",
       "healthcare_number",
       "injury",
       "ip_address",
       "language",
       "location",
+      "location_address",
+      "location_address_street",
+      "location_city",
+      "location_coordinate",
+      "location_country",
+      "location_state",
+      "location_zip",
       "marital_status",
       "medical_condition",
       "medical_process",
@@ -1998,6 +2056,7 @@ var createTranscriptResponse = import_zod3.z.object({
       "number_sequence",
       "occupation",
       "organization",
+      "organization_medical_facility",
       "passport_number",
       "password",
       "person_age",
@@ -2006,6 +2065,7 @@ var createTranscriptResponse = import_zod3.z.object({
       "physical_attribute",
       "political_affiliation",
       "religion",
+      "sexuality",
       "statistics",
       "time",
       "url",
@@ -2013,12 +2073,17 @@ var createTranscriptResponse = import_zod3.z.object({
       "username",
       "vehicle_id",
       "zodiac_sign"
-    ]).describe("The type of PII to redact")
+    ]).describe(
+      "The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
+    )
   ).nullish().describe(
     "The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
   ),
   redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
-    "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
+    "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
+  ),
+  redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
+    "Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
   ),
   sentiment_analysis: import_zod3.z.boolean().nullish().describe(
     "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
@@ -2155,20 +2220,23 @@ var createTranscriptResponse = import_zod3.z.object({
     "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
   ),
   summary_model: import_zod3.z.string().nullish().describe(
-    "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
+    "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
   ),
   summary_type: import_zod3.z.string().nullish().describe(
-    "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
+    "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
   ),
-  remove_audio_tags: import_zod3.z.enum(["all"]).describe(
-    "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
+  remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
+    'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
   ).or(import_zod3.z.null()).optional().describe(
-    "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
+    'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
   ),
   temperature: import_zod3.z.number().nullish().describe(
     "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
   ),
   text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
+  unredacted_text: import_zod3.z.string().nullish().describe(
+    "The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
+  ),
   throttled: import_zod3.z.boolean().nullish().describe(
     "True while a request is throttled and false when a request is no longer throttled"
   ),
@@ -2205,6 +2273,39 @@ var createTranscriptResponse = import_zod3.z.object({
   ).nullish().describe(
     "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
   ),
+  unredacted_utterances: import_zod3.z.array(
+    import_zod3.z.object({
+      confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
+      start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
+      end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
+      text: import_zod3.z.string().describe("The text for this utterance"),
+      words: import_zod3.z.array(
+        import_zod3.z.object({
+          confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
+          start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
+          end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
+          text: import_zod3.z.string().describe("The text of the word"),
+          channel: import_zod3.z.string().nullish().describe(
+            "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
+          ),
+          speaker: import_zod3.z.string().nullable().describe(
+            "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
+          )
+        })
+      ).describe("The words in the utterance."),
+      channel: import_zod3.z.string().nullish().describe(
+        "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
+      ),
+      speaker: import_zod3.z.string().describe(
+        'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
+      ),
+      translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
+        'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
+      )
+    })
+  ).nullish().describe(
+    "The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
+  ),
   webhook_auth: import_zod3.z.boolean().describe(
     "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
   ),
@@ -2233,6 +2334,22 @@ var createTranscriptResponse = import_zod3.z.object({
   ).nullish().describe(
     "An array of temporally-sequential word objects, one for each word in the transcript.\n"
   ),
+  unredacted_words: import_zod3.z.array(
+    import_zod3.z.object({
+      confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
+      start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
+      end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
+      text: import_zod3.z.string().describe("The text of the word"),
+      channel: import_zod3.z.string().nullish().describe(
+        "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
+      ),
+      speaker: import_zod3.z.string().nullable().describe(
+        "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
+      )
+    })
+  ).nullish().describe(
+    "The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
+  ),
   acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
   custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
   language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
@@ -2404,7 +2521,7 @@ var getTranscriptResponse = import_zod3.z.object({
     "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
   ),
   disfluencies: import_zod3.z.boolean().nullish().describe(
-    'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
+    'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
   ),
   domain: import_zod3.z.string().nullish().describe(
     'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
@@ -2427,12 +2544,20 @@ var getTranscriptResponse = import_zod3.z.object({
         "email_address",
         "event",
         "filename",
+        "gender",
         "gender_sexuality",
         "healthcare_number",
         "injury",
         "ip_address",
         "language",
         "location",
+        "location_address",
+        "location_address_street",
+        "location_city",
+        "location_coordinate",
+        "location_country",
+        "location_state",
+        "location_zip",
         "marital_status",
         "medical_condition",
         "medical_process",
@@ -2441,6 +2566,7 @@ var getTranscriptResponse = import_zod3.z.object({
         "number_sequence",
         "occupation",
         "organization",
+        "organization_medical_facility",
         "passport_number",
         "password",
         "person_age",
@@ -2449,6 +2575,7 @@ var getTranscriptResponse = import_zod3.z.object({
         "physical_attribute",
         "political_affiliation",
         "religion",
+        "sexuality",
         "statistics",
         "time",
         "url",
@@ -2753,6 +2880,24 @@ var getTranscriptResponse = import_zod3.z.object({
   }).optional().describe(
     "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
   ),
+  metadata: import_zod3.z.object({
+    domain_used: import_zod3.z.string().nullish().describe(
+      'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
+    ),
+    warnings: import_zod3.z.array(
+      import_zod3.z.object({
+        message: import_zod3.z.string().describe("A human-readable description of the warning.")
+      }).describe(
+        "A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
+      )
+    ).optional().describe(
+      "Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
+    )
+  }).describe(
+    "Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
+  ).or(import_zod3.z.null()).optional().describe(
+    "Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
+  ),
   multichannel: import_zod3.z.boolean().nullish().describe(
     "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
   ),
@@ -2800,12 +2945,20 @@ var getTranscriptResponse = import_zod3.z.object({
       "email_address",
       "event",
       "filename",
+      "gender",
       "gender_sexuality",
       "healthcare_number",
       "injury",
       "ip_address",
       "language",
       "location",
+      "location_address",
+      "location_address_street",
+      "location_city",
+      "location_coordinate",
+      "location_country",
+      "location_state",
+      "location_zip",
       "marital_status",
       "medical_condition",
       "medical_process",
@@ -2814,6 +2967,7 @@ var getTranscriptResponse = import_zod3.z.object({
       "number_sequence",
       "occupation",
       "organization",
+      "organization_medical_facility",
       "passport_number",
       "password",
       "person_age",
@@ -2822,6 +2976,7 @@ var getTranscriptResponse = import_zod3.z.object({
       "physical_attribute",
       "political_affiliation",
       "religion",
+      "sexuality",
       "statistics",
       "time",
       "url",
@@ -2829,12 +2984,17 @@ var getTranscriptResponse = import_zod3.z.object({
       "username",
       "vehicle_id",
       "zodiac_sign"
-    ]).describe("The type of PII to redact")
+    ]).describe(
+      "The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
+    )
   ).nullish().describe(
     "The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
   ),
   redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
-    "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
+    "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
+  ),
+  redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
+    "Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
   ),
   sentiment_analysis: import_zod3.z.boolean().nullish().describe(
     "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
@@ -2971,20 +3131,23 @@ var getTranscriptResponse = import_zod3.z.object({
     "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
   ),
   summary_model: import_zod3.z.string().nullish().describe(
-    "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
+    "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
   ),
   summary_type: import_zod3.z.string().nullish().describe(
-    "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
+    "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
   ),
-  remove_audio_tags: import_zod3.z.enum(["all"]).describe(
-    "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
+  remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
+    'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
   ).or(import_zod3.z.null()).optional().describe(
-    "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
+    'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
   ),
   temperature: import_zod3.z.number().nullish().describe(
     "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
   ),
   text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
+  unredacted_text: import_zod3.z.string().nullish().describe(
+    "The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
+  ),
   throttled: import_zod3.z.boolean().nullish().describe(
     "True while a request is throttled and false when a request is no longer throttled"
   ),
@@ -3021,6 +3184,39 @@ var getTranscriptResponse = import_zod3.z.object({
   ).nullish().describe(
     "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
   ),
+  unredacted_utterances: import_zod3.z.array(
+    import_zod3.z.object({
+      confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
+      start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
+      end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
+      text: import_zod3.z.string().describe("The text for this utterance"),
+      words: import_zod3.z.array(
+        import_zod3.z.object({
+          confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
+          start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
+          end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
+          text: import_zod3.z.string().describe("The text of the word"),
+          channel: import_zod3.z.string().nullish().describe(
+            "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
+          ),
+          speaker: import_zod3.z.string().nullable().describe(
+            "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
+          )
+        })
+      ).describe("The words in the utterance."),
+      channel: import_zod3.z.string().nullish().describe(
+        "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
+      ),
+      speaker: import_zod3.z.string().describe(
+        'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
+      ),
+      translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
+        'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
+      )
+    })
+  ).nullish().describe(
+    "The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
+  ),
   webhook_auth: import_zod3.z.boolean().describe(
     "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
   ),
@@ -3049,6 +3245,22 @@ var getTranscriptResponse = import_zod3.z.object({
   ).nullish().describe(
     "An array of temporally-sequential word objects, one for each word in the transcript.\n"
   ),
+  unredacted_words: import_zod3.z.array(
+    import_zod3.z.object({
+      confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
+      start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
+      end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
+      text: import_zod3.z.string().describe("The text of the word"),
+      channel: import_zod3.z.string().nullish().describe(
+        "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
+      ),
+      speaker: import_zod3.z.string().nullable().describe(
+        "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
+      )
+    })
+  ).nullish().describe(
+    "The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
+  ),
   acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
   custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
   language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
@@ -3181,7 +3393,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
     "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
   ),
   disfluencies: import_zod3.z.boolean().nullish().describe(
-    'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
+    'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
   ),
   domain: import_zod3.z.string().nullish().describe(
     'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
@@ -3204,12 +3416,20 @@ var deleteTranscriptResponse = import_zod3.z.object({
         "email_address",
         "event",
         "filename",
+        "gender",
         "gender_sexuality",
         "healthcare_number",
         "injury",
         "ip_address",
         "language",
         "location",
+        "location_address",
+        "location_address_street",
+        "location_city",
+        "location_coordinate",
+        "location_country",
+        "location_state",
+        "location_zip",
         "marital_status",
         "medical_condition",
         "medical_process",
@@ -3218,6 +3438,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
         "number_sequence",
         "occupation",
         "organization",
+        "organization_medical_facility",
         "passport_number",
         "password",
         "person_age",
@@ -3226,6 +3447,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
         "physical_attribute",
         "political_affiliation",
         "religion",
+        "sexuality",
         "statistics",
         "time",
         "url",
@@ -3530,6 +3752,24 @@ var deleteTranscriptResponse = import_zod3.z.object({
   }).optional().describe(
     "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
   ),
+  metadata: import_zod3.z.object({
+    domain_used: import_zod3.z.string().nullish().describe(
+      'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
+    ),
+    warnings: import_zod3.z.array(
+      import_zod3.z.object({
+        message: import_zod3.z.string().describe("A human-readable description of the warning.")
+      }).describe(
+        "A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
+      )
+    ).optional().describe(
+      "Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
+    )
+  }).describe(
+    "Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
+  ).or(import_zod3.z.null()).optional().describe(
+    "Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
+  ),
   multichannel: import_zod3.z.boolean().nullish().describe(
     "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
   ),
@@ -3577,12 +3817,20 @@ var deleteTranscriptResponse = import_zod3.z.object({
       "email_address",
       "event",
       "filename",
+      "gender",
       "gender_sexuality",
       "healthcare_number",
       "injury",
       "ip_address",
       "language",
       "location",
+      "location_address",
+      "location_address_street",
+      "location_city",
+      "location_coordinate",
+      "location_country",
+      "location_state",
+      "location_zip",
       "marital_status",
       "medical_condition",
       "medical_process",
@@ -3591,6 +3839,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
       "number_sequence",
       "occupation",
       "organization",
+      "organization_medical_facility",
       "passport_number",
       "password",
       "person_age",
@@ -3599,6 +3848,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
       "physical_attribute",
       "political_affiliation",
       "religion",
+      "sexuality",
       "statistics",
       "time",
       "url",
@@ -3606,12 +3856,17 @@ var deleteTranscriptResponse = import_zod3.z.object({
       "username",
       "vehicle_id",
       "zodiac_sign"
-    ]).describe("The type of PII to redact")
+    ]).describe(
+      "The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
+    )
   ).nullish().describe(
     "The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
   ),
   redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
-    "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
+    "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
+  ),
+  redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
+    "Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
   ),
   sentiment_analysis: import_zod3.z.boolean().nullish().describe(
     "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
@@ -3748,20 +4003,23 @@ var deleteTranscriptResponse = import_zod3.z.object({
     "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
   ),
   summary_model: import_zod3.z.string().nullish().describe(
-    "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
+    "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
   ),
   summary_type: import_zod3.z.string().nullish().describe(
-    "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
+    "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
   ),
-  remove_audio_tags: import_zod3.z.enum(["all"]).describe(
-    "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
+  remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
+    'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
   ).or(import_zod3.z.null()).optional().describe(
-    "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
+    'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
   ),
   temperature: import_zod3.z.number().nullish().describe(
     "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
   ),
   text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
+  unredacted_text: import_zod3.z.string().nullish().describe(
+    "The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
+  ),
   throttled: import_zod3.z.boolean().nullish().describe(
     "True while a request is throttled and false when a request is no longer throttled"
   ),
@@ -3798,6 +4056,39 @@ var deleteTranscriptResponse = import_zod3.z.object({
   ).nullish().describe(
     "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
   ),
+  unredacted_utterances: import_zod3.z.array(
+    import_zod3.z.object({
+      confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
+      start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
+      end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
+      text: import_zod3.z.string().describe("The text for this utterance"),
+      words: import_zod3.z.array(
+        import_zod3.z.object({
+          confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
+          start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
+          end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
+          text: import_zod3.z.string().describe("The text of the word"),
+          channel: import_zod3.z.string().nullish().describe(
+            "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
+          ),
+          speaker: import_zod3.z.string().nullable().describe(
+            "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
+          )
+        })
+      ).describe("The words in the utterance."),
+      channel: import_zod3.z.string().nullish().describe(
+        "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
+      ),
+      speaker: import_zod3.z.string().describe(
+        'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
+      ),
+      translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
+        'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
+      )
+    })
+  ).nullish().describe(
+    "The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
+  ),
   webhook_auth: import_zod3.z.boolean().describe(
     "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
   ),
@@ -3826,6 +4117,22 @@ var deleteTranscriptResponse = import_zod3.z.object({
   ).nullish().describe(
     "An array of temporally-sequential word objects, one for each word in the transcript.\n"
   ),
+  unredacted_words: import_zod3.z.array(
+    import_zod3.z.object({
+      confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
+      start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
+      end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
+      text: import_zod3.z.string().describe("The text of the word"),
+      channel: import_zod3.z.string().nullish().describe(
+        "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
+      ),
+      speaker: import_zod3.z.string().nullable().describe(
+        "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
+      )
+    })
+  ).nullish().describe(
+    "The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
+  ),
   acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
   custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
   language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
@@ -3981,7 +4288,21 @@ var streamingTranscriberParams = import_zod4.z.object({
   inactivityTimeout: import_zod4.z.number().optional().describe("From SDK v3"),
   speakerLabels: import_zod4.z.boolean().optional().describe("From SDK v3"),
   maxSpeakers: import_zod4.z.number().optional().describe("From SDK v3"),
-  llmGateway: import_zod4.z.unknown().optional().describe("From SDK v3")
+  voiceFocus: import_zod4.z.unknown().optional().describe("From SDK v3"),
+  voiceFocusThreshold: import_zod4.z.number().optional().describe("From SDK v3"),
+  continuousPartials: import_zod4.z.boolean().optional().describe("From SDK v3"),
+  interruptionDelay: import_zod4.z.number().optional().describe("From SDK v3"),
+  turnLeftPadMs: import_zod4.z.number().optional().describe("From SDK v3"),
+  customerSupportAudioCapture: import_zod4.z.boolean().optional().describe("From SDK v3"),
+  includePartialTurns: import_zod4.z.boolean().optional().describe("From SDK v3"),
+  redactPii: import_zod4.z.boolean().optional().describe("From SDK v3"),
+  redactPiiPolicies: import_zod4.z.unknown().optional().describe("From SDK v3"),
+  redactPiiSub: import_zod4.z.unknown().optional().describe("From SDK v3"),
+  llmGateway: import_zod4.z.unknown().optional().describe("From SDK v3"),
+  webhookUrl: import_zod4.z.string().optional().describe("From SDK v3"),
+  webhookAuthHeaderName: import_zod4.z.string().optional().describe("From SDK v3"),
+  webhookAuthHeaderValue: import_zod4.z.string().optional().describe("From SDK v3"),
+  mode: import_zod4.z.unknown().describe("From SDK v3")
 });
 var streamingUpdateConfigParams = import_zod4.z.object({
   end_utterance_silence_threshold: import_zod4.z.number().min(0).max(2e4).optional().describe("The duration threshold in milliseconds"),
@@ -3993,7 +4314,9 @@ var streamingUpdateConfigParams = import_zod4.z.object({
   format_turns: import_zod4.z.boolean().optional().describe("From SDK v3"),
   keyterms_prompt: import_zod4.z.array(import_zod4.z.string()).optional().describe("From SDK v3"),
   prompt: import_zod4.z.string().optional().describe("From SDK v3"),
-  filter_profanity: import_zod4.z.boolean().optional().describe("From SDK v3")
+  filter_profanity: import_zod4.z.boolean().optional().describe("From SDK v3"),
+  interruption_delay: import_zod4.z.number().optional().describe("From SDK v3"),
+  turn_left_pad_ms: import_zod4.z.number().optional().describe("From SDK v3")
 });
 // src/generated/gladia/api/gladiaControlAPI.zod.ts
@@ -4030,7 +4353,7 @@ var preRecordedControllerInitPreRecordedJobV2BodyTranslationConfigMatchOriginalU
 var preRecordedControllerInitPreRecordedJobV2BodyTranslationConfigLipsyncDefault = true;
 var preRecordedControllerInitPreRecordedJobV2BodyTranslationConfigContextAdaptationDefault = true;
 var preRecordedControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault = "general";
-var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
+var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
 var preRecordedControllerInitPreRecordedJobV2BodyLanguageConfigLanguagesDefault = [];
 var preRecordedControllerInitPreRecordedJobV2Body = import_zod5.z.object({
   custom_vocabulary: import_zod5.z.boolean().optional().describe(
@@ -4315,23 +4638,23 @@ var preRecordedControllerInitPreRecordedJobV2Body = import_zod5.z.object({
       "Forces the translation to use informal language forms when available in the target language."
     )
   }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
-  summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
+  summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
   summarization_config: import_zod5.z.object({
     type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(preRecordedControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault).describe("The type of summarization to apply")
-  }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
+  }).optional().describe("Summarization configuration, if `summarization` is enabled"),
   named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
   custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
   custom_spelling_config: import_zod5.z.object({
     spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
   }).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
   sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
-  audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
+  audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
   audio_to_llm_config: import_zod5.z.object({
     prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
     model: import_zod5.z.string().default(preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault).describe(
       "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
     )
-  }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
+  }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
   pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
   pii_redaction_config: import_zod5.z.object({
     entity_types: import_zod5.z.enum([
@@ -4573,7 +4896,7 @@ var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsTrans
 var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsTranslationConfigLipsyncDefault = true;
 var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsTranslationConfigContextAdaptationDefault = true;
 var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault = "general";
-var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
+var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
 var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsLanguageConfigLanguagesDefault = [];
 var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemResultTranscriptionUtterancesItemChannelMin = 0;
 var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemResultTranscriptionUtterancesItemSpeakerMin = 0;
@@ -4917,12 +5240,12 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
             "Forces the translation to use informal language forms when available in the target language."
           )
         }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
-        summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
+        summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
         summarization_config: import_zod5.z.object({
           type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
             preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
           ).describe("The type of summarization to apply")
-        }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
+        }).optional().describe("Summarization configuration, if `summarization` is enabled"),
         named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
         custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
         custom_spelling_config: import_zod5.z.object({
@@ -4931,7 +5254,7 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
           "**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
         ),
         sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
-        audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
+        audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
         audio_to_llm_config: import_zod5.z.object({
           prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
           model: import_zod5.z.string().default(
@@ -4939,7 +5262,7 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
           ).describe(
             "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
           )
-        }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
+        }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
         pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
         pii_redaction_config: import_zod5.z.object({
           entity_types: import_zod5.z.enum([
@@ -6064,7 +6387,7 @@ var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsTranslationConf
 var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsTranslationConfigLipsyncDefault = true;
 var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsTranslationConfigContextAdaptationDefault = true;
 var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSummarizationConfigTypeDefault = "general";
-var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
+var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
 var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsLanguageConfigLanguagesDefault = [];
 var preRecordedControllerGetPreRecordedJobV2ResponseResultTranscriptionUtterancesItemChannelMin = 0;
 var preRecordedControllerGetPreRecordedJobV2ResponseResultTranscriptionUtterancesItemSpeakerMin = 0;
@@ -6401,19 +6724,19 @@ var preRecordedControllerGetPreRecordedJobV2Response = import_zod5.z.object({
         "Forces the translation to use informal language forms when available in the target language."
       )
     }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
-    summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
+    summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
     summarization_config: import_zod5.z.object({
       type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
         preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSummarizationConfigTypeDefault
       ).describe("The type of summarization to apply")
-    }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
+    }).optional().describe("Summarization configuration, if `summarization` is enabled"),
     named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
     custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
     custom_spelling_config: import_zod5.z.object({
       spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
     }).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
     sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
-    audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
+    audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
     audio_to_llm_config: import_zod5.z.object({
       prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
       model: import_zod5.z.string().default(
@@ -6421,7 +6744,7 @@ var preRecordedControllerGetPreRecordedJobV2Response = import_zod5.z.object({
       ).describe(
         "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
       )
-    }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
+    }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
     pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
     pii_redaction_config: import_zod5.z.object({
       entity_types: import_zod5.z.enum([
@@ -7523,7 +7846,7 @@ var transcriptionControllerInitPreRecordedJobV2BodyTranslationConfigMatchOrigina
 var transcriptionControllerInitPreRecordedJobV2BodyTranslationConfigLipsyncDefault = true;
 var transcriptionControllerInitPreRecordedJobV2BodyTranslationConfigContextAdaptationDefault = true;
 var transcriptionControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault = "general";
-var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
+var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
 var transcriptionControllerInitPreRecordedJobV2BodyLanguageConfigLanguagesDefault = [];
 var transcriptionControllerInitPreRecordedJobV2Body = import_zod5.z.object({
   custom_vocabulary: import_zod5.z.boolean().optional().describe(
@@ -7812,23 +8135,23 @@ var transcriptionControllerInitPreRecordedJobV2Body = import_zod5.z.object({
       "Forces the translation to use informal language forms when available in the target language."
     )
   }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
-  summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
+  summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
   summarization_config: import_zod5.z.object({
     type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(transcriptionControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault).describe("The type of summarization to apply")
-  }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
+  }).optional().describe("Summarization configuration, if `summarization` is enabled"),
   named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
   custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
   custom_spelling_config: import_zod5.z.object({
     spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
   }).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
   sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
-  audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
+  audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
   audio_to_llm_config: import_zod5.z.object({
     prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
     model: import_zod5.z.string().default(transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault).describe(
       "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
     )
-  }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
+  }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
   pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
   pii_redaction_config: import_zod5.z.object({
     entity_types: import_zod5.z.enum([
@@ -8073,7 +8396,7 @@ var transcriptionControllerListV2ResponseItemsItemRequestParamsTranslationConfig
 var transcriptionControllerListV2ResponseItemsItemRequestParamsTranslationConfigLipsyncDefault = true;
 var transcriptionControllerListV2ResponseItemsItemRequestParamsTranslationConfigContextAdaptationDefault = true;
 var transcriptionControllerListV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault = "general";
-var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
+var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
 var transcriptionControllerListV2ResponseItemsItemRequestParamsLanguageConfigLanguagesDefault = [];
 var transcriptionControllerListV2ResponseItemsItemResultTranscriptionUtterancesItemChannelMin = 0;
 var transcriptionControllerListV2ResponseItemsItemResultTranscriptionUtterancesItemSpeakerMin = 0;
@@ -8462,12 +8785,12 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
             "Forces the translation to use informal language forms when available in the target language."
           )
         }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
-        summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
+        summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
         summarization_config: import_zod5.z.object({
           type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
             transcriptionControllerListV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
           ).describe("The type of summarization to apply")
-        }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
+        }).optional().describe("Summarization configuration, if `summarization` is enabled"),
         named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
         custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
         custom_spelling_config: import_zod5.z.object({
@@ -8476,7 +8799,7 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
           "**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
         ),
         sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
-        audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
+        audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
         audio_to_llm_config: import_zod5.z.object({
           prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
           model: import_zod5.z.string().default(
@@ -8484,7 +8807,7 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
           ).describe(
             "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
           )
-        }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
+        }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
         pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
         pii_redaction_config: import_zod5.z.object({
           entity_types: import_zod5.z.enum([
@@ -10790,7 +11113,7 @@ var transcriptionControllerGetTranscriptV2ResponseRequestParamsTranslationConfig
 var transcriptionControllerGetTranscriptV2ResponseRequestParamsTranslationConfigLipsyncDefault = true;
 var transcriptionControllerGetTranscriptV2ResponseRequestParamsTranslationConfigContextAdaptationDefault = true;
 var transcriptionControllerGetTranscriptV2ResponseRequestParamsSummarizationConfigTypeDefault = "general";
-var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
+var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
 var transcriptionControllerGetTranscriptV2ResponseRequestParamsLanguageConfigLanguagesDefault = [];
 var transcriptionControllerGetTranscriptV2ResponseResultTranscriptionUtterancesItemChannelMin = 0;
 var transcriptionControllerGetTranscriptV2ResponseResultTranscriptionUtterancesItemSpeakerMin = 0;
@@ -11173,19 +11496,19 @@ var transcriptionControllerGetTranscriptV2Response = import_zod5.z.discriminated
           "Forces the translation to use informal language forms when available in the target language."
         )
       }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
-      summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
+      summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
       summarization_config: import_zod5.z.object({
         type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
           transcriptionControllerGetTranscriptV2ResponseRequestParamsSummarizationConfigTypeDefault
         ).describe("The type of summarization to apply")
-      }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
+      }).optional().describe("Summarization configuration, if `summarization` is enabled"),
       named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
       custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
       custom_spelling_config: import_zod5.z.object({
         spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
       }).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
       sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
-      audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
+      audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
       audio_to_llm_config: import_zod5.z.object({
         prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
         model: import_zod5.z.string().default(
@@ -11193,7 +11516,7 @@ var transcriptionControllerGetTranscriptV2Response = import_zod5.z.discriminated
         ).describe(
           "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
         )
-      }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
+      }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
       pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
       pii_redaction_config: import_zod5.z.object({
         entity_types: import_zod5.z.enum([
@@ -13884,7 +14207,7 @@ var historyControllerGetListV1ResponseItemsItemRequestParamsTranslationConfigMat
 var historyControllerGetListV1ResponseItemsItemRequestParamsTranslationConfigLipsyncDefault = true;
 var historyControllerGetListV1ResponseItemsItemRequestParamsTranslationConfigContextAdaptationDefault = true;
 var historyControllerGetListV1ResponseItemsItemRequestParamsSummarizationConfigTypeDefault = "general";
-var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
+var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
 var historyControllerGetListV1ResponseItemsItemRequestParamsLanguageConfigLanguagesDefault = [];
 var historyControllerGetListV1ResponseItemsItemResultTranscriptionUtterancesItemChannelMin = 0;
 var historyControllerGetListV1ResponseItemsItemResultTranscriptionUtterancesItemSpeakerMin = 0;
@@ -14273,12 +14596,12 @@ var historyControllerGetListV1Response = import_zod5.z.object({
             "Forces the translation to use informal language forms when available in the target language."
           )
         }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
-        summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
+        summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
         summarization_config: import_zod5.z.object({
           type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
             historyControllerGetListV1ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
           ).describe("The type of summarization to apply")
-        }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
+        }).optional().describe("Summarization configuration, if `summarization` is enabled"),
         named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
         custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
         custom_spelling_config: import_zod5.z.object({
@@ -14287,7 +14610,7 @@ var historyControllerGetListV1Response = import_zod5.z.object({
           "**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
         ),
         sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
-        audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
+        audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
         audio_to_llm_config: import_zod5.z.object({
           prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
           model: import_zod5.z.string().default(
@@ -14295,7 +14618,7 @@ var historyControllerGetListV1Response = import_zod5.z.object({
           ).describe(
             "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
           )
-        }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
+        }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
         pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
         pii_redaction_config: import_zod5.z.object({
           entity_types: import_zod5.z.enum([
@@ -19748,6 +20071,7 @@ var createRealtimeClientSecretBodySessionAudioOutputSpeedDefault = 1;
 var createRealtimeClientSecretBodySessionAudioOutputSpeedMin = 0.25;
 var createRealtimeClientSecretBodySessionAudioOutputSpeedMax = 1.5;
 var createRealtimeClientSecretBodySessionToolChoiceDefault = "auto";
+var createRealtimeClientSecretBodySessionReasoningEffortDefault = "low";
 var createRealtimeClientSecretBodySessionTruncationRetentionRatioMin = 0;
 var createRealtimeClientSecretBodySessionTruncationRetentionRatioMax = 1;
 var createRealtimeClientSecretBodySessionTruncationTokenLimitsPostInstructionsMin = 0;
@@ -19778,6 +20102,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
       import_zod6.z.enum([
         "gpt-realtime",
         "gpt-realtime-1.5",
+        "gpt-realtime-2",
         "gpt-realtime-2025-08-28",
         "gpt-4o-realtime-preview",
         "gpt-4o-realtime-preview-2024-10-01",
@@ -19818,16 +20143,20 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
               "gpt-4o-mini-transcribe",
               "gpt-4o-mini-transcribe-2025-12-15",
               "gpt-4o-transcribe",
-              "gpt-4o-transcribe-diarize"
+              "gpt-4o-transcribe-diarize",
+              "gpt-realtime-whisper"
             ])
           ).optional().describe(
-            "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
+            "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
           ),
           language: import_zod6.z.string().optional().describe(
             "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
           ),
           prompt: import_zod6.z.string().optional().describe(
-            'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
+            'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
+          ),
+          delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
+            "Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
           )
         }).optional(),
         noise_reduction: import_zod6.z.object({
@@ -19894,7 +20223,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
             "Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
           )
         ]).describe(
-          'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
+          'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
         ).or(import_zod6.z.null()).optional()
       }).optional(),
       output: import_zod6.z.object({
@@ -19967,7 +20296,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
           server_label: import_zod6.z.string().describe(
             "A label for this MCP server, used to identify it in tool calls.\n"
           ),
-          server_url: import_zod6.z.string().optional().describe(
+          server_url: import_zod6.z.string().url().optional().describe(
             "The URL for the MCP server. One of `server_url` or `connector_id` must be\nprovided.\n"
           ),
           connector_id: import_zod6.z.enum([
@@ -20045,6 +20374,16 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
     ).default(createRealtimeClientSecretBodySessionToolChoiceDefault).describe(
       "How the model chooses tools. Provide one of the string modes or force a specific\nfunction/MCP tool.\n"
     ),
+    parallel_tool_calls: import_zod6.z.boolean().optional().describe(
+      "Whether the model may call multiple tools in parallel. Only supported by\nreasoning Realtime models such as `gpt-realtime-2`.\n"
+    ),
+    reasoning: import_zod6.z.object({
+      effort: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).default(createRealtimeClientSecretBodySessionReasoningEffortDefault).describe(
+        "Constrains effort on reasoning for reasoning-capable Realtime models such as\n`gpt-realtime-2`.\n"
+      )
+    }).optional().describe(
+      "Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.\n"
+    ),
     max_output_tokens: import_zod6.z.number().or(import_zod6.z.enum(["inf"])).optional().describe(
       "Maximum number of output tokens for a single assistant response,\ninclusive of tool calls. Provide an integer between 1 and 4096 to\nlimit output tokens, or `inf` for the maximum available tokens for a\ngiven model. Defaults to `inf`.\n"
     ),
@@ -20084,7 +20423,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
         ).or(
           import_zod6.z.object({
             type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
-            image_url: import_zod6.z.string().describe(
+            image_url: import_zod6.z.string().url().describe(
               "The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
             ).or(import_zod6.z.null()).optional(),
             file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
@@ -20098,7 +20437,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
             file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
             filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
             file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
-            file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
+            file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
             detail: import_zod6.z.enum(["low", "high"]).optional()
           }).describe("A file input to the model.")
         )
@@ -20134,16 +20473,20 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
                 "gpt-4o-mini-transcribe",
                 "gpt-4o-mini-transcribe-2025-12-15",
                 "gpt-4o-transcribe",
-                "gpt-4o-transcribe-diarize"
+                "gpt-4o-transcribe-diarize",
+                "gpt-realtime-whisper"
               ])
             ).optional().describe(
-              "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
+              "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
             ),
             language: import_zod6.z.string().optional().describe(
               "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
             ),
             prompt: import_zod6.z.string().optional().describe(
-              'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
+              'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
+            ),
+            delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
+              "Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
             )
           }).optional(),
           noise_reduction: import_zod6.z.object({
@@ -20210,7 +20553,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
               "Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
             )
           ]).describe(
-            'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
+            'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
           ).or(import_zod6.z.null()).optional()
         }).optional()
       }).optional().describe("Configuration for input and output audio.\n"),
@@ -20236,23 +20579,21 @@ var createRealtimeClientSecretResponseSessionAudioOutputSpeedDefault = 1;
 var createRealtimeClientSecretResponseSessionAudioOutputSpeedMin = 0.25;
 var createRealtimeClientSecretResponseSessionAudioOutputSpeedMax = 1.5;
 var createRealtimeClientSecretResponseSessionToolChoiceDefault = "auto";
+var createRealtimeClientSecretResponseSessionReasoningEffortDefault = "low";
 var createRealtimeClientSecretResponseSessionTruncationRetentionRatioMin = 0;
 var createRealtimeClientSecretResponseSessionTruncationRetentionRatioMax = 1;
 var createRealtimeClientSecretResponseSessionTruncationTokenLimitsPostInstructionsMin = 0;
 var createRealtimeClientSecretResponse = import_zod6.z.object({
   value: import_zod6.z.string().describe("The generated client secret value."),
   expires_at: import_zod6.z.number().describe("Expiration timestamp for the client secret, in seconds since epoch."),
-  session: import_zod6.z.discriminatedUnion("type", [
+  session: import_zod6.z.union([
     import_zod6.z.object({
-      client_secret: import_zod6.z.object({
-        value: import_zod6.z.string().describe(
-          "Ephemeral key usable in client environments to authenticate connections to the Realtime API. Use this in client-side environments rather than a standard API token, which should only be used server-side.\n"
-        ),
-        expires_at: import_zod6.z.number().describe(
-          "Timestamp for when the token expires. Currently, all tokens expire\nafter one minute.\n"
-        )
-      }).describe("Ephemeral key returned by the API."),
       type: import_zod6.z.enum(["realtime"]).describe("The type of session to create. Always `realtime` for the Realtime API.\n"),
+      id: import_zod6.z.string().describe(
+        "Unique identifier for the session that looks like `sess_1234567890abcdef`.\n"
+      ),
+      object: import_zod6.z.enum(["realtime.session"]).describe("The object type. Always `realtime.session`."),
+      expires_at: import_zod6.z.number().optional().describe("Expiration timestamp for the session, in seconds since epoch."),
       output_modalities: import_zod6.z.array(import_zod6.z.enum(["text", "audio"])).default(createRealtimeClientSecretResponseSessionOutputModalitiesDefault).describe(
         'The set of modalities the model can respond with. It defaults to `["audio"]`, indicating\nthat the model will respond with audio plus a transcript. `["text"]` can be used to make\nthe model respond with text only. It is not possible to request both `text` and `audio` at the same time.\n'
       ),
@@ -20260,6 +20601,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
         import_zod6.z.enum([
           "gpt-realtime",
           "gpt-realtime-1.5",
+          "gpt-realtime-2",
           "gpt-realtime-2025-08-28",
           "gpt-4o-realtime-preview",
           "gpt-4o-realtime-preview-2024-10-01",
@@ -20282,15 +20624,15 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
       audio: import_zod6.z.object({
         input: import_zod6.z.object({
           format: import_zod6.z.object({
-            type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
-            rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
+            type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
+            rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
           }).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
             import_zod6.z.object({
-              type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
+              type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
             }).describe("The G.711 \u03BC-law format.")
           ).or(
             import_zod6.z.object({
-              type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
+              type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
             }).describe("The G.711 A-law format.")
           ).optional(),
           transcription: import_zod6.z.object({
@@ -20300,20 +20642,19 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
                 "gpt-4o-mini-transcribe",
                 "gpt-4o-mini-transcribe-2025-12-15",
                 "gpt-4o-transcribe",
-                "gpt-4o-transcribe-diarize"
+                "gpt-4o-transcribe-diarize",
+                "gpt-realtime-whisper"
               ])
             ).optional().describe(
-              "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
-            ),
-            language: import_zod6.z.string().optional().describe(
-              "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
+              "The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
             ),
+            language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
             prompt: import_zod6.z.string().optional().describe(
-              'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
+              "The prompt configured for input audio transcription, when present.\n"
             )
           }).optional(),
           noise_reduction: import_zod6.z.object({
-            type: import_zod6.z.enum(["near_field", "far_field"]).describe(
+            type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
               "Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones such as laptop or conference room microphones.\n"
             )
           }).optional().describe(
@@ -20376,20 +20717,20 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
               "Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
             )
           ]).describe(
-            'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
+            'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
           ).or(import_zod6.z.null()).optional()
         }).optional(),
         output: import_zod6.z.object({
           format: import_zod6.z.object({
-            type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
-            rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
+            type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
+            rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
           }).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
             import_zod6.z.object({
-              type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
+              type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
             }).describe("The G.711 \u03BC-law format.")
           ).or(
             import_zod6.z.object({
-              type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
+              type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
             }).describe("The G.711 A-law format.")
           ).optional(),
           voice: import_zod6.z.string().or(
@@ -20433,7 +20774,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
       ).or(import_zod6.z.null()).optional(),
       tools: import_zod6.z.array(
         import_zod6.z.object({
-          type: import_zod6.z.enum(["function"]).describe("The type of the tool, i.e. `function`."),
+          type: import_zod6.z.enum(["function"]).optional().describe("The type of the tool, i.e. `function`."),
           name: import_zod6.z.string().optional().describe("The name of the function."),
           description: import_zod6.z.string().optional().describe(
             "The description of the function, including guidance on when and how\nto call it, and guidance about what to tell the user when calling\n(if anything).\n"
@@ -20445,7 +20786,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
             server_label: import_zod6.z.string().describe(
               "A label for this MCP server, used to identify it in tool calls.\n"
             ),
-            server_url: import_zod6.z.string().optional().describe(
+            server_url: import_zod6.z.string().url().optional().describe(
               "The URL for the MCP server. One of `server_url` or `connector_id` must be\nprovided.\n"
             ),
             connector_id: import_zod6.z.enum([
@@ -20457,7 +20798,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
               "connector_outlookcalendar",
               "connector_outlookemail",
               "connector_sharepoint"
-            ]).describe(
+            ]).optional().describe(
               "Identifier for service connectors, like those available in ChatGPT. One of\n`server_url` or `connector_id` must be provided. Learn more about service\nconnectors [here](/docs/guides/tools-remote-mcp#connectors).\n\nCurrently supported `connector_id` values are:\n\n- Dropbox: `connector_dropbox`\n- Gmail: `connector_gmail`\n- Google Calendar: `connector_googlecalendar`\n- Google Drive: `connector_googledrive`\n- Microsoft Teams: `connector_microsoftteams`\n- Outlook Calendar: `connector_outlookcalendar`\n- Outlook Email: `connector_outlookemail`\n- SharePoint: `connector_sharepoint`\n"
             ),
             authorization: import_zod6.z.string().optional().describe(
@@ -20523,6 +20864,13 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
       ).default(createRealtimeClientSecretResponseSessionToolChoiceDefault).describe(
         "How the model chooses tools. Provide one of the string modes or force a specific\nfunction/MCP tool.\n"
       ),
+      reasoning: import_zod6.z.object({
+        effort: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).default(createRealtimeClientSecretResponseSessionReasoningEffortDefault).describe(
+          "Constrains effort on reasoning for reasoning-capable Realtime models such as\n`gpt-realtime-2`.\n"
+        )
+      }).optional().describe(
+        "Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.\n"
+      ),
       max_output_tokens: import_zod6.z.number().or(import_zod6.z.enum(["inf"])).optional().describe(
         "Maximum number of output tokens for a single assistant response,\ninclusive of tool calls. Provide an integer between 1 and 4096 to\nlimit output tokens, or `inf` for the maximum available tokens for a\ngiven model. Defaults to `inf`.\n"
       ),
@@ -20562,7 +20910,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
           ).or(
             import_zod6.z.object({
               type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
-              image_url: import_zod6.z.string().describe(
+              image_url: import_zod6.z.string().url().describe(
                 "The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
               ).or(import_zod6.z.null()).optional(),
               file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
@@ -20576,8 +20924,8 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
               file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
               filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
               file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
-              file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
-              detail: import_zod6.z.enum(["low", "high"])
+              file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
+              detail: import_zod6.z.enum(["low", "high"]).optional()
             }).describe("A file input to the model.")
           )
         ).describe(
@@ -20586,9 +20934,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
       }).describe(
         "Reference to a prompt template and its variables.\n[Learn more](/docs/guides/text?api-mode=responses#reusable-prompts).\n"
       ).or(import_zod6.z.null()).optional()
-    }).describe(
-      "A new Realtime session configuration, with an ephemeral key. Default TTL\nfor keys is one minute.\n"
-    ),
+    }).describe("A Realtime session configuration object.\n"),
     import_zod6.z.object({
       type: import_zod6.z.enum(["transcription"]).describe(
         "The type of session. Always `transcription` for transcription sessions.\n"
@@ -20604,15 +20950,15 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
       audio: import_zod6.z.object({
         input: import_zod6.z.object({
           format: import_zod6.z.object({
-            type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
-            rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
+            type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
+            rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
           }).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
             import_zod6.z.object({
-              type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
+              type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
             }).describe("The G.711 \u03BC-law format.")
           ).or(
             import_zod6.z.object({
-              type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
+              type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
             }).describe("The G.711 A-law format.")
           ).optional(),
           transcription: import_zod6.z.object({
@@ -20622,20 +20968,19 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
                 "gpt-4o-mini-transcribe",
                 "gpt-4o-mini-transcribe-2025-12-15",
                 "gpt-4o-transcribe",
-                "gpt-4o-transcribe-diarize"
+                "gpt-4o-transcribe-diarize",
+                "gpt-realtime-whisper"
               ])
             ).optional().describe(
-              "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
-            ),
-            language: import_zod6.z.string().optional().describe(
-              "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
+              "The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
             ),
+            language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
             prompt: import_zod6.z.string().optional().describe(
-              'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
+              "The prompt configured for input audio transcription, when present.\n"
             )
           }).optional(),
           noise_reduction: import_zod6.z.object({
-            type: import_zod6.z.enum(["near_field", "far_field"]).describe(
+            type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
               "Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones such as laptop or conference room microphones.\n"
             )
           }).optional().describe("Configuration for input audio noise reduction.\n"),
@@ -20652,8 +20997,10 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
             silence_duration_ms: import_zod6.z.number().optional().describe(
               "Duration of silence to detect speech stop (in milliseconds). Defaults\nto 500ms. With shorter values the model will respond more quickly,\nbut may jump in on short pauses from the user.\n"
             )
-          }).optional().describe(
-            "Configuration for turn detection. Can be set to `null` to turn off. Server\nVAD means that the model will detect the start and end of speech based on\naudio volume and respond at the end of user speech.\n"
+          }).describe(
+            "Configuration for turn detection. Can be set to `null` to turn off. Server\nVAD means that the model will detect the start and end of speech based on\naudio volume and respond at the end of user speech. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported.\n"
+          ).or(import_zod6.z.null()).optional().describe(
+            "Configuration for turn detection. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported.\n"
           )
         }).optional()
       }).optional().describe("Configuration for input audio for the session.\n")
@@ -20789,7 +21136,7 @@ var createRealtimeSessionBody = import_zod6.z.object({
       ).or(
         import_zod6.z.object({
           type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
-          image_url: import_zod6.z.string().describe(
+          image_url: import_zod6.z.string().url().describe(
             "The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
           ).or(import_zod6.z.null()).optional(),
           file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
@@ -20803,7 +21150,7 @@ var createRealtimeSessionBody = import_zod6.z.object({
           file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
           filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
           file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
-          file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
+          file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
           detail: import_zod6.z.enum(["low", "high"]).optional()
         }).describe("A file input to the model.")
       )
@@ -20851,17 +21198,14 @@ var createRealtimeSessionResponse = import_zod6.z.object({
             "gpt-4o-mini-transcribe",
             "gpt-4o-mini-transcribe-2025-12-15",
             "gpt-4o-transcribe",
-            "gpt-4o-transcribe-diarize"
+            "gpt-4o-transcribe-diarize",
+            "gpt-realtime-whisper"
           ])
         ).optional().describe(
-          "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
+          "The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
         ),
-        language: import_zod6.z.string().optional().describe(
-          "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
-        ),
-        prompt: import_zod6.z.string().optional().describe(
-          'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
-        )
+        language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
+        prompt: import_zod6.z.string().optional().describe("The prompt configured for input audio transcription, when present.\n")
       }).optional(),
       noise_reduction: import_zod6.z.object({
         type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
@@ -20986,16 +21330,20 @@ var createRealtimeTranscriptionSessionBody = import_zod6.z.object({
         "gpt-4o-mini-transcribe",
         "gpt-4o-mini-transcribe-2025-12-15",
         "gpt-4o-transcribe",
-        "gpt-4o-transcribe-diarize"
+        "gpt-4o-transcribe-diarize",
+        "gpt-realtime-whisper"
       ])
     ).optional().describe(
-      "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
+      "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
     ),
     language: import_zod6.z.string().optional().describe(
       "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
     ),
     prompt: import_zod6.z.string().optional().describe(
-      'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
+      'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
+    ),
+    delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
+      "Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
     )
   }).optional(),
   include: import_zod6.z.array(import_zod6.z.enum(["item.input_audio_transcription.logprobs"])).optional().describe(
@@ -21024,17 +21372,14 @@ var createRealtimeTranscriptionSessionResponse = import_zod6.z.object({
         "gpt-4o-mini-transcribe",
         "gpt-4o-mini-transcribe-2025-12-15",
         "gpt-4o-transcribe",
-        "gpt-4o-transcribe-diarize"
+        "gpt-4o-transcribe-diarize",
+        "gpt-realtime-whisper"
       ])
     ).optional().describe(
-      "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
-    ),
-    language: import_zod6.z.string().optional().describe(
-      "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
+      "The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
     ),
-    prompt: import_zod6.z.string().optional().describe(
-      'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
-    )
+    language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
+    prompt: import_zod6.z.string().optional().describe("The prompt configured for input audio transcription, when present.\n")
   }).optional(),
   turn_detection: import_zod6.z.object({
     type: import_zod6.z.string().optional().describe("Type of turn detection, only `server_vad` is currently supported.\n"),
@@ -24647,11 +24992,73 @@ var getModelsResponse = import_zod10.z.object({
     })
   ).describe("List of available models and their attributes.")
 });
+var getTtsModelsResponse = import_zod10.z.object({
+  models: import_zod10.z.array(
+    import_zod10.z.object({
+      id: import_zod10.z.string().describe("Unique identifier of the model."),
+      aliased_model_id: import_zod10.z.string().or(import_zod10.z.null()).describe("If this is an alias, the id of the aliased model."),
+      name: import_zod10.z.string().describe("Name of the model."),
+      voices: import_zod10.z.array(
+        import_zod10.z.object({
+          id: import_zod10.z.string().describe("Unique identifier of the voice."),
+          description: import_zod10.z.string().describe("Description of the TTS voice."),
+          gender: import_zod10.z.enum(["male", "female", "neutral"])
+        })
+      ).describe("List of available voices for this model."),
+      languages: import_zod10.z.array(
+        import_zod10.z.object({
+          code: import_zod10.z.string().describe("2-letter language code."),
+          name: import_zod10.z.string().describe("Language name.")
+        })
+      ).describe("List of languages supported by the model.")
+    })
+  ).describe("List of available TTS models and their attributes.")
+});
+var getUsageLogsQueryLimitDefault = 1e3;
+var getUsageLogsQueryLimitMax = 1e3;
+var getUsageLogsQuerySortDefault = "end_time_asc";
+var getUsageLogsQueryParams = import_zod10.z.object({
+  start_time: import_zod10.z.string().describe("Start of the time window (inclusive). Filters by request end time."),
+  end_time: import_zod10.z.string().describe("End of the time window (exclusive). Filters by request end time."),
+  limit: import_zod10.z.number().min(1).max(getUsageLogsQueryLimitMax).default(getUsageLogsQueryLimitDefault).describe("Maximum number of usage log entries to return."),
+  sort: import_zod10.z.enum(["end_time_asc", "end_time_desc"]).default(getUsageLogsQuerySortDefault).describe(
+    "Sort order by end_time.Use `end_time_desc` to get the most recent entries first. When paginating, pass the same `sort` value alongside the cursor."
+  ),
+  cursor: import_zod10.z.string().or(import_zod10.z.null()).optional().describe("Pagination cursor for the next page of results.")
+});
+var getUsageLogsResponse = import_zod10.z.object({
+  usage_logs: import_zod10.z.array(
+    import_zod10.z.object({
+      uuid: import_zod10.z.string().uuid().describe("Unique identifier of the request."),
+      request_scope: import_zod10.z.string().describe("Scope of the request (api / playground)."),
+      client_reference_id: import_zod10.z.string().describe("Client reference ID supplied on the original request. Empty string if none."),
+      model: import_zod10.z.string().describe("Model identifier."),
+      start_time: import_zod10.z.string().datetime({}).describe("When the request started."),
+      end_time: import_zod10.z.string().datetime({}).describe("When the request ended."),
+      input_text_tokens: import_zod10.z.number(),
+      input_audio_tokens: import_zod10.z.number(),
+      input_audio_duration_ms: import_zod10.z.number(),
+      output_text_tokens: import_zod10.z.number(),
+      output_audio_tokens: import_zod10.z.number(),
+      output_audio_duration_ms: import_zod10.z.number(),
+      cost_usd: import_zod10.z.string(),
+      input_cost_usd: import_zod10.z.string(),
+      input_text_cost_usd: import_zod10.z.string(),
+      input_audio_cost_usd: import_zod10.z.string(),
+      output_cost_usd: import_zod10.z.string(),
+      output_text_cost_usd: import_zod10.z.string(),
+      output_audio_cost_usd: import_zod10.z.string()
+    })
+  ).describe("Per-request usage log entries ordered by end_time, uuid (per `sort`)."),
+  next_page_cursor: import_zod10.z.string().or(import_zod10.z.null()).optional().describe(
+    "A pagination token that references the next page of results. When more data is available, this field contains a value to pass in the cursor parameter of a subsequent request. When null, no additional results are available."
+  )
+});
 var createTemporaryApiKeyBodyExpiresInSecondsMax = 3600;
 var createTemporaryApiKeyBodyClientReferenceIdMaxOne = 256;
 var createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne = 18e3;
 var createTemporaryApiKeyBody = import_zod10.z.object({
-  usage_type: import_zod10.z.enum(["transcribe_websocket"]),
+  usage_type: import_zod10.z.enum(["transcribe_websocket", "tts_rt"]),
   expires_in_seconds: import_zod10.z.number().min(1).max(createTemporaryApiKeyBodyExpiresInSecondsMax).describe("Duration in seconds until the temporary API key expires."),
   client_reference_id: import_zod10.z.string().max(createTemporaryApiKeyBodyClientReferenceIdMaxOne).or(import_zod10.z.null()).optional().describe("Optional tracking identifier string. Does not need to be unique."),
   single_use: import_zod10.z.boolean().or(import_zod10.z.null()).optional().describe("If true, the temporary API key can be used only once."),
@@ -24659,6 +25066,28 @@ var createTemporaryApiKeyBody = import_zod10.z.object({
     "Maximum WebSocket connection duration in seconds. If exceeded, the connection will be dropped. If not set, no limit is applied."
   )
 });
+var getConcurrencyLimitsResponse = import_zod10.z.object({
+  project: import_zod10.z.object({
+    current: import_zod10.z.object({
+      transcribe_concurrent: import_zod10.z.number(),
+      tts_concurrent: import_zod10.z.number()
+    }).describe("Live counts read from Redis"),
+    limits: import_zod10.z.object({
+      transcribe_concurrent: import_zod10.z.number().or(import_zod10.z.null()),
+      tts_concurrent: import_zod10.z.number().or(import_zod10.z.null())
+    }).describe("Configured limits")
+  }),
+  organization: import_zod10.z.object({
+    current: import_zod10.z.object({
+      transcribe_concurrent: import_zod10.z.number(),
+      tts_concurrent: import_zod10.z.number()
+    }).describe("Live counts read from Redis"),
+    limits: import_zod10.z.object({
+      transcribe_concurrent: import_zod10.z.number().or(import_zod10.z.null()),
+      tts_concurrent: import_zod10.z.number().or(import_zod10.z.null())
+    }).describe("Configured limits")
+  })
+});
 // src/generated/soniox/streaming-types.zod.ts
 var import_zod11 = require("zod");
@@ -24724,10 +25153,10 @@ var sonioxStructuredContextSchema = import_zod11.z.object({
 var sonioxContextSchema = import_zod11.z.union([sonioxStructuredContextSchema, import_zod11.z.string()]);
 var sonioxRealtimeModelSchema = import_zod11.z.enum([
   "stt-rt-v4",
-  "stt-rt-v3",
   "stt-rt-preview",
   "stt-rt-v3-preview",
-  "stt-rt-preview-v2"
+  "stt-rt-preview-v2",
+  "stt-rt-v3"
 ]);
 var streamingTranscriberParams3 = import_zod11.z.object({
   model: sonioxRealtimeModelSchema,
@@ -24735,12 +25164,16 @@ var streamingTranscriberParams3 = import_zod11.z.object({
   sampleRate: import_zod11.z.number().optional(),
   numChannels: import_zod11.z.number().optional(),
   languageHints: import_zod11.z.array(import_zod11.z.string()).optional(),
+  languageHintsStrict: import_zod11.z.boolean().optional(),
   context: sonioxContextSchema.optional(),
   enableSpeakerDiarization: import_zod11.z.boolean().optional(),
   enableLanguageIdentification: import_zod11.z.boolean().optional(),
   enableEndpointDetection: import_zod11.z.boolean().optional(),
+  maxEndpointDelayMs: import_zod11.z.number().optional(),
   translation: sonioxTranslationConfigSchema.optional(),
-  clientReferenceId: import_zod11.z.string().optional()
+  clientReferenceId: import_zod11.z.string().optional(),
+  keepaliveIntervalMs: import_zod11.z.number().optional(),
+  connectTimeoutMs: import_zod11.z.number().optional()
 });
 var sonioxTranslationStatusSchema = import_zod11.z.enum(["original", "translation", "none"]);
 var sonioxTokenSchema = import_zod11.z.object({