voice-router-dev 0.9.4 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -298,6 +298,9 @@ var listenTranscribeQueryParams = import_zod.z.object({
298
298
  diarize: import_zod.z.boolean().optional().describe(
299
299
  "Recognize speaker changes. Each word in the transcript will be assigned a speaker number starting at 0"
300
300
  ),
301
+ diarize_model: import_zod.z.enum(["latest", "v1", "v2"]).optional().describe(
302
+ "Select and enable a specific batch diarization model version. If specifying this parameter, you should not set the deprecated `diarize=true` parameter. Not accepted on streaming requests."
303
+ ),
301
304
  dictation: import_zod.z.boolean().optional().describe("Dictation mode for controlling formatting with dictated speech"),
302
305
  encoding: import_zod.z.enum(["linear16", "flac", "mulaw", "amr-nb", "amr-wb", "opus", "speex", "g729"]).optional().describe("Specify the expected encoding of your submitted audio"),
303
306
  filler_words: import_zod.z.boolean().optional().describe('Filler Words can help transcribe interruptions in your audio, like "uh" and "um"'),
@@ -562,6 +565,7 @@ var listenTranscribeResponse = import_zod.z.object({
562
565
  );
563
566
  var speakGenerateQueryCallbackMethodDefault = "POST";
564
567
  var speakGenerateQueryModelDefault = "aura-asteria-en";
568
+ var speakGenerateQuerySpeedDefault = 1;
565
569
  var speakGenerateQueryParams = import_zod.z.object({
566
570
  callback: import_zod.z.string().optional().describe("URL to which we'll make the callback request"),
567
571
  callback_method: import_zod.z.enum(["POST", "PUT"]).default(speakGenerateQueryCallbackMethodDefault).describe("HTTP method by which the callback request will be made"),
@@ -673,6 +677,9 @@ var speakGenerateQueryParams = import_zod.z.object({
673
677
  import_zod.z.enum(["22050"]).describe("Encoding - mp3. Sample rate is fixed and not configurable (22050 Hz).")
674
678
  ).or(import_zod.z.enum(["48000"]).describe("Encoding - opus. Sample rate is fixed at 48000 Hz.")).optional().describe(
675
679
  "Sample Rate specifies the sample rate for the output audio. Based on the encoding, different sample rates are supported. For some encodings, the sample rate is not configurable"
680
+ ),
681
+ speed: import_zod.z.number().default(speakGenerateQuerySpeedDefault).describe(
682
+ "Speaking rate multiplier that adjusts the pace of generated speech while preserving natural prosody and voice quality. Not yet supported in all languages."
676
683
  )
677
684
  });
678
685
  var speakGenerateHeader = import_zod.z.object({
@@ -1012,7 +1019,7 @@ var createTranscriptBody = import_zod3.z.object({
1012
1019
  "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
1013
1020
  ),
1014
1021
  disfluencies: import_zod3.z.boolean().optional().describe(
1015
- 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
1022
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
1016
1023
  ),
1017
1024
  domain: import_zod3.z.string().nullish().describe(
1018
1025
  'Enable domain-specific transcription models to improve accuracy for specialized terminology. Set to `"medical-v1"` to enable [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) for improved accuracy of medical terms such as medications, procedures, conditions, and dosages.\n\nSupported languages: English (`en`), Spanish (`es`), German (`de`), French (`fr`). If used with an unsupported language, the parameter is ignored and a warning is returned.\n'
@@ -1319,12 +1326,20 @@ var createTranscriptBody = import_zod3.z.object({
1319
1326
  "email_address",
1320
1327
  "event",
1321
1328
  "filename",
1329
+ "gender",
1322
1330
  "gender_sexuality",
1323
1331
  "healthcare_number",
1324
1332
  "injury",
1325
1333
  "ip_address",
1326
1334
  "language",
1327
1335
  "location",
1336
+ "location_address",
1337
+ "location_address_street",
1338
+ "location_city",
1339
+ "location_coordinate",
1340
+ "location_country",
1341
+ "location_state",
1342
+ "location_zip",
1328
1343
  "marital_status",
1329
1344
  "medical_condition",
1330
1345
  "medical_process",
@@ -1333,6 +1348,7 @@ var createTranscriptBody = import_zod3.z.object({
1333
1348
  "number_sequence",
1334
1349
  "occupation",
1335
1350
  "organization",
1351
+ "organization_medical_facility",
1336
1352
  "passport_number",
1337
1353
  "password",
1338
1354
  "person_age",
@@ -1341,6 +1357,7 @@ var createTranscriptBody = import_zod3.z.object({
1341
1357
  "physical_attribute",
1342
1358
  "political_affiliation",
1343
1359
  "religion",
1360
+ "sexuality",
1344
1361
  "statistics",
1345
1362
  "time",
1346
1363
  "url",
@@ -1348,15 +1365,20 @@ var createTranscriptBody = import_zod3.z.object({
1348
1365
  "username",
1349
1366
  "vehicle_id",
1350
1367
  "zodiac_sign"
1351
- ]).describe("The type of PII to redact")
1368
+ ]).describe(
1369
+ "The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
1370
+ )
1352
1371
  ).optional().describe(
1353
1372
  "The list of PII Redaction policies to enable. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
1354
1373
  ),
1355
1374
  redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).describe(
1356
- "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
1375
+ "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
1357
1376
  ).or(import_zod3.z.null()).optional().describe(
1358
1377
  "The replacement logic for detected PII, can be `entity_type` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
1359
1378
  ),
1379
+ redact_pii_return_unredacted: import_zod3.z.boolean().optional().describe(
1380
+ "When set to `true`, returns the original unredacted transcript alongside the redacted one in the same response. Requires `redact_pii` to be `true`, otherwise a 400 error is returned.\n\nWhen enabled, the response includes the additional fields `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. The existing `text`, `words`, and `utterances` fields remain fully redacted. When disabled (default), the response is unchanged and contains only the redacted transcript. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details.\n"
1381
+ ),
1360
1382
  sentiment_analysis: import_zod3.z.boolean().optional().describe(
1361
1383
  "Enable [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech), can be true or false"
1362
1384
  ),
@@ -1454,10 +1476,10 @@ var createTranscriptBody = import_zod3.z.object({
1454
1476
  ),
1455
1477
  summary_model: import_zod3.z.enum(["informative", "conversational", "catchy"]).optional().describe("The model to summarize the transcript"),
1456
1478
  summary_type: import_zod3.z.enum(["bullets", "bullets_verbose", "gist", "headline", "paragraph"]).optional().describe("The type of summary"),
1457
- remove_audio_tags: import_zod3.z.enum(["all"]).describe(
1458
- 'Remove [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) from the transcript text. Set to `"all"` to remove all audio tags.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
1479
+ remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
1480
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
1459
1481
  ).or(import_zod3.z.null()).optional().describe(
1460
- 'Remove [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) from the transcript text. Set to `"all"` to remove all audio tags.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
1482
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
1461
1483
  ),
1462
1484
  temperature: import_zod3.z.number().optional().describe(
1463
1485
  "Control the amount of randomness injected into the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
@@ -1588,7 +1610,7 @@ var createTranscriptResponse = import_zod3.z.object({
1588
1610
  "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
1589
1611
  ),
1590
1612
  disfluencies: import_zod3.z.boolean().nullish().describe(
1591
- 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
1613
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
1592
1614
  ),
1593
1615
  domain: import_zod3.z.string().nullish().describe(
1594
1616
  'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
@@ -1611,12 +1633,20 @@ var createTranscriptResponse = import_zod3.z.object({
1611
1633
  "email_address",
1612
1634
  "event",
1613
1635
  "filename",
1636
+ "gender",
1614
1637
  "gender_sexuality",
1615
1638
  "healthcare_number",
1616
1639
  "injury",
1617
1640
  "ip_address",
1618
1641
  "language",
1619
1642
  "location",
1643
+ "location_address",
1644
+ "location_address_street",
1645
+ "location_city",
1646
+ "location_coordinate",
1647
+ "location_country",
1648
+ "location_state",
1649
+ "location_zip",
1620
1650
  "marital_status",
1621
1651
  "medical_condition",
1622
1652
  "medical_process",
@@ -1625,6 +1655,7 @@ var createTranscriptResponse = import_zod3.z.object({
1625
1655
  "number_sequence",
1626
1656
  "occupation",
1627
1657
  "organization",
1658
+ "organization_medical_facility",
1628
1659
  "passport_number",
1629
1660
  "password",
1630
1661
  "person_age",
@@ -1633,6 +1664,7 @@ var createTranscriptResponse = import_zod3.z.object({
1633
1664
  "physical_attribute",
1634
1665
  "political_affiliation",
1635
1666
  "religion",
1667
+ "sexuality",
1636
1668
  "statistics",
1637
1669
  "time",
1638
1670
  "url",
@@ -1937,6 +1969,24 @@ var createTranscriptResponse = import_zod3.z.object({
1937
1969
  }).optional().describe(
1938
1970
  "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
1939
1971
  ),
1972
+ metadata: import_zod3.z.object({
1973
+ domain_used: import_zod3.z.string().nullish().describe(
1974
+ 'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
1975
+ ),
1976
+ warnings: import_zod3.z.array(
1977
+ import_zod3.z.object({
1978
+ message: import_zod3.z.string().describe("A human-readable description of the warning.")
1979
+ }).describe(
1980
+ "A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
1981
+ )
1982
+ ).optional().describe(
1983
+ "Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
1984
+ )
1985
+ }).describe(
1986
+ "Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
1987
+ ).or(import_zod3.z.null()).optional().describe(
1988
+ "Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
1989
+ ),
1940
1990
  multichannel: import_zod3.z.boolean().nullish().describe(
1941
1991
  "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
1942
1992
  ),
@@ -1984,12 +2034,20 @@ var createTranscriptResponse = import_zod3.z.object({
1984
2034
  "email_address",
1985
2035
  "event",
1986
2036
  "filename",
2037
+ "gender",
1987
2038
  "gender_sexuality",
1988
2039
  "healthcare_number",
1989
2040
  "injury",
1990
2041
  "ip_address",
1991
2042
  "language",
1992
2043
  "location",
2044
+ "location_address",
2045
+ "location_address_street",
2046
+ "location_city",
2047
+ "location_coordinate",
2048
+ "location_country",
2049
+ "location_state",
2050
+ "location_zip",
1993
2051
  "marital_status",
1994
2052
  "medical_condition",
1995
2053
  "medical_process",
@@ -1998,6 +2056,7 @@ var createTranscriptResponse = import_zod3.z.object({
1998
2056
  "number_sequence",
1999
2057
  "occupation",
2000
2058
  "organization",
2059
+ "organization_medical_facility",
2001
2060
  "passport_number",
2002
2061
  "password",
2003
2062
  "person_age",
@@ -2006,6 +2065,7 @@ var createTranscriptResponse = import_zod3.z.object({
2006
2065
  "physical_attribute",
2007
2066
  "political_affiliation",
2008
2067
  "religion",
2068
+ "sexuality",
2009
2069
  "statistics",
2010
2070
  "time",
2011
2071
  "url",
@@ -2013,12 +2073,17 @@ var createTranscriptResponse = import_zod3.z.object({
2013
2073
  "username",
2014
2074
  "vehicle_id",
2015
2075
  "zodiac_sign"
2016
- ]).describe("The type of PII to redact")
2076
+ ]).describe(
2077
+ "The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
2078
+ )
2017
2079
  ).nullish().describe(
2018
2080
  "The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
2019
2081
  ),
2020
2082
  redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
2021
- "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
2083
+ "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
2084
+ ),
2085
+ redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
2086
+ "Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
2022
2087
  ),
2023
2088
  sentiment_analysis: import_zod3.z.boolean().nullish().describe(
2024
2089
  "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
@@ -2155,20 +2220,23 @@ var createTranscriptResponse = import_zod3.z.object({
2155
2220
  "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
2156
2221
  ),
2157
2222
  summary_model: import_zod3.z.string().nullish().describe(
2158
- "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
2223
+ "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
2159
2224
  ),
2160
2225
  summary_type: import_zod3.z.string().nullish().describe(
2161
- "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
2226
+ "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
2162
2227
  ),
2163
- remove_audio_tags: import_zod3.z.enum(["all"]).describe(
2164
- "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
2228
+ remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
2229
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
2165
2230
  ).or(import_zod3.z.null()).optional().describe(
2166
- "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
2231
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
2167
2232
  ),
2168
2233
  temperature: import_zod3.z.number().nullish().describe(
2169
2234
  "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
2170
2235
  ),
2171
2236
  text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
2237
+ unredacted_text: import_zod3.z.string().nullish().describe(
2238
+ "The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
2239
+ ),
2172
2240
  throttled: import_zod3.z.boolean().nullish().describe(
2173
2241
  "True while a request is throttled and false when a request is no longer throttled"
2174
2242
  ),
@@ -2205,6 +2273,39 @@ var createTranscriptResponse = import_zod3.z.object({
2205
2273
  ).nullish().describe(
2206
2274
  "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
2207
2275
  ),
2276
+ unredacted_utterances: import_zod3.z.array(
2277
+ import_zod3.z.object({
2278
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
2279
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
2280
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
2281
+ text: import_zod3.z.string().describe("The text for this utterance"),
2282
+ words: import_zod3.z.array(
2283
+ import_zod3.z.object({
2284
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
2285
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
2286
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
2287
+ text: import_zod3.z.string().describe("The text of the word"),
2288
+ channel: import_zod3.z.string().nullish().describe(
2289
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
2290
+ ),
2291
+ speaker: import_zod3.z.string().nullable().describe(
2292
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
2293
+ )
2294
+ })
2295
+ ).describe("The words in the utterance."),
2296
+ channel: import_zod3.z.string().nullish().describe(
2297
+ "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
2298
+ ),
2299
+ speaker: import_zod3.z.string().describe(
2300
+ 'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
2301
+ ),
2302
+ translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
2303
+ 'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
2304
+ )
2305
+ })
2306
+ ).nullish().describe(
2307
+ "The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
2308
+ ),
2208
2309
  webhook_auth: import_zod3.z.boolean().describe(
2209
2310
  "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
2210
2311
  ),
@@ -2233,6 +2334,22 @@ var createTranscriptResponse = import_zod3.z.object({
2233
2334
  ).nullish().describe(
2234
2335
  "An array of temporally-sequential word objects, one for each word in the transcript.\n"
2235
2336
  ),
2337
+ unredacted_words: import_zod3.z.array(
2338
+ import_zod3.z.object({
2339
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
2340
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
2341
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
2342
+ text: import_zod3.z.string().describe("The text of the word"),
2343
+ channel: import_zod3.z.string().nullish().describe(
2344
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
2345
+ ),
2346
+ speaker: import_zod3.z.string().nullable().describe(
2347
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
2348
+ )
2349
+ })
2350
+ ).nullish().describe(
2351
+ "The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
2352
+ ),
2236
2353
  acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
2237
2354
  custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
2238
2355
  language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
@@ -2404,7 +2521,7 @@ var getTranscriptResponse = import_zod3.z.object({
2404
2521
  "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
2405
2522
  ),
2406
2523
  disfluencies: import_zod3.z.boolean().nullish().describe(
2407
- 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
2524
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
2408
2525
  ),
2409
2526
  domain: import_zod3.z.string().nullish().describe(
2410
2527
  'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
@@ -2427,12 +2544,20 @@ var getTranscriptResponse = import_zod3.z.object({
2427
2544
  "email_address",
2428
2545
  "event",
2429
2546
  "filename",
2547
+ "gender",
2430
2548
  "gender_sexuality",
2431
2549
  "healthcare_number",
2432
2550
  "injury",
2433
2551
  "ip_address",
2434
2552
  "language",
2435
2553
  "location",
2554
+ "location_address",
2555
+ "location_address_street",
2556
+ "location_city",
2557
+ "location_coordinate",
2558
+ "location_country",
2559
+ "location_state",
2560
+ "location_zip",
2436
2561
  "marital_status",
2437
2562
  "medical_condition",
2438
2563
  "medical_process",
@@ -2441,6 +2566,7 @@ var getTranscriptResponse = import_zod3.z.object({
2441
2566
  "number_sequence",
2442
2567
  "occupation",
2443
2568
  "organization",
2569
+ "organization_medical_facility",
2444
2570
  "passport_number",
2445
2571
  "password",
2446
2572
  "person_age",
@@ -2449,6 +2575,7 @@ var getTranscriptResponse = import_zod3.z.object({
2449
2575
  "physical_attribute",
2450
2576
  "political_affiliation",
2451
2577
  "religion",
2578
+ "sexuality",
2452
2579
  "statistics",
2453
2580
  "time",
2454
2581
  "url",
@@ -2753,6 +2880,24 @@ var getTranscriptResponse = import_zod3.z.object({
2753
2880
  }).optional().describe(
2754
2881
  "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
2755
2882
  ),
2883
+ metadata: import_zod3.z.object({
2884
+ domain_used: import_zod3.z.string().nullish().describe(
2885
+ 'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
2886
+ ),
2887
+ warnings: import_zod3.z.array(
2888
+ import_zod3.z.object({
2889
+ message: import_zod3.z.string().describe("A human-readable description of the warning.")
2890
+ }).describe(
2891
+ "A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
2892
+ )
2893
+ ).optional().describe(
2894
+ "Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
2895
+ )
2896
+ }).describe(
2897
+ "Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
2898
+ ).or(import_zod3.z.null()).optional().describe(
2899
+ "Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
2900
+ ),
2756
2901
  multichannel: import_zod3.z.boolean().nullish().describe(
2757
2902
  "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
2758
2903
  ),
@@ -2800,12 +2945,20 @@ var getTranscriptResponse = import_zod3.z.object({
2800
2945
  "email_address",
2801
2946
  "event",
2802
2947
  "filename",
2948
+ "gender",
2803
2949
  "gender_sexuality",
2804
2950
  "healthcare_number",
2805
2951
  "injury",
2806
2952
  "ip_address",
2807
2953
  "language",
2808
2954
  "location",
2955
+ "location_address",
2956
+ "location_address_street",
2957
+ "location_city",
2958
+ "location_coordinate",
2959
+ "location_country",
2960
+ "location_state",
2961
+ "location_zip",
2809
2962
  "marital_status",
2810
2963
  "medical_condition",
2811
2964
  "medical_process",
@@ -2814,6 +2967,7 @@ var getTranscriptResponse = import_zod3.z.object({
2814
2967
  "number_sequence",
2815
2968
  "occupation",
2816
2969
  "organization",
2970
+ "organization_medical_facility",
2817
2971
  "passport_number",
2818
2972
  "password",
2819
2973
  "person_age",
@@ -2822,6 +2976,7 @@ var getTranscriptResponse = import_zod3.z.object({
2822
2976
  "physical_attribute",
2823
2977
  "political_affiliation",
2824
2978
  "religion",
2979
+ "sexuality",
2825
2980
  "statistics",
2826
2981
  "time",
2827
2982
  "url",
@@ -2829,12 +2984,17 @@ var getTranscriptResponse = import_zod3.z.object({
2829
2984
  "username",
2830
2985
  "vehicle_id",
2831
2986
  "zodiac_sign"
2832
- ]).describe("The type of PII to redact")
2987
+ ]).describe(
2988
+ "The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
2989
+ )
2833
2990
  ).nullish().describe(
2834
2991
  "The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
2835
2992
  ),
2836
2993
  redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
2837
- "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
2994
+ "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
2995
+ ),
2996
+ redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
2997
+ "Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
2838
2998
  ),
2839
2999
  sentiment_analysis: import_zod3.z.boolean().nullish().describe(
2840
3000
  "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
@@ -2971,20 +3131,23 @@ var getTranscriptResponse = import_zod3.z.object({
2971
3131
  "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
2972
3132
  ),
2973
3133
  summary_model: import_zod3.z.string().nullish().describe(
2974
- "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
3134
+ "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
2975
3135
  ),
2976
3136
  summary_type: import_zod3.z.string().nullish().describe(
2977
- "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
3137
+ "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
2978
3138
  ),
2979
- remove_audio_tags: import_zod3.z.enum(["all"]).describe(
2980
- "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
3139
+ remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
3140
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
2981
3141
  ).or(import_zod3.z.null()).optional().describe(
2982
- "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
3142
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
2983
3143
  ),
2984
3144
  temperature: import_zod3.z.number().nullish().describe(
2985
3145
  "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
2986
3146
  ),
2987
3147
  text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
3148
+ unredacted_text: import_zod3.z.string().nullish().describe(
3149
+ "The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
3150
+ ),
2988
3151
  throttled: import_zod3.z.boolean().nullish().describe(
2989
3152
  "True while a request is throttled and false when a request is no longer throttled"
2990
3153
  ),
@@ -3021,6 +3184,39 @@ var getTranscriptResponse = import_zod3.z.object({
3021
3184
  ).nullish().describe(
3022
3185
  "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
3023
3186
  ),
3187
+ unredacted_utterances: import_zod3.z.array(
3188
+ import_zod3.z.object({
3189
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
3190
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
3191
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
3192
+ text: import_zod3.z.string().describe("The text for this utterance"),
3193
+ words: import_zod3.z.array(
3194
+ import_zod3.z.object({
3195
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
3196
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
3197
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
3198
+ text: import_zod3.z.string().describe("The text of the word"),
3199
+ channel: import_zod3.z.string().nullish().describe(
3200
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
3201
+ ),
3202
+ speaker: import_zod3.z.string().nullable().describe(
3203
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
3204
+ )
3205
+ })
3206
+ ).describe("The words in the utterance."),
3207
+ channel: import_zod3.z.string().nullish().describe(
3208
+ "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
3209
+ ),
3210
+ speaker: import_zod3.z.string().describe(
3211
+ 'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
3212
+ ),
3213
+ translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
3214
+ 'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
3215
+ )
3216
+ })
3217
+ ).nullish().describe(
3218
+ "The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
3219
+ ),
3024
3220
  webhook_auth: import_zod3.z.boolean().describe(
3025
3221
  "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
3026
3222
  ),
@@ -3049,6 +3245,22 @@ var getTranscriptResponse = import_zod3.z.object({
3049
3245
  ).nullish().describe(
3050
3246
  "An array of temporally-sequential word objects, one for each word in the transcript.\n"
3051
3247
  ),
3248
+ unredacted_words: import_zod3.z.array(
3249
+ import_zod3.z.object({
3250
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
3251
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
3252
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
3253
+ text: import_zod3.z.string().describe("The text of the word"),
3254
+ channel: import_zod3.z.string().nullish().describe(
3255
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
3256
+ ),
3257
+ speaker: import_zod3.z.string().nullable().describe(
3258
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
3259
+ )
3260
+ })
3261
+ ).nullish().describe(
3262
+ "The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
3263
+ ),
3052
3264
  acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
3053
3265
  custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
3054
3266
  language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
@@ -3181,7 +3393,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
3181
3393
  "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
3182
3394
  ),
3183
3395
  disfluencies: import_zod3.z.boolean().nullish().describe(
3184
- 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
3396
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
3185
3397
  ),
3186
3398
  domain: import_zod3.z.string().nullish().describe(
3187
3399
  'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
@@ -3204,12 +3416,20 @@ var deleteTranscriptResponse = import_zod3.z.object({
3204
3416
  "email_address",
3205
3417
  "event",
3206
3418
  "filename",
3419
+ "gender",
3207
3420
  "gender_sexuality",
3208
3421
  "healthcare_number",
3209
3422
  "injury",
3210
3423
  "ip_address",
3211
3424
  "language",
3212
3425
  "location",
3426
+ "location_address",
3427
+ "location_address_street",
3428
+ "location_city",
3429
+ "location_coordinate",
3430
+ "location_country",
3431
+ "location_state",
3432
+ "location_zip",
3213
3433
  "marital_status",
3214
3434
  "medical_condition",
3215
3435
  "medical_process",
@@ -3218,6 +3438,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
3218
3438
  "number_sequence",
3219
3439
  "occupation",
3220
3440
  "organization",
3441
+ "organization_medical_facility",
3221
3442
  "passport_number",
3222
3443
  "password",
3223
3444
  "person_age",
@@ -3226,6 +3447,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
3226
3447
  "physical_attribute",
3227
3448
  "political_affiliation",
3228
3449
  "religion",
3450
+ "sexuality",
3229
3451
  "statistics",
3230
3452
  "time",
3231
3453
  "url",
@@ -3530,6 +3752,24 @@ var deleteTranscriptResponse = import_zod3.z.object({
3530
3752
  }).optional().describe(
3531
3753
  "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
3532
3754
  ),
3755
+ metadata: import_zod3.z.object({
3756
+ domain_used: import_zod3.z.string().nullish().describe(
3757
+ 'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
3758
+ ),
3759
+ warnings: import_zod3.z.array(
3760
+ import_zod3.z.object({
3761
+ message: import_zod3.z.string().describe("A human-readable description of the warning.")
3762
+ }).describe(
3763
+ "A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
3764
+ )
3765
+ ).optional().describe(
3766
+ "Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
3767
+ )
3768
+ }).describe(
3769
+ "Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
3770
+ ).or(import_zod3.z.null()).optional().describe(
3771
+ "Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
3772
+ ),
3533
3773
  multichannel: import_zod3.z.boolean().nullish().describe(
3534
3774
  "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
3535
3775
  ),
@@ -3577,12 +3817,20 @@ var deleteTranscriptResponse = import_zod3.z.object({
3577
3817
  "email_address",
3578
3818
  "event",
3579
3819
  "filename",
3820
+ "gender",
3580
3821
  "gender_sexuality",
3581
3822
  "healthcare_number",
3582
3823
  "injury",
3583
3824
  "ip_address",
3584
3825
  "language",
3585
3826
  "location",
3827
+ "location_address",
3828
+ "location_address_street",
3829
+ "location_city",
3830
+ "location_coordinate",
3831
+ "location_country",
3832
+ "location_state",
3833
+ "location_zip",
3586
3834
  "marital_status",
3587
3835
  "medical_condition",
3588
3836
  "medical_process",
@@ -3591,6 +3839,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
3591
3839
  "number_sequence",
3592
3840
  "occupation",
3593
3841
  "organization",
3842
+ "organization_medical_facility",
3594
3843
  "passport_number",
3595
3844
  "password",
3596
3845
  "person_age",
@@ -3599,6 +3848,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
3599
3848
  "physical_attribute",
3600
3849
  "political_affiliation",
3601
3850
  "religion",
3851
+ "sexuality",
3602
3852
  "statistics",
3603
3853
  "time",
3604
3854
  "url",
@@ -3606,12 +3856,17 @@ var deleteTranscriptResponse = import_zod3.z.object({
3606
3856
  "username",
3607
3857
  "vehicle_id",
3608
3858
  "zodiac_sign"
3609
- ]).describe("The type of PII to redact")
3859
+ ]).describe(
3860
+ "The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
3861
+ )
3610
3862
  ).nullish().describe(
3611
3863
  "The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
3612
3864
  ),
3613
3865
  redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
3614
- "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
3866
+ "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
3867
+ ),
3868
+ redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
3869
+ "Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
3615
3870
  ),
3616
3871
  sentiment_analysis: import_zod3.z.boolean().nullish().describe(
3617
3872
  "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
@@ -3748,20 +4003,23 @@ var deleteTranscriptResponse = import_zod3.z.object({
3748
4003
  "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
3749
4004
  ),
3750
4005
  summary_model: import_zod3.z.string().nullish().describe(
3751
- "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
4006
+ "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
3752
4007
  ),
3753
4008
  summary_type: import_zod3.z.string().nullish().describe(
3754
- "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
4009
+ "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
3755
4010
  ),
3756
- remove_audio_tags: import_zod3.z.enum(["all"]).describe(
3757
- "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
4011
+ remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
4012
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
3758
4013
  ).or(import_zod3.z.null()).optional().describe(
3759
- "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
4014
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
3760
4015
  ),
3761
4016
  temperature: import_zod3.z.number().nullish().describe(
3762
4017
  "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
3763
4018
  ),
3764
4019
  text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
4020
+ unredacted_text: import_zod3.z.string().nullish().describe(
4021
+ "The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
4022
+ ),
3765
4023
  throttled: import_zod3.z.boolean().nullish().describe(
3766
4024
  "True while a request is throttled and false when a request is no longer throttled"
3767
4025
  ),
@@ -3798,6 +4056,39 @@ var deleteTranscriptResponse = import_zod3.z.object({
3798
4056
  ).nullish().describe(
3799
4057
  "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
3800
4058
  ),
4059
+ unredacted_utterances: import_zod3.z.array(
4060
+ import_zod3.z.object({
4061
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
4062
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
4063
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
4064
+ text: import_zod3.z.string().describe("The text for this utterance"),
4065
+ words: import_zod3.z.array(
4066
+ import_zod3.z.object({
4067
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
4068
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
4069
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
4070
+ text: import_zod3.z.string().describe("The text of the word"),
4071
+ channel: import_zod3.z.string().nullish().describe(
4072
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
4073
+ ),
4074
+ speaker: import_zod3.z.string().nullable().describe(
4075
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
4076
+ )
4077
+ })
4078
+ ).describe("The words in the utterance."),
4079
+ channel: import_zod3.z.string().nullish().describe(
4080
+ "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
4081
+ ),
4082
+ speaker: import_zod3.z.string().describe(
4083
+ 'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
4084
+ ),
4085
+ translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
4086
+ 'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
4087
+ )
4088
+ })
4089
+ ).nullish().describe(
4090
+ "The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
4091
+ ),
3801
4092
  webhook_auth: import_zod3.z.boolean().describe(
3802
4093
  "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
3803
4094
  ),
@@ -3826,6 +4117,22 @@ var deleteTranscriptResponse = import_zod3.z.object({
3826
4117
  ).nullish().describe(
3827
4118
  "An array of temporally-sequential word objects, one for each word in the transcript.\n"
3828
4119
  ),
4120
+ unredacted_words: import_zod3.z.array(
4121
+ import_zod3.z.object({
4122
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
4123
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
4124
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
4125
+ text: import_zod3.z.string().describe("The text of the word"),
4126
+ channel: import_zod3.z.string().nullish().describe(
4127
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
4128
+ ),
4129
+ speaker: import_zod3.z.string().nullable().describe(
4130
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
4131
+ )
4132
+ })
4133
+ ).nullish().describe(
4134
+ "The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
4135
+ ),
3829
4136
  acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
3830
4137
  custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
3831
4138
  language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
@@ -3981,7 +4288,21 @@ var streamingTranscriberParams = import_zod4.z.object({
3981
4288
  inactivityTimeout: import_zod4.z.number().optional().describe("From SDK v3"),
3982
4289
  speakerLabels: import_zod4.z.boolean().optional().describe("From SDK v3"),
3983
4290
  maxSpeakers: import_zod4.z.number().optional().describe("From SDK v3"),
3984
- llmGateway: import_zod4.z.unknown().optional().describe("From SDK v3")
4291
+ voiceFocus: import_zod4.z.unknown().optional().describe("From SDK v3"),
4292
+ voiceFocusThreshold: import_zod4.z.number().optional().describe("From SDK v3"),
4293
+ continuousPartials: import_zod4.z.boolean().optional().describe("From SDK v3"),
4294
+ interruptionDelay: import_zod4.z.number().optional().describe("From SDK v3"),
4295
+ turnLeftPadMs: import_zod4.z.number().optional().describe("From SDK v3"),
4296
+ customerSupportAudioCapture: import_zod4.z.boolean().optional().describe("From SDK v3"),
4297
+ includePartialTurns: import_zod4.z.boolean().optional().describe("From SDK v3"),
4298
+ redactPii: import_zod4.z.boolean().optional().describe("From SDK v3"),
4299
+ redactPiiPolicies: import_zod4.z.unknown().optional().describe("From SDK v3"),
4300
+ redactPiiSub: import_zod4.z.unknown().optional().describe("From SDK v3"),
4301
+ llmGateway: import_zod4.z.unknown().optional().describe("From SDK v3"),
4302
+ webhookUrl: import_zod4.z.string().optional().describe("From SDK v3"),
4303
+ webhookAuthHeaderName: import_zod4.z.string().optional().describe("From SDK v3"),
4304
+ webhookAuthHeaderValue: import_zod4.z.string().optional().describe("From SDK v3"),
4305
+ mode: import_zod4.z.unknown().describe("From SDK v3")
3985
4306
  });
3986
4307
  var streamingUpdateConfigParams = import_zod4.z.object({
3987
4308
  end_utterance_silence_threshold: import_zod4.z.number().min(0).max(2e4).optional().describe("The duration threshold in milliseconds"),
@@ -3993,7 +4314,9 @@ var streamingUpdateConfigParams = import_zod4.z.object({
3993
4314
  format_turns: import_zod4.z.boolean().optional().describe("From SDK v3"),
3994
4315
  keyterms_prompt: import_zod4.z.array(import_zod4.z.string()).optional().describe("From SDK v3"),
3995
4316
  prompt: import_zod4.z.string().optional().describe("From SDK v3"),
3996
- filter_profanity: import_zod4.z.boolean().optional().describe("From SDK v3")
4317
+ filter_profanity: import_zod4.z.boolean().optional().describe("From SDK v3"),
4318
+ interruption_delay: import_zod4.z.number().optional().describe("From SDK v3"),
4319
+ turn_left_pad_ms: import_zod4.z.number().optional().describe("From SDK v3")
3997
4320
  });
3998
4321
 
3999
4322
  // src/generated/gladia/api/gladiaControlAPI.zod.ts
@@ -4030,7 +4353,7 @@ var preRecordedControllerInitPreRecordedJobV2BodyTranslationConfigMatchOriginalU
4030
4353
  var preRecordedControllerInitPreRecordedJobV2BodyTranslationConfigLipsyncDefault = true;
4031
4354
  var preRecordedControllerInitPreRecordedJobV2BodyTranslationConfigContextAdaptationDefault = true;
4032
4355
  var preRecordedControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault = "general";
4033
- var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
4356
+ var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
4034
4357
  var preRecordedControllerInitPreRecordedJobV2BodyLanguageConfigLanguagesDefault = [];
4035
4358
  var preRecordedControllerInitPreRecordedJobV2Body = import_zod5.z.object({
4036
4359
  custom_vocabulary: import_zod5.z.boolean().optional().describe(
@@ -4315,23 +4638,23 @@ var preRecordedControllerInitPreRecordedJobV2Body = import_zod5.z.object({
4315
4638
  "Forces the translation to use informal language forms when available in the target language."
4316
4639
  )
4317
4640
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
4318
- summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
4641
+ summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
4319
4642
  summarization_config: import_zod5.z.object({
4320
4643
  type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(preRecordedControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault).describe("The type of summarization to apply")
4321
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
4644
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
4322
4645
  named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
4323
4646
  custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
4324
4647
  custom_spelling_config: import_zod5.z.object({
4325
4648
  spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
4326
4649
  }).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
4327
4650
  sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
4328
- audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
4651
+ audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
4329
4652
  audio_to_llm_config: import_zod5.z.object({
4330
4653
  prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
4331
4654
  model: import_zod5.z.string().default(preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault).describe(
4332
4655
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
4333
4656
  )
4334
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
4657
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
4335
4658
  pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
4336
4659
  pii_redaction_config: import_zod5.z.object({
4337
4660
  entity_types: import_zod5.z.enum([
@@ -4573,7 +4896,7 @@ var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsTrans
4573
4896
  var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsTranslationConfigLipsyncDefault = true;
4574
4897
  var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsTranslationConfigContextAdaptationDefault = true;
4575
4898
  var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault = "general";
4576
- var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
4899
+ var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
4577
4900
  var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsLanguageConfigLanguagesDefault = [];
4578
4901
  var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemResultTranscriptionUtterancesItemChannelMin = 0;
4579
4902
  var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemResultTranscriptionUtterancesItemSpeakerMin = 0;
@@ -4917,12 +5240,12 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
4917
5240
  "Forces the translation to use informal language forms when available in the target language."
4918
5241
  )
4919
5242
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
4920
- summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
5243
+ summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
4921
5244
  summarization_config: import_zod5.z.object({
4922
5245
  type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
4923
5246
  preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
4924
5247
  ).describe("The type of summarization to apply")
4925
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
5248
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
4926
5249
  named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
4927
5250
  custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
4928
5251
  custom_spelling_config: import_zod5.z.object({
@@ -4931,7 +5254,7 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
4931
5254
  "**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
4932
5255
  ),
4933
5256
  sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
4934
- audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
5257
+ audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
4935
5258
  audio_to_llm_config: import_zod5.z.object({
4936
5259
  prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
4937
5260
  model: import_zod5.z.string().default(
@@ -4939,7 +5262,7 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
4939
5262
  ).describe(
4940
5263
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
4941
5264
  )
4942
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
5265
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
4943
5266
  pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
4944
5267
  pii_redaction_config: import_zod5.z.object({
4945
5268
  entity_types: import_zod5.z.enum([
@@ -6064,7 +6387,7 @@ var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsTranslationConf
6064
6387
  var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsTranslationConfigLipsyncDefault = true;
6065
6388
  var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsTranslationConfigContextAdaptationDefault = true;
6066
6389
  var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSummarizationConfigTypeDefault = "general";
6067
- var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
6390
+ var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
6068
6391
  var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsLanguageConfigLanguagesDefault = [];
6069
6392
  var preRecordedControllerGetPreRecordedJobV2ResponseResultTranscriptionUtterancesItemChannelMin = 0;
6070
6393
  var preRecordedControllerGetPreRecordedJobV2ResponseResultTranscriptionUtterancesItemSpeakerMin = 0;
@@ -6401,19 +6724,19 @@ var preRecordedControllerGetPreRecordedJobV2Response = import_zod5.z.object({
6401
6724
  "Forces the translation to use informal language forms when available in the target language."
6402
6725
  )
6403
6726
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
6404
- summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
6727
+ summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
6405
6728
  summarization_config: import_zod5.z.object({
6406
6729
  type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
6407
6730
  preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSummarizationConfigTypeDefault
6408
6731
  ).describe("The type of summarization to apply")
6409
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
6732
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
6410
6733
  named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
6411
6734
  custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
6412
6735
  custom_spelling_config: import_zod5.z.object({
6413
6736
  spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
6414
6737
  }).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
6415
6738
  sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
6416
- audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
6739
+ audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
6417
6740
  audio_to_llm_config: import_zod5.z.object({
6418
6741
  prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
6419
6742
  model: import_zod5.z.string().default(
@@ -6421,7 +6744,7 @@ var preRecordedControllerGetPreRecordedJobV2Response = import_zod5.z.object({
6421
6744
  ).describe(
6422
6745
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
6423
6746
  )
6424
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
6747
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
6425
6748
  pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
6426
6749
  pii_redaction_config: import_zod5.z.object({
6427
6750
  entity_types: import_zod5.z.enum([
@@ -7523,7 +7846,7 @@ var transcriptionControllerInitPreRecordedJobV2BodyTranslationConfigMatchOrigina
7523
7846
  var transcriptionControllerInitPreRecordedJobV2BodyTranslationConfigLipsyncDefault = true;
7524
7847
  var transcriptionControllerInitPreRecordedJobV2BodyTranslationConfigContextAdaptationDefault = true;
7525
7848
  var transcriptionControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault = "general";
7526
- var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
7849
+ var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
7527
7850
  var transcriptionControllerInitPreRecordedJobV2BodyLanguageConfigLanguagesDefault = [];
7528
7851
  var transcriptionControllerInitPreRecordedJobV2Body = import_zod5.z.object({
7529
7852
  custom_vocabulary: import_zod5.z.boolean().optional().describe(
@@ -7812,23 +8135,23 @@ var transcriptionControllerInitPreRecordedJobV2Body = import_zod5.z.object({
7812
8135
  "Forces the translation to use informal language forms when available in the target language."
7813
8136
  )
7814
8137
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
7815
- summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
8138
+ summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
7816
8139
  summarization_config: import_zod5.z.object({
7817
8140
  type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(transcriptionControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault).describe("The type of summarization to apply")
7818
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
8141
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
7819
8142
  named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
7820
8143
  custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
7821
8144
  custom_spelling_config: import_zod5.z.object({
7822
8145
  spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
7823
8146
  }).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
7824
8147
  sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
7825
- audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
8148
+ audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
7826
8149
  audio_to_llm_config: import_zod5.z.object({
7827
8150
  prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
7828
8151
  model: import_zod5.z.string().default(transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault).describe(
7829
8152
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
7830
8153
  )
7831
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
8154
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
7832
8155
  pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
7833
8156
  pii_redaction_config: import_zod5.z.object({
7834
8157
  entity_types: import_zod5.z.enum([
@@ -8073,7 +8396,7 @@ var transcriptionControllerListV2ResponseItemsItemRequestParamsTranslationConfig
8073
8396
  var transcriptionControllerListV2ResponseItemsItemRequestParamsTranslationConfigLipsyncDefault = true;
8074
8397
  var transcriptionControllerListV2ResponseItemsItemRequestParamsTranslationConfigContextAdaptationDefault = true;
8075
8398
  var transcriptionControllerListV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault = "general";
8076
- var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
8399
+ var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
8077
8400
  var transcriptionControllerListV2ResponseItemsItemRequestParamsLanguageConfigLanguagesDefault = [];
8078
8401
  var transcriptionControllerListV2ResponseItemsItemResultTranscriptionUtterancesItemChannelMin = 0;
8079
8402
  var transcriptionControllerListV2ResponseItemsItemResultTranscriptionUtterancesItemSpeakerMin = 0;
@@ -8462,12 +8785,12 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
8462
8785
  "Forces the translation to use informal language forms when available in the target language."
8463
8786
  )
8464
8787
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
8465
- summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
8788
+ summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
8466
8789
  summarization_config: import_zod5.z.object({
8467
8790
  type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
8468
8791
  transcriptionControllerListV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
8469
8792
  ).describe("The type of summarization to apply")
8470
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
8793
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
8471
8794
  named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
8472
8795
  custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
8473
8796
  custom_spelling_config: import_zod5.z.object({
@@ -8476,7 +8799,7 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
8476
8799
  "**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
8477
8800
  ),
8478
8801
  sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
8479
- audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
8802
+ audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
8480
8803
  audio_to_llm_config: import_zod5.z.object({
8481
8804
  prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
8482
8805
  model: import_zod5.z.string().default(
@@ -8484,7 +8807,7 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
8484
8807
  ).describe(
8485
8808
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
8486
8809
  )
8487
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
8810
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
8488
8811
  pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
8489
8812
  pii_redaction_config: import_zod5.z.object({
8490
8813
  entity_types: import_zod5.z.enum([
@@ -10790,7 +11113,7 @@ var transcriptionControllerGetTranscriptV2ResponseRequestParamsTranslationConfig
10790
11113
  var transcriptionControllerGetTranscriptV2ResponseRequestParamsTranslationConfigLipsyncDefault = true;
10791
11114
  var transcriptionControllerGetTranscriptV2ResponseRequestParamsTranslationConfigContextAdaptationDefault = true;
10792
11115
  var transcriptionControllerGetTranscriptV2ResponseRequestParamsSummarizationConfigTypeDefault = "general";
10793
- var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
11116
+ var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
10794
11117
  var transcriptionControllerGetTranscriptV2ResponseRequestParamsLanguageConfigLanguagesDefault = [];
10795
11118
  var transcriptionControllerGetTranscriptV2ResponseResultTranscriptionUtterancesItemChannelMin = 0;
10796
11119
  var transcriptionControllerGetTranscriptV2ResponseResultTranscriptionUtterancesItemSpeakerMin = 0;
@@ -11173,19 +11496,19 @@ var transcriptionControllerGetTranscriptV2Response = import_zod5.z.discriminated
11173
11496
  "Forces the translation to use informal language forms when available in the target language."
11174
11497
  )
11175
11498
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
11176
- summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
11499
+ summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
11177
11500
  summarization_config: import_zod5.z.object({
11178
11501
  type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
11179
11502
  transcriptionControllerGetTranscriptV2ResponseRequestParamsSummarizationConfigTypeDefault
11180
11503
  ).describe("The type of summarization to apply")
11181
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
11504
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
11182
11505
  named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
11183
11506
  custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
11184
11507
  custom_spelling_config: import_zod5.z.object({
11185
11508
  spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
11186
11509
  }).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
11187
11510
  sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
11188
- audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
11511
+ audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
11189
11512
  audio_to_llm_config: import_zod5.z.object({
11190
11513
  prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
11191
11514
  model: import_zod5.z.string().default(
@@ -11193,7 +11516,7 @@ var transcriptionControllerGetTranscriptV2Response = import_zod5.z.discriminated
11193
11516
  ).describe(
11194
11517
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
11195
11518
  )
11196
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
11519
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
11197
11520
  pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
11198
11521
  pii_redaction_config: import_zod5.z.object({
11199
11522
  entity_types: import_zod5.z.enum([
@@ -13884,7 +14207,7 @@ var historyControllerGetListV1ResponseItemsItemRequestParamsTranslationConfigMat
13884
14207
  var historyControllerGetListV1ResponseItemsItemRequestParamsTranslationConfigLipsyncDefault = true;
13885
14208
  var historyControllerGetListV1ResponseItemsItemRequestParamsTranslationConfigContextAdaptationDefault = true;
13886
14209
  var historyControllerGetListV1ResponseItemsItemRequestParamsSummarizationConfigTypeDefault = "general";
13887
- var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
14210
+ var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
13888
14211
  var historyControllerGetListV1ResponseItemsItemRequestParamsLanguageConfigLanguagesDefault = [];
13889
14212
  var historyControllerGetListV1ResponseItemsItemResultTranscriptionUtterancesItemChannelMin = 0;
13890
14213
  var historyControllerGetListV1ResponseItemsItemResultTranscriptionUtterancesItemSpeakerMin = 0;
@@ -14273,12 +14596,12 @@ var historyControllerGetListV1Response = import_zod5.z.object({
14273
14596
  "Forces the translation to use informal language forms when available in the target language."
14274
14597
  )
14275
14598
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
14276
- summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
14599
+ summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
14277
14600
  summarization_config: import_zod5.z.object({
14278
14601
  type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
14279
14602
  historyControllerGetListV1ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
14280
14603
  ).describe("The type of summarization to apply")
14281
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
14604
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
14282
14605
  named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
14283
14606
  custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
14284
14607
  custom_spelling_config: import_zod5.z.object({
@@ -14287,7 +14610,7 @@ var historyControllerGetListV1Response = import_zod5.z.object({
14287
14610
  "**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
14288
14611
  ),
14289
14612
  sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
14290
- audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
14613
+ audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
14291
14614
  audio_to_llm_config: import_zod5.z.object({
14292
14615
  prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
14293
14616
  model: import_zod5.z.string().default(
@@ -14295,7 +14618,7 @@ var historyControllerGetListV1Response = import_zod5.z.object({
14295
14618
  ).describe(
14296
14619
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
14297
14620
  )
14298
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
14621
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
14299
14622
  pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
14300
14623
  pii_redaction_config: import_zod5.z.object({
14301
14624
  entity_types: import_zod5.z.enum([
@@ -19748,6 +20071,7 @@ var createRealtimeClientSecretBodySessionAudioOutputSpeedDefault = 1;
19748
20071
  var createRealtimeClientSecretBodySessionAudioOutputSpeedMin = 0.25;
19749
20072
  var createRealtimeClientSecretBodySessionAudioOutputSpeedMax = 1.5;
19750
20073
  var createRealtimeClientSecretBodySessionToolChoiceDefault = "auto";
20074
+ var createRealtimeClientSecretBodySessionReasoningEffortDefault = "low";
19751
20075
  var createRealtimeClientSecretBodySessionTruncationRetentionRatioMin = 0;
19752
20076
  var createRealtimeClientSecretBodySessionTruncationRetentionRatioMax = 1;
19753
20077
  var createRealtimeClientSecretBodySessionTruncationTokenLimitsPostInstructionsMin = 0;
@@ -19778,6 +20102,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
19778
20102
  import_zod6.z.enum([
19779
20103
  "gpt-realtime",
19780
20104
  "gpt-realtime-1.5",
20105
+ "gpt-realtime-2",
19781
20106
  "gpt-realtime-2025-08-28",
19782
20107
  "gpt-4o-realtime-preview",
19783
20108
  "gpt-4o-realtime-preview-2024-10-01",
@@ -19818,16 +20143,20 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
19818
20143
  "gpt-4o-mini-transcribe",
19819
20144
  "gpt-4o-mini-transcribe-2025-12-15",
19820
20145
  "gpt-4o-transcribe",
19821
- "gpt-4o-transcribe-diarize"
20146
+ "gpt-4o-transcribe-diarize",
20147
+ "gpt-realtime-whisper"
19822
20148
  ])
19823
20149
  ).optional().describe(
19824
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
20150
+ "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
19825
20151
  ),
19826
20152
  language: import_zod6.z.string().optional().describe(
19827
20153
  "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
19828
20154
  ),
19829
20155
  prompt: import_zod6.z.string().optional().describe(
19830
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
20156
+ 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
20157
+ ),
20158
+ delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
20159
+ "Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
19831
20160
  )
19832
20161
  }).optional(),
19833
20162
  noise_reduction: import_zod6.z.object({
@@ -19894,7 +20223,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
19894
20223
  "Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
19895
20224
  )
19896
20225
  ]).describe(
19897
- 'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
20226
+ 'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
19898
20227
  ).or(import_zod6.z.null()).optional()
19899
20228
  }).optional(),
19900
20229
  output: import_zod6.z.object({
@@ -19967,7 +20296,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
19967
20296
  server_label: import_zod6.z.string().describe(
19968
20297
  "A label for this MCP server, used to identify it in tool calls.\n"
19969
20298
  ),
19970
- server_url: import_zod6.z.string().optional().describe(
20299
+ server_url: import_zod6.z.string().url().optional().describe(
19971
20300
  "The URL for the MCP server. One of `server_url` or `connector_id` must be\nprovided.\n"
19972
20301
  ),
19973
20302
  connector_id: import_zod6.z.enum([
@@ -20045,6 +20374,16 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
20045
20374
  ).default(createRealtimeClientSecretBodySessionToolChoiceDefault).describe(
20046
20375
  "How the model chooses tools. Provide one of the string modes or force a specific\nfunction/MCP tool.\n"
20047
20376
  ),
20377
+ parallel_tool_calls: import_zod6.z.boolean().optional().describe(
20378
+ "Whether the model may call multiple tools in parallel. Only supported by\nreasoning Realtime models such as `gpt-realtime-2`.\n"
20379
+ ),
20380
+ reasoning: import_zod6.z.object({
20381
+ effort: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).default(createRealtimeClientSecretBodySessionReasoningEffortDefault).describe(
20382
+ "Constrains effort on reasoning for reasoning-capable Realtime models such as\n`gpt-realtime-2`.\n"
20383
+ )
20384
+ }).optional().describe(
20385
+ "Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.\n"
20386
+ ),
20048
20387
  max_output_tokens: import_zod6.z.number().or(import_zod6.z.enum(["inf"])).optional().describe(
20049
20388
  "Maximum number of output tokens for a single assistant response,\ninclusive of tool calls. Provide an integer between 1 and 4096 to\nlimit output tokens, or `inf` for the maximum available tokens for a\ngiven model. Defaults to `inf`.\n"
20050
20389
  ),
@@ -20084,7 +20423,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
20084
20423
  ).or(
20085
20424
  import_zod6.z.object({
20086
20425
  type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
20087
- image_url: import_zod6.z.string().describe(
20426
+ image_url: import_zod6.z.string().url().describe(
20088
20427
  "The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
20089
20428
  ).or(import_zod6.z.null()).optional(),
20090
20429
  file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
@@ -20098,7 +20437,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
20098
20437
  file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
20099
20438
  filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
20100
20439
  file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
20101
- file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
20440
+ file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
20102
20441
  detail: import_zod6.z.enum(["low", "high"]).optional()
20103
20442
  }).describe("A file input to the model.")
20104
20443
  )
@@ -20134,16 +20473,20 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
20134
20473
  "gpt-4o-mini-transcribe",
20135
20474
  "gpt-4o-mini-transcribe-2025-12-15",
20136
20475
  "gpt-4o-transcribe",
20137
- "gpt-4o-transcribe-diarize"
20476
+ "gpt-4o-transcribe-diarize",
20477
+ "gpt-realtime-whisper"
20138
20478
  ])
20139
20479
  ).optional().describe(
20140
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
20480
+ "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
20141
20481
  ),
20142
20482
  language: import_zod6.z.string().optional().describe(
20143
20483
  "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
20144
20484
  ),
20145
20485
  prompt: import_zod6.z.string().optional().describe(
20146
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
20486
+ 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
20487
+ ),
20488
+ delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
20489
+ "Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
20147
20490
  )
20148
20491
  }).optional(),
20149
20492
  noise_reduction: import_zod6.z.object({
@@ -20210,7 +20553,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
20210
20553
  "Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
20211
20554
  )
20212
20555
  ]).describe(
20213
- 'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
20556
+ 'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
20214
20557
  ).or(import_zod6.z.null()).optional()
20215
20558
  }).optional()
20216
20559
  }).optional().describe("Configuration for input and output audio.\n"),
@@ -20236,23 +20579,21 @@ var createRealtimeClientSecretResponseSessionAudioOutputSpeedDefault = 1;
20236
20579
  var createRealtimeClientSecretResponseSessionAudioOutputSpeedMin = 0.25;
20237
20580
  var createRealtimeClientSecretResponseSessionAudioOutputSpeedMax = 1.5;
20238
20581
  var createRealtimeClientSecretResponseSessionToolChoiceDefault = "auto";
20582
+ var createRealtimeClientSecretResponseSessionReasoningEffortDefault = "low";
20239
20583
  var createRealtimeClientSecretResponseSessionTruncationRetentionRatioMin = 0;
20240
20584
  var createRealtimeClientSecretResponseSessionTruncationRetentionRatioMax = 1;
20241
20585
  var createRealtimeClientSecretResponseSessionTruncationTokenLimitsPostInstructionsMin = 0;
20242
20586
  var createRealtimeClientSecretResponse = import_zod6.z.object({
20243
20587
  value: import_zod6.z.string().describe("The generated client secret value."),
20244
20588
  expires_at: import_zod6.z.number().describe("Expiration timestamp for the client secret, in seconds since epoch."),
20245
- session: import_zod6.z.discriminatedUnion("type", [
20589
+ session: import_zod6.z.union([
20246
20590
  import_zod6.z.object({
20247
- client_secret: import_zod6.z.object({
20248
- value: import_zod6.z.string().describe(
20249
- "Ephemeral key usable in client environments to authenticate connections to the Realtime API. Use this in client-side environments rather than a standard API token, which should only be used server-side.\n"
20250
- ),
20251
- expires_at: import_zod6.z.number().describe(
20252
- "Timestamp for when the token expires. Currently, all tokens expire\nafter one minute.\n"
20253
- )
20254
- }).describe("Ephemeral key returned by the API."),
20255
20591
  type: import_zod6.z.enum(["realtime"]).describe("The type of session to create. Always `realtime` for the Realtime API.\n"),
20592
+ id: import_zod6.z.string().describe(
20593
+ "Unique identifier for the session that looks like `sess_1234567890abcdef`.\n"
20594
+ ),
20595
+ object: import_zod6.z.enum(["realtime.session"]).describe("The object type. Always `realtime.session`."),
20596
+ expires_at: import_zod6.z.number().optional().describe("Expiration timestamp for the session, in seconds since epoch."),
20256
20597
  output_modalities: import_zod6.z.array(import_zod6.z.enum(["text", "audio"])).default(createRealtimeClientSecretResponseSessionOutputModalitiesDefault).describe(
20257
20598
  'The set of modalities the model can respond with. It defaults to `["audio"]`, indicating\nthat the model will respond with audio plus a transcript. `["text"]` can be used to make\nthe model respond with text only. It is not possible to request both `text` and `audio` at the same time.\n'
20258
20599
  ),
@@ -20260,6 +20601,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
20260
20601
  import_zod6.z.enum([
20261
20602
  "gpt-realtime",
20262
20603
  "gpt-realtime-1.5",
20604
+ "gpt-realtime-2",
20263
20605
  "gpt-realtime-2025-08-28",
20264
20606
  "gpt-4o-realtime-preview",
20265
20607
  "gpt-4o-realtime-preview-2024-10-01",
@@ -20282,15 +20624,15 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
20282
20624
  audio: import_zod6.z.object({
20283
20625
  input: import_zod6.z.object({
20284
20626
  format: import_zod6.z.object({
20285
- type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
20286
- rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
20627
+ type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
20628
+ rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
20287
20629
  }).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
20288
20630
  import_zod6.z.object({
20289
- type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
20631
+ type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
20290
20632
  }).describe("The G.711 \u03BC-law format.")
20291
20633
  ).or(
20292
20634
  import_zod6.z.object({
20293
- type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
20635
+ type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
20294
20636
  }).describe("The G.711 A-law format.")
20295
20637
  ).optional(),
20296
20638
  transcription: import_zod6.z.object({
@@ -20300,20 +20642,19 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
20300
20642
  "gpt-4o-mini-transcribe",
20301
20643
  "gpt-4o-mini-transcribe-2025-12-15",
20302
20644
  "gpt-4o-transcribe",
20303
- "gpt-4o-transcribe-diarize"
20645
+ "gpt-4o-transcribe-diarize",
20646
+ "gpt-realtime-whisper"
20304
20647
  ])
20305
20648
  ).optional().describe(
20306
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
20307
- ),
20308
- language: import_zod6.z.string().optional().describe(
20309
- "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
20649
+ "The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
20310
20650
  ),
20651
+ language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
20311
20652
  prompt: import_zod6.z.string().optional().describe(
20312
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
20653
+ "The prompt configured for input audio transcription, when present.\n"
20313
20654
  )
20314
20655
  }).optional(),
20315
20656
  noise_reduction: import_zod6.z.object({
20316
- type: import_zod6.z.enum(["near_field", "far_field"]).describe(
20657
+ type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
20317
20658
  "Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones such as laptop or conference room microphones.\n"
20318
20659
  )
20319
20660
  }).optional().describe(
@@ -20376,20 +20717,20 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
20376
20717
  "Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
20377
20718
  )
20378
20719
  ]).describe(
20379
- 'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
20720
+ 'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
20380
20721
  ).or(import_zod6.z.null()).optional()
20381
20722
  }).optional(),
20382
20723
  output: import_zod6.z.object({
20383
20724
  format: import_zod6.z.object({
20384
- type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
20385
- rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
20725
+ type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
20726
+ rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
20386
20727
  }).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
20387
20728
  import_zod6.z.object({
20388
- type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
20729
+ type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
20389
20730
  }).describe("The G.711 \u03BC-law format.")
20390
20731
  ).or(
20391
20732
  import_zod6.z.object({
20392
- type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
20733
+ type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
20393
20734
  }).describe("The G.711 A-law format.")
20394
20735
  ).optional(),
20395
20736
  voice: import_zod6.z.string().or(
@@ -20433,7 +20774,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
20433
20774
  ).or(import_zod6.z.null()).optional(),
20434
20775
  tools: import_zod6.z.array(
20435
20776
  import_zod6.z.object({
20436
- type: import_zod6.z.enum(["function"]).describe("The type of the tool, i.e. `function`."),
20777
+ type: import_zod6.z.enum(["function"]).optional().describe("The type of the tool, i.e. `function`."),
20437
20778
  name: import_zod6.z.string().optional().describe("The name of the function."),
20438
20779
  description: import_zod6.z.string().optional().describe(
20439
20780
  "The description of the function, including guidance on when and how\nto call it, and guidance about what to tell the user when calling\n(if anything).\n"
@@ -20445,7 +20786,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
20445
20786
  server_label: import_zod6.z.string().describe(
20446
20787
  "A label for this MCP server, used to identify it in tool calls.\n"
20447
20788
  ),
20448
- server_url: import_zod6.z.string().optional().describe(
20789
+ server_url: import_zod6.z.string().url().optional().describe(
20449
20790
  "The URL for the MCP server. One of `server_url` or `connector_id` must be\nprovided.\n"
20450
20791
  ),
20451
20792
  connector_id: import_zod6.z.enum([
@@ -20457,7 +20798,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
20457
20798
  "connector_outlookcalendar",
20458
20799
  "connector_outlookemail",
20459
20800
  "connector_sharepoint"
20460
- ]).describe(
20801
+ ]).optional().describe(
20461
20802
  "Identifier for service connectors, like those available in ChatGPT. One of\n`server_url` or `connector_id` must be provided. Learn more about service\nconnectors [here](/docs/guides/tools-remote-mcp#connectors).\n\nCurrently supported `connector_id` values are:\n\n- Dropbox: `connector_dropbox`\n- Gmail: `connector_gmail`\n- Google Calendar: `connector_googlecalendar`\n- Google Drive: `connector_googledrive`\n- Microsoft Teams: `connector_microsoftteams`\n- Outlook Calendar: `connector_outlookcalendar`\n- Outlook Email: `connector_outlookemail`\n- SharePoint: `connector_sharepoint`\n"
20462
20803
  ),
20463
20804
  authorization: import_zod6.z.string().optional().describe(
@@ -20523,6 +20864,13 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
20523
20864
  ).default(createRealtimeClientSecretResponseSessionToolChoiceDefault).describe(
20524
20865
  "How the model chooses tools. Provide one of the string modes or force a specific\nfunction/MCP tool.\n"
20525
20866
  ),
20867
+ reasoning: import_zod6.z.object({
20868
+ effort: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).default(createRealtimeClientSecretResponseSessionReasoningEffortDefault).describe(
20869
+ "Constrains effort on reasoning for reasoning-capable Realtime models such as\n`gpt-realtime-2`.\n"
20870
+ )
20871
+ }).optional().describe(
20872
+ "Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.\n"
20873
+ ),
20526
20874
  max_output_tokens: import_zod6.z.number().or(import_zod6.z.enum(["inf"])).optional().describe(
20527
20875
  "Maximum number of output tokens for a single assistant response,\ninclusive of tool calls. Provide an integer between 1 and 4096 to\nlimit output tokens, or `inf` for the maximum available tokens for a\ngiven model. Defaults to `inf`.\n"
20528
20876
  ),
@@ -20562,7 +20910,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
20562
20910
  ).or(
20563
20911
  import_zod6.z.object({
20564
20912
  type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
20565
- image_url: import_zod6.z.string().describe(
20913
+ image_url: import_zod6.z.string().url().describe(
20566
20914
  "The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
20567
20915
  ).or(import_zod6.z.null()).optional(),
20568
20916
  file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
@@ -20576,8 +20924,8 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
20576
20924
  file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
20577
20925
  filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
20578
20926
  file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
20579
- file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
20580
- detail: import_zod6.z.enum(["low", "high"])
20927
+ file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
20928
+ detail: import_zod6.z.enum(["low", "high"]).optional()
20581
20929
  }).describe("A file input to the model.")
20582
20930
  )
20583
20931
  ).describe(
@@ -20586,9 +20934,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
20586
20934
  }).describe(
20587
20935
  "Reference to a prompt template and its variables.\n[Learn more](/docs/guides/text?api-mode=responses#reusable-prompts).\n"
20588
20936
  ).or(import_zod6.z.null()).optional()
20589
- }).describe(
20590
- "A new Realtime session configuration, with an ephemeral key. Default TTL\nfor keys is one minute.\n"
20591
- ),
20937
+ }).describe("A Realtime session configuration object.\n"),
20592
20938
  import_zod6.z.object({
20593
20939
  type: import_zod6.z.enum(["transcription"]).describe(
20594
20940
  "The type of session. Always `transcription` for transcription sessions.\n"
@@ -20604,15 +20950,15 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
20604
20950
  audio: import_zod6.z.object({
20605
20951
  input: import_zod6.z.object({
20606
20952
  format: import_zod6.z.object({
20607
- type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
20608
- rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
20953
+ type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
20954
+ rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
20609
20955
  }).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
20610
20956
  import_zod6.z.object({
20611
- type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
20957
+ type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
20612
20958
  }).describe("The G.711 \u03BC-law format.")
20613
20959
  ).or(
20614
20960
  import_zod6.z.object({
20615
- type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
20961
+ type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
20616
20962
  }).describe("The G.711 A-law format.")
20617
20963
  ).optional(),
20618
20964
  transcription: import_zod6.z.object({
@@ -20622,20 +20968,19 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
20622
20968
  "gpt-4o-mini-transcribe",
20623
20969
  "gpt-4o-mini-transcribe-2025-12-15",
20624
20970
  "gpt-4o-transcribe",
20625
- "gpt-4o-transcribe-diarize"
20971
+ "gpt-4o-transcribe-diarize",
20972
+ "gpt-realtime-whisper"
20626
20973
  ])
20627
20974
  ).optional().describe(
20628
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
20629
- ),
20630
- language: import_zod6.z.string().optional().describe(
20631
- "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
20975
+ "The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
20632
20976
  ),
20977
+ language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
20633
20978
  prompt: import_zod6.z.string().optional().describe(
20634
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
20979
+ "The prompt configured for input audio transcription, when present.\n"
20635
20980
  )
20636
20981
  }).optional(),
20637
20982
  noise_reduction: import_zod6.z.object({
20638
- type: import_zod6.z.enum(["near_field", "far_field"]).describe(
20983
+ type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
20639
20984
  "Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones such as laptop or conference room microphones.\n"
20640
20985
  )
20641
20986
  }).optional().describe("Configuration for input audio noise reduction.\n"),
@@ -20652,8 +20997,10 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
20652
20997
  silence_duration_ms: import_zod6.z.number().optional().describe(
20653
20998
  "Duration of silence to detect speech stop (in milliseconds). Defaults\nto 500ms. With shorter values the model will respond more quickly,\nbut may jump in on short pauses from the user.\n"
20654
20999
  )
20655
- }).optional().describe(
20656
- "Configuration for turn detection. Can be set to `null` to turn off. Server\nVAD means that the model will detect the start and end of speech based on\naudio volume and respond at the end of user speech.\n"
21000
+ }).describe(
21001
+ "Configuration for turn detection. Can be set to `null` to turn off. Server\nVAD means that the model will detect the start and end of speech based on\naudio volume and respond at the end of user speech. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported.\n"
21002
+ ).or(import_zod6.z.null()).optional().describe(
21003
+ "Configuration for turn detection. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported.\n"
20657
21004
  )
20658
21005
  }).optional()
20659
21006
  }).optional().describe("Configuration for input audio for the session.\n")
@@ -20789,7 +21136,7 @@ var createRealtimeSessionBody = import_zod6.z.object({
20789
21136
  ).or(
20790
21137
  import_zod6.z.object({
20791
21138
  type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
20792
- image_url: import_zod6.z.string().describe(
21139
+ image_url: import_zod6.z.string().url().describe(
20793
21140
  "The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
20794
21141
  ).or(import_zod6.z.null()).optional(),
20795
21142
  file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
@@ -20803,7 +21150,7 @@ var createRealtimeSessionBody = import_zod6.z.object({
20803
21150
  file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
20804
21151
  filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
20805
21152
  file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
20806
- file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
21153
+ file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
20807
21154
  detail: import_zod6.z.enum(["low", "high"]).optional()
20808
21155
  }).describe("A file input to the model.")
20809
21156
  )
@@ -20851,17 +21198,14 @@ var createRealtimeSessionResponse = import_zod6.z.object({
20851
21198
  "gpt-4o-mini-transcribe",
20852
21199
  "gpt-4o-mini-transcribe-2025-12-15",
20853
21200
  "gpt-4o-transcribe",
20854
- "gpt-4o-transcribe-diarize"
21201
+ "gpt-4o-transcribe-diarize",
21202
+ "gpt-realtime-whisper"
20855
21203
  ])
20856
21204
  ).optional().describe(
20857
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
21205
+ "The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
20858
21206
  ),
20859
- language: import_zod6.z.string().optional().describe(
20860
- "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
20861
- ),
20862
- prompt: import_zod6.z.string().optional().describe(
20863
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
20864
- )
21207
+ language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
21208
+ prompt: import_zod6.z.string().optional().describe("The prompt configured for input audio transcription, when present.\n")
20865
21209
  }).optional(),
20866
21210
  noise_reduction: import_zod6.z.object({
20867
21211
  type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
@@ -20986,16 +21330,20 @@ var createRealtimeTranscriptionSessionBody = import_zod6.z.object({
20986
21330
  "gpt-4o-mini-transcribe",
20987
21331
  "gpt-4o-mini-transcribe-2025-12-15",
20988
21332
  "gpt-4o-transcribe",
20989
- "gpt-4o-transcribe-diarize"
21333
+ "gpt-4o-transcribe-diarize",
21334
+ "gpt-realtime-whisper"
20990
21335
  ])
20991
21336
  ).optional().describe(
20992
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
21337
+ "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
20993
21338
  ),
20994
21339
  language: import_zod6.z.string().optional().describe(
20995
21340
  "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
20996
21341
  ),
20997
21342
  prompt: import_zod6.z.string().optional().describe(
20998
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
21343
+ 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
21344
+ ),
21345
+ delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
21346
+ "Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
20999
21347
  )
21000
21348
  }).optional(),
21001
21349
  include: import_zod6.z.array(import_zod6.z.enum(["item.input_audio_transcription.logprobs"])).optional().describe(
@@ -21024,17 +21372,14 @@ var createRealtimeTranscriptionSessionResponse = import_zod6.z.object({
21024
21372
  "gpt-4o-mini-transcribe",
21025
21373
  "gpt-4o-mini-transcribe-2025-12-15",
21026
21374
  "gpt-4o-transcribe",
21027
- "gpt-4o-transcribe-diarize"
21375
+ "gpt-4o-transcribe-diarize",
21376
+ "gpt-realtime-whisper"
21028
21377
  ])
21029
21378
  ).optional().describe(
21030
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
21031
- ),
21032
- language: import_zod6.z.string().optional().describe(
21033
- "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
21379
+ "The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
21034
21380
  ),
21035
- prompt: import_zod6.z.string().optional().describe(
21036
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
21037
- )
21381
+ language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
21382
+ prompt: import_zod6.z.string().optional().describe("The prompt configured for input audio transcription, when present.\n")
21038
21383
  }).optional(),
21039
21384
  turn_detection: import_zod6.z.object({
21040
21385
  type: import_zod6.z.string().optional().describe("Type of turn detection, only `server_vad` is currently supported.\n"),
@@ -24647,11 +24992,73 @@ var getModelsResponse = import_zod10.z.object({
24647
24992
  })
24648
24993
  ).describe("List of available models and their attributes.")
24649
24994
  });
24995
+ var getTtsModelsResponse = import_zod10.z.object({
24996
+ models: import_zod10.z.array(
24997
+ import_zod10.z.object({
24998
+ id: import_zod10.z.string().describe("Unique identifier of the model."),
24999
+ aliased_model_id: import_zod10.z.string().or(import_zod10.z.null()).describe("If this is an alias, the id of the aliased model."),
25000
+ name: import_zod10.z.string().describe("Name of the model."),
25001
+ voices: import_zod10.z.array(
25002
+ import_zod10.z.object({
25003
+ id: import_zod10.z.string().describe("Unique identifier of the voice."),
25004
+ description: import_zod10.z.string().describe("Description of the TTS voice."),
25005
+ gender: import_zod10.z.enum(["male", "female", "neutral"])
25006
+ })
25007
+ ).describe("List of available voices for this model."),
25008
+ languages: import_zod10.z.array(
25009
+ import_zod10.z.object({
25010
+ code: import_zod10.z.string().describe("2-letter language code."),
25011
+ name: import_zod10.z.string().describe("Language name.")
25012
+ })
25013
+ ).describe("List of languages supported by the model.")
25014
+ })
25015
+ ).describe("List of available TTS models and their attributes.")
25016
+ });
25017
+ var getUsageLogsQueryLimitDefault = 1e3;
25018
+ var getUsageLogsQueryLimitMax = 1e3;
25019
+ var getUsageLogsQuerySortDefault = "end_time_asc";
25020
+ var getUsageLogsQueryParams = import_zod10.z.object({
25021
+ start_time: import_zod10.z.string().describe("Start of the time window (inclusive). Filters by request end time."),
25022
+ end_time: import_zod10.z.string().describe("End of the time window (exclusive). Filters by request end time."),
25023
+ limit: import_zod10.z.number().min(1).max(getUsageLogsQueryLimitMax).default(getUsageLogsQueryLimitDefault).describe("Maximum number of usage log entries to return."),
25024
+ sort: import_zod10.z.enum(["end_time_asc", "end_time_desc"]).default(getUsageLogsQuerySortDefault).describe(
25025
+ "Sort order by end_time.Use `end_time_desc` to get the most recent entries first. When paginating, pass the same `sort` value alongside the cursor."
25026
+ ),
25027
+ cursor: import_zod10.z.string().or(import_zod10.z.null()).optional().describe("Pagination cursor for the next page of results.")
25028
+ });
25029
+ var getUsageLogsResponse = import_zod10.z.object({
25030
+ usage_logs: import_zod10.z.array(
25031
+ import_zod10.z.object({
25032
+ uuid: import_zod10.z.string().uuid().describe("Unique identifier of the request."),
25033
+ request_scope: import_zod10.z.string().describe("Scope of the request (api / playground)."),
25034
+ client_reference_id: import_zod10.z.string().describe("Client reference ID supplied on the original request. Empty string if none."),
25035
+ model: import_zod10.z.string().describe("Model identifier."),
25036
+ start_time: import_zod10.z.string().datetime({}).describe("When the request started."),
25037
+ end_time: import_zod10.z.string().datetime({}).describe("When the request ended."),
25038
+ input_text_tokens: import_zod10.z.number(),
25039
+ input_audio_tokens: import_zod10.z.number(),
25040
+ input_audio_duration_ms: import_zod10.z.number(),
25041
+ output_text_tokens: import_zod10.z.number(),
25042
+ output_audio_tokens: import_zod10.z.number(),
25043
+ output_audio_duration_ms: import_zod10.z.number(),
25044
+ cost_usd: import_zod10.z.string(),
25045
+ input_cost_usd: import_zod10.z.string(),
25046
+ input_text_cost_usd: import_zod10.z.string(),
25047
+ input_audio_cost_usd: import_zod10.z.string(),
25048
+ output_cost_usd: import_zod10.z.string(),
25049
+ output_text_cost_usd: import_zod10.z.string(),
25050
+ output_audio_cost_usd: import_zod10.z.string()
25051
+ })
25052
+ ).describe("Per-request usage log entries ordered by end_time, uuid (per `sort`)."),
25053
+ next_page_cursor: import_zod10.z.string().or(import_zod10.z.null()).optional().describe(
25054
+ "A pagination token that references the next page of results. When more data is available, this field contains a value to pass in the cursor parameter of a subsequent request. When null, no additional results are available."
25055
+ )
25056
+ });
24650
25057
  var createTemporaryApiKeyBodyExpiresInSecondsMax = 3600;
24651
25058
  var createTemporaryApiKeyBodyClientReferenceIdMaxOne = 256;
24652
25059
  var createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne = 18e3;
24653
25060
  var createTemporaryApiKeyBody = import_zod10.z.object({
24654
- usage_type: import_zod10.z.enum(["transcribe_websocket"]),
25061
+ usage_type: import_zod10.z.enum(["transcribe_websocket", "tts_rt"]),
24655
25062
  expires_in_seconds: import_zod10.z.number().min(1).max(createTemporaryApiKeyBodyExpiresInSecondsMax).describe("Duration in seconds until the temporary API key expires."),
24656
25063
  client_reference_id: import_zod10.z.string().max(createTemporaryApiKeyBodyClientReferenceIdMaxOne).or(import_zod10.z.null()).optional().describe("Optional tracking identifier string. Does not need to be unique."),
24657
25064
  single_use: import_zod10.z.boolean().or(import_zod10.z.null()).optional().describe("If true, the temporary API key can be used only once."),
@@ -24659,6 +25066,28 @@ var createTemporaryApiKeyBody = import_zod10.z.object({
24659
25066
  "Maximum WebSocket connection duration in seconds. If exceeded, the connection will be dropped. If not set, no limit is applied."
24660
25067
  )
24661
25068
  });
25069
+ var getConcurrencyLimitsResponse = import_zod10.z.object({
25070
+ project: import_zod10.z.object({
25071
+ current: import_zod10.z.object({
25072
+ transcribe_concurrent: import_zod10.z.number(),
25073
+ tts_concurrent: import_zod10.z.number()
25074
+ }).describe("Live counts read from Redis"),
25075
+ limits: import_zod10.z.object({
25076
+ transcribe_concurrent: import_zod10.z.number().or(import_zod10.z.null()),
25077
+ tts_concurrent: import_zod10.z.number().or(import_zod10.z.null())
25078
+ }).describe("Configured limits")
25079
+ }),
25080
+ organization: import_zod10.z.object({
25081
+ current: import_zod10.z.object({
25082
+ transcribe_concurrent: import_zod10.z.number(),
25083
+ tts_concurrent: import_zod10.z.number()
25084
+ }).describe("Live counts read from Redis"),
25085
+ limits: import_zod10.z.object({
25086
+ transcribe_concurrent: import_zod10.z.number().or(import_zod10.z.null()),
25087
+ tts_concurrent: import_zod10.z.number().or(import_zod10.z.null())
25088
+ }).describe("Configured limits")
25089
+ })
25090
+ });
24662
25091
 
24663
25092
  // src/generated/soniox/streaming-types.zod.ts
24664
25093
  var import_zod11 = require("zod");
@@ -24724,10 +25153,10 @@ var sonioxStructuredContextSchema = import_zod11.z.object({
24724
25153
  var sonioxContextSchema = import_zod11.z.union([sonioxStructuredContextSchema, import_zod11.z.string()]);
24725
25154
  var sonioxRealtimeModelSchema = import_zod11.z.enum([
24726
25155
  "stt-rt-v4",
24727
- "stt-rt-v3",
24728
25156
  "stt-rt-preview",
24729
25157
  "stt-rt-v3-preview",
24730
- "stt-rt-preview-v2"
25158
+ "stt-rt-preview-v2",
25159
+ "stt-rt-v3"
24731
25160
  ]);
24732
25161
  var streamingTranscriberParams3 = import_zod11.z.object({
24733
25162
  model: sonioxRealtimeModelSchema,
@@ -24735,12 +25164,16 @@ var streamingTranscriberParams3 = import_zod11.z.object({
24735
25164
  sampleRate: import_zod11.z.number().optional(),
24736
25165
  numChannels: import_zod11.z.number().optional(),
24737
25166
  languageHints: import_zod11.z.array(import_zod11.z.string()).optional(),
25167
+ languageHintsStrict: import_zod11.z.boolean().optional(),
24738
25168
  context: sonioxContextSchema.optional(),
24739
25169
  enableSpeakerDiarization: import_zod11.z.boolean().optional(),
24740
25170
  enableLanguageIdentification: import_zod11.z.boolean().optional(),
24741
25171
  enableEndpointDetection: import_zod11.z.boolean().optional(),
25172
+ maxEndpointDelayMs: import_zod11.z.number().optional(),
24742
25173
  translation: sonioxTranslationConfigSchema.optional(),
24743
- clientReferenceId: import_zod11.z.string().optional()
25174
+ clientReferenceId: import_zod11.z.string().optional(),
25175
+ keepaliveIntervalMs: import_zod11.z.number().optional(),
25176
+ connectTimeoutMs: import_zod11.z.number().optional()
24744
25177
  });
24745
25178
  var sonioxTranslationStatusSchema = import_zod11.z.enum(["original", "translation", "none"]);
24746
25179
  var sonioxTokenSchema = import_zod11.z.object({