voice-router-dev 0.9.4 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -145,6 +145,7 @@ __export(src_exports, {
145
145
  SonioxModels: () => SonioxModels,
146
146
  SonioxRealtimeModel: () => SonioxRealtimeModel,
147
147
  SonioxRegion: () => SonioxRegion,
148
+ SonioxSDK: () => sdk_types_exports,
148
149
  SonioxStreamingSchema: () => SonioxStreamingSchema,
149
150
  SonioxStreamingTypes: () => streaming_types_zod_exports,
150
151
  SonioxStreamingUpdateSchema: () => SonioxStreamingUpdateSchema,
@@ -1333,7 +1334,6 @@ var AzureLocales = [
1333
1334
  { code: "ar-YE", name: "Arabic (Yemen)" },
1334
1335
  { code: "as-IN", name: "Assamese (India)" },
1335
1336
  { code: "az-AZ", name: "Azerbaijani (Azerbaijan)" },
1336
- { code: "be-BY", name: "Belarusian (Belarus)" },
1337
1337
  { code: "bg-BG", name: "Bulgarian (Bulgaria)" },
1338
1338
  { code: "bn-BD", name: "Bengali (Bangladesh)" },
1339
1339
  { code: "bn-IN", name: "Bengali (India)" },
@@ -1414,7 +1414,6 @@ var AzureLocales = [
1414
1414
  { code: "lo-LA", name: "Lao (Latin)" },
1415
1415
  { code: "lt-LT", name: "Lithuanian (Lithuania)" },
1416
1416
  { code: "lv-LV", name: "Latvian (Latvia)" },
1417
- { code: "mi-NZ", name: "Maori (New Zealand)" },
1418
1417
  { code: "mk-MK", name: "Macedonian (North Macedonia)" },
1419
1418
  { code: "ml-IN", name: "Malayalam (India)" },
1420
1419
  { code: "mn-MN", name: "Mongolian (Mongolia)" },
@@ -1490,7 +1489,6 @@ var AzureLocaleCodes = [
1490
1489
  "ar-YE",
1491
1490
  "as-IN",
1492
1491
  "az-AZ",
1493
- "be-BY",
1494
1492
  "bg-BG",
1495
1493
  "bn-BD",
1496
1494
  "bn-IN",
@@ -1571,7 +1569,6 @@ var AzureLocaleCodes = [
1571
1569
  "lo-LA",
1572
1570
  "lt-LT",
1573
1571
  "lv-LV",
1574
- "mi-NZ",
1575
1572
  "mk-MK",
1576
1573
  "ml-IN",
1577
1574
  "mn-MN",
@@ -1647,7 +1644,6 @@ var AzureLocaleLabels = {
1647
1644
  "ar-YE": "Arabic (Yemen)",
1648
1645
  "as-IN": "Assamese (India)",
1649
1646
  "az-AZ": "Azerbaijani (Azerbaijan)",
1650
- "be-BY": "Belarusian (Belarus)",
1651
1647
  "bg-BG": "Bulgarian (Bulgaria)",
1652
1648
  "bn-BD": "Bengali (Bangladesh)",
1653
1649
  "bn-IN": "Bengali (India)",
@@ -1728,7 +1724,6 @@ var AzureLocaleLabels = {
1728
1724
  "lo-LA": "Lao (Latin)",
1729
1725
  "lt-LT": "Lithuanian (Lithuania)",
1730
1726
  "lv-LV": "Latvian (Latvia)",
1731
- "mi-NZ": "Maori (New Zealand)",
1732
1727
  "mk-MK": "Macedonian (North Macedonia)",
1733
1728
  "ml-IN": "Malayalam (India)",
1734
1729
  "mn-MN": "Mongolian (Mongolia)",
@@ -1804,7 +1799,6 @@ var AzureLocale = {
1804
1799
  "ar-YE": "ar-YE",
1805
1800
  "as-IN": "as-IN",
1806
1801
  "az-AZ": "az-AZ",
1807
- "be-BY": "be-BY",
1808
1802
  "bg-BG": "bg-BG",
1809
1803
  "bn-BD": "bn-BD",
1810
1804
  "bn-IN": "bn-IN",
@@ -1885,7 +1879,6 @@ var AzureLocale = {
1885
1879
  "lo-LA": "lo-LA",
1886
1880
  "lt-LT": "lt-LT",
1887
1881
  "lv-LV": "lv-LV",
1888
- "mi-NZ": "mi-NZ",
1889
1882
  "mk-MK": "mk-MK",
1890
1883
  "ml-IN": "ml-IN",
1891
1884
  "mn-MN": "mn-MN",
@@ -1976,8 +1969,6 @@ var ElevenLabsLanguages = [
1976
1969
  { code: "hr", name: "Croatian" },
1977
1970
  { code: "bg", name: "Bulgarian" },
1978
1971
  { code: "lt", name: "Lithuanian" },
1979
- { code: "la", name: "Latin" },
1980
- { code: "mi", name: "Maori" },
1981
1972
  { code: "ml", name: "Malayalam" },
1982
1973
  { code: "cy", name: "Welsh" },
1983
1974
  { code: "sk", name: "Slovak" },
@@ -1991,20 +1982,16 @@ var ElevenLabsLanguages = [
1991
1982
  { code: "kn", name: "Kannada" },
1992
1983
  { code: "et", name: "Estonian" },
1993
1984
  { code: "mk", name: "Macedonian" },
1994
- { code: "br", name: "Breton" },
1995
- { code: "eu", name: "Basque" },
1996
1985
  { code: "is", name: "Icelandic" },
1997
1986
  { code: "hy", name: "Armenian" },
1998
1987
  { code: "ne", name: "Nepali" },
1999
1988
  { code: "mn", name: "Mongolian" },
2000
1989
  { code: "bs", name: "Bosnian" },
2001
1990
  { code: "kk", name: "Kazakh" },
2002
- { code: "sq", name: "Albanian" },
2003
1991
  { code: "sw", name: "Swahili" },
2004
1992
  { code: "gl", name: "Galician" },
2005
1993
  { code: "mr", name: "Marathi" },
2006
1994
  { code: "pa", name: "Punjabi" },
2007
- { code: "si", name: "Sinhala" },
2008
1995
  { code: "km", name: "Khmer" },
2009
1996
  { code: "sn", name: "Shona" },
2010
1997
  { code: "yo", name: "Yoruba" },
@@ -2017,29 +2004,16 @@ var ElevenLabsLanguages = [
2017
2004
  { code: "sd", name: "Sindhi" },
2018
2005
  { code: "gu", name: "Gujarati" },
2019
2006
  { code: "am", name: "Amharic" },
2020
- { code: "yi", name: "Yiddish" },
2021
2007
  { code: "lo", name: "Lao" },
2022
2008
  { code: "uz", name: "Uzbek" },
2023
- { code: "fo", name: "Faroese" },
2024
- { code: "ht", name: "Haitian Creole" },
2025
2009
  { code: "ps", name: "Pashto" },
2026
- { code: "tk", name: "Turkmen" },
2027
- { code: "nn", name: "Norwegian Nynorsk" },
2028
2010
  { code: "mt", name: "Maltese" },
2029
- { code: "sa", name: "Sanskrit" },
2030
2011
  { code: "lb", name: "Luxembourgish" },
2031
2012
  { code: "my", name: "Burmese" },
2032
- { code: "bo", name: "Tibetan" },
2033
- { code: "tl", name: "Tagalog" },
2034
- { code: "mg", name: "Malagasy" },
2035
2013
  { code: "as", name: "Assamese" },
2036
- { code: "tt", name: "Tatar" },
2037
- { code: "haw", name: "Hawaiian" },
2038
2014
  { code: "ln", name: "Lingala" },
2039
2015
  { code: "ha", name: "Hausa" },
2040
- { code: "ba", name: "Bashkir" },
2041
- { code: "jw", name: "Javanese" },
2042
- { code: "su", name: "Sundanese" }
2016
+ { code: "jw", name: "Javanese" }
2043
2017
  ];
2044
2018
  var ElevenLabsLanguageCodes = [
2045
2019
  "en",
@@ -2077,8 +2051,6 @@ var ElevenLabsLanguageCodes = [
2077
2051
  "hr",
2078
2052
  "bg",
2079
2053
  "lt",
2080
- "la",
2081
- "mi",
2082
2054
  "ml",
2083
2055
  "cy",
2084
2056
  "sk",
@@ -2092,20 +2064,16 @@ var ElevenLabsLanguageCodes = [
2092
2064
  "kn",
2093
2065
  "et",
2094
2066
  "mk",
2095
- "br",
2096
- "eu",
2097
2067
  "is",
2098
2068
  "hy",
2099
2069
  "ne",
2100
2070
  "mn",
2101
2071
  "bs",
2102
2072
  "kk",
2103
- "sq",
2104
2073
  "sw",
2105
2074
  "gl",
2106
2075
  "mr",
2107
2076
  "pa",
2108
- "si",
2109
2077
  "km",
2110
2078
  "sn",
2111
2079
  "yo",
@@ -2118,29 +2086,16 @@ var ElevenLabsLanguageCodes = [
2118
2086
  "sd",
2119
2087
  "gu",
2120
2088
  "am",
2121
- "yi",
2122
2089
  "lo",
2123
2090
  "uz",
2124
- "fo",
2125
- "ht",
2126
2091
  "ps",
2127
- "tk",
2128
- "nn",
2129
2092
  "mt",
2130
- "sa",
2131
2093
  "lb",
2132
2094
  "my",
2133
- "bo",
2134
- "tl",
2135
- "mg",
2136
2095
  "as",
2137
- "tt",
2138
- "haw",
2139
2096
  "ln",
2140
2097
  "ha",
2141
- "ba",
2142
- "jw",
2143
- "su"
2098
+ "jw"
2144
2099
  ];
2145
2100
  var ElevenLabsLanguageLabels = {
2146
2101
  en: "English",
@@ -2178,8 +2133,6 @@ var ElevenLabsLanguageLabels = {
2178
2133
  hr: "Croatian",
2179
2134
  bg: "Bulgarian",
2180
2135
  lt: "Lithuanian",
2181
- la: "Latin",
2182
- mi: "Maori",
2183
2136
  ml: "Malayalam",
2184
2137
  cy: "Welsh",
2185
2138
  sk: "Slovak",
@@ -2193,20 +2146,16 @@ var ElevenLabsLanguageLabels = {
2193
2146
  kn: "Kannada",
2194
2147
  et: "Estonian",
2195
2148
  mk: "Macedonian",
2196
- br: "Breton",
2197
- eu: "Basque",
2198
2149
  is: "Icelandic",
2199
2150
  hy: "Armenian",
2200
2151
  ne: "Nepali",
2201
2152
  mn: "Mongolian",
2202
2153
  bs: "Bosnian",
2203
2154
  kk: "Kazakh",
2204
- sq: "Albanian",
2205
2155
  sw: "Swahili",
2206
2156
  gl: "Galician",
2207
2157
  mr: "Marathi",
2208
2158
  pa: "Punjabi",
2209
- si: "Sinhala",
2210
2159
  km: "Khmer",
2211
2160
  sn: "Shona",
2212
2161
  yo: "Yoruba",
@@ -2219,29 +2168,16 @@ var ElevenLabsLanguageLabels = {
2219
2168
  sd: "Sindhi",
2220
2169
  gu: "Gujarati",
2221
2170
  am: "Amharic",
2222
- yi: "Yiddish",
2223
2171
  lo: "Lao",
2224
2172
  uz: "Uzbek",
2225
- fo: "Faroese",
2226
- ht: "Haitian Creole",
2227
2173
  ps: "Pashto",
2228
- tk: "Turkmen",
2229
- nn: "Norwegian Nynorsk",
2230
2174
  mt: "Maltese",
2231
- sa: "Sanskrit",
2232
2175
  lb: "Luxembourgish",
2233
2176
  my: "Burmese",
2234
- bo: "Tibetan",
2235
- tl: "Tagalog",
2236
- mg: "Malagasy",
2237
2177
  as: "Assamese",
2238
- tt: "Tatar",
2239
- haw: "Hawaiian",
2240
2178
  ln: "Lingala",
2241
2179
  ha: "Hausa",
2242
- ba: "Bashkir",
2243
- jw: "Javanese",
2244
- su: "Sundanese"
2180
+ jw: "Javanese"
2245
2181
  };
2246
2182
 
2247
2183
  // src/generated/gladia/schema/streamingSupportedBitDepthEnum.ts
@@ -2746,6 +2682,7 @@ var OpenAITranscriptionModel = {
2746
2682
  "gpt-4o-mini-transcribe-2025-12-15": "gpt-4o-mini-transcribe-2025-12-15",
2747
2683
  "gpt-4o-transcribe": "gpt-4o-transcribe",
2748
2684
  "gpt-4o-transcribe-diarize": "gpt-4o-transcribe-diarize",
2685
+ "gpt-realtime-whisper": "gpt-realtime-whisper",
2749
2686
  "whisper-1": "whisper-1"
2750
2687
  };
2751
2688
  var OpenAIRealtimeModel = {
@@ -2761,6 +2698,7 @@ var OpenAIRealtimeModel = {
2761
2698
  "gpt-audio-mini-2025-12-15": "gpt-audio-mini-2025-12-15",
2762
2699
  "gpt-realtime": "gpt-realtime",
2763
2700
  "gpt-realtime-1.5": "gpt-realtime-1.5",
2701
+ "gpt-realtime-2": "gpt-realtime-2",
2764
2702
  "gpt-realtime-2025-08-28": "gpt-realtime-2025-08-28",
2765
2703
  "gpt-realtime-mini": "gpt-realtime-mini",
2766
2704
  "gpt-realtime-mini-2025-10-06": "gpt-realtime-mini-2025-10-06",
@@ -5577,12 +5515,20 @@ var EntityType = {
5577
5515
  email_address: "email_address",
5578
5516
  event: "event",
5579
5517
  filename: "filename",
5518
+ gender: "gender",
5580
5519
  gender_sexuality: "gender_sexuality",
5581
5520
  healthcare_number: "healthcare_number",
5582
5521
  injury: "injury",
5583
5522
  ip_address: "ip_address",
5584
5523
  language: "language",
5585
5524
  location: "location",
5525
+ location_address: "location_address",
5526
+ location_address_street: "location_address_street",
5527
+ location_city: "location_city",
5528
+ location_coordinate: "location_coordinate",
5529
+ location_country: "location_country",
5530
+ location_state: "location_state",
5531
+ location_zip: "location_zip",
5586
5532
  marital_status: "marital_status",
5587
5533
  medical_condition: "medical_condition",
5588
5534
  medical_process: "medical_process",
@@ -5591,6 +5537,7 @@ var EntityType = {
5591
5537
  number_sequence: "number_sequence",
5592
5538
  occupation: "occupation",
5593
5539
  organization: "organization",
5540
+ organization_medical_facility: "organization_medical_facility",
5594
5541
  passport_number: "passport_number",
5595
5542
  password: "password",
5596
5543
  person_age: "person_age",
@@ -5599,6 +5546,7 @@ var EntityType = {
5599
5546
  physical_attribute: "physical_attribute",
5600
5547
  political_affiliation: "political_affiliation",
5601
5548
  religion: "religion",
5549
+ sexuality: "sexuality",
5602
5550
  statistics: "statistics",
5603
5551
  time: "time",
5604
5552
  url: "url",
@@ -5625,12 +5573,20 @@ var PiiPolicy = {
5625
5573
  email_address: "email_address",
5626
5574
  event: "event",
5627
5575
  filename: "filename",
5576
+ gender: "gender",
5628
5577
  gender_sexuality: "gender_sexuality",
5629
5578
  healthcare_number: "healthcare_number",
5630
5579
  injury: "injury",
5631
5580
  ip_address: "ip_address",
5632
5581
  language: "language",
5633
5582
  location: "location",
5583
+ location_address: "location_address",
5584
+ location_address_street: "location_address_street",
5585
+ location_city: "location_city",
5586
+ location_coordinate: "location_coordinate",
5587
+ location_country: "location_country",
5588
+ location_state: "location_state",
5589
+ location_zip: "location_zip",
5634
5590
  marital_status: "marital_status",
5635
5591
  medical_condition: "medical_condition",
5636
5592
  medical_process: "medical_process",
@@ -5639,6 +5595,7 @@ var PiiPolicy = {
5639
5595
  number_sequence: "number_sequence",
5640
5596
  occupation: "occupation",
5641
5597
  organization: "organization",
5598
+ organization_medical_facility: "organization_medical_facility",
5642
5599
  passport_number: "passport_number",
5643
5600
  password: "password",
5644
5601
  person_age: "person_age",
@@ -5647,6 +5604,7 @@ var PiiPolicy = {
5647
5604
  physical_attribute: "physical_attribute",
5648
5605
  political_affiliation: "political_affiliation",
5649
5606
  religion: "religion",
5607
+ sexuality: "sexuality",
5650
5608
  statistics: "statistics",
5651
5609
  time: "time",
5652
5610
  url: "url",
@@ -5715,7 +5673,8 @@ var TranscriptOptionalParamsRedactPiiAudioOptionsOverrideAudioRedactionMethod =
5715
5673
 
5716
5674
  // src/generated/assemblyai/schema/transcriptOptionalParamsRemoveAudioTags.ts
5717
5675
  var TranscriptOptionalParamsRemoveAudioTags = {
5718
- all: "all"
5676
+ all: "all",
5677
+ speaker: "speaker"
5719
5678
  };
5720
5679
 
5721
5680
  // src/generated/assemblyai/schema/transcriptRedactPiiAudioOptionsOverrideAudioRedactionMethod.ts
@@ -5725,7 +5684,8 @@ var TranscriptRedactPiiAudioOptionsOverrideAudioRedactionMethod = {
5725
5684
 
5726
5685
  // src/generated/assemblyai/schema/transcriptRemoveAudioTags.ts
5727
5686
  var TranscriptRemoveAudioTags = {
5728
- all: "all"
5687
+ all: "all",
5688
+ speaker: "speaker"
5729
5689
  };
5730
5690
 
5731
5691
  // src/generated/assemblyai/api/assemblyAIAPI.ts
@@ -9617,15 +9577,18 @@ var import_axios9 = __toESM(require("axios"));
9617
9577
  // src/generated/soniox/schema/index.ts
9618
9578
  var schema_exports4 = {};
9619
9579
  __export(schema_exports4, {
9580
+ TTSVoiceGender: () => TTSVoiceGender,
9620
9581
  TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
9621
9582
  TranscriptionMode: () => TranscriptionMode,
9622
9583
  TranscriptionStatus: () => TranscriptionStatus,
9623
- TranslationConfigType: () => TranslationConfigType
9584
+ TranslationConfigType: () => TranslationConfigType,
9585
+ UsageLogsSort: () => UsageLogsSort
9624
9586
  });
9625
9587
 
9626
9588
  // src/generated/soniox/schema/temporaryApiKeyUsageType.ts
9627
9589
  var TemporaryApiKeyUsageType = {
9628
- transcribe_websocket: "transcribe_websocket"
9590
+ transcribe_websocket: "transcribe_websocket",
9591
+ tts_rt: "tts_rt"
9629
9592
  };
9630
9593
 
9631
9594
  // src/generated/soniox/schema/transcriptionMode.ts
@@ -9640,6 +9603,19 @@ var TranslationConfigType = {
9640
9603
  two_way: "two_way"
9641
9604
  };
9642
9605
 
9606
+ // src/generated/soniox/schema/tTSVoiceGender.ts
9607
+ var TTSVoiceGender = {
9608
+ male: "male",
9609
+ female: "female",
9610
+ neutral: "neutral"
9611
+ };
9612
+
9613
+ // src/generated/soniox/schema/usageLogsSort.ts
9614
+ var UsageLogsSort = {
9615
+ end_time_asc: "end_time_asc",
9616
+ end_time_desc: "end_time_desc"
9617
+ };
9618
+
9643
9619
  // src/generated/soniox/api/sonioxPublicAPI.ts
9644
9620
  var uploadFile = (uploadFileBody2, options) => {
9645
9621
  const formData = new FormData();
@@ -11007,6 +10983,7 @@ __export(deepgramAPI_zod_exports, {
11007
10983
  speakGenerateQueryMipOptOutDefault: () => speakGenerateQueryMipOptOutDefault,
11008
10984
  speakGenerateQueryModelDefault: () => speakGenerateQueryModelDefault,
11009
10985
  speakGenerateQueryParams: () => speakGenerateQueryParams,
10986
+ speakGenerateQuerySpeedDefault: () => speakGenerateQuerySpeedDefault,
11010
10987
  speakGenerateResponse: () => speakGenerateResponse
11011
10988
  });
11012
10989
  var import_zod = require("zod");
@@ -11061,6 +11038,9 @@ var listenTranscribeQueryParams = import_zod.z.object({
11061
11038
  diarize: import_zod.z.boolean().optional().describe(
11062
11039
  "Recognize speaker changes. Each word in the transcript will be assigned a speaker number starting at 0"
11063
11040
  ),
11041
+ diarize_model: import_zod.z.enum(["latest", "v1", "v2"]).optional().describe(
11042
+ "Select and enable a specific batch diarization model version. If specifying this parameter, you should not set the deprecated `diarize=true` parameter. Not accepted on streaming requests."
11043
+ ),
11064
11044
  dictation: import_zod.z.boolean().optional().describe("Dictation mode for controlling formatting with dictated speech"),
11065
11045
  encoding: import_zod.z.enum(["linear16", "flac", "mulaw", "amr-nb", "amr-wb", "opus", "speex", "g729"]).optional().describe("Specify the expected encoding of your submitted audio"),
11066
11046
  filler_words: import_zod.z.boolean().optional().describe('Filler Words can help transcribe interruptions in your audio, like "uh" and "um"'),
@@ -11326,6 +11306,7 @@ var listenTranscribeResponse = import_zod.z.object({
11326
11306
  var speakGenerateQueryCallbackMethodDefault = "POST";
11327
11307
  var speakGenerateQueryMipOptOutDefault = false;
11328
11308
  var speakGenerateQueryModelDefault = "aura-asteria-en";
11309
+ var speakGenerateQuerySpeedDefault = 1;
11329
11310
  var speakGenerateQueryParams = import_zod.z.object({
11330
11311
  callback: import_zod.z.string().optional().describe("URL to which we'll make the callback request"),
11331
11312
  callback_method: import_zod.z.enum(["POST", "PUT"]).default(speakGenerateQueryCallbackMethodDefault).describe("HTTP method by which the callback request will be made"),
@@ -11437,6 +11418,9 @@ var speakGenerateQueryParams = import_zod.z.object({
11437
11418
  import_zod.z.enum(["22050"]).describe("Encoding - mp3. Sample rate is fixed and not configurable (22050 Hz).")
11438
11419
  ).or(import_zod.z.enum(["48000"]).describe("Encoding - opus. Sample rate is fixed at 48000 Hz.")).optional().describe(
11439
11420
  "Sample Rate specifies the sample rate for the output audio. Based on the encoding, different sample rates are supported. For some encodings, the sample rate is not configurable"
11421
+ ),
11422
+ speed: import_zod.z.number().default(speakGenerateQuerySpeedDefault).describe(
11423
+ "Speaking rate multiplier that adjusts the pace of generated speech while preserving natural prosody and voice quality. Not yet supported in all languages."
11440
11424
  )
11441
11425
  });
11442
11426
  var speakGenerateHeader = import_zod.z.object({
@@ -11761,6 +11745,7 @@ __export(assemblyAIAPI_zod_exports, {
11761
11745
  createTranscriptBodyRedactPiiAudioDefault: () => createTranscriptBodyRedactPiiAudioDefault,
11762
11746
  createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault: () => createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault,
11763
11747
  createTranscriptBodyRedactPiiDefault: () => createTranscriptBodyRedactPiiDefault,
11748
+ createTranscriptBodyRedactPiiReturnUnredactedDefault: () => createTranscriptBodyRedactPiiReturnUnredactedDefault,
11764
11749
  createTranscriptBodySentimentAnalysisDefault: () => createTranscriptBodySentimentAnalysisDefault,
11765
11750
  createTranscriptBodySpeakerLabelsDefault: () => createTranscriptBodySpeakerLabelsDefault,
11766
11751
  createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault: () => createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault,
@@ -11831,6 +11816,7 @@ var createTranscriptBodyPunctuateDefault = true;
11831
11816
  var createTranscriptBodyRedactPiiDefault = false;
11832
11817
  var createTranscriptBodyRedactPiiAudioDefault = false;
11833
11818
  var createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault = false;
11819
+ var createTranscriptBodyRedactPiiReturnUnredactedDefault = false;
11834
11820
  var createTranscriptBodySentimentAnalysisDefault = false;
11835
11821
  var createTranscriptBodySpeakerLabelsDefault = false;
11836
11822
  var createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault = 1;
@@ -11869,7 +11855,7 @@ var createTranscriptBody = import_zod3.z.object({
11869
11855
  "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
11870
11856
  ),
11871
11857
  disfluencies: import_zod3.z.boolean().optional().describe(
11872
- 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
11858
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
11873
11859
  ),
11874
11860
  domain: import_zod3.z.string().nullish().describe(
11875
11861
  'Enable domain-specific transcription models to improve accuracy for specialized terminology. Set to `"medical-v1"` to enable [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) for improved accuracy of medical terms such as medications, procedures, conditions, and dosages.\n\nSupported languages: English (`en`), Spanish (`es`), German (`de`), French (`fr`). If used with an unsupported language, the parameter is ignored and a warning is returned.\n'
@@ -12176,12 +12162,20 @@ var createTranscriptBody = import_zod3.z.object({
12176
12162
  "email_address",
12177
12163
  "event",
12178
12164
  "filename",
12165
+ "gender",
12179
12166
  "gender_sexuality",
12180
12167
  "healthcare_number",
12181
12168
  "injury",
12182
12169
  "ip_address",
12183
12170
  "language",
12184
12171
  "location",
12172
+ "location_address",
12173
+ "location_address_street",
12174
+ "location_city",
12175
+ "location_coordinate",
12176
+ "location_country",
12177
+ "location_state",
12178
+ "location_zip",
12185
12179
  "marital_status",
12186
12180
  "medical_condition",
12187
12181
  "medical_process",
@@ -12190,6 +12184,7 @@ var createTranscriptBody = import_zod3.z.object({
12190
12184
  "number_sequence",
12191
12185
  "occupation",
12192
12186
  "organization",
12187
+ "organization_medical_facility",
12193
12188
  "passport_number",
12194
12189
  "password",
12195
12190
  "person_age",
@@ -12198,6 +12193,7 @@ var createTranscriptBody = import_zod3.z.object({
12198
12193
  "physical_attribute",
12199
12194
  "political_affiliation",
12200
12195
  "religion",
12196
+ "sexuality",
12201
12197
  "statistics",
12202
12198
  "time",
12203
12199
  "url",
@@ -12205,15 +12201,20 @@ var createTranscriptBody = import_zod3.z.object({
12205
12201
  "username",
12206
12202
  "vehicle_id",
12207
12203
  "zodiac_sign"
12208
- ]).describe("The type of PII to redact")
12204
+ ]).describe(
12205
+ "The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
12206
+ )
12209
12207
  ).optional().describe(
12210
12208
  "The list of PII Redaction policies to enable. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
12211
12209
  ),
12212
12210
  redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).describe(
12213
- "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
12211
+ "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
12214
12212
  ).or(import_zod3.z.null()).optional().describe(
12215
12213
  "The replacement logic for detected PII, can be `entity_type` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
12216
12214
  ),
12215
+ redact_pii_return_unredacted: import_zod3.z.boolean().optional().describe(
12216
+ "When set to `true`, returns the original unredacted transcript alongside the redacted one in the same response. Requires `redact_pii` to be `true`, otherwise a 400 error is returned.\n\nWhen enabled, the response includes the additional fields `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. The existing `text`, `words`, and `utterances` fields remain fully redacted. When disabled (default), the response is unchanged and contains only the redacted transcript. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details.\n"
12217
+ ),
12217
12218
  sentiment_analysis: import_zod3.z.boolean().optional().describe(
12218
12219
  "Enable [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech), can be true or false"
12219
12220
  ),
@@ -12311,10 +12312,10 @@ var createTranscriptBody = import_zod3.z.object({
12311
12312
  ),
12312
12313
  summary_model: import_zod3.z.enum(["informative", "conversational", "catchy"]).optional().describe("The model to summarize the transcript"),
12313
12314
  summary_type: import_zod3.z.enum(["bullets", "bullets_verbose", "gist", "headline", "paragraph"]).optional().describe("The type of summary"),
12314
- remove_audio_tags: import_zod3.z.enum(["all"]).describe(
12315
- 'Remove [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) from the transcript text. Set to `"all"` to remove all audio tags.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
12315
+ remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
12316
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
12316
12317
  ).or(import_zod3.z.null()).optional().describe(
12317
- 'Remove [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) from the transcript text. Set to `"all"` to remove all audio tags.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
12318
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
12318
12319
  ),
12319
12320
  temperature: import_zod3.z.number().optional().describe(
12320
12321
  "Control the amount of randomness injected into the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
@@ -12448,7 +12449,7 @@ var createTranscriptResponse = import_zod3.z.object({
12448
12449
  "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
12449
12450
  ),
12450
12451
  disfluencies: import_zod3.z.boolean().nullish().describe(
12451
- 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
12452
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
12452
12453
  ),
12453
12454
  domain: import_zod3.z.string().nullish().describe(
12454
12455
  'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
@@ -12471,12 +12472,20 @@ var createTranscriptResponse = import_zod3.z.object({
12471
12472
  "email_address",
12472
12473
  "event",
12473
12474
  "filename",
12475
+ "gender",
12474
12476
  "gender_sexuality",
12475
12477
  "healthcare_number",
12476
12478
  "injury",
12477
12479
  "ip_address",
12478
12480
  "language",
12479
12481
  "location",
12482
+ "location_address",
12483
+ "location_address_street",
12484
+ "location_city",
12485
+ "location_coordinate",
12486
+ "location_country",
12487
+ "location_state",
12488
+ "location_zip",
12480
12489
  "marital_status",
12481
12490
  "medical_condition",
12482
12491
  "medical_process",
@@ -12485,6 +12494,7 @@ var createTranscriptResponse = import_zod3.z.object({
12485
12494
  "number_sequence",
12486
12495
  "occupation",
12487
12496
  "organization",
12497
+ "organization_medical_facility",
12488
12498
  "passport_number",
12489
12499
  "password",
12490
12500
  "person_age",
@@ -12493,6 +12503,7 @@ var createTranscriptResponse = import_zod3.z.object({
12493
12503
  "physical_attribute",
12494
12504
  "political_affiliation",
12495
12505
  "religion",
12506
+ "sexuality",
12496
12507
  "statistics",
12497
12508
  "time",
12498
12509
  "url",
@@ -12797,6 +12808,24 @@ var createTranscriptResponse = import_zod3.z.object({
12797
12808
  }).optional().describe(
12798
12809
  "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
12799
12810
  ),
12811
+ metadata: import_zod3.z.object({
12812
+ domain_used: import_zod3.z.string().nullish().describe(
12813
+ 'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
12814
+ ),
12815
+ warnings: import_zod3.z.array(
12816
+ import_zod3.z.object({
12817
+ message: import_zod3.z.string().describe("A human-readable description of the warning.")
12818
+ }).describe(
12819
+ "A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
12820
+ )
12821
+ ).optional().describe(
12822
+ "Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
12823
+ )
12824
+ }).describe(
12825
+ "Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
12826
+ ).or(import_zod3.z.null()).optional().describe(
12827
+ "Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
12828
+ ),
12800
12829
  multichannel: import_zod3.z.boolean().nullish().describe(
12801
12830
  "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
12802
12831
  ),
@@ -12844,12 +12873,20 @@ var createTranscriptResponse = import_zod3.z.object({
12844
12873
  "email_address",
12845
12874
  "event",
12846
12875
  "filename",
12876
+ "gender",
12847
12877
  "gender_sexuality",
12848
12878
  "healthcare_number",
12849
12879
  "injury",
12850
12880
  "ip_address",
12851
12881
  "language",
12852
12882
  "location",
12883
+ "location_address",
12884
+ "location_address_street",
12885
+ "location_city",
12886
+ "location_coordinate",
12887
+ "location_country",
12888
+ "location_state",
12889
+ "location_zip",
12853
12890
  "marital_status",
12854
12891
  "medical_condition",
12855
12892
  "medical_process",
@@ -12858,6 +12895,7 @@ var createTranscriptResponse = import_zod3.z.object({
12858
12895
  "number_sequence",
12859
12896
  "occupation",
12860
12897
  "organization",
12898
+ "organization_medical_facility",
12861
12899
  "passport_number",
12862
12900
  "password",
12863
12901
  "person_age",
@@ -12866,6 +12904,7 @@ var createTranscriptResponse = import_zod3.z.object({
12866
12904
  "physical_attribute",
12867
12905
  "political_affiliation",
12868
12906
  "religion",
12907
+ "sexuality",
12869
12908
  "statistics",
12870
12909
  "time",
12871
12910
  "url",
@@ -12873,12 +12912,17 @@ var createTranscriptResponse = import_zod3.z.object({
12873
12912
  "username",
12874
12913
  "vehicle_id",
12875
12914
  "zodiac_sign"
12876
- ]).describe("The type of PII to redact")
12915
+ ]).describe(
12916
+ "The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
12917
+ )
12877
12918
  ).nullish().describe(
12878
12919
  "The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
12879
12920
  ),
12880
12921
  redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
12881
- "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
12922
+ "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
12923
+ ),
12924
+ redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
12925
+ "Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
12882
12926
  ),
12883
12927
  sentiment_analysis: import_zod3.z.boolean().nullish().describe(
12884
12928
  "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
@@ -13015,20 +13059,23 @@ var createTranscriptResponse = import_zod3.z.object({
13015
13059
  "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
13016
13060
  ),
13017
13061
  summary_model: import_zod3.z.string().nullish().describe(
13018
- "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
13062
+ "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
13019
13063
  ),
13020
13064
  summary_type: import_zod3.z.string().nullish().describe(
13021
- "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
13065
+ "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
13022
13066
  ),
13023
- remove_audio_tags: import_zod3.z.enum(["all"]).describe(
13024
- "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
13067
+ remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
13068
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
13025
13069
  ).or(import_zod3.z.null()).optional().describe(
13026
- "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
13070
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
13027
13071
  ),
13028
13072
  temperature: import_zod3.z.number().nullish().describe(
13029
13073
  "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
13030
13074
  ),
13031
13075
  text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
13076
+ unredacted_text: import_zod3.z.string().nullish().describe(
13077
+ "The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
13078
+ ),
13032
13079
  throttled: import_zod3.z.boolean().nullish().describe(
13033
13080
  "True while a request is throttled and false when a request is no longer throttled"
13034
13081
  ),
@@ -13065,6 +13112,39 @@ var createTranscriptResponse = import_zod3.z.object({
13065
13112
  ).nullish().describe(
13066
13113
  "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
13067
13114
  ),
13115
+ unredacted_utterances: import_zod3.z.array(
13116
+ import_zod3.z.object({
13117
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
13118
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
13119
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
13120
+ text: import_zod3.z.string().describe("The text for this utterance"),
13121
+ words: import_zod3.z.array(
13122
+ import_zod3.z.object({
13123
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
13124
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
13125
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
13126
+ text: import_zod3.z.string().describe("The text of the word"),
13127
+ channel: import_zod3.z.string().nullish().describe(
13128
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
13129
+ ),
13130
+ speaker: import_zod3.z.string().nullable().describe(
13131
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
13132
+ )
13133
+ })
13134
+ ).describe("The words in the utterance."),
13135
+ channel: import_zod3.z.string().nullish().describe(
13136
+ "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
13137
+ ),
13138
+ speaker: import_zod3.z.string().describe(
13139
+ 'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
13140
+ ),
13141
+ translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
13142
+ 'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
13143
+ )
13144
+ })
13145
+ ).nullish().describe(
13146
+ "The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
13147
+ ),
13068
13148
  webhook_auth: import_zod3.z.boolean().describe(
13069
13149
  "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
13070
13150
  ),
@@ -13093,6 +13173,22 @@ var createTranscriptResponse = import_zod3.z.object({
13093
13173
  ).nullish().describe(
13094
13174
  "An array of temporally-sequential word objects, one for each word in the transcript.\n"
13095
13175
  ),
13176
+ unredacted_words: import_zod3.z.array(
13177
+ import_zod3.z.object({
13178
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
13179
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
13180
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
13181
+ text: import_zod3.z.string().describe("The text of the word"),
13182
+ channel: import_zod3.z.string().nullish().describe(
13183
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
13184
+ ),
13185
+ speaker: import_zod3.z.string().nullable().describe(
13186
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
13187
+ )
13188
+ })
13189
+ ).nullish().describe(
13190
+ "The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
13191
+ ),
13096
13192
  acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
13097
13193
  custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
13098
13194
  language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
@@ -13268,7 +13364,7 @@ var getTranscriptResponse = import_zod3.z.object({
13268
13364
  "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
13269
13365
  ),
13270
13366
  disfluencies: import_zod3.z.boolean().nullish().describe(
13271
- 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
13367
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
13272
13368
  ),
13273
13369
  domain: import_zod3.z.string().nullish().describe(
13274
13370
  'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
@@ -13291,12 +13387,20 @@ var getTranscriptResponse = import_zod3.z.object({
13291
13387
  "email_address",
13292
13388
  "event",
13293
13389
  "filename",
13390
+ "gender",
13294
13391
  "gender_sexuality",
13295
13392
  "healthcare_number",
13296
13393
  "injury",
13297
13394
  "ip_address",
13298
13395
  "language",
13299
13396
  "location",
13397
+ "location_address",
13398
+ "location_address_street",
13399
+ "location_city",
13400
+ "location_coordinate",
13401
+ "location_country",
13402
+ "location_state",
13403
+ "location_zip",
13300
13404
  "marital_status",
13301
13405
  "medical_condition",
13302
13406
  "medical_process",
@@ -13305,6 +13409,7 @@ var getTranscriptResponse = import_zod3.z.object({
13305
13409
  "number_sequence",
13306
13410
  "occupation",
13307
13411
  "organization",
13412
+ "organization_medical_facility",
13308
13413
  "passport_number",
13309
13414
  "password",
13310
13415
  "person_age",
@@ -13313,6 +13418,7 @@ var getTranscriptResponse = import_zod3.z.object({
13313
13418
  "physical_attribute",
13314
13419
  "political_affiliation",
13315
13420
  "religion",
13421
+ "sexuality",
13316
13422
  "statistics",
13317
13423
  "time",
13318
13424
  "url",
@@ -13617,6 +13723,24 @@ var getTranscriptResponse = import_zod3.z.object({
13617
13723
  }).optional().describe(
13618
13724
  "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
13619
13725
  ),
13726
+ metadata: import_zod3.z.object({
13727
+ domain_used: import_zod3.z.string().nullish().describe(
13728
+ 'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
13729
+ ),
13730
+ warnings: import_zod3.z.array(
13731
+ import_zod3.z.object({
13732
+ message: import_zod3.z.string().describe("A human-readable description of the warning.")
13733
+ }).describe(
13734
+ "A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
13735
+ )
13736
+ ).optional().describe(
13737
+ "Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
13738
+ )
13739
+ }).describe(
13740
+ "Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
13741
+ ).or(import_zod3.z.null()).optional().describe(
13742
+ "Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
13743
+ ),
13620
13744
  multichannel: import_zod3.z.boolean().nullish().describe(
13621
13745
  "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
13622
13746
  ),
@@ -13664,12 +13788,20 @@ var getTranscriptResponse = import_zod3.z.object({
13664
13788
  "email_address",
13665
13789
  "event",
13666
13790
  "filename",
13791
+ "gender",
13667
13792
  "gender_sexuality",
13668
13793
  "healthcare_number",
13669
13794
  "injury",
13670
13795
  "ip_address",
13671
13796
  "language",
13672
13797
  "location",
13798
+ "location_address",
13799
+ "location_address_street",
13800
+ "location_city",
13801
+ "location_coordinate",
13802
+ "location_country",
13803
+ "location_state",
13804
+ "location_zip",
13673
13805
  "marital_status",
13674
13806
  "medical_condition",
13675
13807
  "medical_process",
@@ -13678,6 +13810,7 @@ var getTranscriptResponse = import_zod3.z.object({
13678
13810
  "number_sequence",
13679
13811
  "occupation",
13680
13812
  "organization",
13813
+ "organization_medical_facility",
13681
13814
  "passport_number",
13682
13815
  "password",
13683
13816
  "person_age",
@@ -13686,6 +13819,7 @@ var getTranscriptResponse = import_zod3.z.object({
13686
13819
  "physical_attribute",
13687
13820
  "political_affiliation",
13688
13821
  "religion",
13822
+ "sexuality",
13689
13823
  "statistics",
13690
13824
  "time",
13691
13825
  "url",
@@ -13693,12 +13827,17 @@ var getTranscriptResponse = import_zod3.z.object({
13693
13827
  "username",
13694
13828
  "vehicle_id",
13695
13829
  "zodiac_sign"
13696
- ]).describe("The type of PII to redact")
13830
+ ]).describe(
13831
+ "The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
13832
+ )
13697
13833
  ).nullish().describe(
13698
13834
  "The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
13699
13835
  ),
13700
13836
  redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
13701
- "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
13837
+ "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
13838
+ ),
13839
+ redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
13840
+ "Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
13702
13841
  ),
13703
13842
  sentiment_analysis: import_zod3.z.boolean().nullish().describe(
13704
13843
  "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
@@ -13835,20 +13974,23 @@ var getTranscriptResponse = import_zod3.z.object({
13835
13974
  "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
13836
13975
  ),
13837
13976
  summary_model: import_zod3.z.string().nullish().describe(
13838
- "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
13977
+ "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
13839
13978
  ),
13840
13979
  summary_type: import_zod3.z.string().nullish().describe(
13841
- "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
13980
+ "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
13842
13981
  ),
13843
- remove_audio_tags: import_zod3.z.enum(["all"]).describe(
13844
- "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
13982
+ remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
13983
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
13845
13984
  ).or(import_zod3.z.null()).optional().describe(
13846
- "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
13985
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
13847
13986
  ),
13848
13987
  temperature: import_zod3.z.number().nullish().describe(
13849
13988
  "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
13850
13989
  ),
13851
13990
  text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
13991
+ unredacted_text: import_zod3.z.string().nullish().describe(
13992
+ "The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
13993
+ ),
13852
13994
  throttled: import_zod3.z.boolean().nullish().describe(
13853
13995
  "True while a request is throttled and false when a request is no longer throttled"
13854
13996
  ),
@@ -13885,6 +14027,39 @@ var getTranscriptResponse = import_zod3.z.object({
13885
14027
  ).nullish().describe(
13886
14028
  "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
13887
14029
  ),
14030
+ unredacted_utterances: import_zod3.z.array(
14031
+ import_zod3.z.object({
14032
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
14033
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
14034
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
14035
+ text: import_zod3.z.string().describe("The text for this utterance"),
14036
+ words: import_zod3.z.array(
14037
+ import_zod3.z.object({
14038
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
14039
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
14040
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
14041
+ text: import_zod3.z.string().describe("The text of the word"),
14042
+ channel: import_zod3.z.string().nullish().describe(
14043
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14044
+ ),
14045
+ speaker: import_zod3.z.string().nullable().describe(
14046
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
14047
+ )
14048
+ })
14049
+ ).describe("The words in the utterance."),
14050
+ channel: import_zod3.z.string().nullish().describe(
14051
+ "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14052
+ ),
14053
+ speaker: import_zod3.z.string().describe(
14054
+ 'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
14055
+ ),
14056
+ translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
14057
+ 'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
14058
+ )
14059
+ })
14060
+ ).nullish().describe(
14061
+ "The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
14062
+ ),
13888
14063
  webhook_auth: import_zod3.z.boolean().describe(
13889
14064
  "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
13890
14065
  ),
@@ -13913,6 +14088,22 @@ var getTranscriptResponse = import_zod3.z.object({
13913
14088
  ).nullish().describe(
13914
14089
  "An array of temporally-sequential word objects, one for each word in the transcript.\n"
13915
14090
  ),
14091
+ unredacted_words: import_zod3.z.array(
14092
+ import_zod3.z.object({
14093
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
14094
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
14095
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
14096
+ text: import_zod3.z.string().describe("The text of the word"),
14097
+ channel: import_zod3.z.string().nullish().describe(
14098
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14099
+ ),
14100
+ speaker: import_zod3.z.string().nullable().describe(
14101
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
14102
+ )
14103
+ })
14104
+ ).nullish().describe(
14105
+ "The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
14106
+ ),
13916
14107
  acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
13917
14108
  custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
13918
14109
  language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
@@ -14048,7 +14239,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
14048
14239
  "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
14049
14240
  ),
14050
14241
  disfluencies: import_zod3.z.boolean().nullish().describe(
14051
- 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
14242
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
14052
14243
  ),
14053
14244
  domain: import_zod3.z.string().nullish().describe(
14054
14245
  'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
@@ -14071,12 +14262,20 @@ var deleteTranscriptResponse = import_zod3.z.object({
14071
14262
  "email_address",
14072
14263
  "event",
14073
14264
  "filename",
14265
+ "gender",
14074
14266
  "gender_sexuality",
14075
14267
  "healthcare_number",
14076
14268
  "injury",
14077
14269
  "ip_address",
14078
14270
  "language",
14079
14271
  "location",
14272
+ "location_address",
14273
+ "location_address_street",
14274
+ "location_city",
14275
+ "location_coordinate",
14276
+ "location_country",
14277
+ "location_state",
14278
+ "location_zip",
14080
14279
  "marital_status",
14081
14280
  "medical_condition",
14082
14281
  "medical_process",
@@ -14085,6 +14284,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
14085
14284
  "number_sequence",
14086
14285
  "occupation",
14087
14286
  "organization",
14287
+ "organization_medical_facility",
14088
14288
  "passport_number",
14089
14289
  "password",
14090
14290
  "person_age",
@@ -14093,6 +14293,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
14093
14293
  "physical_attribute",
14094
14294
  "political_affiliation",
14095
14295
  "religion",
14296
+ "sexuality",
14096
14297
  "statistics",
14097
14298
  "time",
14098
14299
  "url",
@@ -14397,6 +14598,24 @@ var deleteTranscriptResponse = import_zod3.z.object({
14397
14598
  }).optional().describe(
14398
14599
  "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
14399
14600
  ),
14601
+ metadata: import_zod3.z.object({
14602
+ domain_used: import_zod3.z.string().nullish().describe(
14603
+ 'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
14604
+ ),
14605
+ warnings: import_zod3.z.array(
14606
+ import_zod3.z.object({
14607
+ message: import_zod3.z.string().describe("A human-readable description of the warning.")
14608
+ }).describe(
14609
+ "A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
14610
+ )
14611
+ ).optional().describe(
14612
+ "Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
14613
+ )
14614
+ }).describe(
14615
+ "Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
14616
+ ).or(import_zod3.z.null()).optional().describe(
14617
+ "Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
14618
+ ),
14400
14619
  multichannel: import_zod3.z.boolean().nullish().describe(
14401
14620
  "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
14402
14621
  ),
@@ -14444,12 +14663,20 @@ var deleteTranscriptResponse = import_zod3.z.object({
14444
14663
  "email_address",
14445
14664
  "event",
14446
14665
  "filename",
14666
+ "gender",
14447
14667
  "gender_sexuality",
14448
14668
  "healthcare_number",
14449
14669
  "injury",
14450
14670
  "ip_address",
14451
14671
  "language",
14452
14672
  "location",
14673
+ "location_address",
14674
+ "location_address_street",
14675
+ "location_city",
14676
+ "location_coordinate",
14677
+ "location_country",
14678
+ "location_state",
14679
+ "location_zip",
14453
14680
  "marital_status",
14454
14681
  "medical_condition",
14455
14682
  "medical_process",
@@ -14458,6 +14685,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
14458
14685
  "number_sequence",
14459
14686
  "occupation",
14460
14687
  "organization",
14688
+ "organization_medical_facility",
14461
14689
  "passport_number",
14462
14690
  "password",
14463
14691
  "person_age",
@@ -14466,6 +14694,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
14466
14694
  "physical_attribute",
14467
14695
  "political_affiliation",
14468
14696
  "religion",
14697
+ "sexuality",
14469
14698
  "statistics",
14470
14699
  "time",
14471
14700
  "url",
@@ -14473,12 +14702,17 @@ var deleteTranscriptResponse = import_zod3.z.object({
14473
14702
  "username",
14474
14703
  "vehicle_id",
14475
14704
  "zodiac_sign"
14476
- ]).describe("The type of PII to redact")
14705
+ ]).describe(
14706
+ "The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
14707
+ )
14477
14708
  ).nullish().describe(
14478
14709
  "The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
14479
14710
  ),
14480
14711
  redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
14481
- "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
14712
+ "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
14713
+ ),
14714
+ redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
14715
+ "Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
14482
14716
  ),
14483
14717
  sentiment_analysis: import_zod3.z.boolean().nullish().describe(
14484
14718
  "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
@@ -14615,20 +14849,23 @@ var deleteTranscriptResponse = import_zod3.z.object({
14615
14849
  "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
14616
14850
  ),
14617
14851
  summary_model: import_zod3.z.string().nullish().describe(
14618
- "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
14852
+ "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
14619
14853
  ),
14620
14854
  summary_type: import_zod3.z.string().nullish().describe(
14621
- "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
14855
+ "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
14622
14856
  ),
14623
- remove_audio_tags: import_zod3.z.enum(["all"]).describe(
14624
- "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
14857
+ remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
14858
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
14625
14859
  ).or(import_zod3.z.null()).optional().describe(
14626
- "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
14860
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
14627
14861
  ),
14628
14862
  temperature: import_zod3.z.number().nullish().describe(
14629
14863
  "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
14630
14864
  ),
14631
14865
  text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
14866
+ unredacted_text: import_zod3.z.string().nullish().describe(
14867
+ "The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
14868
+ ),
14632
14869
  throttled: import_zod3.z.boolean().nullish().describe(
14633
14870
  "True while a request is throttled and false when a request is no longer throttled"
14634
14871
  ),
@@ -14665,6 +14902,39 @@ var deleteTranscriptResponse = import_zod3.z.object({
14665
14902
  ).nullish().describe(
14666
14903
  "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
14667
14904
  ),
14905
+ unredacted_utterances: import_zod3.z.array(
14906
+ import_zod3.z.object({
14907
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
14908
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
14909
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
14910
+ text: import_zod3.z.string().describe("The text for this utterance"),
14911
+ words: import_zod3.z.array(
14912
+ import_zod3.z.object({
14913
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
14914
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
14915
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
14916
+ text: import_zod3.z.string().describe("The text of the word"),
14917
+ channel: import_zod3.z.string().nullish().describe(
14918
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14919
+ ),
14920
+ speaker: import_zod3.z.string().nullable().describe(
14921
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
14922
+ )
14923
+ })
14924
+ ).describe("The words in the utterance."),
14925
+ channel: import_zod3.z.string().nullish().describe(
14926
+ "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14927
+ ),
14928
+ speaker: import_zod3.z.string().describe(
14929
+ 'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
14930
+ ),
14931
+ translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
14932
+ 'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
14933
+ )
14934
+ })
14935
+ ).nullish().describe(
14936
+ "The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
14937
+ ),
14668
14938
  webhook_auth: import_zod3.z.boolean().describe(
14669
14939
  "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
14670
14940
  ),
@@ -14693,6 +14963,22 @@ var deleteTranscriptResponse = import_zod3.z.object({
14693
14963
  ).nullish().describe(
14694
14964
  "An array of temporally-sequential word objects, one for each word in the transcript.\n"
14695
14965
  ),
14966
+ unredacted_words: import_zod3.z.array(
14967
+ import_zod3.z.object({
14968
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
14969
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
14970
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
14971
+ text: import_zod3.z.string().describe("The text of the word"),
14972
+ channel: import_zod3.z.string().nullish().describe(
14973
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14974
+ ),
14975
+ speaker: import_zod3.z.string().nullable().describe(
14976
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
14977
+ )
14978
+ })
14979
+ ).nullish().describe(
14980
+ "The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
14981
+ ),
14696
14982
  acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
14697
14983
  custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
14698
14984
  language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
@@ -14848,7 +15134,21 @@ var streamingTranscriberParams = import_zod4.z.object({
14848
15134
  inactivityTimeout: import_zod4.z.number().optional().describe("From SDK v3"),
14849
15135
  speakerLabels: import_zod4.z.boolean().optional().describe("From SDK v3"),
14850
15136
  maxSpeakers: import_zod4.z.number().optional().describe("From SDK v3"),
14851
- llmGateway: import_zod4.z.unknown().optional().describe("From SDK v3")
15137
+ voiceFocus: import_zod4.z.unknown().optional().describe("From SDK v3"),
15138
+ voiceFocusThreshold: import_zod4.z.number().optional().describe("From SDK v3"),
15139
+ continuousPartials: import_zod4.z.boolean().optional().describe("From SDK v3"),
15140
+ interruptionDelay: import_zod4.z.number().optional().describe("From SDK v3"),
15141
+ turnLeftPadMs: import_zod4.z.number().optional().describe("From SDK v3"),
15142
+ customerSupportAudioCapture: import_zod4.z.boolean().optional().describe("From SDK v3"),
15143
+ includePartialTurns: import_zod4.z.boolean().optional().describe("From SDK v3"),
15144
+ redactPii: import_zod4.z.boolean().optional().describe("From SDK v3"),
15145
+ redactPiiPolicies: import_zod4.z.unknown().optional().describe("From SDK v3"),
15146
+ redactPiiSub: import_zod4.z.unknown().optional().describe("From SDK v3"),
15147
+ llmGateway: import_zod4.z.unknown().optional().describe("From SDK v3"),
15148
+ webhookUrl: import_zod4.z.string().optional().describe("From SDK v3"),
15149
+ webhookAuthHeaderName: import_zod4.z.string().optional().describe("From SDK v3"),
15150
+ webhookAuthHeaderValue: import_zod4.z.string().optional().describe("From SDK v3"),
15151
+ mode: import_zod4.z.unknown().describe("From SDK v3")
14852
15152
  });
14853
15153
  var streamingUpdateConfigParams = import_zod4.z.object({
14854
15154
  end_utterance_silence_threshold: import_zod4.z.number().min(0).max(2e4).optional().describe("The duration threshold in milliseconds"),
@@ -14860,7 +15160,9 @@ var streamingUpdateConfigParams = import_zod4.z.object({
14860
15160
  format_turns: import_zod4.z.boolean().optional().describe("From SDK v3"),
14861
15161
  keyterms_prompt: import_zod4.z.array(import_zod4.z.string()).optional().describe("From SDK v3"),
14862
15162
  prompt: import_zod4.z.string().optional().describe("From SDK v3"),
14863
- filter_profanity: import_zod4.z.boolean().optional().describe("From SDK v3")
15163
+ filter_profanity: import_zod4.z.boolean().optional().describe("From SDK v3"),
15164
+ interruption_delay: import_zod4.z.number().optional().describe("From SDK v3"),
15165
+ turn_left_pad_ms: import_zod4.z.number().optional().describe("From SDK v3")
14864
15166
  });
14865
15167
 
14866
15168
  // src/generated/gladia/api/gladiaControlAPI.zod.ts
@@ -15609,7 +15911,7 @@ var preRecordedControllerInitPreRecordedJobV2BodyNamedEntityRecognitionDefault =
15609
15911
  var preRecordedControllerInitPreRecordedJobV2BodyCustomSpellingDefault = false;
15610
15912
  var preRecordedControllerInitPreRecordedJobV2BodySentimentAnalysisDefault = false;
15611
15913
  var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmDefault = false;
15612
- var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
15914
+ var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
15613
15915
  var preRecordedControllerInitPreRecordedJobV2BodyPiiRedactionDefault = false;
15614
15916
  var preRecordedControllerInitPreRecordedJobV2BodySentencesDefault = false;
15615
15917
  var preRecordedControllerInitPreRecordedJobV2BodyPunctuationEnhancedDefault = false;
@@ -15898,23 +16200,23 @@ var preRecordedControllerInitPreRecordedJobV2Body = import_zod5.z.object({
15898
16200
  "Forces the translation to use informal language forms when available in the target language."
15899
16201
  )
15900
16202
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
15901
- summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
16203
+ summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
15902
16204
  summarization_config: import_zod5.z.object({
15903
16205
  type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(preRecordedControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault).describe("The type of summarization to apply")
15904
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
16206
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
15905
16207
  named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
15906
16208
  custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
15907
16209
  custom_spelling_config: import_zod5.z.object({
15908
16210
  spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
15909
16211
  }).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
15910
16212
  sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
15911
- audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
16213
+ audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
15912
16214
  audio_to_llm_config: import_zod5.z.object({
15913
16215
  prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
15914
16216
  model: import_zod5.z.string().default(preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault).describe(
15915
16217
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
15916
16218
  )
15917
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
16219
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
15918
16220
  pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
15919
16221
  pii_redaction_config: import_zod5.z.object({
15920
16222
  entity_types: import_zod5.z.enum([
@@ -16169,7 +16471,7 @@ var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsNamed
16169
16471
  var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsCustomSpellingDefault = false;
16170
16472
  var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
16171
16473
  var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmDefault = false;
16172
- var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
16474
+ var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
16173
16475
  var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsPiiRedactionDefault = false;
16174
16476
  var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSentencesDefault = false;
16175
16477
  var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
@@ -16517,12 +16819,12 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
16517
16819
  "Forces the translation to use informal language forms when available in the target language."
16518
16820
  )
16519
16821
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
16520
- summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
16822
+ summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
16521
16823
  summarization_config: import_zod5.z.object({
16522
16824
  type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
16523
16825
  preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
16524
16826
  ).describe("The type of summarization to apply")
16525
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
16827
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
16526
16828
  named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
16527
16829
  custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
16528
16830
  custom_spelling_config: import_zod5.z.object({
@@ -16531,7 +16833,7 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
16531
16833
  "**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
16532
16834
  ),
16533
16835
  sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
16534
- audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
16836
+ audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
16535
16837
  audio_to_llm_config: import_zod5.z.object({
16536
16838
  prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
16537
16839
  model: import_zod5.z.string().default(
@@ -16539,7 +16841,7 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
16539
16841
  ).describe(
16540
16842
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
16541
16843
  )
16542
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
16844
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
16543
16845
  pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
16544
16846
  pii_redaction_config: import_zod5.z.object({
16545
16847
  entity_types: import_zod5.z.enum([
@@ -17676,7 +17978,7 @@ var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsNamedEntityReco
17676
17978
  var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsCustomSpellingDefault = false;
17677
17979
  var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSentimentAnalysisDefault = false;
17678
17980
  var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmDefault = false;
17679
- var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
17981
+ var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
17680
17982
  var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsPiiRedactionDefault = false;
17681
17983
  var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSentencesDefault = false;
17682
17984
  var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsPunctuationEnhancedDefault = false;
@@ -18017,19 +18319,19 @@ var preRecordedControllerGetPreRecordedJobV2Response = import_zod5.z.object({
18017
18319
  "Forces the translation to use informal language forms when available in the target language."
18018
18320
  )
18019
18321
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
18020
- summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
18322
+ summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
18021
18323
  summarization_config: import_zod5.z.object({
18022
18324
  type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
18023
18325
  preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSummarizationConfigTypeDefault
18024
18326
  ).describe("The type of summarization to apply")
18025
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
18327
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
18026
18328
  named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
18027
18329
  custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
18028
18330
  custom_spelling_config: import_zod5.z.object({
18029
18331
  spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
18030
18332
  }).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
18031
18333
  sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
18032
- audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
18334
+ audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
18033
18335
  audio_to_llm_config: import_zod5.z.object({
18034
18336
  prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
18035
18337
  model: import_zod5.z.string().default(
@@ -18037,7 +18339,7 @@ var preRecordedControllerGetPreRecordedJobV2Response = import_zod5.z.object({
18037
18339
  ).describe(
18038
18340
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
18039
18341
  )
18040
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
18342
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
18041
18343
  pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
18042
18344
  pii_redaction_config: import_zod5.z.object({
18043
18345
  entity_types: import_zod5.z.enum([
@@ -19150,7 +19452,7 @@ var transcriptionControllerInitPreRecordedJobV2BodyNamedEntityRecognitionDefault
19150
19452
  var transcriptionControllerInitPreRecordedJobV2BodyCustomSpellingDefault = false;
19151
19453
  var transcriptionControllerInitPreRecordedJobV2BodySentimentAnalysisDefault = false;
19152
19454
  var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmDefault = false;
19153
- var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
19455
+ var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
19154
19456
  var transcriptionControllerInitPreRecordedJobV2BodyPiiRedactionDefault = false;
19155
19457
  var transcriptionControllerInitPreRecordedJobV2BodySentencesDefault = false;
19156
19458
  var transcriptionControllerInitPreRecordedJobV2BodyPunctuationEnhancedDefault = false;
@@ -19443,23 +19745,23 @@ var transcriptionControllerInitPreRecordedJobV2Body = import_zod5.z.object({
19443
19745
  "Forces the translation to use informal language forms when available in the target language."
19444
19746
  )
19445
19747
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
19446
- summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
19748
+ summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
19447
19749
  summarization_config: import_zod5.z.object({
19448
19750
  type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(transcriptionControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault).describe("The type of summarization to apply")
19449
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
19751
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
19450
19752
  named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
19451
19753
  custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
19452
19754
  custom_spelling_config: import_zod5.z.object({
19453
19755
  spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
19454
19756
  }).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
19455
19757
  sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
19456
- audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
19758
+ audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
19457
19759
  audio_to_llm_config: import_zod5.z.object({
19458
19760
  prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
19459
19761
  model: import_zod5.z.string().default(transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault).describe(
19460
19762
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
19461
19763
  )
19462
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
19764
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
19463
19765
  pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
19464
19766
  pii_redaction_config: import_zod5.z.object({
19465
19767
  entity_types: import_zod5.z.enum([
@@ -19717,7 +20019,7 @@ var transcriptionControllerListV2ResponseItemsItemRequestParamsNamedEntityRecogn
19717
20019
  var transcriptionControllerListV2ResponseItemsItemRequestParamsCustomSpellingDefault = false;
19718
20020
  var transcriptionControllerListV2ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
19719
20021
  var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmDefault = false;
19720
- var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
20022
+ var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
19721
20023
  var transcriptionControllerListV2ResponseItemsItemRequestParamsPiiRedactionDefault = false;
19722
20024
  var transcriptionControllerListV2ResponseItemsItemRequestParamsSentencesDefault = false;
19723
20025
  var transcriptionControllerListV2ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
@@ -20128,12 +20430,12 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
20128
20430
  "Forces the translation to use informal language forms when available in the target language."
20129
20431
  )
20130
20432
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
20131
- summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
20433
+ summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
20132
20434
  summarization_config: import_zod5.z.object({
20133
20435
  type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
20134
20436
  transcriptionControllerListV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
20135
20437
  ).describe("The type of summarization to apply")
20136
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
20438
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
20137
20439
  named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
20138
20440
  custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
20139
20441
  custom_spelling_config: import_zod5.z.object({
@@ -20142,7 +20444,7 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
20142
20444
  "**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
20143
20445
  ),
20144
20446
  sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
20145
- audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
20447
+ audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
20146
20448
  audio_to_llm_config: import_zod5.z.object({
20147
20449
  prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
20148
20450
  model: import_zod5.z.string().default(
@@ -20150,7 +20452,7 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
20150
20452
  ).describe(
20151
20453
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
20152
20454
  )
20153
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
20455
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
20154
20456
  pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
20155
20457
  pii_redaction_config: import_zod5.z.object({
20156
20458
  entity_types: import_zod5.z.enum([
@@ -22468,7 +22770,7 @@ var transcriptionControllerGetTranscriptV2ResponseRequestParamsNamedEntityRecogn
22468
22770
  var transcriptionControllerGetTranscriptV2ResponseRequestParamsCustomSpellingDefault = false;
22469
22771
  var transcriptionControllerGetTranscriptV2ResponseRequestParamsSentimentAnalysisDefault = false;
22470
22772
  var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmDefault = false;
22471
- var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
22773
+ var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
22472
22774
  var transcriptionControllerGetTranscriptV2ResponseRequestParamsPiiRedactionDefault = false;
22473
22775
  var transcriptionControllerGetTranscriptV2ResponseRequestParamsSentencesDefault = false;
22474
22776
  var transcriptionControllerGetTranscriptV2ResponseRequestParamsPunctuationEnhancedDefault = false;
@@ -22873,19 +23175,19 @@ var transcriptionControllerGetTranscriptV2Response = import_zod5.z.discriminated
22873
23175
  "Forces the translation to use informal language forms when available in the target language."
22874
23176
  )
22875
23177
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
22876
- summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
23178
+ summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
22877
23179
  summarization_config: import_zod5.z.object({
22878
23180
  type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
22879
23181
  transcriptionControllerGetTranscriptV2ResponseRequestParamsSummarizationConfigTypeDefault
22880
23182
  ).describe("The type of summarization to apply")
22881
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
23183
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
22882
23184
  named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
22883
23185
  custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
22884
23186
  custom_spelling_config: import_zod5.z.object({
22885
23187
  spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
22886
23188
  }).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
22887
23189
  sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
22888
- audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
23190
+ audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
22889
23191
  audio_to_llm_config: import_zod5.z.object({
22890
23192
  prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
22891
23193
  model: import_zod5.z.string().default(
@@ -22893,7 +23195,7 @@ var transcriptionControllerGetTranscriptV2Response = import_zod5.z.discriminated
22893
23195
  ).describe(
22894
23196
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
22895
23197
  )
22896
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
23198
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
22897
23199
  pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
22898
23200
  pii_redaction_config: import_zod5.z.object({
22899
23201
  entity_types: import_zod5.z.enum([
@@ -25605,7 +25907,7 @@ var historyControllerGetListV1ResponseItemsItemRequestParamsNamedEntityRecogniti
25605
25907
  var historyControllerGetListV1ResponseItemsItemRequestParamsCustomSpellingDefault = false;
25606
25908
  var historyControllerGetListV1ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
25607
25909
  var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmDefault = false;
25608
- var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
25910
+ var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
25609
25911
  var historyControllerGetListV1ResponseItemsItemRequestParamsPiiRedactionDefault = false;
25610
25912
  var historyControllerGetListV1ResponseItemsItemRequestParamsSentencesDefault = false;
25611
25913
  var historyControllerGetListV1ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
@@ -26016,12 +26318,12 @@ var historyControllerGetListV1Response = import_zod5.z.object({
26016
26318
  "Forces the translation to use informal language forms when available in the target language."
26017
26319
  )
26018
26320
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
26019
- summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
26321
+ summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
26020
26322
  summarization_config: import_zod5.z.object({
26021
26323
  type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
26022
26324
  historyControllerGetListV1ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
26023
26325
  ).describe("The type of summarization to apply")
26024
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
26326
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
26025
26327
  named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
26026
26328
  custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
26027
26329
  custom_spelling_config: import_zod5.z.object({
@@ -26030,7 +26332,7 @@ var historyControllerGetListV1Response = import_zod5.z.object({
26030
26332
  "**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
26031
26333
  ),
26032
26334
  sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
26033
- audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
26335
+ audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
26034
26336
  audio_to_llm_config: import_zod5.z.object({
26035
26337
  prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
26036
26338
  model: import_zod5.z.string().default(
@@ -26038,7 +26340,7 @@ var historyControllerGetListV1Response = import_zod5.z.object({
26038
26340
  ).describe(
26039
26341
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
26040
26342
  )
26041
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
26343
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
26042
26344
  pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
26043
26345
  pii_redaction_config: import_zod5.z.object({
26044
26346
  entity_types: import_zod5.z.enum([
@@ -31283,6 +31585,7 @@ __export(openAIAudioRealtimeAPI_zod_exports, {
31283
31585
  createRealtimeClientSecretBodySessionPromptVariablesTypeDefault: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefault,
31284
31586
  createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultOne: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultOne,
31285
31587
  createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultTwo: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultTwo,
31588
+ createRealtimeClientSecretBodySessionReasoningEffortDefault: () => createRealtimeClientSecretBodySessionReasoningEffortDefault,
31286
31589
  createRealtimeClientSecretBodySessionToolChoiceDefault: () => createRealtimeClientSecretBodySessionToolChoiceDefault,
31287
31590
  createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne: () => createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne,
31288
31591
  createRealtimeClientSecretBodySessionTracingDefault: () => createRealtimeClientSecretBodySessionTracingDefault,
@@ -31307,6 +31610,7 @@ __export(openAIAudioRealtimeAPI_zod_exports, {
31307
31610
  createRealtimeClientSecretResponseSessionPromptVariablesTypeDefault: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefault,
31308
31611
  createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultOne: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultOne,
31309
31612
  createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo,
31613
+ createRealtimeClientSecretResponseSessionReasoningEffortDefault: () => createRealtimeClientSecretResponseSessionReasoningEffortDefault,
31310
31614
  createRealtimeClientSecretResponseSessionToolChoiceDefault: () => createRealtimeClientSecretResponseSessionToolChoiceDefault,
31311
31615
  createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne: () => createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne,
31312
31616
  createRealtimeClientSecretResponseSessionTracingDefaultOne: () => createRealtimeClientSecretResponseSessionTracingDefaultOne,
@@ -31663,6 +31967,7 @@ var createRealtimeClientSecretBodySessionTracingDefaultOne = "auto";
31663
31967
  var createRealtimeClientSecretBodySessionTracingDefault = null;
31664
31968
  var createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne = "always";
31665
31969
  var createRealtimeClientSecretBodySessionToolChoiceDefault = "auto";
31970
+ var createRealtimeClientSecretBodySessionReasoningEffortDefault = "low";
31666
31971
  var createRealtimeClientSecretBodySessionTruncationRetentionRatioMin = 0;
31667
31972
  var createRealtimeClientSecretBodySessionTruncationRetentionRatioMax = 1;
31668
31973
  var createRealtimeClientSecretBodySessionTruncationTokenLimitsPostInstructionsMin = 0;
@@ -31698,6 +32003,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
31698
32003
  import_zod6.z.enum([
31699
32004
  "gpt-realtime",
31700
32005
  "gpt-realtime-1.5",
32006
+ "gpt-realtime-2",
31701
32007
  "gpt-realtime-2025-08-28",
31702
32008
  "gpt-4o-realtime-preview",
31703
32009
  "gpt-4o-realtime-preview-2024-10-01",
@@ -31738,16 +32044,20 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
31738
32044
  "gpt-4o-mini-transcribe",
31739
32045
  "gpt-4o-mini-transcribe-2025-12-15",
31740
32046
  "gpt-4o-transcribe",
31741
- "gpt-4o-transcribe-diarize"
32047
+ "gpt-4o-transcribe-diarize",
32048
+ "gpt-realtime-whisper"
31742
32049
  ])
31743
32050
  ).optional().describe(
31744
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
32051
+ "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
31745
32052
  ),
31746
32053
  language: import_zod6.z.string().optional().describe(
31747
32054
  "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
31748
32055
  ),
31749
32056
  prompt: import_zod6.z.string().optional().describe(
31750
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
32057
+ 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
32058
+ ),
32059
+ delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
32060
+ "Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
31751
32061
  )
31752
32062
  }).optional(),
31753
32063
  noise_reduction: import_zod6.z.object({
@@ -31814,7 +32124,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
31814
32124
  "Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
31815
32125
  )
31816
32126
  ]).describe(
31817
- 'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
32127
+ 'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
31818
32128
  ).or(import_zod6.z.null()).optional()
31819
32129
  }).optional(),
31820
32130
  output: import_zod6.z.object({
@@ -31887,7 +32197,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
31887
32197
  server_label: import_zod6.z.string().describe(
31888
32198
  "A label for this MCP server, used to identify it in tool calls.\n"
31889
32199
  ),
31890
- server_url: import_zod6.z.string().optional().describe(
32200
+ server_url: import_zod6.z.string().url().optional().describe(
31891
32201
  "The URL for the MCP server. One of `server_url` or `connector_id` must be\nprovided.\n"
31892
32202
  ),
31893
32203
  connector_id: import_zod6.z.enum([
@@ -31965,6 +32275,16 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
31965
32275
  ).default(createRealtimeClientSecretBodySessionToolChoiceDefault).describe(
31966
32276
  "How the model chooses tools. Provide one of the string modes or force a specific\nfunction/MCP tool.\n"
31967
32277
  ),
32278
+ parallel_tool_calls: import_zod6.z.boolean().optional().describe(
32279
+ "Whether the model may call multiple tools in parallel. Only supported by\nreasoning Realtime models such as `gpt-realtime-2`.\n"
32280
+ ),
32281
+ reasoning: import_zod6.z.object({
32282
+ effort: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).default(createRealtimeClientSecretBodySessionReasoningEffortDefault).describe(
32283
+ "Constrains effort on reasoning for reasoning-capable Realtime models such as\n`gpt-realtime-2`.\n"
32284
+ )
32285
+ }).optional().describe(
32286
+ "Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.\n"
32287
+ ),
31968
32288
  max_output_tokens: import_zod6.z.number().or(import_zod6.z.enum(["inf"])).optional().describe(
31969
32289
  "Maximum number of output tokens for a single assistant response,\ninclusive of tool calls. Provide an integer between 1 and 4096 to\nlimit output tokens, or `inf` for the maximum available tokens for a\ngiven model. Defaults to `inf`.\n"
31970
32290
  ),
@@ -32004,7 +32324,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
32004
32324
  ).or(
32005
32325
  import_zod6.z.object({
32006
32326
  type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
32007
- image_url: import_zod6.z.string().describe(
32327
+ image_url: import_zod6.z.string().url().describe(
32008
32328
  "The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
32009
32329
  ).or(import_zod6.z.null()).optional(),
32010
32330
  file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
@@ -32018,7 +32338,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
32018
32338
  file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
32019
32339
  filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
32020
32340
  file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
32021
- file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
32341
+ file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
32022
32342
  detail: import_zod6.z.enum(["low", "high"]).optional()
32023
32343
  }).describe("A file input to the model.")
32024
32344
  )
@@ -32054,16 +32374,20 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
32054
32374
  "gpt-4o-mini-transcribe",
32055
32375
  "gpt-4o-mini-transcribe-2025-12-15",
32056
32376
  "gpt-4o-transcribe",
32057
- "gpt-4o-transcribe-diarize"
32377
+ "gpt-4o-transcribe-diarize",
32378
+ "gpt-realtime-whisper"
32058
32379
  ])
32059
32380
  ).optional().describe(
32060
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
32381
+ "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
32061
32382
  ),
32062
32383
  language: import_zod6.z.string().optional().describe(
32063
32384
  "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
32064
32385
  ),
32065
32386
  prompt: import_zod6.z.string().optional().describe(
32066
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
32387
+ 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
32388
+ ),
32389
+ delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
32390
+ "Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
32067
32391
  )
32068
32392
  }).optional(),
32069
32393
  noise_reduction: import_zod6.z.object({
@@ -32130,7 +32454,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
32130
32454
  "Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
32131
32455
  )
32132
32456
  ]).describe(
32133
- 'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
32457
+ 'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
32134
32458
  ).or(import_zod6.z.null()).optional()
32135
32459
  }).optional()
32136
32460
  }).optional().describe("Configuration for input and output audio.\n"),
@@ -32161,6 +32485,7 @@ var createRealtimeClientSecretResponseSessionTracingDefaultTwo = "auto";
32161
32485
  var createRealtimeClientSecretResponseSessionTracingDefaultOne = null;
32162
32486
  var createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne = "always";
32163
32487
  var createRealtimeClientSecretResponseSessionToolChoiceDefault = "auto";
32488
+ var createRealtimeClientSecretResponseSessionReasoningEffortDefault = "low";
32164
32489
  var createRealtimeClientSecretResponseSessionTruncationRetentionRatioMin = 0;
32165
32490
  var createRealtimeClientSecretResponseSessionTruncationRetentionRatioMax = 1;
32166
32491
  var createRealtimeClientSecretResponseSessionTruncationTokenLimitsPostInstructionsMin = 0;
@@ -32170,17 +32495,14 @@ var createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo = "in
32170
32495
  var createRealtimeClientSecretResponse = import_zod6.z.object({
32171
32496
  value: import_zod6.z.string().describe("The generated client secret value."),
32172
32497
  expires_at: import_zod6.z.number().describe("Expiration timestamp for the client secret, in seconds since epoch."),
32173
- session: import_zod6.z.discriminatedUnion("type", [
32498
+ session: import_zod6.z.union([
32174
32499
  import_zod6.z.object({
32175
- client_secret: import_zod6.z.object({
32176
- value: import_zod6.z.string().describe(
32177
- "Ephemeral key usable in client environments to authenticate connections to the Realtime API. Use this in client-side environments rather than a standard API token, which should only be used server-side.\n"
32178
- ),
32179
- expires_at: import_zod6.z.number().describe(
32180
- "Timestamp for when the token expires. Currently, all tokens expire\nafter one minute.\n"
32181
- )
32182
- }).describe("Ephemeral key returned by the API."),
32183
32500
  type: import_zod6.z.enum(["realtime"]).describe("The type of session to create. Always `realtime` for the Realtime API.\n"),
32501
+ id: import_zod6.z.string().describe(
32502
+ "Unique identifier for the session that looks like `sess_1234567890abcdef`.\n"
32503
+ ),
32504
+ object: import_zod6.z.enum(["realtime.session"]).describe("The object type. Always `realtime.session`."),
32505
+ expires_at: import_zod6.z.number().optional().describe("Expiration timestamp for the session, in seconds since epoch."),
32184
32506
  output_modalities: import_zod6.z.array(import_zod6.z.enum(["text", "audio"])).default(createRealtimeClientSecretResponseSessionOutputModalitiesDefault).describe(
32185
32507
  'The set of modalities the model can respond with. It defaults to `["audio"]`, indicating\nthat the model will respond with audio plus a transcript. `["text"]` can be used to make\nthe model respond with text only. It is not possible to request both `text` and `audio` at the same time.\n'
32186
32508
  ),
@@ -32188,6 +32510,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32188
32510
  import_zod6.z.enum([
32189
32511
  "gpt-realtime",
32190
32512
  "gpt-realtime-1.5",
32513
+ "gpt-realtime-2",
32191
32514
  "gpt-realtime-2025-08-28",
32192
32515
  "gpt-4o-realtime-preview",
32193
32516
  "gpt-4o-realtime-preview-2024-10-01",
@@ -32210,15 +32533,15 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32210
32533
  audio: import_zod6.z.object({
32211
32534
  input: import_zod6.z.object({
32212
32535
  format: import_zod6.z.object({
32213
- type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
32214
- rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
32536
+ type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
32537
+ rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
32215
32538
  }).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
32216
32539
  import_zod6.z.object({
32217
- type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
32540
+ type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
32218
32541
  }).describe("The G.711 \u03BC-law format.")
32219
32542
  ).or(
32220
32543
  import_zod6.z.object({
32221
- type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
32544
+ type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
32222
32545
  }).describe("The G.711 A-law format.")
32223
32546
  ).optional(),
32224
32547
  transcription: import_zod6.z.object({
@@ -32228,20 +32551,19 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32228
32551
  "gpt-4o-mini-transcribe",
32229
32552
  "gpt-4o-mini-transcribe-2025-12-15",
32230
32553
  "gpt-4o-transcribe",
32231
- "gpt-4o-transcribe-diarize"
32554
+ "gpt-4o-transcribe-diarize",
32555
+ "gpt-realtime-whisper"
32232
32556
  ])
32233
32557
  ).optional().describe(
32234
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
32235
- ),
32236
- language: import_zod6.z.string().optional().describe(
32237
- "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
32558
+ "The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
32238
32559
  ),
32560
+ language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
32239
32561
  prompt: import_zod6.z.string().optional().describe(
32240
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
32562
+ "The prompt configured for input audio transcription, when present.\n"
32241
32563
  )
32242
32564
  }).optional(),
32243
32565
  noise_reduction: import_zod6.z.object({
32244
- type: import_zod6.z.enum(["near_field", "far_field"]).describe(
32566
+ type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
32245
32567
  "Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones such as laptop or conference room microphones.\n"
32246
32568
  )
32247
32569
  }).optional().describe(
@@ -32304,20 +32626,20 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32304
32626
  "Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
32305
32627
  )
32306
32628
  ]).describe(
32307
- 'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
32629
+ 'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
32308
32630
  ).or(import_zod6.z.null()).optional()
32309
32631
  }).optional(),
32310
32632
  output: import_zod6.z.object({
32311
32633
  format: import_zod6.z.object({
32312
- type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
32313
- rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
32634
+ type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
32635
+ rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
32314
32636
  }).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
32315
32637
  import_zod6.z.object({
32316
- type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
32638
+ type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
32317
32639
  }).describe("The G.711 \u03BC-law format.")
32318
32640
  ).or(
32319
32641
  import_zod6.z.object({
32320
- type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
32642
+ type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
32321
32643
  }).describe("The G.711 A-law format.")
32322
32644
  ).optional(),
32323
32645
  voice: import_zod6.z.string().or(
@@ -32361,7 +32683,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32361
32683
  ).or(import_zod6.z.null()).optional(),
32362
32684
  tools: import_zod6.z.array(
32363
32685
  import_zod6.z.object({
32364
- type: import_zod6.z.enum(["function"]).describe("The type of the tool, i.e. `function`."),
32686
+ type: import_zod6.z.enum(["function"]).optional().describe("The type of the tool, i.e. `function`."),
32365
32687
  name: import_zod6.z.string().optional().describe("The name of the function."),
32366
32688
  description: import_zod6.z.string().optional().describe(
32367
32689
  "The description of the function, including guidance on when and how\nto call it, and guidance about what to tell the user when calling\n(if anything).\n"
@@ -32373,7 +32695,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32373
32695
  server_label: import_zod6.z.string().describe(
32374
32696
  "A label for this MCP server, used to identify it in tool calls.\n"
32375
32697
  ),
32376
- server_url: import_zod6.z.string().optional().describe(
32698
+ server_url: import_zod6.z.string().url().optional().describe(
32377
32699
  "The URL for the MCP server. One of `server_url` or `connector_id` must be\nprovided.\n"
32378
32700
  ),
32379
32701
  connector_id: import_zod6.z.enum([
@@ -32385,7 +32707,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32385
32707
  "connector_outlookcalendar",
32386
32708
  "connector_outlookemail",
32387
32709
  "connector_sharepoint"
32388
- ]).describe(
32710
+ ]).optional().describe(
32389
32711
  "Identifier for service connectors, like those available in ChatGPT. One of\n`server_url` or `connector_id` must be provided. Learn more about service\nconnectors [here](/docs/guides/tools-remote-mcp#connectors).\n\nCurrently supported `connector_id` values are:\n\n- Dropbox: `connector_dropbox`\n- Gmail: `connector_gmail`\n- Google Calendar: `connector_googlecalendar`\n- Google Drive: `connector_googledrive`\n- Microsoft Teams: `connector_microsoftteams`\n- Outlook Calendar: `connector_outlookcalendar`\n- Outlook Email: `connector_outlookemail`\n- SharePoint: `connector_sharepoint`\n"
32390
32712
  ),
32391
32713
  authorization: import_zod6.z.string().optional().describe(
@@ -32451,6 +32773,13 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32451
32773
  ).default(createRealtimeClientSecretResponseSessionToolChoiceDefault).describe(
32452
32774
  "How the model chooses tools. Provide one of the string modes or force a specific\nfunction/MCP tool.\n"
32453
32775
  ),
32776
+ reasoning: import_zod6.z.object({
32777
+ effort: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).default(createRealtimeClientSecretResponseSessionReasoningEffortDefault).describe(
32778
+ "Constrains effort on reasoning for reasoning-capable Realtime models such as\n`gpt-realtime-2`.\n"
32779
+ )
32780
+ }).optional().describe(
32781
+ "Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.\n"
32782
+ ),
32454
32783
  max_output_tokens: import_zod6.z.number().or(import_zod6.z.enum(["inf"])).optional().describe(
32455
32784
  "Maximum number of output tokens for a single assistant response,\ninclusive of tool calls. Provide an integer between 1 and 4096 to\nlimit output tokens, or `inf` for the maximum available tokens for a\ngiven model. Defaults to `inf`.\n"
32456
32785
  ),
@@ -32490,7 +32819,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32490
32819
  ).or(
32491
32820
  import_zod6.z.object({
32492
32821
  type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
32493
- image_url: import_zod6.z.string().describe(
32822
+ image_url: import_zod6.z.string().url().describe(
32494
32823
  "The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
32495
32824
  ).or(import_zod6.z.null()).optional(),
32496
32825
  file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
@@ -32504,8 +32833,8 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32504
32833
  file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
32505
32834
  filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
32506
32835
  file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
32507
- file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
32508
- detail: import_zod6.z.enum(["low", "high"])
32836
+ file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
32837
+ detail: import_zod6.z.enum(["low", "high"]).optional()
32509
32838
  }).describe("A file input to the model.")
32510
32839
  )
32511
32840
  ).describe(
@@ -32514,9 +32843,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32514
32843
  }).describe(
32515
32844
  "Reference to a prompt template and its variables.\n[Learn more](/docs/guides/text?api-mode=responses#reusable-prompts).\n"
32516
32845
  ).or(import_zod6.z.null()).optional()
32517
- }).describe(
32518
- "A new Realtime session configuration, with an ephemeral key. Default TTL\nfor keys is one minute.\n"
32519
- ),
32846
+ }).describe("A Realtime session configuration object.\n"),
32520
32847
  import_zod6.z.object({
32521
32848
  type: import_zod6.z.enum(["transcription"]).describe(
32522
32849
  "The type of session. Always `transcription` for transcription sessions.\n"
@@ -32532,15 +32859,15 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32532
32859
  audio: import_zod6.z.object({
32533
32860
  input: import_zod6.z.object({
32534
32861
  format: import_zod6.z.object({
32535
- type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
32536
- rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
32862
+ type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
32863
+ rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
32537
32864
  }).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
32538
32865
  import_zod6.z.object({
32539
- type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
32866
+ type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
32540
32867
  }).describe("The G.711 \u03BC-law format.")
32541
32868
  ).or(
32542
32869
  import_zod6.z.object({
32543
- type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
32870
+ type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
32544
32871
  }).describe("The G.711 A-law format.")
32545
32872
  ).optional(),
32546
32873
  transcription: import_zod6.z.object({
@@ -32550,20 +32877,19 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32550
32877
  "gpt-4o-mini-transcribe",
32551
32878
  "gpt-4o-mini-transcribe-2025-12-15",
32552
32879
  "gpt-4o-transcribe",
32553
- "gpt-4o-transcribe-diarize"
32880
+ "gpt-4o-transcribe-diarize",
32881
+ "gpt-realtime-whisper"
32554
32882
  ])
32555
32883
  ).optional().describe(
32556
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
32557
- ),
32558
- language: import_zod6.z.string().optional().describe(
32559
- "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
32884
+ "The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
32560
32885
  ),
32886
+ language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
32561
32887
  prompt: import_zod6.z.string().optional().describe(
32562
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
32888
+ "The prompt configured for input audio transcription, when present.\n"
32563
32889
  )
32564
32890
  }).optional(),
32565
32891
  noise_reduction: import_zod6.z.object({
32566
- type: import_zod6.z.enum(["near_field", "far_field"]).describe(
32892
+ type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
32567
32893
  "Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones such as laptop or conference room microphones.\n"
32568
32894
  )
32569
32895
  }).optional().describe("Configuration for input audio noise reduction.\n"),
@@ -32580,8 +32906,10 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32580
32906
  silence_duration_ms: import_zod6.z.number().optional().describe(
32581
32907
  "Duration of silence to detect speech stop (in milliseconds). Defaults\nto 500ms. With shorter values the model will respond more quickly,\nbut may jump in on short pauses from the user.\n"
32582
32908
  )
32583
- }).optional().describe(
32584
- "Configuration for turn detection. Can be set to `null` to turn off. Server\nVAD means that the model will detect the start and end of speech based on\naudio volume and respond at the end of user speech.\n"
32909
+ }).describe(
32910
+ "Configuration for turn detection. Can be set to `null` to turn off. Server\nVAD means that the model will detect the start and end of speech based on\naudio volume and respond at the end of user speech. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported.\n"
32911
+ ).or(import_zod6.z.null()).optional().describe(
32912
+ "Configuration for turn detection. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported.\n"
32585
32913
  )
32586
32914
  }).optional()
32587
32915
  }).optional().describe("Configuration for input audio for the session.\n")
@@ -32721,7 +33049,7 @@ var createRealtimeSessionBody = import_zod6.z.object({
32721
33049
  ).or(
32722
33050
  import_zod6.z.object({
32723
33051
  type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
32724
- image_url: import_zod6.z.string().describe(
33052
+ image_url: import_zod6.z.string().url().describe(
32725
33053
  "The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
32726
33054
  ).or(import_zod6.z.null()).optional(),
32727
33055
  file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
@@ -32735,7 +33063,7 @@ var createRealtimeSessionBody = import_zod6.z.object({
32735
33063
  file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
32736
33064
  filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
32737
33065
  file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
32738
- file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
33066
+ file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
32739
33067
  detail: import_zod6.z.enum(["low", "high"]).optional()
32740
33068
  }).describe("A file input to the model.")
32741
33069
  )
@@ -32784,17 +33112,14 @@ var createRealtimeSessionResponse = import_zod6.z.object({
32784
33112
  "gpt-4o-mini-transcribe",
32785
33113
  "gpt-4o-mini-transcribe-2025-12-15",
32786
33114
  "gpt-4o-transcribe",
32787
- "gpt-4o-transcribe-diarize"
33115
+ "gpt-4o-transcribe-diarize",
33116
+ "gpt-realtime-whisper"
32788
33117
  ])
32789
33118
  ).optional().describe(
32790
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
32791
- ),
32792
- language: import_zod6.z.string().optional().describe(
32793
- "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
33119
+ "The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
32794
33120
  ),
32795
- prompt: import_zod6.z.string().optional().describe(
32796
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
32797
- )
33121
+ language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
33122
+ prompt: import_zod6.z.string().optional().describe("The prompt configured for input audio transcription, when present.\n")
32798
33123
  }).optional(),
32799
33124
  noise_reduction: import_zod6.z.object({
32800
33125
  type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
@@ -32920,16 +33245,20 @@ var createRealtimeTranscriptionSessionBody = import_zod6.z.object({
32920
33245
  "gpt-4o-mini-transcribe",
32921
33246
  "gpt-4o-mini-transcribe-2025-12-15",
32922
33247
  "gpt-4o-transcribe",
32923
- "gpt-4o-transcribe-diarize"
33248
+ "gpt-4o-transcribe-diarize",
33249
+ "gpt-realtime-whisper"
32924
33250
  ])
32925
33251
  ).optional().describe(
32926
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
33252
+ "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
32927
33253
  ),
32928
33254
  language: import_zod6.z.string().optional().describe(
32929
33255
  "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
32930
33256
  ),
32931
33257
  prompt: import_zod6.z.string().optional().describe(
32932
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
33258
+ 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
33259
+ ),
33260
+ delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
33261
+ "Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
32933
33262
  )
32934
33263
  }).optional(),
32935
33264
  include: import_zod6.z.array(import_zod6.z.enum(["item.input_audio_transcription.logprobs"])).optional().describe(
@@ -32958,17 +33287,14 @@ var createRealtimeTranscriptionSessionResponse = import_zod6.z.object({
32958
33287
  "gpt-4o-mini-transcribe",
32959
33288
  "gpt-4o-mini-transcribe-2025-12-15",
32960
33289
  "gpt-4o-transcribe",
32961
- "gpt-4o-transcribe-diarize"
33290
+ "gpt-4o-transcribe-diarize",
33291
+ "gpt-realtime-whisper"
32962
33292
  ])
32963
33293
  ).optional().describe(
32964
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
33294
+ "The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
32965
33295
  ),
32966
- language: import_zod6.z.string().optional().describe(
32967
- "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
32968
- ),
32969
- prompt: import_zod6.z.string().optional().describe(
32970
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
32971
- )
33296
+ language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
33297
+ prompt: import_zod6.z.string().optional().describe("The prompt configured for input audio transcription, when present.\n")
32972
33298
  }).optional(),
32973
33299
  turn_detection: import_zod6.z.object({
32974
33300
  type: import_zod6.z.string().optional().describe("Type of turn detection, only `server_vad` is currently supported.\n"),
@@ -36353,6 +36679,7 @@ __export(sonioxPublicAPI_zod_exports, {
36353
36679
  createTranscriptionBodyWebhookUrlRegExpOne: () => createTranscriptionBodyWebhookUrlRegExpOne,
36354
36680
  deleteFileParams: () => deleteFileParams,
36355
36681
  deleteTranscriptionParams: () => deleteTranscriptionParams,
36682
+ getConcurrencyLimitsResponse: () => getConcurrencyLimitsResponse,
36356
36683
  getFileParams: () => getFileParams,
36357
36684
  getFileResponse: () => getFileResponse,
36358
36685
  getFilesCountResponse: () => getFilesCountResponse,
@@ -36370,6 +36697,12 @@ __export(sonioxPublicAPI_zod_exports, {
36370
36697
  getTranscriptionsQueryLimitMax: () => getTranscriptionsQueryLimitMax,
36371
36698
  getTranscriptionsQueryParams: () => getTranscriptionsQueryParams,
36372
36699
  getTranscriptionsResponse: () => getTranscriptionsResponse,
36700
+ getTtsModelsResponse: () => getTtsModelsResponse,
36701
+ getUsageLogsQueryLimitDefault: () => getUsageLogsQueryLimitDefault,
36702
+ getUsageLogsQueryLimitMax: () => getUsageLogsQueryLimitMax,
36703
+ getUsageLogsQueryParams: () => getUsageLogsQueryParams,
36704
+ getUsageLogsQuerySortDefault: () => getUsageLogsQuerySortDefault,
36705
+ getUsageLogsResponse: () => getUsageLogsResponse,
36373
36706
  uploadFileBody: () => uploadFileBody,
36374
36707
  uploadFileBodyClientReferenceIdMaxOne: () => uploadFileBodyClientReferenceIdMaxOne
36375
36708
  });
@@ -36620,11 +36953,73 @@ var getModelsResponse = import_zod10.z.object({
36620
36953
  })
36621
36954
  ).describe("List of available models and their attributes.")
36622
36955
  });
36956
+ var getTtsModelsResponse = import_zod10.z.object({
36957
+ models: import_zod10.z.array(
36958
+ import_zod10.z.object({
36959
+ id: import_zod10.z.string().describe("Unique identifier of the model."),
36960
+ aliased_model_id: import_zod10.z.string().or(import_zod10.z.null()).describe("If this is an alias, the id of the aliased model."),
36961
+ name: import_zod10.z.string().describe("Name of the model."),
36962
+ voices: import_zod10.z.array(
36963
+ import_zod10.z.object({
36964
+ id: import_zod10.z.string().describe("Unique identifier of the voice."),
36965
+ description: import_zod10.z.string().describe("Description of the TTS voice."),
36966
+ gender: import_zod10.z.enum(["male", "female", "neutral"])
36967
+ })
36968
+ ).describe("List of available voices for this model."),
36969
+ languages: import_zod10.z.array(
36970
+ import_zod10.z.object({
36971
+ code: import_zod10.z.string().describe("2-letter language code."),
36972
+ name: import_zod10.z.string().describe("Language name.")
36973
+ })
36974
+ ).describe("List of languages supported by the model.")
36975
+ })
36976
+ ).describe("List of available TTS models and their attributes.")
36977
+ });
36978
+ var getUsageLogsQueryLimitDefault = 1e3;
36979
+ var getUsageLogsQueryLimitMax = 1e3;
36980
+ var getUsageLogsQuerySortDefault = "end_time_asc";
36981
+ var getUsageLogsQueryParams = import_zod10.z.object({
36982
+ start_time: import_zod10.z.string().describe("Start of the time window (inclusive). Filters by request end time."),
36983
+ end_time: import_zod10.z.string().describe("End of the time window (exclusive). Filters by request end time."),
36984
+ limit: import_zod10.z.number().min(1).max(getUsageLogsQueryLimitMax).default(getUsageLogsQueryLimitDefault).describe("Maximum number of usage log entries to return."),
36985
+ sort: import_zod10.z.enum(["end_time_asc", "end_time_desc"]).default(getUsageLogsQuerySortDefault).describe(
36986
+ "Sort order by end_time.Use `end_time_desc` to get the most recent entries first. When paginating, pass the same `sort` value alongside the cursor."
36987
+ ),
36988
+ cursor: import_zod10.z.string().or(import_zod10.z.null()).optional().describe("Pagination cursor for the next page of results.")
36989
+ });
36990
+ var getUsageLogsResponse = import_zod10.z.object({
36991
+ usage_logs: import_zod10.z.array(
36992
+ import_zod10.z.object({
36993
+ uuid: import_zod10.z.string().uuid().describe("Unique identifier of the request."),
36994
+ request_scope: import_zod10.z.string().describe("Scope of the request (api / playground)."),
36995
+ client_reference_id: import_zod10.z.string().describe("Client reference ID supplied on the original request. Empty string if none."),
36996
+ model: import_zod10.z.string().describe("Model identifier."),
36997
+ start_time: import_zod10.z.string().datetime({}).describe("When the request started."),
36998
+ end_time: import_zod10.z.string().datetime({}).describe("When the request ended."),
36999
+ input_text_tokens: import_zod10.z.number(),
37000
+ input_audio_tokens: import_zod10.z.number(),
37001
+ input_audio_duration_ms: import_zod10.z.number(),
37002
+ output_text_tokens: import_zod10.z.number(),
37003
+ output_audio_tokens: import_zod10.z.number(),
37004
+ output_audio_duration_ms: import_zod10.z.number(),
37005
+ cost_usd: import_zod10.z.string(),
37006
+ input_cost_usd: import_zod10.z.string(),
37007
+ input_text_cost_usd: import_zod10.z.string(),
37008
+ input_audio_cost_usd: import_zod10.z.string(),
37009
+ output_cost_usd: import_zod10.z.string(),
37010
+ output_text_cost_usd: import_zod10.z.string(),
37011
+ output_audio_cost_usd: import_zod10.z.string()
37012
+ })
37013
+ ).describe("Per-request usage log entries ordered by end_time, uuid (per `sort`)."),
37014
+ next_page_cursor: import_zod10.z.string().or(import_zod10.z.null()).optional().describe(
37015
+ "A pagination token that references the next page of results. When more data is available, this field contains a value to pass in the cursor parameter of a subsequent request. When null, no additional results are available."
37016
+ )
37017
+ });
36623
37018
  var createTemporaryApiKeyBodyExpiresInSecondsMax = 3600;
36624
37019
  var createTemporaryApiKeyBodyClientReferenceIdMaxOne = 256;
36625
37020
  var createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne = 18e3;
36626
37021
  var createTemporaryApiKeyBody = import_zod10.z.object({
36627
- usage_type: import_zod10.z.enum(["transcribe_websocket"]),
37022
+ usage_type: import_zod10.z.enum(["transcribe_websocket", "tts_rt"]),
36628
37023
  expires_in_seconds: import_zod10.z.number().min(1).max(createTemporaryApiKeyBodyExpiresInSecondsMax).describe("Duration in seconds until the temporary API key expires."),
36629
37024
  client_reference_id: import_zod10.z.string().max(createTemporaryApiKeyBodyClientReferenceIdMaxOne).or(import_zod10.z.null()).optional().describe("Optional tracking identifier string. Does not need to be unique."),
36630
37025
  single_use: import_zod10.z.boolean().or(import_zod10.z.null()).optional().describe("If true, the temporary API key can be used only once."),
@@ -36632,6 +37027,28 @@ var createTemporaryApiKeyBody = import_zod10.z.object({
36632
37027
  "Maximum WebSocket connection duration in seconds. If exceeded, the connection will be dropped. If not set, no limit is applied."
36633
37028
  )
36634
37029
  });
37030
+ var getConcurrencyLimitsResponse = import_zod10.z.object({
37031
+ project: import_zod10.z.object({
37032
+ current: import_zod10.z.object({
37033
+ transcribe_concurrent: import_zod10.z.number(),
37034
+ tts_concurrent: import_zod10.z.number()
37035
+ }).describe("Live counts read from Redis"),
37036
+ limits: import_zod10.z.object({
37037
+ transcribe_concurrent: import_zod10.z.number().or(import_zod10.z.null()),
37038
+ tts_concurrent: import_zod10.z.number().or(import_zod10.z.null())
37039
+ }).describe("Configured limits")
37040
+ }),
37041
+ organization: import_zod10.z.object({
37042
+ current: import_zod10.z.object({
37043
+ transcribe_concurrent: import_zod10.z.number(),
37044
+ tts_concurrent: import_zod10.z.number()
37045
+ }).describe("Live counts read from Redis"),
37046
+ limits: import_zod10.z.object({
37047
+ transcribe_concurrent: import_zod10.z.number().or(import_zod10.z.null()),
37048
+ tts_concurrent: import_zod10.z.number().or(import_zod10.z.null())
37049
+ }).describe("Configured limits")
37050
+ })
37051
+ });
36635
37052
 
36636
37053
  // src/generated/soniox/streaming-types.zod.ts
36637
37054
  var streaming_types_zod_exports = {};
@@ -36716,10 +37133,10 @@ var sonioxStructuredContextSchema = import_zod11.z.object({
36716
37133
  var sonioxContextSchema = import_zod11.z.union([sonioxStructuredContextSchema, import_zod11.z.string()]);
36717
37134
  var sonioxRealtimeModelSchema = import_zod11.z.enum([
36718
37135
  "stt-rt-v4",
36719
- "stt-rt-v3",
36720
37136
  "stt-rt-preview",
36721
37137
  "stt-rt-v3-preview",
36722
- "stt-rt-preview-v2"
37138
+ "stt-rt-preview-v2",
37139
+ "stt-rt-v3"
36723
37140
  ]);
36724
37141
  var streamingTranscriberParams3 = import_zod11.z.object({
36725
37142
  model: sonioxRealtimeModelSchema,
@@ -36727,12 +37144,16 @@ var streamingTranscriberParams3 = import_zod11.z.object({
36727
37144
  sampleRate: import_zod11.z.number().optional(),
36728
37145
  numChannels: import_zod11.z.number().optional(),
36729
37146
  languageHints: import_zod11.z.array(import_zod11.z.string()).optional(),
37147
+ languageHintsStrict: import_zod11.z.boolean().optional(),
36730
37148
  context: sonioxContextSchema.optional(),
36731
37149
  enableSpeakerDiarization: import_zod11.z.boolean().optional(),
36732
37150
  enableLanguageIdentification: import_zod11.z.boolean().optional(),
36733
37151
  enableEndpointDetection: import_zod11.z.boolean().optional(),
37152
+ maxEndpointDelayMs: import_zod11.z.number().optional(),
36734
37153
  translation: sonioxTranslationConfigSchema.optional(),
36735
- clientReferenceId: import_zod11.z.string().optional()
37154
+ clientReferenceId: import_zod11.z.string().optional(),
37155
+ keepaliveIntervalMs: import_zod11.z.number().optional(),
37156
+ connectTimeoutMs: import_zod11.z.number().optional()
36736
37157
  });
36737
37158
  var sonioxTranslationStatusSchema = import_zod11.z.enum(["original", "translation", "none"]);
36738
37159
  var sonioxTokenSchema = import_zod11.z.object({
@@ -37324,6 +37745,7 @@ __export(schema_exports5, {
37324
37745
  V1ListenPostParametersCallbackMethod: () => V1ListenPostParametersCallbackMethod,
37325
37746
  V1ListenPostParametersCustomIntentMode: () => V1ListenPostParametersCustomIntentMode,
37326
37747
  V1ListenPostParametersCustomTopicMode: () => V1ListenPostParametersCustomTopicMode,
37748
+ V1ListenPostParametersDiarizeModel: () => V1ListenPostParametersDiarizeModel,
37327
37749
  V1ListenPostParametersEncoding: () => V1ListenPostParametersEncoding,
37328
37750
  V1ListenPostParametersModel0: () => V1ListenPostParametersModel0,
37329
37751
  V1ListenPostParametersRedactSchemaOneOf1Items: () => V1ListenPostParametersRedactSchemaOneOf1Items,
@@ -37362,6 +37784,13 @@ __export(schema_exports5, {
37362
37784
  V1SpeakPostParametersSampleRate4: () => V1SpeakPostParametersSampleRate4
37363
37785
  });
37364
37786
 
37787
+ // src/generated/deepgram/schema/v1ListenPostParametersDiarizeModel.ts
37788
+ var V1ListenPostParametersDiarizeModel = {
37789
+ latest: "latest",
37790
+ v1: "v1",
37791
+ v2: "v2"
37792
+ };
37793
+
37365
37794
  // src/generated/deepgram/schema/v1ListenPostParametersModel0.ts
37366
37795
  var V1ListenPostParametersModel0 = {
37367
37796
  "nova-3": "nova-3",
@@ -37578,6 +38007,7 @@ var V1SpeakPostParametersSampleRate = {
37578
38007
  var schema_exports6 = {};
37579
38008
  __export(schema_exports6, {
37580
38009
  AudioResponseFormat: () => AudioResponseFormat,
38010
+ AudioTranscriptionDelay: () => AudioTranscriptionDelay,
37581
38011
  CreateSpeechRequestResponseFormat: () => CreateSpeechRequestResponseFormat,
37582
38012
  CreateSpeechRequestStreamFormat: () => CreateSpeechRequestStreamFormat,
37583
38013
  CreateTranscriptionRequestTimestampGranularitiesItem: () => CreateTranscriptionRequestTimestampGranularitiesItem,
@@ -37597,12 +38027,14 @@ __export(schema_exports6, {
37597
38027
  RealtimeAudioFormatsAnyOfType: () => RealtimeAudioFormatsAnyOfType,
37598
38028
  RealtimeCreateClientSecretRequestExpiresAfterAnchor: () => RealtimeCreateClientSecretRequestExpiresAfterAnchor,
37599
38029
  RealtimeFunctionToolType: () => RealtimeFunctionToolType,
38030
+ RealtimeReasoningEffort: () => RealtimeReasoningEffort,
37600
38031
  RealtimeSessionCreateRequestGAIncludeItem: () => RealtimeSessionCreateRequestGAIncludeItem,
37601
38032
  RealtimeSessionCreateRequestGAOutputModalitiesItem: () => RealtimeSessionCreateRequestGAOutputModalitiesItem,
37602
38033
  RealtimeSessionCreateRequestGAType: () => RealtimeSessionCreateRequestGAType,
37603
38034
  RealtimeSessionCreateRequestModalitiesItem: () => RealtimeSessionCreateRequestModalitiesItem,
37604
38035
  RealtimeSessionCreateRequestToolsItemType: () => RealtimeSessionCreateRequestToolsItemType,
37605
38036
  RealtimeSessionCreateResponseGAIncludeItem: () => RealtimeSessionCreateResponseGAIncludeItem,
38037
+ RealtimeSessionCreateResponseGAObject: () => RealtimeSessionCreateResponseGAObject,
37606
38038
  RealtimeSessionCreateResponseGAOutputModalitiesItem: () => RealtimeSessionCreateResponseGAOutputModalitiesItem,
37607
38039
  RealtimeSessionCreateResponseGAType: () => RealtimeSessionCreateResponseGAType,
37608
38040
  RealtimeSessionCreateResponseIncludeItem: () => RealtimeSessionCreateResponseIncludeItem,
@@ -37633,6 +38065,15 @@ __export(schema_exports6, {
37633
38065
  VoiceResourceObject: () => VoiceResourceObject
37634
38066
  });
37635
38067
 
38068
+ // src/generated/openai/schema/audioTranscriptionDelay.ts
38069
+ var AudioTranscriptionDelay = {
38070
+ minimal: "minimal",
38071
+ low: "low",
38072
+ medium: "medium",
38073
+ high: "high",
38074
+ xhigh: "xhigh"
38075
+ };
38076
+
37636
38077
  // src/generated/openai/schema/createSpeechRequestResponseFormat.ts
37637
38078
  var CreateSpeechRequestResponseFormat = {
37638
38079
  mp3: "mp3",
@@ -37745,6 +38186,15 @@ var RealtimeFunctionToolType = {
37745
38186
  function: "function"
37746
38187
  };
37747
38188
 
38189
+ // src/generated/openai/schema/realtimeReasoningEffort.ts
38190
+ var RealtimeReasoningEffort = {
38191
+ minimal: "minimal",
38192
+ low: "low",
38193
+ medium: "medium",
38194
+ high: "high",
38195
+ xhigh: "xhigh"
38196
+ };
38197
+
37748
38198
  // src/generated/openai/schema/realtimeSessionCreateRequestGAIncludeItem.ts
37749
38199
  var RealtimeSessionCreateRequestGAIncludeItem = {
37750
38200
  iteminput_audio_transcriptionlogprobs: "item.input_audio_transcription.logprobs"
@@ -37777,6 +38227,11 @@ var RealtimeSessionCreateResponseGAIncludeItem = {
37777
38227
  iteminput_audio_transcriptionlogprobs: "item.input_audio_transcription.logprobs"
37778
38228
  };
37779
38229
 
38230
+ // src/generated/openai/schema/realtimeSessionCreateResponseGAObject.ts
38231
+ var RealtimeSessionCreateResponseGAObject = {
38232
+ realtimesession: "realtime.session"
38233
+ };
38234
+
37780
38235
  // src/generated/openai/schema/realtimeSessionCreateResponseGAOutputModalitiesItem.ts
37781
38236
  var RealtimeSessionCreateResponseGAOutputModalitiesItem = {
37782
38237
  text: "text",
@@ -37921,6 +38376,7 @@ __export(schema_exports7, {
37921
38376
  AutoChaptersResultErrorType: () => AutoChaptersResultErrorType,
37922
38377
  ErrorResponseError: () => ErrorResponseError,
37923
38378
  GetJobsJobidAlignmentTags: () => GetJobsJobidAlignmentTags,
38379
+ GetJobsJobidObjectUrlsUrlForItem: () => GetJobsJobidObjectUrlsUrlForItem,
37924
38380
  GetJobsJobidTranscriptFormat: () => GetJobsJobidTranscriptFormat,
37925
38381
  JobDetailsStatus: () => JobDetailsStatus,
37926
38382
  JobMode: () => JobMode,
@@ -37990,6 +38446,13 @@ var GetJobsJobidAlignmentTags = {
37990
38446
  one_per_line: "one_per_line"
37991
38447
  };
37992
38448
 
38449
+ // src/generated/speechmatics/schema/getJobsJobidObjectUrlsUrlForItem.ts
38450
+ var GetJobsJobidObjectUrlsUrlForItem = {
38451
+ data: "data",
38452
+ audio_mp3: "audio_mp3",
38453
+ transcript: "transcript"
38454
+ };
38455
+
37993
38456
  // src/generated/speechmatics/schema/getJobsJobidTranscriptFormat.ts
37994
38457
  var GetJobsJobidTranscriptFormat = {
37995
38458
  "json-v2": "json-v2",
@@ -38106,6 +38569,15 @@ var WrittenFormRecognitionResultType = {
38106
38569
  word: "word"
38107
38570
  };
38108
38571
 
38572
+ // src/generated/soniox/sdk-types.ts
38573
+ var sdk_types_exports = {};
38574
+ __export(sdk_types_exports, {
38575
+ RealtimeSttSession: () => import_node.RealtimeSttSession,
38576
+ SonioxFetchHttpClient: () => import_node.FetchHttpClient,
38577
+ SonioxNodeClient: () => import_node.SonioxNodeClient
38578
+ });
38579
+ var import_node = require("@soniox/node");
38580
+
38109
38581
  // src/generated/elevenlabs/schema/index.ts
38110
38582
  var schema_exports8 = {};
38111
38583
  __export(schema_exports8, {
@@ -38183,6 +38655,10 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
38183
38655
  deleteJobsJobidParams: () => deleteJobsJobidParams,
38184
38656
  deleteJobsJobidQueryParams: () => deleteJobsJobidQueryParams,
38185
38657
  deleteJobsJobidResponse: () => deleteJobsJobidResponse,
38658
+ deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault: () => deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault,
38659
+ deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault: () => deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault,
38660
+ deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
38661
+ deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
38186
38662
  deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
38187
38663
  deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
38188
38664
  deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
@@ -38198,8 +38674,15 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
38198
38674
  getJobsJobidDataResponse: () => getJobsJobidDataResponse,
38199
38675
  getJobsJobidLogParams: () => getJobsJobidLogParams,
38200
38676
  getJobsJobidLogResponse: () => getJobsJobidLogResponse,
38677
+ getJobsJobidObjectUrlsParams: () => getJobsJobidObjectUrlsParams,
38678
+ getJobsJobidObjectUrlsQueryParams: () => getJobsJobidObjectUrlsQueryParams,
38679
+ getJobsJobidObjectUrlsResponse: () => getJobsJobidObjectUrlsResponse,
38201
38680
  getJobsJobidParams: () => getJobsJobidParams,
38202
38681
  getJobsJobidResponse: () => getJobsJobidResponse,
38682
+ getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault: () => getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault,
38683
+ getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault: () => getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault,
38684
+ getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
38685
+ getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
38203
38686
  getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
38204
38687
  getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
38205
38688
  getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
@@ -38214,6 +38697,8 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
38214
38697
  getJobsJobidTranscriptQueryParams: () => getJobsJobidTranscriptQueryParams,
38215
38698
  getJobsJobidTranscriptResponse: () => getJobsJobidTranscriptResponse,
38216
38699
  getJobsJobidTranscriptResponseJobDurationMin: () => getJobsJobidTranscriptResponseJobDurationMin,
38700
+ getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
38701
+ getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
38217
38702
  getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp,
38218
38703
  getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
38219
38704
  getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax,
@@ -38225,6 +38710,10 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
38225
38710
  getJobsQueryLimitMax: () => getJobsQueryLimitMax,
38226
38711
  getJobsQueryParams: () => getJobsQueryParams,
38227
38712
  getJobsResponse: () => getJobsResponse,
38713
+ getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault: () => getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault,
38714
+ getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault: () => getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault,
38715
+ getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
38716
+ getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
38228
38717
  getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
38229
38718
  getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
38230
38719
  getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
@@ -38235,12 +38724,18 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
38235
38724
  getJobsResponseJobsItemDurationMin: () => getJobsResponseJobsItemDurationMin,
38236
38725
  getUsageQueryParams: () => getUsageQueryParams,
38237
38726
  getUsageResponse: () => getUsageResponse,
38238
- postJobsBody: () => postJobsBody
38727
+ postJobsBody: () => postJobsBody,
38728
+ postJobsHeader: () => postJobsHeader
38239
38729
  });
38240
38730
  var import_zod12 = require("zod");
38731
+ var postJobsHeader = import_zod12.z.object({
38732
+ "X-SM-Processing-Data": import_zod12.z.string().optional().describe(
38733
+ '**Note**: Only available for on-prem\nJSON dictionary of processing settings for the job worker. Currently supports `parallel_engines` (integer), which controls the number of engines the worker can use in parallel for this job, and `user_id` (string), which is the user id for this job. Example: `{"parallel_engines": 4}`'
38734
+ )
38735
+ });
38241
38736
  var postJobsBody = import_zod12.z.object({
38242
38737
  config: import_zod12.z.string().describe(
38243
- "JSON containing a `JobConfig` model indicating the type and parameters for the recognition job."
38738
+ "JSON containing a [`JobConfig`](/speech-to-text/batch/input#jobconfig-schema) model indicating the type and parameters for the recognition job."
38244
38739
  ),
38245
38740
  data_file: import_zod12.z.instanceof(File).optional().describe(
38246
38741
  "The data file to be processed. Alternatively the data file can be fetched from a url specified in `JobConfig`."
@@ -38262,9 +38757,13 @@ var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitiv
38262
38757
  var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
38263
38758
  var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
38264
38759
  var getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
38760
+ var getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
38761
+ var getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
38265
38762
  var getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
38266
38763
  var getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
38267
38764
  var getJobsResponseJobsItemConfigTranslationConfigTargetLanguagesMax = 5;
38765
+ var getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault = "auto";
38766
+ var getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault = "brief";
38268
38767
  var getJobsResponse = import_zod12.z.object({
38269
38768
  jobs: import_zod12.z.array(
38270
38769
  import_zod12.z.object({
@@ -38344,19 +38843,30 @@ var getJobsResponse = import_zod12.z.object({
38344
38843
  max_delay_mode: import_zod12.z.enum(["fixed", "flexible"]).optional().describe(
38345
38844
  "Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
38346
38845
  ),
38846
+ audio_filtering_config: import_zod12.z.object({
38847
+ volume_threshold: import_zod12.z.number().min(
38848
+ getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
38849
+ ).max(
38850
+ getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
38851
+ ).optional().describe(
38852
+ "Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
38853
+ )
38854
+ }).optional().describe("Configuration for limiting the transcription of quiet audio."),
38347
38855
  transcript_filtering_config: import_zod12.z.object({
38348
38856
  remove_disfluencies: import_zod12.z.boolean().optional().describe(
38349
- "If true, words that are identified as disfluencies will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
38857
+ "If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
38350
38858
  ),
38351
38859
  replacements: import_zod12.z.array(
38352
38860
  import_zod12.z.object({
38353
- from: import_zod12.z.string(),
38354
- to: import_zod12.z.string()
38861
+ from: import_zod12.z.string().describe("The text or pattern identified to be replaced."),
38862
+ to: import_zod12.z.string().describe(
38863
+ "The corrected or formatted string to appear in the transcript."
38864
+ )
38355
38865
  })
38356
38866
  ).optional().describe(
38357
- "A list of replacements to apply to the transcript. Each replacement is a pair of strings, where the first string is the pattern to be replaced and the second string is the replacement text."
38867
+ 'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
38358
38868
  )
38359
- }).optional().describe("Configuration for applying filtering to the transcription"),
38869
+ }).optional().describe("Configuration for applying filtering to the transcription."),
38360
38870
  speaker_diarization_config: import_zod12.z.object({
38361
38871
  prefer_current_speaker: import_zod12.z.boolean().optional().describe(
38362
38872
  'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
@@ -38367,6 +38877,19 @@ var getJobsResponse = import_zod12.z.object({
38367
38877
  getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
38368
38878
  ).optional().describe(
38369
38879
  "Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
38880
+ ),
38881
+ get_speakers: import_zod12.z.boolean().optional().describe(
38882
+ "If true, speaker identifiers will be returned at the end of transcript."
38883
+ ),
38884
+ speakers: import_zod12.z.array(
38885
+ import_zod12.z.object({
38886
+ label: import_zod12.z.string().min(1).describe(
38887
+ "Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
38888
+ ),
38889
+ speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
38890
+ })
38891
+ ).optional().describe(
38892
+ "Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
38370
38893
  )
38371
38894
  }).optional().describe("Configuration for speaker diarization")
38372
38895
  }).optional(),
@@ -38424,10 +38947,14 @@ var getJobsResponse = import_zod12.z.object({
38424
38947
  default_language: import_zod12.z.string().optional()
38425
38948
  }).optional(),
38426
38949
  summarization_config: import_zod12.z.object({
38427
- content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).optional(),
38428
- summary_length: import_zod12.z.enum(["brief", "detailed"]).optional(),
38950
+ content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).default(getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault).describe(
38951
+ "Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
38952
+ ),
38953
+ summary_length: import_zod12.z.enum(["brief", "detailed"]).default(getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault).describe(
38954
+ "Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
38955
+ ),
38429
38956
  summary_type: import_zod12.z.enum(["paragraphs", "bullets"]).optional()
38430
- }).optional(),
38957
+ }).optional().describe("Configuration options for summarization."),
38431
38958
  sentiment_analysis_config: import_zod12.z.object({}).optional(),
38432
38959
  topic_detection_config: import_zod12.z.object({
38433
38960
  topics: import_zod12.z.array(import_zod12.z.string()).optional()
@@ -38449,7 +38976,7 @@ var getJobsResponse = import_zod12.z.object({
38449
38976
  "Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
38450
38977
  )
38451
38978
  }).describe(
38452
- "Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/<id> request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
38979
+ "Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
38453
38980
  )
38454
38981
  )
38455
38982
  });
@@ -38461,9 +38988,13 @@ var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitiv
38461
38988
  var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
38462
38989
  var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
38463
38990
  var getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
38991
+ var getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
38992
+ var getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
38464
38993
  var getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
38465
38994
  var getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
38466
38995
  var getJobsJobidResponseJobConfigTranslationConfigTargetLanguagesMax = 5;
38996
+ var getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault = "auto";
38997
+ var getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault = "brief";
38467
38998
  var getJobsJobidResponse = import_zod12.z.object({
38468
38999
  job: import_zod12.z.object({
38469
39000
  created_at: import_zod12.z.string().datetime({}).describe("The UTC date time the job was created."),
@@ -38540,19 +39071,30 @@ var getJobsJobidResponse = import_zod12.z.object({
38540
39071
  max_delay_mode: import_zod12.z.enum(["fixed", "flexible"]).optional().describe(
38541
39072
  "Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
38542
39073
  ),
39074
+ audio_filtering_config: import_zod12.z.object({
39075
+ volume_threshold: import_zod12.z.number().min(
39076
+ getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
39077
+ ).max(
39078
+ getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
39079
+ ).optional().describe(
39080
+ "Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
39081
+ )
39082
+ }).optional().describe("Configuration for limiting the transcription of quiet audio."),
38543
39083
  transcript_filtering_config: import_zod12.z.object({
38544
39084
  remove_disfluencies: import_zod12.z.boolean().optional().describe(
38545
- "If true, words that are identified as disfluencies will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
39085
+ "If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
38546
39086
  ),
38547
39087
  replacements: import_zod12.z.array(
38548
39088
  import_zod12.z.object({
38549
- from: import_zod12.z.string(),
38550
- to: import_zod12.z.string()
39089
+ from: import_zod12.z.string().describe("The text or pattern identified to be replaced."),
39090
+ to: import_zod12.z.string().describe(
39091
+ "The corrected or formatted string to appear in the transcript."
39092
+ )
38551
39093
  })
38552
39094
  ).optional().describe(
38553
- "A list of replacements to apply to the transcript. Each replacement is a pair of strings, where the first string is the pattern to be replaced and the second string is the replacement text."
39095
+ 'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
38554
39096
  )
38555
- }).optional().describe("Configuration for applying filtering to the transcription"),
39097
+ }).optional().describe("Configuration for applying filtering to the transcription."),
38556
39098
  speaker_diarization_config: import_zod12.z.object({
38557
39099
  prefer_current_speaker: import_zod12.z.boolean().optional().describe(
38558
39100
  'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
@@ -38563,6 +39105,19 @@ var getJobsJobidResponse = import_zod12.z.object({
38563
39105
  getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
38564
39106
  ).optional().describe(
38565
39107
  "Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
39108
+ ),
39109
+ get_speakers: import_zod12.z.boolean().optional().describe(
39110
+ "If true, speaker identifiers will be returned at the end of transcript."
39111
+ ),
39112
+ speakers: import_zod12.z.array(
39113
+ import_zod12.z.object({
39114
+ label: import_zod12.z.string().min(1).describe(
39115
+ "Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
39116
+ ),
39117
+ speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
39118
+ })
39119
+ ).optional().describe(
39120
+ "Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
38566
39121
  )
38567
39122
  }).optional().describe("Configuration for speaker diarization")
38568
39123
  }).optional(),
@@ -38618,10 +39173,14 @@ var getJobsJobidResponse = import_zod12.z.object({
38618
39173
  default_language: import_zod12.z.string().optional()
38619
39174
  }).optional(),
38620
39175
  summarization_config: import_zod12.z.object({
38621
- content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).optional(),
38622
- summary_length: import_zod12.z.enum(["brief", "detailed"]).optional(),
39176
+ content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).default(getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault).describe(
39177
+ "Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
39178
+ ),
39179
+ summary_length: import_zod12.z.enum(["brief", "detailed"]).default(getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault).describe(
39180
+ "Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
39181
+ ),
38623
39182
  summary_type: import_zod12.z.enum(["paragraphs", "bullets"]).optional()
38624
- }).optional(),
39183
+ }).optional().describe("Configuration options for summarization."),
38625
39184
  sentiment_analysis_config: import_zod12.z.object({}).optional(),
38626
39185
  topic_detection_config: import_zod12.z.object({
38627
39186
  topics: import_zod12.z.array(import_zod12.z.string()).optional()
@@ -38643,7 +39202,7 @@ var getJobsJobidResponse = import_zod12.z.object({
38643
39202
  "Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
38644
39203
  )
38645
39204
  }).describe(
38646
- "Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/<id> request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
39205
+ "Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
38647
39206
  )
38648
39207
  });
38649
39208
  var deleteJobsJobidParams = import_zod12.z.object({
@@ -38659,9 +39218,13 @@ var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensi
38659
39218
  var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
38660
39219
  var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
38661
39220
  var deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
39221
+ var deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
39222
+ var deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
38662
39223
  var deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
38663
39224
  var deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
38664
39225
  var deleteJobsJobidResponseJobConfigTranslationConfigTargetLanguagesMax = 5;
39226
+ var deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault = "auto";
39227
+ var deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault = "brief";
38665
39228
  var deleteJobsJobidResponse = import_zod12.z.object({
38666
39229
  job: import_zod12.z.object({
38667
39230
  created_at: import_zod12.z.string().datetime({}).describe("The UTC date time the job was created."),
@@ -38738,19 +39301,30 @@ var deleteJobsJobidResponse = import_zod12.z.object({
38738
39301
  max_delay_mode: import_zod12.z.enum(["fixed", "flexible"]).optional().describe(
38739
39302
  "Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
38740
39303
  ),
39304
+ audio_filtering_config: import_zod12.z.object({
39305
+ volume_threshold: import_zod12.z.number().min(
39306
+ deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
39307
+ ).max(
39308
+ deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
39309
+ ).optional().describe(
39310
+ "Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
39311
+ )
39312
+ }).optional().describe("Configuration for limiting the transcription of quiet audio."),
38741
39313
  transcript_filtering_config: import_zod12.z.object({
38742
39314
  remove_disfluencies: import_zod12.z.boolean().optional().describe(
38743
- "If true, words that are identified as disfluencies will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
39315
+ "If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
38744
39316
  ),
38745
39317
  replacements: import_zod12.z.array(
38746
39318
  import_zod12.z.object({
38747
- from: import_zod12.z.string(),
38748
- to: import_zod12.z.string()
39319
+ from: import_zod12.z.string().describe("The text or pattern identified to be replaced."),
39320
+ to: import_zod12.z.string().describe(
39321
+ "The corrected or formatted string to appear in the transcript."
39322
+ )
38749
39323
  })
38750
39324
  ).optional().describe(
38751
- "A list of replacements to apply to the transcript. Each replacement is a pair of strings, where the first string is the pattern to be replaced and the second string is the replacement text."
39325
+ 'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
38752
39326
  )
38753
- }).optional().describe("Configuration for applying filtering to the transcription"),
39327
+ }).optional().describe("Configuration for applying filtering to the transcription."),
38754
39328
  speaker_diarization_config: import_zod12.z.object({
38755
39329
  prefer_current_speaker: import_zod12.z.boolean().optional().describe(
38756
39330
  'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
@@ -38761,6 +39335,19 @@ var deleteJobsJobidResponse = import_zod12.z.object({
38761
39335
  deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
38762
39336
  ).optional().describe(
38763
39337
  "Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
39338
+ ),
39339
+ get_speakers: import_zod12.z.boolean().optional().describe(
39340
+ "If true, speaker identifiers will be returned at the end of transcript."
39341
+ ),
39342
+ speakers: import_zod12.z.array(
39343
+ import_zod12.z.object({
39344
+ label: import_zod12.z.string().min(1).describe(
39345
+ "Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
39346
+ ),
39347
+ speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
39348
+ })
39349
+ ).optional().describe(
39350
+ "Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
38764
39351
  )
38765
39352
  }).optional().describe("Configuration for speaker diarization")
38766
39353
  }).optional(),
@@ -38816,10 +39403,14 @@ var deleteJobsJobidResponse = import_zod12.z.object({
38816
39403
  default_language: import_zod12.z.string().optional()
38817
39404
  }).optional(),
38818
39405
  summarization_config: import_zod12.z.object({
38819
- content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).optional(),
38820
- summary_length: import_zod12.z.enum(["brief", "detailed"]).optional(),
39406
+ content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).default(deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault).describe(
39407
+ "Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
39408
+ ),
39409
+ summary_length: import_zod12.z.enum(["brief", "detailed"]).default(deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault).describe(
39410
+ "Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
39411
+ ),
38821
39412
  summary_type: import_zod12.z.enum(["paragraphs", "bullets"]).optional()
38822
- }).optional(),
39413
+ }).optional().describe("Configuration options for summarization."),
38823
39414
  sentiment_analysis_config: import_zod12.z.object({}).optional(),
38824
39415
  topic_detection_config: import_zod12.z.object({
38825
39416
  topics: import_zod12.z.array(import_zod12.z.string()).optional()
@@ -38841,7 +39432,7 @@ var deleteJobsJobidResponse = import_zod12.z.object({
38841
39432
  "Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
38842
39433
  )
38843
39434
  }).describe(
38844
- "Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/<id> request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
39435
+ "Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
38845
39436
  )
38846
39437
  });
38847
39438
  var getJobsJobidDataParams = import_zod12.z.object({
@@ -38863,6 +39454,8 @@ var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverride
38863
39454
  var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
38864
39455
  var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
38865
39456
  var getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
39457
+ var getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
39458
+ var getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
38866
39459
  var getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
38867
39460
  var getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
38868
39461
  var getJobsJobidTranscriptResponseResultsItemVolumeMin = 0;
@@ -38934,19 +39527,28 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
38934
39527
  max_delay_mode: import_zod12.z.enum(["fixed", "flexible"]).optional().describe(
38935
39528
  "Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
38936
39529
  ),
39530
+ audio_filtering_config: import_zod12.z.object({
39531
+ volume_threshold: import_zod12.z.number().min(
39532
+ getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
39533
+ ).max(
39534
+ getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
39535
+ ).optional().describe(
39536
+ "Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
39537
+ )
39538
+ }).optional().describe("Configuration for limiting the transcription of quiet audio."),
38937
39539
  transcript_filtering_config: import_zod12.z.object({
38938
39540
  remove_disfluencies: import_zod12.z.boolean().optional().describe(
38939
- "If true, words that are identified as disfluencies will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
39541
+ "If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
38940
39542
  ),
38941
39543
  replacements: import_zod12.z.array(
38942
39544
  import_zod12.z.object({
38943
- from: import_zod12.z.string(),
38944
- to: import_zod12.z.string()
39545
+ from: import_zod12.z.string().describe("The text or pattern identified to be replaced."),
39546
+ to: import_zod12.z.string().describe("The corrected or formatted string to appear in the transcript.")
38945
39547
  })
38946
39548
  ).optional().describe(
38947
- "A list of replacements to apply to the transcript. Each replacement is a pair of strings, where the first string is the pattern to be replaced and the second string is the replacement text."
39549
+ 'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
38948
39550
  )
38949
- }).optional().describe("Configuration for applying filtering to the transcription"),
39551
+ }).optional().describe("Configuration for applying filtering to the transcription."),
38950
39552
  speaker_diarization_config: import_zod12.z.object({
38951
39553
  prefer_current_speaker: import_zod12.z.boolean().optional().describe(
38952
39554
  'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
@@ -38957,9 +39559,23 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
38957
39559
  getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
38958
39560
  ).optional().describe(
38959
39561
  "Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
39562
+ ),
39563
+ get_speakers: import_zod12.z.boolean().optional().describe(
39564
+ "If true, speaker identifiers will be returned at the end of transcript."
39565
+ ),
39566
+ speakers: import_zod12.z.array(
39567
+ import_zod12.z.object({
39568
+ label: import_zod12.z.string().min(1).describe(
39569
+ "Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
39570
+ ),
39571
+ speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
39572
+ })
39573
+ ).optional().describe(
39574
+ "Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
38960
39575
  )
38961
39576
  }).optional().describe("Configuration for speaker diarization")
38962
39577
  }).optional(),
39578
+ orchestrator_version: import_zod12.z.string().optional().describe("The engine version used to generate transcription output."),
38963
39579
  translation_errors: import_zod12.z.array(
38964
39580
  import_zod12.z.object({
38965
39581
  type: import_zod12.z.enum(["translation_failed", "unsupported_translation_pair"]).optional(),
@@ -39037,10 +39653,7 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
39037
39653
  "OTHER"
39038
39654
  ]).optional(),
39039
39655
  message: import_zod12.z.string().optional()
39040
- }).optional(),
39041
- orchestrator_version: import_zod12.z.string().optional().describe(
39042
- "Orchestrator version in PEP 440 Format or set to 'version_not_found' as default."
39043
- )
39656
+ }).optional()
39044
39657
  }).describe(
39045
39658
  "Summary information about the output from an ASR job, comprising the job type and configuration parameters used when generating the output."
39046
39659
  ),
@@ -39123,6 +39736,12 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
39123
39736
  "An ASR job output item. The primary item types are `word` and `punctuation`. Other item types may be present, for example to provide semantic information of different forms."
39124
39737
  )
39125
39738
  ),
39739
+ speakers: import_zod12.z.array(
39740
+ import_zod12.z.object({
39741
+ label: import_zod12.z.string().min(1).describe("Speaker label."),
39742
+ speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
39743
+ })
39744
+ ).optional().describe("List of unique speaker identifiers detected in the transcript."),
39126
39745
  translations: import_zod12.z.record(
39127
39746
  import_zod12.z.string(),
39128
39747
  import_zod12.z.array(
@@ -39144,13 +39763,23 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
39144
39763
  sentiment_analysis: import_zod12.z.object({
39145
39764
  segments: import_zod12.z.array(
39146
39765
  import_zod12.z.object({
39147
- text: import_zod12.z.string().optional(),
39148
- start_time: import_zod12.z.number().optional(),
39149
- end_time: import_zod12.z.number().optional(),
39150
- sentiment: import_zod12.z.string().optional(),
39151
- speaker: import_zod12.z.string().optional(),
39152
- channel: import_zod12.z.string().optional(),
39153
- confidence: import_zod12.z.number().optional()
39766
+ text: import_zod12.z.string().optional().describe("Represents the transcript of the analysed segment"),
39767
+ sentiment: import_zod12.z.string().optional().describe(
39768
+ "The assigned sentiment to the segment, which can be positive, neutral or negative"
39769
+ ),
39770
+ start_time: import_zod12.z.number().optional().describe(
39771
+ "The timestamp corresponding to the beginning of the transcription segment"
39772
+ ),
39773
+ end_time: import_zod12.z.number().optional().describe(
39774
+ "The timestamp corresponding to the end of the transcription segment"
39775
+ ),
39776
+ speaker: import_zod12.z.string().optional().describe(
39777
+ "The speaker label for the segment, if speaker diarization is enabled"
39778
+ ),
39779
+ channel: import_zod12.z.string().optional().describe(
39780
+ "The channel label for the segment, if channel diarization is enabled"
39781
+ ),
39782
+ confidence: import_zod12.z.number().optional().describe("A confidence score in the range of 0-1")
39154
39783
  }).describe("Represents a segment of text and its associated sentiment.")
39155
39784
  ).optional().describe(
39156
39785
  "An array of objects that represent a segment of text and its associated sentiment."
@@ -39209,10 +39838,10 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
39209
39838
  }).optional().describe("Main object that holds topic detection results."),
39210
39839
  chapters: import_zod12.z.array(
39211
39840
  import_zod12.z.object({
39212
- title: import_zod12.z.string().optional(),
39213
- summary: import_zod12.z.string().optional(),
39214
- start_time: import_zod12.z.number().optional(),
39215
- end_time: import_zod12.z.number().optional()
39841
+ title: import_zod12.z.string().optional().describe("The auto-generated title for the chapter"),
39842
+ summary: import_zod12.z.string().optional().describe("An auto-generated paragraph-style, short summary of the chapter"),
39843
+ start_time: import_zod12.z.number().optional().describe("The start time of the chapter in the audio file"),
39844
+ end_time: import_zod12.z.number().optional().describe("The end time of the chapter in the audio file")
39216
39845
  })
39217
39846
  ).optional().describe("An array of objects that represent summarized chapters of the transcript"),
39218
39847
  audio_events: import_zod12.z.array(
@@ -39257,6 +39886,18 @@ var getJobsJobidLogParams = import_zod12.z.object({
39257
39886
  jobid: import_zod12.z.string().describe("ID of the job.")
39258
39887
  });
39259
39888
  var getJobsJobidLogResponse = import_zod12.z.instanceof(File);
39889
+ var getJobsJobidObjectUrlsParams = import_zod12.z.object({
39890
+ jobid: import_zod12.z.string().describe("ID of the job.")
39891
+ });
39892
+ var getJobsJobidObjectUrlsQueryParams = import_zod12.z.object({
39893
+ ttl: import_zod12.z.number().describe("Time to live in seconds for the signed URLs"),
39894
+ url_for: import_zod12.z.array(import_zod12.z.enum(["data", "audio_mp3", "transcript"]))
39895
+ });
39896
+ var getJobsJobidObjectUrlsResponse = import_zod12.z.object({
39897
+ data: import_zod12.z.string().optional(),
39898
+ audio_mp3: import_zod12.z.string().optional(),
39899
+ transcript: import_zod12.z.string().optional()
39900
+ });
39260
39901
  var getUsageQueryParams = import_zod12.z.object({
39261
39902
  since: import_zod12.z.string().date().optional().describe(
39262
39903
  "Include usage after the given date (inclusive). This is a [ISO-8601](https://en.wikipedia.org/wiki/ISO_8601) calendar date format: `YYYY-MM-DD`."
@@ -39390,7 +40031,7 @@ var speechToTextBodyKeytermsDefault = [];
39390
40031
  var speechToTextBody = import_zod13.z.object({
39391
40032
  model_id: import_zod13.z.enum(["scribe_v1", "scribe_v2"]).describe("The ID of the model to use for transcription."),
39392
40033
  file: import_zod13.z.instanceof(File).or(import_zod13.z.null()).optional().describe(
39393
- "The file to transcribe (100ms minimum audio length). All major audio and video formats are supported. Exactly one of the file or cloud_storage_url parameters must be provided. The file size must be less than 3.0GB."
40034
+ "The file to transcribe (100ms minimum audio length). All major audio and video formats are supported. Exactly one of the file or cloud_storage_url parameters must be provided. The file size must be less than 5.0GB."
39394
40035
  ),
39395
40036
  language_code: import_zod13.z.string().or(import_zod13.z.null()).optional().describe(
39396
40037
  "An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand. Defaults to null, in this case the language is predicted automatically."
@@ -39468,7 +40109,7 @@ var speechToTextBody = import_zod13.z.object({
39468
40109
  "The format of input audio. Options are 'pcm_s16le_16' or 'other' For `pcm_s16le_16`, the input audio must be 16-bit PCM at a 16kHz sample rate, single channel (mono), and little-endian byte order. Latency will be lower than with passing an encoded waveform."
39469
40110
  ),
39470
40111
  cloud_storage_url: import_zod13.z.string().or(import_zod13.z.null()).optional().describe(
39471
- "The HTTPS URL of the file to transcribe. Exactly one of the file or cloud_storage_url parameters must be provided. The file must be accessible via HTTPS and the file size must be less than 2GB. Any valid HTTPS URL is accepted, including URLs from cloud storage providers (AWS S3, Google Cloud Storage, Cloudflare R2, etc.), CDNs, or any other HTTPS source. URLs can be pre-signed or include authentication tokens in query parameters."
40112
+ "[Deprecated] This parameter is deprecated and will be removed in the future. Use 'source_url' instead.The HTTPS URL of the file to transcribe. Exactly one of the file or cloud_storage_url parameters must be provided. The file must be accessible via HTTPS and the file size must be less than 2GB. Any valid HTTPS URL is accepted, including URLs from cloud storage providers (AWS S3, Google Cloud Storage, Cloudflare R2, etc.), CDNs, or any other HTTPS source. URLs can be pre-signed or include authentication tokens in query parameters."
39472
40113
  ),
39473
40114
  source_url: import_zod13.z.string().or(import_zod13.z.null()).optional().describe(
39474
40115
  "The URL of an audio or video file to transcribe. Supports hosted video or audio files, YouTube video URLs, TikTok video URLs, and other video hosting services."
@@ -39507,7 +40148,7 @@ var speechToTextBody = import_zod13.z.object({
39507
40148
  "How to format redacted entities. 'redacted' replaces with {REDACTED}, 'entity_type' replaces with {ENTITY_TYPE}, 'enumerated_entity_type' replaces with {ENTITY_TYPE_N} where N enumerates each occurrence. Only used when entity_redaction is set."
39508
40149
  ),
39509
40150
  keyterms: import_zod13.z.array(import_zod13.z.string()).default(speechToTextBodyKeytermsDefault).describe(
39510
- 'A list of keyterms to bias the transcription towards. The keyterms are words or phrases you want the model to recognise more accurately. The number of keyterms cannot exceed 1000. The length of each keyterm must be less than 50 characters. Keyterms can contain at most 5 words (after normalisation). For example ["hello", "world", "technical term"]. Usage of this parameter will incur an additional 20% surcharge on the base transcription cost. When more than 100 keyterms are provided, a minimum billable duration of 20 seconds applies per request.'
40151
+ 'A list of keyterms to bias the transcription towards. The keyterms are words or phrases you want the model to recognise more accurately. The number of keyterms cannot exceed 1000. The length of each keyterm must be less than 50 characters. Keyterms can contain at most 5 words (after normalisation). For example ["hello", "world", "technical term"]. The following characters are not supported: `<`, `>`, `{`, `}`, `[`, `]`, `\\`. Usage of this parameter will incur an additional 20% surcharge on the base transcription cost. When more than 100 keyterms are provided, a minimum billable duration of 20 seconds applies per request.'
39511
40152
  )
39512
40153
  });
39513
40154
  var speechToTextResponse = import_zod13.z.object({
@@ -39873,6 +40514,7 @@ var deleteTranscriptByIdResponse = import_zod13.z.any();
39873
40514
  SonioxModels,
39874
40515
  SonioxRealtimeModel,
39875
40516
  SonioxRegion,
40517
+ SonioxSDK,
39876
40518
  SonioxStreamingSchema,
39877
40519
  SonioxStreamingTypes,
39878
40520
  SonioxStreamingUpdateSchema,