voice-router-dev 0.9.3 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -145,6 +145,7 @@ __export(src_exports, {
145
145
  SonioxModels: () => SonioxModels,
146
146
  SonioxRealtimeModel: () => SonioxRealtimeModel,
147
147
  SonioxRegion: () => SonioxRegion,
148
+ SonioxSDK: () => sdk_types_exports,
148
149
  SonioxStreamingSchema: () => SonioxStreamingSchema,
149
150
  SonioxStreamingTypes: () => streaming_types_zod_exports,
150
151
  SonioxStreamingUpdateSchema: () => SonioxStreamingUpdateSchema,
@@ -1333,7 +1334,6 @@ var AzureLocales = [
1333
1334
  { code: "ar-YE", name: "Arabic (Yemen)" },
1334
1335
  { code: "as-IN", name: "Assamese (India)" },
1335
1336
  { code: "az-AZ", name: "Azerbaijani (Azerbaijan)" },
1336
- { code: "be-BY", name: "Belarusian (Belarus)" },
1337
1337
  { code: "bg-BG", name: "Bulgarian (Bulgaria)" },
1338
1338
  { code: "bn-BD", name: "Bengali (Bangladesh)" },
1339
1339
  { code: "bn-IN", name: "Bengali (India)" },
@@ -1414,7 +1414,6 @@ var AzureLocales = [
1414
1414
  { code: "lo-LA", name: "Lao (Latin)" },
1415
1415
  { code: "lt-LT", name: "Lithuanian (Lithuania)" },
1416
1416
  { code: "lv-LV", name: "Latvian (Latvia)" },
1417
- { code: "mi-NZ", name: "Maori (New Zealand)" },
1418
1417
  { code: "mk-MK", name: "Macedonian (North Macedonia)" },
1419
1418
  { code: "ml-IN", name: "Malayalam (India)" },
1420
1419
  { code: "mn-MN", name: "Mongolian (Mongolia)" },
@@ -1490,7 +1489,6 @@ var AzureLocaleCodes = [
1490
1489
  "ar-YE",
1491
1490
  "as-IN",
1492
1491
  "az-AZ",
1493
- "be-BY",
1494
1492
  "bg-BG",
1495
1493
  "bn-BD",
1496
1494
  "bn-IN",
@@ -1571,7 +1569,6 @@ var AzureLocaleCodes = [
1571
1569
  "lo-LA",
1572
1570
  "lt-LT",
1573
1571
  "lv-LV",
1574
- "mi-NZ",
1575
1572
  "mk-MK",
1576
1573
  "ml-IN",
1577
1574
  "mn-MN",
@@ -1647,7 +1644,6 @@ var AzureLocaleLabels = {
1647
1644
  "ar-YE": "Arabic (Yemen)",
1648
1645
  "as-IN": "Assamese (India)",
1649
1646
  "az-AZ": "Azerbaijani (Azerbaijan)",
1650
- "be-BY": "Belarusian (Belarus)",
1651
1647
  "bg-BG": "Bulgarian (Bulgaria)",
1652
1648
  "bn-BD": "Bengali (Bangladesh)",
1653
1649
  "bn-IN": "Bengali (India)",
@@ -1728,7 +1724,6 @@ var AzureLocaleLabels = {
1728
1724
  "lo-LA": "Lao (Latin)",
1729
1725
  "lt-LT": "Lithuanian (Lithuania)",
1730
1726
  "lv-LV": "Latvian (Latvia)",
1731
- "mi-NZ": "Maori (New Zealand)",
1732
1727
  "mk-MK": "Macedonian (North Macedonia)",
1733
1728
  "ml-IN": "Malayalam (India)",
1734
1729
  "mn-MN": "Mongolian (Mongolia)",
@@ -1804,7 +1799,6 @@ var AzureLocale = {
1804
1799
  "ar-YE": "ar-YE",
1805
1800
  "as-IN": "as-IN",
1806
1801
  "az-AZ": "az-AZ",
1807
- "be-BY": "be-BY",
1808
1802
  "bg-BG": "bg-BG",
1809
1803
  "bn-BD": "bn-BD",
1810
1804
  "bn-IN": "bn-IN",
@@ -1885,7 +1879,6 @@ var AzureLocale = {
1885
1879
  "lo-LA": "lo-LA",
1886
1880
  "lt-LT": "lt-LT",
1887
1881
  "lv-LV": "lv-LV",
1888
- "mi-NZ": "mi-NZ",
1889
1882
  "mk-MK": "mk-MK",
1890
1883
  "ml-IN": "ml-IN",
1891
1884
  "mn-MN": "mn-MN",
@@ -1976,8 +1969,6 @@ var ElevenLabsLanguages = [
1976
1969
  { code: "hr", name: "Croatian" },
1977
1970
  { code: "bg", name: "Bulgarian" },
1978
1971
  { code: "lt", name: "Lithuanian" },
1979
- { code: "la", name: "Latin" },
1980
- { code: "mi", name: "Maori" },
1981
1972
  { code: "ml", name: "Malayalam" },
1982
1973
  { code: "cy", name: "Welsh" },
1983
1974
  { code: "sk", name: "Slovak" },
@@ -1991,20 +1982,16 @@ var ElevenLabsLanguages = [
1991
1982
  { code: "kn", name: "Kannada" },
1992
1983
  { code: "et", name: "Estonian" },
1993
1984
  { code: "mk", name: "Macedonian" },
1994
- { code: "br", name: "Breton" },
1995
- { code: "eu", name: "Basque" },
1996
1985
  { code: "is", name: "Icelandic" },
1997
1986
  { code: "hy", name: "Armenian" },
1998
1987
  { code: "ne", name: "Nepali" },
1999
1988
  { code: "mn", name: "Mongolian" },
2000
1989
  { code: "bs", name: "Bosnian" },
2001
1990
  { code: "kk", name: "Kazakh" },
2002
- { code: "sq", name: "Albanian" },
2003
1991
  { code: "sw", name: "Swahili" },
2004
1992
  { code: "gl", name: "Galician" },
2005
1993
  { code: "mr", name: "Marathi" },
2006
1994
  { code: "pa", name: "Punjabi" },
2007
- { code: "si", name: "Sinhala" },
2008
1995
  { code: "km", name: "Khmer" },
2009
1996
  { code: "sn", name: "Shona" },
2010
1997
  { code: "yo", name: "Yoruba" },
@@ -2017,29 +2004,16 @@ var ElevenLabsLanguages = [
2017
2004
  { code: "sd", name: "Sindhi" },
2018
2005
  { code: "gu", name: "Gujarati" },
2019
2006
  { code: "am", name: "Amharic" },
2020
- { code: "yi", name: "Yiddish" },
2021
2007
  { code: "lo", name: "Lao" },
2022
2008
  { code: "uz", name: "Uzbek" },
2023
- { code: "fo", name: "Faroese" },
2024
- { code: "ht", name: "Haitian Creole" },
2025
2009
  { code: "ps", name: "Pashto" },
2026
- { code: "tk", name: "Turkmen" },
2027
- { code: "nn", name: "Norwegian Nynorsk" },
2028
2010
  { code: "mt", name: "Maltese" },
2029
- { code: "sa", name: "Sanskrit" },
2030
2011
  { code: "lb", name: "Luxembourgish" },
2031
2012
  { code: "my", name: "Burmese" },
2032
- { code: "bo", name: "Tibetan" },
2033
- { code: "tl", name: "Tagalog" },
2034
- { code: "mg", name: "Malagasy" },
2035
2013
  { code: "as", name: "Assamese" },
2036
- { code: "tt", name: "Tatar" },
2037
- { code: "haw", name: "Hawaiian" },
2038
2014
  { code: "ln", name: "Lingala" },
2039
2015
  { code: "ha", name: "Hausa" },
2040
- { code: "ba", name: "Bashkir" },
2041
- { code: "jw", name: "Javanese" },
2042
- { code: "su", name: "Sundanese" }
2016
+ { code: "jw", name: "Javanese" }
2043
2017
  ];
2044
2018
  var ElevenLabsLanguageCodes = [
2045
2019
  "en",
@@ -2077,8 +2051,6 @@ var ElevenLabsLanguageCodes = [
2077
2051
  "hr",
2078
2052
  "bg",
2079
2053
  "lt",
2080
- "la",
2081
- "mi",
2082
2054
  "ml",
2083
2055
  "cy",
2084
2056
  "sk",
@@ -2092,20 +2064,16 @@ var ElevenLabsLanguageCodes = [
2092
2064
  "kn",
2093
2065
  "et",
2094
2066
  "mk",
2095
- "br",
2096
- "eu",
2097
2067
  "is",
2098
2068
  "hy",
2099
2069
  "ne",
2100
2070
  "mn",
2101
2071
  "bs",
2102
2072
  "kk",
2103
- "sq",
2104
2073
  "sw",
2105
2074
  "gl",
2106
2075
  "mr",
2107
2076
  "pa",
2108
- "si",
2109
2077
  "km",
2110
2078
  "sn",
2111
2079
  "yo",
@@ -2118,29 +2086,16 @@ var ElevenLabsLanguageCodes = [
2118
2086
  "sd",
2119
2087
  "gu",
2120
2088
  "am",
2121
- "yi",
2122
2089
  "lo",
2123
2090
  "uz",
2124
- "fo",
2125
- "ht",
2126
2091
  "ps",
2127
- "tk",
2128
- "nn",
2129
2092
  "mt",
2130
- "sa",
2131
2093
  "lb",
2132
2094
  "my",
2133
- "bo",
2134
- "tl",
2135
- "mg",
2136
2095
  "as",
2137
- "tt",
2138
- "haw",
2139
2096
  "ln",
2140
2097
  "ha",
2141
- "ba",
2142
- "jw",
2143
- "su"
2098
+ "jw"
2144
2099
  ];
2145
2100
  var ElevenLabsLanguageLabels = {
2146
2101
  en: "English",
@@ -2178,8 +2133,6 @@ var ElevenLabsLanguageLabels = {
2178
2133
  hr: "Croatian",
2179
2134
  bg: "Bulgarian",
2180
2135
  lt: "Lithuanian",
2181
- la: "Latin",
2182
- mi: "Maori",
2183
2136
  ml: "Malayalam",
2184
2137
  cy: "Welsh",
2185
2138
  sk: "Slovak",
@@ -2193,20 +2146,16 @@ var ElevenLabsLanguageLabels = {
2193
2146
  kn: "Kannada",
2194
2147
  et: "Estonian",
2195
2148
  mk: "Macedonian",
2196
- br: "Breton",
2197
- eu: "Basque",
2198
2149
  is: "Icelandic",
2199
2150
  hy: "Armenian",
2200
2151
  ne: "Nepali",
2201
2152
  mn: "Mongolian",
2202
2153
  bs: "Bosnian",
2203
2154
  kk: "Kazakh",
2204
- sq: "Albanian",
2205
2155
  sw: "Swahili",
2206
2156
  gl: "Galician",
2207
2157
  mr: "Marathi",
2208
2158
  pa: "Punjabi",
2209
- si: "Sinhala",
2210
2159
  km: "Khmer",
2211
2160
  sn: "Shona",
2212
2161
  yo: "Yoruba",
@@ -2219,29 +2168,16 @@ var ElevenLabsLanguageLabels = {
2219
2168
  sd: "Sindhi",
2220
2169
  gu: "Gujarati",
2221
2170
  am: "Amharic",
2222
- yi: "Yiddish",
2223
2171
  lo: "Lao",
2224
2172
  uz: "Uzbek",
2225
- fo: "Faroese",
2226
- ht: "Haitian Creole",
2227
2173
  ps: "Pashto",
2228
- tk: "Turkmen",
2229
- nn: "Norwegian Nynorsk",
2230
2174
  mt: "Maltese",
2231
- sa: "Sanskrit",
2232
2175
  lb: "Luxembourgish",
2233
2176
  my: "Burmese",
2234
- bo: "Tibetan",
2235
- tl: "Tagalog",
2236
- mg: "Malagasy",
2237
2177
  as: "Assamese",
2238
- tt: "Tatar",
2239
- haw: "Hawaiian",
2240
2178
  ln: "Lingala",
2241
2179
  ha: "Hausa",
2242
- ba: "Bashkir",
2243
- jw: "Javanese",
2244
- su: "Sundanese"
2180
+ jw: "Javanese"
2245
2181
  };
2246
2182
 
2247
2183
  // src/generated/gladia/schema/streamingSupportedBitDepthEnum.ts
@@ -2746,6 +2682,7 @@ var OpenAITranscriptionModel = {
2746
2682
  "gpt-4o-mini-transcribe-2025-12-15": "gpt-4o-mini-transcribe-2025-12-15",
2747
2683
  "gpt-4o-transcribe": "gpt-4o-transcribe",
2748
2684
  "gpt-4o-transcribe-diarize": "gpt-4o-transcribe-diarize",
2685
+ "gpt-realtime-whisper": "gpt-realtime-whisper",
2749
2686
  "whisper-1": "whisper-1"
2750
2687
  };
2751
2688
  var OpenAIRealtimeModel = {
@@ -2761,6 +2698,7 @@ var OpenAIRealtimeModel = {
2761
2698
  "gpt-audio-mini-2025-12-15": "gpt-audio-mini-2025-12-15",
2762
2699
  "gpt-realtime": "gpt-realtime",
2763
2700
  "gpt-realtime-1.5": "gpt-realtime-1.5",
2701
+ "gpt-realtime-2": "gpt-realtime-2",
2764
2702
  "gpt-realtime-2025-08-28": "gpt-realtime-2025-08-28",
2765
2703
  "gpt-realtime-mini": "gpt-realtime-mini",
2766
2704
  "gpt-realtime-mini-2025-10-06": "gpt-realtime-mini-2025-10-06",
@@ -4360,6 +4298,12 @@ var GladiaAdapter = class extends BaseAdapter {
4360
4298
  };
4361
4299
  this.baseUrl = "https://api.gladia.io";
4362
4300
  }
4301
+ initialize(config) {
4302
+ super.initialize(config);
4303
+ if (config.region) {
4304
+ this.streamingRegion = config.region;
4305
+ }
4306
+ }
4363
4307
  /**
4364
4308
  * Get axios config for generated API client functions
4365
4309
  * Configures headers and base URL using Gladia's x-gladia-key header
@@ -5021,9 +4965,10 @@ var GladiaAdapter = class extends BaseAdapter {
5021
4965
  async transcribeStream(options, callbacks) {
5022
4966
  this.validateConfig();
5023
4967
  const streamingRequest = this.buildStreamingRequest(options);
4968
+ const region = options?.region ?? this.streamingRegion;
5024
4969
  const initResponse = await streamingControllerInitStreamingSessionV2(
5025
4970
  streamingRequest,
5026
- options?.region ? { region: options.region } : void 0,
4971
+ region ? { region } : void 0,
5027
4972
  this.getAxiosConfig()
5028
4973
  );
5029
4974
  const { id, url: apiWsUrl } = initResponse.data;
@@ -5570,12 +5515,20 @@ var EntityType = {
5570
5515
  email_address: "email_address",
5571
5516
  event: "event",
5572
5517
  filename: "filename",
5518
+ gender: "gender",
5573
5519
  gender_sexuality: "gender_sexuality",
5574
5520
  healthcare_number: "healthcare_number",
5575
5521
  injury: "injury",
5576
5522
  ip_address: "ip_address",
5577
5523
  language: "language",
5578
5524
  location: "location",
5525
+ location_address: "location_address",
5526
+ location_address_street: "location_address_street",
5527
+ location_city: "location_city",
5528
+ location_coordinate: "location_coordinate",
5529
+ location_country: "location_country",
5530
+ location_state: "location_state",
5531
+ location_zip: "location_zip",
5579
5532
  marital_status: "marital_status",
5580
5533
  medical_condition: "medical_condition",
5581
5534
  medical_process: "medical_process",
@@ -5584,6 +5537,7 @@ var EntityType = {
5584
5537
  number_sequence: "number_sequence",
5585
5538
  occupation: "occupation",
5586
5539
  organization: "organization",
5540
+ organization_medical_facility: "organization_medical_facility",
5587
5541
  passport_number: "passport_number",
5588
5542
  password: "password",
5589
5543
  person_age: "person_age",
@@ -5592,6 +5546,7 @@ var EntityType = {
5592
5546
  physical_attribute: "physical_attribute",
5593
5547
  political_affiliation: "political_affiliation",
5594
5548
  religion: "religion",
5549
+ sexuality: "sexuality",
5595
5550
  statistics: "statistics",
5596
5551
  time: "time",
5597
5552
  url: "url",
@@ -5618,12 +5573,20 @@ var PiiPolicy = {
5618
5573
  email_address: "email_address",
5619
5574
  event: "event",
5620
5575
  filename: "filename",
5576
+ gender: "gender",
5621
5577
  gender_sexuality: "gender_sexuality",
5622
5578
  healthcare_number: "healthcare_number",
5623
5579
  injury: "injury",
5624
5580
  ip_address: "ip_address",
5625
5581
  language: "language",
5626
5582
  location: "location",
5583
+ location_address: "location_address",
5584
+ location_address_street: "location_address_street",
5585
+ location_city: "location_city",
5586
+ location_coordinate: "location_coordinate",
5587
+ location_country: "location_country",
5588
+ location_state: "location_state",
5589
+ location_zip: "location_zip",
5627
5590
  marital_status: "marital_status",
5628
5591
  medical_condition: "medical_condition",
5629
5592
  medical_process: "medical_process",
@@ -5632,6 +5595,7 @@ var PiiPolicy = {
5632
5595
  number_sequence: "number_sequence",
5633
5596
  occupation: "occupation",
5634
5597
  organization: "organization",
5598
+ organization_medical_facility: "organization_medical_facility",
5635
5599
  passport_number: "passport_number",
5636
5600
  password: "password",
5637
5601
  person_age: "person_age",
@@ -5640,6 +5604,7 @@ var PiiPolicy = {
5640
5604
  physical_attribute: "physical_attribute",
5641
5605
  political_affiliation: "political_affiliation",
5642
5606
  religion: "religion",
5607
+ sexuality: "sexuality",
5643
5608
  statistics: "statistics",
5644
5609
  time: "time",
5645
5610
  url: "url",
@@ -5708,7 +5673,8 @@ var TranscriptOptionalParamsRedactPiiAudioOptionsOverrideAudioRedactionMethod =
5708
5673
 
5709
5674
  // src/generated/assemblyai/schema/transcriptOptionalParamsRemoveAudioTags.ts
5710
5675
  var TranscriptOptionalParamsRemoveAudioTags = {
5711
- all: "all"
5676
+ all: "all",
5677
+ speaker: "speaker"
5712
5678
  };
5713
5679
 
5714
5680
  // src/generated/assemblyai/schema/transcriptRedactPiiAudioOptionsOverrideAudioRedactionMethod.ts
@@ -5718,7 +5684,8 @@ var TranscriptRedactPiiAudioOptionsOverrideAudioRedactionMethod = {
5718
5684
 
5719
5685
  // src/generated/assemblyai/schema/transcriptRemoveAudioTags.ts
5720
5686
  var TranscriptRemoveAudioTags = {
5721
- all: "all"
5687
+ all: "all",
5688
+ speaker: "speaker"
5722
5689
  };
5723
5690
 
5724
5691
  // src/generated/assemblyai/api/assemblyAIAPI.ts
@@ -9610,15 +9577,18 @@ var import_axios9 = __toESM(require("axios"));
9610
9577
  // src/generated/soniox/schema/index.ts
9611
9578
  var schema_exports4 = {};
9612
9579
  __export(schema_exports4, {
9580
+ TTSVoiceGender: () => TTSVoiceGender,
9613
9581
  TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
9614
9582
  TranscriptionMode: () => TranscriptionMode,
9615
9583
  TranscriptionStatus: () => TranscriptionStatus,
9616
- TranslationConfigType: () => TranslationConfigType
9584
+ TranslationConfigType: () => TranslationConfigType,
9585
+ UsageLogsSort: () => UsageLogsSort
9617
9586
  });
9618
9587
 
9619
9588
  // src/generated/soniox/schema/temporaryApiKeyUsageType.ts
9620
9589
  var TemporaryApiKeyUsageType = {
9621
- transcribe_websocket: "transcribe_websocket"
9590
+ transcribe_websocket: "transcribe_websocket",
9591
+ tts_rt: "tts_rt"
9622
9592
  };
9623
9593
 
9624
9594
  // src/generated/soniox/schema/transcriptionMode.ts
@@ -9633,6 +9603,19 @@ var TranslationConfigType = {
9633
9603
  two_way: "two_way"
9634
9604
  };
9635
9605
 
9606
+ // src/generated/soniox/schema/tTSVoiceGender.ts
9607
+ var TTSVoiceGender = {
9608
+ male: "male",
9609
+ female: "female",
9610
+ neutral: "neutral"
9611
+ };
9612
+
9613
+ // src/generated/soniox/schema/usageLogsSort.ts
9614
+ var UsageLogsSort = {
9615
+ end_time_asc: "end_time_asc",
9616
+ end_time_desc: "end_time_desc"
9617
+ };
9618
+
9636
9619
  // src/generated/soniox/api/sonioxPublicAPI.ts
9637
9620
  var uploadFile = (uploadFileBody2, options) => {
9638
9621
  const formData = new FormData();
@@ -11000,6 +10983,7 @@ __export(deepgramAPI_zod_exports, {
11000
10983
  speakGenerateQueryMipOptOutDefault: () => speakGenerateQueryMipOptOutDefault,
11001
10984
  speakGenerateQueryModelDefault: () => speakGenerateQueryModelDefault,
11002
10985
  speakGenerateQueryParams: () => speakGenerateQueryParams,
10986
+ speakGenerateQuerySpeedDefault: () => speakGenerateQuerySpeedDefault,
11003
10987
  speakGenerateResponse: () => speakGenerateResponse
11004
10988
  });
11005
10989
  var import_zod = require("zod");
@@ -11054,6 +11038,9 @@ var listenTranscribeQueryParams = import_zod.z.object({
11054
11038
  diarize: import_zod.z.boolean().optional().describe(
11055
11039
  "Recognize speaker changes. Each word in the transcript will be assigned a speaker number starting at 0"
11056
11040
  ),
11041
+ diarize_model: import_zod.z.enum(["latest", "v1", "v2"]).optional().describe(
11042
+ "Select and enable a specific batch diarization model version. If specifying this parameter, you should not set the deprecated `diarize=true` parameter. Not accepted on streaming requests."
11043
+ ),
11057
11044
  dictation: import_zod.z.boolean().optional().describe("Dictation mode for controlling formatting with dictated speech"),
11058
11045
  encoding: import_zod.z.enum(["linear16", "flac", "mulaw", "amr-nb", "amr-wb", "opus", "speex", "g729"]).optional().describe("Specify the expected encoding of your submitted audio"),
11059
11046
  filler_words: import_zod.z.boolean().optional().describe('Filler Words can help transcribe interruptions in your audio, like "uh" and "um"'),
@@ -11319,6 +11306,7 @@ var listenTranscribeResponse = import_zod.z.object({
11319
11306
  var speakGenerateQueryCallbackMethodDefault = "POST";
11320
11307
  var speakGenerateQueryMipOptOutDefault = false;
11321
11308
  var speakGenerateQueryModelDefault = "aura-asteria-en";
11309
+ var speakGenerateQuerySpeedDefault = 1;
11322
11310
  var speakGenerateQueryParams = import_zod.z.object({
11323
11311
  callback: import_zod.z.string().optional().describe("URL to which we'll make the callback request"),
11324
11312
  callback_method: import_zod.z.enum(["POST", "PUT"]).default(speakGenerateQueryCallbackMethodDefault).describe("HTTP method by which the callback request will be made"),
@@ -11430,6 +11418,9 @@ var speakGenerateQueryParams = import_zod.z.object({
11430
11418
  import_zod.z.enum(["22050"]).describe("Encoding - mp3. Sample rate is fixed and not configurable (22050 Hz).")
11431
11419
  ).or(import_zod.z.enum(["48000"]).describe("Encoding - opus. Sample rate is fixed at 48000 Hz.")).optional().describe(
11432
11420
  "Sample Rate specifies the sample rate for the output audio. Based on the encoding, different sample rates are supported. For some encodings, the sample rate is not configurable"
11421
+ ),
11422
+ speed: import_zod.z.number().default(speakGenerateQuerySpeedDefault).describe(
11423
+ "Speaking rate multiplier that adjusts the pace of generated speech while preserving natural prosody and voice quality. Not yet supported in all languages."
11433
11424
  )
11434
11425
  });
11435
11426
  var speakGenerateHeader = import_zod.z.object({
@@ -11754,6 +11745,7 @@ __export(assemblyAIAPI_zod_exports, {
11754
11745
  createTranscriptBodyRedactPiiAudioDefault: () => createTranscriptBodyRedactPiiAudioDefault,
11755
11746
  createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault: () => createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault,
11756
11747
  createTranscriptBodyRedactPiiDefault: () => createTranscriptBodyRedactPiiDefault,
11748
+ createTranscriptBodyRedactPiiReturnUnredactedDefault: () => createTranscriptBodyRedactPiiReturnUnredactedDefault,
11757
11749
  createTranscriptBodySentimentAnalysisDefault: () => createTranscriptBodySentimentAnalysisDefault,
11758
11750
  createTranscriptBodySpeakerLabelsDefault: () => createTranscriptBodySpeakerLabelsDefault,
11759
11751
  createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault: () => createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault,
@@ -11824,6 +11816,7 @@ var createTranscriptBodyPunctuateDefault = true;
11824
11816
  var createTranscriptBodyRedactPiiDefault = false;
11825
11817
  var createTranscriptBodyRedactPiiAudioDefault = false;
11826
11818
  var createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault = false;
11819
+ var createTranscriptBodyRedactPiiReturnUnredactedDefault = false;
11827
11820
  var createTranscriptBodySentimentAnalysisDefault = false;
11828
11821
  var createTranscriptBodySpeakerLabelsDefault = false;
11829
11822
  var createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault = 1;
@@ -11862,7 +11855,7 @@ var createTranscriptBody = import_zod3.z.object({
11862
11855
  "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
11863
11856
  ),
11864
11857
  disfluencies: import_zod3.z.boolean().optional().describe(
11865
- 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
11858
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
11866
11859
  ),
11867
11860
  domain: import_zod3.z.string().nullish().describe(
11868
11861
  'Enable domain-specific transcription models to improve accuracy for specialized terminology. Set to `"medical-v1"` to enable [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) for improved accuracy of medical terms such as medications, procedures, conditions, and dosages.\n\nSupported languages: English (`en`), Spanish (`es`), German (`de`), French (`fr`). If used with an unsupported language, the parameter is ignored and a warning is returned.\n'
@@ -12169,12 +12162,20 @@ var createTranscriptBody = import_zod3.z.object({
12169
12162
  "email_address",
12170
12163
  "event",
12171
12164
  "filename",
12165
+ "gender",
12172
12166
  "gender_sexuality",
12173
12167
  "healthcare_number",
12174
12168
  "injury",
12175
12169
  "ip_address",
12176
12170
  "language",
12177
12171
  "location",
12172
+ "location_address",
12173
+ "location_address_street",
12174
+ "location_city",
12175
+ "location_coordinate",
12176
+ "location_country",
12177
+ "location_state",
12178
+ "location_zip",
12178
12179
  "marital_status",
12179
12180
  "medical_condition",
12180
12181
  "medical_process",
@@ -12183,6 +12184,7 @@ var createTranscriptBody = import_zod3.z.object({
12183
12184
  "number_sequence",
12184
12185
  "occupation",
12185
12186
  "organization",
12187
+ "organization_medical_facility",
12186
12188
  "passport_number",
12187
12189
  "password",
12188
12190
  "person_age",
@@ -12191,6 +12193,7 @@ var createTranscriptBody = import_zod3.z.object({
12191
12193
  "physical_attribute",
12192
12194
  "political_affiliation",
12193
12195
  "religion",
12196
+ "sexuality",
12194
12197
  "statistics",
12195
12198
  "time",
12196
12199
  "url",
@@ -12198,15 +12201,20 @@ var createTranscriptBody = import_zod3.z.object({
12198
12201
  "username",
12199
12202
  "vehicle_id",
12200
12203
  "zodiac_sign"
12201
- ]).describe("The type of PII to redact")
12204
+ ]).describe(
12205
+ "The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
12206
+ )
12202
12207
  ).optional().describe(
12203
12208
  "The list of PII Redaction policies to enable. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
12204
12209
  ),
12205
12210
  redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).describe(
12206
- "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
12211
+ "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
12207
12212
  ).or(import_zod3.z.null()).optional().describe(
12208
12213
  "The replacement logic for detected PII, can be `entity_type` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
12209
12214
  ),
12215
+ redact_pii_return_unredacted: import_zod3.z.boolean().optional().describe(
12216
+ "When set to `true`, returns the original unredacted transcript alongside the redacted one in the same response. Requires `redact_pii` to be `true`, otherwise a 400 error is returned.\n\nWhen enabled, the response includes the additional fields `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. The existing `text`, `words`, and `utterances` fields remain fully redacted. When disabled (default), the response is unchanged and contains only the redacted transcript. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details.\n"
12217
+ ),
12210
12218
  sentiment_analysis: import_zod3.z.boolean().optional().describe(
12211
12219
  "Enable [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech), can be true or false"
12212
12220
  ),
@@ -12304,10 +12312,10 @@ var createTranscriptBody = import_zod3.z.object({
12304
12312
  ),
12305
12313
  summary_model: import_zod3.z.enum(["informative", "conversational", "catchy"]).optional().describe("The model to summarize the transcript"),
12306
12314
  summary_type: import_zod3.z.enum(["bullets", "bullets_verbose", "gist", "headline", "paragraph"]).optional().describe("The type of summary"),
12307
- remove_audio_tags: import_zod3.z.enum(["all"]).describe(
12308
- 'Remove [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) from the transcript text. Set to `"all"` to remove all audio tags.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
12315
+ remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
12316
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
12309
12317
  ).or(import_zod3.z.null()).optional().describe(
12310
- 'Remove [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) from the transcript text. Set to `"all"` to remove all audio tags.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
12318
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
12311
12319
  ),
12312
12320
  temperature: import_zod3.z.number().optional().describe(
12313
12321
  "Control the amount of randomness injected into the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
@@ -12441,7 +12449,7 @@ var createTranscriptResponse = import_zod3.z.object({
12441
12449
  "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
12442
12450
  ),
12443
12451
  disfluencies: import_zod3.z.boolean().nullish().describe(
12444
- 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
12452
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
12445
12453
  ),
12446
12454
  domain: import_zod3.z.string().nullish().describe(
12447
12455
  'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
@@ -12464,12 +12472,20 @@ var createTranscriptResponse = import_zod3.z.object({
12464
12472
  "email_address",
12465
12473
  "event",
12466
12474
  "filename",
12475
+ "gender",
12467
12476
  "gender_sexuality",
12468
12477
  "healthcare_number",
12469
12478
  "injury",
12470
12479
  "ip_address",
12471
12480
  "language",
12472
12481
  "location",
12482
+ "location_address",
12483
+ "location_address_street",
12484
+ "location_city",
12485
+ "location_coordinate",
12486
+ "location_country",
12487
+ "location_state",
12488
+ "location_zip",
12473
12489
  "marital_status",
12474
12490
  "medical_condition",
12475
12491
  "medical_process",
@@ -12478,6 +12494,7 @@ var createTranscriptResponse = import_zod3.z.object({
12478
12494
  "number_sequence",
12479
12495
  "occupation",
12480
12496
  "organization",
12497
+ "organization_medical_facility",
12481
12498
  "passport_number",
12482
12499
  "password",
12483
12500
  "person_age",
@@ -12486,6 +12503,7 @@ var createTranscriptResponse = import_zod3.z.object({
12486
12503
  "physical_attribute",
12487
12504
  "political_affiliation",
12488
12505
  "religion",
12506
+ "sexuality",
12489
12507
  "statistics",
12490
12508
  "time",
12491
12509
  "url",
@@ -12790,6 +12808,24 @@ var createTranscriptResponse = import_zod3.z.object({
12790
12808
  }).optional().describe(
12791
12809
  "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
12792
12810
  ),
12811
+ metadata: import_zod3.z.object({
12812
+ domain_used: import_zod3.z.string().nullish().describe(
12813
+ 'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
12814
+ ),
12815
+ warnings: import_zod3.z.array(
12816
+ import_zod3.z.object({
12817
+ message: import_zod3.z.string().describe("A human-readable description of the warning.")
12818
+ }).describe(
12819
+ "A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
12820
+ )
12821
+ ).optional().describe(
12822
+ "Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
12823
+ )
12824
+ }).describe(
12825
+ "Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
12826
+ ).or(import_zod3.z.null()).optional().describe(
12827
+ "Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
12828
+ ),
12793
12829
  multichannel: import_zod3.z.boolean().nullish().describe(
12794
12830
  "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
12795
12831
  ),
@@ -12837,12 +12873,20 @@ var createTranscriptResponse = import_zod3.z.object({
12837
12873
  "email_address",
12838
12874
  "event",
12839
12875
  "filename",
12876
+ "gender",
12840
12877
  "gender_sexuality",
12841
12878
  "healthcare_number",
12842
12879
  "injury",
12843
12880
  "ip_address",
12844
12881
  "language",
12845
12882
  "location",
12883
+ "location_address",
12884
+ "location_address_street",
12885
+ "location_city",
12886
+ "location_coordinate",
12887
+ "location_country",
12888
+ "location_state",
12889
+ "location_zip",
12846
12890
  "marital_status",
12847
12891
  "medical_condition",
12848
12892
  "medical_process",
@@ -12851,6 +12895,7 @@ var createTranscriptResponse = import_zod3.z.object({
12851
12895
  "number_sequence",
12852
12896
  "occupation",
12853
12897
  "organization",
12898
+ "organization_medical_facility",
12854
12899
  "passport_number",
12855
12900
  "password",
12856
12901
  "person_age",
@@ -12859,6 +12904,7 @@ var createTranscriptResponse = import_zod3.z.object({
12859
12904
  "physical_attribute",
12860
12905
  "political_affiliation",
12861
12906
  "religion",
12907
+ "sexuality",
12862
12908
  "statistics",
12863
12909
  "time",
12864
12910
  "url",
@@ -12866,12 +12912,17 @@ var createTranscriptResponse = import_zod3.z.object({
12866
12912
  "username",
12867
12913
  "vehicle_id",
12868
12914
  "zodiac_sign"
12869
- ]).describe("The type of PII to redact")
12915
+ ]).describe(
12916
+ "The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
12917
+ )
12870
12918
  ).nullish().describe(
12871
12919
  "The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
12872
12920
  ),
12873
12921
  redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
12874
- "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
12922
+ "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
12923
+ ),
12924
+ redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
12925
+ "Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
12875
12926
  ),
12876
12927
  sentiment_analysis: import_zod3.z.boolean().nullish().describe(
12877
12928
  "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
@@ -13008,20 +13059,23 @@ var createTranscriptResponse = import_zod3.z.object({
13008
13059
  "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
13009
13060
  ),
13010
13061
  summary_model: import_zod3.z.string().nullish().describe(
13011
- "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
13062
+ "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
13012
13063
  ),
13013
13064
  summary_type: import_zod3.z.string().nullish().describe(
13014
- "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
13065
+ "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
13015
13066
  ),
13016
- remove_audio_tags: import_zod3.z.enum(["all"]).describe(
13017
- "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
13067
+ remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
13068
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
13018
13069
  ).or(import_zod3.z.null()).optional().describe(
13019
- "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
13070
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
13020
13071
  ),
13021
13072
  temperature: import_zod3.z.number().nullish().describe(
13022
13073
  "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
13023
13074
  ),
13024
13075
  text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
13076
+ unredacted_text: import_zod3.z.string().nullish().describe(
13077
+ "The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
13078
+ ),
13025
13079
  throttled: import_zod3.z.boolean().nullish().describe(
13026
13080
  "True while a request is throttled and false when a request is no longer throttled"
13027
13081
  ),
@@ -13058,6 +13112,39 @@ var createTranscriptResponse = import_zod3.z.object({
13058
13112
  ).nullish().describe(
13059
13113
  "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
13060
13114
  ),
13115
+ unredacted_utterances: import_zod3.z.array(
13116
+ import_zod3.z.object({
13117
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
13118
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
13119
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
13120
+ text: import_zod3.z.string().describe("The text for this utterance"),
13121
+ words: import_zod3.z.array(
13122
+ import_zod3.z.object({
13123
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
13124
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
13125
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
13126
+ text: import_zod3.z.string().describe("The text of the word"),
13127
+ channel: import_zod3.z.string().nullish().describe(
13128
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
13129
+ ),
13130
+ speaker: import_zod3.z.string().nullable().describe(
13131
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
13132
+ )
13133
+ })
13134
+ ).describe("The words in the utterance."),
13135
+ channel: import_zod3.z.string().nullish().describe(
13136
+ "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
13137
+ ),
13138
+ speaker: import_zod3.z.string().describe(
13139
+ 'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
13140
+ ),
13141
+ translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
13142
+ 'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
13143
+ )
13144
+ })
13145
+ ).nullish().describe(
13146
+ "The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
13147
+ ),
13061
13148
  webhook_auth: import_zod3.z.boolean().describe(
13062
13149
  "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
13063
13150
  ),
@@ -13086,6 +13173,22 @@ var createTranscriptResponse = import_zod3.z.object({
13086
13173
  ).nullish().describe(
13087
13174
  "An array of temporally-sequential word objects, one for each word in the transcript.\n"
13088
13175
  ),
13176
+ unredacted_words: import_zod3.z.array(
13177
+ import_zod3.z.object({
13178
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
13179
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
13180
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
13181
+ text: import_zod3.z.string().describe("The text of the word"),
13182
+ channel: import_zod3.z.string().nullish().describe(
13183
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
13184
+ ),
13185
+ speaker: import_zod3.z.string().nullable().describe(
13186
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
13187
+ )
13188
+ })
13189
+ ).nullish().describe(
13190
+ "The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
13191
+ ),
13089
13192
  acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
13090
13193
  custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
13091
13194
  language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
@@ -13261,7 +13364,7 @@ var getTranscriptResponse = import_zod3.z.object({
13261
13364
  "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
13262
13365
  ),
13263
13366
  disfluencies: import_zod3.z.boolean().nullish().describe(
13264
- 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
13367
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
13265
13368
  ),
13266
13369
  domain: import_zod3.z.string().nullish().describe(
13267
13370
  'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
@@ -13284,12 +13387,20 @@ var getTranscriptResponse = import_zod3.z.object({
13284
13387
  "email_address",
13285
13388
  "event",
13286
13389
  "filename",
13390
+ "gender",
13287
13391
  "gender_sexuality",
13288
13392
  "healthcare_number",
13289
13393
  "injury",
13290
13394
  "ip_address",
13291
13395
  "language",
13292
13396
  "location",
13397
+ "location_address",
13398
+ "location_address_street",
13399
+ "location_city",
13400
+ "location_coordinate",
13401
+ "location_country",
13402
+ "location_state",
13403
+ "location_zip",
13293
13404
  "marital_status",
13294
13405
  "medical_condition",
13295
13406
  "medical_process",
@@ -13298,6 +13409,7 @@ var getTranscriptResponse = import_zod3.z.object({
13298
13409
  "number_sequence",
13299
13410
  "occupation",
13300
13411
  "organization",
13412
+ "organization_medical_facility",
13301
13413
  "passport_number",
13302
13414
  "password",
13303
13415
  "person_age",
@@ -13306,6 +13418,7 @@ var getTranscriptResponse = import_zod3.z.object({
13306
13418
  "physical_attribute",
13307
13419
  "political_affiliation",
13308
13420
  "religion",
13421
+ "sexuality",
13309
13422
  "statistics",
13310
13423
  "time",
13311
13424
  "url",
@@ -13610,6 +13723,24 @@ var getTranscriptResponse = import_zod3.z.object({
13610
13723
  }).optional().describe(
13611
13724
  "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
13612
13725
  ),
13726
+ metadata: import_zod3.z.object({
13727
+ domain_used: import_zod3.z.string().nullish().describe(
13728
+ 'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
13729
+ ),
13730
+ warnings: import_zod3.z.array(
13731
+ import_zod3.z.object({
13732
+ message: import_zod3.z.string().describe("A human-readable description of the warning.")
13733
+ }).describe(
13734
+ "A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
13735
+ )
13736
+ ).optional().describe(
13737
+ "Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
13738
+ )
13739
+ }).describe(
13740
+ "Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
13741
+ ).or(import_zod3.z.null()).optional().describe(
13742
+ "Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
13743
+ ),
13613
13744
  multichannel: import_zod3.z.boolean().nullish().describe(
13614
13745
  "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
13615
13746
  ),
@@ -13657,12 +13788,20 @@ var getTranscriptResponse = import_zod3.z.object({
13657
13788
  "email_address",
13658
13789
  "event",
13659
13790
  "filename",
13791
+ "gender",
13660
13792
  "gender_sexuality",
13661
13793
  "healthcare_number",
13662
13794
  "injury",
13663
13795
  "ip_address",
13664
13796
  "language",
13665
13797
  "location",
13798
+ "location_address",
13799
+ "location_address_street",
13800
+ "location_city",
13801
+ "location_coordinate",
13802
+ "location_country",
13803
+ "location_state",
13804
+ "location_zip",
13666
13805
  "marital_status",
13667
13806
  "medical_condition",
13668
13807
  "medical_process",
@@ -13671,6 +13810,7 @@ var getTranscriptResponse = import_zod3.z.object({
13671
13810
  "number_sequence",
13672
13811
  "occupation",
13673
13812
  "organization",
13813
+ "organization_medical_facility",
13674
13814
  "passport_number",
13675
13815
  "password",
13676
13816
  "person_age",
@@ -13679,6 +13819,7 @@ var getTranscriptResponse = import_zod3.z.object({
13679
13819
  "physical_attribute",
13680
13820
  "political_affiliation",
13681
13821
  "religion",
13822
+ "sexuality",
13682
13823
  "statistics",
13683
13824
  "time",
13684
13825
  "url",
@@ -13686,12 +13827,17 @@ var getTranscriptResponse = import_zod3.z.object({
13686
13827
  "username",
13687
13828
  "vehicle_id",
13688
13829
  "zodiac_sign"
13689
- ]).describe("The type of PII to redact")
13830
+ ]).describe(
13831
+ "The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
13832
+ )
13690
13833
  ).nullish().describe(
13691
13834
  "The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
13692
13835
  ),
13693
13836
  redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
13694
- "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
13837
+ "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
13838
+ ),
13839
+ redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
13840
+ "Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
13695
13841
  ),
13696
13842
  sentiment_analysis: import_zod3.z.boolean().nullish().describe(
13697
13843
  "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
@@ -13828,20 +13974,23 @@ var getTranscriptResponse = import_zod3.z.object({
13828
13974
  "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
13829
13975
  ),
13830
13976
  summary_model: import_zod3.z.string().nullish().describe(
13831
- "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
13977
+ "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
13832
13978
  ),
13833
13979
  summary_type: import_zod3.z.string().nullish().describe(
13834
- "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
13980
+ "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
13835
13981
  ),
13836
- remove_audio_tags: import_zod3.z.enum(["all"]).describe(
13837
- "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
13982
+ remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
13983
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
13838
13984
  ).or(import_zod3.z.null()).optional().describe(
13839
- "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
13985
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
13840
13986
  ),
13841
13987
  temperature: import_zod3.z.number().nullish().describe(
13842
13988
  "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
13843
13989
  ),
13844
13990
  text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
13991
+ unredacted_text: import_zod3.z.string().nullish().describe(
13992
+ "The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
13993
+ ),
13845
13994
  throttled: import_zod3.z.boolean().nullish().describe(
13846
13995
  "True while a request is throttled and false when a request is no longer throttled"
13847
13996
  ),
@@ -13878,6 +14027,39 @@ var getTranscriptResponse = import_zod3.z.object({
13878
14027
  ).nullish().describe(
13879
14028
  "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
13880
14029
  ),
14030
+ unredacted_utterances: import_zod3.z.array(
14031
+ import_zod3.z.object({
14032
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
14033
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
14034
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
14035
+ text: import_zod3.z.string().describe("The text for this utterance"),
14036
+ words: import_zod3.z.array(
14037
+ import_zod3.z.object({
14038
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
14039
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
14040
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
14041
+ text: import_zod3.z.string().describe("The text of the word"),
14042
+ channel: import_zod3.z.string().nullish().describe(
14043
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14044
+ ),
14045
+ speaker: import_zod3.z.string().nullable().describe(
14046
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
14047
+ )
14048
+ })
14049
+ ).describe("The words in the utterance."),
14050
+ channel: import_zod3.z.string().nullish().describe(
14051
+ "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14052
+ ),
14053
+ speaker: import_zod3.z.string().describe(
14054
+ 'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
14055
+ ),
14056
+ translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
14057
+ 'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
14058
+ )
14059
+ })
14060
+ ).nullish().describe(
14061
+ "The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
14062
+ ),
13881
14063
  webhook_auth: import_zod3.z.boolean().describe(
13882
14064
  "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
13883
14065
  ),
@@ -13906,6 +14088,22 @@ var getTranscriptResponse = import_zod3.z.object({
13906
14088
  ).nullish().describe(
13907
14089
  "An array of temporally-sequential word objects, one for each word in the transcript.\n"
13908
14090
  ),
14091
+ unredacted_words: import_zod3.z.array(
14092
+ import_zod3.z.object({
14093
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
14094
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
14095
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
14096
+ text: import_zod3.z.string().describe("The text of the word"),
14097
+ channel: import_zod3.z.string().nullish().describe(
14098
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14099
+ ),
14100
+ speaker: import_zod3.z.string().nullable().describe(
14101
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
14102
+ )
14103
+ })
14104
+ ).nullish().describe(
14105
+ "The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
14106
+ ),
13909
14107
  acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
13910
14108
  custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
13911
14109
  language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
@@ -14041,7 +14239,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
14041
14239
  "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
14042
14240
  ),
14043
14241
  disfluencies: import_zod3.z.boolean().nullish().describe(
14044
- 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
14242
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
14045
14243
  ),
14046
14244
  domain: import_zod3.z.string().nullish().describe(
14047
14245
  'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
@@ -14064,12 +14262,20 @@ var deleteTranscriptResponse = import_zod3.z.object({
14064
14262
  "email_address",
14065
14263
  "event",
14066
14264
  "filename",
14265
+ "gender",
14067
14266
  "gender_sexuality",
14068
14267
  "healthcare_number",
14069
14268
  "injury",
14070
14269
  "ip_address",
14071
14270
  "language",
14072
14271
  "location",
14272
+ "location_address",
14273
+ "location_address_street",
14274
+ "location_city",
14275
+ "location_coordinate",
14276
+ "location_country",
14277
+ "location_state",
14278
+ "location_zip",
14073
14279
  "marital_status",
14074
14280
  "medical_condition",
14075
14281
  "medical_process",
@@ -14078,6 +14284,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
14078
14284
  "number_sequence",
14079
14285
  "occupation",
14080
14286
  "organization",
14287
+ "organization_medical_facility",
14081
14288
  "passport_number",
14082
14289
  "password",
14083
14290
  "person_age",
@@ -14086,6 +14293,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
14086
14293
  "physical_attribute",
14087
14294
  "political_affiliation",
14088
14295
  "religion",
14296
+ "sexuality",
14089
14297
  "statistics",
14090
14298
  "time",
14091
14299
  "url",
@@ -14390,6 +14598,24 @@ var deleteTranscriptResponse = import_zod3.z.object({
14390
14598
  }).optional().describe(
14391
14599
  "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
14392
14600
  ),
14601
+ metadata: import_zod3.z.object({
14602
+ domain_used: import_zod3.z.string().nullish().describe(
14603
+ 'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
14604
+ ),
14605
+ warnings: import_zod3.z.array(
14606
+ import_zod3.z.object({
14607
+ message: import_zod3.z.string().describe("A human-readable description of the warning.")
14608
+ }).describe(
14609
+ "A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
14610
+ )
14611
+ ).optional().describe(
14612
+ "Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
14613
+ )
14614
+ }).describe(
14615
+ "Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
14616
+ ).or(import_zod3.z.null()).optional().describe(
14617
+ "Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
14618
+ ),
14393
14619
  multichannel: import_zod3.z.boolean().nullish().describe(
14394
14620
  "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
14395
14621
  ),
@@ -14437,12 +14663,20 @@ var deleteTranscriptResponse = import_zod3.z.object({
14437
14663
  "email_address",
14438
14664
  "event",
14439
14665
  "filename",
14666
+ "gender",
14440
14667
  "gender_sexuality",
14441
14668
  "healthcare_number",
14442
14669
  "injury",
14443
14670
  "ip_address",
14444
14671
  "language",
14445
14672
  "location",
14673
+ "location_address",
14674
+ "location_address_street",
14675
+ "location_city",
14676
+ "location_coordinate",
14677
+ "location_country",
14678
+ "location_state",
14679
+ "location_zip",
14446
14680
  "marital_status",
14447
14681
  "medical_condition",
14448
14682
  "medical_process",
@@ -14451,6 +14685,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
14451
14685
  "number_sequence",
14452
14686
  "occupation",
14453
14687
  "organization",
14688
+ "organization_medical_facility",
14454
14689
  "passport_number",
14455
14690
  "password",
14456
14691
  "person_age",
@@ -14459,6 +14694,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
14459
14694
  "physical_attribute",
14460
14695
  "political_affiliation",
14461
14696
  "religion",
14697
+ "sexuality",
14462
14698
  "statistics",
14463
14699
  "time",
14464
14700
  "url",
@@ -14466,12 +14702,17 @@ var deleteTranscriptResponse = import_zod3.z.object({
14466
14702
  "username",
14467
14703
  "vehicle_id",
14468
14704
  "zodiac_sign"
14469
- ]).describe("The type of PII to redact")
14705
+ ]).describe(
14706
+ "The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
14707
+ )
14470
14708
  ).nullish().describe(
14471
14709
  "The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
14472
14710
  ),
14473
14711
  redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
14474
- "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
14712
+ "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
14713
+ ),
14714
+ redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
14715
+ "Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
14475
14716
  ),
14476
14717
  sentiment_analysis: import_zod3.z.boolean().nullish().describe(
14477
14718
  "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
@@ -14608,20 +14849,23 @@ var deleteTranscriptResponse = import_zod3.z.object({
14608
14849
  "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
14609
14850
  ),
14610
14851
  summary_model: import_zod3.z.string().nullish().describe(
14611
- "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
14852
+ "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
14612
14853
  ),
14613
14854
  summary_type: import_zod3.z.string().nullish().describe(
14614
- "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
14855
+ "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
14615
14856
  ),
14616
- remove_audio_tags: import_zod3.z.enum(["all"]).describe(
14617
- "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
14857
+ remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
14858
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
14618
14859
  ).or(import_zod3.z.null()).optional().describe(
14619
- "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
14860
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
14620
14861
  ),
14621
14862
  temperature: import_zod3.z.number().nullish().describe(
14622
14863
  "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
14623
14864
  ),
14624
14865
  text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
14866
+ unredacted_text: import_zod3.z.string().nullish().describe(
14867
+ "The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
14868
+ ),
14625
14869
  throttled: import_zod3.z.boolean().nullish().describe(
14626
14870
  "True while a request is throttled and false when a request is no longer throttled"
14627
14871
  ),
@@ -14658,6 +14902,39 @@ var deleteTranscriptResponse = import_zod3.z.object({
14658
14902
  ).nullish().describe(
14659
14903
  "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
14660
14904
  ),
14905
+ unredacted_utterances: import_zod3.z.array(
14906
+ import_zod3.z.object({
14907
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
14908
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
14909
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
14910
+ text: import_zod3.z.string().describe("The text for this utterance"),
14911
+ words: import_zod3.z.array(
14912
+ import_zod3.z.object({
14913
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
14914
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
14915
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
14916
+ text: import_zod3.z.string().describe("The text of the word"),
14917
+ channel: import_zod3.z.string().nullish().describe(
14918
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14919
+ ),
14920
+ speaker: import_zod3.z.string().nullable().describe(
14921
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
14922
+ )
14923
+ })
14924
+ ).describe("The words in the utterance."),
14925
+ channel: import_zod3.z.string().nullish().describe(
14926
+ "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14927
+ ),
14928
+ speaker: import_zod3.z.string().describe(
14929
+ 'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
14930
+ ),
14931
+ translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
14932
+ 'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
14933
+ )
14934
+ })
14935
+ ).nullish().describe(
14936
+ "The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
14937
+ ),
14661
14938
  webhook_auth: import_zod3.z.boolean().describe(
14662
14939
  "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
14663
14940
  ),
@@ -14686,6 +14963,22 @@ var deleteTranscriptResponse = import_zod3.z.object({
14686
14963
  ).nullish().describe(
14687
14964
  "An array of temporally-sequential word objects, one for each word in the transcript.\n"
14688
14965
  ),
14966
+ unredacted_words: import_zod3.z.array(
14967
+ import_zod3.z.object({
14968
+ confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
14969
+ start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
14970
+ end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
14971
+ text: import_zod3.z.string().describe("The text of the word"),
14972
+ channel: import_zod3.z.string().nullish().describe(
14973
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14974
+ ),
14975
+ speaker: import_zod3.z.string().nullable().describe(
14976
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
14977
+ )
14978
+ })
14979
+ ).nullish().describe(
14980
+ "The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
14981
+ ),
14689
14982
  acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
14690
14983
  custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
14691
14984
  language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
@@ -14841,7 +15134,21 @@ var streamingTranscriberParams = import_zod4.z.object({
14841
15134
  inactivityTimeout: import_zod4.z.number().optional().describe("From SDK v3"),
14842
15135
  speakerLabels: import_zod4.z.boolean().optional().describe("From SDK v3"),
14843
15136
  maxSpeakers: import_zod4.z.number().optional().describe("From SDK v3"),
14844
- llmGateway: import_zod4.z.unknown().optional().describe("From SDK v3")
15137
+ voiceFocus: import_zod4.z.unknown().optional().describe("From SDK v3"),
15138
+ voiceFocusThreshold: import_zod4.z.number().optional().describe("From SDK v3"),
15139
+ continuousPartials: import_zod4.z.boolean().optional().describe("From SDK v3"),
15140
+ interruptionDelay: import_zod4.z.number().optional().describe("From SDK v3"),
15141
+ turnLeftPadMs: import_zod4.z.number().optional().describe("From SDK v3"),
15142
+ customerSupportAudioCapture: import_zod4.z.boolean().optional().describe("From SDK v3"),
15143
+ includePartialTurns: import_zod4.z.boolean().optional().describe("From SDK v3"),
15144
+ redactPii: import_zod4.z.boolean().optional().describe("From SDK v3"),
15145
+ redactPiiPolicies: import_zod4.z.unknown().optional().describe("From SDK v3"),
15146
+ redactPiiSub: import_zod4.z.unknown().optional().describe("From SDK v3"),
15147
+ llmGateway: import_zod4.z.unknown().optional().describe("From SDK v3"),
15148
+ webhookUrl: import_zod4.z.string().optional().describe("From SDK v3"),
15149
+ webhookAuthHeaderName: import_zod4.z.string().optional().describe("From SDK v3"),
15150
+ webhookAuthHeaderValue: import_zod4.z.string().optional().describe("From SDK v3"),
15151
+ mode: import_zod4.z.unknown().describe("From SDK v3")
14845
15152
  });
14846
15153
  var streamingUpdateConfigParams = import_zod4.z.object({
14847
15154
  end_utterance_silence_threshold: import_zod4.z.number().min(0).max(2e4).optional().describe("The duration threshold in milliseconds"),
@@ -14853,7 +15160,9 @@ var streamingUpdateConfigParams = import_zod4.z.object({
14853
15160
  format_turns: import_zod4.z.boolean().optional().describe("From SDK v3"),
14854
15161
  keyterms_prompt: import_zod4.z.array(import_zod4.z.string()).optional().describe("From SDK v3"),
14855
15162
  prompt: import_zod4.z.string().optional().describe("From SDK v3"),
14856
- filter_profanity: import_zod4.z.boolean().optional().describe("From SDK v3")
15163
+ filter_profanity: import_zod4.z.boolean().optional().describe("From SDK v3"),
15164
+ interruption_delay: import_zod4.z.number().optional().describe("From SDK v3"),
15165
+ turn_left_pad_ms: import_zod4.z.number().optional().describe("From SDK v3")
14857
15166
  });
14858
15167
 
14859
15168
  // src/generated/gladia/api/gladiaControlAPI.zod.ts
@@ -15602,7 +15911,7 @@ var preRecordedControllerInitPreRecordedJobV2BodyNamedEntityRecognitionDefault =
15602
15911
  var preRecordedControllerInitPreRecordedJobV2BodyCustomSpellingDefault = false;
15603
15912
  var preRecordedControllerInitPreRecordedJobV2BodySentimentAnalysisDefault = false;
15604
15913
  var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmDefault = false;
15605
- var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
15914
+ var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
15606
15915
  var preRecordedControllerInitPreRecordedJobV2BodyPiiRedactionDefault = false;
15607
15916
  var preRecordedControllerInitPreRecordedJobV2BodySentencesDefault = false;
15608
15917
  var preRecordedControllerInitPreRecordedJobV2BodyPunctuationEnhancedDefault = false;
@@ -15891,23 +16200,23 @@ var preRecordedControllerInitPreRecordedJobV2Body = import_zod5.z.object({
15891
16200
  "Forces the translation to use informal language forms when available in the target language."
15892
16201
  )
15893
16202
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
15894
- summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
16203
+ summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
15895
16204
  summarization_config: import_zod5.z.object({
15896
16205
  type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(preRecordedControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault).describe("The type of summarization to apply")
15897
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
16206
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
15898
16207
  named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
15899
16208
  custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
15900
16209
  custom_spelling_config: import_zod5.z.object({
15901
16210
  spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
15902
16211
  }).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
15903
16212
  sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
15904
- audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
16213
+ audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
15905
16214
  audio_to_llm_config: import_zod5.z.object({
15906
16215
  prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
15907
16216
  model: import_zod5.z.string().default(preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault).describe(
15908
16217
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
15909
16218
  )
15910
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
16219
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
15911
16220
  pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
15912
16221
  pii_redaction_config: import_zod5.z.object({
15913
16222
  entity_types: import_zod5.z.enum([
@@ -16162,7 +16471,7 @@ var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsNamed
16162
16471
  var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsCustomSpellingDefault = false;
16163
16472
  var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
16164
16473
  var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmDefault = false;
16165
- var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
16474
+ var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
16166
16475
  var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsPiiRedactionDefault = false;
16167
16476
  var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSentencesDefault = false;
16168
16477
  var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
@@ -16510,12 +16819,12 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
16510
16819
  "Forces the translation to use informal language forms when available in the target language."
16511
16820
  )
16512
16821
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
16513
- summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
16822
+ summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
16514
16823
  summarization_config: import_zod5.z.object({
16515
16824
  type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
16516
16825
  preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
16517
16826
  ).describe("The type of summarization to apply")
16518
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
16827
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
16519
16828
  named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
16520
16829
  custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
16521
16830
  custom_spelling_config: import_zod5.z.object({
@@ -16524,7 +16833,7 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
16524
16833
  "**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
16525
16834
  ),
16526
16835
  sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
16527
- audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
16836
+ audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
16528
16837
  audio_to_llm_config: import_zod5.z.object({
16529
16838
  prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
16530
16839
  model: import_zod5.z.string().default(
@@ -16532,7 +16841,7 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
16532
16841
  ).describe(
16533
16842
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
16534
16843
  )
16535
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
16844
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
16536
16845
  pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
16537
16846
  pii_redaction_config: import_zod5.z.object({
16538
16847
  entity_types: import_zod5.z.enum([
@@ -17669,7 +17978,7 @@ var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsNamedEntityReco
17669
17978
  var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsCustomSpellingDefault = false;
17670
17979
  var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSentimentAnalysisDefault = false;
17671
17980
  var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmDefault = false;
17672
- var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
17981
+ var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
17673
17982
  var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsPiiRedactionDefault = false;
17674
17983
  var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSentencesDefault = false;
17675
17984
  var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsPunctuationEnhancedDefault = false;
@@ -18010,19 +18319,19 @@ var preRecordedControllerGetPreRecordedJobV2Response = import_zod5.z.object({
18010
18319
  "Forces the translation to use informal language forms when available in the target language."
18011
18320
  )
18012
18321
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
18013
- summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
18322
+ summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
18014
18323
  summarization_config: import_zod5.z.object({
18015
18324
  type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
18016
18325
  preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSummarizationConfigTypeDefault
18017
18326
  ).describe("The type of summarization to apply")
18018
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
18327
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
18019
18328
  named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
18020
18329
  custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
18021
18330
  custom_spelling_config: import_zod5.z.object({
18022
18331
  spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
18023
18332
  }).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
18024
18333
  sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
18025
- audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
18334
+ audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
18026
18335
  audio_to_llm_config: import_zod5.z.object({
18027
18336
  prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
18028
18337
  model: import_zod5.z.string().default(
@@ -18030,7 +18339,7 @@ var preRecordedControllerGetPreRecordedJobV2Response = import_zod5.z.object({
18030
18339
  ).describe(
18031
18340
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
18032
18341
  )
18033
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
18342
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
18034
18343
  pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
18035
18344
  pii_redaction_config: import_zod5.z.object({
18036
18345
  entity_types: import_zod5.z.enum([
@@ -19143,7 +19452,7 @@ var transcriptionControllerInitPreRecordedJobV2BodyNamedEntityRecognitionDefault
19143
19452
  var transcriptionControllerInitPreRecordedJobV2BodyCustomSpellingDefault = false;
19144
19453
  var transcriptionControllerInitPreRecordedJobV2BodySentimentAnalysisDefault = false;
19145
19454
  var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmDefault = false;
19146
- var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
19455
+ var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
19147
19456
  var transcriptionControllerInitPreRecordedJobV2BodyPiiRedactionDefault = false;
19148
19457
  var transcriptionControllerInitPreRecordedJobV2BodySentencesDefault = false;
19149
19458
  var transcriptionControllerInitPreRecordedJobV2BodyPunctuationEnhancedDefault = false;
@@ -19436,23 +19745,23 @@ var transcriptionControllerInitPreRecordedJobV2Body = import_zod5.z.object({
19436
19745
  "Forces the translation to use informal language forms when available in the target language."
19437
19746
  )
19438
19747
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
19439
- summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
19748
+ summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
19440
19749
  summarization_config: import_zod5.z.object({
19441
19750
  type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(transcriptionControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault).describe("The type of summarization to apply")
19442
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
19751
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
19443
19752
  named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
19444
19753
  custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
19445
19754
  custom_spelling_config: import_zod5.z.object({
19446
19755
  spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
19447
19756
  }).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
19448
19757
  sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
19449
- audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
19758
+ audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
19450
19759
  audio_to_llm_config: import_zod5.z.object({
19451
19760
  prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
19452
19761
  model: import_zod5.z.string().default(transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault).describe(
19453
19762
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
19454
19763
  )
19455
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
19764
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
19456
19765
  pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
19457
19766
  pii_redaction_config: import_zod5.z.object({
19458
19767
  entity_types: import_zod5.z.enum([
@@ -19710,7 +20019,7 @@ var transcriptionControllerListV2ResponseItemsItemRequestParamsNamedEntityRecogn
19710
20019
  var transcriptionControllerListV2ResponseItemsItemRequestParamsCustomSpellingDefault = false;
19711
20020
  var transcriptionControllerListV2ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
19712
20021
  var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmDefault = false;
19713
- var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
20022
+ var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
19714
20023
  var transcriptionControllerListV2ResponseItemsItemRequestParamsPiiRedactionDefault = false;
19715
20024
  var transcriptionControllerListV2ResponseItemsItemRequestParamsSentencesDefault = false;
19716
20025
  var transcriptionControllerListV2ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
@@ -20121,12 +20430,12 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
20121
20430
  "Forces the translation to use informal language forms when available in the target language."
20122
20431
  )
20123
20432
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
20124
- summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
20433
+ summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
20125
20434
  summarization_config: import_zod5.z.object({
20126
20435
  type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
20127
20436
  transcriptionControllerListV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
20128
20437
  ).describe("The type of summarization to apply")
20129
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
20438
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
20130
20439
  named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
20131
20440
  custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
20132
20441
  custom_spelling_config: import_zod5.z.object({
@@ -20135,7 +20444,7 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
20135
20444
  "**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
20136
20445
  ),
20137
20446
  sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
20138
- audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
20447
+ audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
20139
20448
  audio_to_llm_config: import_zod5.z.object({
20140
20449
  prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
20141
20450
  model: import_zod5.z.string().default(
@@ -20143,7 +20452,7 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
20143
20452
  ).describe(
20144
20453
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
20145
20454
  )
20146
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
20455
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
20147
20456
  pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
20148
20457
  pii_redaction_config: import_zod5.z.object({
20149
20458
  entity_types: import_zod5.z.enum([
@@ -22461,7 +22770,7 @@ var transcriptionControllerGetTranscriptV2ResponseRequestParamsNamedEntityRecogn
22461
22770
  var transcriptionControllerGetTranscriptV2ResponseRequestParamsCustomSpellingDefault = false;
22462
22771
  var transcriptionControllerGetTranscriptV2ResponseRequestParamsSentimentAnalysisDefault = false;
22463
22772
  var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmDefault = false;
22464
- var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
22773
+ var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
22465
22774
  var transcriptionControllerGetTranscriptV2ResponseRequestParamsPiiRedactionDefault = false;
22466
22775
  var transcriptionControllerGetTranscriptV2ResponseRequestParamsSentencesDefault = false;
22467
22776
  var transcriptionControllerGetTranscriptV2ResponseRequestParamsPunctuationEnhancedDefault = false;
@@ -22866,19 +23175,19 @@ var transcriptionControllerGetTranscriptV2Response = import_zod5.z.discriminated
22866
23175
  "Forces the translation to use informal language forms when available in the target language."
22867
23176
  )
22868
23177
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
22869
- summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
23178
+ summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
22870
23179
  summarization_config: import_zod5.z.object({
22871
23180
  type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
22872
23181
  transcriptionControllerGetTranscriptV2ResponseRequestParamsSummarizationConfigTypeDefault
22873
23182
  ).describe("The type of summarization to apply")
22874
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
23183
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
22875
23184
  named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
22876
23185
  custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
22877
23186
  custom_spelling_config: import_zod5.z.object({
22878
23187
  spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
22879
23188
  }).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
22880
23189
  sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
22881
- audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
23190
+ audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
22882
23191
  audio_to_llm_config: import_zod5.z.object({
22883
23192
  prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
22884
23193
  model: import_zod5.z.string().default(
@@ -22886,7 +23195,7 @@ var transcriptionControllerGetTranscriptV2Response = import_zod5.z.discriminated
22886
23195
  ).describe(
22887
23196
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
22888
23197
  )
22889
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
23198
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
22890
23199
  pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
22891
23200
  pii_redaction_config: import_zod5.z.object({
22892
23201
  entity_types: import_zod5.z.enum([
@@ -25598,7 +25907,7 @@ var historyControllerGetListV1ResponseItemsItemRequestParamsNamedEntityRecogniti
25598
25907
  var historyControllerGetListV1ResponseItemsItemRequestParamsCustomSpellingDefault = false;
25599
25908
  var historyControllerGetListV1ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
25600
25909
  var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmDefault = false;
25601
- var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
25910
+ var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
25602
25911
  var historyControllerGetListV1ResponseItemsItemRequestParamsPiiRedactionDefault = false;
25603
25912
  var historyControllerGetListV1ResponseItemsItemRequestParamsSentencesDefault = false;
25604
25913
  var historyControllerGetListV1ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
@@ -26009,12 +26318,12 @@ var historyControllerGetListV1Response = import_zod5.z.object({
26009
26318
  "Forces the translation to use informal language forms when available in the target language."
26010
26319
  )
26011
26320
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
26012
- summarization: import_zod5.z.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
26321
+ summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
26013
26322
  summarization_config: import_zod5.z.object({
26014
26323
  type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
26015
26324
  historyControllerGetListV1ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
26016
26325
  ).describe("The type of summarization to apply")
26017
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
26326
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
26018
26327
  named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
26019
26328
  custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
26020
26329
  custom_spelling_config: import_zod5.z.object({
@@ -26023,7 +26332,7 @@ var historyControllerGetListV1Response = import_zod5.z.object({
26023
26332
  "**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
26024
26333
  ),
26025
26334
  sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
26026
- audio_to_llm: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
26335
+ audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
26027
26336
  audio_to_llm_config: import_zod5.z.object({
26028
26337
  prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
26029
26338
  model: import_zod5.z.string().default(
@@ -26031,7 +26340,7 @@ var historyControllerGetListV1Response = import_zod5.z.object({
26031
26340
  ).describe(
26032
26341
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
26033
26342
  )
26034
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
26343
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
26035
26344
  pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
26036
26345
  pii_redaction_config: import_zod5.z.object({
26037
26346
  entity_types: import_zod5.z.enum([
@@ -31276,6 +31585,7 @@ __export(openAIAudioRealtimeAPI_zod_exports, {
31276
31585
  createRealtimeClientSecretBodySessionPromptVariablesTypeDefault: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefault,
31277
31586
  createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultOne: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultOne,
31278
31587
  createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultTwo: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultTwo,
31588
+ createRealtimeClientSecretBodySessionReasoningEffortDefault: () => createRealtimeClientSecretBodySessionReasoningEffortDefault,
31279
31589
  createRealtimeClientSecretBodySessionToolChoiceDefault: () => createRealtimeClientSecretBodySessionToolChoiceDefault,
31280
31590
  createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne: () => createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne,
31281
31591
  createRealtimeClientSecretBodySessionTracingDefault: () => createRealtimeClientSecretBodySessionTracingDefault,
@@ -31300,6 +31610,7 @@ __export(openAIAudioRealtimeAPI_zod_exports, {
31300
31610
  createRealtimeClientSecretResponseSessionPromptVariablesTypeDefault: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefault,
31301
31611
  createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultOne: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultOne,
31302
31612
  createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo,
31613
+ createRealtimeClientSecretResponseSessionReasoningEffortDefault: () => createRealtimeClientSecretResponseSessionReasoningEffortDefault,
31303
31614
  createRealtimeClientSecretResponseSessionToolChoiceDefault: () => createRealtimeClientSecretResponseSessionToolChoiceDefault,
31304
31615
  createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne: () => createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne,
31305
31616
  createRealtimeClientSecretResponseSessionTracingDefaultOne: () => createRealtimeClientSecretResponseSessionTracingDefaultOne,
@@ -31656,6 +31967,7 @@ var createRealtimeClientSecretBodySessionTracingDefaultOne = "auto";
31656
31967
  var createRealtimeClientSecretBodySessionTracingDefault = null;
31657
31968
  var createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne = "always";
31658
31969
  var createRealtimeClientSecretBodySessionToolChoiceDefault = "auto";
31970
+ var createRealtimeClientSecretBodySessionReasoningEffortDefault = "low";
31659
31971
  var createRealtimeClientSecretBodySessionTruncationRetentionRatioMin = 0;
31660
31972
  var createRealtimeClientSecretBodySessionTruncationRetentionRatioMax = 1;
31661
31973
  var createRealtimeClientSecretBodySessionTruncationTokenLimitsPostInstructionsMin = 0;
@@ -31691,6 +32003,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
31691
32003
  import_zod6.z.enum([
31692
32004
  "gpt-realtime",
31693
32005
  "gpt-realtime-1.5",
32006
+ "gpt-realtime-2",
31694
32007
  "gpt-realtime-2025-08-28",
31695
32008
  "gpt-4o-realtime-preview",
31696
32009
  "gpt-4o-realtime-preview-2024-10-01",
@@ -31731,16 +32044,20 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
31731
32044
  "gpt-4o-mini-transcribe",
31732
32045
  "gpt-4o-mini-transcribe-2025-12-15",
31733
32046
  "gpt-4o-transcribe",
31734
- "gpt-4o-transcribe-diarize"
32047
+ "gpt-4o-transcribe-diarize",
32048
+ "gpt-realtime-whisper"
31735
32049
  ])
31736
32050
  ).optional().describe(
31737
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
32051
+ "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
31738
32052
  ),
31739
32053
  language: import_zod6.z.string().optional().describe(
31740
32054
  "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
31741
32055
  ),
31742
32056
  prompt: import_zod6.z.string().optional().describe(
31743
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
32057
+ 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
32058
+ ),
32059
+ delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
32060
+ "Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
31744
32061
  )
31745
32062
  }).optional(),
31746
32063
  noise_reduction: import_zod6.z.object({
@@ -31807,7 +32124,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
31807
32124
  "Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
31808
32125
  )
31809
32126
  ]).describe(
31810
- 'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
32127
+ 'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
31811
32128
  ).or(import_zod6.z.null()).optional()
31812
32129
  }).optional(),
31813
32130
  output: import_zod6.z.object({
@@ -31880,7 +32197,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
31880
32197
  server_label: import_zod6.z.string().describe(
31881
32198
  "A label for this MCP server, used to identify it in tool calls.\n"
31882
32199
  ),
31883
- server_url: import_zod6.z.string().optional().describe(
32200
+ server_url: import_zod6.z.string().url().optional().describe(
31884
32201
  "The URL for the MCP server. One of `server_url` or `connector_id` must be\nprovided.\n"
31885
32202
  ),
31886
32203
  connector_id: import_zod6.z.enum([
@@ -31958,6 +32275,16 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
31958
32275
  ).default(createRealtimeClientSecretBodySessionToolChoiceDefault).describe(
31959
32276
  "How the model chooses tools. Provide one of the string modes or force a specific\nfunction/MCP tool.\n"
31960
32277
  ),
32278
+ parallel_tool_calls: import_zod6.z.boolean().optional().describe(
32279
+ "Whether the model may call multiple tools in parallel. Only supported by\nreasoning Realtime models such as `gpt-realtime-2`.\n"
32280
+ ),
32281
+ reasoning: import_zod6.z.object({
32282
+ effort: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).default(createRealtimeClientSecretBodySessionReasoningEffortDefault).describe(
32283
+ "Constrains effort on reasoning for reasoning-capable Realtime models such as\n`gpt-realtime-2`.\n"
32284
+ )
32285
+ }).optional().describe(
32286
+ "Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.\n"
32287
+ ),
31961
32288
  max_output_tokens: import_zod6.z.number().or(import_zod6.z.enum(["inf"])).optional().describe(
31962
32289
  "Maximum number of output tokens for a single assistant response,\ninclusive of tool calls. Provide an integer between 1 and 4096 to\nlimit output tokens, or `inf` for the maximum available tokens for a\ngiven model. Defaults to `inf`.\n"
31963
32290
  ),
@@ -31997,7 +32324,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
31997
32324
  ).or(
31998
32325
  import_zod6.z.object({
31999
32326
  type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
32000
- image_url: import_zod6.z.string().describe(
32327
+ image_url: import_zod6.z.string().url().describe(
32001
32328
  "The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
32002
32329
  ).or(import_zod6.z.null()).optional(),
32003
32330
  file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
@@ -32011,7 +32338,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
32011
32338
  file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
32012
32339
  filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
32013
32340
  file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
32014
- file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
32341
+ file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
32015
32342
  detail: import_zod6.z.enum(["low", "high"]).optional()
32016
32343
  }).describe("A file input to the model.")
32017
32344
  )
@@ -32047,16 +32374,20 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
32047
32374
  "gpt-4o-mini-transcribe",
32048
32375
  "gpt-4o-mini-transcribe-2025-12-15",
32049
32376
  "gpt-4o-transcribe",
32050
- "gpt-4o-transcribe-diarize"
32377
+ "gpt-4o-transcribe-diarize",
32378
+ "gpt-realtime-whisper"
32051
32379
  ])
32052
32380
  ).optional().describe(
32053
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
32381
+ "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
32054
32382
  ),
32055
32383
  language: import_zod6.z.string().optional().describe(
32056
32384
  "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
32057
32385
  ),
32058
32386
  prompt: import_zod6.z.string().optional().describe(
32059
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
32387
+ 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
32388
+ ),
32389
+ delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
32390
+ "Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
32060
32391
  )
32061
32392
  }).optional(),
32062
32393
  noise_reduction: import_zod6.z.object({
@@ -32123,7 +32454,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
32123
32454
  "Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
32124
32455
  )
32125
32456
  ]).describe(
32126
- 'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
32457
+ 'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
32127
32458
  ).or(import_zod6.z.null()).optional()
32128
32459
  }).optional()
32129
32460
  }).optional().describe("Configuration for input and output audio.\n"),
@@ -32154,6 +32485,7 @@ var createRealtimeClientSecretResponseSessionTracingDefaultTwo = "auto";
32154
32485
  var createRealtimeClientSecretResponseSessionTracingDefaultOne = null;
32155
32486
  var createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne = "always";
32156
32487
  var createRealtimeClientSecretResponseSessionToolChoiceDefault = "auto";
32488
+ var createRealtimeClientSecretResponseSessionReasoningEffortDefault = "low";
32157
32489
  var createRealtimeClientSecretResponseSessionTruncationRetentionRatioMin = 0;
32158
32490
  var createRealtimeClientSecretResponseSessionTruncationRetentionRatioMax = 1;
32159
32491
  var createRealtimeClientSecretResponseSessionTruncationTokenLimitsPostInstructionsMin = 0;
@@ -32163,17 +32495,14 @@ var createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo = "in
32163
32495
  var createRealtimeClientSecretResponse = import_zod6.z.object({
32164
32496
  value: import_zod6.z.string().describe("The generated client secret value."),
32165
32497
  expires_at: import_zod6.z.number().describe("Expiration timestamp for the client secret, in seconds since epoch."),
32166
- session: import_zod6.z.discriminatedUnion("type", [
32498
+ session: import_zod6.z.union([
32167
32499
  import_zod6.z.object({
32168
- client_secret: import_zod6.z.object({
32169
- value: import_zod6.z.string().describe(
32170
- "Ephemeral key usable in client environments to authenticate connections to the Realtime API. Use this in client-side environments rather than a standard API token, which should only be used server-side.\n"
32171
- ),
32172
- expires_at: import_zod6.z.number().describe(
32173
- "Timestamp for when the token expires. Currently, all tokens expire\nafter one minute.\n"
32174
- )
32175
- }).describe("Ephemeral key returned by the API."),
32176
32500
  type: import_zod6.z.enum(["realtime"]).describe("The type of session to create. Always `realtime` for the Realtime API.\n"),
32501
+ id: import_zod6.z.string().describe(
32502
+ "Unique identifier for the session that looks like `sess_1234567890abcdef`.\n"
32503
+ ),
32504
+ object: import_zod6.z.enum(["realtime.session"]).describe("The object type. Always `realtime.session`."),
32505
+ expires_at: import_zod6.z.number().optional().describe("Expiration timestamp for the session, in seconds since epoch."),
32177
32506
  output_modalities: import_zod6.z.array(import_zod6.z.enum(["text", "audio"])).default(createRealtimeClientSecretResponseSessionOutputModalitiesDefault).describe(
32178
32507
  'The set of modalities the model can respond with. It defaults to `["audio"]`, indicating\nthat the model will respond with audio plus a transcript. `["text"]` can be used to make\nthe model respond with text only. It is not possible to request both `text` and `audio` at the same time.\n'
32179
32508
  ),
@@ -32181,6 +32510,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32181
32510
  import_zod6.z.enum([
32182
32511
  "gpt-realtime",
32183
32512
  "gpt-realtime-1.5",
32513
+ "gpt-realtime-2",
32184
32514
  "gpt-realtime-2025-08-28",
32185
32515
  "gpt-4o-realtime-preview",
32186
32516
  "gpt-4o-realtime-preview-2024-10-01",
@@ -32203,15 +32533,15 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32203
32533
  audio: import_zod6.z.object({
32204
32534
  input: import_zod6.z.object({
32205
32535
  format: import_zod6.z.object({
32206
- type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
32207
- rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
32536
+ type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
32537
+ rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
32208
32538
  }).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
32209
32539
  import_zod6.z.object({
32210
- type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
32540
+ type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
32211
32541
  }).describe("The G.711 \u03BC-law format.")
32212
32542
  ).or(
32213
32543
  import_zod6.z.object({
32214
- type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
32544
+ type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
32215
32545
  }).describe("The G.711 A-law format.")
32216
32546
  ).optional(),
32217
32547
  transcription: import_zod6.z.object({
@@ -32221,20 +32551,19 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32221
32551
  "gpt-4o-mini-transcribe",
32222
32552
  "gpt-4o-mini-transcribe-2025-12-15",
32223
32553
  "gpt-4o-transcribe",
32224
- "gpt-4o-transcribe-diarize"
32554
+ "gpt-4o-transcribe-diarize",
32555
+ "gpt-realtime-whisper"
32225
32556
  ])
32226
32557
  ).optional().describe(
32227
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
32228
- ),
32229
- language: import_zod6.z.string().optional().describe(
32230
- "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
32558
+ "The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
32231
32559
  ),
32560
+ language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
32232
32561
  prompt: import_zod6.z.string().optional().describe(
32233
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
32562
+ "The prompt configured for input audio transcription, when present.\n"
32234
32563
  )
32235
32564
  }).optional(),
32236
32565
  noise_reduction: import_zod6.z.object({
32237
- type: import_zod6.z.enum(["near_field", "far_field"]).describe(
32566
+ type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
32238
32567
  "Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones such as laptop or conference room microphones.\n"
32239
32568
  )
32240
32569
  }).optional().describe(
@@ -32297,20 +32626,20 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32297
32626
  "Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
32298
32627
  )
32299
32628
  ]).describe(
32300
- 'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
32629
+ 'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
32301
32630
  ).or(import_zod6.z.null()).optional()
32302
32631
  }).optional(),
32303
32632
  output: import_zod6.z.object({
32304
32633
  format: import_zod6.z.object({
32305
- type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
32306
- rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
32634
+ type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
32635
+ rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
32307
32636
  }).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
32308
32637
  import_zod6.z.object({
32309
- type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
32638
+ type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
32310
32639
  }).describe("The G.711 \u03BC-law format.")
32311
32640
  ).or(
32312
32641
  import_zod6.z.object({
32313
- type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
32642
+ type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
32314
32643
  }).describe("The G.711 A-law format.")
32315
32644
  ).optional(),
32316
32645
  voice: import_zod6.z.string().or(
@@ -32354,7 +32683,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32354
32683
  ).or(import_zod6.z.null()).optional(),
32355
32684
  tools: import_zod6.z.array(
32356
32685
  import_zod6.z.object({
32357
- type: import_zod6.z.enum(["function"]).describe("The type of the tool, i.e. `function`."),
32686
+ type: import_zod6.z.enum(["function"]).optional().describe("The type of the tool, i.e. `function`."),
32358
32687
  name: import_zod6.z.string().optional().describe("The name of the function."),
32359
32688
  description: import_zod6.z.string().optional().describe(
32360
32689
  "The description of the function, including guidance on when and how\nto call it, and guidance about what to tell the user when calling\n(if anything).\n"
@@ -32366,7 +32695,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32366
32695
  server_label: import_zod6.z.string().describe(
32367
32696
  "A label for this MCP server, used to identify it in tool calls.\n"
32368
32697
  ),
32369
- server_url: import_zod6.z.string().optional().describe(
32698
+ server_url: import_zod6.z.string().url().optional().describe(
32370
32699
  "The URL for the MCP server. One of `server_url` or `connector_id` must be\nprovided.\n"
32371
32700
  ),
32372
32701
  connector_id: import_zod6.z.enum([
@@ -32378,7 +32707,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32378
32707
  "connector_outlookcalendar",
32379
32708
  "connector_outlookemail",
32380
32709
  "connector_sharepoint"
32381
- ]).describe(
32710
+ ]).optional().describe(
32382
32711
  "Identifier for service connectors, like those available in ChatGPT. One of\n`server_url` or `connector_id` must be provided. Learn more about service\nconnectors [here](/docs/guides/tools-remote-mcp#connectors).\n\nCurrently supported `connector_id` values are:\n\n- Dropbox: `connector_dropbox`\n- Gmail: `connector_gmail`\n- Google Calendar: `connector_googlecalendar`\n- Google Drive: `connector_googledrive`\n- Microsoft Teams: `connector_microsoftteams`\n- Outlook Calendar: `connector_outlookcalendar`\n- Outlook Email: `connector_outlookemail`\n- SharePoint: `connector_sharepoint`\n"
32383
32712
  ),
32384
32713
  authorization: import_zod6.z.string().optional().describe(
@@ -32444,6 +32773,13 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32444
32773
  ).default(createRealtimeClientSecretResponseSessionToolChoiceDefault).describe(
32445
32774
  "How the model chooses tools. Provide one of the string modes or force a specific\nfunction/MCP tool.\n"
32446
32775
  ),
32776
+ reasoning: import_zod6.z.object({
32777
+ effort: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).default(createRealtimeClientSecretResponseSessionReasoningEffortDefault).describe(
32778
+ "Constrains effort on reasoning for reasoning-capable Realtime models such as\n`gpt-realtime-2`.\n"
32779
+ )
32780
+ }).optional().describe(
32781
+ "Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.\n"
32782
+ ),
32447
32783
  max_output_tokens: import_zod6.z.number().or(import_zod6.z.enum(["inf"])).optional().describe(
32448
32784
  "Maximum number of output tokens for a single assistant response,\ninclusive of tool calls. Provide an integer between 1 and 4096 to\nlimit output tokens, or `inf` for the maximum available tokens for a\ngiven model. Defaults to `inf`.\n"
32449
32785
  ),
@@ -32483,7 +32819,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32483
32819
  ).or(
32484
32820
  import_zod6.z.object({
32485
32821
  type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
32486
- image_url: import_zod6.z.string().describe(
32822
+ image_url: import_zod6.z.string().url().describe(
32487
32823
  "The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
32488
32824
  ).or(import_zod6.z.null()).optional(),
32489
32825
  file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
@@ -32497,8 +32833,8 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32497
32833
  file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
32498
32834
  filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
32499
32835
  file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
32500
- file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
32501
- detail: import_zod6.z.enum(["low", "high"])
32836
+ file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
32837
+ detail: import_zod6.z.enum(["low", "high"]).optional()
32502
32838
  }).describe("A file input to the model.")
32503
32839
  )
32504
32840
  ).describe(
@@ -32507,9 +32843,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32507
32843
  }).describe(
32508
32844
  "Reference to a prompt template and its variables.\n[Learn more](/docs/guides/text?api-mode=responses#reusable-prompts).\n"
32509
32845
  ).or(import_zod6.z.null()).optional()
32510
- }).describe(
32511
- "A new Realtime session configuration, with an ephemeral key. Default TTL\nfor keys is one minute.\n"
32512
- ),
32846
+ }).describe("A Realtime session configuration object.\n"),
32513
32847
  import_zod6.z.object({
32514
32848
  type: import_zod6.z.enum(["transcription"]).describe(
32515
32849
  "The type of session. Always `transcription` for transcription sessions.\n"
@@ -32525,15 +32859,15 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32525
32859
  audio: import_zod6.z.object({
32526
32860
  input: import_zod6.z.object({
32527
32861
  format: import_zod6.z.object({
32528
- type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
32529
- rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
32862
+ type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
32863
+ rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
32530
32864
  }).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
32531
32865
  import_zod6.z.object({
32532
- type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
32866
+ type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
32533
32867
  }).describe("The G.711 \u03BC-law format.")
32534
32868
  ).or(
32535
32869
  import_zod6.z.object({
32536
- type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
32870
+ type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
32537
32871
  }).describe("The G.711 A-law format.")
32538
32872
  ).optional(),
32539
32873
  transcription: import_zod6.z.object({
@@ -32543,20 +32877,19 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32543
32877
  "gpt-4o-mini-transcribe",
32544
32878
  "gpt-4o-mini-transcribe-2025-12-15",
32545
32879
  "gpt-4o-transcribe",
32546
- "gpt-4o-transcribe-diarize"
32880
+ "gpt-4o-transcribe-diarize",
32881
+ "gpt-realtime-whisper"
32547
32882
  ])
32548
32883
  ).optional().describe(
32549
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
32550
- ),
32551
- language: import_zod6.z.string().optional().describe(
32552
- "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
32884
+ "The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
32553
32885
  ),
32886
+ language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
32554
32887
  prompt: import_zod6.z.string().optional().describe(
32555
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
32888
+ "The prompt configured for input audio transcription, when present.\n"
32556
32889
  )
32557
32890
  }).optional(),
32558
32891
  noise_reduction: import_zod6.z.object({
32559
- type: import_zod6.z.enum(["near_field", "far_field"]).describe(
32892
+ type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
32560
32893
  "Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones such as laptop or conference room microphones.\n"
32561
32894
  )
32562
32895
  }).optional().describe("Configuration for input audio noise reduction.\n"),
@@ -32573,8 +32906,10 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
32573
32906
  silence_duration_ms: import_zod6.z.number().optional().describe(
32574
32907
  "Duration of silence to detect speech stop (in milliseconds). Defaults\nto 500ms. With shorter values the model will respond more quickly,\nbut may jump in on short pauses from the user.\n"
32575
32908
  )
32576
- }).optional().describe(
32577
- "Configuration for turn detection. Can be set to `null` to turn off. Server\nVAD means that the model will detect the start and end of speech based on\naudio volume and respond at the end of user speech.\n"
32909
+ }).describe(
32910
+ "Configuration for turn detection. Can be set to `null` to turn off. Server\nVAD means that the model will detect the start and end of speech based on\naudio volume and respond at the end of user speech. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported.\n"
32911
+ ).or(import_zod6.z.null()).optional().describe(
32912
+ "Configuration for turn detection. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported.\n"
32578
32913
  )
32579
32914
  }).optional()
32580
32915
  }).optional().describe("Configuration for input audio for the session.\n")
@@ -32714,7 +33049,7 @@ var createRealtimeSessionBody = import_zod6.z.object({
32714
33049
  ).or(
32715
33050
  import_zod6.z.object({
32716
33051
  type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
32717
- image_url: import_zod6.z.string().describe(
33052
+ image_url: import_zod6.z.string().url().describe(
32718
33053
  "The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
32719
33054
  ).or(import_zod6.z.null()).optional(),
32720
33055
  file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
@@ -32728,7 +33063,7 @@ var createRealtimeSessionBody = import_zod6.z.object({
32728
33063
  file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
32729
33064
  filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
32730
33065
  file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
32731
- file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
33066
+ file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
32732
33067
  detail: import_zod6.z.enum(["low", "high"]).optional()
32733
33068
  }).describe("A file input to the model.")
32734
33069
  )
@@ -32777,17 +33112,14 @@ var createRealtimeSessionResponse = import_zod6.z.object({
32777
33112
  "gpt-4o-mini-transcribe",
32778
33113
  "gpt-4o-mini-transcribe-2025-12-15",
32779
33114
  "gpt-4o-transcribe",
32780
- "gpt-4o-transcribe-diarize"
33115
+ "gpt-4o-transcribe-diarize",
33116
+ "gpt-realtime-whisper"
32781
33117
  ])
32782
33118
  ).optional().describe(
32783
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
33119
+ "The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
32784
33120
  ),
32785
- language: import_zod6.z.string().optional().describe(
32786
- "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
32787
- ),
32788
- prompt: import_zod6.z.string().optional().describe(
32789
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
32790
- )
33121
+ language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
33122
+ prompt: import_zod6.z.string().optional().describe("The prompt configured for input audio transcription, when present.\n")
32791
33123
  }).optional(),
32792
33124
  noise_reduction: import_zod6.z.object({
32793
33125
  type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
@@ -32913,16 +33245,20 @@ var createRealtimeTranscriptionSessionBody = import_zod6.z.object({
32913
33245
  "gpt-4o-mini-transcribe",
32914
33246
  "gpt-4o-mini-transcribe-2025-12-15",
32915
33247
  "gpt-4o-transcribe",
32916
- "gpt-4o-transcribe-diarize"
33248
+ "gpt-4o-transcribe-diarize",
33249
+ "gpt-realtime-whisper"
32917
33250
  ])
32918
33251
  ).optional().describe(
32919
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
33252
+ "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
32920
33253
  ),
32921
33254
  language: import_zod6.z.string().optional().describe(
32922
33255
  "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
32923
33256
  ),
32924
33257
  prompt: import_zod6.z.string().optional().describe(
32925
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
33258
+ 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
33259
+ ),
33260
+ delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
33261
+ "Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
32926
33262
  )
32927
33263
  }).optional(),
32928
33264
  include: import_zod6.z.array(import_zod6.z.enum(["item.input_audio_transcription.logprobs"])).optional().describe(
@@ -32951,17 +33287,14 @@ var createRealtimeTranscriptionSessionResponse = import_zod6.z.object({
32951
33287
  "gpt-4o-mini-transcribe",
32952
33288
  "gpt-4o-mini-transcribe-2025-12-15",
32953
33289
  "gpt-4o-transcribe",
32954
- "gpt-4o-transcribe-diarize"
33290
+ "gpt-4o-transcribe-diarize",
33291
+ "gpt-realtime-whisper"
32955
33292
  ])
32956
33293
  ).optional().describe(
32957
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
33294
+ "The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
32958
33295
  ),
32959
- language: import_zod6.z.string().optional().describe(
32960
- "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
32961
- ),
32962
- prompt: import_zod6.z.string().optional().describe(
32963
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
32964
- )
33296
+ language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
33297
+ prompt: import_zod6.z.string().optional().describe("The prompt configured for input audio transcription, when present.\n")
32965
33298
  }).optional(),
32966
33299
  turn_detection: import_zod6.z.object({
32967
33300
  type: import_zod6.z.string().optional().describe("Type of turn detection, only `server_vad` is currently supported.\n"),
@@ -36346,6 +36679,7 @@ __export(sonioxPublicAPI_zod_exports, {
36346
36679
  createTranscriptionBodyWebhookUrlRegExpOne: () => createTranscriptionBodyWebhookUrlRegExpOne,
36347
36680
  deleteFileParams: () => deleteFileParams,
36348
36681
  deleteTranscriptionParams: () => deleteTranscriptionParams,
36682
+ getConcurrencyLimitsResponse: () => getConcurrencyLimitsResponse,
36349
36683
  getFileParams: () => getFileParams,
36350
36684
  getFileResponse: () => getFileResponse,
36351
36685
  getFilesCountResponse: () => getFilesCountResponse,
@@ -36363,6 +36697,12 @@ __export(sonioxPublicAPI_zod_exports, {
36363
36697
  getTranscriptionsQueryLimitMax: () => getTranscriptionsQueryLimitMax,
36364
36698
  getTranscriptionsQueryParams: () => getTranscriptionsQueryParams,
36365
36699
  getTranscriptionsResponse: () => getTranscriptionsResponse,
36700
+ getTtsModelsResponse: () => getTtsModelsResponse,
36701
+ getUsageLogsQueryLimitDefault: () => getUsageLogsQueryLimitDefault,
36702
+ getUsageLogsQueryLimitMax: () => getUsageLogsQueryLimitMax,
36703
+ getUsageLogsQueryParams: () => getUsageLogsQueryParams,
36704
+ getUsageLogsQuerySortDefault: () => getUsageLogsQuerySortDefault,
36705
+ getUsageLogsResponse: () => getUsageLogsResponse,
36366
36706
  uploadFileBody: () => uploadFileBody,
36367
36707
  uploadFileBodyClientReferenceIdMaxOne: () => uploadFileBodyClientReferenceIdMaxOne
36368
36708
  });
@@ -36613,11 +36953,73 @@ var getModelsResponse = import_zod10.z.object({
36613
36953
  })
36614
36954
  ).describe("List of available models and their attributes.")
36615
36955
  });
36956
+ var getTtsModelsResponse = import_zod10.z.object({
36957
+ models: import_zod10.z.array(
36958
+ import_zod10.z.object({
36959
+ id: import_zod10.z.string().describe("Unique identifier of the model."),
36960
+ aliased_model_id: import_zod10.z.string().or(import_zod10.z.null()).describe("If this is an alias, the id of the aliased model."),
36961
+ name: import_zod10.z.string().describe("Name of the model."),
36962
+ voices: import_zod10.z.array(
36963
+ import_zod10.z.object({
36964
+ id: import_zod10.z.string().describe("Unique identifier of the voice."),
36965
+ description: import_zod10.z.string().describe("Description of the TTS voice."),
36966
+ gender: import_zod10.z.enum(["male", "female", "neutral"])
36967
+ })
36968
+ ).describe("List of available voices for this model."),
36969
+ languages: import_zod10.z.array(
36970
+ import_zod10.z.object({
36971
+ code: import_zod10.z.string().describe("2-letter language code."),
36972
+ name: import_zod10.z.string().describe("Language name.")
36973
+ })
36974
+ ).describe("List of languages supported by the model.")
36975
+ })
36976
+ ).describe("List of available TTS models and their attributes.")
36977
+ });
36978
+ var getUsageLogsQueryLimitDefault = 1e3;
36979
+ var getUsageLogsQueryLimitMax = 1e3;
36980
+ var getUsageLogsQuerySortDefault = "end_time_asc";
36981
+ var getUsageLogsQueryParams = import_zod10.z.object({
36982
+ start_time: import_zod10.z.string().describe("Start of the time window (inclusive). Filters by request end time."),
36983
+ end_time: import_zod10.z.string().describe("End of the time window (exclusive). Filters by request end time."),
36984
+ limit: import_zod10.z.number().min(1).max(getUsageLogsQueryLimitMax).default(getUsageLogsQueryLimitDefault).describe("Maximum number of usage log entries to return."),
36985
+ sort: import_zod10.z.enum(["end_time_asc", "end_time_desc"]).default(getUsageLogsQuerySortDefault).describe(
36986
+ "Sort order by end_time.Use `end_time_desc` to get the most recent entries first. When paginating, pass the same `sort` value alongside the cursor."
36987
+ ),
36988
+ cursor: import_zod10.z.string().or(import_zod10.z.null()).optional().describe("Pagination cursor for the next page of results.")
36989
+ });
36990
+ var getUsageLogsResponse = import_zod10.z.object({
36991
+ usage_logs: import_zod10.z.array(
36992
+ import_zod10.z.object({
36993
+ uuid: import_zod10.z.string().uuid().describe("Unique identifier of the request."),
36994
+ request_scope: import_zod10.z.string().describe("Scope of the request (api / playground)."),
36995
+ client_reference_id: import_zod10.z.string().describe("Client reference ID supplied on the original request. Empty string if none."),
36996
+ model: import_zod10.z.string().describe("Model identifier."),
36997
+ start_time: import_zod10.z.string().datetime({}).describe("When the request started."),
36998
+ end_time: import_zod10.z.string().datetime({}).describe("When the request ended."),
36999
+ input_text_tokens: import_zod10.z.number(),
37000
+ input_audio_tokens: import_zod10.z.number(),
37001
+ input_audio_duration_ms: import_zod10.z.number(),
37002
+ output_text_tokens: import_zod10.z.number(),
37003
+ output_audio_tokens: import_zod10.z.number(),
37004
+ output_audio_duration_ms: import_zod10.z.number(),
37005
+ cost_usd: import_zod10.z.string(),
37006
+ input_cost_usd: import_zod10.z.string(),
37007
+ input_text_cost_usd: import_zod10.z.string(),
37008
+ input_audio_cost_usd: import_zod10.z.string(),
37009
+ output_cost_usd: import_zod10.z.string(),
37010
+ output_text_cost_usd: import_zod10.z.string(),
37011
+ output_audio_cost_usd: import_zod10.z.string()
37012
+ })
37013
+ ).describe("Per-request usage log entries ordered by end_time, uuid (per `sort`)."),
37014
+ next_page_cursor: import_zod10.z.string().or(import_zod10.z.null()).optional().describe(
37015
+ "A pagination token that references the next page of results. When more data is available, this field contains a value to pass in the cursor parameter of a subsequent request. When null, no additional results are available."
37016
+ )
37017
+ });
36616
37018
  var createTemporaryApiKeyBodyExpiresInSecondsMax = 3600;
36617
37019
  var createTemporaryApiKeyBodyClientReferenceIdMaxOne = 256;
36618
37020
  var createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne = 18e3;
36619
37021
  var createTemporaryApiKeyBody = import_zod10.z.object({
36620
- usage_type: import_zod10.z.enum(["transcribe_websocket"]),
37022
+ usage_type: import_zod10.z.enum(["transcribe_websocket", "tts_rt"]),
36621
37023
  expires_in_seconds: import_zod10.z.number().min(1).max(createTemporaryApiKeyBodyExpiresInSecondsMax).describe("Duration in seconds until the temporary API key expires."),
36622
37024
  client_reference_id: import_zod10.z.string().max(createTemporaryApiKeyBodyClientReferenceIdMaxOne).or(import_zod10.z.null()).optional().describe("Optional tracking identifier string. Does not need to be unique."),
36623
37025
  single_use: import_zod10.z.boolean().or(import_zod10.z.null()).optional().describe("If true, the temporary API key can be used only once."),
@@ -36625,6 +37027,28 @@ var createTemporaryApiKeyBody = import_zod10.z.object({
36625
37027
  "Maximum WebSocket connection duration in seconds. If exceeded, the connection will be dropped. If not set, no limit is applied."
36626
37028
  )
36627
37029
  });
37030
+ var getConcurrencyLimitsResponse = import_zod10.z.object({
37031
+ project: import_zod10.z.object({
37032
+ current: import_zod10.z.object({
37033
+ transcribe_concurrent: import_zod10.z.number(),
37034
+ tts_concurrent: import_zod10.z.number()
37035
+ }).describe("Live counts read from Redis"),
37036
+ limits: import_zod10.z.object({
37037
+ transcribe_concurrent: import_zod10.z.number().or(import_zod10.z.null()),
37038
+ tts_concurrent: import_zod10.z.number().or(import_zod10.z.null())
37039
+ }).describe("Configured limits")
37040
+ }),
37041
+ organization: import_zod10.z.object({
37042
+ current: import_zod10.z.object({
37043
+ transcribe_concurrent: import_zod10.z.number(),
37044
+ tts_concurrent: import_zod10.z.number()
37045
+ }).describe("Live counts read from Redis"),
37046
+ limits: import_zod10.z.object({
37047
+ transcribe_concurrent: import_zod10.z.number().or(import_zod10.z.null()),
37048
+ tts_concurrent: import_zod10.z.number().or(import_zod10.z.null())
37049
+ }).describe("Configured limits")
37050
+ })
37051
+ });
36628
37052
 
36629
37053
  // src/generated/soniox/streaming-types.zod.ts
36630
37054
  var streaming_types_zod_exports = {};
@@ -36709,10 +37133,10 @@ var sonioxStructuredContextSchema = import_zod11.z.object({
36709
37133
  var sonioxContextSchema = import_zod11.z.union([sonioxStructuredContextSchema, import_zod11.z.string()]);
36710
37134
  var sonioxRealtimeModelSchema = import_zod11.z.enum([
36711
37135
  "stt-rt-v4",
36712
- "stt-rt-v3",
36713
37136
  "stt-rt-preview",
36714
37137
  "stt-rt-v3-preview",
36715
- "stt-rt-preview-v2"
37138
+ "stt-rt-preview-v2",
37139
+ "stt-rt-v3"
36716
37140
  ]);
36717
37141
  var streamingTranscriberParams3 = import_zod11.z.object({
36718
37142
  model: sonioxRealtimeModelSchema,
@@ -36720,12 +37144,16 @@ var streamingTranscriberParams3 = import_zod11.z.object({
36720
37144
  sampleRate: import_zod11.z.number().optional(),
36721
37145
  numChannels: import_zod11.z.number().optional(),
36722
37146
  languageHints: import_zod11.z.array(import_zod11.z.string()).optional(),
37147
+ languageHintsStrict: import_zod11.z.boolean().optional(),
36723
37148
  context: sonioxContextSchema.optional(),
36724
37149
  enableSpeakerDiarization: import_zod11.z.boolean().optional(),
36725
37150
  enableLanguageIdentification: import_zod11.z.boolean().optional(),
36726
37151
  enableEndpointDetection: import_zod11.z.boolean().optional(),
37152
+ maxEndpointDelayMs: import_zod11.z.number().optional(),
36727
37153
  translation: sonioxTranslationConfigSchema.optional(),
36728
- clientReferenceId: import_zod11.z.string().optional()
37154
+ clientReferenceId: import_zod11.z.string().optional(),
37155
+ keepaliveIntervalMs: import_zod11.z.number().optional(),
37156
+ connectTimeoutMs: import_zod11.z.number().optional()
36729
37157
  });
36730
37158
  var sonioxTranslationStatusSchema = import_zod11.z.enum(["original", "translation", "none"]);
36731
37159
  var sonioxTokenSchema = import_zod11.z.object({
@@ -37317,6 +37745,7 @@ __export(schema_exports5, {
37317
37745
  V1ListenPostParametersCallbackMethod: () => V1ListenPostParametersCallbackMethod,
37318
37746
  V1ListenPostParametersCustomIntentMode: () => V1ListenPostParametersCustomIntentMode,
37319
37747
  V1ListenPostParametersCustomTopicMode: () => V1ListenPostParametersCustomTopicMode,
37748
+ V1ListenPostParametersDiarizeModel: () => V1ListenPostParametersDiarizeModel,
37320
37749
  V1ListenPostParametersEncoding: () => V1ListenPostParametersEncoding,
37321
37750
  V1ListenPostParametersModel0: () => V1ListenPostParametersModel0,
37322
37751
  V1ListenPostParametersRedactSchemaOneOf1Items: () => V1ListenPostParametersRedactSchemaOneOf1Items,
@@ -37355,6 +37784,13 @@ __export(schema_exports5, {
37355
37784
  V1SpeakPostParametersSampleRate4: () => V1SpeakPostParametersSampleRate4
37356
37785
  });
37357
37786
 
37787
+ // src/generated/deepgram/schema/v1ListenPostParametersDiarizeModel.ts
37788
+ var V1ListenPostParametersDiarizeModel = {
37789
+ latest: "latest",
37790
+ v1: "v1",
37791
+ v2: "v2"
37792
+ };
37793
+
37358
37794
  // src/generated/deepgram/schema/v1ListenPostParametersModel0.ts
37359
37795
  var V1ListenPostParametersModel0 = {
37360
37796
  "nova-3": "nova-3",
@@ -37571,6 +38007,7 @@ var V1SpeakPostParametersSampleRate = {
37571
38007
  var schema_exports6 = {};
37572
38008
  __export(schema_exports6, {
37573
38009
  AudioResponseFormat: () => AudioResponseFormat,
38010
+ AudioTranscriptionDelay: () => AudioTranscriptionDelay,
37574
38011
  CreateSpeechRequestResponseFormat: () => CreateSpeechRequestResponseFormat,
37575
38012
  CreateSpeechRequestStreamFormat: () => CreateSpeechRequestStreamFormat,
37576
38013
  CreateTranscriptionRequestTimestampGranularitiesItem: () => CreateTranscriptionRequestTimestampGranularitiesItem,
@@ -37590,12 +38027,14 @@ __export(schema_exports6, {
37590
38027
  RealtimeAudioFormatsAnyOfType: () => RealtimeAudioFormatsAnyOfType,
37591
38028
  RealtimeCreateClientSecretRequestExpiresAfterAnchor: () => RealtimeCreateClientSecretRequestExpiresAfterAnchor,
37592
38029
  RealtimeFunctionToolType: () => RealtimeFunctionToolType,
38030
+ RealtimeReasoningEffort: () => RealtimeReasoningEffort,
37593
38031
  RealtimeSessionCreateRequestGAIncludeItem: () => RealtimeSessionCreateRequestGAIncludeItem,
37594
38032
  RealtimeSessionCreateRequestGAOutputModalitiesItem: () => RealtimeSessionCreateRequestGAOutputModalitiesItem,
37595
38033
  RealtimeSessionCreateRequestGAType: () => RealtimeSessionCreateRequestGAType,
37596
38034
  RealtimeSessionCreateRequestModalitiesItem: () => RealtimeSessionCreateRequestModalitiesItem,
37597
38035
  RealtimeSessionCreateRequestToolsItemType: () => RealtimeSessionCreateRequestToolsItemType,
37598
38036
  RealtimeSessionCreateResponseGAIncludeItem: () => RealtimeSessionCreateResponseGAIncludeItem,
38037
+ RealtimeSessionCreateResponseGAObject: () => RealtimeSessionCreateResponseGAObject,
37599
38038
  RealtimeSessionCreateResponseGAOutputModalitiesItem: () => RealtimeSessionCreateResponseGAOutputModalitiesItem,
37600
38039
  RealtimeSessionCreateResponseGAType: () => RealtimeSessionCreateResponseGAType,
37601
38040
  RealtimeSessionCreateResponseIncludeItem: () => RealtimeSessionCreateResponseIncludeItem,
@@ -37626,6 +38065,15 @@ __export(schema_exports6, {
37626
38065
  VoiceResourceObject: () => VoiceResourceObject
37627
38066
  });
37628
38067
 
38068
+ // src/generated/openai/schema/audioTranscriptionDelay.ts
38069
+ var AudioTranscriptionDelay = {
38070
+ minimal: "minimal",
38071
+ low: "low",
38072
+ medium: "medium",
38073
+ high: "high",
38074
+ xhigh: "xhigh"
38075
+ };
38076
+
37629
38077
  // src/generated/openai/schema/createSpeechRequestResponseFormat.ts
37630
38078
  var CreateSpeechRequestResponseFormat = {
37631
38079
  mp3: "mp3",
@@ -37738,6 +38186,15 @@ var RealtimeFunctionToolType = {
37738
38186
  function: "function"
37739
38187
  };
37740
38188
 
38189
+ // src/generated/openai/schema/realtimeReasoningEffort.ts
38190
+ var RealtimeReasoningEffort = {
38191
+ minimal: "minimal",
38192
+ low: "low",
38193
+ medium: "medium",
38194
+ high: "high",
38195
+ xhigh: "xhigh"
38196
+ };
38197
+
37741
38198
  // src/generated/openai/schema/realtimeSessionCreateRequestGAIncludeItem.ts
37742
38199
  var RealtimeSessionCreateRequestGAIncludeItem = {
37743
38200
  iteminput_audio_transcriptionlogprobs: "item.input_audio_transcription.logprobs"
@@ -37770,6 +38227,11 @@ var RealtimeSessionCreateResponseGAIncludeItem = {
37770
38227
  iteminput_audio_transcriptionlogprobs: "item.input_audio_transcription.logprobs"
37771
38228
  };
37772
38229
 
38230
+ // src/generated/openai/schema/realtimeSessionCreateResponseGAObject.ts
38231
+ var RealtimeSessionCreateResponseGAObject = {
38232
+ realtimesession: "realtime.session"
38233
+ };
38234
+
37773
38235
  // src/generated/openai/schema/realtimeSessionCreateResponseGAOutputModalitiesItem.ts
37774
38236
  var RealtimeSessionCreateResponseGAOutputModalitiesItem = {
37775
38237
  text: "text",
@@ -37914,6 +38376,7 @@ __export(schema_exports7, {
37914
38376
  AutoChaptersResultErrorType: () => AutoChaptersResultErrorType,
37915
38377
  ErrorResponseError: () => ErrorResponseError,
37916
38378
  GetJobsJobidAlignmentTags: () => GetJobsJobidAlignmentTags,
38379
+ GetJobsJobidObjectUrlsUrlForItem: () => GetJobsJobidObjectUrlsUrlForItem,
37917
38380
  GetJobsJobidTranscriptFormat: () => GetJobsJobidTranscriptFormat,
37918
38381
  JobDetailsStatus: () => JobDetailsStatus,
37919
38382
  JobMode: () => JobMode,
@@ -37983,6 +38446,13 @@ var GetJobsJobidAlignmentTags = {
37983
38446
  one_per_line: "one_per_line"
37984
38447
  };
37985
38448
 
38449
+ // src/generated/speechmatics/schema/getJobsJobidObjectUrlsUrlForItem.ts
38450
+ var GetJobsJobidObjectUrlsUrlForItem = {
38451
+ data: "data",
38452
+ audio_mp3: "audio_mp3",
38453
+ transcript: "transcript"
38454
+ };
38455
+
37986
38456
  // src/generated/speechmatics/schema/getJobsJobidTranscriptFormat.ts
37987
38457
  var GetJobsJobidTranscriptFormat = {
37988
38458
  "json-v2": "json-v2",
@@ -38099,6 +38569,15 @@ var WrittenFormRecognitionResultType = {
38099
38569
  word: "word"
38100
38570
  };
38101
38571
 
38572
+ // src/generated/soniox/sdk-types.ts
38573
+ var sdk_types_exports = {};
38574
+ __export(sdk_types_exports, {
38575
+ RealtimeSttSession: () => import_node.RealtimeSttSession,
38576
+ SonioxFetchHttpClient: () => import_node.FetchHttpClient,
38577
+ SonioxNodeClient: () => import_node.SonioxNodeClient
38578
+ });
38579
+ var import_node = require("@soniox/node");
38580
+
38102
38581
  // src/generated/elevenlabs/schema/index.ts
38103
38582
  var schema_exports8 = {};
38104
38583
  __export(schema_exports8, {
@@ -38176,6 +38655,10 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
38176
38655
  deleteJobsJobidParams: () => deleteJobsJobidParams,
38177
38656
  deleteJobsJobidQueryParams: () => deleteJobsJobidQueryParams,
38178
38657
  deleteJobsJobidResponse: () => deleteJobsJobidResponse,
38658
+ deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault: () => deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault,
38659
+ deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault: () => deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault,
38660
+ deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
38661
+ deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
38179
38662
  deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
38180
38663
  deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
38181
38664
  deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
@@ -38191,8 +38674,15 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
38191
38674
  getJobsJobidDataResponse: () => getJobsJobidDataResponse,
38192
38675
  getJobsJobidLogParams: () => getJobsJobidLogParams,
38193
38676
  getJobsJobidLogResponse: () => getJobsJobidLogResponse,
38677
+ getJobsJobidObjectUrlsParams: () => getJobsJobidObjectUrlsParams,
38678
+ getJobsJobidObjectUrlsQueryParams: () => getJobsJobidObjectUrlsQueryParams,
38679
+ getJobsJobidObjectUrlsResponse: () => getJobsJobidObjectUrlsResponse,
38194
38680
  getJobsJobidParams: () => getJobsJobidParams,
38195
38681
  getJobsJobidResponse: () => getJobsJobidResponse,
38682
+ getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault: () => getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault,
38683
+ getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault: () => getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault,
38684
+ getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
38685
+ getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
38196
38686
  getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
38197
38687
  getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
38198
38688
  getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
@@ -38207,6 +38697,8 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
38207
38697
  getJobsJobidTranscriptQueryParams: () => getJobsJobidTranscriptQueryParams,
38208
38698
  getJobsJobidTranscriptResponse: () => getJobsJobidTranscriptResponse,
38209
38699
  getJobsJobidTranscriptResponseJobDurationMin: () => getJobsJobidTranscriptResponseJobDurationMin,
38700
+ getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
38701
+ getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
38210
38702
  getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp,
38211
38703
  getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
38212
38704
  getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax,
@@ -38218,6 +38710,10 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
38218
38710
  getJobsQueryLimitMax: () => getJobsQueryLimitMax,
38219
38711
  getJobsQueryParams: () => getJobsQueryParams,
38220
38712
  getJobsResponse: () => getJobsResponse,
38713
+ getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault: () => getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault,
38714
+ getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault: () => getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault,
38715
+ getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
38716
+ getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
38221
38717
  getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
38222
38718
  getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
38223
38719
  getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
@@ -38228,12 +38724,18 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
38228
38724
  getJobsResponseJobsItemDurationMin: () => getJobsResponseJobsItemDurationMin,
38229
38725
  getUsageQueryParams: () => getUsageQueryParams,
38230
38726
  getUsageResponse: () => getUsageResponse,
38231
- postJobsBody: () => postJobsBody
38727
+ postJobsBody: () => postJobsBody,
38728
+ postJobsHeader: () => postJobsHeader
38232
38729
  });
38233
38730
  var import_zod12 = require("zod");
38731
+ var postJobsHeader = import_zod12.z.object({
38732
+ "X-SM-Processing-Data": import_zod12.z.string().optional().describe(
38733
+ '**Note**: Only available for on-prem\nJSON dictionary of processing settings for the job worker. Currently supports `parallel_engines` (integer), which controls the number of engines the worker can use in parallel for this job, and `user_id` (string), which is the user id for this job. Example: `{"parallel_engines": 4}`'
38734
+ )
38735
+ });
38234
38736
  var postJobsBody = import_zod12.z.object({
38235
38737
  config: import_zod12.z.string().describe(
38236
- "JSON containing a `JobConfig` model indicating the type and parameters for the recognition job."
38738
+ "JSON containing a [`JobConfig`](/speech-to-text/batch/input#jobconfig-schema) model indicating the type and parameters for the recognition job."
38237
38739
  ),
38238
38740
  data_file: import_zod12.z.instanceof(File).optional().describe(
38239
38741
  "The data file to be processed. Alternatively the data file can be fetched from a url specified in `JobConfig`."
@@ -38255,9 +38757,13 @@ var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitiv
38255
38757
  var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
38256
38758
  var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
38257
38759
  var getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
38760
+ var getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
38761
+ var getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
38258
38762
  var getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
38259
38763
  var getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
38260
38764
  var getJobsResponseJobsItemConfigTranslationConfigTargetLanguagesMax = 5;
38765
+ var getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault = "auto";
38766
+ var getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault = "brief";
38261
38767
  var getJobsResponse = import_zod12.z.object({
38262
38768
  jobs: import_zod12.z.array(
38263
38769
  import_zod12.z.object({
@@ -38337,19 +38843,30 @@ var getJobsResponse = import_zod12.z.object({
38337
38843
  max_delay_mode: import_zod12.z.enum(["fixed", "flexible"]).optional().describe(
38338
38844
  "Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
38339
38845
  ),
38846
+ audio_filtering_config: import_zod12.z.object({
38847
+ volume_threshold: import_zod12.z.number().min(
38848
+ getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
38849
+ ).max(
38850
+ getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
38851
+ ).optional().describe(
38852
+ "Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
38853
+ )
38854
+ }).optional().describe("Configuration for limiting the transcription of quiet audio."),
38340
38855
  transcript_filtering_config: import_zod12.z.object({
38341
38856
  remove_disfluencies: import_zod12.z.boolean().optional().describe(
38342
- "If true, words that are identified as disfluencies will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
38857
+ "If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
38343
38858
  ),
38344
38859
  replacements: import_zod12.z.array(
38345
38860
  import_zod12.z.object({
38346
- from: import_zod12.z.string(),
38347
- to: import_zod12.z.string()
38861
+ from: import_zod12.z.string().describe("The text or pattern identified to be replaced."),
38862
+ to: import_zod12.z.string().describe(
38863
+ "The corrected or formatted string to appear in the transcript."
38864
+ )
38348
38865
  })
38349
38866
  ).optional().describe(
38350
- "A list of replacements to apply to the transcript. Each replacement is a pair of strings, where the first string is the pattern to be replaced and the second string is the replacement text."
38867
+ 'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
38351
38868
  )
38352
- }).optional().describe("Configuration for applying filtering to the transcription"),
38869
+ }).optional().describe("Configuration for applying filtering to the transcription."),
38353
38870
  speaker_diarization_config: import_zod12.z.object({
38354
38871
  prefer_current_speaker: import_zod12.z.boolean().optional().describe(
38355
38872
  'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
@@ -38360,6 +38877,19 @@ var getJobsResponse = import_zod12.z.object({
38360
38877
  getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
38361
38878
  ).optional().describe(
38362
38879
  "Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
38880
+ ),
38881
+ get_speakers: import_zod12.z.boolean().optional().describe(
38882
+ "If true, speaker identifiers will be returned at the end of transcript."
38883
+ ),
38884
+ speakers: import_zod12.z.array(
38885
+ import_zod12.z.object({
38886
+ label: import_zod12.z.string().min(1).describe(
38887
+ "Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
38888
+ ),
38889
+ speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
38890
+ })
38891
+ ).optional().describe(
38892
+ "Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
38363
38893
  )
38364
38894
  }).optional().describe("Configuration for speaker diarization")
38365
38895
  }).optional(),
@@ -38417,10 +38947,14 @@ var getJobsResponse = import_zod12.z.object({
38417
38947
  default_language: import_zod12.z.string().optional()
38418
38948
  }).optional(),
38419
38949
  summarization_config: import_zod12.z.object({
38420
- content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).optional(),
38421
- summary_length: import_zod12.z.enum(["brief", "detailed"]).optional(),
38950
+ content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).default(getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault).describe(
38951
+ "Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
38952
+ ),
38953
+ summary_length: import_zod12.z.enum(["brief", "detailed"]).default(getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault).describe(
38954
+ "Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
38955
+ ),
38422
38956
  summary_type: import_zod12.z.enum(["paragraphs", "bullets"]).optional()
38423
- }).optional(),
38957
+ }).optional().describe("Configuration options for summarization."),
38424
38958
  sentiment_analysis_config: import_zod12.z.object({}).optional(),
38425
38959
  topic_detection_config: import_zod12.z.object({
38426
38960
  topics: import_zod12.z.array(import_zod12.z.string()).optional()
@@ -38442,7 +38976,7 @@ var getJobsResponse = import_zod12.z.object({
38442
38976
  "Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
38443
38977
  )
38444
38978
  }).describe(
38445
- "Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/<id> request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
38979
+ "Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
38446
38980
  )
38447
38981
  )
38448
38982
  });
@@ -38454,9 +38988,13 @@ var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitiv
38454
38988
  var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
38455
38989
  var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
38456
38990
  var getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
38991
+ var getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
38992
+ var getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
38457
38993
  var getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
38458
38994
  var getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
38459
38995
  var getJobsJobidResponseJobConfigTranslationConfigTargetLanguagesMax = 5;
38996
+ var getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault = "auto";
38997
+ var getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault = "brief";
38460
38998
  var getJobsJobidResponse = import_zod12.z.object({
38461
38999
  job: import_zod12.z.object({
38462
39000
  created_at: import_zod12.z.string().datetime({}).describe("The UTC date time the job was created."),
@@ -38533,19 +39071,30 @@ var getJobsJobidResponse = import_zod12.z.object({
38533
39071
  max_delay_mode: import_zod12.z.enum(["fixed", "flexible"]).optional().describe(
38534
39072
  "Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
38535
39073
  ),
39074
+ audio_filtering_config: import_zod12.z.object({
39075
+ volume_threshold: import_zod12.z.number().min(
39076
+ getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
39077
+ ).max(
39078
+ getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
39079
+ ).optional().describe(
39080
+ "Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
39081
+ )
39082
+ }).optional().describe("Configuration for limiting the transcription of quiet audio."),
38536
39083
  transcript_filtering_config: import_zod12.z.object({
38537
39084
  remove_disfluencies: import_zod12.z.boolean().optional().describe(
38538
- "If true, words that are identified as disfluencies will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
39085
+ "If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
38539
39086
  ),
38540
39087
  replacements: import_zod12.z.array(
38541
39088
  import_zod12.z.object({
38542
- from: import_zod12.z.string(),
38543
- to: import_zod12.z.string()
39089
+ from: import_zod12.z.string().describe("The text or pattern identified to be replaced."),
39090
+ to: import_zod12.z.string().describe(
39091
+ "The corrected or formatted string to appear in the transcript."
39092
+ )
38544
39093
  })
38545
39094
  ).optional().describe(
38546
- "A list of replacements to apply to the transcript. Each replacement is a pair of strings, where the first string is the pattern to be replaced and the second string is the replacement text."
39095
+ 'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
38547
39096
  )
38548
- }).optional().describe("Configuration for applying filtering to the transcription"),
39097
+ }).optional().describe("Configuration for applying filtering to the transcription."),
38549
39098
  speaker_diarization_config: import_zod12.z.object({
38550
39099
  prefer_current_speaker: import_zod12.z.boolean().optional().describe(
38551
39100
  'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
@@ -38556,6 +39105,19 @@ var getJobsJobidResponse = import_zod12.z.object({
38556
39105
  getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
38557
39106
  ).optional().describe(
38558
39107
  "Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
39108
+ ),
39109
+ get_speakers: import_zod12.z.boolean().optional().describe(
39110
+ "If true, speaker identifiers will be returned at the end of transcript."
39111
+ ),
39112
+ speakers: import_zod12.z.array(
39113
+ import_zod12.z.object({
39114
+ label: import_zod12.z.string().min(1).describe(
39115
+ "Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
39116
+ ),
39117
+ speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
39118
+ })
39119
+ ).optional().describe(
39120
+ "Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
38559
39121
  )
38560
39122
  }).optional().describe("Configuration for speaker diarization")
38561
39123
  }).optional(),
@@ -38611,10 +39173,14 @@ var getJobsJobidResponse = import_zod12.z.object({
38611
39173
  default_language: import_zod12.z.string().optional()
38612
39174
  }).optional(),
38613
39175
  summarization_config: import_zod12.z.object({
38614
- content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).optional(),
38615
- summary_length: import_zod12.z.enum(["brief", "detailed"]).optional(),
39176
+ content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).default(getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault).describe(
39177
+ "Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
39178
+ ),
39179
+ summary_length: import_zod12.z.enum(["brief", "detailed"]).default(getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault).describe(
39180
+ "Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
39181
+ ),
38616
39182
  summary_type: import_zod12.z.enum(["paragraphs", "bullets"]).optional()
38617
- }).optional(),
39183
+ }).optional().describe("Configuration options for summarization."),
38618
39184
  sentiment_analysis_config: import_zod12.z.object({}).optional(),
38619
39185
  topic_detection_config: import_zod12.z.object({
38620
39186
  topics: import_zod12.z.array(import_zod12.z.string()).optional()
@@ -38636,7 +39202,7 @@ var getJobsJobidResponse = import_zod12.z.object({
38636
39202
  "Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
38637
39203
  )
38638
39204
  }).describe(
38639
- "Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/<id> request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
39205
+ "Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
38640
39206
  )
38641
39207
  });
38642
39208
  var deleteJobsJobidParams = import_zod12.z.object({
@@ -38652,9 +39218,13 @@ var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensi
38652
39218
  var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
38653
39219
  var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
38654
39220
  var deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
39221
+ var deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
39222
+ var deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
38655
39223
  var deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
38656
39224
  var deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
38657
39225
  var deleteJobsJobidResponseJobConfigTranslationConfigTargetLanguagesMax = 5;
39226
+ var deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault = "auto";
39227
+ var deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault = "brief";
38658
39228
  var deleteJobsJobidResponse = import_zod12.z.object({
38659
39229
  job: import_zod12.z.object({
38660
39230
  created_at: import_zod12.z.string().datetime({}).describe("The UTC date time the job was created."),
@@ -38731,19 +39301,30 @@ var deleteJobsJobidResponse = import_zod12.z.object({
38731
39301
  max_delay_mode: import_zod12.z.enum(["fixed", "flexible"]).optional().describe(
38732
39302
  "Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
38733
39303
  ),
39304
+ audio_filtering_config: import_zod12.z.object({
39305
+ volume_threshold: import_zod12.z.number().min(
39306
+ deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
39307
+ ).max(
39308
+ deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
39309
+ ).optional().describe(
39310
+ "Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
39311
+ )
39312
+ }).optional().describe("Configuration for limiting the transcription of quiet audio."),
38734
39313
  transcript_filtering_config: import_zod12.z.object({
38735
39314
  remove_disfluencies: import_zod12.z.boolean().optional().describe(
38736
- "If true, words that are identified as disfluencies will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
39315
+ "If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
38737
39316
  ),
38738
39317
  replacements: import_zod12.z.array(
38739
39318
  import_zod12.z.object({
38740
- from: import_zod12.z.string(),
38741
- to: import_zod12.z.string()
39319
+ from: import_zod12.z.string().describe("The text or pattern identified to be replaced."),
39320
+ to: import_zod12.z.string().describe(
39321
+ "The corrected or formatted string to appear in the transcript."
39322
+ )
38742
39323
  })
38743
39324
  ).optional().describe(
38744
- "A list of replacements to apply to the transcript. Each replacement is a pair of strings, where the first string is the pattern to be replaced and the second string is the replacement text."
39325
+ 'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
38745
39326
  )
38746
- }).optional().describe("Configuration for applying filtering to the transcription"),
39327
+ }).optional().describe("Configuration for applying filtering to the transcription."),
38747
39328
  speaker_diarization_config: import_zod12.z.object({
38748
39329
  prefer_current_speaker: import_zod12.z.boolean().optional().describe(
38749
39330
  'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
@@ -38754,6 +39335,19 @@ var deleteJobsJobidResponse = import_zod12.z.object({
38754
39335
  deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
38755
39336
  ).optional().describe(
38756
39337
  "Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
39338
+ ),
39339
+ get_speakers: import_zod12.z.boolean().optional().describe(
39340
+ "If true, speaker identifiers will be returned at the end of transcript."
39341
+ ),
39342
+ speakers: import_zod12.z.array(
39343
+ import_zod12.z.object({
39344
+ label: import_zod12.z.string().min(1).describe(
39345
+ "Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
39346
+ ),
39347
+ speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
39348
+ })
39349
+ ).optional().describe(
39350
+ "Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
38757
39351
  )
38758
39352
  }).optional().describe("Configuration for speaker diarization")
38759
39353
  }).optional(),
@@ -38809,10 +39403,14 @@ var deleteJobsJobidResponse = import_zod12.z.object({
38809
39403
  default_language: import_zod12.z.string().optional()
38810
39404
  }).optional(),
38811
39405
  summarization_config: import_zod12.z.object({
38812
- content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).optional(),
38813
- summary_length: import_zod12.z.enum(["brief", "detailed"]).optional(),
39406
+ content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).default(deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault).describe(
39407
+ "Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
39408
+ ),
39409
+ summary_length: import_zod12.z.enum(["brief", "detailed"]).default(deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault).describe(
39410
+ "Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
39411
+ ),
38814
39412
  summary_type: import_zod12.z.enum(["paragraphs", "bullets"]).optional()
38815
- }).optional(),
39413
+ }).optional().describe("Configuration options for summarization."),
38816
39414
  sentiment_analysis_config: import_zod12.z.object({}).optional(),
38817
39415
  topic_detection_config: import_zod12.z.object({
38818
39416
  topics: import_zod12.z.array(import_zod12.z.string()).optional()
@@ -38834,7 +39432,7 @@ var deleteJobsJobidResponse = import_zod12.z.object({
38834
39432
  "Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
38835
39433
  )
38836
39434
  }).describe(
38837
- "Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/<id> request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
39435
+ "Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
38838
39436
  )
38839
39437
  });
38840
39438
  var getJobsJobidDataParams = import_zod12.z.object({
@@ -38856,6 +39454,8 @@ var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverride
38856
39454
  var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
38857
39455
  var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
38858
39456
  var getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
39457
+ var getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
39458
+ var getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
38859
39459
  var getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
38860
39460
  var getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
38861
39461
  var getJobsJobidTranscriptResponseResultsItemVolumeMin = 0;
@@ -38927,19 +39527,28 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
38927
39527
  max_delay_mode: import_zod12.z.enum(["fixed", "flexible"]).optional().describe(
38928
39528
  "Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
38929
39529
  ),
39530
+ audio_filtering_config: import_zod12.z.object({
39531
+ volume_threshold: import_zod12.z.number().min(
39532
+ getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
39533
+ ).max(
39534
+ getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
39535
+ ).optional().describe(
39536
+ "Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
39537
+ )
39538
+ }).optional().describe("Configuration for limiting the transcription of quiet audio."),
38930
39539
  transcript_filtering_config: import_zod12.z.object({
38931
39540
  remove_disfluencies: import_zod12.z.boolean().optional().describe(
38932
- "If true, words that are identified as disfluencies will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
39541
+ "If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
38933
39542
  ),
38934
39543
  replacements: import_zod12.z.array(
38935
39544
  import_zod12.z.object({
38936
- from: import_zod12.z.string(),
38937
- to: import_zod12.z.string()
39545
+ from: import_zod12.z.string().describe("The text or pattern identified to be replaced."),
39546
+ to: import_zod12.z.string().describe("The corrected or formatted string to appear in the transcript.")
38938
39547
  })
38939
39548
  ).optional().describe(
38940
- "A list of replacements to apply to the transcript. Each replacement is a pair of strings, where the first string is the pattern to be replaced and the second string is the replacement text."
39549
+ 'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
38941
39550
  )
38942
- }).optional().describe("Configuration for applying filtering to the transcription"),
39551
+ }).optional().describe("Configuration for applying filtering to the transcription."),
38943
39552
  speaker_diarization_config: import_zod12.z.object({
38944
39553
  prefer_current_speaker: import_zod12.z.boolean().optional().describe(
38945
39554
  'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
@@ -38950,9 +39559,23 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
38950
39559
  getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
38951
39560
  ).optional().describe(
38952
39561
  "Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
39562
+ ),
39563
+ get_speakers: import_zod12.z.boolean().optional().describe(
39564
+ "If true, speaker identifiers will be returned at the end of transcript."
39565
+ ),
39566
+ speakers: import_zod12.z.array(
39567
+ import_zod12.z.object({
39568
+ label: import_zod12.z.string().min(1).describe(
39569
+ "Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
39570
+ ),
39571
+ speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
39572
+ })
39573
+ ).optional().describe(
39574
+ "Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
38953
39575
  )
38954
39576
  }).optional().describe("Configuration for speaker diarization")
38955
39577
  }).optional(),
39578
+ orchestrator_version: import_zod12.z.string().optional().describe("The engine version used to generate transcription output."),
38956
39579
  translation_errors: import_zod12.z.array(
38957
39580
  import_zod12.z.object({
38958
39581
  type: import_zod12.z.enum(["translation_failed", "unsupported_translation_pair"]).optional(),
@@ -39030,10 +39653,7 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
39030
39653
  "OTHER"
39031
39654
  ]).optional(),
39032
39655
  message: import_zod12.z.string().optional()
39033
- }).optional(),
39034
- orchestrator_version: import_zod12.z.string().optional().describe(
39035
- "Orchestrator version in PEP 440 Format or set to 'version_not_found' as default."
39036
- )
39656
+ }).optional()
39037
39657
  }).describe(
39038
39658
  "Summary information about the output from an ASR job, comprising the job type and configuration parameters used when generating the output."
39039
39659
  ),
@@ -39116,6 +39736,12 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
39116
39736
  "An ASR job output item. The primary item types are `word` and `punctuation`. Other item types may be present, for example to provide semantic information of different forms."
39117
39737
  )
39118
39738
  ),
39739
+ speakers: import_zod12.z.array(
39740
+ import_zod12.z.object({
39741
+ label: import_zod12.z.string().min(1).describe("Speaker label."),
39742
+ speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
39743
+ })
39744
+ ).optional().describe("List of unique speaker identifiers detected in the transcript."),
39119
39745
  translations: import_zod12.z.record(
39120
39746
  import_zod12.z.string(),
39121
39747
  import_zod12.z.array(
@@ -39137,13 +39763,23 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
39137
39763
  sentiment_analysis: import_zod12.z.object({
39138
39764
  segments: import_zod12.z.array(
39139
39765
  import_zod12.z.object({
39140
- text: import_zod12.z.string().optional(),
39141
- start_time: import_zod12.z.number().optional(),
39142
- end_time: import_zod12.z.number().optional(),
39143
- sentiment: import_zod12.z.string().optional(),
39144
- speaker: import_zod12.z.string().optional(),
39145
- channel: import_zod12.z.string().optional(),
39146
- confidence: import_zod12.z.number().optional()
39766
+ text: import_zod12.z.string().optional().describe("Represents the transcript of the analysed segment"),
39767
+ sentiment: import_zod12.z.string().optional().describe(
39768
+ "The assigned sentiment to the segment, which can be positive, neutral or negative"
39769
+ ),
39770
+ start_time: import_zod12.z.number().optional().describe(
39771
+ "The timestamp corresponding to the beginning of the transcription segment"
39772
+ ),
39773
+ end_time: import_zod12.z.number().optional().describe(
39774
+ "The timestamp corresponding to the end of the transcription segment"
39775
+ ),
39776
+ speaker: import_zod12.z.string().optional().describe(
39777
+ "The speaker label for the segment, if speaker diarization is enabled"
39778
+ ),
39779
+ channel: import_zod12.z.string().optional().describe(
39780
+ "The channel label for the segment, if channel diarization is enabled"
39781
+ ),
39782
+ confidence: import_zod12.z.number().optional().describe("A confidence score in the range of 0-1")
39147
39783
  }).describe("Represents a segment of text and its associated sentiment.")
39148
39784
  ).optional().describe(
39149
39785
  "An array of objects that represent a segment of text and its associated sentiment."
@@ -39202,10 +39838,10 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
39202
39838
  }).optional().describe("Main object that holds topic detection results."),
39203
39839
  chapters: import_zod12.z.array(
39204
39840
  import_zod12.z.object({
39205
- title: import_zod12.z.string().optional(),
39206
- summary: import_zod12.z.string().optional(),
39207
- start_time: import_zod12.z.number().optional(),
39208
- end_time: import_zod12.z.number().optional()
39841
+ title: import_zod12.z.string().optional().describe("The auto-generated title for the chapter"),
39842
+ summary: import_zod12.z.string().optional().describe("An auto-generated paragraph-style, short summary of the chapter"),
39843
+ start_time: import_zod12.z.number().optional().describe("The start time of the chapter in the audio file"),
39844
+ end_time: import_zod12.z.number().optional().describe("The end time of the chapter in the audio file")
39209
39845
  })
39210
39846
  ).optional().describe("An array of objects that represent summarized chapters of the transcript"),
39211
39847
  audio_events: import_zod12.z.array(
@@ -39250,6 +39886,18 @@ var getJobsJobidLogParams = import_zod12.z.object({
39250
39886
  jobid: import_zod12.z.string().describe("ID of the job.")
39251
39887
  });
39252
39888
  var getJobsJobidLogResponse = import_zod12.z.instanceof(File);
39889
+ var getJobsJobidObjectUrlsParams = import_zod12.z.object({
39890
+ jobid: import_zod12.z.string().describe("ID of the job.")
39891
+ });
39892
+ var getJobsJobidObjectUrlsQueryParams = import_zod12.z.object({
39893
+ ttl: import_zod12.z.number().describe("Time to live in seconds for the signed URLs"),
39894
+ url_for: import_zod12.z.array(import_zod12.z.enum(["data", "audio_mp3", "transcript"]))
39895
+ });
39896
+ var getJobsJobidObjectUrlsResponse = import_zod12.z.object({
39897
+ data: import_zod12.z.string().optional(),
39898
+ audio_mp3: import_zod12.z.string().optional(),
39899
+ transcript: import_zod12.z.string().optional()
39900
+ });
39253
39901
  var getUsageQueryParams = import_zod12.z.object({
39254
39902
  since: import_zod12.z.string().date().optional().describe(
39255
39903
  "Include usage after the given date (inclusive). This is a [ISO-8601](https://en.wikipedia.org/wiki/ISO_8601) calendar date format: `YYYY-MM-DD`."
@@ -39383,7 +40031,7 @@ var speechToTextBodyKeytermsDefault = [];
39383
40031
  var speechToTextBody = import_zod13.z.object({
39384
40032
  model_id: import_zod13.z.enum(["scribe_v1", "scribe_v2"]).describe("The ID of the model to use for transcription."),
39385
40033
  file: import_zod13.z.instanceof(File).or(import_zod13.z.null()).optional().describe(
39386
- "The file to transcribe (100ms minimum audio length). All major audio and video formats are supported. Exactly one of the file or cloud_storage_url parameters must be provided. The file size must be less than 3.0GB."
40034
+ "The file to transcribe (100ms minimum audio length). All major audio and video formats are supported. Exactly one of the file or cloud_storage_url parameters must be provided. The file size must be less than 5.0GB."
39387
40035
  ),
39388
40036
  language_code: import_zod13.z.string().or(import_zod13.z.null()).optional().describe(
39389
40037
  "An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand. Defaults to null, in this case the language is predicted automatically."
@@ -39461,7 +40109,7 @@ var speechToTextBody = import_zod13.z.object({
39461
40109
  "The format of input audio. Options are 'pcm_s16le_16' or 'other' For `pcm_s16le_16`, the input audio must be 16-bit PCM at a 16kHz sample rate, single channel (mono), and little-endian byte order. Latency will be lower than with passing an encoded waveform."
39462
40110
  ),
39463
40111
  cloud_storage_url: import_zod13.z.string().or(import_zod13.z.null()).optional().describe(
39464
- "The HTTPS URL of the file to transcribe. Exactly one of the file or cloud_storage_url parameters must be provided. The file must be accessible via HTTPS and the file size must be less than 2GB. Any valid HTTPS URL is accepted, including URLs from cloud storage providers (AWS S3, Google Cloud Storage, Cloudflare R2, etc.), CDNs, or any other HTTPS source. URLs can be pre-signed or include authentication tokens in query parameters."
40112
+ "[Deprecated] This parameter is deprecated and will be removed in the future. Use 'source_url' instead.The HTTPS URL of the file to transcribe. Exactly one of the file or cloud_storage_url parameters must be provided. The file must be accessible via HTTPS and the file size must be less than 2GB. Any valid HTTPS URL is accepted, including URLs from cloud storage providers (AWS S3, Google Cloud Storage, Cloudflare R2, etc.), CDNs, or any other HTTPS source. URLs can be pre-signed or include authentication tokens in query parameters."
39465
40113
  ),
39466
40114
  source_url: import_zod13.z.string().or(import_zod13.z.null()).optional().describe(
39467
40115
  "The URL of an audio or video file to transcribe. Supports hosted video or audio files, YouTube video URLs, TikTok video URLs, and other video hosting services."
@@ -39500,7 +40148,7 @@ var speechToTextBody = import_zod13.z.object({
39500
40148
  "How to format redacted entities. 'redacted' replaces with {REDACTED}, 'entity_type' replaces with {ENTITY_TYPE}, 'enumerated_entity_type' replaces with {ENTITY_TYPE_N} where N enumerates each occurrence. Only used when entity_redaction is set."
39501
40149
  ),
39502
40150
  keyterms: import_zod13.z.array(import_zod13.z.string()).default(speechToTextBodyKeytermsDefault).describe(
39503
- 'A list of keyterms to bias the transcription towards. The keyterms are words or phrases you want the model to recognise more accurately. The number of keyterms cannot exceed 1000. The length of each keyterm must be less than 50 characters. Keyterms can contain at most 5 words (after normalisation). For example ["hello", "world", "technical term"]. Usage of this parameter will incur an additional 20% surcharge on the base transcription cost. When more than 100 keyterms are provided, a minimum billable duration of 20 seconds applies per request.'
40151
+ 'A list of keyterms to bias the transcription towards. The keyterms are words or phrases you want the model to recognise more accurately. The number of keyterms cannot exceed 1000. The length of each keyterm must be less than 50 characters. Keyterms can contain at most 5 words (after normalisation). For example ["hello", "world", "technical term"]. The following characters are not supported: `<`, `>`, `{`, `}`, `[`, `]`, `\\`. Usage of this parameter will incur an additional 20% surcharge on the base transcription cost. When more than 100 keyterms are provided, a minimum billable duration of 20 seconds applies per request.'
39504
40152
  )
39505
40153
  });
39506
40154
  var speechToTextResponse = import_zod13.z.object({
@@ -39866,6 +40514,7 @@ var deleteTranscriptByIdResponse = import_zod13.z.any();
39866
40514
  SonioxModels,
39867
40515
  SonioxRealtimeModel,
39868
40516
  SonioxRegion,
40517
+ SonioxSDK,
39869
40518
  SonioxStreamingSchema,
39870
40519
  SonioxStreamingTypes,
39871
40520
  SonioxStreamingUpdateSchema,