voice-router-dev 0.9.4 → 0.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1102,7 +1102,6 @@ var AzureLocales = [
1102
1102
  { code: "ar-YE", name: "Arabic (Yemen)" },
1103
1103
  { code: "as-IN", name: "Assamese (India)" },
1104
1104
  { code: "az-AZ", name: "Azerbaijani (Azerbaijan)" },
1105
- { code: "be-BY", name: "Belarusian (Belarus)" },
1106
1105
  { code: "bg-BG", name: "Bulgarian (Bulgaria)" },
1107
1106
  { code: "bn-BD", name: "Bengali (Bangladesh)" },
1108
1107
  { code: "bn-IN", name: "Bengali (India)" },
@@ -1183,7 +1182,6 @@ var AzureLocales = [
1183
1182
  { code: "lo-LA", name: "Lao (Latin)" },
1184
1183
  { code: "lt-LT", name: "Lithuanian (Lithuania)" },
1185
1184
  { code: "lv-LV", name: "Latvian (Latvia)" },
1186
- { code: "mi-NZ", name: "Maori (New Zealand)" },
1187
1185
  { code: "mk-MK", name: "Macedonian (North Macedonia)" },
1188
1186
  { code: "ml-IN", name: "Malayalam (India)" },
1189
1187
  { code: "mn-MN", name: "Mongolian (Mongolia)" },
@@ -1259,7 +1257,6 @@ var AzureLocaleCodes = [
1259
1257
  "ar-YE",
1260
1258
  "as-IN",
1261
1259
  "az-AZ",
1262
- "be-BY",
1263
1260
  "bg-BG",
1264
1261
  "bn-BD",
1265
1262
  "bn-IN",
@@ -1340,7 +1337,6 @@ var AzureLocaleCodes = [
1340
1337
  "lo-LA",
1341
1338
  "lt-LT",
1342
1339
  "lv-LV",
1343
- "mi-NZ",
1344
1340
  "mk-MK",
1345
1341
  "ml-IN",
1346
1342
  "mn-MN",
@@ -1416,7 +1412,6 @@ var AzureLocaleLabels = {
1416
1412
  "ar-YE": "Arabic (Yemen)",
1417
1413
  "as-IN": "Assamese (India)",
1418
1414
  "az-AZ": "Azerbaijani (Azerbaijan)",
1419
- "be-BY": "Belarusian (Belarus)",
1420
1415
  "bg-BG": "Bulgarian (Bulgaria)",
1421
1416
  "bn-BD": "Bengali (Bangladesh)",
1422
1417
  "bn-IN": "Bengali (India)",
@@ -1497,7 +1492,6 @@ var AzureLocaleLabels = {
1497
1492
  "lo-LA": "Lao (Latin)",
1498
1493
  "lt-LT": "Lithuanian (Lithuania)",
1499
1494
  "lv-LV": "Latvian (Latvia)",
1500
- "mi-NZ": "Maori (New Zealand)",
1501
1495
  "mk-MK": "Macedonian (North Macedonia)",
1502
1496
  "ml-IN": "Malayalam (India)",
1503
1497
  "mn-MN": "Mongolian (Mongolia)",
@@ -1573,7 +1567,6 @@ var AzureLocale = {
1573
1567
  "ar-YE": "ar-YE",
1574
1568
  "as-IN": "as-IN",
1575
1569
  "az-AZ": "az-AZ",
1576
- "be-BY": "be-BY",
1577
1570
  "bg-BG": "bg-BG",
1578
1571
  "bn-BD": "bn-BD",
1579
1572
  "bn-IN": "bn-IN",
@@ -1654,7 +1647,6 @@ var AzureLocale = {
1654
1647
  "lo-LA": "lo-LA",
1655
1648
  "lt-LT": "lt-LT",
1656
1649
  "lv-LV": "lv-LV",
1657
- "mi-NZ": "mi-NZ",
1658
1650
  "mk-MK": "mk-MK",
1659
1651
  "ml-IN": "ml-IN",
1660
1652
  "mn-MN": "mn-MN",
@@ -1745,8 +1737,6 @@ var ElevenLabsLanguages = [
1745
1737
  { code: "hr", name: "Croatian" },
1746
1738
  { code: "bg", name: "Bulgarian" },
1747
1739
  { code: "lt", name: "Lithuanian" },
1748
- { code: "la", name: "Latin" },
1749
- { code: "mi", name: "Maori" },
1750
1740
  { code: "ml", name: "Malayalam" },
1751
1741
  { code: "cy", name: "Welsh" },
1752
1742
  { code: "sk", name: "Slovak" },
@@ -1760,20 +1750,16 @@ var ElevenLabsLanguages = [
1760
1750
  { code: "kn", name: "Kannada" },
1761
1751
  { code: "et", name: "Estonian" },
1762
1752
  { code: "mk", name: "Macedonian" },
1763
- { code: "br", name: "Breton" },
1764
- { code: "eu", name: "Basque" },
1765
1753
  { code: "is", name: "Icelandic" },
1766
1754
  { code: "hy", name: "Armenian" },
1767
1755
  { code: "ne", name: "Nepali" },
1768
1756
  { code: "mn", name: "Mongolian" },
1769
1757
  { code: "bs", name: "Bosnian" },
1770
1758
  { code: "kk", name: "Kazakh" },
1771
- { code: "sq", name: "Albanian" },
1772
1759
  { code: "sw", name: "Swahili" },
1773
1760
  { code: "gl", name: "Galician" },
1774
1761
  { code: "mr", name: "Marathi" },
1775
1762
  { code: "pa", name: "Punjabi" },
1776
- { code: "si", name: "Sinhala" },
1777
1763
  { code: "km", name: "Khmer" },
1778
1764
  { code: "sn", name: "Shona" },
1779
1765
  { code: "yo", name: "Yoruba" },
@@ -1786,29 +1772,16 @@ var ElevenLabsLanguages = [
1786
1772
  { code: "sd", name: "Sindhi" },
1787
1773
  { code: "gu", name: "Gujarati" },
1788
1774
  { code: "am", name: "Amharic" },
1789
- { code: "yi", name: "Yiddish" },
1790
1775
  { code: "lo", name: "Lao" },
1791
1776
  { code: "uz", name: "Uzbek" },
1792
- { code: "fo", name: "Faroese" },
1793
- { code: "ht", name: "Haitian Creole" },
1794
1777
  { code: "ps", name: "Pashto" },
1795
- { code: "tk", name: "Turkmen" },
1796
- { code: "nn", name: "Norwegian Nynorsk" },
1797
1778
  { code: "mt", name: "Maltese" },
1798
- { code: "sa", name: "Sanskrit" },
1799
1779
  { code: "lb", name: "Luxembourgish" },
1800
1780
  { code: "my", name: "Burmese" },
1801
- { code: "bo", name: "Tibetan" },
1802
- { code: "tl", name: "Tagalog" },
1803
- { code: "mg", name: "Malagasy" },
1804
1781
  { code: "as", name: "Assamese" },
1805
- { code: "tt", name: "Tatar" },
1806
- { code: "haw", name: "Hawaiian" },
1807
1782
  { code: "ln", name: "Lingala" },
1808
1783
  { code: "ha", name: "Hausa" },
1809
- { code: "ba", name: "Bashkir" },
1810
- { code: "jw", name: "Javanese" },
1811
- { code: "su", name: "Sundanese" }
1784
+ { code: "jw", name: "Javanese" }
1812
1785
  ];
1813
1786
  var ElevenLabsLanguageCodes = [
1814
1787
  "en",
@@ -1846,8 +1819,6 @@ var ElevenLabsLanguageCodes = [
1846
1819
  "hr",
1847
1820
  "bg",
1848
1821
  "lt",
1849
- "la",
1850
- "mi",
1851
1822
  "ml",
1852
1823
  "cy",
1853
1824
  "sk",
@@ -1861,20 +1832,16 @@ var ElevenLabsLanguageCodes = [
1861
1832
  "kn",
1862
1833
  "et",
1863
1834
  "mk",
1864
- "br",
1865
- "eu",
1866
1835
  "is",
1867
1836
  "hy",
1868
1837
  "ne",
1869
1838
  "mn",
1870
1839
  "bs",
1871
1840
  "kk",
1872
- "sq",
1873
1841
  "sw",
1874
1842
  "gl",
1875
1843
  "mr",
1876
1844
  "pa",
1877
- "si",
1878
1845
  "km",
1879
1846
  "sn",
1880
1847
  "yo",
@@ -1887,29 +1854,16 @@ var ElevenLabsLanguageCodes = [
1887
1854
  "sd",
1888
1855
  "gu",
1889
1856
  "am",
1890
- "yi",
1891
1857
  "lo",
1892
1858
  "uz",
1893
- "fo",
1894
- "ht",
1895
1859
  "ps",
1896
- "tk",
1897
- "nn",
1898
1860
  "mt",
1899
- "sa",
1900
1861
  "lb",
1901
1862
  "my",
1902
- "bo",
1903
- "tl",
1904
- "mg",
1905
1863
  "as",
1906
- "tt",
1907
- "haw",
1908
1864
  "ln",
1909
1865
  "ha",
1910
- "ba",
1911
- "jw",
1912
- "su"
1866
+ "jw"
1913
1867
  ];
1914
1868
  var ElevenLabsLanguageLabels = {
1915
1869
  en: "English",
@@ -1947,8 +1901,6 @@ var ElevenLabsLanguageLabels = {
1947
1901
  hr: "Croatian",
1948
1902
  bg: "Bulgarian",
1949
1903
  lt: "Lithuanian",
1950
- la: "Latin",
1951
- mi: "Maori",
1952
1904
  ml: "Malayalam",
1953
1905
  cy: "Welsh",
1954
1906
  sk: "Slovak",
@@ -1962,20 +1914,16 @@ var ElevenLabsLanguageLabels = {
1962
1914
  kn: "Kannada",
1963
1915
  et: "Estonian",
1964
1916
  mk: "Macedonian",
1965
- br: "Breton",
1966
- eu: "Basque",
1967
1917
  is: "Icelandic",
1968
1918
  hy: "Armenian",
1969
1919
  ne: "Nepali",
1970
1920
  mn: "Mongolian",
1971
1921
  bs: "Bosnian",
1972
1922
  kk: "Kazakh",
1973
- sq: "Albanian",
1974
1923
  sw: "Swahili",
1975
1924
  gl: "Galician",
1976
1925
  mr: "Marathi",
1977
1926
  pa: "Punjabi",
1978
- si: "Sinhala",
1979
1927
  km: "Khmer",
1980
1928
  sn: "Shona",
1981
1929
  yo: "Yoruba",
@@ -1988,29 +1936,16 @@ var ElevenLabsLanguageLabels = {
1988
1936
  sd: "Sindhi",
1989
1937
  gu: "Gujarati",
1990
1938
  am: "Amharic",
1991
- yi: "Yiddish",
1992
1939
  lo: "Lao",
1993
1940
  uz: "Uzbek",
1994
- fo: "Faroese",
1995
- ht: "Haitian Creole",
1996
1941
  ps: "Pashto",
1997
- tk: "Turkmen",
1998
- nn: "Norwegian Nynorsk",
1999
1942
  mt: "Maltese",
2000
- sa: "Sanskrit",
2001
1943
  lb: "Luxembourgish",
2002
1944
  my: "Burmese",
2003
- bo: "Tibetan",
2004
- tl: "Tagalog",
2005
- mg: "Malagasy",
2006
1945
  as: "Assamese",
2007
- tt: "Tatar",
2008
- haw: "Hawaiian",
2009
1946
  ln: "Lingala",
2010
1947
  ha: "Hausa",
2011
- ba: "Bashkir",
2012
- jw: "Javanese",
2013
- su: "Sundanese"
1948
+ jw: "Javanese"
2014
1949
  };
2015
1950
 
2016
1951
  // src/generated/gladia/schema/streamingSupportedBitDepthEnum.ts
@@ -2515,6 +2450,7 @@ var OpenAITranscriptionModel = {
2515
2450
  "gpt-4o-mini-transcribe-2025-12-15": "gpt-4o-mini-transcribe-2025-12-15",
2516
2451
  "gpt-4o-transcribe": "gpt-4o-transcribe",
2517
2452
  "gpt-4o-transcribe-diarize": "gpt-4o-transcribe-diarize",
2453
+ "gpt-realtime-whisper": "gpt-realtime-whisper",
2518
2454
  "whisper-1": "whisper-1"
2519
2455
  };
2520
2456
  var OpenAIRealtimeModel = {
@@ -2530,6 +2466,7 @@ var OpenAIRealtimeModel = {
2530
2466
  "gpt-audio-mini-2025-12-15": "gpt-audio-mini-2025-12-15",
2531
2467
  "gpt-realtime": "gpt-realtime",
2532
2468
  "gpt-realtime-1.5": "gpt-realtime-1.5",
2469
+ "gpt-realtime-2": "gpt-realtime-2",
2533
2470
  "gpt-realtime-2025-08-28": "gpt-realtime-2025-08-28",
2534
2471
  "gpt-realtime-mini": "gpt-realtime-mini",
2535
2472
  "gpt-realtime-mini-2025-10-06": "gpt-realtime-mini-2025-10-06",
@@ -5346,12 +5283,20 @@ var EntityType = {
5346
5283
  email_address: "email_address",
5347
5284
  event: "event",
5348
5285
  filename: "filename",
5286
+ gender: "gender",
5349
5287
  gender_sexuality: "gender_sexuality",
5350
5288
  healthcare_number: "healthcare_number",
5351
5289
  injury: "injury",
5352
5290
  ip_address: "ip_address",
5353
5291
  language: "language",
5354
5292
  location: "location",
5293
+ location_address: "location_address",
5294
+ location_address_street: "location_address_street",
5295
+ location_city: "location_city",
5296
+ location_coordinate: "location_coordinate",
5297
+ location_country: "location_country",
5298
+ location_state: "location_state",
5299
+ location_zip: "location_zip",
5355
5300
  marital_status: "marital_status",
5356
5301
  medical_condition: "medical_condition",
5357
5302
  medical_process: "medical_process",
@@ -5360,6 +5305,7 @@ var EntityType = {
5360
5305
  number_sequence: "number_sequence",
5361
5306
  occupation: "occupation",
5362
5307
  organization: "organization",
5308
+ organization_medical_facility: "organization_medical_facility",
5363
5309
  passport_number: "passport_number",
5364
5310
  password: "password",
5365
5311
  person_age: "person_age",
@@ -5368,6 +5314,7 @@ var EntityType = {
5368
5314
  physical_attribute: "physical_attribute",
5369
5315
  political_affiliation: "political_affiliation",
5370
5316
  religion: "religion",
5317
+ sexuality: "sexuality",
5371
5318
  statistics: "statistics",
5372
5319
  time: "time",
5373
5320
  url: "url",
@@ -5394,12 +5341,20 @@ var PiiPolicy = {
5394
5341
  email_address: "email_address",
5395
5342
  event: "event",
5396
5343
  filename: "filename",
5344
+ gender: "gender",
5397
5345
  gender_sexuality: "gender_sexuality",
5398
5346
  healthcare_number: "healthcare_number",
5399
5347
  injury: "injury",
5400
5348
  ip_address: "ip_address",
5401
5349
  language: "language",
5402
5350
  location: "location",
5351
+ location_address: "location_address",
5352
+ location_address_street: "location_address_street",
5353
+ location_city: "location_city",
5354
+ location_coordinate: "location_coordinate",
5355
+ location_country: "location_country",
5356
+ location_state: "location_state",
5357
+ location_zip: "location_zip",
5403
5358
  marital_status: "marital_status",
5404
5359
  medical_condition: "medical_condition",
5405
5360
  medical_process: "medical_process",
@@ -5408,6 +5363,7 @@ var PiiPolicy = {
5408
5363
  number_sequence: "number_sequence",
5409
5364
  occupation: "occupation",
5410
5365
  organization: "organization",
5366
+ organization_medical_facility: "organization_medical_facility",
5411
5367
  passport_number: "passport_number",
5412
5368
  password: "password",
5413
5369
  person_age: "person_age",
@@ -5416,6 +5372,7 @@ var PiiPolicy = {
5416
5372
  physical_attribute: "physical_attribute",
5417
5373
  political_affiliation: "political_affiliation",
5418
5374
  religion: "religion",
5375
+ sexuality: "sexuality",
5419
5376
  statistics: "statistics",
5420
5377
  time: "time",
5421
5378
  url: "url",
@@ -5484,7 +5441,8 @@ var TranscriptOptionalParamsRedactPiiAudioOptionsOverrideAudioRedactionMethod =
5484
5441
 
5485
5442
  // src/generated/assemblyai/schema/transcriptOptionalParamsRemoveAudioTags.ts
5486
5443
  var TranscriptOptionalParamsRemoveAudioTags = {
5487
- all: "all"
5444
+ all: "all",
5445
+ speaker: "speaker"
5488
5446
  };
5489
5447
 
5490
5448
  // src/generated/assemblyai/schema/transcriptRedactPiiAudioOptionsOverrideAudioRedactionMethod.ts
@@ -5494,7 +5452,8 @@ var TranscriptRedactPiiAudioOptionsOverrideAudioRedactionMethod = {
5494
5452
 
5495
5453
  // src/generated/assemblyai/schema/transcriptRemoveAudioTags.ts
5496
5454
  var TranscriptRemoveAudioTags = {
5497
- all: "all"
5455
+ all: "all",
5456
+ speaker: "speaker"
5498
5457
  };
5499
5458
 
5500
5459
  // src/generated/assemblyai/api/assemblyAIAPI.ts
@@ -9386,15 +9345,18 @@ import axios9 from "axios";
9386
9345
  // src/generated/soniox/schema/index.ts
9387
9346
  var schema_exports4 = {};
9388
9347
  __export(schema_exports4, {
9348
+ TTSVoiceGender: () => TTSVoiceGender,
9389
9349
  TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
9390
9350
  TranscriptionMode: () => TranscriptionMode,
9391
9351
  TranscriptionStatus: () => TranscriptionStatus,
9392
- TranslationConfigType: () => TranslationConfigType
9352
+ TranslationConfigType: () => TranslationConfigType,
9353
+ UsageLogsSort: () => UsageLogsSort
9393
9354
  });
9394
9355
 
9395
9356
  // src/generated/soniox/schema/temporaryApiKeyUsageType.ts
9396
9357
  var TemporaryApiKeyUsageType = {
9397
- transcribe_websocket: "transcribe_websocket"
9358
+ transcribe_websocket: "transcribe_websocket",
9359
+ tts_rt: "tts_rt"
9398
9360
  };
9399
9361
 
9400
9362
  // src/generated/soniox/schema/transcriptionMode.ts
@@ -9409,6 +9371,19 @@ var TranslationConfigType = {
9409
9371
  two_way: "two_way"
9410
9372
  };
9411
9373
 
9374
+ // src/generated/soniox/schema/tTSVoiceGender.ts
9375
+ var TTSVoiceGender = {
9376
+ male: "male",
9377
+ female: "female",
9378
+ neutral: "neutral"
9379
+ };
9380
+
9381
+ // src/generated/soniox/schema/usageLogsSort.ts
9382
+ var UsageLogsSort = {
9383
+ end_time_asc: "end_time_asc",
9384
+ end_time_desc: "end_time_desc"
9385
+ };
9386
+
9412
9387
  // src/generated/soniox/api/sonioxPublicAPI.ts
9413
9388
  var uploadFile = (uploadFileBody2, options) => {
9414
9389
  const formData = new FormData();
@@ -10776,6 +10751,7 @@ __export(deepgramAPI_zod_exports, {
10776
10751
  speakGenerateQueryMipOptOutDefault: () => speakGenerateQueryMipOptOutDefault,
10777
10752
  speakGenerateQueryModelDefault: () => speakGenerateQueryModelDefault,
10778
10753
  speakGenerateQueryParams: () => speakGenerateQueryParams,
10754
+ speakGenerateQuerySpeedDefault: () => speakGenerateQuerySpeedDefault,
10779
10755
  speakGenerateResponse: () => speakGenerateResponse
10780
10756
  });
10781
10757
  import { z as zod } from "zod";
@@ -10830,6 +10806,9 @@ var listenTranscribeQueryParams = zod.object({
10830
10806
  diarize: zod.boolean().optional().describe(
10831
10807
  "Recognize speaker changes. Each word in the transcript will be assigned a speaker number starting at 0"
10832
10808
  ),
10809
+ diarize_model: zod.enum(["latest", "v1", "v2"]).optional().describe(
10810
+ "Select and enable a specific batch diarization model version. If specifying this parameter, you should not set the deprecated `diarize=true` parameter. Not accepted on streaming requests."
10811
+ ),
10833
10812
  dictation: zod.boolean().optional().describe("Dictation mode for controlling formatting with dictated speech"),
10834
10813
  encoding: zod.enum(["linear16", "flac", "mulaw", "amr-nb", "amr-wb", "opus", "speex", "g729"]).optional().describe("Specify the expected encoding of your submitted audio"),
10835
10814
  filler_words: zod.boolean().optional().describe('Filler Words can help transcribe interruptions in your audio, like "uh" and "um"'),
@@ -11095,6 +11074,7 @@ var listenTranscribeResponse = zod.object({
11095
11074
  var speakGenerateQueryCallbackMethodDefault = "POST";
11096
11075
  var speakGenerateQueryMipOptOutDefault = false;
11097
11076
  var speakGenerateQueryModelDefault = "aura-asteria-en";
11077
+ var speakGenerateQuerySpeedDefault = 1;
11098
11078
  var speakGenerateQueryParams = zod.object({
11099
11079
  callback: zod.string().optional().describe("URL to which we'll make the callback request"),
11100
11080
  callback_method: zod.enum(["POST", "PUT"]).default(speakGenerateQueryCallbackMethodDefault).describe("HTTP method by which the callback request will be made"),
@@ -11206,6 +11186,9 @@ var speakGenerateQueryParams = zod.object({
11206
11186
  zod.enum(["22050"]).describe("Encoding - mp3. Sample rate is fixed and not configurable (22050 Hz).")
11207
11187
  ).or(zod.enum(["48000"]).describe("Encoding - opus. Sample rate is fixed at 48000 Hz.")).optional().describe(
11208
11188
  "Sample Rate specifies the sample rate for the output audio. Based on the encoding, different sample rates are supported. For some encodings, the sample rate is not configurable"
11189
+ ),
11190
+ speed: zod.number().default(speakGenerateQuerySpeedDefault).describe(
11191
+ "Speaking rate multiplier that adjusts the pace of generated speech while preserving natural prosody and voice quality. Not yet supported in all languages."
11209
11192
  )
11210
11193
  });
11211
11194
  var speakGenerateHeader = zod.object({
@@ -11530,6 +11513,7 @@ __export(assemblyAIAPI_zod_exports, {
11530
11513
  createTranscriptBodyRedactPiiAudioDefault: () => createTranscriptBodyRedactPiiAudioDefault,
11531
11514
  createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault: () => createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault,
11532
11515
  createTranscriptBodyRedactPiiDefault: () => createTranscriptBodyRedactPiiDefault,
11516
+ createTranscriptBodyRedactPiiReturnUnredactedDefault: () => createTranscriptBodyRedactPiiReturnUnredactedDefault,
11533
11517
  createTranscriptBodySentimentAnalysisDefault: () => createTranscriptBodySentimentAnalysisDefault,
11534
11518
  createTranscriptBodySpeakerLabelsDefault: () => createTranscriptBodySpeakerLabelsDefault,
11535
11519
  createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault: () => createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault,
@@ -11600,6 +11584,7 @@ var createTranscriptBodyPunctuateDefault = true;
11600
11584
  var createTranscriptBodyRedactPiiDefault = false;
11601
11585
  var createTranscriptBodyRedactPiiAudioDefault = false;
11602
11586
  var createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault = false;
11587
+ var createTranscriptBodyRedactPiiReturnUnredactedDefault = false;
11603
11588
  var createTranscriptBodySentimentAnalysisDefault = false;
11604
11589
  var createTranscriptBodySpeakerLabelsDefault = false;
11605
11590
  var createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault = 1;
@@ -11638,7 +11623,7 @@ var createTranscriptBody = zod3.object({
11638
11623
  "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
11639
11624
  ),
11640
11625
  disfluencies: zod3.boolean().optional().describe(
11641
- 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
11626
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
11642
11627
  ),
11643
11628
  domain: zod3.string().nullish().describe(
11644
11629
  'Enable domain-specific transcription models to improve accuracy for specialized terminology. Set to `"medical-v1"` to enable [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) for improved accuracy of medical terms such as medications, procedures, conditions, and dosages.\n\nSupported languages: English (`en`), Spanish (`es`), German (`de`), French (`fr`). If used with an unsupported language, the parameter is ignored and a warning is returned.\n'
@@ -11945,12 +11930,20 @@ var createTranscriptBody = zod3.object({
11945
11930
  "email_address",
11946
11931
  "event",
11947
11932
  "filename",
11933
+ "gender",
11948
11934
  "gender_sexuality",
11949
11935
  "healthcare_number",
11950
11936
  "injury",
11951
11937
  "ip_address",
11952
11938
  "language",
11953
11939
  "location",
11940
+ "location_address",
11941
+ "location_address_street",
11942
+ "location_city",
11943
+ "location_coordinate",
11944
+ "location_country",
11945
+ "location_state",
11946
+ "location_zip",
11954
11947
  "marital_status",
11955
11948
  "medical_condition",
11956
11949
  "medical_process",
@@ -11959,6 +11952,7 @@ var createTranscriptBody = zod3.object({
11959
11952
  "number_sequence",
11960
11953
  "occupation",
11961
11954
  "organization",
11955
+ "organization_medical_facility",
11962
11956
  "passport_number",
11963
11957
  "password",
11964
11958
  "person_age",
@@ -11967,6 +11961,7 @@ var createTranscriptBody = zod3.object({
11967
11961
  "physical_attribute",
11968
11962
  "political_affiliation",
11969
11963
  "religion",
11964
+ "sexuality",
11970
11965
  "statistics",
11971
11966
  "time",
11972
11967
  "url",
@@ -11974,15 +11969,20 @@ var createTranscriptBody = zod3.object({
11974
11969
  "username",
11975
11970
  "vehicle_id",
11976
11971
  "zodiac_sign"
11977
- ]).describe("The type of PII to redact")
11972
+ ]).describe(
11973
+ "The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
11974
+ )
11978
11975
  ).optional().describe(
11979
11976
  "The list of PII Redaction policies to enable. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
11980
11977
  ),
11981
11978
  redact_pii_sub: zod3.enum(["entity_name", "hash"]).describe(
11982
- "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
11979
+ "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
11983
11980
  ).or(zod3.null()).optional().describe(
11984
11981
  "The replacement logic for detected PII, can be `entity_type` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
11985
11982
  ),
11983
+ redact_pii_return_unredacted: zod3.boolean().optional().describe(
11984
+ "When set to `true`, returns the original unredacted transcript alongside the redacted one in the same response. Requires `redact_pii` to be `true`, otherwise a 400 error is returned.\n\nWhen enabled, the response includes the additional fields `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. The existing `text`, `words`, and `utterances` fields remain fully redacted. When disabled (default), the response is unchanged and contains only the redacted transcript. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details.\n"
11985
+ ),
11986
11986
  sentiment_analysis: zod3.boolean().optional().describe(
11987
11987
  "Enable [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech), can be true or false"
11988
11988
  ),
@@ -12080,10 +12080,10 @@ var createTranscriptBody = zod3.object({
12080
12080
  ),
12081
12081
  summary_model: zod3.enum(["informative", "conversational", "catchy"]).optional().describe("The model to summarize the transcript"),
12082
12082
  summary_type: zod3.enum(["bullets", "bullets_verbose", "gist", "headline", "paragraph"]).optional().describe("The type of summary"),
12083
- remove_audio_tags: zod3.enum(["all"]).describe(
12084
- 'Remove [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) from the transcript text. Set to `"all"` to remove all audio tags.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
12083
+ remove_audio_tags: zod3.enum(["all", "speaker"]).describe(
12084
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
12085
12085
  ).or(zod3.null()).optional().describe(
12086
- 'Remove [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) from the transcript text. Set to `"all"` to remove all audio tags.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
12086
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
12087
12087
  ),
12088
12088
  temperature: zod3.number().optional().describe(
12089
12089
  "Control the amount of randomness injected into the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
@@ -12217,7 +12217,7 @@ var createTranscriptResponse = zod3.object({
12217
12217
  "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
12218
12218
  ),
12219
12219
  disfluencies: zod3.boolean().nullish().describe(
12220
- 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
12220
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
12221
12221
  ),
12222
12222
  domain: zod3.string().nullish().describe(
12223
12223
  'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
@@ -12240,12 +12240,20 @@ var createTranscriptResponse = zod3.object({
12240
12240
  "email_address",
12241
12241
  "event",
12242
12242
  "filename",
12243
+ "gender",
12243
12244
  "gender_sexuality",
12244
12245
  "healthcare_number",
12245
12246
  "injury",
12246
12247
  "ip_address",
12247
12248
  "language",
12248
12249
  "location",
12250
+ "location_address",
12251
+ "location_address_street",
12252
+ "location_city",
12253
+ "location_coordinate",
12254
+ "location_country",
12255
+ "location_state",
12256
+ "location_zip",
12249
12257
  "marital_status",
12250
12258
  "medical_condition",
12251
12259
  "medical_process",
@@ -12254,6 +12262,7 @@ var createTranscriptResponse = zod3.object({
12254
12262
  "number_sequence",
12255
12263
  "occupation",
12256
12264
  "organization",
12265
+ "organization_medical_facility",
12257
12266
  "passport_number",
12258
12267
  "password",
12259
12268
  "person_age",
@@ -12262,6 +12271,7 @@ var createTranscriptResponse = zod3.object({
12262
12271
  "physical_attribute",
12263
12272
  "political_affiliation",
12264
12273
  "religion",
12274
+ "sexuality",
12265
12275
  "statistics",
12266
12276
  "time",
12267
12277
  "url",
@@ -12566,6 +12576,24 @@ var createTranscriptResponse = zod3.object({
12566
12576
  }).optional().describe(
12567
12577
  "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
12568
12578
  ),
12579
+ metadata: zod3.object({
12580
+ domain_used: zod3.string().nullish().describe(
12581
+ 'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
12582
+ ),
12583
+ warnings: zod3.array(
12584
+ zod3.object({
12585
+ message: zod3.string().describe("A human-readable description of the warning.")
12586
+ }).describe(
12587
+ "A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
12588
+ )
12589
+ ).optional().describe(
12590
+ "Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
12591
+ )
12592
+ }).describe(
12593
+ "Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
12594
+ ).or(zod3.null()).optional().describe(
12595
+ "Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
12596
+ ),
12569
12597
  multichannel: zod3.boolean().nullish().describe(
12570
12598
  "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
12571
12599
  ),
@@ -12613,12 +12641,20 @@ var createTranscriptResponse = zod3.object({
12613
12641
  "email_address",
12614
12642
  "event",
12615
12643
  "filename",
12644
+ "gender",
12616
12645
  "gender_sexuality",
12617
12646
  "healthcare_number",
12618
12647
  "injury",
12619
12648
  "ip_address",
12620
12649
  "language",
12621
12650
  "location",
12651
+ "location_address",
12652
+ "location_address_street",
12653
+ "location_city",
12654
+ "location_coordinate",
12655
+ "location_country",
12656
+ "location_state",
12657
+ "location_zip",
12622
12658
  "marital_status",
12623
12659
  "medical_condition",
12624
12660
  "medical_process",
@@ -12627,6 +12663,7 @@ var createTranscriptResponse = zod3.object({
12627
12663
  "number_sequence",
12628
12664
  "occupation",
12629
12665
  "organization",
12666
+ "organization_medical_facility",
12630
12667
  "passport_number",
12631
12668
  "password",
12632
12669
  "person_age",
@@ -12635,6 +12672,7 @@ var createTranscriptResponse = zod3.object({
12635
12672
  "physical_attribute",
12636
12673
  "political_affiliation",
12637
12674
  "religion",
12675
+ "sexuality",
12638
12676
  "statistics",
12639
12677
  "time",
12640
12678
  "url",
@@ -12642,12 +12680,17 @@ var createTranscriptResponse = zod3.object({
12642
12680
  "username",
12643
12681
  "vehicle_id",
12644
12682
  "zodiac_sign"
12645
- ]).describe("The type of PII to redact")
12683
+ ]).describe(
12684
+ "The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
12685
+ )
12646
12686
  ).nullish().describe(
12647
12687
  "The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
12648
12688
  ),
12649
12689
  redact_pii_sub: zod3.enum(["entity_name", "hash"]).optional().describe(
12650
- "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
12690
+ "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
12691
+ ),
12692
+ redact_pii_return_unredacted: zod3.boolean().nullish().describe(
12693
+ "Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
12651
12694
  ),
12652
12695
  sentiment_analysis: zod3.boolean().nullish().describe(
12653
12696
  "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
@@ -12784,20 +12827,23 @@ var createTranscriptResponse = zod3.object({
12784
12827
  "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
12785
12828
  ),
12786
12829
  summary_model: zod3.string().nullish().describe(
12787
- "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
12830
+ "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
12788
12831
  ),
12789
12832
  summary_type: zod3.string().nullish().describe(
12790
- "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
12833
+ "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
12791
12834
  ),
12792
- remove_audio_tags: zod3.enum(["all"]).describe(
12793
- "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
12835
+ remove_audio_tags: zod3.enum(["all", "speaker"]).describe(
12836
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
12794
12837
  ).or(zod3.null()).optional().describe(
12795
- "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
12838
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
12796
12839
  ),
12797
12840
  temperature: zod3.number().nullish().describe(
12798
12841
  "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
12799
12842
  ),
12800
12843
  text: zod3.string().nullish().describe("The textual transcript of your media file"),
12844
+ unredacted_text: zod3.string().nullish().describe(
12845
+ "The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
12846
+ ),
12801
12847
  throttled: zod3.boolean().nullish().describe(
12802
12848
  "True while a request is throttled and false when a request is no longer throttled"
12803
12849
  ),
@@ -12834,6 +12880,39 @@ var createTranscriptResponse = zod3.object({
12834
12880
  ).nullish().describe(
12835
12881
  "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
12836
12882
  ),
12883
+ unredacted_utterances: zod3.array(
12884
+ zod3.object({
12885
+ confidence: zod3.number().describe("The confidence score for the transcript of this utterance"),
12886
+ start: zod3.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
12887
+ end: zod3.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
12888
+ text: zod3.string().describe("The text for this utterance"),
12889
+ words: zod3.array(
12890
+ zod3.object({
12891
+ confidence: zod3.number().describe("The confidence score for the transcript of this word"),
12892
+ start: zod3.number().describe("The starting time, in milliseconds, for the word"),
12893
+ end: zod3.number().describe("The ending time, in milliseconds, for the word"),
12894
+ text: zod3.string().describe("The text of the word"),
12895
+ channel: zod3.string().nullish().describe(
12896
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
12897
+ ),
12898
+ speaker: zod3.string().nullable().describe(
12899
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
12900
+ )
12901
+ })
12902
+ ).describe("The words in the utterance."),
12903
+ channel: zod3.string().nullish().describe(
12904
+ "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
12905
+ ),
12906
+ speaker: zod3.string().describe(
12907
+ 'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
12908
+ ),
12909
+ translated_texts: zod3.record(zod3.string(), zod3.string()).optional().describe(
12910
+ 'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
12911
+ )
12912
+ })
12913
+ ).nullish().describe(
12914
+ "The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
12915
+ ),
12837
12916
  webhook_auth: zod3.boolean().describe(
12838
12917
  "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
12839
12918
  ),
@@ -12862,6 +12941,22 @@ var createTranscriptResponse = zod3.object({
12862
12941
  ).nullish().describe(
12863
12942
  "An array of temporally-sequential word objects, one for each word in the transcript.\n"
12864
12943
  ),
12944
+ unredacted_words: zod3.array(
12945
+ zod3.object({
12946
+ confidence: zod3.number().describe("The confidence score for the transcript of this word"),
12947
+ start: zod3.number().describe("The starting time, in milliseconds, for the word"),
12948
+ end: zod3.number().describe("The ending time, in milliseconds, for the word"),
12949
+ text: zod3.string().describe("The text of the word"),
12950
+ channel: zod3.string().nullish().describe(
12951
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
12952
+ ),
12953
+ speaker: zod3.string().nullable().describe(
12954
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
12955
+ )
12956
+ })
12957
+ ).nullish().describe(
12958
+ "The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
12959
+ ),
12865
12960
  acoustic_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
12866
12961
  custom_topics: zod3.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
12867
12962
  language_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
@@ -13037,7 +13132,7 @@ var getTranscriptResponse = zod3.object({
13037
13132
  "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
13038
13133
  ),
13039
13134
  disfluencies: zod3.boolean().nullish().describe(
13040
- 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
13135
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
13041
13136
  ),
13042
13137
  domain: zod3.string().nullish().describe(
13043
13138
  'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
@@ -13060,12 +13155,20 @@ var getTranscriptResponse = zod3.object({
13060
13155
  "email_address",
13061
13156
  "event",
13062
13157
  "filename",
13158
+ "gender",
13063
13159
  "gender_sexuality",
13064
13160
  "healthcare_number",
13065
13161
  "injury",
13066
13162
  "ip_address",
13067
13163
  "language",
13068
13164
  "location",
13165
+ "location_address",
13166
+ "location_address_street",
13167
+ "location_city",
13168
+ "location_coordinate",
13169
+ "location_country",
13170
+ "location_state",
13171
+ "location_zip",
13069
13172
  "marital_status",
13070
13173
  "medical_condition",
13071
13174
  "medical_process",
@@ -13074,6 +13177,7 @@ var getTranscriptResponse = zod3.object({
13074
13177
  "number_sequence",
13075
13178
  "occupation",
13076
13179
  "organization",
13180
+ "organization_medical_facility",
13077
13181
  "passport_number",
13078
13182
  "password",
13079
13183
  "person_age",
@@ -13082,6 +13186,7 @@ var getTranscriptResponse = zod3.object({
13082
13186
  "physical_attribute",
13083
13187
  "political_affiliation",
13084
13188
  "religion",
13189
+ "sexuality",
13085
13190
  "statistics",
13086
13191
  "time",
13087
13192
  "url",
@@ -13386,6 +13491,24 @@ var getTranscriptResponse = zod3.object({
13386
13491
  }).optional().describe(
13387
13492
  "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
13388
13493
  ),
13494
+ metadata: zod3.object({
13495
+ domain_used: zod3.string().nullish().describe(
13496
+ 'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
13497
+ ),
13498
+ warnings: zod3.array(
13499
+ zod3.object({
13500
+ message: zod3.string().describe("A human-readable description of the warning.")
13501
+ }).describe(
13502
+ "A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
13503
+ )
13504
+ ).optional().describe(
13505
+ "Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
13506
+ )
13507
+ }).describe(
13508
+ "Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
13509
+ ).or(zod3.null()).optional().describe(
13510
+ "Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
13511
+ ),
13389
13512
  multichannel: zod3.boolean().nullish().describe(
13390
13513
  "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
13391
13514
  ),
@@ -13433,12 +13556,20 @@ var getTranscriptResponse = zod3.object({
13433
13556
  "email_address",
13434
13557
  "event",
13435
13558
  "filename",
13559
+ "gender",
13436
13560
  "gender_sexuality",
13437
13561
  "healthcare_number",
13438
13562
  "injury",
13439
13563
  "ip_address",
13440
13564
  "language",
13441
13565
  "location",
13566
+ "location_address",
13567
+ "location_address_street",
13568
+ "location_city",
13569
+ "location_coordinate",
13570
+ "location_country",
13571
+ "location_state",
13572
+ "location_zip",
13442
13573
  "marital_status",
13443
13574
  "medical_condition",
13444
13575
  "medical_process",
@@ -13447,6 +13578,7 @@ var getTranscriptResponse = zod3.object({
13447
13578
  "number_sequence",
13448
13579
  "occupation",
13449
13580
  "organization",
13581
+ "organization_medical_facility",
13450
13582
  "passport_number",
13451
13583
  "password",
13452
13584
  "person_age",
@@ -13455,6 +13587,7 @@ var getTranscriptResponse = zod3.object({
13455
13587
  "physical_attribute",
13456
13588
  "political_affiliation",
13457
13589
  "religion",
13590
+ "sexuality",
13458
13591
  "statistics",
13459
13592
  "time",
13460
13593
  "url",
@@ -13462,12 +13595,17 @@ var getTranscriptResponse = zod3.object({
13462
13595
  "username",
13463
13596
  "vehicle_id",
13464
13597
  "zodiac_sign"
13465
- ]).describe("The type of PII to redact")
13598
+ ]).describe(
13599
+ "The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
13600
+ )
13466
13601
  ).nullish().describe(
13467
13602
  "The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
13468
13603
  ),
13469
13604
  redact_pii_sub: zod3.enum(["entity_name", "hash"]).optional().describe(
13470
- "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
13605
+ "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
13606
+ ),
13607
+ redact_pii_return_unredacted: zod3.boolean().nullish().describe(
13608
+ "Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
13471
13609
  ),
13472
13610
  sentiment_analysis: zod3.boolean().nullish().describe(
13473
13611
  "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
@@ -13604,20 +13742,23 @@ var getTranscriptResponse = zod3.object({
13604
13742
  "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
13605
13743
  ),
13606
13744
  summary_model: zod3.string().nullish().describe(
13607
- "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
13745
+ "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
13608
13746
  ),
13609
13747
  summary_type: zod3.string().nullish().describe(
13610
- "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
13748
+ "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
13611
13749
  ),
13612
- remove_audio_tags: zod3.enum(["all"]).describe(
13613
- "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
13750
+ remove_audio_tags: zod3.enum(["all", "speaker"]).describe(
13751
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
13614
13752
  ).or(zod3.null()).optional().describe(
13615
- "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
13753
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
13616
13754
  ),
13617
13755
  temperature: zod3.number().nullish().describe(
13618
13756
  "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
13619
13757
  ),
13620
13758
  text: zod3.string().nullish().describe("The textual transcript of your media file"),
13759
+ unredacted_text: zod3.string().nullish().describe(
13760
+ "The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
13761
+ ),
13621
13762
  throttled: zod3.boolean().nullish().describe(
13622
13763
  "True while a request is throttled and false when a request is no longer throttled"
13623
13764
  ),
@@ -13654,6 +13795,39 @@ var getTranscriptResponse = zod3.object({
13654
13795
  ).nullish().describe(
13655
13796
  "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
13656
13797
  ),
13798
+ unredacted_utterances: zod3.array(
13799
+ zod3.object({
13800
+ confidence: zod3.number().describe("The confidence score for the transcript of this utterance"),
13801
+ start: zod3.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
13802
+ end: zod3.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
13803
+ text: zod3.string().describe("The text for this utterance"),
13804
+ words: zod3.array(
13805
+ zod3.object({
13806
+ confidence: zod3.number().describe("The confidence score for the transcript of this word"),
13807
+ start: zod3.number().describe("The starting time, in milliseconds, for the word"),
13808
+ end: zod3.number().describe("The ending time, in milliseconds, for the word"),
13809
+ text: zod3.string().describe("The text of the word"),
13810
+ channel: zod3.string().nullish().describe(
13811
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
13812
+ ),
13813
+ speaker: zod3.string().nullable().describe(
13814
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
13815
+ )
13816
+ })
13817
+ ).describe("The words in the utterance."),
13818
+ channel: zod3.string().nullish().describe(
13819
+ "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
13820
+ ),
13821
+ speaker: zod3.string().describe(
13822
+ 'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
13823
+ ),
13824
+ translated_texts: zod3.record(zod3.string(), zod3.string()).optional().describe(
13825
+ 'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
13826
+ )
13827
+ })
13828
+ ).nullish().describe(
13829
+ "The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
13830
+ ),
13657
13831
  webhook_auth: zod3.boolean().describe(
13658
13832
  "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
13659
13833
  ),
@@ -13682,6 +13856,22 @@ var getTranscriptResponse = zod3.object({
13682
13856
  ).nullish().describe(
13683
13857
  "An array of temporally-sequential word objects, one for each word in the transcript.\n"
13684
13858
  ),
13859
+ unredacted_words: zod3.array(
13860
+ zod3.object({
13861
+ confidence: zod3.number().describe("The confidence score for the transcript of this word"),
13862
+ start: zod3.number().describe("The starting time, in milliseconds, for the word"),
13863
+ end: zod3.number().describe("The ending time, in milliseconds, for the word"),
13864
+ text: zod3.string().describe("The text of the word"),
13865
+ channel: zod3.string().nullish().describe(
13866
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
13867
+ ),
13868
+ speaker: zod3.string().nullable().describe(
13869
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
13870
+ )
13871
+ })
13872
+ ).nullish().describe(
13873
+ "The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
13874
+ ),
13685
13875
  acoustic_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
13686
13876
  custom_topics: zod3.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
13687
13877
  language_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
@@ -13817,7 +14007,7 @@ var deleteTranscriptResponse = zod3.object({
13817
14007
  "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
13818
14008
  ),
13819
14009
  disfluencies: zod3.boolean().nullish().describe(
13820
- 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
14010
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
13821
14011
  ),
13822
14012
  domain: zod3.string().nullish().describe(
13823
14013
  'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
@@ -13840,12 +14030,20 @@ var deleteTranscriptResponse = zod3.object({
13840
14030
  "email_address",
13841
14031
  "event",
13842
14032
  "filename",
14033
+ "gender",
13843
14034
  "gender_sexuality",
13844
14035
  "healthcare_number",
13845
14036
  "injury",
13846
14037
  "ip_address",
13847
14038
  "language",
13848
14039
  "location",
14040
+ "location_address",
14041
+ "location_address_street",
14042
+ "location_city",
14043
+ "location_coordinate",
14044
+ "location_country",
14045
+ "location_state",
14046
+ "location_zip",
13849
14047
  "marital_status",
13850
14048
  "medical_condition",
13851
14049
  "medical_process",
@@ -13854,6 +14052,7 @@ var deleteTranscriptResponse = zod3.object({
13854
14052
  "number_sequence",
13855
14053
  "occupation",
13856
14054
  "organization",
14055
+ "organization_medical_facility",
13857
14056
  "passport_number",
13858
14057
  "password",
13859
14058
  "person_age",
@@ -13862,6 +14061,7 @@ var deleteTranscriptResponse = zod3.object({
13862
14061
  "physical_attribute",
13863
14062
  "political_affiliation",
13864
14063
  "religion",
14064
+ "sexuality",
13865
14065
  "statistics",
13866
14066
  "time",
13867
14067
  "url",
@@ -14166,6 +14366,24 @@ var deleteTranscriptResponse = zod3.object({
14166
14366
  }).optional().describe(
14167
14367
  "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
14168
14368
  ),
14369
+ metadata: zod3.object({
14370
+ domain_used: zod3.string().nullish().describe(
14371
+ 'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
14372
+ ),
14373
+ warnings: zod3.array(
14374
+ zod3.object({
14375
+ message: zod3.string().describe("A human-readable description of the warning.")
14376
+ }).describe(
14377
+ "A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
14378
+ )
14379
+ ).optional().describe(
14380
+ "Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
14381
+ )
14382
+ }).describe(
14383
+ "Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
14384
+ ).or(zod3.null()).optional().describe(
14385
+ "Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
14386
+ ),
14169
14387
  multichannel: zod3.boolean().nullish().describe(
14170
14388
  "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
14171
14389
  ),
@@ -14213,12 +14431,20 @@ var deleteTranscriptResponse = zod3.object({
14213
14431
  "email_address",
14214
14432
  "event",
14215
14433
  "filename",
14434
+ "gender",
14216
14435
  "gender_sexuality",
14217
14436
  "healthcare_number",
14218
14437
  "injury",
14219
14438
  "ip_address",
14220
14439
  "language",
14221
14440
  "location",
14441
+ "location_address",
14442
+ "location_address_street",
14443
+ "location_city",
14444
+ "location_coordinate",
14445
+ "location_country",
14446
+ "location_state",
14447
+ "location_zip",
14222
14448
  "marital_status",
14223
14449
  "medical_condition",
14224
14450
  "medical_process",
@@ -14227,6 +14453,7 @@ var deleteTranscriptResponse = zod3.object({
14227
14453
  "number_sequence",
14228
14454
  "occupation",
14229
14455
  "organization",
14456
+ "organization_medical_facility",
14230
14457
  "passport_number",
14231
14458
  "password",
14232
14459
  "person_age",
@@ -14235,6 +14462,7 @@ var deleteTranscriptResponse = zod3.object({
14235
14462
  "physical_attribute",
14236
14463
  "political_affiliation",
14237
14464
  "religion",
14465
+ "sexuality",
14238
14466
  "statistics",
14239
14467
  "time",
14240
14468
  "url",
@@ -14242,12 +14470,17 @@ var deleteTranscriptResponse = zod3.object({
14242
14470
  "username",
14243
14471
  "vehicle_id",
14244
14472
  "zodiac_sign"
14245
- ]).describe("The type of PII to redact")
14473
+ ]).describe(
14474
+ "The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
14475
+ )
14246
14476
  ).nullish().describe(
14247
14477
  "The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
14248
14478
  ),
14249
14479
  redact_pii_sub: zod3.enum(["entity_name", "hash"]).optional().describe(
14250
- "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
14480
+ "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
14481
+ ),
14482
+ redact_pii_return_unredacted: zod3.boolean().nullish().describe(
14483
+ "Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
14251
14484
  ),
14252
14485
  sentiment_analysis: zod3.boolean().nullish().describe(
14253
14486
  "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
@@ -14384,20 +14617,23 @@ var deleteTranscriptResponse = zod3.object({
14384
14617
  "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
14385
14618
  ),
14386
14619
  summary_model: zod3.string().nullish().describe(
14387
- "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
14620
+ "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
14388
14621
  ),
14389
14622
  summary_type: zod3.string().nullish().describe(
14390
- "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
14623
+ "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
14391
14624
  ),
14392
- remove_audio_tags: zod3.enum(["all"]).describe(
14393
- "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
14625
+ remove_audio_tags: zod3.enum(["all", "speaker"]).describe(
14626
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
14394
14627
  ).or(zod3.null()).optional().describe(
14395
- "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
14628
+ 'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
14396
14629
  ),
14397
14630
  temperature: zod3.number().nullish().describe(
14398
14631
  "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
14399
14632
  ),
14400
14633
  text: zod3.string().nullish().describe("The textual transcript of your media file"),
14634
+ unredacted_text: zod3.string().nullish().describe(
14635
+ "The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
14636
+ ),
14401
14637
  throttled: zod3.boolean().nullish().describe(
14402
14638
  "True while a request is throttled and false when a request is no longer throttled"
14403
14639
  ),
@@ -14434,6 +14670,39 @@ var deleteTranscriptResponse = zod3.object({
14434
14670
  ).nullish().describe(
14435
14671
  "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
14436
14672
  ),
14673
+ unredacted_utterances: zod3.array(
14674
+ zod3.object({
14675
+ confidence: zod3.number().describe("The confidence score for the transcript of this utterance"),
14676
+ start: zod3.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
14677
+ end: zod3.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
14678
+ text: zod3.string().describe("The text for this utterance"),
14679
+ words: zod3.array(
14680
+ zod3.object({
14681
+ confidence: zod3.number().describe("The confidence score for the transcript of this word"),
14682
+ start: zod3.number().describe("The starting time, in milliseconds, for the word"),
14683
+ end: zod3.number().describe("The ending time, in milliseconds, for the word"),
14684
+ text: zod3.string().describe("The text of the word"),
14685
+ channel: zod3.string().nullish().describe(
14686
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14687
+ ),
14688
+ speaker: zod3.string().nullable().describe(
14689
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
14690
+ )
14691
+ })
14692
+ ).describe("The words in the utterance."),
14693
+ channel: zod3.string().nullish().describe(
14694
+ "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14695
+ ),
14696
+ speaker: zod3.string().describe(
14697
+ 'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
14698
+ ),
14699
+ translated_texts: zod3.record(zod3.string(), zod3.string()).optional().describe(
14700
+ 'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
14701
+ )
14702
+ })
14703
+ ).nullish().describe(
14704
+ "The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
14705
+ ),
14437
14706
  webhook_auth: zod3.boolean().describe(
14438
14707
  "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
14439
14708
  ),
@@ -14462,6 +14731,22 @@ var deleteTranscriptResponse = zod3.object({
14462
14731
  ).nullish().describe(
14463
14732
  "An array of temporally-sequential word objects, one for each word in the transcript.\n"
14464
14733
  ),
14734
+ unredacted_words: zod3.array(
14735
+ zod3.object({
14736
+ confidence: zod3.number().describe("The confidence score for the transcript of this word"),
14737
+ start: zod3.number().describe("The starting time, in milliseconds, for the word"),
14738
+ end: zod3.number().describe("The ending time, in milliseconds, for the word"),
14739
+ text: zod3.string().describe("The text of the word"),
14740
+ channel: zod3.string().nullish().describe(
14741
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
14742
+ ),
14743
+ speaker: zod3.string().nullable().describe(
14744
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
14745
+ )
14746
+ })
14747
+ ).nullish().describe(
14748
+ "The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
14749
+ ),
14465
14750
  acoustic_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
14466
14751
  custom_topics: zod3.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
14467
14752
  language_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
@@ -14617,7 +14902,21 @@ var streamingTranscriberParams = zod4.object({
14617
14902
  inactivityTimeout: zod4.number().optional().describe("From SDK v3"),
14618
14903
  speakerLabels: zod4.boolean().optional().describe("From SDK v3"),
14619
14904
  maxSpeakers: zod4.number().optional().describe("From SDK v3"),
14620
- llmGateway: zod4.unknown().optional().describe("From SDK v3")
14905
+ voiceFocus: zod4.unknown().optional().describe("From SDK v3"),
14906
+ voiceFocusThreshold: zod4.number().optional().describe("From SDK v3"),
14907
+ continuousPartials: zod4.boolean().optional().describe("From SDK v3"),
14908
+ interruptionDelay: zod4.number().optional().describe("From SDK v3"),
14909
+ turnLeftPadMs: zod4.number().optional().describe("From SDK v3"),
14910
+ customerSupportAudioCapture: zod4.boolean().optional().describe("From SDK v3"),
14911
+ includePartialTurns: zod4.boolean().optional().describe("From SDK v3"),
14912
+ redactPii: zod4.boolean().optional().describe("From SDK v3"),
14913
+ redactPiiPolicies: zod4.unknown().optional().describe("From SDK v3"),
14914
+ redactPiiSub: zod4.unknown().optional().describe("From SDK v3"),
14915
+ llmGateway: zod4.unknown().optional().describe("From SDK v3"),
14916
+ webhookUrl: zod4.string().optional().describe("From SDK v3"),
14917
+ webhookAuthHeaderName: zod4.string().optional().describe("From SDK v3"),
14918
+ webhookAuthHeaderValue: zod4.string().optional().describe("From SDK v3"),
14919
+ mode: zod4.unknown().describe("From SDK v3")
14621
14920
  });
14622
14921
  var streamingUpdateConfigParams = zod4.object({
14623
14922
  end_utterance_silence_threshold: zod4.number().min(0).max(2e4).optional().describe("The duration threshold in milliseconds"),
@@ -14629,7 +14928,9 @@ var streamingUpdateConfigParams = zod4.object({
14629
14928
  format_turns: zod4.boolean().optional().describe("From SDK v3"),
14630
14929
  keyterms_prompt: zod4.array(zod4.string()).optional().describe("From SDK v3"),
14631
14930
  prompt: zod4.string().optional().describe("From SDK v3"),
14632
- filter_profanity: zod4.boolean().optional().describe("From SDK v3")
14931
+ filter_profanity: zod4.boolean().optional().describe("From SDK v3"),
14932
+ interruption_delay: zod4.number().optional().describe("From SDK v3"),
14933
+ turn_left_pad_ms: zod4.number().optional().describe("From SDK v3")
14633
14934
  });
14634
14935
 
14635
14936
  // src/generated/gladia/api/gladiaControlAPI.zod.ts
@@ -15378,7 +15679,7 @@ var preRecordedControllerInitPreRecordedJobV2BodyNamedEntityRecognitionDefault =
15378
15679
  var preRecordedControllerInitPreRecordedJobV2BodyCustomSpellingDefault = false;
15379
15680
  var preRecordedControllerInitPreRecordedJobV2BodySentimentAnalysisDefault = false;
15380
15681
  var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmDefault = false;
15381
- var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
15682
+ var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
15382
15683
  var preRecordedControllerInitPreRecordedJobV2BodyPiiRedactionDefault = false;
15383
15684
  var preRecordedControllerInitPreRecordedJobV2BodySentencesDefault = false;
15384
15685
  var preRecordedControllerInitPreRecordedJobV2BodyPunctuationEnhancedDefault = false;
@@ -15667,23 +15968,23 @@ var preRecordedControllerInitPreRecordedJobV2Body = zod5.object({
15667
15968
  "Forces the translation to use informal language forms when available in the target language."
15668
15969
  )
15669
15970
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
15670
- summarization: zod5.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
15971
+ summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
15671
15972
  summarization_config: zod5.object({
15672
15973
  type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(preRecordedControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault).describe("The type of summarization to apply")
15673
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
15974
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
15674
15975
  named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
15675
15976
  custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
15676
15977
  custom_spelling_config: zod5.object({
15677
15978
  spelling_dictionary: zod5.record(zod5.string(), zod5.array(zod5.string())).describe("The list of spelling applied on the audio transcription")
15678
15979
  }).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
15679
15980
  sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
15680
- audio_to_llm: zod5.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
15981
+ audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
15681
15982
  audio_to_llm_config: zod5.object({
15682
15983
  prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
15683
15984
  model: zod5.string().default(preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault).describe(
15684
15985
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
15685
15986
  )
15686
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
15987
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
15687
15988
  pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
15688
15989
  pii_redaction_config: zod5.object({
15689
15990
  entity_types: zod5.enum([
@@ -15938,7 +16239,7 @@ var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsNamed
15938
16239
  var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsCustomSpellingDefault = false;
15939
16240
  var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
15940
16241
  var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmDefault = false;
15941
- var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
16242
+ var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
15942
16243
  var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsPiiRedactionDefault = false;
15943
16244
  var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSentencesDefault = false;
15944
16245
  var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
@@ -16286,12 +16587,12 @@ var preRecordedControllerGetPreRecordedJobsV2Response = zod5.object({
16286
16587
  "Forces the translation to use informal language forms when available in the target language."
16287
16588
  )
16288
16589
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
16289
- summarization: zod5.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
16590
+ summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
16290
16591
  summarization_config: zod5.object({
16291
16592
  type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
16292
16593
  preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
16293
16594
  ).describe("The type of summarization to apply")
16294
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
16595
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
16295
16596
  named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
16296
16597
  custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
16297
16598
  custom_spelling_config: zod5.object({
@@ -16300,7 +16601,7 @@ var preRecordedControllerGetPreRecordedJobsV2Response = zod5.object({
16300
16601
  "**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
16301
16602
  ),
16302
16603
  sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
16303
- audio_to_llm: zod5.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
16604
+ audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
16304
16605
  audio_to_llm_config: zod5.object({
16305
16606
  prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
16306
16607
  model: zod5.string().default(
@@ -16308,7 +16609,7 @@ var preRecordedControllerGetPreRecordedJobsV2Response = zod5.object({
16308
16609
  ).describe(
16309
16610
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
16310
16611
  )
16311
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
16612
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
16312
16613
  pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
16313
16614
  pii_redaction_config: zod5.object({
16314
16615
  entity_types: zod5.enum([
@@ -17445,7 +17746,7 @@ var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsNamedEntityReco
17445
17746
  var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsCustomSpellingDefault = false;
17446
17747
  var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSentimentAnalysisDefault = false;
17447
17748
  var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmDefault = false;
17448
- var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
17749
+ var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
17449
17750
  var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsPiiRedactionDefault = false;
17450
17751
  var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSentencesDefault = false;
17451
17752
  var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsPunctuationEnhancedDefault = false;
@@ -17786,19 +18087,19 @@ var preRecordedControllerGetPreRecordedJobV2Response = zod5.object({
17786
18087
  "Forces the translation to use informal language forms when available in the target language."
17787
18088
  )
17788
18089
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
17789
- summarization: zod5.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
18090
+ summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
17790
18091
  summarization_config: zod5.object({
17791
18092
  type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
17792
18093
  preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSummarizationConfigTypeDefault
17793
18094
  ).describe("The type of summarization to apply")
17794
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
18095
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
17795
18096
  named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
17796
18097
  custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
17797
18098
  custom_spelling_config: zod5.object({
17798
18099
  spelling_dictionary: zod5.record(zod5.string(), zod5.array(zod5.string())).describe("The list of spelling applied on the audio transcription")
17799
18100
  }).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
17800
18101
  sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
17801
- audio_to_llm: zod5.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
18102
+ audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
17802
18103
  audio_to_llm_config: zod5.object({
17803
18104
  prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
17804
18105
  model: zod5.string().default(
@@ -17806,7 +18107,7 @@ var preRecordedControllerGetPreRecordedJobV2Response = zod5.object({
17806
18107
  ).describe(
17807
18108
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
17808
18109
  )
17809
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
18110
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
17810
18111
  pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
17811
18112
  pii_redaction_config: zod5.object({
17812
18113
  entity_types: zod5.enum([
@@ -18919,7 +19220,7 @@ var transcriptionControllerInitPreRecordedJobV2BodyNamedEntityRecognitionDefault
18919
19220
  var transcriptionControllerInitPreRecordedJobV2BodyCustomSpellingDefault = false;
18920
19221
  var transcriptionControllerInitPreRecordedJobV2BodySentimentAnalysisDefault = false;
18921
19222
  var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmDefault = false;
18922
- var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
19223
+ var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
18923
19224
  var transcriptionControllerInitPreRecordedJobV2BodyPiiRedactionDefault = false;
18924
19225
  var transcriptionControllerInitPreRecordedJobV2BodySentencesDefault = false;
18925
19226
  var transcriptionControllerInitPreRecordedJobV2BodyPunctuationEnhancedDefault = false;
@@ -19212,23 +19513,23 @@ var transcriptionControllerInitPreRecordedJobV2Body = zod5.object({
19212
19513
  "Forces the translation to use informal language forms when available in the target language."
19213
19514
  )
19214
19515
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
19215
- summarization: zod5.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
19516
+ summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
19216
19517
  summarization_config: zod5.object({
19217
19518
  type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(transcriptionControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault).describe("The type of summarization to apply")
19218
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
19519
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
19219
19520
  named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
19220
19521
  custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
19221
19522
  custom_spelling_config: zod5.object({
19222
19523
  spelling_dictionary: zod5.record(zod5.string(), zod5.array(zod5.string())).describe("The list of spelling applied on the audio transcription")
19223
19524
  }).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
19224
19525
  sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
19225
- audio_to_llm: zod5.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
19526
+ audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
19226
19527
  audio_to_llm_config: zod5.object({
19227
19528
  prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
19228
19529
  model: zod5.string().default(transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault).describe(
19229
19530
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
19230
19531
  )
19231
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
19532
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
19232
19533
  pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
19233
19534
  pii_redaction_config: zod5.object({
19234
19535
  entity_types: zod5.enum([
@@ -19486,7 +19787,7 @@ var transcriptionControllerListV2ResponseItemsItemRequestParamsNamedEntityRecogn
19486
19787
  var transcriptionControllerListV2ResponseItemsItemRequestParamsCustomSpellingDefault = false;
19487
19788
  var transcriptionControllerListV2ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
19488
19789
  var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmDefault = false;
19489
- var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
19790
+ var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
19490
19791
  var transcriptionControllerListV2ResponseItemsItemRequestParamsPiiRedactionDefault = false;
19491
19792
  var transcriptionControllerListV2ResponseItemsItemRequestParamsSentencesDefault = false;
19492
19793
  var transcriptionControllerListV2ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
@@ -19897,12 +20198,12 @@ var transcriptionControllerListV2Response = zod5.object({
19897
20198
  "Forces the translation to use informal language forms when available in the target language."
19898
20199
  )
19899
20200
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
19900
- summarization: zod5.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
20201
+ summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
19901
20202
  summarization_config: zod5.object({
19902
20203
  type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
19903
20204
  transcriptionControllerListV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
19904
20205
  ).describe("The type of summarization to apply")
19905
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
20206
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
19906
20207
  named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
19907
20208
  custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
19908
20209
  custom_spelling_config: zod5.object({
@@ -19911,7 +20212,7 @@ var transcriptionControllerListV2Response = zod5.object({
19911
20212
  "**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
19912
20213
  ),
19913
20214
  sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
19914
- audio_to_llm: zod5.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
20215
+ audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
19915
20216
  audio_to_llm_config: zod5.object({
19916
20217
  prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
19917
20218
  model: zod5.string().default(
@@ -19919,7 +20220,7 @@ var transcriptionControllerListV2Response = zod5.object({
19919
20220
  ).describe(
19920
20221
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
19921
20222
  )
19922
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
20223
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
19923
20224
  pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
19924
20225
  pii_redaction_config: zod5.object({
19925
20226
  entity_types: zod5.enum([
@@ -22237,7 +22538,7 @@ var transcriptionControllerGetTranscriptV2ResponseRequestParamsNamedEntityRecogn
22237
22538
  var transcriptionControllerGetTranscriptV2ResponseRequestParamsCustomSpellingDefault = false;
22238
22539
  var transcriptionControllerGetTranscriptV2ResponseRequestParamsSentimentAnalysisDefault = false;
22239
22540
  var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmDefault = false;
22240
- var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
22541
+ var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
22241
22542
  var transcriptionControllerGetTranscriptV2ResponseRequestParamsPiiRedactionDefault = false;
22242
22543
  var transcriptionControllerGetTranscriptV2ResponseRequestParamsSentencesDefault = false;
22243
22544
  var transcriptionControllerGetTranscriptV2ResponseRequestParamsPunctuationEnhancedDefault = false;
@@ -22642,19 +22943,19 @@ var transcriptionControllerGetTranscriptV2Response = zod5.discriminatedUnion("ki
22642
22943
  "Forces the translation to use informal language forms when available in the target language."
22643
22944
  )
22644
22945
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
22645
- summarization: zod5.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
22946
+ summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
22646
22947
  summarization_config: zod5.object({
22647
22948
  type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
22648
22949
  transcriptionControllerGetTranscriptV2ResponseRequestParamsSummarizationConfigTypeDefault
22649
22950
  ).describe("The type of summarization to apply")
22650
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
22951
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
22651
22952
  named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
22652
22953
  custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
22653
22954
  custom_spelling_config: zod5.object({
22654
22955
  spelling_dictionary: zod5.record(zod5.string(), zod5.array(zod5.string())).describe("The list of spelling applied on the audio transcription")
22655
22956
  }).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
22656
22957
  sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
22657
- audio_to_llm: zod5.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
22958
+ audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
22658
22959
  audio_to_llm_config: zod5.object({
22659
22960
  prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
22660
22961
  model: zod5.string().default(
@@ -22662,7 +22963,7 @@ var transcriptionControllerGetTranscriptV2Response = zod5.discriminatedUnion("ki
22662
22963
  ).describe(
22663
22964
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
22664
22965
  )
22665
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
22966
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
22666
22967
  pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
22667
22968
  pii_redaction_config: zod5.object({
22668
22969
  entity_types: zod5.enum([
@@ -25374,7 +25675,7 @@ var historyControllerGetListV1ResponseItemsItemRequestParamsNamedEntityRecogniti
25374
25675
  var historyControllerGetListV1ResponseItemsItemRequestParamsCustomSpellingDefault = false;
25375
25676
  var historyControllerGetListV1ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
25376
25677
  var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmDefault = false;
25377
- var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-3.5-turbo";
25678
+ var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
25378
25679
  var historyControllerGetListV1ResponseItemsItemRequestParamsPiiRedactionDefault = false;
25379
25680
  var historyControllerGetListV1ResponseItemsItemRequestParamsSentencesDefault = false;
25380
25681
  var historyControllerGetListV1ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
@@ -25785,12 +26086,12 @@ var historyControllerGetListV1Response = zod5.object({
25785
26086
  "Forces the translation to use informal language forms when available in the target language."
25786
26087
  )
25787
26088
  }).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
25788
- summarization: zod5.boolean().optional().describe("**[Beta]** Enable summarization for this audio"),
26089
+ summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
25789
26090
  summarization_config: zod5.object({
25790
26091
  type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
25791
26092
  historyControllerGetListV1ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
25792
26093
  ).describe("The type of summarization to apply")
25793
- }).optional().describe("**[Beta]** Summarization configuration, if `summarization` is enabled"),
26094
+ }).optional().describe("Summarization configuration, if `summarization` is enabled"),
25794
26095
  named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
25795
26096
  custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
25796
26097
  custom_spelling_config: zod5.object({
@@ -25799,7 +26100,7 @@ var historyControllerGetListV1Response = zod5.object({
25799
26100
  "**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
25800
26101
  ),
25801
26102
  sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
25802
- audio_to_llm: zod5.boolean().optional().describe("**[Alpha]** Enable audio to llm processing for this audio"),
26103
+ audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
25803
26104
  audio_to_llm_config: zod5.object({
25804
26105
  prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
25805
26106
  model: zod5.string().default(
@@ -25807,7 +26108,7 @@ var historyControllerGetListV1Response = zod5.object({
25807
26108
  ).describe(
25808
26109
  "The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
25809
26110
  )
25810
- }).optional().describe("**[Alpha]** Audio to llm configuration, if `audio_to_llm` is enabled"),
26111
+ }).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
25811
26112
  pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
25812
26113
  pii_redaction_config: zod5.object({
25813
26114
  entity_types: zod5.enum([
@@ -31052,6 +31353,7 @@ __export(openAIAudioRealtimeAPI_zod_exports, {
31052
31353
  createRealtimeClientSecretBodySessionPromptVariablesTypeDefault: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefault,
31053
31354
  createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultOne: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultOne,
31054
31355
  createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultTwo: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultTwo,
31356
+ createRealtimeClientSecretBodySessionReasoningEffortDefault: () => createRealtimeClientSecretBodySessionReasoningEffortDefault,
31055
31357
  createRealtimeClientSecretBodySessionToolChoiceDefault: () => createRealtimeClientSecretBodySessionToolChoiceDefault,
31056
31358
  createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne: () => createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne,
31057
31359
  createRealtimeClientSecretBodySessionTracingDefault: () => createRealtimeClientSecretBodySessionTracingDefault,
@@ -31076,6 +31378,7 @@ __export(openAIAudioRealtimeAPI_zod_exports, {
31076
31378
  createRealtimeClientSecretResponseSessionPromptVariablesTypeDefault: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefault,
31077
31379
  createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultOne: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultOne,
31078
31380
  createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo,
31381
+ createRealtimeClientSecretResponseSessionReasoningEffortDefault: () => createRealtimeClientSecretResponseSessionReasoningEffortDefault,
31079
31382
  createRealtimeClientSecretResponseSessionToolChoiceDefault: () => createRealtimeClientSecretResponseSessionToolChoiceDefault,
31080
31383
  createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne: () => createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne,
31081
31384
  createRealtimeClientSecretResponseSessionTracingDefaultOne: () => createRealtimeClientSecretResponseSessionTracingDefaultOne,
@@ -31432,6 +31735,7 @@ var createRealtimeClientSecretBodySessionTracingDefaultOne = "auto";
31432
31735
  var createRealtimeClientSecretBodySessionTracingDefault = null;
31433
31736
  var createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne = "always";
31434
31737
  var createRealtimeClientSecretBodySessionToolChoiceDefault = "auto";
31738
+ var createRealtimeClientSecretBodySessionReasoningEffortDefault = "low";
31435
31739
  var createRealtimeClientSecretBodySessionTruncationRetentionRatioMin = 0;
31436
31740
  var createRealtimeClientSecretBodySessionTruncationRetentionRatioMax = 1;
31437
31741
  var createRealtimeClientSecretBodySessionTruncationTokenLimitsPostInstructionsMin = 0;
@@ -31467,6 +31771,7 @@ var createRealtimeClientSecretBody = zod6.object({
31467
31771
  zod6.enum([
31468
31772
  "gpt-realtime",
31469
31773
  "gpt-realtime-1.5",
31774
+ "gpt-realtime-2",
31470
31775
  "gpt-realtime-2025-08-28",
31471
31776
  "gpt-4o-realtime-preview",
31472
31777
  "gpt-4o-realtime-preview-2024-10-01",
@@ -31507,16 +31812,20 @@ var createRealtimeClientSecretBody = zod6.object({
31507
31812
  "gpt-4o-mini-transcribe",
31508
31813
  "gpt-4o-mini-transcribe-2025-12-15",
31509
31814
  "gpt-4o-transcribe",
31510
- "gpt-4o-transcribe-diarize"
31815
+ "gpt-4o-transcribe-diarize",
31816
+ "gpt-realtime-whisper"
31511
31817
  ])
31512
31818
  ).optional().describe(
31513
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
31819
+ "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
31514
31820
  ),
31515
31821
  language: zod6.string().optional().describe(
31516
31822
  "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
31517
31823
  ),
31518
31824
  prompt: zod6.string().optional().describe(
31519
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
31825
+ 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
31826
+ ),
31827
+ delay: zod6.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
31828
+ "Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
31520
31829
  )
31521
31830
  }).optional(),
31522
31831
  noise_reduction: zod6.object({
@@ -31583,7 +31892,7 @@ var createRealtimeClientSecretBody = zod6.object({
31583
31892
  "Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
31584
31893
  )
31585
31894
  ]).describe(
31586
- 'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
31895
+ 'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
31587
31896
  ).or(zod6.null()).optional()
31588
31897
  }).optional(),
31589
31898
  output: zod6.object({
@@ -31656,7 +31965,7 @@ var createRealtimeClientSecretBody = zod6.object({
31656
31965
  server_label: zod6.string().describe(
31657
31966
  "A label for this MCP server, used to identify it in tool calls.\n"
31658
31967
  ),
31659
- server_url: zod6.string().optional().describe(
31968
+ server_url: zod6.string().url().optional().describe(
31660
31969
  "The URL for the MCP server. One of `server_url` or `connector_id` must be\nprovided.\n"
31661
31970
  ),
31662
31971
  connector_id: zod6.enum([
@@ -31734,6 +32043,16 @@ var createRealtimeClientSecretBody = zod6.object({
31734
32043
  ).default(createRealtimeClientSecretBodySessionToolChoiceDefault).describe(
31735
32044
  "How the model chooses tools. Provide one of the string modes or force a specific\nfunction/MCP tool.\n"
31736
32045
  ),
32046
+ parallel_tool_calls: zod6.boolean().optional().describe(
32047
+ "Whether the model may call multiple tools in parallel. Only supported by\nreasoning Realtime models such as `gpt-realtime-2`.\n"
32048
+ ),
32049
+ reasoning: zod6.object({
32050
+ effort: zod6.enum(["minimal", "low", "medium", "high", "xhigh"]).default(createRealtimeClientSecretBodySessionReasoningEffortDefault).describe(
32051
+ "Constrains effort on reasoning for reasoning-capable Realtime models such as\n`gpt-realtime-2`.\n"
32052
+ )
32053
+ }).optional().describe(
32054
+ "Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.\n"
32055
+ ),
31737
32056
  max_output_tokens: zod6.number().or(zod6.enum(["inf"])).optional().describe(
31738
32057
  "Maximum number of output tokens for a single assistant response,\ninclusive of tool calls. Provide an integer between 1 and 4096 to\nlimit output tokens, or `inf` for the maximum available tokens for a\ngiven model. Defaults to `inf`.\n"
31739
32058
  ),
@@ -31773,7 +32092,7 @@ var createRealtimeClientSecretBody = zod6.object({
31773
32092
  ).or(
31774
32093
  zod6.object({
31775
32094
  type: zod6.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
31776
- image_url: zod6.string().describe(
32095
+ image_url: zod6.string().url().describe(
31777
32096
  "The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
31778
32097
  ).or(zod6.null()).optional(),
31779
32098
  file_id: zod6.string().describe("The ID of the file to be sent to the model.").or(zod6.null()).optional(),
@@ -31787,7 +32106,7 @@ var createRealtimeClientSecretBody = zod6.object({
31787
32106
  file_id: zod6.string().describe("The ID of the file to be sent to the model.").or(zod6.null()).optional(),
31788
32107
  filename: zod6.string().optional().describe("The name of the file to be sent to the model."),
31789
32108
  file_data: zod6.string().optional().describe("The content of the file to be sent to the model.\n"),
31790
- file_url: zod6.string().optional().describe("The URL of the file to be sent to the model."),
32109
+ file_url: zod6.string().url().optional().describe("The URL of the file to be sent to the model."),
31791
32110
  detail: zod6.enum(["low", "high"]).optional()
31792
32111
  }).describe("A file input to the model.")
31793
32112
  )
@@ -31823,16 +32142,20 @@ var createRealtimeClientSecretBody = zod6.object({
31823
32142
  "gpt-4o-mini-transcribe",
31824
32143
  "gpt-4o-mini-transcribe-2025-12-15",
31825
32144
  "gpt-4o-transcribe",
31826
- "gpt-4o-transcribe-diarize"
32145
+ "gpt-4o-transcribe-diarize",
32146
+ "gpt-realtime-whisper"
31827
32147
  ])
31828
32148
  ).optional().describe(
31829
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
32149
+ "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
31830
32150
  ),
31831
32151
  language: zod6.string().optional().describe(
31832
32152
  "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
31833
32153
  ),
31834
32154
  prompt: zod6.string().optional().describe(
31835
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
32155
+ 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
32156
+ ),
32157
+ delay: zod6.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
32158
+ "Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
31836
32159
  )
31837
32160
  }).optional(),
31838
32161
  noise_reduction: zod6.object({
@@ -31899,7 +32222,7 @@ var createRealtimeClientSecretBody = zod6.object({
31899
32222
  "Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
31900
32223
  )
31901
32224
  ]).describe(
31902
- 'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
32225
+ 'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
31903
32226
  ).or(zod6.null()).optional()
31904
32227
  }).optional()
31905
32228
  }).optional().describe("Configuration for input and output audio.\n"),
@@ -31930,6 +32253,7 @@ var createRealtimeClientSecretResponseSessionTracingDefaultTwo = "auto";
31930
32253
  var createRealtimeClientSecretResponseSessionTracingDefaultOne = null;
31931
32254
  var createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne = "always";
31932
32255
  var createRealtimeClientSecretResponseSessionToolChoiceDefault = "auto";
32256
+ var createRealtimeClientSecretResponseSessionReasoningEffortDefault = "low";
31933
32257
  var createRealtimeClientSecretResponseSessionTruncationRetentionRatioMin = 0;
31934
32258
  var createRealtimeClientSecretResponseSessionTruncationRetentionRatioMax = 1;
31935
32259
  var createRealtimeClientSecretResponseSessionTruncationTokenLimitsPostInstructionsMin = 0;
@@ -31939,17 +32263,14 @@ var createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo = "in
31939
32263
  var createRealtimeClientSecretResponse = zod6.object({
31940
32264
  value: zod6.string().describe("The generated client secret value."),
31941
32265
  expires_at: zod6.number().describe("Expiration timestamp for the client secret, in seconds since epoch."),
31942
- session: zod6.discriminatedUnion("type", [
32266
+ session: zod6.union([
31943
32267
  zod6.object({
31944
- client_secret: zod6.object({
31945
- value: zod6.string().describe(
31946
- "Ephemeral key usable in client environments to authenticate connections to the Realtime API. Use this in client-side environments rather than a standard API token, which should only be used server-side.\n"
31947
- ),
31948
- expires_at: zod6.number().describe(
31949
- "Timestamp for when the token expires. Currently, all tokens expire\nafter one minute.\n"
31950
- )
31951
- }).describe("Ephemeral key returned by the API."),
31952
32268
  type: zod6.enum(["realtime"]).describe("The type of session to create. Always `realtime` for the Realtime API.\n"),
32269
+ id: zod6.string().describe(
32270
+ "Unique identifier for the session that looks like `sess_1234567890abcdef`.\n"
32271
+ ),
32272
+ object: zod6.enum(["realtime.session"]).describe("The object type. Always `realtime.session`."),
32273
+ expires_at: zod6.number().optional().describe("Expiration timestamp for the session, in seconds since epoch."),
31953
32274
  output_modalities: zod6.array(zod6.enum(["text", "audio"])).default(createRealtimeClientSecretResponseSessionOutputModalitiesDefault).describe(
31954
32275
  'The set of modalities the model can respond with. It defaults to `["audio"]`, indicating\nthat the model will respond with audio plus a transcript. `["text"]` can be used to make\nthe model respond with text only. It is not possible to request both `text` and `audio` at the same time.\n'
31955
32276
  ),
@@ -31957,6 +32278,7 @@ var createRealtimeClientSecretResponse = zod6.object({
31957
32278
  zod6.enum([
31958
32279
  "gpt-realtime",
31959
32280
  "gpt-realtime-1.5",
32281
+ "gpt-realtime-2",
31960
32282
  "gpt-realtime-2025-08-28",
31961
32283
  "gpt-4o-realtime-preview",
31962
32284
  "gpt-4o-realtime-preview-2024-10-01",
@@ -31979,15 +32301,15 @@ var createRealtimeClientSecretResponse = zod6.object({
31979
32301
  audio: zod6.object({
31980
32302
  input: zod6.object({
31981
32303
  format: zod6.object({
31982
- type: zod6.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
31983
- rate: zod6.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
32304
+ type: zod6.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
32305
+ rate: zod6.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
31984
32306
  }).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
31985
32307
  zod6.object({
31986
- type: zod6.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
32308
+ type: zod6.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
31987
32309
  }).describe("The G.711 \u03BC-law format.")
31988
32310
  ).or(
31989
32311
  zod6.object({
31990
- type: zod6.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
32312
+ type: zod6.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
31991
32313
  }).describe("The G.711 A-law format.")
31992
32314
  ).optional(),
31993
32315
  transcription: zod6.object({
@@ -31997,20 +32319,19 @@ var createRealtimeClientSecretResponse = zod6.object({
31997
32319
  "gpt-4o-mini-transcribe",
31998
32320
  "gpt-4o-mini-transcribe-2025-12-15",
31999
32321
  "gpt-4o-transcribe",
32000
- "gpt-4o-transcribe-diarize"
32322
+ "gpt-4o-transcribe-diarize",
32323
+ "gpt-realtime-whisper"
32001
32324
  ])
32002
32325
  ).optional().describe(
32003
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
32004
- ),
32005
- language: zod6.string().optional().describe(
32006
- "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
32326
+ "The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
32007
32327
  ),
32328
+ language: zod6.string().optional().describe("The language of the input audio.\n"),
32008
32329
  prompt: zod6.string().optional().describe(
32009
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
32330
+ "The prompt configured for input audio transcription, when present.\n"
32010
32331
  )
32011
32332
  }).optional(),
32012
32333
  noise_reduction: zod6.object({
32013
- type: zod6.enum(["near_field", "far_field"]).describe(
32334
+ type: zod6.enum(["near_field", "far_field"]).optional().describe(
32014
32335
  "Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones such as laptop or conference room microphones.\n"
32015
32336
  )
32016
32337
  }).optional().describe(
@@ -32073,20 +32394,20 @@ var createRealtimeClientSecretResponse = zod6.object({
32073
32394
  "Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
32074
32395
  )
32075
32396
  ]).describe(
32076
- 'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
32397
+ 'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
32077
32398
  ).or(zod6.null()).optional()
32078
32399
  }).optional(),
32079
32400
  output: zod6.object({
32080
32401
  format: zod6.object({
32081
- type: zod6.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
32082
- rate: zod6.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
32402
+ type: zod6.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
32403
+ rate: zod6.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
32083
32404
  }).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
32084
32405
  zod6.object({
32085
- type: zod6.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
32406
+ type: zod6.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
32086
32407
  }).describe("The G.711 \u03BC-law format.")
32087
32408
  ).or(
32088
32409
  zod6.object({
32089
- type: zod6.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
32410
+ type: zod6.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
32090
32411
  }).describe("The G.711 A-law format.")
32091
32412
  ).optional(),
32092
32413
  voice: zod6.string().or(
@@ -32130,7 +32451,7 @@ var createRealtimeClientSecretResponse = zod6.object({
32130
32451
  ).or(zod6.null()).optional(),
32131
32452
  tools: zod6.array(
32132
32453
  zod6.object({
32133
- type: zod6.enum(["function"]).describe("The type of the tool, i.e. `function`."),
32454
+ type: zod6.enum(["function"]).optional().describe("The type of the tool, i.e. `function`."),
32134
32455
  name: zod6.string().optional().describe("The name of the function."),
32135
32456
  description: zod6.string().optional().describe(
32136
32457
  "The description of the function, including guidance on when and how\nto call it, and guidance about what to tell the user when calling\n(if anything).\n"
@@ -32142,7 +32463,7 @@ var createRealtimeClientSecretResponse = zod6.object({
32142
32463
  server_label: zod6.string().describe(
32143
32464
  "A label for this MCP server, used to identify it in tool calls.\n"
32144
32465
  ),
32145
- server_url: zod6.string().optional().describe(
32466
+ server_url: zod6.string().url().optional().describe(
32146
32467
  "The URL for the MCP server. One of `server_url` or `connector_id` must be\nprovided.\n"
32147
32468
  ),
32148
32469
  connector_id: zod6.enum([
@@ -32154,7 +32475,7 @@ var createRealtimeClientSecretResponse = zod6.object({
32154
32475
  "connector_outlookcalendar",
32155
32476
  "connector_outlookemail",
32156
32477
  "connector_sharepoint"
32157
- ]).describe(
32478
+ ]).optional().describe(
32158
32479
  "Identifier for service connectors, like those available in ChatGPT. One of\n`server_url` or `connector_id` must be provided. Learn more about service\nconnectors [here](/docs/guides/tools-remote-mcp#connectors).\n\nCurrently supported `connector_id` values are:\n\n- Dropbox: `connector_dropbox`\n- Gmail: `connector_gmail`\n- Google Calendar: `connector_googlecalendar`\n- Google Drive: `connector_googledrive`\n- Microsoft Teams: `connector_microsoftteams`\n- Outlook Calendar: `connector_outlookcalendar`\n- Outlook Email: `connector_outlookemail`\n- SharePoint: `connector_sharepoint`\n"
32159
32480
  ),
32160
32481
  authorization: zod6.string().optional().describe(
@@ -32220,6 +32541,13 @@ var createRealtimeClientSecretResponse = zod6.object({
32220
32541
  ).default(createRealtimeClientSecretResponseSessionToolChoiceDefault).describe(
32221
32542
  "How the model chooses tools. Provide one of the string modes or force a specific\nfunction/MCP tool.\n"
32222
32543
  ),
32544
+ reasoning: zod6.object({
32545
+ effort: zod6.enum(["minimal", "low", "medium", "high", "xhigh"]).default(createRealtimeClientSecretResponseSessionReasoningEffortDefault).describe(
32546
+ "Constrains effort on reasoning for reasoning-capable Realtime models such as\n`gpt-realtime-2`.\n"
32547
+ )
32548
+ }).optional().describe(
32549
+ "Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.\n"
32550
+ ),
32223
32551
  max_output_tokens: zod6.number().or(zod6.enum(["inf"])).optional().describe(
32224
32552
  "Maximum number of output tokens for a single assistant response,\ninclusive of tool calls. Provide an integer between 1 and 4096 to\nlimit output tokens, or `inf` for the maximum available tokens for a\ngiven model. Defaults to `inf`.\n"
32225
32553
  ),
@@ -32259,7 +32587,7 @@ var createRealtimeClientSecretResponse = zod6.object({
32259
32587
  ).or(
32260
32588
  zod6.object({
32261
32589
  type: zod6.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
32262
- image_url: zod6.string().describe(
32590
+ image_url: zod6.string().url().describe(
32263
32591
  "The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
32264
32592
  ).or(zod6.null()).optional(),
32265
32593
  file_id: zod6.string().describe("The ID of the file to be sent to the model.").or(zod6.null()).optional(),
@@ -32273,8 +32601,8 @@ var createRealtimeClientSecretResponse = zod6.object({
32273
32601
  file_id: zod6.string().describe("The ID of the file to be sent to the model.").or(zod6.null()).optional(),
32274
32602
  filename: zod6.string().optional().describe("The name of the file to be sent to the model."),
32275
32603
  file_data: zod6.string().optional().describe("The content of the file to be sent to the model.\n"),
32276
- file_url: zod6.string().optional().describe("The URL of the file to be sent to the model."),
32277
- detail: zod6.enum(["low", "high"])
32604
+ file_url: zod6.string().url().optional().describe("The URL of the file to be sent to the model."),
32605
+ detail: zod6.enum(["low", "high"]).optional()
32278
32606
  }).describe("A file input to the model.")
32279
32607
  )
32280
32608
  ).describe(
@@ -32283,9 +32611,7 @@ var createRealtimeClientSecretResponse = zod6.object({
32283
32611
  }).describe(
32284
32612
  "Reference to a prompt template and its variables.\n[Learn more](/docs/guides/text?api-mode=responses#reusable-prompts).\n"
32285
32613
  ).or(zod6.null()).optional()
32286
- }).describe(
32287
- "A new Realtime session configuration, with an ephemeral key. Default TTL\nfor keys is one minute.\n"
32288
- ),
32614
+ }).describe("A Realtime session configuration object.\n"),
32289
32615
  zod6.object({
32290
32616
  type: zod6.enum(["transcription"]).describe(
32291
32617
  "The type of session. Always `transcription` for transcription sessions.\n"
@@ -32301,15 +32627,15 @@ var createRealtimeClientSecretResponse = zod6.object({
32301
32627
  audio: zod6.object({
32302
32628
  input: zod6.object({
32303
32629
  format: zod6.object({
32304
- type: zod6.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
32305
- rate: zod6.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
32630
+ type: zod6.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
32631
+ rate: zod6.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
32306
32632
  }).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
32307
32633
  zod6.object({
32308
- type: zod6.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
32634
+ type: zod6.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
32309
32635
  }).describe("The G.711 \u03BC-law format.")
32310
32636
  ).or(
32311
32637
  zod6.object({
32312
- type: zod6.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
32638
+ type: zod6.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
32313
32639
  }).describe("The G.711 A-law format.")
32314
32640
  ).optional(),
32315
32641
  transcription: zod6.object({
@@ -32319,20 +32645,19 @@ var createRealtimeClientSecretResponse = zod6.object({
32319
32645
  "gpt-4o-mini-transcribe",
32320
32646
  "gpt-4o-mini-transcribe-2025-12-15",
32321
32647
  "gpt-4o-transcribe",
32322
- "gpt-4o-transcribe-diarize"
32648
+ "gpt-4o-transcribe-diarize",
32649
+ "gpt-realtime-whisper"
32323
32650
  ])
32324
32651
  ).optional().describe(
32325
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
32326
- ),
32327
- language: zod6.string().optional().describe(
32328
- "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
32652
+ "The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
32329
32653
  ),
32654
+ language: zod6.string().optional().describe("The language of the input audio.\n"),
32330
32655
  prompt: zod6.string().optional().describe(
32331
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
32656
+ "The prompt configured for input audio transcription, when present.\n"
32332
32657
  )
32333
32658
  }).optional(),
32334
32659
  noise_reduction: zod6.object({
32335
- type: zod6.enum(["near_field", "far_field"]).describe(
32660
+ type: zod6.enum(["near_field", "far_field"]).optional().describe(
32336
32661
  "Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones such as laptop or conference room microphones.\n"
32337
32662
  )
32338
32663
  }).optional().describe("Configuration for input audio noise reduction.\n"),
@@ -32349,8 +32674,10 @@ var createRealtimeClientSecretResponse = zod6.object({
32349
32674
  silence_duration_ms: zod6.number().optional().describe(
32350
32675
  "Duration of silence to detect speech stop (in milliseconds). Defaults\nto 500ms. With shorter values the model will respond more quickly,\nbut may jump in on short pauses from the user.\n"
32351
32676
  )
32352
- }).optional().describe(
32353
- "Configuration for turn detection. Can be set to `null` to turn off. Server\nVAD means that the model will detect the start and end of speech based on\naudio volume and respond at the end of user speech.\n"
32677
+ }).describe(
32678
+ "Configuration for turn detection. Can be set to `null` to turn off. Server\nVAD means that the model will detect the start and end of speech based on\naudio volume and respond at the end of user speech. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported.\n"
32679
+ ).or(zod6.null()).optional().describe(
32680
+ "Configuration for turn detection. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported.\n"
32354
32681
  )
32355
32682
  }).optional()
32356
32683
  }).optional().describe("Configuration for input audio for the session.\n")
@@ -32490,7 +32817,7 @@ var createRealtimeSessionBody = zod6.object({
32490
32817
  ).or(
32491
32818
  zod6.object({
32492
32819
  type: zod6.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
32493
- image_url: zod6.string().describe(
32820
+ image_url: zod6.string().url().describe(
32494
32821
  "The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
32495
32822
  ).or(zod6.null()).optional(),
32496
32823
  file_id: zod6.string().describe("The ID of the file to be sent to the model.").or(zod6.null()).optional(),
@@ -32504,7 +32831,7 @@ var createRealtimeSessionBody = zod6.object({
32504
32831
  file_id: zod6.string().describe("The ID of the file to be sent to the model.").or(zod6.null()).optional(),
32505
32832
  filename: zod6.string().optional().describe("The name of the file to be sent to the model."),
32506
32833
  file_data: zod6.string().optional().describe("The content of the file to be sent to the model.\n"),
32507
- file_url: zod6.string().optional().describe("The URL of the file to be sent to the model."),
32834
+ file_url: zod6.string().url().optional().describe("The URL of the file to be sent to the model."),
32508
32835
  detail: zod6.enum(["low", "high"]).optional()
32509
32836
  }).describe("A file input to the model.")
32510
32837
  )
@@ -32553,17 +32880,14 @@ var createRealtimeSessionResponse = zod6.object({
32553
32880
  "gpt-4o-mini-transcribe",
32554
32881
  "gpt-4o-mini-transcribe-2025-12-15",
32555
32882
  "gpt-4o-transcribe",
32556
- "gpt-4o-transcribe-diarize"
32883
+ "gpt-4o-transcribe-diarize",
32884
+ "gpt-realtime-whisper"
32557
32885
  ])
32558
32886
  ).optional().describe(
32559
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
32560
- ),
32561
- language: zod6.string().optional().describe(
32562
- "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
32887
+ "The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
32563
32888
  ),
32564
- prompt: zod6.string().optional().describe(
32565
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
32566
- )
32889
+ language: zod6.string().optional().describe("The language of the input audio.\n"),
32890
+ prompt: zod6.string().optional().describe("The prompt configured for input audio transcription, when present.\n")
32567
32891
  }).optional(),
32568
32892
  noise_reduction: zod6.object({
32569
32893
  type: zod6.enum(["near_field", "far_field"]).optional().describe(
@@ -32689,16 +33013,20 @@ var createRealtimeTranscriptionSessionBody = zod6.object({
32689
33013
  "gpt-4o-mini-transcribe",
32690
33014
  "gpt-4o-mini-transcribe-2025-12-15",
32691
33015
  "gpt-4o-transcribe",
32692
- "gpt-4o-transcribe-diarize"
33016
+ "gpt-4o-transcribe-diarize",
33017
+ "gpt-realtime-whisper"
32693
33018
  ])
32694
33019
  ).optional().describe(
32695
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
33020
+ "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
32696
33021
  ),
32697
33022
  language: zod6.string().optional().describe(
32698
33023
  "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
32699
33024
  ),
32700
33025
  prompt: zod6.string().optional().describe(
32701
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
33026
+ 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
33027
+ ),
33028
+ delay: zod6.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
33029
+ "Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
32702
33030
  )
32703
33031
  }).optional(),
32704
33032
  include: zod6.array(zod6.enum(["item.input_audio_transcription.logprobs"])).optional().describe(
@@ -32727,17 +33055,14 @@ var createRealtimeTranscriptionSessionResponse = zod6.object({
32727
33055
  "gpt-4o-mini-transcribe",
32728
33056
  "gpt-4o-mini-transcribe-2025-12-15",
32729
33057
  "gpt-4o-transcribe",
32730
- "gpt-4o-transcribe-diarize"
33058
+ "gpt-4o-transcribe-diarize",
33059
+ "gpt-realtime-whisper"
32731
33060
  ])
32732
33061
  ).optional().describe(
32733
- "The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, and `gpt-4o-transcribe-diarize`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
33062
+ "The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
32734
33063
  ),
32735
- language: zod6.string().optional().describe(
32736
- "The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
32737
- ),
32738
- prompt: zod6.string().optional().describe(
32739
- 'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
32740
- )
33064
+ language: zod6.string().optional().describe("The language of the input audio.\n"),
33065
+ prompt: zod6.string().optional().describe("The prompt configured for input audio transcription, when present.\n")
32741
33066
  }).optional(),
32742
33067
  turn_detection: zod6.object({
32743
33068
  type: zod6.string().optional().describe("Type of turn detection, only `server_vad` is currently supported.\n"),
@@ -36122,6 +36447,7 @@ __export(sonioxPublicAPI_zod_exports, {
36122
36447
  createTranscriptionBodyWebhookUrlRegExpOne: () => createTranscriptionBodyWebhookUrlRegExpOne,
36123
36448
  deleteFileParams: () => deleteFileParams,
36124
36449
  deleteTranscriptionParams: () => deleteTranscriptionParams,
36450
+ getConcurrencyLimitsResponse: () => getConcurrencyLimitsResponse,
36125
36451
  getFileParams: () => getFileParams,
36126
36452
  getFileResponse: () => getFileResponse,
36127
36453
  getFilesCountResponse: () => getFilesCountResponse,
@@ -36139,6 +36465,12 @@ __export(sonioxPublicAPI_zod_exports, {
36139
36465
  getTranscriptionsQueryLimitMax: () => getTranscriptionsQueryLimitMax,
36140
36466
  getTranscriptionsQueryParams: () => getTranscriptionsQueryParams,
36141
36467
  getTranscriptionsResponse: () => getTranscriptionsResponse,
36468
+ getTtsModelsResponse: () => getTtsModelsResponse,
36469
+ getUsageLogsQueryLimitDefault: () => getUsageLogsQueryLimitDefault,
36470
+ getUsageLogsQueryLimitMax: () => getUsageLogsQueryLimitMax,
36471
+ getUsageLogsQueryParams: () => getUsageLogsQueryParams,
36472
+ getUsageLogsQuerySortDefault: () => getUsageLogsQuerySortDefault,
36473
+ getUsageLogsResponse: () => getUsageLogsResponse,
36142
36474
  uploadFileBody: () => uploadFileBody,
36143
36475
  uploadFileBodyClientReferenceIdMaxOne: () => uploadFileBodyClientReferenceIdMaxOne
36144
36476
  });
@@ -36389,11 +36721,73 @@ var getModelsResponse = zod10.object({
36389
36721
  })
36390
36722
  ).describe("List of available models and their attributes.")
36391
36723
  });
36724
+ var getTtsModelsResponse = zod10.object({
36725
+ models: zod10.array(
36726
+ zod10.object({
36727
+ id: zod10.string().describe("Unique identifier of the model."),
36728
+ aliased_model_id: zod10.string().or(zod10.null()).describe("If this is an alias, the id of the aliased model."),
36729
+ name: zod10.string().describe("Name of the model."),
36730
+ voices: zod10.array(
36731
+ zod10.object({
36732
+ id: zod10.string().describe("Unique identifier of the voice."),
36733
+ description: zod10.string().describe("Description of the TTS voice."),
36734
+ gender: zod10.enum(["male", "female", "neutral"])
36735
+ })
36736
+ ).describe("List of available voices for this model."),
36737
+ languages: zod10.array(
36738
+ zod10.object({
36739
+ code: zod10.string().describe("2-letter language code."),
36740
+ name: zod10.string().describe("Language name.")
36741
+ })
36742
+ ).describe("List of languages supported by the model.")
36743
+ })
36744
+ ).describe("List of available TTS models and their attributes.")
36745
+ });
36746
+ var getUsageLogsQueryLimitDefault = 1e3;
36747
+ var getUsageLogsQueryLimitMax = 1e3;
36748
+ var getUsageLogsQuerySortDefault = "end_time_asc";
36749
+ var getUsageLogsQueryParams = zod10.object({
36750
+ start_time: zod10.string().describe("Start of the time window (inclusive). Filters by request end time."),
36751
+ end_time: zod10.string().describe("End of the time window (exclusive). Filters by request end time."),
36752
+ limit: zod10.number().min(1).max(getUsageLogsQueryLimitMax).default(getUsageLogsQueryLimitDefault).describe("Maximum number of usage log entries to return."),
36753
+ sort: zod10.enum(["end_time_asc", "end_time_desc"]).default(getUsageLogsQuerySortDefault).describe(
36754
+ "Sort order by end_time.Use `end_time_desc` to get the most recent entries first. When paginating, pass the same `sort` value alongside the cursor."
36755
+ ),
36756
+ cursor: zod10.string().or(zod10.null()).optional().describe("Pagination cursor for the next page of results.")
36757
+ });
36758
+ var getUsageLogsResponse = zod10.object({
36759
+ usage_logs: zod10.array(
36760
+ zod10.object({
36761
+ uuid: zod10.string().uuid().describe("Unique identifier of the request."),
36762
+ request_scope: zod10.string().describe("Scope of the request (api / playground)."),
36763
+ client_reference_id: zod10.string().describe("Client reference ID supplied on the original request. Empty string if none."),
36764
+ model: zod10.string().describe("Model identifier."),
36765
+ start_time: zod10.string().datetime({}).describe("When the request started."),
36766
+ end_time: zod10.string().datetime({}).describe("When the request ended."),
36767
+ input_text_tokens: zod10.number(),
36768
+ input_audio_tokens: zod10.number(),
36769
+ input_audio_duration_ms: zod10.number(),
36770
+ output_text_tokens: zod10.number(),
36771
+ output_audio_tokens: zod10.number(),
36772
+ output_audio_duration_ms: zod10.number(),
36773
+ cost_usd: zod10.string(),
36774
+ input_cost_usd: zod10.string(),
36775
+ input_text_cost_usd: zod10.string(),
36776
+ input_audio_cost_usd: zod10.string(),
36777
+ output_cost_usd: zod10.string(),
36778
+ output_text_cost_usd: zod10.string(),
36779
+ output_audio_cost_usd: zod10.string()
36780
+ })
36781
+ ).describe("Per-request usage log entries ordered by end_time, uuid (per `sort`)."),
36782
+ next_page_cursor: zod10.string().or(zod10.null()).optional().describe(
36783
+ "A pagination token that references the next page of results. When more data is available, this field contains a value to pass in the cursor parameter of a subsequent request. When null, no additional results are available."
36784
+ )
36785
+ });
36392
36786
  var createTemporaryApiKeyBodyExpiresInSecondsMax = 3600;
36393
36787
  var createTemporaryApiKeyBodyClientReferenceIdMaxOne = 256;
36394
36788
  var createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne = 18e3;
36395
36789
  var createTemporaryApiKeyBody = zod10.object({
36396
- usage_type: zod10.enum(["transcribe_websocket"]),
36790
+ usage_type: zod10.enum(["transcribe_websocket", "tts_rt"]),
36397
36791
  expires_in_seconds: zod10.number().min(1).max(createTemporaryApiKeyBodyExpiresInSecondsMax).describe("Duration in seconds until the temporary API key expires."),
36398
36792
  client_reference_id: zod10.string().max(createTemporaryApiKeyBodyClientReferenceIdMaxOne).or(zod10.null()).optional().describe("Optional tracking identifier string. Does not need to be unique."),
36399
36793
  single_use: zod10.boolean().or(zod10.null()).optional().describe("If true, the temporary API key can be used only once."),
@@ -36401,6 +36795,28 @@ var createTemporaryApiKeyBody = zod10.object({
36401
36795
  "Maximum WebSocket connection duration in seconds. If exceeded, the connection will be dropped. If not set, no limit is applied."
36402
36796
  )
36403
36797
  });
36798
+ var getConcurrencyLimitsResponse = zod10.object({
36799
+ project: zod10.object({
36800
+ current: zod10.object({
36801
+ transcribe_concurrent: zod10.number(),
36802
+ tts_concurrent: zod10.number()
36803
+ }).describe("Live counts read from Redis"),
36804
+ limits: zod10.object({
36805
+ transcribe_concurrent: zod10.number().or(zod10.null()),
36806
+ tts_concurrent: zod10.number().or(zod10.null())
36807
+ }).describe("Configured limits")
36808
+ }),
36809
+ organization: zod10.object({
36810
+ current: zod10.object({
36811
+ transcribe_concurrent: zod10.number(),
36812
+ tts_concurrent: zod10.number()
36813
+ }).describe("Live counts read from Redis"),
36814
+ limits: zod10.object({
36815
+ transcribe_concurrent: zod10.number().or(zod10.null()),
36816
+ tts_concurrent: zod10.number().or(zod10.null())
36817
+ }).describe("Configured limits")
36818
+ })
36819
+ });
36404
36820
 
36405
36821
  // src/generated/soniox/streaming-types.zod.ts
36406
36822
  var streaming_types_zod_exports = {};
@@ -36485,10 +36901,10 @@ var sonioxStructuredContextSchema = zod11.object({
36485
36901
  var sonioxContextSchema = zod11.union([sonioxStructuredContextSchema, zod11.string()]);
36486
36902
  var sonioxRealtimeModelSchema = zod11.enum([
36487
36903
  "stt-rt-v4",
36488
- "stt-rt-v3",
36489
36904
  "stt-rt-preview",
36490
36905
  "stt-rt-v3-preview",
36491
- "stt-rt-preview-v2"
36906
+ "stt-rt-preview-v2",
36907
+ "stt-rt-v3"
36492
36908
  ]);
36493
36909
  var streamingTranscriberParams3 = zod11.object({
36494
36910
  model: sonioxRealtimeModelSchema,
@@ -36496,12 +36912,16 @@ var streamingTranscriberParams3 = zod11.object({
36496
36912
  sampleRate: zod11.number().optional(),
36497
36913
  numChannels: zod11.number().optional(),
36498
36914
  languageHints: zod11.array(zod11.string()).optional(),
36915
+ languageHintsStrict: zod11.boolean().optional(),
36499
36916
  context: sonioxContextSchema.optional(),
36500
36917
  enableSpeakerDiarization: zod11.boolean().optional(),
36501
36918
  enableLanguageIdentification: zod11.boolean().optional(),
36502
36919
  enableEndpointDetection: zod11.boolean().optional(),
36920
+ maxEndpointDelayMs: zod11.number().optional(),
36503
36921
  translation: sonioxTranslationConfigSchema.optional(),
36504
- clientReferenceId: zod11.string().optional()
36922
+ clientReferenceId: zod11.string().optional(),
36923
+ keepaliveIntervalMs: zod11.number().optional(),
36924
+ connectTimeoutMs: zod11.number().optional()
36505
36925
  });
36506
36926
  var sonioxTranslationStatusSchema = zod11.enum(["original", "translation", "none"]);
36507
36927
  var sonioxTokenSchema = zod11.object({
@@ -37093,6 +37513,7 @@ __export(schema_exports5, {
37093
37513
  V1ListenPostParametersCallbackMethod: () => V1ListenPostParametersCallbackMethod,
37094
37514
  V1ListenPostParametersCustomIntentMode: () => V1ListenPostParametersCustomIntentMode,
37095
37515
  V1ListenPostParametersCustomTopicMode: () => V1ListenPostParametersCustomTopicMode,
37516
+ V1ListenPostParametersDiarizeModel: () => V1ListenPostParametersDiarizeModel,
37096
37517
  V1ListenPostParametersEncoding: () => V1ListenPostParametersEncoding,
37097
37518
  V1ListenPostParametersModel0: () => V1ListenPostParametersModel0,
37098
37519
  V1ListenPostParametersRedactSchemaOneOf1Items: () => V1ListenPostParametersRedactSchemaOneOf1Items,
@@ -37131,6 +37552,13 @@ __export(schema_exports5, {
37131
37552
  V1SpeakPostParametersSampleRate4: () => V1SpeakPostParametersSampleRate4
37132
37553
  });
37133
37554
 
37555
+ // src/generated/deepgram/schema/v1ListenPostParametersDiarizeModel.ts
37556
+ var V1ListenPostParametersDiarizeModel = {
37557
+ latest: "latest",
37558
+ v1: "v1",
37559
+ v2: "v2"
37560
+ };
37561
+
37134
37562
  // src/generated/deepgram/schema/v1ListenPostParametersModel0.ts
37135
37563
  var V1ListenPostParametersModel0 = {
37136
37564
  "nova-3": "nova-3",
@@ -37347,6 +37775,7 @@ var V1SpeakPostParametersSampleRate = {
37347
37775
  var schema_exports6 = {};
37348
37776
  __export(schema_exports6, {
37349
37777
  AudioResponseFormat: () => AudioResponseFormat,
37778
+ AudioTranscriptionDelay: () => AudioTranscriptionDelay,
37350
37779
  CreateSpeechRequestResponseFormat: () => CreateSpeechRequestResponseFormat,
37351
37780
  CreateSpeechRequestStreamFormat: () => CreateSpeechRequestStreamFormat,
37352
37781
  CreateTranscriptionRequestTimestampGranularitiesItem: () => CreateTranscriptionRequestTimestampGranularitiesItem,
@@ -37366,12 +37795,14 @@ __export(schema_exports6, {
37366
37795
  RealtimeAudioFormatsAnyOfType: () => RealtimeAudioFormatsAnyOfType,
37367
37796
  RealtimeCreateClientSecretRequestExpiresAfterAnchor: () => RealtimeCreateClientSecretRequestExpiresAfterAnchor,
37368
37797
  RealtimeFunctionToolType: () => RealtimeFunctionToolType,
37798
+ RealtimeReasoningEffort: () => RealtimeReasoningEffort,
37369
37799
  RealtimeSessionCreateRequestGAIncludeItem: () => RealtimeSessionCreateRequestGAIncludeItem,
37370
37800
  RealtimeSessionCreateRequestGAOutputModalitiesItem: () => RealtimeSessionCreateRequestGAOutputModalitiesItem,
37371
37801
  RealtimeSessionCreateRequestGAType: () => RealtimeSessionCreateRequestGAType,
37372
37802
  RealtimeSessionCreateRequestModalitiesItem: () => RealtimeSessionCreateRequestModalitiesItem,
37373
37803
  RealtimeSessionCreateRequestToolsItemType: () => RealtimeSessionCreateRequestToolsItemType,
37374
37804
  RealtimeSessionCreateResponseGAIncludeItem: () => RealtimeSessionCreateResponseGAIncludeItem,
37805
+ RealtimeSessionCreateResponseGAObject: () => RealtimeSessionCreateResponseGAObject,
37375
37806
  RealtimeSessionCreateResponseGAOutputModalitiesItem: () => RealtimeSessionCreateResponseGAOutputModalitiesItem,
37376
37807
  RealtimeSessionCreateResponseGAType: () => RealtimeSessionCreateResponseGAType,
37377
37808
  RealtimeSessionCreateResponseIncludeItem: () => RealtimeSessionCreateResponseIncludeItem,
@@ -37402,6 +37833,15 @@ __export(schema_exports6, {
37402
37833
  VoiceResourceObject: () => VoiceResourceObject
37403
37834
  });
37404
37835
 
37836
+ // src/generated/openai/schema/audioTranscriptionDelay.ts
37837
+ var AudioTranscriptionDelay = {
37838
+ minimal: "minimal",
37839
+ low: "low",
37840
+ medium: "medium",
37841
+ high: "high",
37842
+ xhigh: "xhigh"
37843
+ };
37844
+
37405
37845
  // src/generated/openai/schema/createSpeechRequestResponseFormat.ts
37406
37846
  var CreateSpeechRequestResponseFormat = {
37407
37847
  mp3: "mp3",
@@ -37514,6 +37954,15 @@ var RealtimeFunctionToolType = {
37514
37954
  function: "function"
37515
37955
  };
37516
37956
 
37957
+ // src/generated/openai/schema/realtimeReasoningEffort.ts
37958
+ var RealtimeReasoningEffort = {
37959
+ minimal: "minimal",
37960
+ low: "low",
37961
+ medium: "medium",
37962
+ high: "high",
37963
+ xhigh: "xhigh"
37964
+ };
37965
+
37517
37966
  // src/generated/openai/schema/realtimeSessionCreateRequestGAIncludeItem.ts
37518
37967
  var RealtimeSessionCreateRequestGAIncludeItem = {
37519
37968
  iteminput_audio_transcriptionlogprobs: "item.input_audio_transcription.logprobs"
@@ -37546,6 +37995,11 @@ var RealtimeSessionCreateResponseGAIncludeItem = {
37546
37995
  iteminput_audio_transcriptionlogprobs: "item.input_audio_transcription.logprobs"
37547
37996
  };
37548
37997
 
37998
+ // src/generated/openai/schema/realtimeSessionCreateResponseGAObject.ts
37999
+ var RealtimeSessionCreateResponseGAObject = {
38000
+ realtimesession: "realtime.session"
38001
+ };
38002
+
37549
38003
  // src/generated/openai/schema/realtimeSessionCreateResponseGAOutputModalitiesItem.ts
37550
38004
  var RealtimeSessionCreateResponseGAOutputModalitiesItem = {
37551
38005
  text: "text",
@@ -37690,6 +38144,7 @@ __export(schema_exports7, {
37690
38144
  AutoChaptersResultErrorType: () => AutoChaptersResultErrorType,
37691
38145
  ErrorResponseError: () => ErrorResponseError,
37692
38146
  GetJobsJobidAlignmentTags: () => GetJobsJobidAlignmentTags,
38147
+ GetJobsJobidObjectUrlsUrlForItem: () => GetJobsJobidObjectUrlsUrlForItem,
37693
38148
  GetJobsJobidTranscriptFormat: () => GetJobsJobidTranscriptFormat,
37694
38149
  JobDetailsStatus: () => JobDetailsStatus,
37695
38150
  JobMode: () => JobMode,
@@ -37759,6 +38214,13 @@ var GetJobsJobidAlignmentTags = {
37759
38214
  one_per_line: "one_per_line"
37760
38215
  };
37761
38216
 
38217
+ // src/generated/speechmatics/schema/getJobsJobidObjectUrlsUrlForItem.ts
38218
+ var GetJobsJobidObjectUrlsUrlForItem = {
38219
+ data: "data",
38220
+ audio_mp3: "audio_mp3",
38221
+ transcript: "transcript"
38222
+ };
38223
+
37762
38224
  // src/generated/speechmatics/schema/getJobsJobidTranscriptFormat.ts
37763
38225
  var GetJobsJobidTranscriptFormat = {
37764
38226
  "json-v2": "json-v2",
@@ -37875,6 +38337,19 @@ var WrittenFormRecognitionResultType = {
37875
38337
  word: "word"
37876
38338
  };
37877
38339
 
38340
+ // src/generated/soniox/sdk-types.ts
38341
+ var sdk_types_exports = {};
38342
+ __export(sdk_types_exports, {
38343
+ RealtimeSttSession: () => RealtimeSttSession,
38344
+ SonioxFetchHttpClient: () => FetchHttpClient,
38345
+ SonioxNodeClient: () => SonioxNodeClient
38346
+ });
38347
+ import {
38348
+ FetchHttpClient,
38349
+ RealtimeSttSession,
38350
+ SonioxNodeClient
38351
+ } from "@soniox/node";
38352
+
37878
38353
  // src/generated/elevenlabs/schema/index.ts
37879
38354
  var schema_exports8 = {};
37880
38355
  __export(schema_exports8, {
@@ -37952,6 +38427,10 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
37952
38427
  deleteJobsJobidParams: () => deleteJobsJobidParams,
37953
38428
  deleteJobsJobidQueryParams: () => deleteJobsJobidQueryParams,
37954
38429
  deleteJobsJobidResponse: () => deleteJobsJobidResponse,
38430
+ deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault: () => deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault,
38431
+ deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault: () => deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault,
38432
+ deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
38433
+ deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
37955
38434
  deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
37956
38435
  deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
37957
38436
  deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
@@ -37967,8 +38446,15 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
37967
38446
  getJobsJobidDataResponse: () => getJobsJobidDataResponse,
37968
38447
  getJobsJobidLogParams: () => getJobsJobidLogParams,
37969
38448
  getJobsJobidLogResponse: () => getJobsJobidLogResponse,
38449
+ getJobsJobidObjectUrlsParams: () => getJobsJobidObjectUrlsParams,
38450
+ getJobsJobidObjectUrlsQueryParams: () => getJobsJobidObjectUrlsQueryParams,
38451
+ getJobsJobidObjectUrlsResponse: () => getJobsJobidObjectUrlsResponse,
37970
38452
  getJobsJobidParams: () => getJobsJobidParams,
37971
38453
  getJobsJobidResponse: () => getJobsJobidResponse,
38454
+ getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault: () => getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault,
38455
+ getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault: () => getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault,
38456
+ getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
38457
+ getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
37972
38458
  getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
37973
38459
  getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
37974
38460
  getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
@@ -37983,6 +38469,8 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
37983
38469
  getJobsJobidTranscriptQueryParams: () => getJobsJobidTranscriptQueryParams,
37984
38470
  getJobsJobidTranscriptResponse: () => getJobsJobidTranscriptResponse,
37985
38471
  getJobsJobidTranscriptResponseJobDurationMin: () => getJobsJobidTranscriptResponseJobDurationMin,
38472
+ getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
38473
+ getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
37986
38474
  getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp,
37987
38475
  getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
37988
38476
  getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax,
@@ -37994,6 +38482,10 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
37994
38482
  getJobsQueryLimitMax: () => getJobsQueryLimitMax,
37995
38483
  getJobsQueryParams: () => getJobsQueryParams,
37996
38484
  getJobsResponse: () => getJobsResponse,
38485
+ getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault: () => getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault,
38486
+ getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault: () => getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault,
38487
+ getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
38488
+ getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
37997
38489
  getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
37998
38490
  getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
37999
38491
  getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
@@ -38004,12 +38496,18 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
38004
38496
  getJobsResponseJobsItemDurationMin: () => getJobsResponseJobsItemDurationMin,
38005
38497
  getUsageQueryParams: () => getUsageQueryParams,
38006
38498
  getUsageResponse: () => getUsageResponse,
38007
- postJobsBody: () => postJobsBody
38499
+ postJobsBody: () => postJobsBody,
38500
+ postJobsHeader: () => postJobsHeader
38008
38501
  });
38009
38502
  import { z as zod12 } from "zod";
38503
+ var postJobsHeader = zod12.object({
38504
+ "X-SM-Processing-Data": zod12.string().optional().describe(
38505
+ '**Note**: Only available for on-prem\nJSON dictionary of processing settings for the job worker. Currently supports `parallel_engines` (integer), which controls the number of engines the worker can use in parallel for this job, and `user_id` (string), which is the user id for this job. Example: `{"parallel_engines": 4}`'
38506
+ )
38507
+ });
38010
38508
  var postJobsBody = zod12.object({
38011
38509
  config: zod12.string().describe(
38012
- "JSON containing a `JobConfig` model indicating the type and parameters for the recognition job."
38510
+ "JSON containing a [`JobConfig`](/speech-to-text/batch/input#jobconfig-schema) model indicating the type and parameters for the recognition job."
38013
38511
  ),
38014
38512
  data_file: zod12.instanceof(File).optional().describe(
38015
38513
  "The data file to be processed. Alternatively the data file can be fetched from a url specified in `JobConfig`."
@@ -38031,9 +38529,13 @@ var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitiv
38031
38529
  var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
38032
38530
  var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
38033
38531
  var getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
38532
+ var getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
38533
+ var getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
38034
38534
  var getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
38035
38535
  var getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
38036
38536
  var getJobsResponseJobsItemConfigTranslationConfigTargetLanguagesMax = 5;
38537
+ var getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault = "auto";
38538
+ var getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault = "brief";
38037
38539
  var getJobsResponse = zod12.object({
38038
38540
  jobs: zod12.array(
38039
38541
  zod12.object({
@@ -38113,19 +38615,30 @@ var getJobsResponse = zod12.object({
38113
38615
  max_delay_mode: zod12.enum(["fixed", "flexible"]).optional().describe(
38114
38616
  "Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
38115
38617
  ),
38618
+ audio_filtering_config: zod12.object({
38619
+ volume_threshold: zod12.number().min(
38620
+ getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
38621
+ ).max(
38622
+ getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
38623
+ ).optional().describe(
38624
+ "Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
38625
+ )
38626
+ }).optional().describe("Configuration for limiting the transcription of quiet audio."),
38116
38627
  transcript_filtering_config: zod12.object({
38117
38628
  remove_disfluencies: zod12.boolean().optional().describe(
38118
- "If true, words that are identified as disfluencies will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
38629
+ "If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
38119
38630
  ),
38120
38631
  replacements: zod12.array(
38121
38632
  zod12.object({
38122
- from: zod12.string(),
38123
- to: zod12.string()
38633
+ from: zod12.string().describe("The text or pattern identified to be replaced."),
38634
+ to: zod12.string().describe(
38635
+ "The corrected or formatted string to appear in the transcript."
38636
+ )
38124
38637
  })
38125
38638
  ).optional().describe(
38126
- "A list of replacements to apply to the transcript. Each replacement is a pair of strings, where the first string is the pattern to be replaced and the second string is the replacement text."
38639
+ 'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
38127
38640
  )
38128
- }).optional().describe("Configuration for applying filtering to the transcription"),
38641
+ }).optional().describe("Configuration for applying filtering to the transcription."),
38129
38642
  speaker_diarization_config: zod12.object({
38130
38643
  prefer_current_speaker: zod12.boolean().optional().describe(
38131
38644
  'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
@@ -38136,6 +38649,19 @@ var getJobsResponse = zod12.object({
38136
38649
  getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
38137
38650
  ).optional().describe(
38138
38651
  "Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
38652
+ ),
38653
+ get_speakers: zod12.boolean().optional().describe(
38654
+ "If true, speaker identifiers will be returned at the end of transcript."
38655
+ ),
38656
+ speakers: zod12.array(
38657
+ zod12.object({
38658
+ label: zod12.string().min(1).describe(
38659
+ "Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
38660
+ ),
38661
+ speaker_identifiers: zod12.array(zod12.string().describe("Speaker identifiers.")).min(1)
38662
+ })
38663
+ ).optional().describe(
38664
+ "Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
38139
38665
  )
38140
38666
  }).optional().describe("Configuration for speaker diarization")
38141
38667
  }).optional(),
@@ -38193,10 +38719,14 @@ var getJobsResponse = zod12.object({
38193
38719
  default_language: zod12.string().optional()
38194
38720
  }).optional(),
38195
38721
  summarization_config: zod12.object({
38196
- content_type: zod12.enum(["auto", "informative", "conversational"]).optional(),
38197
- summary_length: zod12.enum(["brief", "detailed"]).optional(),
38722
+ content_type: zod12.enum(["auto", "informative", "conversational"]).default(getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault).describe(
38723
+ "Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
38724
+ ),
38725
+ summary_length: zod12.enum(["brief", "detailed"]).default(getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault).describe(
38726
+ "Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
38727
+ ),
38198
38728
  summary_type: zod12.enum(["paragraphs", "bullets"]).optional()
38199
- }).optional(),
38729
+ }).optional().describe("Configuration options for summarization."),
38200
38730
  sentiment_analysis_config: zod12.object({}).optional(),
38201
38731
  topic_detection_config: zod12.object({
38202
38732
  topics: zod12.array(zod12.string()).optional()
@@ -38218,7 +38748,7 @@ var getJobsResponse = zod12.object({
38218
38748
  "Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
38219
38749
  )
38220
38750
  }).describe(
38221
- "Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/<id> request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
38751
+ "Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
38222
38752
  )
38223
38753
  )
38224
38754
  });
@@ -38230,9 +38760,13 @@ var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitiv
38230
38760
  var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
38231
38761
  var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
38232
38762
  var getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
38763
+ var getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
38764
+ var getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
38233
38765
  var getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
38234
38766
  var getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
38235
38767
  var getJobsJobidResponseJobConfigTranslationConfigTargetLanguagesMax = 5;
38768
+ var getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault = "auto";
38769
+ var getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault = "brief";
38236
38770
  var getJobsJobidResponse = zod12.object({
38237
38771
  job: zod12.object({
38238
38772
  created_at: zod12.string().datetime({}).describe("The UTC date time the job was created."),
@@ -38309,19 +38843,30 @@ var getJobsJobidResponse = zod12.object({
38309
38843
  max_delay_mode: zod12.enum(["fixed", "flexible"]).optional().describe(
38310
38844
  "Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
38311
38845
  ),
38846
+ audio_filtering_config: zod12.object({
38847
+ volume_threshold: zod12.number().min(
38848
+ getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
38849
+ ).max(
38850
+ getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
38851
+ ).optional().describe(
38852
+ "Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
38853
+ )
38854
+ }).optional().describe("Configuration for limiting the transcription of quiet audio."),
38312
38855
  transcript_filtering_config: zod12.object({
38313
38856
  remove_disfluencies: zod12.boolean().optional().describe(
38314
- "If true, words that are identified as disfluencies will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
38857
+ "If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
38315
38858
  ),
38316
38859
  replacements: zod12.array(
38317
38860
  zod12.object({
38318
- from: zod12.string(),
38319
- to: zod12.string()
38861
+ from: zod12.string().describe("The text or pattern identified to be replaced."),
38862
+ to: zod12.string().describe(
38863
+ "The corrected or formatted string to appear in the transcript."
38864
+ )
38320
38865
  })
38321
38866
  ).optional().describe(
38322
- "A list of replacements to apply to the transcript. Each replacement is a pair of strings, where the first string is the pattern to be replaced and the second string is the replacement text."
38867
+ 'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
38323
38868
  )
38324
- }).optional().describe("Configuration for applying filtering to the transcription"),
38869
+ }).optional().describe("Configuration for applying filtering to the transcription."),
38325
38870
  speaker_diarization_config: zod12.object({
38326
38871
  prefer_current_speaker: zod12.boolean().optional().describe(
38327
38872
  'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
@@ -38332,6 +38877,19 @@ var getJobsJobidResponse = zod12.object({
38332
38877
  getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
38333
38878
  ).optional().describe(
38334
38879
  "Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
38880
+ ),
38881
+ get_speakers: zod12.boolean().optional().describe(
38882
+ "If true, speaker identifiers will be returned at the end of transcript."
38883
+ ),
38884
+ speakers: zod12.array(
38885
+ zod12.object({
38886
+ label: zod12.string().min(1).describe(
38887
+ "Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
38888
+ ),
38889
+ speaker_identifiers: zod12.array(zod12.string().describe("Speaker identifiers.")).min(1)
38890
+ })
38891
+ ).optional().describe(
38892
+ "Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
38335
38893
  )
38336
38894
  }).optional().describe("Configuration for speaker diarization")
38337
38895
  }).optional(),
@@ -38387,10 +38945,14 @@ var getJobsJobidResponse = zod12.object({
38387
38945
  default_language: zod12.string().optional()
38388
38946
  }).optional(),
38389
38947
  summarization_config: zod12.object({
38390
- content_type: zod12.enum(["auto", "informative", "conversational"]).optional(),
38391
- summary_length: zod12.enum(["brief", "detailed"]).optional(),
38948
+ content_type: zod12.enum(["auto", "informative", "conversational"]).default(getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault).describe(
38949
+ "Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
38950
+ ),
38951
+ summary_length: zod12.enum(["brief", "detailed"]).default(getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault).describe(
38952
+ "Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
38953
+ ),
38392
38954
  summary_type: zod12.enum(["paragraphs", "bullets"]).optional()
38393
- }).optional(),
38955
+ }).optional().describe("Configuration options for summarization."),
38394
38956
  sentiment_analysis_config: zod12.object({}).optional(),
38395
38957
  topic_detection_config: zod12.object({
38396
38958
  topics: zod12.array(zod12.string()).optional()
@@ -38412,7 +38974,7 @@ var getJobsJobidResponse = zod12.object({
38412
38974
  "Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
38413
38975
  )
38414
38976
  }).describe(
38415
- "Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/<id> request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
38977
+ "Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
38416
38978
  )
38417
38979
  });
38418
38980
  var deleteJobsJobidParams = zod12.object({
@@ -38428,9 +38990,13 @@ var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensi
38428
38990
  var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
38429
38991
  var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
38430
38992
  var deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
38993
+ var deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
38994
+ var deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
38431
38995
  var deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
38432
38996
  var deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
38433
38997
  var deleteJobsJobidResponseJobConfigTranslationConfigTargetLanguagesMax = 5;
38998
+ var deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault = "auto";
38999
+ var deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault = "brief";
38434
39000
  var deleteJobsJobidResponse = zod12.object({
38435
39001
  job: zod12.object({
38436
39002
  created_at: zod12.string().datetime({}).describe("The UTC date time the job was created."),
@@ -38507,19 +39073,30 @@ var deleteJobsJobidResponse = zod12.object({
38507
39073
  max_delay_mode: zod12.enum(["fixed", "flexible"]).optional().describe(
38508
39074
  "Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
38509
39075
  ),
39076
+ audio_filtering_config: zod12.object({
39077
+ volume_threshold: zod12.number().min(
39078
+ deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
39079
+ ).max(
39080
+ deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
39081
+ ).optional().describe(
39082
+ "Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
39083
+ )
39084
+ }).optional().describe("Configuration for limiting the transcription of quiet audio."),
38510
39085
  transcript_filtering_config: zod12.object({
38511
39086
  remove_disfluencies: zod12.boolean().optional().describe(
38512
- "If true, words that are identified as disfluencies will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
39087
+ "If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
38513
39088
  ),
38514
39089
  replacements: zod12.array(
38515
39090
  zod12.object({
38516
- from: zod12.string(),
38517
- to: zod12.string()
39091
+ from: zod12.string().describe("The text or pattern identified to be replaced."),
39092
+ to: zod12.string().describe(
39093
+ "The corrected or formatted string to appear in the transcript."
39094
+ )
38518
39095
  })
38519
39096
  ).optional().describe(
38520
- "A list of replacements to apply to the transcript. Each replacement is a pair of strings, where the first string is the pattern to be replaced and the second string is the replacement text."
39097
+ 'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
38521
39098
  )
38522
- }).optional().describe("Configuration for applying filtering to the transcription"),
39099
+ }).optional().describe("Configuration for applying filtering to the transcription."),
38523
39100
  speaker_diarization_config: zod12.object({
38524
39101
  prefer_current_speaker: zod12.boolean().optional().describe(
38525
39102
  'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
@@ -38530,6 +39107,19 @@ var deleteJobsJobidResponse = zod12.object({
38530
39107
  deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
38531
39108
  ).optional().describe(
38532
39109
  "Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
39110
+ ),
39111
+ get_speakers: zod12.boolean().optional().describe(
39112
+ "If true, speaker identifiers will be returned at the end of transcript."
39113
+ ),
39114
+ speakers: zod12.array(
39115
+ zod12.object({
39116
+ label: zod12.string().min(1).describe(
39117
+ "Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
39118
+ ),
39119
+ speaker_identifiers: zod12.array(zod12.string().describe("Speaker identifiers.")).min(1)
39120
+ })
39121
+ ).optional().describe(
39122
+ "Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
38533
39123
  )
38534
39124
  }).optional().describe("Configuration for speaker diarization")
38535
39125
  }).optional(),
@@ -38585,10 +39175,14 @@ var deleteJobsJobidResponse = zod12.object({
38585
39175
  default_language: zod12.string().optional()
38586
39176
  }).optional(),
38587
39177
  summarization_config: zod12.object({
38588
- content_type: zod12.enum(["auto", "informative", "conversational"]).optional(),
38589
- summary_length: zod12.enum(["brief", "detailed"]).optional(),
39178
+ content_type: zod12.enum(["auto", "informative", "conversational"]).default(deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault).describe(
39179
+ "Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
39180
+ ),
39181
+ summary_length: zod12.enum(["brief", "detailed"]).default(deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault).describe(
39182
+ "Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
39183
+ ),
38590
39184
  summary_type: zod12.enum(["paragraphs", "bullets"]).optional()
38591
- }).optional(),
39185
+ }).optional().describe("Configuration options for summarization."),
38592
39186
  sentiment_analysis_config: zod12.object({}).optional(),
38593
39187
  topic_detection_config: zod12.object({
38594
39188
  topics: zod12.array(zod12.string()).optional()
@@ -38610,7 +39204,7 @@ var deleteJobsJobidResponse = zod12.object({
38610
39204
  "Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
38611
39205
  )
38612
39206
  }).describe(
38613
- "Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/<id> request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
39207
+ "Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
38614
39208
  )
38615
39209
  });
38616
39210
  var getJobsJobidDataParams = zod12.object({
@@ -38632,6 +39226,8 @@ var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverride
38632
39226
  var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
38633
39227
  var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
38634
39228
  var getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
39229
+ var getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
39230
+ var getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
38635
39231
  var getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
38636
39232
  var getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
38637
39233
  var getJobsJobidTranscriptResponseResultsItemVolumeMin = 0;
@@ -38703,19 +39299,28 @@ var getJobsJobidTranscriptResponse = zod12.object({
38703
39299
  max_delay_mode: zod12.enum(["fixed", "flexible"]).optional().describe(
38704
39300
  "Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
38705
39301
  ),
39302
+ audio_filtering_config: zod12.object({
39303
+ volume_threshold: zod12.number().min(
39304
+ getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
39305
+ ).max(
39306
+ getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
39307
+ ).optional().describe(
39308
+ "Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
39309
+ )
39310
+ }).optional().describe("Configuration for limiting the transcription of quiet audio."),
38706
39311
  transcript_filtering_config: zod12.object({
38707
39312
  remove_disfluencies: zod12.boolean().optional().describe(
38708
- "If true, words that are identified as disfluencies will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
39313
+ "If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
38709
39314
  ),
38710
39315
  replacements: zod12.array(
38711
39316
  zod12.object({
38712
- from: zod12.string(),
38713
- to: zod12.string()
39317
+ from: zod12.string().describe("The text or pattern identified to be replaced."),
39318
+ to: zod12.string().describe("The corrected or formatted string to appear in the transcript.")
38714
39319
  })
38715
39320
  ).optional().describe(
38716
- "A list of replacements to apply to the transcript. Each replacement is a pair of strings, where the first string is the pattern to be replaced and the second string is the replacement text."
39321
+ 'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
38717
39322
  )
38718
- }).optional().describe("Configuration for applying filtering to the transcription"),
39323
+ }).optional().describe("Configuration for applying filtering to the transcription."),
38719
39324
  speaker_diarization_config: zod12.object({
38720
39325
  prefer_current_speaker: zod12.boolean().optional().describe(
38721
39326
  'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
@@ -38726,9 +39331,23 @@ var getJobsJobidTranscriptResponse = zod12.object({
38726
39331
  getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
38727
39332
  ).optional().describe(
38728
39333
  "Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
39334
+ ),
39335
+ get_speakers: zod12.boolean().optional().describe(
39336
+ "If true, speaker identifiers will be returned at the end of transcript."
39337
+ ),
39338
+ speakers: zod12.array(
39339
+ zod12.object({
39340
+ label: zod12.string().min(1).describe(
39341
+ "Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
39342
+ ),
39343
+ speaker_identifiers: zod12.array(zod12.string().describe("Speaker identifiers.")).min(1)
39344
+ })
39345
+ ).optional().describe(
39346
+ "Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
38729
39347
  )
38730
39348
  }).optional().describe("Configuration for speaker diarization")
38731
39349
  }).optional(),
39350
+ orchestrator_version: zod12.string().optional().describe("The engine version used to generate transcription output."),
38732
39351
  translation_errors: zod12.array(
38733
39352
  zod12.object({
38734
39353
  type: zod12.enum(["translation_failed", "unsupported_translation_pair"]).optional(),
@@ -38806,10 +39425,7 @@ var getJobsJobidTranscriptResponse = zod12.object({
38806
39425
  "OTHER"
38807
39426
  ]).optional(),
38808
39427
  message: zod12.string().optional()
38809
- }).optional(),
38810
- orchestrator_version: zod12.string().optional().describe(
38811
- "Orchestrator version in PEP 440 Format or set to 'version_not_found' as default."
38812
- )
39428
+ }).optional()
38813
39429
  }).describe(
38814
39430
  "Summary information about the output from an ASR job, comprising the job type and configuration parameters used when generating the output."
38815
39431
  ),
@@ -38892,6 +39508,12 @@ var getJobsJobidTranscriptResponse = zod12.object({
38892
39508
  "An ASR job output item. The primary item types are `word` and `punctuation`. Other item types may be present, for example to provide semantic information of different forms."
38893
39509
  )
38894
39510
  ),
39511
+ speakers: zod12.array(
39512
+ zod12.object({
39513
+ label: zod12.string().min(1).describe("Speaker label."),
39514
+ speaker_identifiers: zod12.array(zod12.string().describe("Speaker identifiers.")).min(1)
39515
+ })
39516
+ ).optional().describe("List of unique speaker identifiers detected in the transcript."),
38895
39517
  translations: zod12.record(
38896
39518
  zod12.string(),
38897
39519
  zod12.array(
@@ -38913,13 +39535,23 @@ var getJobsJobidTranscriptResponse = zod12.object({
38913
39535
  sentiment_analysis: zod12.object({
38914
39536
  segments: zod12.array(
38915
39537
  zod12.object({
38916
- text: zod12.string().optional(),
38917
- start_time: zod12.number().optional(),
38918
- end_time: zod12.number().optional(),
38919
- sentiment: zod12.string().optional(),
38920
- speaker: zod12.string().optional(),
38921
- channel: zod12.string().optional(),
38922
- confidence: zod12.number().optional()
39538
+ text: zod12.string().optional().describe("Represents the transcript of the analysed segment"),
39539
+ sentiment: zod12.string().optional().describe(
39540
+ "The assigned sentiment to the segment, which can be positive, neutral or negative"
39541
+ ),
39542
+ start_time: zod12.number().optional().describe(
39543
+ "The timestamp corresponding to the beginning of the transcription segment"
39544
+ ),
39545
+ end_time: zod12.number().optional().describe(
39546
+ "The timestamp corresponding to the end of the transcription segment"
39547
+ ),
39548
+ speaker: zod12.string().optional().describe(
39549
+ "The speaker label for the segment, if speaker diarization is enabled"
39550
+ ),
39551
+ channel: zod12.string().optional().describe(
39552
+ "The channel label for the segment, if channel diarization is enabled"
39553
+ ),
39554
+ confidence: zod12.number().optional().describe("A confidence score in the range of 0-1")
38923
39555
  }).describe("Represents a segment of text and its associated sentiment.")
38924
39556
  ).optional().describe(
38925
39557
  "An array of objects that represent a segment of text and its associated sentiment."
@@ -38978,10 +39610,10 @@ var getJobsJobidTranscriptResponse = zod12.object({
38978
39610
  }).optional().describe("Main object that holds topic detection results."),
38979
39611
  chapters: zod12.array(
38980
39612
  zod12.object({
38981
- title: zod12.string().optional(),
38982
- summary: zod12.string().optional(),
38983
- start_time: zod12.number().optional(),
38984
- end_time: zod12.number().optional()
39613
+ title: zod12.string().optional().describe("The auto-generated title for the chapter"),
39614
+ summary: zod12.string().optional().describe("An auto-generated paragraph-style, short summary of the chapter"),
39615
+ start_time: zod12.number().optional().describe("The start time of the chapter in the audio file"),
39616
+ end_time: zod12.number().optional().describe("The end time of the chapter in the audio file")
38985
39617
  })
38986
39618
  ).optional().describe("An array of objects that represent summarized chapters of the transcript"),
38987
39619
  audio_events: zod12.array(
@@ -39026,6 +39658,18 @@ var getJobsJobidLogParams = zod12.object({
39026
39658
  jobid: zod12.string().describe("ID of the job.")
39027
39659
  });
39028
39660
  var getJobsJobidLogResponse = zod12.instanceof(File);
39661
+ var getJobsJobidObjectUrlsParams = zod12.object({
39662
+ jobid: zod12.string().describe("ID of the job.")
39663
+ });
39664
+ var getJobsJobidObjectUrlsQueryParams = zod12.object({
39665
+ ttl: zod12.number().describe("Time to live in seconds for the signed URLs"),
39666
+ url_for: zod12.array(zod12.enum(["data", "audio_mp3", "transcript"]))
39667
+ });
39668
+ var getJobsJobidObjectUrlsResponse = zod12.object({
39669
+ data: zod12.string().optional(),
39670
+ audio_mp3: zod12.string().optional(),
39671
+ transcript: zod12.string().optional()
39672
+ });
39029
39673
  var getUsageQueryParams = zod12.object({
39030
39674
  since: zod12.string().date().optional().describe(
39031
39675
  "Include usage after the given date (inclusive). This is a [ISO-8601](https://en.wikipedia.org/wiki/ISO_8601) calendar date format: `YYYY-MM-DD`."
@@ -39159,7 +39803,7 @@ var speechToTextBodyKeytermsDefault = [];
39159
39803
  var speechToTextBody = zod13.object({
39160
39804
  model_id: zod13.enum(["scribe_v1", "scribe_v2"]).describe("The ID of the model to use for transcription."),
39161
39805
  file: zod13.instanceof(File).or(zod13.null()).optional().describe(
39162
- "The file to transcribe (100ms minimum audio length). All major audio and video formats are supported. Exactly one of the file or cloud_storage_url parameters must be provided. The file size must be less than 3.0GB."
39806
+ "The file to transcribe (100ms minimum audio length). All major audio and video formats are supported. Exactly one of the file or cloud_storage_url parameters must be provided. The file size must be less than 5.0GB."
39163
39807
  ),
39164
39808
  language_code: zod13.string().or(zod13.null()).optional().describe(
39165
39809
  "An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand. Defaults to null, in this case the language is predicted automatically."
@@ -39237,7 +39881,7 @@ var speechToTextBody = zod13.object({
39237
39881
  "The format of input audio. Options are 'pcm_s16le_16' or 'other' For `pcm_s16le_16`, the input audio must be 16-bit PCM at a 16kHz sample rate, single channel (mono), and little-endian byte order. Latency will be lower than with passing an encoded waveform."
39238
39882
  ),
39239
39883
  cloud_storage_url: zod13.string().or(zod13.null()).optional().describe(
39240
- "The HTTPS URL of the file to transcribe. Exactly one of the file or cloud_storage_url parameters must be provided. The file must be accessible via HTTPS and the file size must be less than 2GB. Any valid HTTPS URL is accepted, including URLs from cloud storage providers (AWS S3, Google Cloud Storage, Cloudflare R2, etc.), CDNs, or any other HTTPS source. URLs can be pre-signed or include authentication tokens in query parameters."
39884
+ "[Deprecated] This parameter is deprecated and will be removed in the future. Use 'source_url' instead.The HTTPS URL of the file to transcribe. Exactly one of the file or cloud_storage_url parameters must be provided. The file must be accessible via HTTPS and the file size must be less than 2GB. Any valid HTTPS URL is accepted, including URLs from cloud storage providers (AWS S3, Google Cloud Storage, Cloudflare R2, etc.), CDNs, or any other HTTPS source. URLs can be pre-signed or include authentication tokens in query parameters."
39241
39885
  ),
39242
39886
  source_url: zod13.string().or(zod13.null()).optional().describe(
39243
39887
  "The URL of an audio or video file to transcribe. Supports hosted video or audio files, YouTube video URLs, TikTok video URLs, and other video hosting services."
@@ -39276,7 +39920,7 @@ var speechToTextBody = zod13.object({
39276
39920
  "How to format redacted entities. 'redacted' replaces with {REDACTED}, 'entity_type' replaces with {ENTITY_TYPE}, 'enumerated_entity_type' replaces with {ENTITY_TYPE_N} where N enumerates each occurrence. Only used when entity_redaction is set."
39277
39921
  ),
39278
39922
  keyterms: zod13.array(zod13.string()).default(speechToTextBodyKeytermsDefault).describe(
39279
- 'A list of keyterms to bias the transcription towards. The keyterms are words or phrases you want the model to recognise more accurately. The number of keyterms cannot exceed 1000. The length of each keyterm must be less than 50 characters. Keyterms can contain at most 5 words (after normalisation). For example ["hello", "world", "technical term"]. Usage of this parameter will incur an additional 20% surcharge on the base transcription cost. When more than 100 keyterms are provided, a minimum billable duration of 20 seconds applies per request.'
39923
+ 'A list of keyterms to bias the transcription towards. The keyterms are words or phrases you want the model to recognise more accurately. The number of keyterms cannot exceed 1000. The length of each keyterm must be less than 50 characters. Keyterms can contain at most 5 words (after normalisation). For example ["hello", "world", "technical term"]. The following characters are not supported: `<`, `>`, `{`, `}`, `[`, `]`, `\\`. Usage of this parameter will incur an additional 20% surcharge on the base transcription cost. When more than 100 keyterms are provided, a minimum billable duration of 20 seconds applies per request.'
39280
39924
  )
39281
39925
  });
39282
39926
  var speechToTextResponse = zod13.object({
@@ -39641,6 +40285,7 @@ export {
39641
40285
  SonioxModels,
39642
40286
  SonioxRealtimeModel,
39643
40287
  SonioxRegion,
40288
+ sdk_types_exports as SonioxSDK,
39644
40289
  SonioxStreamingSchema,
39645
40290
  streaming_types_zod_exports as SonioxStreamingTypes,
39646
40291
  SonioxStreamingUpdateSchema,