voice-router-dev 0.9.4 → 0.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +66 -0
- package/dist/constants.d.mts +11 -92
- package/dist/constants.d.ts +11 -92
- package/dist/constants.js +11 -88
- package/dist/constants.mjs +11 -88
- package/dist/{field-configs-BXXH2T3E.d.mts → field-configs-Bt2iLgt_.d.mts} +8982 -7900
- package/dist/{field-configs-BXXH2T3E.d.ts → field-configs-Bt2iLgt_.d.ts} +8982 -7900
- package/dist/field-configs.d.mts +1 -1
- package/dist/field-configs.d.ts +1 -1
- package/dist/field-configs.js +583 -150
- package/dist/field-configs.mjs +583 -150
- package/dist/index.d.mts +1578 -560
- package/dist/index.d.ts +1578 -560
- package/dist/index.js +922 -279
- package/dist/index.mjs +925 -279
- package/dist/{provider-metadata-D1d-9cng.d.ts → provider-metadata-B5SFlFb0.d.ts} +6 -6
- package/dist/{provider-metadata-BJ29OPW1.d.mts → provider-metadata-J8URl-3i.d.mts} +6 -6
- package/dist/provider-metadata.d.mts +1 -1
- package/dist/provider-metadata.d.ts +1 -1
- package/dist/provider-metadata.js +3 -66
- package/dist/provider-metadata.mjs +3 -66
- package/dist/{speechToTextChunkResponseModel-BY2lGyZ3.d.ts → speechToTextChunkResponseModel-DhC5T0u1.d.ts} +364 -39
- package/dist/{speechToTextChunkResponseModel-KayxDiZ7.d.mts → speechToTextChunkResponseModel-SgJKP7kZ.d.mts} +364 -39
- package/dist/webhooks.d.mts +3 -2
- package/dist/webhooks.d.ts +3 -2
- package/package.json +8 -3
package/dist/index.mjs
CHANGED
|
@@ -1102,7 +1102,6 @@ var AzureLocales = [
|
|
|
1102
1102
|
{ code: "ar-YE", name: "Arabic (Yemen)" },
|
|
1103
1103
|
{ code: "as-IN", name: "Assamese (India)" },
|
|
1104
1104
|
{ code: "az-AZ", name: "Azerbaijani (Azerbaijan)" },
|
|
1105
|
-
{ code: "be-BY", name: "Belarusian (Belarus)" },
|
|
1106
1105
|
{ code: "bg-BG", name: "Bulgarian (Bulgaria)" },
|
|
1107
1106
|
{ code: "bn-BD", name: "Bengali (Bangladesh)" },
|
|
1108
1107
|
{ code: "bn-IN", name: "Bengali (India)" },
|
|
@@ -1183,7 +1182,6 @@ var AzureLocales = [
|
|
|
1183
1182
|
{ code: "lo-LA", name: "Lao (Latin)" },
|
|
1184
1183
|
{ code: "lt-LT", name: "Lithuanian (Lithuania)" },
|
|
1185
1184
|
{ code: "lv-LV", name: "Latvian (Latvia)" },
|
|
1186
|
-
{ code: "mi-NZ", name: "Maori (New Zealand)" },
|
|
1187
1185
|
{ code: "mk-MK", name: "Macedonian (North Macedonia)" },
|
|
1188
1186
|
{ code: "ml-IN", name: "Malayalam (India)" },
|
|
1189
1187
|
{ code: "mn-MN", name: "Mongolian (Mongolia)" },
|
|
@@ -1259,7 +1257,6 @@ var AzureLocaleCodes = [
|
|
|
1259
1257
|
"ar-YE",
|
|
1260
1258
|
"as-IN",
|
|
1261
1259
|
"az-AZ",
|
|
1262
|
-
"be-BY",
|
|
1263
1260
|
"bg-BG",
|
|
1264
1261
|
"bn-BD",
|
|
1265
1262
|
"bn-IN",
|
|
@@ -1340,7 +1337,6 @@ var AzureLocaleCodes = [
|
|
|
1340
1337
|
"lo-LA",
|
|
1341
1338
|
"lt-LT",
|
|
1342
1339
|
"lv-LV",
|
|
1343
|
-
"mi-NZ",
|
|
1344
1340
|
"mk-MK",
|
|
1345
1341
|
"ml-IN",
|
|
1346
1342
|
"mn-MN",
|
|
@@ -1416,7 +1412,6 @@ var AzureLocaleLabels = {
|
|
|
1416
1412
|
"ar-YE": "Arabic (Yemen)",
|
|
1417
1413
|
"as-IN": "Assamese (India)",
|
|
1418
1414
|
"az-AZ": "Azerbaijani (Azerbaijan)",
|
|
1419
|
-
"be-BY": "Belarusian (Belarus)",
|
|
1420
1415
|
"bg-BG": "Bulgarian (Bulgaria)",
|
|
1421
1416
|
"bn-BD": "Bengali (Bangladesh)",
|
|
1422
1417
|
"bn-IN": "Bengali (India)",
|
|
@@ -1497,7 +1492,6 @@ var AzureLocaleLabels = {
|
|
|
1497
1492
|
"lo-LA": "Lao (Latin)",
|
|
1498
1493
|
"lt-LT": "Lithuanian (Lithuania)",
|
|
1499
1494
|
"lv-LV": "Latvian (Latvia)",
|
|
1500
|
-
"mi-NZ": "Maori (New Zealand)",
|
|
1501
1495
|
"mk-MK": "Macedonian (North Macedonia)",
|
|
1502
1496
|
"ml-IN": "Malayalam (India)",
|
|
1503
1497
|
"mn-MN": "Mongolian (Mongolia)",
|
|
@@ -1573,7 +1567,6 @@ var AzureLocale = {
|
|
|
1573
1567
|
"ar-YE": "ar-YE",
|
|
1574
1568
|
"as-IN": "as-IN",
|
|
1575
1569
|
"az-AZ": "az-AZ",
|
|
1576
|
-
"be-BY": "be-BY",
|
|
1577
1570
|
"bg-BG": "bg-BG",
|
|
1578
1571
|
"bn-BD": "bn-BD",
|
|
1579
1572
|
"bn-IN": "bn-IN",
|
|
@@ -1654,7 +1647,6 @@ var AzureLocale = {
|
|
|
1654
1647
|
"lo-LA": "lo-LA",
|
|
1655
1648
|
"lt-LT": "lt-LT",
|
|
1656
1649
|
"lv-LV": "lv-LV",
|
|
1657
|
-
"mi-NZ": "mi-NZ",
|
|
1658
1650
|
"mk-MK": "mk-MK",
|
|
1659
1651
|
"ml-IN": "ml-IN",
|
|
1660
1652
|
"mn-MN": "mn-MN",
|
|
@@ -1745,8 +1737,6 @@ var ElevenLabsLanguages = [
|
|
|
1745
1737
|
{ code: "hr", name: "Croatian" },
|
|
1746
1738
|
{ code: "bg", name: "Bulgarian" },
|
|
1747
1739
|
{ code: "lt", name: "Lithuanian" },
|
|
1748
|
-
{ code: "la", name: "Latin" },
|
|
1749
|
-
{ code: "mi", name: "Maori" },
|
|
1750
1740
|
{ code: "ml", name: "Malayalam" },
|
|
1751
1741
|
{ code: "cy", name: "Welsh" },
|
|
1752
1742
|
{ code: "sk", name: "Slovak" },
|
|
@@ -1760,20 +1750,16 @@ var ElevenLabsLanguages = [
|
|
|
1760
1750
|
{ code: "kn", name: "Kannada" },
|
|
1761
1751
|
{ code: "et", name: "Estonian" },
|
|
1762
1752
|
{ code: "mk", name: "Macedonian" },
|
|
1763
|
-
{ code: "br", name: "Breton" },
|
|
1764
|
-
{ code: "eu", name: "Basque" },
|
|
1765
1753
|
{ code: "is", name: "Icelandic" },
|
|
1766
1754
|
{ code: "hy", name: "Armenian" },
|
|
1767
1755
|
{ code: "ne", name: "Nepali" },
|
|
1768
1756
|
{ code: "mn", name: "Mongolian" },
|
|
1769
1757
|
{ code: "bs", name: "Bosnian" },
|
|
1770
1758
|
{ code: "kk", name: "Kazakh" },
|
|
1771
|
-
{ code: "sq", name: "Albanian" },
|
|
1772
1759
|
{ code: "sw", name: "Swahili" },
|
|
1773
1760
|
{ code: "gl", name: "Galician" },
|
|
1774
1761
|
{ code: "mr", name: "Marathi" },
|
|
1775
1762
|
{ code: "pa", name: "Punjabi" },
|
|
1776
|
-
{ code: "si", name: "Sinhala" },
|
|
1777
1763
|
{ code: "km", name: "Khmer" },
|
|
1778
1764
|
{ code: "sn", name: "Shona" },
|
|
1779
1765
|
{ code: "yo", name: "Yoruba" },
|
|
@@ -1786,29 +1772,16 @@ var ElevenLabsLanguages = [
|
|
|
1786
1772
|
{ code: "sd", name: "Sindhi" },
|
|
1787
1773
|
{ code: "gu", name: "Gujarati" },
|
|
1788
1774
|
{ code: "am", name: "Amharic" },
|
|
1789
|
-
{ code: "yi", name: "Yiddish" },
|
|
1790
1775
|
{ code: "lo", name: "Lao" },
|
|
1791
1776
|
{ code: "uz", name: "Uzbek" },
|
|
1792
|
-
{ code: "fo", name: "Faroese" },
|
|
1793
|
-
{ code: "ht", name: "Haitian Creole" },
|
|
1794
1777
|
{ code: "ps", name: "Pashto" },
|
|
1795
|
-
{ code: "tk", name: "Turkmen" },
|
|
1796
|
-
{ code: "nn", name: "Norwegian Nynorsk" },
|
|
1797
1778
|
{ code: "mt", name: "Maltese" },
|
|
1798
|
-
{ code: "sa", name: "Sanskrit" },
|
|
1799
1779
|
{ code: "lb", name: "Luxembourgish" },
|
|
1800
1780
|
{ code: "my", name: "Burmese" },
|
|
1801
|
-
{ code: "bo", name: "Tibetan" },
|
|
1802
|
-
{ code: "tl", name: "Tagalog" },
|
|
1803
|
-
{ code: "mg", name: "Malagasy" },
|
|
1804
1781
|
{ code: "as", name: "Assamese" },
|
|
1805
|
-
{ code: "tt", name: "Tatar" },
|
|
1806
|
-
{ code: "haw", name: "Hawaiian" },
|
|
1807
1782
|
{ code: "ln", name: "Lingala" },
|
|
1808
1783
|
{ code: "ha", name: "Hausa" },
|
|
1809
|
-
{ code: "ba", name: "Bashkir" },
|
|
1810
|
-
{ code: "jw", name: "Javanese" },
|
|
1811
|
-
{ code: "su", name: "Sundanese" }
|
|
1784
|
+
{ code: "jw", name: "Javanese" }
|
|
1812
1785
|
];
|
|
1813
1786
|
var ElevenLabsLanguageCodes = [
|
|
1814
1787
|
"en",
|
|
@@ -1846,8 +1819,6 @@ var ElevenLabsLanguageCodes = [
|
|
|
1846
1819
|
"hr",
|
|
1847
1820
|
"bg",
|
|
1848
1821
|
"lt",
|
|
1849
|
-
"la",
|
|
1850
|
-
"mi",
|
|
1851
1822
|
"ml",
|
|
1852
1823
|
"cy",
|
|
1853
1824
|
"sk",
|
|
@@ -1861,20 +1832,16 @@ var ElevenLabsLanguageCodes = [
|
|
|
1861
1832
|
"kn",
|
|
1862
1833
|
"et",
|
|
1863
1834
|
"mk",
|
|
1864
|
-
"br",
|
|
1865
|
-
"eu",
|
|
1866
1835
|
"is",
|
|
1867
1836
|
"hy",
|
|
1868
1837
|
"ne",
|
|
1869
1838
|
"mn",
|
|
1870
1839
|
"bs",
|
|
1871
1840
|
"kk",
|
|
1872
|
-
"sq",
|
|
1873
1841
|
"sw",
|
|
1874
1842
|
"gl",
|
|
1875
1843
|
"mr",
|
|
1876
1844
|
"pa",
|
|
1877
|
-
"si",
|
|
1878
1845
|
"km",
|
|
1879
1846
|
"sn",
|
|
1880
1847
|
"yo",
|
|
@@ -1887,29 +1854,16 @@ var ElevenLabsLanguageCodes = [
|
|
|
1887
1854
|
"sd",
|
|
1888
1855
|
"gu",
|
|
1889
1856
|
"am",
|
|
1890
|
-
"yi",
|
|
1891
1857
|
"lo",
|
|
1892
1858
|
"uz",
|
|
1893
|
-
"fo",
|
|
1894
|
-
"ht",
|
|
1895
1859
|
"ps",
|
|
1896
|
-
"tk",
|
|
1897
|
-
"nn",
|
|
1898
1860
|
"mt",
|
|
1899
|
-
"sa",
|
|
1900
1861
|
"lb",
|
|
1901
1862
|
"my",
|
|
1902
|
-
"bo",
|
|
1903
|
-
"tl",
|
|
1904
|
-
"mg",
|
|
1905
1863
|
"as",
|
|
1906
|
-
"tt",
|
|
1907
|
-
"haw",
|
|
1908
1864
|
"ln",
|
|
1909
1865
|
"ha",
|
|
1910
|
-
"ba",
|
|
1911
|
-
"jw",
|
|
1912
|
-
"su"
|
|
1866
|
+
"jw"
|
|
1913
1867
|
];
|
|
1914
1868
|
var ElevenLabsLanguageLabels = {
|
|
1915
1869
|
en: "English",
|
|
@@ -1947,8 +1901,6 @@ var ElevenLabsLanguageLabels = {
|
|
|
1947
1901
|
hr: "Croatian",
|
|
1948
1902
|
bg: "Bulgarian",
|
|
1949
1903
|
lt: "Lithuanian",
|
|
1950
|
-
la: "Latin",
|
|
1951
|
-
mi: "Maori",
|
|
1952
1904
|
ml: "Malayalam",
|
|
1953
1905
|
cy: "Welsh",
|
|
1954
1906
|
sk: "Slovak",
|
|
@@ -1962,20 +1914,16 @@ var ElevenLabsLanguageLabels = {
|
|
|
1962
1914
|
kn: "Kannada",
|
|
1963
1915
|
et: "Estonian",
|
|
1964
1916
|
mk: "Macedonian",
|
|
1965
|
-
br: "Breton",
|
|
1966
|
-
eu: "Basque",
|
|
1967
1917
|
is: "Icelandic",
|
|
1968
1918
|
hy: "Armenian",
|
|
1969
1919
|
ne: "Nepali",
|
|
1970
1920
|
mn: "Mongolian",
|
|
1971
1921
|
bs: "Bosnian",
|
|
1972
1922
|
kk: "Kazakh",
|
|
1973
|
-
sq: "Albanian",
|
|
1974
1923
|
sw: "Swahili",
|
|
1975
1924
|
gl: "Galician",
|
|
1976
1925
|
mr: "Marathi",
|
|
1977
1926
|
pa: "Punjabi",
|
|
1978
|
-
si: "Sinhala",
|
|
1979
1927
|
km: "Khmer",
|
|
1980
1928
|
sn: "Shona",
|
|
1981
1929
|
yo: "Yoruba",
|
|
@@ -1988,29 +1936,16 @@ var ElevenLabsLanguageLabels = {
|
|
|
1988
1936
|
sd: "Sindhi",
|
|
1989
1937
|
gu: "Gujarati",
|
|
1990
1938
|
am: "Amharic",
|
|
1991
|
-
yi: "Yiddish",
|
|
1992
1939
|
lo: "Lao",
|
|
1993
1940
|
uz: "Uzbek",
|
|
1994
|
-
fo: "Faroese",
|
|
1995
|
-
ht: "Haitian Creole",
|
|
1996
1941
|
ps: "Pashto",
|
|
1997
|
-
tk: "Turkmen",
|
|
1998
|
-
nn: "Norwegian Nynorsk",
|
|
1999
1942
|
mt: "Maltese",
|
|
2000
|
-
sa: "Sanskrit",
|
|
2001
1943
|
lb: "Luxembourgish",
|
|
2002
1944
|
my: "Burmese",
|
|
2003
|
-
bo: "Tibetan",
|
|
2004
|
-
tl: "Tagalog",
|
|
2005
|
-
mg: "Malagasy",
|
|
2006
1945
|
as: "Assamese",
|
|
2007
|
-
tt: "Tatar",
|
|
2008
|
-
haw: "Hawaiian",
|
|
2009
1946
|
ln: "Lingala",
|
|
2010
1947
|
ha: "Hausa",
|
|
2011
|
-
ba: "Bashkir",
|
2012
|
-
jw: "Javanese",
|
|
2013
|
-
su: "Sundanese"
|
|
1948
|
+
jw: "Javanese"
|
|
2014
1949
|
};
|
|
2015
1950
|
|
|
2016
1951
|
// src/generated/gladia/schema/streamingSupportedBitDepthEnum.ts
|
|
@@ -2515,6 +2450,7 @@ var OpenAITranscriptionModel = {
|
|
|
2515
2450
|
"gpt-4o-mini-transcribe-2025-12-15": "gpt-4o-mini-transcribe-2025-12-15",
|
|
2516
2451
|
"gpt-4o-transcribe": "gpt-4o-transcribe",
|
|
2517
2452
|
"gpt-4o-transcribe-diarize": "gpt-4o-transcribe-diarize",
|
|
2453
|
+
"gpt-realtime-whisper": "gpt-realtime-whisper",
|
|
2518
2454
|
"whisper-1": "whisper-1"
|
|
2519
2455
|
};
|
|
2520
2456
|
var OpenAIRealtimeModel = {
|
|
@@ -2530,6 +2466,7 @@ var OpenAIRealtimeModel = {
|
|
|
2530
2466
|
"gpt-audio-mini-2025-12-15": "gpt-audio-mini-2025-12-15",
|
|
2531
2467
|
"gpt-realtime": "gpt-realtime",
|
|
2532
2468
|
"gpt-realtime-1.5": "gpt-realtime-1.5",
|
|
2469
|
+
"gpt-realtime-2": "gpt-realtime-2",
|
|
2533
2470
|
"gpt-realtime-2025-08-28": "gpt-realtime-2025-08-28",
|
|
2534
2471
|
"gpt-realtime-mini": "gpt-realtime-mini",
|
|
2535
2472
|
"gpt-realtime-mini-2025-10-06": "gpt-realtime-mini-2025-10-06",
|
|
@@ -5346,12 +5283,20 @@ var EntityType = {
|
|
|
5346
5283
|
email_address: "email_address",
|
|
5347
5284
|
event: "event",
|
|
5348
5285
|
filename: "filename",
|
|
5286
|
+
gender: "gender",
|
|
5349
5287
|
gender_sexuality: "gender_sexuality",
|
|
5350
5288
|
healthcare_number: "healthcare_number",
|
|
5351
5289
|
injury: "injury",
|
|
5352
5290
|
ip_address: "ip_address",
|
|
5353
5291
|
language: "language",
|
|
5354
5292
|
location: "location",
|
|
5293
|
+
location_address: "location_address",
|
|
5294
|
+
location_address_street: "location_address_street",
|
|
5295
|
+
location_city: "location_city",
|
|
5296
|
+
location_coordinate: "location_coordinate",
|
|
5297
|
+
location_country: "location_country",
|
|
5298
|
+
location_state: "location_state",
|
|
5299
|
+
location_zip: "location_zip",
|
|
5355
5300
|
marital_status: "marital_status",
|
|
5356
5301
|
medical_condition: "medical_condition",
|
|
5357
5302
|
medical_process: "medical_process",
|
|
@@ -5360,6 +5305,7 @@ var EntityType = {
|
|
|
5360
5305
|
number_sequence: "number_sequence",
|
|
5361
5306
|
occupation: "occupation",
|
|
5362
5307
|
organization: "organization",
|
|
5308
|
+
organization_medical_facility: "organization_medical_facility",
|
|
5363
5309
|
passport_number: "passport_number",
|
|
5364
5310
|
password: "password",
|
|
5365
5311
|
person_age: "person_age",
|
|
@@ -5368,6 +5314,7 @@ var EntityType = {
|
|
|
5368
5314
|
physical_attribute: "physical_attribute",
|
|
5369
5315
|
political_affiliation: "political_affiliation",
|
|
5370
5316
|
religion: "religion",
|
|
5317
|
+
sexuality: "sexuality",
|
|
5371
5318
|
statistics: "statistics",
|
|
5372
5319
|
time: "time",
|
|
5373
5320
|
url: "url",
|
|
@@ -5394,12 +5341,20 @@ var PiiPolicy = {
|
|
|
5394
5341
|
email_address: "email_address",
|
|
5395
5342
|
event: "event",
|
|
5396
5343
|
filename: "filename",
|
|
5344
|
+
gender: "gender",
|
|
5397
5345
|
gender_sexuality: "gender_sexuality",
|
|
5398
5346
|
healthcare_number: "healthcare_number",
|
|
5399
5347
|
injury: "injury",
|
|
5400
5348
|
ip_address: "ip_address",
|
|
5401
5349
|
language: "language",
|
|
5402
5350
|
location: "location",
|
|
5351
|
+
location_address: "location_address",
|
|
5352
|
+
location_address_street: "location_address_street",
|
|
5353
|
+
location_city: "location_city",
|
|
5354
|
+
location_coordinate: "location_coordinate",
|
|
5355
|
+
location_country: "location_country",
|
|
5356
|
+
location_state: "location_state",
|
|
5357
|
+
location_zip: "location_zip",
|
|
5403
5358
|
marital_status: "marital_status",
|
|
5404
5359
|
medical_condition: "medical_condition",
|
|
5405
5360
|
medical_process: "medical_process",
|
|
@@ -5408,6 +5363,7 @@ var PiiPolicy = {
|
|
|
5408
5363
|
number_sequence: "number_sequence",
|
|
5409
5364
|
occupation: "occupation",
|
|
5410
5365
|
organization: "organization",
|
|
5366
|
+
organization_medical_facility: "organization_medical_facility",
|
|
5411
5367
|
passport_number: "passport_number",
|
|
5412
5368
|
password: "password",
|
|
5413
5369
|
person_age: "person_age",
|
|
@@ -5416,6 +5372,7 @@ var PiiPolicy = {
|
|
|
5416
5372
|
physical_attribute: "physical_attribute",
|
|
5417
5373
|
political_affiliation: "political_affiliation",
|
|
5418
5374
|
religion: "religion",
|
|
5375
|
+
sexuality: "sexuality",
|
|
5419
5376
|
statistics: "statistics",
|
|
5420
5377
|
time: "time",
|
|
5421
5378
|
url: "url",
|
|
@@ -5484,7 +5441,8 @@ var TranscriptOptionalParamsRedactPiiAudioOptionsOverrideAudioRedactionMethod =
|
|
|
5484
5441
|
|
|
5485
5442
|
// src/generated/assemblyai/schema/transcriptOptionalParamsRemoveAudioTags.ts
|
|
5486
5443
|
var TranscriptOptionalParamsRemoveAudioTags = {
|
|
5487
|
-
all: "all"
|
|
5444
|
+
all: "all",
|
|
5445
|
+
speaker: "speaker"
|
|
5488
5446
|
};
|
|
5489
5447
|
|
|
5490
5448
|
// src/generated/assemblyai/schema/transcriptRedactPiiAudioOptionsOverrideAudioRedactionMethod.ts
|
|
@@ -5494,7 +5452,8 @@ var TranscriptRedactPiiAudioOptionsOverrideAudioRedactionMethod = {
|
|
|
5494
5452
|
|
|
5495
5453
|
// src/generated/assemblyai/schema/transcriptRemoveAudioTags.ts
|
|
5496
5454
|
var TranscriptRemoveAudioTags = {
|
|
5497
|
-
all: "all"
|
|
5455
|
+
all: "all",
|
|
5456
|
+
speaker: "speaker"
|
|
5498
5457
|
};
|
|
5499
5458
|
|
|
5500
5459
|
// src/generated/assemblyai/api/assemblyAIAPI.ts
|
|
@@ -8931,11 +8890,12 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
|
|
|
8931
8890
|
requestBody = { config: JSON.stringify(jobConfig) };
|
|
8932
8891
|
headers = { "Content-Type": "application/json" };
|
|
8933
8892
|
} else if (audio.type === "file") {
|
|
8934
|
-
|
|
8935
|
-
|
|
8936
|
-
|
|
8937
|
-
|
|
8938
|
-
|
|
8893
|
+
const formData = new FormData();
|
|
8894
|
+
formData.append("config", JSON.stringify(jobConfig));
|
|
8895
|
+
const fileBlob = audio.file instanceof Blob ? audio.file : new Blob([audio.file]);
|
|
8896
|
+
formData.append("data_file", fileBlob, "audio");
|
|
8897
|
+
requestBody = formData;
|
|
8898
|
+
headers = {};
|
|
8939
8899
|
} else {
|
|
8940
8900
|
return {
|
|
8941
8901
|
success: false,
|
|
@@ -9386,15 +9346,18 @@ import axios9 from "axios";
|
|
|
9386
9346
|
// src/generated/soniox/schema/index.ts
|
|
9387
9347
|
var schema_exports4 = {};
|
|
9388
9348
|
__export(schema_exports4, {
|
|
9349
|
+
TTSVoiceGender: () => TTSVoiceGender,
|
|
9389
9350
|
TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
|
|
9390
9351
|
TranscriptionMode: () => TranscriptionMode,
|
|
9391
9352
|
TranscriptionStatus: () => TranscriptionStatus,
|
|
9392
|
-
TranslationConfigType: () => TranslationConfigType
|
|
9353
|
+
TranslationConfigType: () => TranslationConfigType,
|
|
9354
|
+
UsageLogsSort: () => UsageLogsSort
|
|
9393
9355
|
});
|
|
9394
9356
|
|
|
9395
9357
|
// src/generated/soniox/schema/temporaryApiKeyUsageType.ts
|
|
9396
9358
|
var TemporaryApiKeyUsageType = {
|
|
9397
|
-
transcribe_websocket: "transcribe_websocket"
|
|
9359
|
+
transcribe_websocket: "transcribe_websocket",
|
|
9360
|
+
tts_rt: "tts_rt"
|
|
9398
9361
|
};
|
|
9399
9362
|
|
|
9400
9363
|
// src/generated/soniox/schema/transcriptionMode.ts
|
|
@@ -9409,6 +9372,19 @@ var TranslationConfigType = {
|
|
|
9409
9372
|
two_way: "two_way"
|
|
9410
9373
|
};
|
|
9411
9374
|
|
|
9375
|
+
// src/generated/soniox/schema/tTSVoiceGender.ts
|
|
9376
|
+
var TTSVoiceGender = {
|
|
9377
|
+
male: "male",
|
|
9378
|
+
female: "female",
|
|
9379
|
+
neutral: "neutral"
|
|
9380
|
+
};
|
|
9381
|
+
|
|
9382
|
+
// src/generated/soniox/schema/usageLogsSort.ts
|
|
9383
|
+
var UsageLogsSort = {
|
|
9384
|
+
end_time_asc: "end_time_asc",
|
|
9385
|
+
end_time_desc: "end_time_desc"
|
|
9386
|
+
};
|
|
9387
|
+
|
|
9412
9388
|
// src/generated/soniox/api/sonioxPublicAPI.ts
|
|
9413
9389
|
var uploadFile = (uploadFileBody2, options) => {
|
|
9414
9390
|
const formData = new FormData();
|
|
@@ -10776,6 +10752,7 @@ __export(deepgramAPI_zod_exports, {
|
|
|
10776
10752
|
speakGenerateQueryMipOptOutDefault: () => speakGenerateQueryMipOptOutDefault,
|
|
10777
10753
|
speakGenerateQueryModelDefault: () => speakGenerateQueryModelDefault,
|
|
10778
10754
|
speakGenerateQueryParams: () => speakGenerateQueryParams,
|
|
10755
|
+
speakGenerateQuerySpeedDefault: () => speakGenerateQuerySpeedDefault,
|
|
10779
10756
|
speakGenerateResponse: () => speakGenerateResponse
|
|
10780
10757
|
});
|
|
10781
10758
|
import { z as zod } from "zod";
|
|
@@ -10830,6 +10807,9 @@ var listenTranscribeQueryParams = zod.object({
|
|
|
10830
10807
|
diarize: zod.boolean().optional().describe(
|
|
10831
10808
|
"Recognize speaker changes. Each word in the transcript will be assigned a speaker number starting at 0"
|
|
10832
10809
|
),
|
|
10810
|
+
diarize_model: zod.enum(["latest", "v1", "v2"]).optional().describe(
|
|
10811
|
+
"Select and enable a specific batch diarization model version. If specifying this parameter, you should not set the deprecated `diarize=true` parameter. Not accepted on streaming requests."
|
|
10812
|
+
),
|
|
10833
10813
|
dictation: zod.boolean().optional().describe("Dictation mode for controlling formatting with dictated speech"),
|
|
10834
10814
|
encoding: zod.enum(["linear16", "flac", "mulaw", "amr-nb", "amr-wb", "opus", "speex", "g729"]).optional().describe("Specify the expected encoding of your submitted audio"),
|
|
10835
10815
|
filler_words: zod.boolean().optional().describe('Filler Words can help transcribe interruptions in your audio, like "uh" and "um"'),
|
|
@@ -11095,6 +11075,7 @@ var listenTranscribeResponse = zod.object({
|
|
|
11095
11075
|
var speakGenerateQueryCallbackMethodDefault = "POST";
|
|
11096
11076
|
var speakGenerateQueryMipOptOutDefault = false;
|
|
11097
11077
|
var speakGenerateQueryModelDefault = "aura-asteria-en";
|
|
11078
|
+
var speakGenerateQuerySpeedDefault = 1;
|
|
11098
11079
|
var speakGenerateQueryParams = zod.object({
|
|
11099
11080
|
callback: zod.string().optional().describe("URL to which we'll make the callback request"),
|
|
11100
11081
|
callback_method: zod.enum(["POST", "PUT"]).default(speakGenerateQueryCallbackMethodDefault).describe("HTTP method by which the callback request will be made"),
|
|
@@ -11206,6 +11187,9 @@ var speakGenerateQueryParams = zod.object({
|
|
|
11206
11187
|
zod.enum(["22050"]).describe("Encoding - mp3. Sample rate is fixed and not configurable (22050 Hz).")
|
|
11207
11188
|
).or(zod.enum(["48000"]).describe("Encoding - opus. Sample rate is fixed at 48000 Hz.")).optional().describe(
|
|
11208
11189
|
"Sample Rate specifies the sample rate for the output audio. Based on the encoding, different sample rates are supported. For some encodings, the sample rate is not configurable"
|
|
11190
|
+
),
|
|
11191
|
+
speed: zod.number().default(speakGenerateQuerySpeedDefault).describe(
|
|
11192
|
+
"Speaking rate multiplier that adjusts the pace of generated speech while preserving natural prosody and voice quality. Not yet supported in all languages."
|
|
11209
11193
|
)
|
|
11210
11194
|
});
|
|
11211
11195
|
var speakGenerateHeader = zod.object({
|
|
@@ -11530,6 +11514,7 @@ __export(assemblyAIAPI_zod_exports, {
|
|
|
11530
11514
|
createTranscriptBodyRedactPiiAudioDefault: () => createTranscriptBodyRedactPiiAudioDefault,
|
|
11531
11515
|
createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault: () => createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault,
|
|
11532
11516
|
createTranscriptBodyRedactPiiDefault: () => createTranscriptBodyRedactPiiDefault,
|
|
11517
|
+
createTranscriptBodyRedactPiiReturnUnredactedDefault: () => createTranscriptBodyRedactPiiReturnUnredactedDefault,
|
|
11533
11518
|
createTranscriptBodySentimentAnalysisDefault: () => createTranscriptBodySentimentAnalysisDefault,
|
|
11534
11519
|
createTranscriptBodySpeakerLabelsDefault: () => createTranscriptBodySpeakerLabelsDefault,
|
|
11535
11520
|
createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault: () => createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault,
|
|
@@ -11600,6 +11585,7 @@ var createTranscriptBodyPunctuateDefault = true;
|
|
|
11600
11585
|
var createTranscriptBodyRedactPiiDefault = false;
|
|
11601
11586
|
var createTranscriptBodyRedactPiiAudioDefault = false;
|
|
11602
11587
|
var createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault = false;
|
|
11588
|
+
var createTranscriptBodyRedactPiiReturnUnredactedDefault = false;
|
|
11603
11589
|
var createTranscriptBodySentimentAnalysisDefault = false;
|
|
11604
11590
|
var createTranscriptBodySpeakerLabelsDefault = false;
|
|
11605
11591
|
var createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault = 1;
|
|
@@ -11638,7 +11624,7 @@ var createTranscriptBody = zod3.object({
|
|
|
11638
11624
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
11639
11625
|
),
|
|
11640
11626
|
disfluencies: zod3.boolean().optional().describe(
|
|
11641
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
11627
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
11642
11628
|
),
|
|
11643
11629
|
domain: zod3.string().nullish().describe(
|
|
11644
11630
|
'Enable domain-specific transcription models to improve accuracy for specialized terminology. Set to `"medical-v1"` to enable [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) for improved accuracy of medical terms such as medications, procedures, conditions, and dosages.\n\nSupported languages: English (`en`), Spanish (`es`), German (`de`), French (`fr`). If used with an unsupported language, the parameter is ignored and a warning is returned.\n'
|
|
@@ -11945,12 +11931,20 @@ var createTranscriptBody = zod3.object({
|
|
|
11945
11931
|
"email_address",
|
|
11946
11932
|
"event",
|
|
11947
11933
|
"filename",
|
|
11934
|
+
"gender",
|
|
11948
11935
|
"gender_sexuality",
|
|
11949
11936
|
"healthcare_number",
|
|
11950
11937
|
"injury",
|
|
11951
11938
|
"ip_address",
|
|
11952
11939
|
"language",
|
|
11953
11940
|
"location",
|
|
11941
|
+
"location_address",
|
|
11942
|
+
"location_address_street",
|
|
11943
|
+
"location_city",
|
|
11944
|
+
"location_coordinate",
|
|
11945
|
+
"location_country",
|
|
11946
|
+
"location_state",
|
|
11947
|
+
"location_zip",
|
|
11954
11948
|
"marital_status",
|
|
11955
11949
|
"medical_condition",
|
|
11956
11950
|
"medical_process",
|
|
@@ -11959,6 +11953,7 @@ var createTranscriptBody = zod3.object({
|
|
|
11959
11953
|
"number_sequence",
|
|
11960
11954
|
"occupation",
|
|
11961
11955
|
"organization",
|
|
11956
|
+
"organization_medical_facility",
|
|
11962
11957
|
"passport_number",
|
|
11963
11958
|
"password",
|
|
11964
11959
|
"person_age",
|
|
@@ -11967,6 +11962,7 @@ var createTranscriptBody = zod3.object({
|
|
|
11967
11962
|
"physical_attribute",
|
|
11968
11963
|
"political_affiliation",
|
|
11969
11964
|
"religion",
|
|
11965
|
+
"sexuality",
|
|
11970
11966
|
"statistics",
|
|
11971
11967
|
"time",
|
|
11972
11968
|
"url",
|
|
@@ -11974,15 +11970,20 @@ var createTranscriptBody = zod3.object({
|
|
|
11974
11970
|
"username",
|
|
11975
11971
|
"vehicle_id",
|
|
11976
11972
|
"zodiac_sign"
|
|
11977
|
-
]).describe(
|
|
11973
|
+
]).describe(
|
|
11974
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
11975
|
+
)
|
|
11978
11976
|
).optional().describe(
|
|
11979
11977
|
"The list of PII Redaction policies to enable. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
11980
11978
|
),
|
|
11981
11979
|
redact_pii_sub: zod3.enum(["entity_name", "hash"]).describe(
|
|
11982
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
11980
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
11983
11981
|
).or(zod3.null()).optional().describe(
|
|
11984
11982
|
"The replacement logic for detected PII, can be `entity_type` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
11985
11983
|
),
|
|
11984
|
+
redact_pii_return_unredacted: zod3.boolean().optional().describe(
|
|
11985
|
+
"When set to `true`, returns the original unredacted transcript alongside the redacted one in the same response. Requires `redact_pii` to be `true`, otherwise a 400 error is returned.\n\nWhen enabled, the response includes the additional fields `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. The existing `text`, `words`, and `utterances` fields remain fully redacted. When disabled (default), the response is unchanged and contains only the redacted transcript. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details.\n"
|
|
11986
|
+
),
|
|
11986
11987
|
sentiment_analysis: zod3.boolean().optional().describe(
|
|
11987
11988
|
"Enable [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech), can be true or false"
|
|
11988
11989
|
),
|
|
@@ -12080,10 +12081,10 @@ var createTranscriptBody = zod3.object({
|
|
|
12080
12081
|
),
|
|
12081
12082
|
summary_model: zod3.enum(["informative", "conversational", "catchy"]).optional().describe("The model to summarize the transcript"),
|
|
12082
12083
|
summary_type: zod3.enum(["bullets", "bullets_verbose", "gist", "headline", "paragraph"]).optional().describe("The type of summary"),
|
|
12083
|
-
remove_audio_tags: zod3.enum(["all"]).describe(
|
|
12084
|
-
'
|
|
12084
|
+
remove_audio_tags: zod3.enum(["all", "speaker"]).describe(
|
|
12085
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
12085
12086
|
).or(zod3.null()).optional().describe(
|
|
12086
|
-
'
|
|
12087
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
12087
12088
|
),
|
|
12088
12089
|
temperature: zod3.number().optional().describe(
|
|
12089
12090
|
"Control the amount of randomness injected into the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
@@ -12217,7 +12218,7 @@ var createTranscriptResponse = zod3.object({
|
|
|
12217
12218
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
12218
12219
|
),
|
|
12219
12220
|
disfluencies: zod3.boolean().nullish().describe(
|
|
12220
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
12221
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
12221
12222
|
),
|
|
12222
12223
|
domain: zod3.string().nullish().describe(
|
|
12223
12224
|
'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
|
|
@@ -12240,12 +12241,20 @@ var createTranscriptResponse = zod3.object({
|
|
|
12240
12241
|
"email_address",
|
|
12241
12242
|
"event",
|
|
12242
12243
|
"filename",
|
|
12244
|
+
"gender",
|
|
12243
12245
|
"gender_sexuality",
|
|
12244
12246
|
"healthcare_number",
|
|
12245
12247
|
"injury",
|
|
12246
12248
|
"ip_address",
|
|
12247
12249
|
"language",
|
|
12248
12250
|
"location",
|
|
12251
|
+
"location_address",
|
|
12252
|
+
"location_address_street",
|
|
12253
|
+
"location_city",
|
|
12254
|
+
"location_coordinate",
|
|
12255
|
+
"location_country",
|
|
12256
|
+
"location_state",
|
|
12257
|
+
"location_zip",
|
|
12249
12258
|
"marital_status",
|
|
12250
12259
|
"medical_condition",
|
|
12251
12260
|
"medical_process",
|
|
@@ -12254,6 +12263,7 @@ var createTranscriptResponse = zod3.object({
|
|
|
12254
12263
|
"number_sequence",
|
|
12255
12264
|
"occupation",
|
|
12256
12265
|
"organization",
|
|
12266
|
+
"organization_medical_facility",
|
|
12257
12267
|
"passport_number",
|
|
12258
12268
|
"password",
|
|
12259
12269
|
"person_age",
|
|
@@ -12262,6 +12272,7 @@ var createTranscriptResponse = zod3.object({
|
|
|
12262
12272
|
"physical_attribute",
|
|
12263
12273
|
"political_affiliation",
|
|
12264
12274
|
"religion",
|
|
12275
|
+
"sexuality",
|
|
12265
12276
|
"statistics",
|
|
12266
12277
|
"time",
|
|
12267
12278
|
"url",
|
|
@@ -12566,6 +12577,24 @@ var createTranscriptResponse = zod3.object({
|
|
|
12566
12577
|
}).optional().describe(
|
|
12567
12578
|
"Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
|
|
12568
12579
|
),
|
|
12580
|
+
metadata: zod3.object({
|
|
12581
|
+
domain_used: zod3.string().nullish().describe(
|
|
12582
|
+
'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
|
|
12583
|
+
),
|
|
12584
|
+
warnings: zod3.array(
|
|
12585
|
+
zod3.object({
|
|
12586
|
+
message: zod3.string().describe("A human-readable description of the warning.")
|
|
12587
|
+
}).describe(
|
|
12588
|
+
"A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
|
|
12589
|
+
)
|
|
12590
|
+
).optional().describe(
|
|
12591
|
+
"Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
|
|
12592
|
+
)
|
|
12593
|
+
}).describe(
|
|
12594
|
+
"Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
|
|
12595
|
+
).or(zod3.null()).optional().describe(
|
|
12596
|
+
"Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
|
|
12597
|
+
),
|
|
12569
12598
|
multichannel: zod3.boolean().nullish().describe(
|
|
12570
12599
|
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
|
|
12571
12600
|
),
|
|
@@ -12613,12 +12642,20 @@ var createTranscriptResponse = zod3.object({
|
|
|
12613
12642
|
"email_address",
|
|
12614
12643
|
"event",
|
|
12615
12644
|
"filename",
|
|
12645
|
+
"gender",
|
|
12616
12646
|
"gender_sexuality",
|
|
12617
12647
|
"healthcare_number",
|
|
12618
12648
|
"injury",
|
|
12619
12649
|
"ip_address",
|
|
12620
12650
|
"language",
|
|
12621
12651
|
"location",
|
|
12652
|
+
"location_address",
|
|
12653
|
+
"location_address_street",
|
|
12654
|
+
"location_city",
|
|
12655
|
+
"location_coordinate",
|
|
12656
|
+
"location_country",
|
|
12657
|
+
"location_state",
|
|
12658
|
+
"location_zip",
|
|
12622
12659
|
"marital_status",
|
|
12623
12660
|
"medical_condition",
|
|
12624
12661
|
"medical_process",
|
|
@@ -12627,6 +12664,7 @@ var createTranscriptResponse = zod3.object({
|
|
|
12627
12664
|
"number_sequence",
|
|
12628
12665
|
"occupation",
|
|
12629
12666
|
"organization",
|
|
12667
|
+
"organization_medical_facility",
|
|
12630
12668
|
"passport_number",
|
|
12631
12669
|
"password",
|
|
12632
12670
|
"person_age",
|
|
@@ -12635,6 +12673,7 @@ var createTranscriptResponse = zod3.object({
|
|
|
12635
12673
|
"physical_attribute",
|
|
12636
12674
|
"political_affiliation",
|
|
12637
12675
|
"religion",
|
|
12676
|
+
"sexuality",
|
|
12638
12677
|
"statistics",
|
|
12639
12678
|
"time",
|
|
12640
12679
|
"url",
|
|
@@ -12642,12 +12681,17 @@ var createTranscriptResponse = zod3.object({
|
|
|
12642
12681
|
"username",
|
|
12643
12682
|
"vehicle_id",
|
|
12644
12683
|
"zodiac_sign"
|
|
12645
|
-
]).describe(
|
|
12684
|
+
]).describe(
|
|
12685
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
12686
|
+
)
|
|
12646
12687
|
).nullish().describe(
|
|
12647
12688
|
"The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
12648
12689
|
),
|
|
12649
12690
|
redact_pii_sub: zod3.enum(["entity_name", "hash"]).optional().describe(
|
|
12650
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
12691
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
12692
|
+
),
|
|
12693
|
+
redact_pii_return_unredacted: zod3.boolean().nullish().describe(
|
|
12694
|
+
"Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
12651
12695
|
),
|
|
12652
12696
|
sentiment_analysis: zod3.boolean().nullish().describe(
|
|
12653
12697
|
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
|
|
@@ -12784,20 +12828,23 @@ var createTranscriptResponse = zod3.object({
|
|
|
12784
12828
|
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
12785
12829
|
),
|
|
12786
12830
|
summary_model: zod3.string().nullish().describe(
|
|
12787
|
-
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
12831
|
+
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
|
|
12788
12832
|
),
|
|
12789
12833
|
summary_type: zod3.string().nullish().describe(
|
|
12790
|
-
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
12834
|
+
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
12791
12835
|
),
|
|
12792
|
-
remove_audio_tags: zod3.enum(["all"]).describe(
|
|
12793
|
-
|
|
12836
|
+
remove_audio_tags: zod3.enum(["all", "speaker"]).describe(
|
|
12837
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
12794
12838
|
).or(zod3.null()).optional().describe(
|
|
12795
|
-
|
|
12839
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
12796
12840
|
),
|
|
12797
12841
|
temperature: zod3.number().nullish().describe(
|
|
12798
12842
|
"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
12799
12843
|
),
|
|
12800
12844
|
text: zod3.string().nullish().describe("The textual transcript of your media file"),
|
|
12845
|
+
unredacted_text: zod3.string().nullish().describe(
|
|
12846
|
+
"The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
12847
|
+
),
|
|
12801
12848
|
throttled: zod3.boolean().nullish().describe(
|
|
12802
12849
|
"True while a request is throttled and false when a request is no longer throttled"
|
|
12803
12850
|
),
|
|
@@ -12834,6 +12881,39 @@ var createTranscriptResponse = zod3.object({
|
|
|
12834
12881
|
).nullish().describe(
|
|
12835
12882
|
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
|
|
12836
12883
|
),
|
|
12884
|
+
unredacted_utterances: zod3.array(
|
|
12885
|
+
zod3.object({
|
|
12886
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this utterance"),
|
|
12887
|
+
start: zod3.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
|
|
12888
|
+
end: zod3.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
|
|
12889
|
+
text: zod3.string().describe("The text for this utterance"),
|
|
12890
|
+
words: zod3.array(
|
|
12891
|
+
zod3.object({
|
|
12892
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this word"),
|
|
12893
|
+
start: zod3.number().describe("The starting time, in milliseconds, for the word"),
|
|
12894
|
+
end: zod3.number().describe("The ending time, in milliseconds, for the word"),
|
|
12895
|
+
text: zod3.string().describe("The text of the word"),
|
|
12896
|
+
channel: zod3.string().nullish().describe(
|
|
12897
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
12898
|
+
),
|
|
12899
|
+
speaker: zod3.string().nullable().describe(
|
|
12900
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
12901
|
+
)
|
|
12902
|
+
})
|
|
12903
|
+
).describe("The words in the utterance."),
|
|
12904
|
+
channel: zod3.string().nullish().describe(
|
|
12905
|
+
"The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
12906
|
+
),
|
|
12907
|
+
speaker: zod3.string().describe(
|
|
12908
|
+
'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
|
|
12909
|
+
),
|
|
12910
|
+
translated_texts: zod3.record(zod3.string(), zod3.string()).optional().describe(
|
|
12911
|
+
'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
|
|
12912
|
+
)
|
|
12913
|
+
})
|
|
12914
|
+
).nullish().describe(
|
|
12915
|
+
"The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
12916
|
+
),
|
|
12837
12917
|
webhook_auth: zod3.boolean().describe(
|
|
12838
12918
|
"Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
|
|
12839
12919
|
),
|
|
@@ -12862,6 +12942,22 @@ var createTranscriptResponse = zod3.object({
|
|
|
12862
12942
|
).nullish().describe(
|
|
12863
12943
|
"An array of temporally-sequential word objects, one for each word in the transcript.\n"
|
|
12864
12944
|
),
|
|
12945
|
+
unredacted_words: zod3.array(
|
|
12946
|
+
zod3.object({
|
|
12947
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this word"),
|
|
12948
|
+
start: zod3.number().describe("The starting time, in milliseconds, for the word"),
|
|
12949
|
+
end: zod3.number().describe("The ending time, in milliseconds, for the word"),
|
|
12950
|
+
text: zod3.string().describe("The text of the word"),
|
|
12951
|
+
channel: zod3.string().nullish().describe(
|
|
12952
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
12953
|
+
),
|
|
12954
|
+
speaker: zod3.string().nullable().describe(
|
|
12955
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
12956
|
+
)
|
|
12957
|
+
})
|
|
12958
|
+
).nullish().describe(
|
|
12959
|
+
"The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
12960
|
+
),
|
|
12865
12961
|
acoustic_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
12866
12962
|
custom_topics: zod3.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
|
|
12867
12963
|
language_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
@@ -13037,7 +13133,7 @@ var getTranscriptResponse = zod3.object({
|
|
|
13037
13133
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
13038
13134
|
),
|
|
13039
13135
|
disfluencies: zod3.boolean().nullish().describe(
|
|
13040
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
13136
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
13041
13137
|
),
|
|
13042
13138
|
domain: zod3.string().nullish().describe(
|
|
13043
13139
|
'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
|
|
@@ -13060,12 +13156,20 @@ var getTranscriptResponse = zod3.object({
|
|
|
13060
13156
|
"email_address",
|
|
13061
13157
|
"event",
|
|
13062
13158
|
"filename",
|
|
13159
|
+
"gender",
|
|
13063
13160
|
"gender_sexuality",
|
|
13064
13161
|
"healthcare_number",
|
|
13065
13162
|
"injury",
|
|
13066
13163
|
"ip_address",
|
|
13067
13164
|
"language",
|
|
13068
13165
|
"location",
|
|
13166
|
+
"location_address",
|
|
13167
|
+
"location_address_street",
|
|
13168
|
+
"location_city",
|
|
13169
|
+
"location_coordinate",
|
|
13170
|
+
"location_country",
|
|
13171
|
+
"location_state",
|
|
13172
|
+
"location_zip",
|
|
13069
13173
|
"marital_status",
|
|
13070
13174
|
"medical_condition",
|
|
13071
13175
|
"medical_process",
|
|
@@ -13074,6 +13178,7 @@ var getTranscriptResponse = zod3.object({
|
|
|
13074
13178
|
"number_sequence",
|
|
13075
13179
|
"occupation",
|
|
13076
13180
|
"organization",
|
|
13181
|
+
"organization_medical_facility",
|
|
13077
13182
|
"passport_number",
|
|
13078
13183
|
"password",
|
|
13079
13184
|
"person_age",
|
|
@@ -13082,6 +13187,7 @@ var getTranscriptResponse = zod3.object({
|
|
|
13082
13187
|
"physical_attribute",
|
|
13083
13188
|
"political_affiliation",
|
|
13084
13189
|
"religion",
|
|
13190
|
+
"sexuality",
|
|
13085
13191
|
"statistics",
|
|
13086
13192
|
"time",
|
|
13087
13193
|
"url",
|
|
@@ -13386,6 +13492,24 @@ var getTranscriptResponse = zod3.object({
|
|
|
13386
13492
|
}).optional().describe(
|
|
13387
13493
|
"Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
|
|
13388
13494
|
),
|
|
13495
|
+
metadata: zod3.object({
|
|
13496
|
+
domain_used: zod3.string().nullish().describe(
|
|
13497
|
+
'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
|
|
13498
|
+
),
|
|
13499
|
+
warnings: zod3.array(
|
|
13500
|
+
zod3.object({
|
|
13501
|
+
message: zod3.string().describe("A human-readable description of the warning.")
|
|
13502
|
+
}).describe(
|
|
13503
|
+
"A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
|
|
13504
|
+
)
|
|
13505
|
+
).optional().describe(
|
|
13506
|
+
"Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
|
|
13507
|
+
)
|
|
13508
|
+
}).describe(
|
|
13509
|
+
"Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
|
|
13510
|
+
).or(zod3.null()).optional().describe(
|
|
13511
|
+
"Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
|
|
13512
|
+
),
|
|
13389
13513
|
multichannel: zod3.boolean().nullish().describe(
|
|
13390
13514
|
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
|
|
13391
13515
|
),
|
|
@@ -13433,12 +13557,20 @@ var getTranscriptResponse = zod3.object({
|
|
|
13433
13557
|
"email_address",
|
|
13434
13558
|
"event",
|
|
13435
13559
|
"filename",
|
|
13560
|
+
"gender",
|
|
13436
13561
|
"gender_sexuality",
|
|
13437
13562
|
"healthcare_number",
|
|
13438
13563
|
"injury",
|
|
13439
13564
|
"ip_address",
|
|
13440
13565
|
"language",
|
|
13441
13566
|
"location",
|
|
13567
|
+
"location_address",
|
|
13568
|
+
"location_address_street",
|
|
13569
|
+
"location_city",
|
|
13570
|
+
"location_coordinate",
|
|
13571
|
+
"location_country",
|
|
13572
|
+
"location_state",
|
|
13573
|
+
"location_zip",
|
|
13442
13574
|
"marital_status",
|
|
13443
13575
|
"medical_condition",
|
|
13444
13576
|
"medical_process",
|
|
@@ -13447,6 +13579,7 @@ var getTranscriptResponse = zod3.object({
|
|
|
13447
13579
|
"number_sequence",
|
|
13448
13580
|
"occupation",
|
|
13449
13581
|
"organization",
|
|
13582
|
+
"organization_medical_facility",
|
|
13450
13583
|
"passport_number",
|
|
13451
13584
|
"password",
|
|
13452
13585
|
"person_age",
|
|
@@ -13455,6 +13588,7 @@ var getTranscriptResponse = zod3.object({
|
|
|
13455
13588
|
"physical_attribute",
|
|
13456
13589
|
"political_affiliation",
|
|
13457
13590
|
"religion",
|
|
13591
|
+
"sexuality",
|
|
13458
13592
|
"statistics",
|
|
13459
13593
|
"time",
|
|
13460
13594
|
"url",
|
|
@@ -13462,12 +13596,17 @@ var getTranscriptResponse = zod3.object({
|
|
|
13462
13596
|
"username",
|
|
13463
13597
|
"vehicle_id",
|
|
13464
13598
|
"zodiac_sign"
|
|
13465
|
-
]).describe(
|
|
13599
|
+
]).describe(
|
|
13600
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
13601
|
+
)
|
|
13466
13602
|
).nullish().describe(
|
|
13467
13603
|
"The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13468
13604
|
),
|
|
13469
13605
|
redact_pii_sub: zod3.enum(["entity_name", "hash"]).optional().describe(
|
|
13470
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
13606
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
13607
|
+
),
|
|
13608
|
+
redact_pii_return_unredacted: zod3.boolean().nullish().describe(
|
|
13609
|
+
"Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13471
13610
|
),
|
|
13472
13611
|
sentiment_analysis: zod3.boolean().nullish().describe(
|
|
13473
13612
|
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
|
|
@@ -13604,20 +13743,23 @@ var getTranscriptResponse = zod3.object({
|
|
|
13604
13743
|
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
13605
13744
|
),
|
|
13606
13745
|
summary_model: zod3.string().nullish().describe(
|
|
13607
|
-
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
13746
|
+
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
|
|
13608
13747
|
),
|
|
13609
13748
|
summary_type: zod3.string().nullish().describe(
|
|
13610
|
-
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
13749
|
+
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
13611
13750
|
),
|
|
13612
|
-
remove_audio_tags: zod3.enum(["all"]).describe(
|
|
13613
|
-
|
|
13751
|
+
remove_audio_tags: zod3.enum(["all", "speaker"]).describe(
|
|
13752
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
13614
13753
|
).or(zod3.null()).optional().describe(
|
|
13615
|
-
|
|
13754
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
13616
13755
|
),
|
|
13617
13756
|
temperature: zod3.number().nullish().describe(
|
|
13618
13757
|
"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
13619
13758
|
),
|
|
13620
13759
|
text: zod3.string().nullish().describe("The textual transcript of your media file"),
|
|
13760
|
+
unredacted_text: zod3.string().nullish().describe(
|
|
13761
|
+
"The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13762
|
+
),
|
|
13621
13763
|
throttled: zod3.boolean().nullish().describe(
|
|
13622
13764
|
"True while a request is throttled and false when a request is no longer throttled"
|
|
13623
13765
|
),
|
|
@@ -13654,6 +13796,39 @@ var getTranscriptResponse = zod3.object({
|
|
|
13654
13796
|
).nullish().describe(
|
|
13655
13797
|
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
|
|
13656
13798
|
),
|
|
13799
|
+
unredacted_utterances: zod3.array(
|
|
13800
|
+
zod3.object({
|
|
13801
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this utterance"),
|
|
13802
|
+
start: zod3.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
|
|
13803
|
+
end: zod3.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
|
|
13804
|
+
text: zod3.string().describe("The text for this utterance"),
|
|
13805
|
+
words: zod3.array(
|
|
13806
|
+
zod3.object({
|
|
13807
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this word"),
|
|
13808
|
+
start: zod3.number().describe("The starting time, in milliseconds, for the word"),
|
|
13809
|
+
end: zod3.number().describe("The ending time, in milliseconds, for the word"),
|
|
13810
|
+
text: zod3.string().describe("The text of the word"),
|
|
13811
|
+
channel: zod3.string().nullish().describe(
|
|
13812
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
13813
|
+
),
|
|
13814
|
+
speaker: zod3.string().nullable().describe(
|
|
13815
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
13816
|
+
)
|
|
13817
|
+
})
|
|
13818
|
+
).describe("The words in the utterance."),
|
|
13819
|
+
channel: zod3.string().nullish().describe(
|
|
13820
|
+
"The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
13821
|
+
),
|
|
13822
|
+
speaker: zod3.string().describe(
|
|
13823
|
+
'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
|
|
13824
|
+
),
|
|
13825
|
+
translated_texts: zod3.record(zod3.string(), zod3.string()).optional().describe(
|
|
13826
|
+
'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
|
|
13827
|
+
)
|
|
13828
|
+
})
|
|
13829
|
+
).nullish().describe(
|
|
13830
|
+
"The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13831
|
+
),
|
|
13657
13832
|
webhook_auth: zod3.boolean().describe(
|
|
13658
13833
|
"Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
|
|
13659
13834
|
),
|
|
@@ -13682,6 +13857,22 @@ var getTranscriptResponse = zod3.object({
|
|
|
13682
13857
|
).nullish().describe(
|
|
13683
13858
|
"An array of temporally-sequential word objects, one for each word in the transcript.\n"
|
|
13684
13859
|
),
|
|
13860
|
+
unredacted_words: zod3.array(
|
|
13861
|
+
zod3.object({
|
|
13862
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this word"),
|
|
13863
|
+
start: zod3.number().describe("The starting time, in milliseconds, for the word"),
|
|
13864
|
+
end: zod3.number().describe("The ending time, in milliseconds, for the word"),
|
|
13865
|
+
text: zod3.string().describe("The text of the word"),
|
|
13866
|
+
channel: zod3.string().nullish().describe(
|
|
13867
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
13868
|
+
),
|
|
13869
|
+
speaker: zod3.string().nullable().describe(
|
|
13870
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
13871
|
+
)
|
|
13872
|
+
})
|
|
13873
|
+
).nullish().describe(
|
|
13874
|
+
"The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13875
|
+
),
|
|
13685
13876
|
acoustic_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
13686
13877
|
custom_topics: zod3.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
|
|
13687
13878
|
language_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
@@ -13817,7 +14008,7 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
13817
14008
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
13818
14009
|
),
|
|
13819
14010
|
disfluencies: zod3.boolean().nullish().describe(
|
|
13820
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
14011
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
13821
14012
|
),
|
|
13822
14013
|
domain: zod3.string().nullish().describe(
|
|
13823
14014
|
'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
|
|
@@ -13840,12 +14031,20 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
13840
14031
|
"email_address",
|
|
13841
14032
|
"event",
|
|
13842
14033
|
"filename",
|
|
14034
|
+
"gender",
|
|
13843
14035
|
"gender_sexuality",
|
|
13844
14036
|
"healthcare_number",
|
|
13845
14037
|
"injury",
|
|
13846
14038
|
"ip_address",
|
|
13847
14039
|
"language",
|
|
13848
14040
|
"location",
|
|
14041
|
+
"location_address",
|
|
14042
|
+
"location_address_street",
|
|
14043
|
+
"location_city",
|
|
14044
|
+
"location_coordinate",
|
|
14045
|
+
"location_country",
|
|
14046
|
+
"location_state",
|
|
14047
|
+
"location_zip",
|
|
13849
14048
|
"marital_status",
|
|
13850
14049
|
"medical_condition",
|
|
13851
14050
|
"medical_process",
|
|
@@ -13854,6 +14053,7 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
13854
14053
|
"number_sequence",
|
|
13855
14054
|
"occupation",
|
|
13856
14055
|
"organization",
|
|
14056
|
+
"organization_medical_facility",
|
|
13857
14057
|
"passport_number",
|
|
13858
14058
|
"password",
|
|
13859
14059
|
"person_age",
|
|
@@ -13862,6 +14062,7 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
13862
14062
|
"physical_attribute",
|
|
13863
14063
|
"political_affiliation",
|
|
13864
14064
|
"religion",
|
|
14065
|
+
"sexuality",
|
|
13865
14066
|
"statistics",
|
|
13866
14067
|
"time",
|
|
13867
14068
|
"url",
|
|
@@ -14166,6 +14367,24 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
14166
14367
|
}).optional().describe(
|
|
14167
14368
|
"Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
|
|
14168
14369
|
),
|
|
14370
|
+
metadata: zod3.object({
|
|
14371
|
+
domain_used: zod3.string().nullish().describe(
|
|
14372
|
+
'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
|
|
14373
|
+
),
|
|
14374
|
+
warnings: zod3.array(
|
|
14375
|
+
zod3.object({
|
|
14376
|
+
message: zod3.string().describe("A human-readable description of the warning.")
|
|
14377
|
+
}).describe(
|
|
14378
|
+
"A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
|
|
14379
|
+
)
|
|
14380
|
+
).optional().describe(
|
|
14381
|
+
"Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
|
|
14382
|
+
)
|
|
14383
|
+
}).describe(
|
|
14384
|
+
"Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
|
|
14385
|
+
).or(zod3.null()).optional().describe(
|
|
14386
|
+
"Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
|
|
14387
|
+
),
|
|
14169
14388
|
multichannel: zod3.boolean().nullish().describe(
|
|
14170
14389
|
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
|
|
14171
14390
|
),
|
|
@@ -14213,12 +14432,20 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
14213
14432
|
"email_address",
|
|
14214
14433
|
"event",
|
|
14215
14434
|
"filename",
|
|
14435
|
+
"gender",
|
|
14216
14436
|
"gender_sexuality",
|
|
14217
14437
|
"healthcare_number",
|
|
14218
14438
|
"injury",
|
|
14219
14439
|
"ip_address",
|
|
14220
14440
|
"language",
|
|
14221
14441
|
"location",
|
|
14442
|
+
"location_address",
|
|
14443
|
+
"location_address_street",
|
|
14444
|
+
"location_city",
|
|
14445
|
+
"location_coordinate",
|
|
14446
|
+
"location_country",
|
|
14447
|
+
"location_state",
|
|
14448
|
+
"location_zip",
|
|
14222
14449
|
"marital_status",
|
|
14223
14450
|
"medical_condition",
|
|
14224
14451
|
"medical_process",
|
|
@@ -14227,6 +14454,7 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
14227
14454
|
"number_sequence",
|
|
14228
14455
|
"occupation",
|
|
14229
14456
|
"organization",
|
|
14457
|
+
"organization_medical_facility",
|
|
14230
14458
|
"passport_number",
|
|
14231
14459
|
"password",
|
|
14232
14460
|
"person_age",
|
|
@@ -14235,6 +14463,7 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
14235
14463
|
"physical_attribute",
|
|
14236
14464
|
"political_affiliation",
|
|
14237
14465
|
"religion",
|
|
14466
|
+
"sexuality",
|
|
14238
14467
|
"statistics",
|
|
14239
14468
|
"time",
|
|
14240
14469
|
"url",
|
|
@@ -14242,12 +14471,17 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
14242
14471
|
"username",
|
|
14243
14472
|
"vehicle_id",
|
|
14244
14473
|
"zodiac_sign"
|
|
14245
|
-
]).describe(
|
|
14474
|
+
]).describe(
|
|
14475
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
14476
|
+
)
|
|
14246
14477
|
).nullish().describe(
|
|
14247
14478
|
"The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14248
14479
|
),
|
|
14249
14480
|
redact_pii_sub: zod3.enum(["entity_name", "hash"]).optional().describe(
|
|
14250
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
14481
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
14482
|
+
),
|
|
14483
|
+
redact_pii_return_unredacted: zod3.boolean().nullish().describe(
|
|
14484
|
+
"Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14251
14485
|
),
|
|
14252
14486
|
sentiment_analysis: zod3.boolean().nullish().describe(
|
|
14253
14487
|
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
|
|
@@ -14384,20 +14618,23 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
14384
14618
|
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
14385
14619
|
),
|
|
14386
14620
|
summary_model: zod3.string().nullish().describe(
|
|
14387
|
-
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
14621
|
+
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
|
|
14388
14622
|
),
|
|
14389
14623
|
summary_type: zod3.string().nullish().describe(
|
|
14390
|
-
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
14624
|
+
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
14391
14625
|
),
|
|
14392
|
-
remove_audio_tags: zod3.enum(["all"]).describe(
|
|
14393
|
-
|
|
14626
|
+
remove_audio_tags: zod3.enum(["all", "speaker"]).describe(
|
|
14627
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
14394
14628
|
).or(zod3.null()).optional().describe(
|
|
14395
|
-
|
|
14629
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
14396
14630
|
),
|
|
14397
14631
|
temperature: zod3.number().nullish().describe(
|
|
14398
14632
|
"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
14399
14633
|
),
|
|
14400
14634
|
text: zod3.string().nullish().describe("The textual transcript of your media file"),
|
|
14635
|
+
unredacted_text: zod3.string().nullish().describe(
|
|
14636
|
+
"The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14637
|
+
),
|
|
14401
14638
|
throttled: zod3.boolean().nullish().describe(
|
|
14402
14639
|
"True while a request is throttled and false when a request is no longer throttled"
|
|
14403
14640
|
),
|
|
@@ -14434,6 +14671,39 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
14434
14671
|
).nullish().describe(
|
|
14435
14672
|
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
|
|
14436
14673
|
),
|
|
14674
|
+
unredacted_utterances: zod3.array(
|
|
14675
|
+
zod3.object({
|
|
14676
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this utterance"),
|
|
14677
|
+
start: zod3.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
|
|
14678
|
+
end: zod3.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
|
|
14679
|
+
text: zod3.string().describe("The text for this utterance"),
|
|
14680
|
+
words: zod3.array(
|
|
14681
|
+
zod3.object({
|
|
14682
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this word"),
|
|
14683
|
+
start: zod3.number().describe("The starting time, in milliseconds, for the word"),
|
|
14684
|
+
end: zod3.number().describe("The ending time, in milliseconds, for the word"),
|
|
14685
|
+
text: zod3.string().describe("The text of the word"),
|
|
14686
|
+
channel: zod3.string().nullish().describe(
|
|
14687
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
14688
|
+
),
|
|
14689
|
+
speaker: zod3.string().nullable().describe(
|
|
14690
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
14691
|
+
)
|
|
14692
|
+
})
|
|
14693
|
+
).describe("The words in the utterance."),
|
|
14694
|
+
channel: zod3.string().nullish().describe(
|
|
14695
|
+
"The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
14696
|
+
),
|
|
14697
|
+
speaker: zod3.string().describe(
|
|
14698
|
+
'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
|
|
14699
|
+
),
|
|
14700
|
+
translated_texts: zod3.record(zod3.string(), zod3.string()).optional().describe(
|
|
14701
|
+
'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
|
|
14702
|
+
)
|
|
14703
|
+
})
|
|
14704
|
+
).nullish().describe(
|
|
14705
|
+
"The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14706
|
+
),
|
|
14437
14707
|
webhook_auth: zod3.boolean().describe(
|
|
14438
14708
|
"Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
|
|
14439
14709
|
),
|
|
@@ -14462,6 +14732,22 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
14462
14732
|
).nullish().describe(
|
|
14463
14733
|
"An array of temporally-sequential word objects, one for each word in the transcript.\n"
|
|
14464
14734
|
),
|
|
14735
|
+
unredacted_words: zod3.array(
|
|
14736
|
+
zod3.object({
|
|
14737
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this word"),
|
|
14738
|
+
start: zod3.number().describe("The starting time, in milliseconds, for the word"),
|
|
14739
|
+
end: zod3.number().describe("The ending time, in milliseconds, for the word"),
|
|
14740
|
+
text: zod3.string().describe("The text of the word"),
|
|
14741
|
+
channel: zod3.string().nullish().describe(
|
|
14742
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
14743
|
+
),
|
|
14744
|
+
speaker: zod3.string().nullable().describe(
|
|
14745
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
14746
|
+
)
|
|
14747
|
+
})
|
|
14748
|
+
).nullish().describe(
|
|
14749
|
+
"The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14750
|
+
),
|
|
14465
14751
|
acoustic_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
14466
14752
|
custom_topics: zod3.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
|
|
14467
14753
|
language_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
@@ -14617,7 +14903,21 @@ var streamingTranscriberParams = zod4.object({
|
|
|
14617
14903
|
inactivityTimeout: zod4.number().optional().describe("From SDK v3"),
|
|
14618
14904
|
speakerLabels: zod4.boolean().optional().describe("From SDK v3"),
|
|
14619
14905
|
maxSpeakers: zod4.number().optional().describe("From SDK v3"),
|
|
14620
|
-
|
|
14906
|
+
voiceFocus: zod4.unknown().optional().describe("From SDK v3"),
|
|
14907
|
+
voiceFocusThreshold: zod4.number().optional().describe("From SDK v3"),
|
|
14908
|
+
continuousPartials: zod4.boolean().optional().describe("From SDK v3"),
|
|
14909
|
+
interruptionDelay: zod4.number().optional().describe("From SDK v3"),
|
|
14910
|
+
turnLeftPadMs: zod4.number().optional().describe("From SDK v3"),
|
|
14911
|
+
customerSupportAudioCapture: zod4.boolean().optional().describe("From SDK v3"),
|
|
14912
|
+
includePartialTurns: zod4.boolean().optional().describe("From SDK v3"),
|
|
14913
|
+
redactPii: zod4.boolean().optional().describe("From SDK v3"),
|
|
14914
|
+
redactPiiPolicies: zod4.unknown().optional().describe("From SDK v3"),
|
|
14915
|
+
redactPiiSub: zod4.unknown().optional().describe("From SDK v3"),
|
|
14916
|
+
llmGateway: zod4.unknown().optional().describe("From SDK v3"),
|
|
14917
|
+
webhookUrl: zod4.string().optional().describe("From SDK v3"),
|
|
14918
|
+
webhookAuthHeaderName: zod4.string().optional().describe("From SDK v3"),
|
|
14919
|
+
webhookAuthHeaderValue: zod4.string().optional().describe("From SDK v3"),
|
|
14920
|
+
mode: zod4.unknown().describe("From SDK v3")
|
|
14621
14921
|
});
|
|
14622
14922
|
var streamingUpdateConfigParams = zod4.object({
|
|
14623
14923
|
end_utterance_silence_threshold: zod4.number().min(0).max(2e4).optional().describe("The duration threshold in milliseconds"),
|
|
@@ -14629,7 +14929,9 @@ var streamingUpdateConfigParams = zod4.object({
|
|
|
14629
14929
|
format_turns: zod4.boolean().optional().describe("From SDK v3"),
|
|
14630
14930
|
keyterms_prompt: zod4.array(zod4.string()).optional().describe("From SDK v3"),
|
|
14631
14931
|
prompt: zod4.string().optional().describe("From SDK v3"),
|
|
14632
|
-
filter_profanity: zod4.boolean().optional().describe("From SDK v3")
|
|
14932
|
+
filter_profanity: zod4.boolean().optional().describe("From SDK v3"),
|
|
14933
|
+
interruption_delay: zod4.number().optional().describe("From SDK v3"),
|
|
14934
|
+
turn_left_pad_ms: zod4.number().optional().describe("From SDK v3")
|
|
14633
14935
|
});
|
|
14634
14936
|
|
|
14635
14937
|
// src/generated/gladia/api/gladiaControlAPI.zod.ts
|
|
@@ -15378,7 +15680,7 @@ var preRecordedControllerInitPreRecordedJobV2BodyNamedEntityRecognitionDefault =
|
|
|
15378
15680
|
var preRecordedControllerInitPreRecordedJobV2BodyCustomSpellingDefault = false;
|
|
15379
15681
|
var preRecordedControllerInitPreRecordedJobV2BodySentimentAnalysisDefault = false;
|
|
15380
15682
|
var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmDefault = false;
|
|
15381
|
-
var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-
|
|
15683
|
+
var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
15382
15684
|
var preRecordedControllerInitPreRecordedJobV2BodyPiiRedactionDefault = false;
|
|
15383
15685
|
var preRecordedControllerInitPreRecordedJobV2BodySentencesDefault = false;
|
|
15384
15686
|
var preRecordedControllerInitPreRecordedJobV2BodyPunctuationEnhancedDefault = false;
|
|
@@ -15667,23 +15969,23 @@ var preRecordedControllerInitPreRecordedJobV2Body = zod5.object({
|
|
|
15667
15969
|
"Forces the translation to use informal language forms when available in the target language."
|
|
15668
15970
|
)
|
|
15669
15971
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
15670
|
-
summarization: zod5.boolean().optional().describe("
|
|
15972
|
+
summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
|
|
15671
15973
|
summarization_config: zod5.object({
|
|
15672
15974
|
type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(preRecordedControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault).describe("The type of summarization to apply")
|
|
15673
|
-
}).optional().describe("
|
|
15975
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
15674
15976
|
named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
15675
15977
|
custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
15676
15978
|
custom_spelling_config: zod5.object({
|
|
15677
15979
|
spelling_dictionary: zod5.record(zod5.string(), zod5.array(zod5.string())).describe("The list of spelling applied on the audio transcription")
|
|
15678
15980
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
15679
15981
|
sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
15680
|
-
audio_to_llm: zod5.boolean().optional().describe("
|
|
15982
|
+
audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
15681
15983
|
audio_to_llm_config: zod5.object({
|
|
15682
15984
|
prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
15683
15985
|
model: zod5.string().default(preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault).describe(
|
|
15684
15986
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
15685
15987
|
)
|
|
15686
|
-
}).optional().describe("
|
|
15988
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
15687
15989
|
pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
15688
15990
|
pii_redaction_config: zod5.object({
|
|
15689
15991
|
entity_types: zod5.enum([
|
|
@@ -15938,7 +16240,7 @@ var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsNamed
|
|
|
15938
16240
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsCustomSpellingDefault = false;
|
|
15939
16241
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
|
|
15940
16242
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmDefault = false;
|
|
15941
|
-
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
16243
|
+
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
15942
16244
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsPiiRedactionDefault = false;
|
|
15943
16245
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSentencesDefault = false;
|
|
15944
16246
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
|
|
@@ -16286,12 +16588,12 @@ var preRecordedControllerGetPreRecordedJobsV2Response = zod5.object({
|
|
|
16286
16588
|
"Forces the translation to use informal language forms when available in the target language."
|
|
16287
16589
|
)
|
|
16288
16590
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
16289
|
-
summarization: zod5.boolean().optional().describe("
|
|
16591
|
+
summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
|
|
16290
16592
|
summarization_config: zod5.object({
|
|
16291
16593
|
type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
16292
16594
|
preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
|
|
16293
16595
|
).describe("The type of summarization to apply")
|
|
16294
|
-
}).optional().describe("
|
|
16596
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
16295
16597
|
named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
16296
16598
|
custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
16297
16599
|
custom_spelling_config: zod5.object({
|
|
@@ -16300,7 +16602,7 @@ var preRecordedControllerGetPreRecordedJobsV2Response = zod5.object({
|
|
|
16300
16602
|
"**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
|
|
16301
16603
|
),
|
|
16302
16604
|
sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
16303
|
-
audio_to_llm: zod5.boolean().optional().describe("
|
|
16605
|
+
audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
16304
16606
|
audio_to_llm_config: zod5.object({
|
|
16305
16607
|
prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
16306
16608
|
model: zod5.string().default(
|
|
@@ -16308,7 +16610,7 @@ var preRecordedControllerGetPreRecordedJobsV2Response = zod5.object({
|
|
|
16308
16610
|
).describe(
|
|
16309
16611
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
16310
16612
|
)
|
|
16311
|
-
}).optional().describe("
|
|
16613
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
16312
16614
|
pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
16313
16615
|
pii_redaction_config: zod5.object({
|
|
16314
16616
|
entity_types: zod5.enum([
|
|
@@ -17445,7 +17747,7 @@ var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsNamedEntityReco
|
|
|
17445
17747
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsCustomSpellingDefault = false;
|
|
17446
17748
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSentimentAnalysisDefault = false;
|
|
17447
17749
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmDefault = false;
|
|
17448
|
-
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
17750
|
+
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
17449
17751
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsPiiRedactionDefault = false;
|
|
17450
17752
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSentencesDefault = false;
|
|
17451
17753
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsPunctuationEnhancedDefault = false;
|
|
@@ -17786,19 +18088,19 @@ var preRecordedControllerGetPreRecordedJobV2Response = zod5.object({
|
|
|
17786
18088
|
"Forces the translation to use informal language forms when available in the target language."
|
|
17787
18089
|
)
|
|
17788
18090
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
17789
|
-
summarization: zod5.boolean().optional().describe("
|
|
18091
|
+
summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
|
|
17790
18092
|
summarization_config: zod5.object({
|
|
17791
18093
|
type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
17792
18094
|
preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSummarizationConfigTypeDefault
|
|
17793
18095
|
).describe("The type of summarization to apply")
|
|
17794
|
-
}).optional().describe("
|
|
18096
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
17795
18097
|
named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
17796
18098
|
custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
17797
18099
|
custom_spelling_config: zod5.object({
|
|
17798
18100
|
spelling_dictionary: zod5.record(zod5.string(), zod5.array(zod5.string())).describe("The list of spelling applied on the audio transcription")
|
|
17799
18101
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
17800
18102
|
sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
17801
|
-
audio_to_llm: zod5.boolean().optional().describe("
|
|
18103
|
+
audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
17802
18104
|
audio_to_llm_config: zod5.object({
|
|
17803
18105
|
prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
17804
18106
|
model: zod5.string().default(
|
|
@@ -17806,7 +18108,7 @@ var preRecordedControllerGetPreRecordedJobV2Response = zod5.object({
|
|
|
17806
18108
|
).describe(
|
|
17807
18109
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
17808
18110
|
)
|
|
17809
|
-
}).optional().describe("
|
|
18111
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
17810
18112
|
pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
17811
18113
|
pii_redaction_config: zod5.object({
|
|
17812
18114
|
entity_types: zod5.enum([
|
|
@@ -18919,7 +19221,7 @@ var transcriptionControllerInitPreRecordedJobV2BodyNamedEntityRecognitionDefault
|
|
|
18919
19221
|
var transcriptionControllerInitPreRecordedJobV2BodyCustomSpellingDefault = false;
|
|
18920
19222
|
var transcriptionControllerInitPreRecordedJobV2BodySentimentAnalysisDefault = false;
|
|
18921
19223
|
var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmDefault = false;
|
|
18922
|
-
var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-
|
|
19224
|
+
var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
18923
19225
|
var transcriptionControllerInitPreRecordedJobV2BodyPiiRedactionDefault = false;
|
|
18924
19226
|
var transcriptionControllerInitPreRecordedJobV2BodySentencesDefault = false;
|
|
18925
19227
|
var transcriptionControllerInitPreRecordedJobV2BodyPunctuationEnhancedDefault = false;
|
|
@@ -19212,23 +19514,23 @@ var transcriptionControllerInitPreRecordedJobV2Body = zod5.object({
|
|
|
19212
19514
|
"Forces the translation to use informal language forms when available in the target language."
|
|
19213
19515
|
)
|
|
19214
19516
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
19215
|
-
summarization: zod5.boolean().optional().describe("
|
|
19517
|
+
summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
|
|
19216
19518
|
summarization_config: zod5.object({
|
|
19217
19519
|
type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(transcriptionControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault).describe("The type of summarization to apply")
|
|
19218
|
-
}).optional().describe("
|
|
19520
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
19219
19521
|
named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
19220
19522
|
custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
19221
19523
|
custom_spelling_config: zod5.object({
|
|
19222
19524
|
spelling_dictionary: zod5.record(zod5.string(), zod5.array(zod5.string())).describe("The list of spelling applied on the audio transcription")
|
|
19223
19525
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
19224
19526
|
sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
19225
|
-
audio_to_llm: zod5.boolean().optional().describe("
|
|
19527
|
+
audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
19226
19528
|
audio_to_llm_config: zod5.object({
|
|
19227
19529
|
prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
19228
19530
|
model: zod5.string().default(transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault).describe(
|
|
19229
19531
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
19230
19532
|
)
|
|
19231
|
-
}).optional().describe("
|
|
19533
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
19232
19534
|
pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
19233
19535
|
pii_redaction_config: zod5.object({
|
|
19234
19536
|
entity_types: zod5.enum([
|
|
@@ -19486,7 +19788,7 @@ var transcriptionControllerListV2ResponseItemsItemRequestParamsNamedEntityRecogn
|
|
|
19486
19788
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsCustomSpellingDefault = false;
|
|
19487
19789
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
|
|
19488
19790
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmDefault = false;
|
|
19489
|
-
var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
19791
|
+
var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
19490
19792
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsPiiRedactionDefault = false;
|
|
19491
19793
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsSentencesDefault = false;
|
|
19492
19794
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
|
|
@@ -19897,12 +20199,12 @@ var transcriptionControllerListV2Response = zod5.object({
|
|
|
19897
20199
|
"Forces the translation to use informal language forms when available in the target language."
|
|
19898
20200
|
)
|
|
19899
20201
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
19900
|
-
summarization: zod5.boolean().optional().describe("
|
|
20202
|
+
summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
|
|
19901
20203
|
summarization_config: zod5.object({
|
|
19902
20204
|
type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
19903
20205
|
transcriptionControllerListV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
|
|
19904
20206
|
).describe("The type of summarization to apply")
|
|
19905
|
-
}).optional().describe("
|
|
20207
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
19906
20208
|
named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
19907
20209
|
custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
19908
20210
|
custom_spelling_config: zod5.object({
|
|
@@ -19911,7 +20213,7 @@ var transcriptionControllerListV2Response = zod5.object({
|
|
|
19911
20213
|
"**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
|
|
19912
20214
|
),
|
|
19913
20215
|
sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
19914
|
-
audio_to_llm: zod5.boolean().optional().describe("
|
|
20216
|
+
audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
19915
20217
|
audio_to_llm_config: zod5.object({
|
|
19916
20218
|
prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
19917
20219
|
model: zod5.string().default(
|
|
@@ -19919,7 +20221,7 @@ var transcriptionControllerListV2Response = zod5.object({
|
|
|
19919
20221
|
).describe(
|
|
19920
20222
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
19921
20223
|
)
|
|
19922
|
-
}).optional().describe("
|
|
20224
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
19923
20225
|
pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
19924
20226
|
pii_redaction_config: zod5.object({
|
|
19925
20227
|
entity_types: zod5.enum([
|
|
@@ -22237,7 +22539,7 @@ var transcriptionControllerGetTranscriptV2ResponseRequestParamsNamedEntityRecogn
|
|
|
22237
22539
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsCustomSpellingDefault = false;
|
|
22238
22540
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsSentimentAnalysisDefault = false;
|
|
22239
22541
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmDefault = false;
|
|
22240
|
-
var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
22542
|
+
var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
22241
22543
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsPiiRedactionDefault = false;
|
|
22242
22544
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsSentencesDefault = false;
|
|
22243
22545
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsPunctuationEnhancedDefault = false;
|
|
@@ -22642,19 +22944,19 @@ var transcriptionControllerGetTranscriptV2Response = zod5.discriminatedUnion("ki
|
|
|
22642
22944
|
"Forces the translation to use informal language forms when available in the target language."
|
|
22643
22945
|
)
|
|
22644
22946
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
22645
|
-
summarization: zod5.boolean().optional().describe("
|
|
22947
|
+
summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
|
|
22646
22948
|
summarization_config: zod5.object({
|
|
22647
22949
|
type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
22648
22950
|
transcriptionControllerGetTranscriptV2ResponseRequestParamsSummarizationConfigTypeDefault
|
|
22649
22951
|
).describe("The type of summarization to apply")
|
|
22650
|
-
}).optional().describe("
|
|
22952
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
22651
22953
|
named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
22652
22954
|
custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
22653
22955
|
custom_spelling_config: zod5.object({
|
|
22654
22956
|
spelling_dictionary: zod5.record(zod5.string(), zod5.array(zod5.string())).describe("The list of spelling applied on the audio transcription")
|
|
22655
22957
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
22656
22958
|
sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
22657
|
-
audio_to_llm: zod5.boolean().optional().describe("
|
|
22959
|
+
audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
22658
22960
|
audio_to_llm_config: zod5.object({
|
|
22659
22961
|
prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
22660
22962
|
model: zod5.string().default(
|
|
@@ -22662,7 +22964,7 @@ var transcriptionControllerGetTranscriptV2Response = zod5.discriminatedUnion("ki
|
|
|
22662
22964
|
).describe(
|
|
22663
22965
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
22664
22966
|
)
|
|
22665
|
-
}).optional().describe("
|
|
22967
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
22666
22968
|
pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
22667
22969
|
pii_redaction_config: zod5.object({
|
|
22668
22970
|
entity_types: zod5.enum([
|
|
@@ -25374,7 +25676,7 @@ var historyControllerGetListV1ResponseItemsItemRequestParamsNamedEntityRecogniti
|
|
|
25374
25676
|
var historyControllerGetListV1ResponseItemsItemRequestParamsCustomSpellingDefault = false;
|
|
25375
25677
|
var historyControllerGetListV1ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
|
|
25376
25678
|
var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmDefault = false;
|
|
25377
|
-
var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
25679
|
+
var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
25378
25680
|
var historyControllerGetListV1ResponseItemsItemRequestParamsPiiRedactionDefault = false;
|
|
25379
25681
|
var historyControllerGetListV1ResponseItemsItemRequestParamsSentencesDefault = false;
|
|
25380
25682
|
var historyControllerGetListV1ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
|
|
@@ -25785,12 +26087,12 @@ var historyControllerGetListV1Response = zod5.object({
|
|
|
25785
26087
|
"Forces the translation to use informal language forms when available in the target language."
|
|
25786
26088
|
)
|
|
25787
26089
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
25788
|
-
summarization: zod5.boolean().optional().describe("
|
|
26090
|
+
summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
|
|
25789
26091
|
summarization_config: zod5.object({
|
|
25790
26092
|
type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
25791
26093
|
historyControllerGetListV1ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
|
|
25792
26094
|
).describe("The type of summarization to apply")
|
|
25793
|
-
}).optional().describe("
|
|
26095
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
25794
26096
|
named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
25795
26097
|
custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
25796
26098
|
custom_spelling_config: zod5.object({
|
|
@@ -25799,7 +26101,7 @@ var historyControllerGetListV1Response = zod5.object({
|
|
|
25799
26101
|
"**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
|
|
25800
26102
|
),
|
|
25801
26103
|
sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
25802
|
-
audio_to_llm: zod5.boolean().optional().describe("
|
|
26104
|
+
audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
25803
26105
|
audio_to_llm_config: zod5.object({
|
|
25804
26106
|
prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
25805
26107
|
model: zod5.string().default(
|
|
@@ -25807,7 +26109,7 @@ var historyControllerGetListV1Response = zod5.object({
|
|
|
25807
26109
|
).describe(
|
|
25808
26110
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
25809
26111
|
)
|
|
25810
|
-
}).optional().describe("
|
|
26112
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
25811
26113
|
pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
25812
26114
|
pii_redaction_config: zod5.object({
|
|
25813
26115
|
entity_types: zod5.enum([
|
|
@@ -31052,6 +31354,7 @@ __export(openAIAudioRealtimeAPI_zod_exports, {
|
|
|
31052
31354
|
createRealtimeClientSecretBodySessionPromptVariablesTypeDefault: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefault,
|
|
31053
31355
|
createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultOne: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultOne,
|
|
31054
31356
|
createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultTwo: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultTwo,
|
|
31357
|
+
createRealtimeClientSecretBodySessionReasoningEffortDefault: () => createRealtimeClientSecretBodySessionReasoningEffortDefault,
|
|
31055
31358
|
createRealtimeClientSecretBodySessionToolChoiceDefault: () => createRealtimeClientSecretBodySessionToolChoiceDefault,
|
|
31056
31359
|
createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne: () => createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne,
|
|
31057
31360
|
createRealtimeClientSecretBodySessionTracingDefault: () => createRealtimeClientSecretBodySessionTracingDefault,
|
|
@@ -31076,6 +31379,7 @@ __export(openAIAudioRealtimeAPI_zod_exports, {
|
|
|
31076
31379
|
createRealtimeClientSecretResponseSessionPromptVariablesTypeDefault: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefault,
|
|
31077
31380
|
createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultOne: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultOne,
|
|
31078
31381
|
createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo,
|
|
31382
|
+
createRealtimeClientSecretResponseSessionReasoningEffortDefault: () => createRealtimeClientSecretResponseSessionReasoningEffortDefault,
|
|
31079
31383
|
createRealtimeClientSecretResponseSessionToolChoiceDefault: () => createRealtimeClientSecretResponseSessionToolChoiceDefault,
|
|
31080
31384
|
createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne: () => createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne,
|
|
31081
31385
|
createRealtimeClientSecretResponseSessionTracingDefaultOne: () => createRealtimeClientSecretResponseSessionTracingDefaultOne,
|
|
@@ -31432,6 +31736,7 @@ var createRealtimeClientSecretBodySessionTracingDefaultOne = "auto";
|
|
|
31432
31736
|
var createRealtimeClientSecretBodySessionTracingDefault = null;
|
|
31433
31737
|
var createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne = "always";
|
|
31434
31738
|
var createRealtimeClientSecretBodySessionToolChoiceDefault = "auto";
|
|
31739
|
+
var createRealtimeClientSecretBodySessionReasoningEffortDefault = "low";
|
|
31435
31740
|
var createRealtimeClientSecretBodySessionTruncationRetentionRatioMin = 0;
|
|
31436
31741
|
var createRealtimeClientSecretBodySessionTruncationRetentionRatioMax = 1;
|
|
31437
31742
|
var createRealtimeClientSecretBodySessionTruncationTokenLimitsPostInstructionsMin = 0;
|
|
@@ -31467,6 +31772,7 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
31467
31772
|
zod6.enum([
|
|
31468
31773
|
"gpt-realtime",
|
|
31469
31774
|
"gpt-realtime-1.5",
|
|
31775
|
+
"gpt-realtime-2",
|
|
31470
31776
|
"gpt-realtime-2025-08-28",
|
|
31471
31777
|
"gpt-4o-realtime-preview",
|
|
31472
31778
|
"gpt-4o-realtime-preview-2024-10-01",
|
|
@@ -31507,16 +31813,20 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
31507
31813
|
"gpt-4o-mini-transcribe",
|
|
31508
31814
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
31509
31815
|
"gpt-4o-transcribe",
|
|
31510
|
-
"gpt-4o-transcribe-diarize"
|
|
31816
|
+
"gpt-4o-transcribe-diarize",
|
|
31817
|
+
"gpt-realtime-whisper"
|
|
31511
31818
|
])
|
|
31512
31819
|
).optional().describe(
|
|
31513
|
-
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`,
|
|
31820
|
+
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
|
|
31514
31821
|
),
|
|
31515
31822
|
language: zod6.string().optional().describe(
|
|
31516
31823
|
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
31517
31824
|
),
|
|
31518
31825
|
prompt: zod6.string().optional().describe(
|
|
31519
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
31826
|
+
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
|
|
31827
|
+
),
|
|
31828
|
+
delay: zod6.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
|
|
31829
|
+
"Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
|
|
31520
31830
|
)
|
|
31521
31831
|
}).optional(),
|
|
31522
31832
|
noise_reduction: zod6.object({
|
|
@@ -31583,7 +31893,7 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
31583
31893
|
"Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
|
|
31584
31894
|
)
|
|
31585
31895
|
]).describe(
|
|
31586
|
-
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
|
|
31896
|
+
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
|
|
31587
31897
|
).or(zod6.null()).optional()
|
|
31588
31898
|
}).optional(),
|
|
31589
31899
|
output: zod6.object({
|
|
@@ -31656,7 +31966,7 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
31656
31966
|
server_label: zod6.string().describe(
|
|
31657
31967
|
"A label for this MCP server, used to identify it in tool calls.\n"
|
|
31658
31968
|
),
|
|
31659
|
-
server_url: zod6.string().optional().describe(
|
|
31969
|
+
server_url: zod6.string().url().optional().describe(
|
|
31660
31970
|
"The URL for the MCP server. One of `server_url` or `connector_id` must be\nprovided.\n"
|
|
31661
31971
|
),
|
|
31662
31972
|
connector_id: zod6.enum([
|
|
@@ -31734,6 +32044,16 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
31734
32044
|
).default(createRealtimeClientSecretBodySessionToolChoiceDefault).describe(
|
|
31735
32045
|
"How the model chooses tools. Provide one of the string modes or force a specific\nfunction/MCP tool.\n"
|
|
31736
32046
|
),
|
|
32047
|
+
parallel_tool_calls: zod6.boolean().optional().describe(
|
|
32048
|
+
"Whether the model may call multiple tools in parallel. Only supported by\nreasoning Realtime models such as `gpt-realtime-2`.\n"
|
|
32049
|
+
),
|
|
32050
|
+
reasoning: zod6.object({
|
|
32051
|
+
effort: zod6.enum(["minimal", "low", "medium", "high", "xhigh"]).default(createRealtimeClientSecretBodySessionReasoningEffortDefault).describe(
|
|
32052
|
+
"Constrains effort on reasoning for reasoning-capable Realtime models such as\n`gpt-realtime-2`.\n"
|
|
32053
|
+
)
|
|
32054
|
+
}).optional().describe(
|
|
32055
|
+
"Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.\n"
|
|
32056
|
+
),
|
|
31737
32057
|
max_output_tokens: zod6.number().or(zod6.enum(["inf"])).optional().describe(
|
|
31738
32058
|
"Maximum number of output tokens for a single assistant response,\ninclusive of tool calls. Provide an integer between 1 and 4096 to\nlimit output tokens, or `inf` for the maximum available tokens for a\ngiven model. Defaults to `inf`.\n"
|
|
31739
32059
|
),
|
|
@@ -31773,7 +32093,7 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
31773
32093
|
).or(
|
|
31774
32094
|
zod6.object({
|
|
31775
32095
|
type: zod6.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
|
|
31776
|
-
image_url: zod6.string().describe(
|
|
32096
|
+
image_url: zod6.string().url().describe(
|
|
31777
32097
|
"The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
|
|
31778
32098
|
).or(zod6.null()).optional(),
|
|
31779
32099
|
file_id: zod6.string().describe("The ID of the file to be sent to the model.").or(zod6.null()).optional(),
|
|
@@ -31787,7 +32107,7 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
31787
32107
|
file_id: zod6.string().describe("The ID of the file to be sent to the model.").or(zod6.null()).optional(),
|
|
31788
32108
|
filename: zod6.string().optional().describe("The name of the file to be sent to the model."),
|
|
31789
32109
|
file_data: zod6.string().optional().describe("The content of the file to be sent to the model.\n"),
|
|
31790
|
-
file_url: zod6.string().optional().describe("The URL of the file to be sent to the model."),
|
|
32110
|
+
file_url: zod6.string().url().optional().describe("The URL of the file to be sent to the model."),
|
|
31791
32111
|
detail: zod6.enum(["low", "high"]).optional()
|
|
31792
32112
|
}).describe("A file input to the model.")
|
|
31793
32113
|
)
|
|
@@ -31823,16 +32143,20 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
31823
32143
|
"gpt-4o-mini-transcribe",
|
|
31824
32144
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
31825
32145
|
"gpt-4o-transcribe",
|
|
31826
|
-
"gpt-4o-transcribe-diarize"
|
|
32146
|
+
"gpt-4o-transcribe-diarize",
|
|
32147
|
+
"gpt-realtime-whisper"
|
|
31827
32148
|
])
|
|
31828
32149
|
).optional().describe(
|
|
31829
|
-
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`,
|
|
32150
|
+
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
|
|
31830
32151
|
),
|
|
31831
32152
|
language: zod6.string().optional().describe(
|
|
31832
32153
|
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
31833
32154
|
),
|
|
31834
32155
|
prompt: zod6.string().optional().describe(
|
|
31835
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
32156
|
+
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
|
|
32157
|
+
),
|
|
32158
|
+
delay: zod6.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
|
|
32159
|
+
"Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
|
|
31836
32160
|
)
|
|
31837
32161
|
}).optional(),
|
|
31838
32162
|
noise_reduction: zod6.object({
|
|
@@ -31899,7 +32223,7 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
31899
32223
|
"Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
|
|
31900
32224
|
)
|
|
31901
32225
|
]).describe(
|
|
31902
|
-
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
|
|
32226
|
+
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
|
|
31903
32227
|
).or(zod6.null()).optional()
|
|
31904
32228
|
}).optional()
|
|
31905
32229
|
}).optional().describe("Configuration for input and output audio.\n"),
|
|
@@ -31930,6 +32254,7 @@ var createRealtimeClientSecretResponseSessionTracingDefaultTwo = "auto";
|
|
|
31930
32254
|
var createRealtimeClientSecretResponseSessionTracingDefaultOne = null;
|
|
31931
32255
|
var createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne = "always";
|
|
31932
32256
|
var createRealtimeClientSecretResponseSessionToolChoiceDefault = "auto";
|
|
32257
|
+
var createRealtimeClientSecretResponseSessionReasoningEffortDefault = "low";
|
|
31933
32258
|
var createRealtimeClientSecretResponseSessionTruncationRetentionRatioMin = 0;
|
|
31934
32259
|
var createRealtimeClientSecretResponseSessionTruncationRetentionRatioMax = 1;
|
|
31935
32260
|
var createRealtimeClientSecretResponseSessionTruncationTokenLimitsPostInstructionsMin = 0;
|
|
@@ -31939,17 +32264,14 @@ var createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo = "in
|
|
|
31939
32264
|
var createRealtimeClientSecretResponse = zod6.object({
|
|
31940
32265
|
value: zod6.string().describe("The generated client secret value."),
|
|
31941
32266
|
expires_at: zod6.number().describe("Expiration timestamp for the client secret, in seconds since epoch."),
|
|
31942
|
-
session: zod6.
|
|
32267
|
+
session: zod6.union([
|
|
31943
32268
|
zod6.object({
|
|
31944
|
-
client_secret: zod6.object({
|
|
31945
|
-
value: zod6.string().describe(
|
|
31946
|
-
"Ephemeral key usable in client environments to authenticate connections to the Realtime API. Use this in client-side environments rather than a standard API token, which should only be used server-side.\n"
|
|
31947
|
-
),
|
|
31948
|
-
expires_at: zod6.number().describe(
|
|
31949
|
-
"Timestamp for when the token expires. Currently, all tokens expire\nafter one minute.\n"
|
|
31950
|
-
)
|
|
31951
|
-
}).describe("Ephemeral key returned by the API."),
|
|
31952
32269
|
type: zod6.enum(["realtime"]).describe("The type of session to create. Always `realtime` for the Realtime API.\n"),
|
|
32270
|
+
id: zod6.string().describe(
|
|
32271
|
+
"Unique identifier for the session that looks like `sess_1234567890abcdef`.\n"
|
|
32272
|
+
),
|
|
32273
|
+
object: zod6.enum(["realtime.session"]).describe("The object type. Always `realtime.session`."),
|
|
32274
|
+
expires_at: zod6.number().optional().describe("Expiration timestamp for the session, in seconds since epoch."),
|
|
31953
32275
|
output_modalities: zod6.array(zod6.enum(["text", "audio"])).default(createRealtimeClientSecretResponseSessionOutputModalitiesDefault).describe(
|
|
31954
32276
|
'The set of modalities the model can respond with. It defaults to `["audio"]`, indicating\nthat the model will respond with audio plus a transcript. `["text"]` can be used to make\nthe model respond with text only. It is not possible to request both `text` and `audio` at the same time.\n'
|
|
31955
32277
|
),
|
|
@@ -31957,6 +32279,7 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
31957
32279
|
zod6.enum([
|
|
31958
32280
|
"gpt-realtime",
|
|
31959
32281
|
"gpt-realtime-1.5",
|
|
32282
|
+
"gpt-realtime-2",
|
|
31960
32283
|
"gpt-realtime-2025-08-28",
|
|
31961
32284
|
"gpt-4o-realtime-preview",
|
|
31962
32285
|
"gpt-4o-realtime-preview-2024-10-01",
|
|
@@ -31979,15 +32302,15 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
31979
32302
|
audio: zod6.object({
|
|
31980
32303
|
input: zod6.object({
|
|
31981
32304
|
format: zod6.object({
|
|
31982
|
-
type: zod6.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
|
|
31983
|
-
rate: zod6.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
|
|
32305
|
+
type: zod6.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
|
|
32306
|
+
rate: zod6.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
|
|
31984
32307
|
}).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
|
|
31985
32308
|
zod6.object({
|
|
31986
|
-
type: zod6.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
|
|
32309
|
+
type: zod6.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
|
|
31987
32310
|
}).describe("The G.711 \u03BC-law format.")
|
|
31988
32311
|
).or(
|
|
31989
32312
|
zod6.object({
|
|
31990
|
-
type: zod6.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
|
|
32313
|
+
type: zod6.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
|
|
31991
32314
|
}).describe("The G.711 A-law format.")
|
|
31992
32315
|
).optional(),
|
|
31993
32316
|
transcription: zod6.object({
|
|
@@ -31997,20 +32320,19 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
31997
32320
|
"gpt-4o-mini-transcribe",
|
|
31998
32321
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
31999
32322
|
"gpt-4o-transcribe",
|
|
32000
|
-
"gpt-4o-transcribe-diarize"
|
|
32323
|
+
"gpt-4o-transcribe-diarize",
|
|
32324
|
+
"gpt-realtime-whisper"
|
|
32001
32325
|
])
|
|
32002
32326
|
).optional().describe(
|
|
32003
|
-
"The model
|
|
32004
|
-
),
|
|
32005
|
-
language: zod6.string().optional().describe(
|
|
32006
|
-
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
32327
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
32007
32328
|
),
|
|
32329
|
+
language: zod6.string().optional().describe("The language of the input audio.\n"),
|
|
32008
32330
|
prompt: zod6.string().optional().describe(
|
|
32009
|
-
|
|
32331
|
+
"The prompt configured for input audio transcription, when present.\n"
|
|
32010
32332
|
)
|
|
32011
32333
|
}).optional(),
|
|
32012
32334
|
noise_reduction: zod6.object({
|
|
32013
|
-
type: zod6.enum(["near_field", "far_field"]).describe(
|
|
32335
|
+
type: zod6.enum(["near_field", "far_field"]).optional().describe(
|
|
32014
32336
|
"Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones such as laptop or conference room microphones.\n"
|
|
32015
32337
|
)
|
|
32016
32338
|
}).optional().describe(
|
|
@@ -32073,20 +32395,20 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
32073
32395
|
"Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
|
|
32074
32396
|
)
|
|
32075
32397
|
]).describe(
|
|
32076
|
-
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
|
|
32398
|
+
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
|
|
32077
32399
|
).or(zod6.null()).optional()
|
|
32078
32400
|
}).optional(),
|
|
32079
32401
|
output: zod6.object({
|
|
32080
32402
|
format: zod6.object({
|
|
32081
|
-
type: zod6.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
|
|
32082
|
-
rate: zod6.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
|
|
32403
|
+
type: zod6.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
|
|
32404
|
+
rate: zod6.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
|
|
32083
32405
|
}).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
|
|
32084
32406
|
zod6.object({
|
|
32085
|
-
type: zod6.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
|
|
32407
|
+
type: zod6.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
|
|
32086
32408
|
}).describe("The G.711 \u03BC-law format.")
|
|
32087
32409
|
).or(
|
|
32088
32410
|
zod6.object({
|
|
32089
|
-
type: zod6.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
|
|
32411
|
+
type: zod6.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
|
|
32090
32412
|
}).describe("The G.711 A-law format.")
|
|
32091
32413
|
).optional(),
|
|
32092
32414
|
voice: zod6.string().or(
|
|
@@ -32130,7 +32452,7 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
32130
32452
|
).or(zod6.null()).optional(),
|
|
32131
32453
|
tools: zod6.array(
|
|
32132
32454
|
zod6.object({
|
|
32133
|
-
type: zod6.enum(["function"]).describe("The type of the tool, i.e. `function`."),
|
|
32455
|
+
type: zod6.enum(["function"]).optional().describe("The type of the tool, i.e. `function`."),
|
|
32134
32456
|
name: zod6.string().optional().describe("The name of the function."),
|
|
32135
32457
|
description: zod6.string().optional().describe(
|
|
32136
32458
|
"The description of the function, including guidance on when and how\nto call it, and guidance about what to tell the user when calling\n(if anything).\n"
|
|
@@ -32142,7 +32464,7 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
32142
32464
|
server_label: zod6.string().describe(
|
|
32143
32465
|
"A label for this MCP server, used to identify it in tool calls.\n"
|
|
32144
32466
|
),
|
|
32145
|
-
server_url: zod6.string().optional().describe(
|
|
32467
|
+
server_url: zod6.string().url().optional().describe(
|
|
32146
32468
|
"The URL for the MCP server. One of `server_url` or `connector_id` must be\nprovided.\n"
|
|
32147
32469
|
),
|
|
32148
32470
|
connector_id: zod6.enum([
|
|
@@ -32154,7 +32476,7 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
32154
32476
|
"connector_outlookcalendar",
|
|
32155
32477
|
"connector_outlookemail",
|
|
32156
32478
|
"connector_sharepoint"
|
|
32157
|
-
]).describe(
|
|
32479
|
+
]).optional().describe(
|
|
32158
32480
|
"Identifier for service connectors, like those available in ChatGPT. One of\n`server_url` or `connector_id` must be provided. Learn more about service\nconnectors [here](/docs/guides/tools-remote-mcp#connectors).\n\nCurrently supported `connector_id` values are:\n\n- Dropbox: `connector_dropbox`\n- Gmail: `connector_gmail`\n- Google Calendar: `connector_googlecalendar`\n- Google Drive: `connector_googledrive`\n- Microsoft Teams: `connector_microsoftteams`\n- Outlook Calendar: `connector_outlookcalendar`\n- Outlook Email: `connector_outlookemail`\n- SharePoint: `connector_sharepoint`\n"
|
|
32159
32481
|
),
|
|
32160
32482
|
authorization: zod6.string().optional().describe(
|
|
@@ -32220,6 +32542,13 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
32220
32542
|
).default(createRealtimeClientSecretResponseSessionToolChoiceDefault).describe(
|
|
32221
32543
|
"How the model chooses tools. Provide one of the string modes or force a specific\nfunction/MCP tool.\n"
|
|
32222
32544
|
),
|
|
32545
|
+
reasoning: zod6.object({
|
|
32546
|
+
effort: zod6.enum(["minimal", "low", "medium", "high", "xhigh"]).default(createRealtimeClientSecretResponseSessionReasoningEffortDefault).describe(
|
|
32547
|
+
"Constrains effort on reasoning for reasoning-capable Realtime models such as\n`gpt-realtime-2`.\n"
|
|
32548
|
+
)
|
|
32549
|
+
}).optional().describe(
|
|
32550
|
+
"Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.\n"
|
|
32551
|
+
),
|
|
32223
32552
|
max_output_tokens: zod6.number().or(zod6.enum(["inf"])).optional().describe(
|
|
32224
32553
|
"Maximum number of output tokens for a single assistant response,\ninclusive of tool calls. Provide an integer between 1 and 4096 to\nlimit output tokens, or `inf` for the maximum available tokens for a\ngiven model. Defaults to `inf`.\n"
|
|
32225
32554
|
),
|
|
@@ -32259,7 +32588,7 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
32259
32588
|
).or(
|
|
32260
32589
|
zod6.object({
|
|
32261
32590
|
type: zod6.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
|
|
32262
|
-
image_url: zod6.string().describe(
|
|
32591
|
+
image_url: zod6.string().url().describe(
|
|
32263
32592
|
"The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
|
|
32264
32593
|
).or(zod6.null()).optional(),
|
|
32265
32594
|
file_id: zod6.string().describe("The ID of the file to be sent to the model.").or(zod6.null()).optional(),
|
|
@@ -32273,8 +32602,8 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
32273
32602
|
file_id: zod6.string().describe("The ID of the file to be sent to the model.").or(zod6.null()).optional(),
|
|
32274
32603
|
filename: zod6.string().optional().describe("The name of the file to be sent to the model."),
|
|
32275
32604
|
file_data: zod6.string().optional().describe("The content of the file to be sent to the model.\n"),
|
|
32276
|
-
file_url: zod6.string().optional().describe("The URL of the file to be sent to the model."),
|
|
32277
|
-
detail: zod6.enum(["low", "high"])
|
|
32605
|
+
file_url: zod6.string().url().optional().describe("The URL of the file to be sent to the model."),
|
|
32606
|
+
detail: zod6.enum(["low", "high"]).optional()
|
|
32278
32607
|
}).describe("A file input to the model.")
|
|
32279
32608
|
)
|
|
32280
32609
|
).describe(
|
|
@@ -32283,9 +32612,7 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
32283
32612
|
}).describe(
|
|
32284
32613
|
"Reference to a prompt template and its variables.\n[Learn more](/docs/guides/text?api-mode=responses#reusable-prompts).\n"
|
|
32285
32614
|
).or(zod6.null()).optional()
|
|
32286
|
-
}).describe(
|
|
32287
|
-
"A new Realtime session configuration, with an ephemeral key. Default TTL\nfor keys is one minute.\n"
|
|
32288
|
-
),
|
|
32615
|
+
}).describe("A Realtime session configuration object.\n"),
|
|
32289
32616
|
zod6.object({
|
|
32290
32617
|
type: zod6.enum(["transcription"]).describe(
|
|
32291
32618
|
"The type of session. Always `transcription` for transcription sessions.\n"
|
|
@@ -32301,15 +32628,15 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
32301
32628
|
audio: zod6.object({
|
|
32302
32629
|
input: zod6.object({
|
|
32303
32630
|
format: zod6.object({
|
|
32304
|
-
type: zod6.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
|
|
32305
|
-
rate: zod6.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
|
|
32631
|
+
type: zod6.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
|
|
32632
|
+
rate: zod6.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
|
|
32306
32633
|
}).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
|
|
32307
32634
|
zod6.object({
|
|
32308
|
-
type: zod6.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
|
|
32635
|
+
type: zod6.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
|
|
32309
32636
|
}).describe("The G.711 \u03BC-law format.")
|
|
32310
32637
|
).or(
|
|
32311
32638
|
zod6.object({
|
|
32312
|
-
type: zod6.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
|
|
32639
|
+
type: zod6.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
|
|
32313
32640
|
}).describe("The G.711 A-law format.")
|
|
32314
32641
|
).optional(),
|
|
32315
32642
|
transcription: zod6.object({
|
|
@@ -32319,20 +32646,19 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
32319
32646
|
"gpt-4o-mini-transcribe",
|
|
32320
32647
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
32321
32648
|
"gpt-4o-transcribe",
|
|
32322
|
-
"gpt-4o-transcribe-diarize"
|
|
32649
|
+
"gpt-4o-transcribe-diarize",
|
|
32650
|
+
"gpt-realtime-whisper"
|
|
32323
32651
|
])
|
|
32324
32652
|
).optional().describe(
|
|
32325
|
-
"The model
|
|
32326
|
-
),
|
|
32327
|
-
language: zod6.string().optional().describe(
|
|
32328
|
-
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
32653
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
32329
32654
|
),
|
|
32655
|
+
language: zod6.string().optional().describe("The language of the input audio.\n"),
|
|
32330
32656
|
prompt: zod6.string().optional().describe(
|
|
32331
|
-
|
|
32657
|
+
"The prompt configured for input audio transcription, when present.\n"
|
|
32332
32658
|
)
|
|
32333
32659
|
}).optional(),
|
|
32334
32660
|
noise_reduction: zod6.object({
|
|
32335
|
-
type: zod6.enum(["near_field", "far_field"]).describe(
|
|
32661
|
+
type: zod6.enum(["near_field", "far_field"]).optional().describe(
|
|
32336
32662
|
"Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones such as laptop or conference room microphones.\n"
|
|
32337
32663
|
)
|
|
32338
32664
|
}).optional().describe("Configuration for input audio noise reduction.\n"),
|
|
@@ -32349,8 +32675,10 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
32349
32675
|
silence_duration_ms: zod6.number().optional().describe(
|
|
32350
32676
|
"Duration of silence to detect speech stop (in milliseconds). Defaults\nto 500ms. With shorter values the model will respond more quickly,\nbut may jump in on short pauses from the user.\n"
|
|
32351
32677
|
)
|
|
32352
|
-
}).
|
|
32353
|
-
"Configuration for turn detection. Can be set to `null` to turn off. Server\nVAD means that the model will detect the start and end of speech based on\naudio volume and respond at the end of user speech.\n"
|
|
32678
|
+
}).describe(
|
|
32679
|
+
"Configuration for turn detection. Can be set to `null` to turn off. Server\nVAD means that the model will detect the start and end of speech based on\naudio volume and respond at the end of user speech. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported.\n"
|
|
32680
|
+
).or(zod6.null()).optional().describe(
|
|
32681
|
+
"Configuration for turn detection. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported.\n"
|
|
32354
32682
|
)
|
|
32355
32683
|
}).optional()
|
|
32356
32684
|
}).optional().describe("Configuration for input audio for the session.\n")
|
|
@@ -32490,7 +32818,7 @@ var createRealtimeSessionBody = zod6.object({
|
|
|
32490
32818
|
).or(
|
|
32491
32819
|
zod6.object({
|
|
32492
32820
|
type: zod6.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
|
|
32493
|
-
image_url: zod6.string().describe(
|
|
32821
|
+
image_url: zod6.string().url().describe(
|
|
32494
32822
|
"The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
|
|
32495
32823
|
).or(zod6.null()).optional(),
|
|
32496
32824
|
file_id: zod6.string().describe("The ID of the file to be sent to the model.").or(zod6.null()).optional(),
|
|
@@ -32504,7 +32832,7 @@ var createRealtimeSessionBody = zod6.object({
|
|
|
32504
32832
|
file_id: zod6.string().describe("The ID of the file to be sent to the model.").or(zod6.null()).optional(),
|
|
32505
32833
|
filename: zod6.string().optional().describe("The name of the file to be sent to the model."),
|
|
32506
32834
|
file_data: zod6.string().optional().describe("The content of the file to be sent to the model.\n"),
|
|
32507
|
-
file_url: zod6.string().optional().describe("The URL of the file to be sent to the model."),
|
|
32835
|
+
file_url: zod6.string().url().optional().describe("The URL of the file to be sent to the model."),
|
|
32508
32836
|
detail: zod6.enum(["low", "high"]).optional()
|
|
32509
32837
|
}).describe("A file input to the model.")
|
|
32510
32838
|
)
|
|
@@ -32553,17 +32881,14 @@ var createRealtimeSessionResponse = zod6.object({
|
|
|
32553
32881
|
"gpt-4o-mini-transcribe",
|
|
32554
32882
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
32555
32883
|
"gpt-4o-transcribe",
|
|
32556
|
-
"gpt-4o-transcribe-diarize"
|
|
32884
|
+
"gpt-4o-transcribe-diarize",
|
|
32885
|
+
"gpt-realtime-whisper"
|
|
32557
32886
|
])
|
|
32558
32887
|
).optional().describe(
|
|
32559
|
-
"The model
|
|
32560
|
-
),
|
|
32561
|
-
language: zod6.string().optional().describe(
|
|
32562
|
-
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
32888
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
32563
32889
|
),
|
|
32564
|
-
|
|
32565
|
-
|
|
32566
|
-
)
|
|
32890
|
+
language: zod6.string().optional().describe("The language of the input audio.\n"),
|
|
32891
|
+
prompt: zod6.string().optional().describe("The prompt configured for input audio transcription, when present.\n")
|
|
32567
32892
|
}).optional(),
|
|
32568
32893
|
noise_reduction: zod6.object({
|
|
32569
32894
|
type: zod6.enum(["near_field", "far_field"]).optional().describe(
|
|
@@ -32689,16 +33014,20 @@ var createRealtimeTranscriptionSessionBody = zod6.object({
|
|
|
32689
33014
|
"gpt-4o-mini-transcribe",
|
|
32690
33015
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
32691
33016
|
"gpt-4o-transcribe",
|
|
32692
|
-
"gpt-4o-transcribe-diarize"
|
|
33017
|
+
"gpt-4o-transcribe-diarize",
|
|
33018
|
+
"gpt-realtime-whisper"
|
|
32693
33019
|
])
|
|
32694
33020
|
).optional().describe(
|
|
32695
|
-
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`,
|
|
33021
|
+
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
|
|
32696
33022
|
),
|
|
32697
33023
|
language: zod6.string().optional().describe(
|
|
32698
33024
|
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
32699
33025
|
),
|
|
32700
33026
|
prompt: zod6.string().optional().describe(
|
|
32701
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
33027
|
+
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
|
|
33028
|
+
),
|
|
33029
|
+
delay: zod6.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
|
|
33030
|
+
"Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
|
|
32702
33031
|
)
|
|
32703
33032
|
}).optional(),
|
|
32704
33033
|
include: zod6.array(zod6.enum(["item.input_audio_transcription.logprobs"])).optional().describe(
|
|
@@ -32727,17 +33056,14 @@ var createRealtimeTranscriptionSessionResponse = zod6.object({
|
|
|
32727
33056
|
"gpt-4o-mini-transcribe",
|
|
32728
33057
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
32729
33058
|
"gpt-4o-transcribe",
|
|
32730
|
-
"gpt-4o-transcribe-diarize"
|
|
33059
|
+
"gpt-4o-transcribe-diarize",
|
|
33060
|
+
"gpt-realtime-whisper"
|
|
32731
33061
|
])
|
|
32732
33062
|
).optional().describe(
|
|
32733
|
-
"The model
|
|
32734
|
-
),
|
|
32735
|
-
language: zod6.string().optional().describe(
|
|
32736
|
-
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
33063
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
32737
33064
|
),
|
|
32738
|
-
|
|
32739
|
-
|
|
32740
|
-
)
|
|
33065
|
+
language: zod6.string().optional().describe("The language of the input audio.\n"),
|
|
33066
|
+
prompt: zod6.string().optional().describe("The prompt configured for input audio transcription, when present.\n")
|
|
32741
33067
|
}).optional(),
|
|
32742
33068
|
turn_detection: zod6.object({
|
|
32743
33069
|
type: zod6.string().optional().describe("Type of turn detection, only `server_vad` is currently supported.\n"),
|
|
@@ -36122,6 +36448,7 @@ __export(sonioxPublicAPI_zod_exports, {
|
|
|
36122
36448
|
createTranscriptionBodyWebhookUrlRegExpOne: () => createTranscriptionBodyWebhookUrlRegExpOne,
|
|
36123
36449
|
deleteFileParams: () => deleteFileParams,
|
|
36124
36450
|
deleteTranscriptionParams: () => deleteTranscriptionParams,
|
|
36451
|
+
getConcurrencyLimitsResponse: () => getConcurrencyLimitsResponse,
|
|
36125
36452
|
getFileParams: () => getFileParams,
|
|
36126
36453
|
getFileResponse: () => getFileResponse,
|
|
36127
36454
|
getFilesCountResponse: () => getFilesCountResponse,
|
|
@@ -36139,6 +36466,12 @@ __export(sonioxPublicAPI_zod_exports, {
|
|
|
36139
36466
|
getTranscriptionsQueryLimitMax: () => getTranscriptionsQueryLimitMax,
|
|
36140
36467
|
getTranscriptionsQueryParams: () => getTranscriptionsQueryParams,
|
|
36141
36468
|
getTranscriptionsResponse: () => getTranscriptionsResponse,
|
|
36469
|
+
getTtsModelsResponse: () => getTtsModelsResponse,
|
|
36470
|
+
getUsageLogsQueryLimitDefault: () => getUsageLogsQueryLimitDefault,
|
|
36471
|
+
getUsageLogsQueryLimitMax: () => getUsageLogsQueryLimitMax,
|
|
36472
|
+
getUsageLogsQueryParams: () => getUsageLogsQueryParams,
|
|
36473
|
+
getUsageLogsQuerySortDefault: () => getUsageLogsQuerySortDefault,
|
|
36474
|
+
getUsageLogsResponse: () => getUsageLogsResponse,
|
|
36142
36475
|
uploadFileBody: () => uploadFileBody,
|
|
36143
36476
|
uploadFileBodyClientReferenceIdMaxOne: () => uploadFileBodyClientReferenceIdMaxOne
|
|
36144
36477
|
});
|
|
@@ -36389,11 +36722,73 @@ var getModelsResponse = zod10.object({
|
|
|
36389
36722
|
})
|
|
36390
36723
|
).describe("List of available models and their attributes.")
|
|
36391
36724
|
});
|
|
36725
|
+
var getTtsModelsResponse = zod10.object({
|
|
36726
|
+
models: zod10.array(
|
|
36727
|
+
zod10.object({
|
|
36728
|
+
id: zod10.string().describe("Unique identifier of the model."),
|
|
36729
|
+
aliased_model_id: zod10.string().or(zod10.null()).describe("If this is an alias, the id of the aliased model."),
|
|
36730
|
+
name: zod10.string().describe("Name of the model."),
|
|
36731
|
+
voices: zod10.array(
|
|
36732
|
+
zod10.object({
|
|
36733
|
+
id: zod10.string().describe("Unique identifier of the voice."),
|
|
36734
|
+
description: zod10.string().describe("Description of the TTS voice."),
|
|
36735
|
+
gender: zod10.enum(["male", "female", "neutral"])
|
|
36736
|
+
})
|
|
36737
|
+
).describe("List of available voices for this model."),
|
|
36738
|
+
languages: zod10.array(
|
|
36739
|
+
zod10.object({
|
|
36740
|
+
code: zod10.string().describe("2-letter language code."),
|
|
36741
|
+
name: zod10.string().describe("Language name.")
|
|
36742
|
+
})
|
|
36743
|
+
).describe("List of languages supported by the model.")
|
|
36744
|
+
})
|
|
36745
|
+
).describe("List of available TTS models and their attributes.")
|
|
36746
|
+
});
|
|
36747
|
+
var getUsageLogsQueryLimitDefault = 1e3;
|
|
36748
|
+
var getUsageLogsQueryLimitMax = 1e3;
|
|
36749
|
+
var getUsageLogsQuerySortDefault = "end_time_asc";
|
|
36750
|
+
var getUsageLogsQueryParams = zod10.object({
|
|
36751
|
+
start_time: zod10.string().describe("Start of the time window (inclusive). Filters by request end time."),
|
|
36752
|
+
end_time: zod10.string().describe("End of the time window (exclusive). Filters by request end time."),
|
|
36753
|
+
limit: zod10.number().min(1).max(getUsageLogsQueryLimitMax).default(getUsageLogsQueryLimitDefault).describe("Maximum number of usage log entries to return."),
|
|
36754
|
+
sort: zod10.enum(["end_time_asc", "end_time_desc"]).default(getUsageLogsQuerySortDefault).describe(
|
|
36755
|
+
"Sort order by end_time.Use `end_time_desc` to get the most recent entries first. When paginating, pass the same `sort` value alongside the cursor."
|
|
36756
|
+
),
|
|
36757
|
+
cursor: zod10.string().or(zod10.null()).optional().describe("Pagination cursor for the next page of results.")
|
|
36758
|
+
});
|
|
36759
|
+
var getUsageLogsResponse = zod10.object({
|
|
36760
|
+
usage_logs: zod10.array(
|
|
36761
|
+
zod10.object({
|
|
36762
|
+
uuid: zod10.string().uuid().describe("Unique identifier of the request."),
|
|
36763
|
+
request_scope: zod10.string().describe("Scope of the request (api / playground)."),
|
|
36764
|
+
client_reference_id: zod10.string().describe("Client reference ID supplied on the original request. Empty string if none."),
|
|
36765
|
+
model: zod10.string().describe("Model identifier."),
|
|
36766
|
+
start_time: zod10.string().datetime({}).describe("When the request started."),
|
|
36767
|
+
end_time: zod10.string().datetime({}).describe("When the request ended."),
|
|
36768
|
+
input_text_tokens: zod10.number(),
|
|
36769
|
+
input_audio_tokens: zod10.number(),
|
|
36770
|
+
input_audio_duration_ms: zod10.number(),
|
|
36771
|
+
output_text_tokens: zod10.number(),
|
|
36772
|
+
output_audio_tokens: zod10.number(),
|
|
36773
|
+
output_audio_duration_ms: zod10.number(),
|
|
36774
|
+
cost_usd: zod10.string(),
|
|
36775
|
+
input_cost_usd: zod10.string(),
|
|
36776
|
+
input_text_cost_usd: zod10.string(),
|
|
36777
|
+
input_audio_cost_usd: zod10.string(),
|
|
36778
|
+
output_cost_usd: zod10.string(),
|
|
36779
|
+
output_text_cost_usd: zod10.string(),
|
|
36780
|
+
output_audio_cost_usd: zod10.string()
|
|
36781
|
+
})
|
|
36782
|
+
).describe("Per-request usage log entries ordered by end_time, uuid (per `sort`)."),
|
|
36783
|
+
next_page_cursor: zod10.string().or(zod10.null()).optional().describe(
|
|
36784
|
+
"A pagination token that references the next page of results. When more data is available, this field contains a value to pass in the cursor parameter of a subsequent request. When null, no additional results are available."
|
|
36785
|
+
)
|
|
36786
|
+
});
|
|
36392
36787
|
var createTemporaryApiKeyBodyExpiresInSecondsMax = 3600;
|
|
36393
36788
|
var createTemporaryApiKeyBodyClientReferenceIdMaxOne = 256;
|
|
36394
36789
|
var createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne = 18e3;
|
|
36395
36790
|
var createTemporaryApiKeyBody = zod10.object({
|
|
36396
|
-
usage_type: zod10.enum(["transcribe_websocket"]),
|
|
36791
|
+
usage_type: zod10.enum(["transcribe_websocket", "tts_rt"]),
|
|
36397
36792
|
expires_in_seconds: zod10.number().min(1).max(createTemporaryApiKeyBodyExpiresInSecondsMax).describe("Duration in seconds until the temporary API key expires."),
|
|
36398
36793
|
client_reference_id: zod10.string().max(createTemporaryApiKeyBodyClientReferenceIdMaxOne).or(zod10.null()).optional().describe("Optional tracking identifier string. Does not need to be unique."),
|
|
36399
36794
|
single_use: zod10.boolean().or(zod10.null()).optional().describe("If true, the temporary API key can be used only once."),
|
|
@@ -36401,6 +36796,28 @@ var createTemporaryApiKeyBody = zod10.object({
|
|
|
36401
36796
|
"Maximum WebSocket connection duration in seconds. If exceeded, the connection will be dropped. If not set, no limit is applied."
|
|
36402
36797
|
)
|
|
36403
36798
|
});
|
|
36799
|
+
var getConcurrencyLimitsResponse = zod10.object({
|
|
36800
|
+
project: zod10.object({
|
|
36801
|
+
current: zod10.object({
|
|
36802
|
+
transcribe_concurrent: zod10.number(),
|
|
36803
|
+
tts_concurrent: zod10.number()
|
|
36804
|
+
}).describe("Live counts read from Redis"),
|
|
36805
|
+
limits: zod10.object({
|
|
36806
|
+
transcribe_concurrent: zod10.number().or(zod10.null()),
|
|
36807
|
+
tts_concurrent: zod10.number().or(zod10.null())
|
|
36808
|
+
}).describe("Configured limits")
|
|
36809
|
+
}),
|
|
36810
|
+
organization: zod10.object({
|
|
36811
|
+
current: zod10.object({
|
|
36812
|
+
transcribe_concurrent: zod10.number(),
|
|
36813
|
+
tts_concurrent: zod10.number()
|
|
36814
|
+
}).describe("Live counts read from Redis"),
|
|
36815
|
+
limits: zod10.object({
|
|
36816
|
+
transcribe_concurrent: zod10.number().or(zod10.null()),
|
|
36817
|
+
tts_concurrent: zod10.number().or(zod10.null())
|
|
36818
|
+
}).describe("Configured limits")
|
|
36819
|
+
})
|
|
36820
|
+
});
|
|
36404
36821
|
|
|
36405
36822
|
// src/generated/soniox/streaming-types.zod.ts
|
|
36406
36823
|
var streaming_types_zod_exports = {};
|
|
@@ -36485,10 +36902,10 @@ var sonioxStructuredContextSchema = zod11.object({
|
|
|
36485
36902
|
var sonioxContextSchema = zod11.union([sonioxStructuredContextSchema, zod11.string()]);
|
|
36486
36903
|
var sonioxRealtimeModelSchema = zod11.enum([
|
|
36487
36904
|
"stt-rt-v4",
|
|
36488
|
-
"stt-rt-v3",
|
|
36489
36905
|
"stt-rt-preview",
|
|
36490
36906
|
"stt-rt-v3-preview",
|
|
36491
|
-
"stt-rt-preview-v2"
|
|
36907
|
+
"stt-rt-preview-v2",
|
|
36908
|
+
"stt-rt-v3"
|
|
36492
36909
|
]);
|
|
36493
36910
|
var streamingTranscriberParams3 = zod11.object({
|
|
36494
36911
|
model: sonioxRealtimeModelSchema,
|
|
@@ -36496,12 +36913,16 @@ var streamingTranscriberParams3 = zod11.object({
|
|
|
36496
36913
|
sampleRate: zod11.number().optional(),
|
|
36497
36914
|
numChannels: zod11.number().optional(),
|
|
36498
36915
|
languageHints: zod11.array(zod11.string()).optional(),
|
|
36916
|
+
languageHintsStrict: zod11.boolean().optional(),
|
|
36499
36917
|
context: sonioxContextSchema.optional(),
|
|
36500
36918
|
enableSpeakerDiarization: zod11.boolean().optional(),
|
|
36501
36919
|
enableLanguageIdentification: zod11.boolean().optional(),
|
|
36502
36920
|
enableEndpointDetection: zod11.boolean().optional(),
|
|
36921
|
+
maxEndpointDelayMs: zod11.number().optional(),
|
|
36503
36922
|
translation: sonioxTranslationConfigSchema.optional(),
|
|
36504
|
-
clientReferenceId: zod11.string().optional()
|
|
36923
|
+
clientReferenceId: zod11.string().optional(),
|
|
36924
|
+
keepaliveIntervalMs: zod11.number().optional(),
|
|
36925
|
+
connectTimeoutMs: zod11.number().optional()
|
|
36505
36926
|
});
|
|
36506
36927
|
var sonioxTranslationStatusSchema = zod11.enum(["original", "translation", "none"]);
|
|
36507
36928
|
var sonioxTokenSchema = zod11.object({
|
|
@@ -37093,6 +37514,7 @@ __export(schema_exports5, {
|
|
|
37093
37514
|
V1ListenPostParametersCallbackMethod: () => V1ListenPostParametersCallbackMethod,
|
|
37094
37515
|
V1ListenPostParametersCustomIntentMode: () => V1ListenPostParametersCustomIntentMode,
|
|
37095
37516
|
V1ListenPostParametersCustomTopicMode: () => V1ListenPostParametersCustomTopicMode,
|
|
37517
|
+
V1ListenPostParametersDiarizeModel: () => V1ListenPostParametersDiarizeModel,
|
|
37096
37518
|
V1ListenPostParametersEncoding: () => V1ListenPostParametersEncoding,
|
|
37097
37519
|
V1ListenPostParametersModel0: () => V1ListenPostParametersModel0,
|
|
37098
37520
|
V1ListenPostParametersRedactSchemaOneOf1Items: () => V1ListenPostParametersRedactSchemaOneOf1Items,
|
|
@@ -37131,6 +37553,13 @@ __export(schema_exports5, {
|
|
|
37131
37553
|
V1SpeakPostParametersSampleRate4: () => V1SpeakPostParametersSampleRate4
|
|
37132
37554
|
});
|
|
37133
37555
|
|
|
37556
|
+
// src/generated/deepgram/schema/v1ListenPostParametersDiarizeModel.ts
|
|
37557
|
+
var V1ListenPostParametersDiarizeModel = {
|
|
37558
|
+
latest: "latest",
|
|
37559
|
+
v1: "v1",
|
|
37560
|
+
v2: "v2"
|
|
37561
|
+
};
|
|
37562
|
+
|
|
37134
37563
|
// src/generated/deepgram/schema/v1ListenPostParametersModel0.ts
|
|
37135
37564
|
var V1ListenPostParametersModel0 = {
|
|
37136
37565
|
"nova-3": "nova-3",
|
|
@@ -37347,6 +37776,7 @@ var V1SpeakPostParametersSampleRate = {
|
|
|
37347
37776
|
var schema_exports6 = {};
|
|
37348
37777
|
__export(schema_exports6, {
|
|
37349
37778
|
AudioResponseFormat: () => AudioResponseFormat,
|
|
37779
|
+
AudioTranscriptionDelay: () => AudioTranscriptionDelay,
|
|
37350
37780
|
CreateSpeechRequestResponseFormat: () => CreateSpeechRequestResponseFormat,
|
|
37351
37781
|
CreateSpeechRequestStreamFormat: () => CreateSpeechRequestStreamFormat,
|
|
37352
37782
|
CreateTranscriptionRequestTimestampGranularitiesItem: () => CreateTranscriptionRequestTimestampGranularitiesItem,
|
|
@@ -37366,12 +37796,14 @@ __export(schema_exports6, {
|
|
|
37366
37796
|
RealtimeAudioFormatsAnyOfType: () => RealtimeAudioFormatsAnyOfType,
|
|
37367
37797
|
RealtimeCreateClientSecretRequestExpiresAfterAnchor: () => RealtimeCreateClientSecretRequestExpiresAfterAnchor,
|
|
37368
37798
|
RealtimeFunctionToolType: () => RealtimeFunctionToolType,
|
|
37799
|
+
RealtimeReasoningEffort: () => RealtimeReasoningEffort,
|
|
37369
37800
|
RealtimeSessionCreateRequestGAIncludeItem: () => RealtimeSessionCreateRequestGAIncludeItem,
|
|
37370
37801
|
RealtimeSessionCreateRequestGAOutputModalitiesItem: () => RealtimeSessionCreateRequestGAOutputModalitiesItem,
|
|
37371
37802
|
RealtimeSessionCreateRequestGAType: () => RealtimeSessionCreateRequestGAType,
|
|
37372
37803
|
RealtimeSessionCreateRequestModalitiesItem: () => RealtimeSessionCreateRequestModalitiesItem,
|
|
37373
37804
|
RealtimeSessionCreateRequestToolsItemType: () => RealtimeSessionCreateRequestToolsItemType,
|
|
37374
37805
|
RealtimeSessionCreateResponseGAIncludeItem: () => RealtimeSessionCreateResponseGAIncludeItem,
|
|
37806
|
+
RealtimeSessionCreateResponseGAObject: () => RealtimeSessionCreateResponseGAObject,
|
|
37375
37807
|
RealtimeSessionCreateResponseGAOutputModalitiesItem: () => RealtimeSessionCreateResponseGAOutputModalitiesItem,
|
|
37376
37808
|
RealtimeSessionCreateResponseGAType: () => RealtimeSessionCreateResponseGAType,
|
|
37377
37809
|
RealtimeSessionCreateResponseIncludeItem: () => RealtimeSessionCreateResponseIncludeItem,
|
|
@@ -37402,6 +37834,15 @@ __export(schema_exports6, {
|
|
|
37402
37834
|
VoiceResourceObject: () => VoiceResourceObject
|
|
37403
37835
|
});
|
|
37404
37836
|
|
|
37837
|
+
// src/generated/openai/schema/audioTranscriptionDelay.ts
|
|
37838
|
+
var AudioTranscriptionDelay = {
|
|
37839
|
+
minimal: "minimal",
|
|
37840
|
+
low: "low",
|
|
37841
|
+
medium: "medium",
|
|
37842
|
+
high: "high",
|
|
37843
|
+
xhigh: "xhigh"
|
|
37844
|
+
};
|
|
37845
|
+
|
|
37405
37846
|
// src/generated/openai/schema/createSpeechRequestResponseFormat.ts
|
|
37406
37847
|
var CreateSpeechRequestResponseFormat = {
|
|
37407
37848
|
mp3: "mp3",
|
|
@@ -37514,6 +37955,15 @@ var RealtimeFunctionToolType = {
|
|
|
37514
37955
|
function: "function"
|
|
37515
37956
|
};
|
|
37516
37957
|
|
|
37958
|
+
// src/generated/openai/schema/realtimeReasoningEffort.ts
|
|
37959
|
+
var RealtimeReasoningEffort = {
|
|
37960
|
+
minimal: "minimal",
|
|
37961
|
+
low: "low",
|
|
37962
|
+
medium: "medium",
|
|
37963
|
+
high: "high",
|
|
37964
|
+
xhigh: "xhigh"
|
|
37965
|
+
};
|
|
37966
|
+
|
|
37517
37967
|
// src/generated/openai/schema/realtimeSessionCreateRequestGAIncludeItem.ts
|
|
37518
37968
|
var RealtimeSessionCreateRequestGAIncludeItem = {
|
|
37519
37969
|
iteminput_audio_transcriptionlogprobs: "item.input_audio_transcription.logprobs"
|
|
@@ -37546,6 +37996,11 @@ var RealtimeSessionCreateResponseGAIncludeItem = {
|
|
|
37546
37996
|
iteminput_audio_transcriptionlogprobs: "item.input_audio_transcription.logprobs"
|
|
37547
37997
|
};
|
|
37548
37998
|
|
|
37999
|
+
// src/generated/openai/schema/realtimeSessionCreateResponseGAObject.ts
|
|
38000
|
+
var RealtimeSessionCreateResponseGAObject = {
|
|
38001
|
+
realtimesession: "realtime.session"
|
|
38002
|
+
};
|
|
38003
|
+
|
|
37549
38004
|
// src/generated/openai/schema/realtimeSessionCreateResponseGAOutputModalitiesItem.ts
|
|
37550
38005
|
var RealtimeSessionCreateResponseGAOutputModalitiesItem = {
|
|
37551
38006
|
text: "text",
|
|
@@ -37690,6 +38145,7 @@ __export(schema_exports7, {
|
|
|
37690
38145
|
AutoChaptersResultErrorType: () => AutoChaptersResultErrorType,
|
|
37691
38146
|
ErrorResponseError: () => ErrorResponseError,
|
|
37692
38147
|
GetJobsJobidAlignmentTags: () => GetJobsJobidAlignmentTags,
|
|
38148
|
+
GetJobsJobidObjectUrlsUrlForItem: () => GetJobsJobidObjectUrlsUrlForItem,
|
|
37693
38149
|
GetJobsJobidTranscriptFormat: () => GetJobsJobidTranscriptFormat,
|
|
37694
38150
|
JobDetailsStatus: () => JobDetailsStatus,
|
|
37695
38151
|
JobMode: () => JobMode,
|
|
@@ -37759,6 +38215,13 @@ var GetJobsJobidAlignmentTags = {
|
|
|
37759
38215
|
one_per_line: "one_per_line"
|
|
37760
38216
|
};
|
|
37761
38217
|
|
|
38218
|
+
// src/generated/speechmatics/schema/getJobsJobidObjectUrlsUrlForItem.ts
|
|
38219
|
+
var GetJobsJobidObjectUrlsUrlForItem = {
|
|
38220
|
+
data: "data",
|
|
38221
|
+
audio_mp3: "audio_mp3",
|
|
38222
|
+
transcript: "transcript"
|
|
38223
|
+
};
|
|
38224
|
+
|
|
37762
38225
|
// src/generated/speechmatics/schema/getJobsJobidTranscriptFormat.ts
|
|
37763
38226
|
var GetJobsJobidTranscriptFormat = {
|
|
37764
38227
|
"json-v2": "json-v2",
|
|
@@ -37875,6 +38338,19 @@ var WrittenFormRecognitionResultType = {
|
|
|
37875
38338
|
word: "word"
|
|
37876
38339
|
};
|
|
37877
38340
|
|
|
38341
|
+
// src/generated/soniox/sdk-types.ts
|
|
38342
|
+
var sdk_types_exports = {};
|
|
38343
|
+
__export(sdk_types_exports, {
|
|
38344
|
+
RealtimeSttSession: () => RealtimeSttSession,
|
|
38345
|
+
SonioxFetchHttpClient: () => FetchHttpClient,
|
|
38346
|
+
SonioxNodeClient: () => SonioxNodeClient
|
|
38347
|
+
});
|
|
38348
|
+
import {
|
|
38349
|
+
FetchHttpClient,
|
|
38350
|
+
RealtimeSttSession,
|
|
38351
|
+
SonioxNodeClient
|
|
38352
|
+
} from "@soniox/node";
|
|
38353
|
+
|
|
37878
38354
|
// src/generated/elevenlabs/schema/index.ts
|
|
37879
38355
|
var schema_exports8 = {};
|
|
37880
38356
|
__export(schema_exports8, {
|
|
@@ -37952,6 +38428,10 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
|
|
|
37952
38428
|
deleteJobsJobidParams: () => deleteJobsJobidParams,
|
|
37953
38429
|
deleteJobsJobidQueryParams: () => deleteJobsJobidQueryParams,
|
|
37954
38430
|
deleteJobsJobidResponse: () => deleteJobsJobidResponse,
|
|
38431
|
+
deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault: () => deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault,
|
|
38432
|
+
deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault: () => deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault,
|
|
38433
|
+
deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
|
|
38434
|
+
deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
|
|
37955
38435
|
deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
|
|
37956
38436
|
deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
|
|
37957
38437
|
deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
|
|
@@ -37967,8 +38447,15 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
|
|
|
37967
38447
|
getJobsJobidDataResponse: () => getJobsJobidDataResponse,
|
|
37968
38448
|
getJobsJobidLogParams: () => getJobsJobidLogParams,
|
|
37969
38449
|
getJobsJobidLogResponse: () => getJobsJobidLogResponse,
|
|
38450
|
+
getJobsJobidObjectUrlsParams: () => getJobsJobidObjectUrlsParams,
|
|
38451
|
+
getJobsJobidObjectUrlsQueryParams: () => getJobsJobidObjectUrlsQueryParams,
|
|
38452
|
+
getJobsJobidObjectUrlsResponse: () => getJobsJobidObjectUrlsResponse,
|
|
37970
38453
|
getJobsJobidParams: () => getJobsJobidParams,
|
|
37971
38454
|
getJobsJobidResponse: () => getJobsJobidResponse,
|
|
38455
|
+
getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault: () => getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault,
|
|
38456
|
+
getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault: () => getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault,
|
|
38457
|
+
getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
|
|
38458
|
+
getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
|
|
37972
38459
|
getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
|
|
37973
38460
|
getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
|
|
37974
38461
|
getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
|
|
@@ -37983,6 +38470,8 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
|
|
|
37983
38470
|
getJobsJobidTranscriptQueryParams: () => getJobsJobidTranscriptQueryParams,
|
|
37984
38471
|
getJobsJobidTranscriptResponse: () => getJobsJobidTranscriptResponse,
|
|
37985
38472
|
getJobsJobidTranscriptResponseJobDurationMin: () => getJobsJobidTranscriptResponseJobDurationMin,
|
|
38473
|
+
getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
|
|
38474
|
+
getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
|
|
37986
38475
|
getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp,
|
|
37987
38476
|
getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
|
|
37988
38477
|
getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax,
|
|
@@ -37994,6 +38483,10 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
|
|
|
37994
38483
|
getJobsQueryLimitMax: () => getJobsQueryLimitMax,
|
|
37995
38484
|
getJobsQueryParams: () => getJobsQueryParams,
|
|
37996
38485
|
getJobsResponse: () => getJobsResponse,
|
|
38486
|
+
getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault: () => getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault,
|
|
38487
|
+
getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault: () => getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault,
|
|
38488
|
+
getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
|
|
38489
|
+
getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
|
|
37997
38490
|
getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
|
|
37998
38491
|
getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
|
|
37999
38492
|
getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
|
|
@@ -38004,12 +38497,18 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
|
|
|
38004
38497
|
getJobsResponseJobsItemDurationMin: () => getJobsResponseJobsItemDurationMin,
|
|
38005
38498
|
getUsageQueryParams: () => getUsageQueryParams,
|
|
38006
38499
|
getUsageResponse: () => getUsageResponse,
|
|
38007
|
-
postJobsBody: () => postJobsBody
|
|
38500
|
+
postJobsBody: () => postJobsBody,
|
|
38501
|
+
postJobsHeader: () => postJobsHeader
|
|
38008
38502
|
});
|
|
38009
38503
|
import { z as zod12 } from "zod";
|
|
38504
|
+
var postJobsHeader = zod12.object({
|
|
38505
|
+
"X-SM-Processing-Data": zod12.string().optional().describe(
|
|
38506
|
+
'**Note**: Only available for on-prem\nJSON dictionary of processing settings for the job worker. Currently supports `parallel_engines` (integer), which controls the number of engines the worker can use in parallel for this job, and `user_id` (string), which is the user id for this job. Example: `{"parallel_engines": 4}`'
|
|
38507
|
+
)
|
|
38508
|
+
});
|
|
38010
38509
|
var postJobsBody = zod12.object({
|
|
38011
38510
|
config: zod12.string().describe(
|
|
38012
|
-
"JSON containing a `JobConfig` model indicating the type and parameters for the recognition job."
|
|
38511
|
+
"JSON containing a [`JobConfig`](/speech-to-text/batch/input#jobconfig-schema) model indicating the type and parameters for the recognition job."
|
|
38013
38512
|
),
|
|
38014
38513
|
data_file: zod12.instanceof(File).optional().describe(
|
|
38015
38514
|
"The data file to be processed. Alternatively the data file can be fetched from a url specified in `JobConfig`."
|
|
@@ -38031,9 +38530,13 @@ var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitiv
|
|
|
38031
38530
|
var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
|
|
38032
38531
|
var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
|
|
38033
38532
|
var getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
|
|
38533
|
+
var getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
|
|
38534
|
+
var getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
|
|
38034
38535
|
var getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
|
|
38035
38536
|
var getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
|
|
38036
38537
|
var getJobsResponseJobsItemConfigTranslationConfigTargetLanguagesMax = 5;
|
|
38538
|
+
var getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault = "auto";
|
|
38539
|
+
var getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault = "brief";
|
|
38037
38540
|
var getJobsResponse = zod12.object({
|
|
38038
38541
|
jobs: zod12.array(
|
|
38039
38542
|
zod12.object({
|
|
@@ -38113,19 +38616,30 @@ var getJobsResponse = zod12.object({
|
|
|
38113
38616
|
max_delay_mode: zod12.enum(["fixed", "flexible"]).optional().describe(
|
|
38114
38617
|
"Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
|
|
38115
38618
|
),
|
|
38619
|
+
audio_filtering_config: zod12.object({
|
|
38620
|
+
volume_threshold: zod12.number().min(
|
|
38621
|
+
getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
|
|
38622
|
+
).max(
|
|
38623
|
+
getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
|
|
38624
|
+
).optional().describe(
|
|
38625
|
+
"Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
|
|
38626
|
+
)
|
|
38627
|
+
}).optional().describe("Configuration for limiting the transcription of quiet audio."),
|
|
38116
38628
|
transcript_filtering_config: zod12.object({
|
|
38117
38629
|
remove_disfluencies: zod12.boolean().optional().describe(
|
|
38118
|
-
"If true, words
|
|
38630
|
+
"If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
|
|
38119
38631
|
),
|
|
38120
38632
|
replacements: zod12.array(
|
|
38121
38633
|
zod12.object({
|
|
38122
|
-
from: zod12.string(),
|
|
38123
|
-
to: zod12.string()
|
|
38634
|
+
from: zod12.string().describe("The text or pattern identified to be replaced."),
|
|
38635
|
+
to: zod12.string().describe(
|
|
38636
|
+
"The corrected or formatted string to appear in the transcript."
|
|
38637
|
+
)
|
|
38124
38638
|
})
|
|
38125
38639
|
).optional().describe(
|
|
38126
|
-
|
|
38640
|
+
'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
|
|
38127
38641
|
)
|
|
38128
|
-
}).optional().describe("Configuration for applying filtering to the transcription"),
|
|
38642
|
+
}).optional().describe("Configuration for applying filtering to the transcription."),
|
|
38129
38643
|
speaker_diarization_config: zod12.object({
|
|
38130
38644
|
prefer_current_speaker: zod12.boolean().optional().describe(
|
|
38131
38645
|
'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
|
|
@@ -38136,6 +38650,19 @@ var getJobsResponse = zod12.object({
|
|
|
38136
38650
|
getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
|
|
38137
38651
|
).optional().describe(
|
|
38138
38652
|
"Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
|
|
38653
|
+
),
|
|
38654
|
+
get_speakers: zod12.boolean().optional().describe(
|
|
38655
|
+
"If true, speaker identifiers will be returned at the end of transcript."
|
|
38656
|
+
),
|
|
38657
|
+
speakers: zod12.array(
|
|
38658
|
+
zod12.object({
|
|
38659
|
+
label: zod12.string().min(1).describe(
|
|
38660
|
+
"Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
|
|
38661
|
+
),
|
|
38662
|
+
speaker_identifiers: zod12.array(zod12.string().describe("Speaker identifiers.")).min(1)
|
|
38663
|
+
})
|
|
38664
|
+
).optional().describe(
|
|
38665
|
+
"Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
|
|
38139
38666
|
)
|
|
38140
38667
|
}).optional().describe("Configuration for speaker diarization")
|
|
38141
38668
|
}).optional(),
|
|
@@ -38193,10 +38720,14 @@ var getJobsResponse = zod12.object({
|
|
|
38193
38720
|
default_language: zod12.string().optional()
|
|
38194
38721
|
}).optional(),
|
|
38195
38722
|
summarization_config: zod12.object({
|
|
38196
|
-
content_type: zod12.enum(["auto", "informative", "conversational"]).
|
|
38197
|
-
|
|
38723
|
+
content_type: zod12.enum(["auto", "informative", "conversational"]).default(getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault).describe(
|
|
38724
|
+
"Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
|
|
38725
|
+
),
|
|
38726
|
+
summary_length: zod12.enum(["brief", "detailed"]).default(getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault).describe(
|
|
38727
|
+
"Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
|
|
38728
|
+
),
|
|
38198
38729
|
summary_type: zod12.enum(["paragraphs", "bullets"]).optional()
|
|
38199
|
-
}).optional(),
|
|
38730
|
+
}).optional().describe("Configuration options for summarization."),
|
|
38200
38731
|
sentiment_analysis_config: zod12.object({}).optional(),
|
|
38201
38732
|
topic_detection_config: zod12.object({
|
|
38202
38733
|
topics: zod12.array(zod12.string()).optional()
|
|
@@ -38218,7 +38749,7 @@ var getJobsResponse = zod12.object({
|
|
|
38218
38749
|
"Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
|
|
38219
38750
|
)
|
|
38220
38751
|
}).describe(
|
|
38221
|
-
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs
|
|
38752
|
+
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
|
|
38222
38753
|
)
|
|
38223
38754
|
)
|
|
38224
38755
|
});
|
|
@@ -38230,9 +38761,13 @@ var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitiv
|
|
|
38230
38761
|
var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
|
|
38231
38762
|
var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
|
|
38232
38763
|
var getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
|
|
38764
|
+
var getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
|
|
38765
|
+
var getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
|
|
38233
38766
|
var getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
|
|
38234
38767
|
var getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
|
|
38235
38768
|
var getJobsJobidResponseJobConfigTranslationConfigTargetLanguagesMax = 5;
|
|
38769
|
+
var getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault = "auto";
|
|
38770
|
+
var getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault = "brief";
|
|
38236
38771
|
var getJobsJobidResponse = zod12.object({
|
|
38237
38772
|
job: zod12.object({
|
|
38238
38773
|
created_at: zod12.string().datetime({}).describe("The UTC date time the job was created."),
|
|
@@ -38309,19 +38844,30 @@ var getJobsJobidResponse = zod12.object({
|
|
|
38309
38844
|
max_delay_mode: zod12.enum(["fixed", "flexible"]).optional().describe(
|
|
38310
38845
|
"Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
|
|
38311
38846
|
),
|
|
38847
|
+
audio_filtering_config: zod12.object({
|
|
38848
|
+
volume_threshold: zod12.number().min(
|
|
38849
|
+
getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
|
|
38850
|
+
).max(
|
|
38851
|
+
getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
|
|
38852
|
+
).optional().describe(
|
|
38853
|
+
"Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
|
|
38854
|
+
)
|
|
38855
|
+
}).optional().describe("Configuration for limiting the transcription of quiet audio."),
|
|
38312
38856
|
transcript_filtering_config: zod12.object({
|
|
38313
38857
|
remove_disfluencies: zod12.boolean().optional().describe(
|
|
38314
|
-
"If true, words
|
|
38858
|
+
"If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
|
|
38315
38859
|
),
|
|
38316
38860
|
replacements: zod12.array(
|
|
38317
38861
|
zod12.object({
|
|
38318
|
-
from: zod12.string(),
|
|
38319
|
-
to: zod12.string()
|
|
38862
|
+
from: zod12.string().describe("The text or pattern identified to be replaced."),
|
|
38863
|
+
to: zod12.string().describe(
|
|
38864
|
+
"The corrected or formatted string to appear in the transcript."
|
|
38865
|
+
)
|
|
38320
38866
|
})
|
|
38321
38867
|
).optional().describe(
|
|
38322
|
-
|
|
38868
|
+
'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
|
|
38323
38869
|
)
|
|
38324
|
-
}).optional().describe("Configuration for applying filtering to the transcription"),
|
|
38870
|
+
}).optional().describe("Configuration for applying filtering to the transcription."),
|
|
38325
38871
|
speaker_diarization_config: zod12.object({
|
|
38326
38872
|
prefer_current_speaker: zod12.boolean().optional().describe(
|
|
38327
38873
|
'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
|
|
@@ -38332,6 +38878,19 @@ var getJobsJobidResponse = zod12.object({
|
|
|
38332
38878
|
getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
|
|
38333
38879
|
).optional().describe(
|
|
38334
38880
|
"Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
|
|
38881
|
+
),
|
|
38882
|
+
get_speakers: zod12.boolean().optional().describe(
|
|
38883
|
+
"If true, speaker identifiers will be returned at the end of transcript."
|
|
38884
|
+
),
|
|
38885
|
+
speakers: zod12.array(
|
|
38886
|
+
zod12.object({
|
|
38887
|
+
label: zod12.string().min(1).describe(
|
|
38888
|
+
"Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
|
|
38889
|
+
),
|
|
38890
|
+
speaker_identifiers: zod12.array(zod12.string().describe("Speaker identifiers.")).min(1)
|
|
38891
|
+
})
|
|
38892
|
+
).optional().describe(
|
|
38893
|
+
"Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
|
|
38335
38894
|
)
|
|
38336
38895
|
}).optional().describe("Configuration for speaker diarization")
|
|
38337
38896
|
}).optional(),
|
|
@@ -38387,10 +38946,14 @@ var getJobsJobidResponse = zod12.object({
|
|
|
38387
38946
|
default_language: zod12.string().optional()
|
|
38388
38947
|
}).optional(),
|
|
38389
38948
|
summarization_config: zod12.object({
|
|
38390
|
-
content_type: zod12.enum(["auto", "informative", "conversational"]).
|
|
38391
|
-
|
|
38949
|
+
content_type: zod12.enum(["auto", "informative", "conversational"]).default(getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault).describe(
|
|
38950
|
+
"Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
|
|
38951
|
+
),
|
|
38952
|
+
summary_length: zod12.enum(["brief", "detailed"]).default(getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault).describe(
|
|
38953
|
+
"Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
|
|
38954
|
+
),
|
|
38392
38955
|
summary_type: zod12.enum(["paragraphs", "bullets"]).optional()
|
|
38393
|
-
}).optional(),
|
|
38956
|
+
}).optional().describe("Configuration options for summarization."),
|
|
38394
38957
|
sentiment_analysis_config: zod12.object({}).optional(),
|
|
38395
38958
|
topic_detection_config: zod12.object({
|
|
38396
38959
|
topics: zod12.array(zod12.string()).optional()
|
|
@@ -38412,7 +38975,7 @@ var getJobsJobidResponse = zod12.object({
|
|
|
38412
38975
|
"Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
|
|
38413
38976
|
)
|
|
38414
38977
|
}).describe(
|
|
38415
|
-
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs
|
|
38978
|
+
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
|
|
38416
38979
|
)
|
|
38417
38980
|
});
|
|
38418
38981
|
var deleteJobsJobidParams = zod12.object({
|
|
@@ -38428,9 +38991,13 @@ var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensi
|
|
|
38428
38991
|
var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
|
|
38429
38992
|
var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
|
|
38430
38993
|
var deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
|
|
38994
|
+
var deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
|
|
38995
|
+
var deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
|
|
38431
38996
|
var deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
|
|
38432
38997
|
var deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
|
|
38433
38998
|
var deleteJobsJobidResponseJobConfigTranslationConfigTargetLanguagesMax = 5;
|
|
38999
|
+
var deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault = "auto";
|
|
39000
|
+
var deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault = "brief";
|
|
38434
39001
|
var deleteJobsJobidResponse = zod12.object({
|
|
38435
39002
|
job: zod12.object({
|
|
38436
39003
|
created_at: zod12.string().datetime({}).describe("The UTC date time the job was created."),
|
|
@@ -38507,19 +39074,30 @@ var deleteJobsJobidResponse = zod12.object({
|
|
|
38507
39074
|
max_delay_mode: zod12.enum(["fixed", "flexible"]).optional().describe(
|
|
38508
39075
|
"Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
|
|
38509
39076
|
),
|
|
39077
|
+
audio_filtering_config: zod12.object({
|
|
39078
|
+
volume_threshold: zod12.number().min(
|
|
39079
|
+
deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
|
|
39080
|
+
).max(
|
|
39081
|
+
deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
|
|
39082
|
+
).optional().describe(
|
|
39083
|
+
"Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
|
|
39084
|
+
)
|
|
39085
|
+
}).optional().describe("Configuration for limiting the transcription of quiet audio."),
|
|
38510
39086
|
transcript_filtering_config: zod12.object({
|
|
38511
39087
|
remove_disfluencies: zod12.boolean().optional().describe(
|
|
38512
|
-
"If true, words
|
|
39088
|
+
"If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
|
|
38513
39089
|
),
|
|
38514
39090
|
replacements: zod12.array(
|
|
38515
39091
|
zod12.object({
|
|
38516
|
-
from: zod12.string(),
|
|
38517
|
-
to: zod12.string()
|
|
39092
|
+
from: zod12.string().describe("The text or pattern identified to be replaced."),
|
|
39093
|
+
to: zod12.string().describe(
|
|
39094
|
+
"The corrected or formatted string to appear in the transcript."
|
|
39095
|
+
)
|
|
38518
39096
|
})
|
|
38519
39097
|
).optional().describe(
|
|
38520
|
-
|
|
39098
|
+
'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
|
|
38521
39099
|
)
|
|
38522
|
-
}).optional().describe("Configuration for applying filtering to the transcription"),
|
|
39100
|
+
}).optional().describe("Configuration for applying filtering to the transcription."),
|
|
38523
39101
|
speaker_diarization_config: zod12.object({
|
|
38524
39102
|
prefer_current_speaker: zod12.boolean().optional().describe(
|
|
38525
39103
|
'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
|
|
@@ -38530,6 +39108,19 @@ var deleteJobsJobidResponse = zod12.object({
|
|
|
38530
39108
|
deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
|
|
38531
39109
|
).optional().describe(
|
|
38532
39110
|
"Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
|
|
39111
|
+
),
|
|
39112
|
+
get_speakers: zod12.boolean().optional().describe(
|
|
39113
|
+
"If true, speaker identifiers will be returned at the end of transcript."
|
|
39114
|
+
),
|
|
39115
|
+
speakers: zod12.array(
|
|
39116
|
+
zod12.object({
|
|
39117
|
+
label: zod12.string().min(1).describe(
|
|
39118
|
+
"Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
|
|
39119
|
+
),
|
|
39120
|
+
speaker_identifiers: zod12.array(zod12.string().describe("Speaker identifiers.")).min(1)
|
|
39121
|
+
})
|
|
39122
|
+
).optional().describe(
|
|
39123
|
+
"Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
|
|
38533
39124
|
)
|
|
38534
39125
|
}).optional().describe("Configuration for speaker diarization")
|
|
38535
39126
|
}).optional(),
|
|
@@ -38585,10 +39176,14 @@ var deleteJobsJobidResponse = zod12.object({
|
|
|
38585
39176
|
default_language: zod12.string().optional()
|
|
38586
39177
|
}).optional(),
|
|
38587
39178
|
summarization_config: zod12.object({
|
|
38588
|
-
content_type: zod12.enum(["auto", "informative", "conversational"]).
|
|
38589
|
-
|
|
39179
|
+
content_type: zod12.enum(["auto", "informative", "conversational"]).default(deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault).describe(
|
|
39180
|
+
"Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
|
|
39181
|
+
),
|
|
39182
|
+
summary_length: zod12.enum(["brief", "detailed"]).default(deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault).describe(
|
|
39183
|
+
"Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
|
|
39184
|
+
),
|
|
38590
39185
|
summary_type: zod12.enum(["paragraphs", "bullets"]).optional()
|
|
38591
|
-
}).optional(),
|
|
39186
|
+
}).optional().describe("Configuration options for summarization."),
|
|
38592
39187
|
sentiment_analysis_config: zod12.object({}).optional(),
|
|
38593
39188
|
topic_detection_config: zod12.object({
|
|
38594
39189
|
topics: zod12.array(zod12.string()).optional()
|
|
@@ -38610,7 +39205,7 @@ var deleteJobsJobidResponse = zod12.object({
|
|
|
38610
39205
|
"Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
|
|
38611
39206
|
)
|
|
38612
39207
|
}).describe(
|
|
38613
|
-
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs
|
|
39208
|
+
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
|
|
38614
39209
|
)
|
|
38615
39210
|
});
|
|
38616
39211
|
var getJobsJobidDataParams = zod12.object({
|
|
@@ -38632,6 +39227,8 @@ var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverride
|
|
|
38632
39227
|
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
|
|
38633
39228
|
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
|
|
38634
39229
|
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
|
|
39230
|
+
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
|
|
39231
|
+
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
|
|
38635
39232
|
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
|
|
38636
39233
|
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
|
|
38637
39234
|
var getJobsJobidTranscriptResponseResultsItemVolumeMin = 0;
|
|
@@ -38703,19 +39300,28 @@ var getJobsJobidTranscriptResponse = zod12.object({
|
|
|
38703
39300
|
max_delay_mode: zod12.enum(["fixed", "flexible"]).optional().describe(
|
|
38704
39301
|
"Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
|
|
38705
39302
|
),
|
|
39303
|
+
audio_filtering_config: zod12.object({
|
|
39304
|
+
volume_threshold: zod12.number().min(
|
|
39305
|
+
getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
|
|
39306
|
+
).max(
|
|
39307
|
+
getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
|
|
39308
|
+
).optional().describe(
|
|
39309
|
+
"Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
|
|
39310
|
+
)
|
|
39311
|
+
}).optional().describe("Configuration for limiting the transcription of quiet audio."),
|
|
38706
39312
|
transcript_filtering_config: zod12.object({
|
|
38707
39313
|
remove_disfluencies: zod12.boolean().optional().describe(
|
|
38708
|
-
"If true, words
|
|
39314
|
+
"If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
|
|
38709
39315
|
),
|
|
38710
39316
|
replacements: zod12.array(
|
|
38711
39317
|
zod12.object({
|
|
38712
|
-
from: zod12.string(),
|
|
38713
|
-
to: zod12.string()
|
|
39318
|
+
from: zod12.string().describe("The text or pattern identified to be replaced."),
|
|
39319
|
+
to: zod12.string().describe("The corrected or formatted string to appear in the transcript.")
|
|
38714
39320
|
})
|
|
38715
39321
|
).optional().describe(
|
|
38716
|
-
|
|
39322
|
+
'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
|
|
38717
39323
|
)
|
|
38718
|
-
}).optional().describe("Configuration for applying filtering to the transcription"),
|
|
39324
|
+
}).optional().describe("Configuration for applying filtering to the transcription."),
|
|
38719
39325
|
speaker_diarization_config: zod12.object({
|
|
38720
39326
|
prefer_current_speaker: zod12.boolean().optional().describe(
|
|
38721
39327
|
'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
|
|
@@ -38726,9 +39332,23 @@ var getJobsJobidTranscriptResponse = zod12.object({
|
|
|
38726
39332
|
getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
|
|
38727
39333
|
).optional().describe(
|
|
38728
39334
|
"Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
|
|
39335
|
+
),
|
|
39336
|
+
get_speakers: zod12.boolean().optional().describe(
|
|
39337
|
+
"If true, speaker identifiers will be returned at the end of transcript."
|
|
39338
|
+
),
|
|
39339
|
+
speakers: zod12.array(
|
|
39340
|
+
zod12.object({
|
|
39341
|
+
label: zod12.string().min(1).describe(
|
|
39342
|
+
"Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
|
|
39343
|
+
),
|
|
39344
|
+
speaker_identifiers: zod12.array(zod12.string().describe("Speaker identifiers.")).min(1)
|
|
39345
|
+
})
|
|
39346
|
+
).optional().describe(
|
|
39347
|
+
"Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
|
|
38729
39348
|
)
|
|
38730
39349
|
}).optional().describe("Configuration for speaker diarization")
|
|
38731
39350
|
}).optional(),
|
|
39351
|
+
orchestrator_version: zod12.string().optional().describe("The engine version used to generate transcription output."),
|
|
38732
39352
|
translation_errors: zod12.array(
|
|
38733
39353
|
zod12.object({
|
|
38734
39354
|
type: zod12.enum(["translation_failed", "unsupported_translation_pair"]).optional(),
|
|
@@ -38806,10 +39426,7 @@ var getJobsJobidTranscriptResponse = zod12.object({
|
|
|
38806
39426
|
"OTHER"
|
|
38807
39427
|
]).optional(),
|
|
38808
39428
|
message: zod12.string().optional()
|
|
38809
|
-
}).optional()
|
|
38810
|
-
orchestrator_version: zod12.string().optional().describe(
|
|
38811
|
-
"Orchestrator version in PEP 440 Format or set to 'version_not_found' as default."
|
|
38812
|
-
)
|
|
39429
|
+
}).optional()
|
|
38813
39430
|
}).describe(
|
|
38814
39431
|
"Summary information about the output from an ASR job, comprising the job type and configuration parameters used when generating the output."
|
|
38815
39432
|
),
|
|
@@ -38892,6 +39509,12 @@ var getJobsJobidTranscriptResponse = zod12.object({
|
|
|
38892
39509
|
"An ASR job output item. The primary item types are `word` and `punctuation`. Other item types may be present, for example to provide semantic information of different forms."
|
|
38893
39510
|
)
|
|
38894
39511
|
),
|
|
39512
|
+
speakers: zod12.array(
|
|
39513
|
+
zod12.object({
|
|
39514
|
+
label: zod12.string().min(1).describe("Speaker label."),
|
|
39515
|
+
speaker_identifiers: zod12.array(zod12.string().describe("Speaker identifiers.")).min(1)
|
|
39516
|
+
})
|
|
39517
|
+
).optional().describe("List of unique speaker identifiers detected in the transcript."),
|
|
38895
39518
|
translations: zod12.record(
|
|
38896
39519
|
zod12.string(),
|
|
38897
39520
|
zod12.array(
|
|
@@ -38913,13 +39536,23 @@ var getJobsJobidTranscriptResponse = zod12.object({
|
|
|
38913
39536
|
sentiment_analysis: zod12.object({
|
|
38914
39537
|
segments: zod12.array(
|
|
38915
39538
|
zod12.object({
|
|
38916
|
-
text: zod12.string().optional(),
|
|
38917
|
-
|
|
38918
|
-
|
|
38919
|
-
|
|
38920
|
-
|
|
38921
|
-
|
|
38922
|
-
|
|
39539
|
+
text: zod12.string().optional().describe("Represents the transcript of the analysed segment"),
|
|
39540
|
+
sentiment: zod12.string().optional().describe(
|
|
39541
|
+
"The assigned sentiment to the segment, which can be positive, neutral or negative"
|
|
39542
|
+
),
|
|
39543
|
+
start_time: zod12.number().optional().describe(
|
|
39544
|
+
"The timestamp corresponding to the beginning of the transcription segment"
|
|
39545
|
+
),
|
|
39546
|
+
end_time: zod12.number().optional().describe(
|
|
39547
|
+
"The timestamp corresponding to the end of the transcription segment"
|
|
39548
|
+
),
|
|
39549
|
+
speaker: zod12.string().optional().describe(
|
|
39550
|
+
"The speaker label for the segment, if speaker diarization is enabled"
|
|
39551
|
+
),
|
|
39552
|
+
channel: zod12.string().optional().describe(
|
|
39553
|
+
"The channel label for the segment, if channel diarization is enabled"
|
|
39554
|
+
),
|
|
39555
|
+
confidence: zod12.number().optional().describe("A confidence score in the range of 0-1")
|
|
38923
39556
|
}).describe("Represents a segment of text and its associated sentiment.")
|
|
38924
39557
|
).optional().describe(
|
|
38925
39558
|
"An array of objects that represent a segment of text and its associated sentiment."
|
|
@@ -38978,10 +39611,10 @@ var getJobsJobidTranscriptResponse = zod12.object({
|
|
|
38978
39611
|
}).optional().describe("Main object that holds topic detection results."),
|
|
38979
39612
|
chapters: zod12.array(
|
|
38980
39613
|
zod12.object({
|
|
38981
|
-
title: zod12.string().optional(),
|
|
38982
|
-
summary: zod12.string().optional(),
|
|
38983
|
-
start_time: zod12.number().optional(),
|
|
38984
|
-
end_time: zod12.number().optional()
|
|
39614
|
+
title: zod12.string().optional().describe("The auto-generated title for the chapter"),
|
|
39615
|
+
summary: zod12.string().optional().describe("An auto-generated paragraph-style, short summary of the chapter"),
|
|
39616
|
+
start_time: zod12.number().optional().describe("The start time of the chapter in the audio file"),
|
|
39617
|
+
end_time: zod12.number().optional().describe("The end time of the chapter in the audio file")
|
|
38985
39618
|
})
|
|
38986
39619
|
).optional().describe("An array of objects that represent summarized chapters of the transcript"),
|
|
38987
39620
|
audio_events: zod12.array(
|
|
@@ -39026,6 +39659,18 @@ var getJobsJobidLogParams = zod12.object({
|
|
|
39026
39659
|
jobid: zod12.string().describe("ID of the job.")
|
|
39027
39660
|
});
|
|
39028
39661
|
var getJobsJobidLogResponse = zod12.instanceof(File);
|
|
39662
|
+
var getJobsJobidObjectUrlsParams = zod12.object({
|
|
39663
|
+
jobid: zod12.string().describe("ID of the job.")
|
|
39664
|
+
});
|
|
39665
|
+
var getJobsJobidObjectUrlsQueryParams = zod12.object({
|
|
39666
|
+
ttl: zod12.number().describe("Time to live in seconds for the signed URLs"),
|
|
39667
|
+
url_for: zod12.array(zod12.enum(["data", "audio_mp3", "transcript"]))
|
|
39668
|
+
});
|
|
39669
|
+
var getJobsJobidObjectUrlsResponse = zod12.object({
|
|
39670
|
+
data: zod12.string().optional(),
|
|
39671
|
+
audio_mp3: zod12.string().optional(),
|
|
39672
|
+
transcript: zod12.string().optional()
|
|
39673
|
+
});
|
|
39029
39674
|
var getUsageQueryParams = zod12.object({
|
|
39030
39675
|
since: zod12.string().date().optional().describe(
|
|
39031
39676
|
"Include usage after the given date (inclusive). This is a [ISO-8601](https://en.wikipedia.org/wiki/ISO_8601) calendar date format: `YYYY-MM-DD`."
|
|
@@ -39159,7 +39804,7 @@ var speechToTextBodyKeytermsDefault = [];
|
|
|
39159
39804
|
var speechToTextBody = zod13.object({
|
|
39160
39805
|
model_id: zod13.enum(["scribe_v1", "scribe_v2"]).describe("The ID of the model to use for transcription."),
|
|
39161
39806
|
file: zod13.instanceof(File).or(zod13.null()).optional().describe(
|
|
39162
|
-
"The file to transcribe (100ms minimum audio length). All major audio and video formats are supported. Exactly one of the file or cloud_storage_url parameters must be provided. The file size must be less than
|
|
39807
|
+
"The file to transcribe (100ms minimum audio length). All major audio and video formats are supported. Exactly one of the file or cloud_storage_url parameters must be provided. The file size must be less than 5.0GB."
|
|
39163
39808
|
),
|
|
39164
39809
|
language_code: zod13.string().or(zod13.null()).optional().describe(
|
|
39165
39810
|
"An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand. Defaults to null, in this case the language is predicted automatically."
|
|
@@ -39237,7 +39882,7 @@ var speechToTextBody = zod13.object({
|
|
|
39237
39882
|
"The format of input audio. Options are 'pcm_s16le_16' or 'other' For `pcm_s16le_16`, the input audio must be 16-bit PCM at a 16kHz sample rate, single channel (mono), and little-endian byte order. Latency will be lower than with passing an encoded waveform."
|
|
39238
39883
|
),
|
|
39239
39884
|
cloud_storage_url: zod13.string().or(zod13.null()).optional().describe(
|
|
39240
|
-
"The HTTPS URL of the file to transcribe. Exactly one of the file or cloud_storage_url parameters must be provided. The file must be accessible via HTTPS and the file size must be less than 2GB. Any valid HTTPS URL is accepted, including URLs from cloud storage providers (AWS S3, Google Cloud Storage, Cloudflare R2, etc.), CDNs, or any other HTTPS source. URLs can be pre-signed or include authentication tokens in query parameters."
|
|
39885
|
+
"[Deprecated] This parameter is deprecated and will be removed in the future. Use 'source_url' instead.The HTTPS URL of the file to transcribe. Exactly one of the file or cloud_storage_url parameters must be provided. The file must be accessible via HTTPS and the file size must be less than 2GB. Any valid HTTPS URL is accepted, including URLs from cloud storage providers (AWS S3, Google Cloud Storage, Cloudflare R2, etc.), CDNs, or any other HTTPS source. URLs can be pre-signed or include authentication tokens in query parameters."
|
|
39241
39886
|
),
|
|
39242
39887
|
source_url: zod13.string().or(zod13.null()).optional().describe(
|
|
39243
39888
|
"The URL of an audio or video file to transcribe. Supports hosted video or audio files, YouTube video URLs, TikTok video URLs, and other video hosting services."
|
|
@@ -39276,7 +39921,7 @@ var speechToTextBody = zod13.object({
|
|
|
39276
39921
|
"How to format redacted entities. 'redacted' replaces with {REDACTED}, 'entity_type' replaces with {ENTITY_TYPE}, 'enumerated_entity_type' replaces with {ENTITY_TYPE_N} where N enumerates each occurrence. Only used when entity_redaction is set."
|
|
39277
39922
|
),
|
|
39278
39923
|
keyterms: zod13.array(zod13.string()).default(speechToTextBodyKeytermsDefault).describe(
|
|
39279
|
-
'A list of keyterms to bias the transcription towards. The keyterms are words or phrases you want the model to recognise more accurately. The number of keyterms cannot exceed 1000. The length of each keyterm must be less than 50 characters. Keyterms can contain at most 5 words (after normalisation). For example ["hello", "world", "technical term"]. Usage of this parameter will incur an additional 20% surcharge on the base transcription cost. When more than 100 keyterms are provided, a minimum billable duration of 20 seconds applies per request.'
|
|
39924
|
+
'A list of keyterms to bias the transcription towards. The keyterms are words or phrases you want the model to recognise more accurately. The number of keyterms cannot exceed 1000. The length of each keyterm must be less than 50 characters. Keyterms can contain at most 5 words (after normalisation). For example ["hello", "world", "technical term"]. The following characters are not supported: `<`, `>`, `{`, `}`, `[`, `]`, `\\`. Usage of this parameter will incur an additional 20% surcharge on the base transcription cost. When more than 100 keyterms are provided, a minimum billable duration of 20 seconds applies per request.'
|
|
39280
39925
|
)
|
|
39281
39926
|
});
|
|
39282
39927
|
var speechToTextResponse = zod13.object({
|
|
@@ -39641,6 +40286,7 @@ export {
|
|
|
39641
40286
|
SonioxModels,
|
|
39642
40287
|
SonioxRealtimeModel,
|
|
39643
40288
|
SonioxRegion,
|
|
40289
|
+
sdk_types_exports as SonioxSDK,
|
|
39644
40290
|
SonioxStreamingSchema,
|
|
39645
40291
|
streaming_types_zod_exports as SonioxStreamingTypes,
|
|
39646
40292
|
SonioxStreamingUpdateSchema,
|