voice-router-dev 0.9.3 → 0.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/constants.d.mts +11 -92
- package/dist/constants.d.ts +11 -92
- package/dist/constants.js +11 -88
- package/dist/constants.mjs +11 -88
- package/dist/{field-configs-FbtCPxzs.d.mts → field-configs-BVOZQiG3.d.mts} +4855 -3773
- package/dist/{field-configs-FbtCPxzs.d.ts → field-configs-BVOZQiG3.d.ts} +4855 -3773
- package/dist/field-configs.d.mts +1 -1
- package/dist/field-configs.d.ts +1 -1
- package/dist/field-configs.js +583 -150
- package/dist/field-configs.mjs +583 -150
- package/dist/index.d.mts +1211 -162
- package/dist/index.d.ts +1211 -162
- package/dist/index.js +924 -275
- package/dist/index.mjs +927 -275
- package/dist/{provider-metadata-D1d-9cng.d.ts → provider-metadata-CiSA4fWP.d.ts} +2 -2
- package/dist/{provider-metadata-BJ29OPW1.d.mts → provider-metadata-oxzd1q6t.d.mts} +2 -2
- package/dist/provider-metadata.d.mts +1 -1
- package/dist/provider-metadata.d.ts +1 -1
- package/dist/provider-metadata.js +3 -66
- package/dist/provider-metadata.mjs +3 -66
- package/dist/{speechToTextChunkResponseModel-BY2lGyZ3.d.ts → speechToTextChunkResponseModel-Dns0Ma9x.d.ts} +364 -39
- package/dist/{speechToTextChunkResponseModel-KayxDiZ7.d.mts → speechToTextChunkResponseModel-_ZvHTD4e.d.mts} +364 -39
- package/dist/webhooks.d.mts +3 -2
- package/dist/webhooks.d.ts +3 -2
- package/package.json +8 -3
package/dist/index.mjs
CHANGED
|
@@ -1102,7 +1102,6 @@ var AzureLocales = [
|
|
|
1102
1102
|
{ code: "ar-YE", name: "Arabic (Yemen)" },
|
|
1103
1103
|
{ code: "as-IN", name: "Assamese (India)" },
|
|
1104
1104
|
{ code: "az-AZ", name: "Azerbaijani (Azerbaijan)" },
|
|
1105
|
-
{ code: "be-BY", name: "Belarusian (Belarus)" },
|
|
1106
1105
|
{ code: "bg-BG", name: "Bulgarian (Bulgaria)" },
|
|
1107
1106
|
{ code: "bn-BD", name: "Bengali (Bangladesh)" },
|
|
1108
1107
|
{ code: "bn-IN", name: "Bengali (India)" },
|
|
@@ -1183,7 +1182,6 @@ var AzureLocales = [
|
|
|
1183
1182
|
{ code: "lo-LA", name: "Lao (Latin)" },
|
|
1184
1183
|
{ code: "lt-LT", name: "Lithuanian (Lithuania)" },
|
|
1185
1184
|
{ code: "lv-LV", name: "Latvian (Latvia)" },
|
|
1186
|
-
{ code: "mi-NZ", name: "Maori (New Zealand)" },
|
|
1187
1185
|
{ code: "mk-MK", name: "Macedonian (North Macedonia)" },
|
|
1188
1186
|
{ code: "ml-IN", name: "Malayalam (India)" },
|
|
1189
1187
|
{ code: "mn-MN", name: "Mongolian (Mongolia)" },
|
|
@@ -1259,7 +1257,6 @@ var AzureLocaleCodes = [
|
|
|
1259
1257
|
"ar-YE",
|
|
1260
1258
|
"as-IN",
|
|
1261
1259
|
"az-AZ",
|
|
1262
|
-
"be-BY",
|
|
1263
1260
|
"bg-BG",
|
|
1264
1261
|
"bn-BD",
|
|
1265
1262
|
"bn-IN",
|
|
@@ -1340,7 +1337,6 @@ var AzureLocaleCodes = [
|
|
|
1340
1337
|
"lo-LA",
|
|
1341
1338
|
"lt-LT",
|
|
1342
1339
|
"lv-LV",
|
|
1343
|
-
"mi-NZ",
|
|
1344
1340
|
"mk-MK",
|
|
1345
1341
|
"ml-IN",
|
|
1346
1342
|
"mn-MN",
|
|
@@ -1416,7 +1412,6 @@ var AzureLocaleLabels = {
|
|
|
1416
1412
|
"ar-YE": "Arabic (Yemen)",
|
|
1417
1413
|
"as-IN": "Assamese (India)",
|
|
1418
1414
|
"az-AZ": "Azerbaijani (Azerbaijan)",
|
|
1419
|
-
"be-BY": "Belarusian (Belarus)",
|
|
1420
1415
|
"bg-BG": "Bulgarian (Bulgaria)",
|
|
1421
1416
|
"bn-BD": "Bengali (Bangladesh)",
|
|
1422
1417
|
"bn-IN": "Bengali (India)",
|
|
@@ -1497,7 +1492,6 @@ var AzureLocaleLabels = {
|
|
|
1497
1492
|
"lo-LA": "Lao (Latin)",
|
|
1498
1493
|
"lt-LT": "Lithuanian (Lithuania)",
|
|
1499
1494
|
"lv-LV": "Latvian (Latvia)",
|
|
1500
|
-
"mi-NZ": "Maori (New Zealand)",
|
|
1501
1495
|
"mk-MK": "Macedonian (North Macedonia)",
|
|
1502
1496
|
"ml-IN": "Malayalam (India)",
|
|
1503
1497
|
"mn-MN": "Mongolian (Mongolia)",
|
|
@@ -1573,7 +1567,6 @@ var AzureLocale = {
|
|
|
1573
1567
|
"ar-YE": "ar-YE",
|
|
1574
1568
|
"as-IN": "as-IN",
|
|
1575
1569
|
"az-AZ": "az-AZ",
|
|
1576
|
-
"be-BY": "be-BY",
|
|
1577
1570
|
"bg-BG": "bg-BG",
|
|
1578
1571
|
"bn-BD": "bn-BD",
|
|
1579
1572
|
"bn-IN": "bn-IN",
|
|
@@ -1654,7 +1647,6 @@ var AzureLocale = {
|
|
|
1654
1647
|
"lo-LA": "lo-LA",
|
|
1655
1648
|
"lt-LT": "lt-LT",
|
|
1656
1649
|
"lv-LV": "lv-LV",
|
|
1657
|
-
"mi-NZ": "mi-NZ",
|
|
1658
1650
|
"mk-MK": "mk-MK",
|
|
1659
1651
|
"ml-IN": "ml-IN",
|
|
1660
1652
|
"mn-MN": "mn-MN",
|
|
@@ -1745,8 +1737,6 @@ var ElevenLabsLanguages = [
|
|
|
1745
1737
|
{ code: "hr", name: "Croatian" },
|
|
1746
1738
|
{ code: "bg", name: "Bulgarian" },
|
|
1747
1739
|
{ code: "lt", name: "Lithuanian" },
|
|
1748
|
-
{ code: "la", name: "Latin" },
|
|
1749
|
-
{ code: "mi", name: "Maori" },
|
|
1750
1740
|
{ code: "ml", name: "Malayalam" },
|
|
1751
1741
|
{ code: "cy", name: "Welsh" },
|
|
1752
1742
|
{ code: "sk", name: "Slovak" },
|
|
@@ -1760,20 +1750,16 @@ var ElevenLabsLanguages = [
|
|
|
1760
1750
|
{ code: "kn", name: "Kannada" },
|
|
1761
1751
|
{ code: "et", name: "Estonian" },
|
|
1762
1752
|
{ code: "mk", name: "Macedonian" },
|
|
1763
|
-
{ code: "br", name: "Breton" },
|
|
1764
|
-
{ code: "eu", name: "Basque" },
|
|
1765
1753
|
{ code: "is", name: "Icelandic" },
|
|
1766
1754
|
{ code: "hy", name: "Armenian" },
|
|
1767
1755
|
{ code: "ne", name: "Nepali" },
|
|
1768
1756
|
{ code: "mn", name: "Mongolian" },
|
|
1769
1757
|
{ code: "bs", name: "Bosnian" },
|
|
1770
1758
|
{ code: "kk", name: "Kazakh" },
|
|
1771
|
-
{ code: "sq", name: "Albanian" },
|
|
1772
1759
|
{ code: "sw", name: "Swahili" },
|
|
1773
1760
|
{ code: "gl", name: "Galician" },
|
|
1774
1761
|
{ code: "mr", name: "Marathi" },
|
|
1775
1762
|
{ code: "pa", name: "Punjabi" },
|
|
1776
|
-
{ code: "si", name: "Sinhala" },
|
|
1777
1763
|
{ code: "km", name: "Khmer" },
|
|
1778
1764
|
{ code: "sn", name: "Shona" },
|
|
1779
1765
|
{ code: "yo", name: "Yoruba" },
|
|
@@ -1786,29 +1772,16 @@ var ElevenLabsLanguages = [
|
|
|
1786
1772
|
{ code: "sd", name: "Sindhi" },
|
|
1787
1773
|
{ code: "gu", name: "Gujarati" },
|
|
1788
1774
|
{ code: "am", name: "Amharic" },
|
|
1789
|
-
{ code: "yi", name: "Yiddish" },
|
|
1790
1775
|
{ code: "lo", name: "Lao" },
|
|
1791
1776
|
{ code: "uz", name: "Uzbek" },
|
|
1792
|
-
{ code: "fo", name: "Faroese" },
|
|
1793
|
-
{ code: "ht", name: "Haitian Creole" },
|
|
1794
1777
|
{ code: "ps", name: "Pashto" },
|
|
1795
|
-
{ code: "tk", name: "Turkmen" },
|
|
1796
|
-
{ code: "nn", name: "Norwegian Nynorsk" },
|
|
1797
1778
|
{ code: "mt", name: "Maltese" },
|
|
1798
|
-
{ code: "sa", name: "Sanskrit" },
|
|
1799
1779
|
{ code: "lb", name: "Luxembourgish" },
|
|
1800
1780
|
{ code: "my", name: "Burmese" },
|
|
1801
|
-
{ code: "bo", name: "Tibetan" },
|
|
1802
|
-
{ code: "tl", name: "Tagalog" },
|
|
1803
|
-
{ code: "mg", name: "Malagasy" },
|
|
1804
1781
|
{ code: "as", name: "Assamese" },
|
|
1805
|
-
{ code: "tt", name: "Tatar" },
|
|
1806
|
-
{ code: "haw", name: "Hawaiian" },
|
|
1807
1782
|
{ code: "ln", name: "Lingala" },
|
|
1808
1783
|
{ code: "ha", name: "Hausa" },
|
|
1809
|
-
{ code: "
|
|
1810
|
-
{ code: "jw", name: "Javanese" },
|
|
1811
|
-
{ code: "su", name: "Sundanese" }
|
|
1784
|
+
{ code: "jw", name: "Javanese" }
|
|
1812
1785
|
];
|
|
1813
1786
|
var ElevenLabsLanguageCodes = [
|
|
1814
1787
|
"en",
|
|
@@ -1846,8 +1819,6 @@ var ElevenLabsLanguageCodes = [
|
|
|
1846
1819
|
"hr",
|
|
1847
1820
|
"bg",
|
|
1848
1821
|
"lt",
|
|
1849
|
-
"la",
|
|
1850
|
-
"mi",
|
|
1851
1822
|
"ml",
|
|
1852
1823
|
"cy",
|
|
1853
1824
|
"sk",
|
|
@@ -1861,20 +1832,16 @@ var ElevenLabsLanguageCodes = [
|
|
|
1861
1832
|
"kn",
|
|
1862
1833
|
"et",
|
|
1863
1834
|
"mk",
|
|
1864
|
-
"br",
|
|
1865
|
-
"eu",
|
|
1866
1835
|
"is",
|
|
1867
1836
|
"hy",
|
|
1868
1837
|
"ne",
|
|
1869
1838
|
"mn",
|
|
1870
1839
|
"bs",
|
|
1871
1840
|
"kk",
|
|
1872
|
-
"sq",
|
|
1873
1841
|
"sw",
|
|
1874
1842
|
"gl",
|
|
1875
1843
|
"mr",
|
|
1876
1844
|
"pa",
|
|
1877
|
-
"si",
|
|
1878
1845
|
"km",
|
|
1879
1846
|
"sn",
|
|
1880
1847
|
"yo",
|
|
@@ -1887,29 +1854,16 @@ var ElevenLabsLanguageCodes = [
|
|
|
1887
1854
|
"sd",
|
|
1888
1855
|
"gu",
|
|
1889
1856
|
"am",
|
|
1890
|
-
"yi",
|
|
1891
1857
|
"lo",
|
|
1892
1858
|
"uz",
|
|
1893
|
-
"fo",
|
|
1894
|
-
"ht",
|
|
1895
1859
|
"ps",
|
|
1896
|
-
"tk",
|
|
1897
|
-
"nn",
|
|
1898
1860
|
"mt",
|
|
1899
|
-
"sa",
|
|
1900
1861
|
"lb",
|
|
1901
1862
|
"my",
|
|
1902
|
-
"bo",
|
|
1903
|
-
"tl",
|
|
1904
|
-
"mg",
|
|
1905
1863
|
"as",
|
|
1906
|
-
"tt",
|
|
1907
|
-
"haw",
|
|
1908
1864
|
"ln",
|
|
1909
1865
|
"ha",
|
|
1910
|
-
"
|
|
1911
|
-
"jw",
|
|
1912
|
-
"su"
|
|
1866
|
+
"jw"
|
|
1913
1867
|
];
|
|
1914
1868
|
var ElevenLabsLanguageLabels = {
|
|
1915
1869
|
en: "English",
|
|
@@ -1947,8 +1901,6 @@ var ElevenLabsLanguageLabels = {
|
|
|
1947
1901
|
hr: "Croatian",
|
|
1948
1902
|
bg: "Bulgarian",
|
|
1949
1903
|
lt: "Lithuanian",
|
|
1950
|
-
la: "Latin",
|
|
1951
|
-
mi: "Maori",
|
|
1952
1904
|
ml: "Malayalam",
|
|
1953
1905
|
cy: "Welsh",
|
|
1954
1906
|
sk: "Slovak",
|
|
@@ -1962,20 +1914,16 @@ var ElevenLabsLanguageLabels = {
|
|
|
1962
1914
|
kn: "Kannada",
|
|
1963
1915
|
et: "Estonian",
|
|
1964
1916
|
mk: "Macedonian",
|
|
1965
|
-
br: "Breton",
|
|
1966
|
-
eu: "Basque",
|
|
1967
1917
|
is: "Icelandic",
|
|
1968
1918
|
hy: "Armenian",
|
|
1969
1919
|
ne: "Nepali",
|
|
1970
1920
|
mn: "Mongolian",
|
|
1971
1921
|
bs: "Bosnian",
|
|
1972
1922
|
kk: "Kazakh",
|
|
1973
|
-
sq: "Albanian",
|
|
1974
1923
|
sw: "Swahili",
|
|
1975
1924
|
gl: "Galician",
|
|
1976
1925
|
mr: "Marathi",
|
|
1977
1926
|
pa: "Punjabi",
|
|
1978
|
-
si: "Sinhala",
|
|
1979
1927
|
km: "Khmer",
|
|
1980
1928
|
sn: "Shona",
|
|
1981
1929
|
yo: "Yoruba",
|
|
@@ -1988,29 +1936,16 @@ var ElevenLabsLanguageLabels = {
|
|
|
1988
1936
|
sd: "Sindhi",
|
|
1989
1937
|
gu: "Gujarati",
|
|
1990
1938
|
am: "Amharic",
|
|
1991
|
-
yi: "Yiddish",
|
|
1992
1939
|
lo: "Lao",
|
|
1993
1940
|
uz: "Uzbek",
|
|
1994
|
-
fo: "Faroese",
|
|
1995
|
-
ht: "Haitian Creole",
|
|
1996
1941
|
ps: "Pashto",
|
|
1997
|
-
tk: "Turkmen",
|
|
1998
|
-
nn: "Norwegian Nynorsk",
|
|
1999
1942
|
mt: "Maltese",
|
|
2000
|
-
sa: "Sanskrit",
|
|
2001
1943
|
lb: "Luxembourgish",
|
|
2002
1944
|
my: "Burmese",
|
|
2003
|
-
bo: "Tibetan",
|
|
2004
|
-
tl: "Tagalog",
|
|
2005
|
-
mg: "Malagasy",
|
|
2006
1945
|
as: "Assamese",
|
|
2007
|
-
tt: "Tatar",
|
|
2008
|
-
haw: "Hawaiian",
|
|
2009
1946
|
ln: "Lingala",
|
|
2010
1947
|
ha: "Hausa",
|
|
2011
|
-
|
|
2012
|
-
jw: "Javanese",
|
|
2013
|
-
su: "Sundanese"
|
|
1948
|
+
jw: "Javanese"
|
|
2014
1949
|
};
|
|
2015
1950
|
|
|
2016
1951
|
// src/generated/gladia/schema/streamingSupportedBitDepthEnum.ts
|
|
@@ -2515,6 +2450,7 @@ var OpenAITranscriptionModel = {
|
|
|
2515
2450
|
"gpt-4o-mini-transcribe-2025-12-15": "gpt-4o-mini-transcribe-2025-12-15",
|
|
2516
2451
|
"gpt-4o-transcribe": "gpt-4o-transcribe",
|
|
2517
2452
|
"gpt-4o-transcribe-diarize": "gpt-4o-transcribe-diarize",
|
|
2453
|
+
"gpt-realtime-whisper": "gpt-realtime-whisper",
|
|
2518
2454
|
"whisper-1": "whisper-1"
|
|
2519
2455
|
};
|
|
2520
2456
|
var OpenAIRealtimeModel = {
|
|
@@ -2530,6 +2466,7 @@ var OpenAIRealtimeModel = {
|
|
|
2530
2466
|
"gpt-audio-mini-2025-12-15": "gpt-audio-mini-2025-12-15",
|
|
2531
2467
|
"gpt-realtime": "gpt-realtime",
|
|
2532
2468
|
"gpt-realtime-1.5": "gpt-realtime-1.5",
|
|
2469
|
+
"gpt-realtime-2": "gpt-realtime-2",
|
|
2533
2470
|
"gpt-realtime-2025-08-28": "gpt-realtime-2025-08-28",
|
|
2534
2471
|
"gpt-realtime-mini": "gpt-realtime-mini",
|
|
2535
2472
|
"gpt-realtime-mini-2025-10-06": "gpt-realtime-mini-2025-10-06",
|
|
@@ -4129,6 +4066,12 @@ var GladiaAdapter = class extends BaseAdapter {
|
|
|
4129
4066
|
};
|
|
4130
4067
|
this.baseUrl = "https://api.gladia.io";
|
|
4131
4068
|
}
|
|
4069
|
+
initialize(config) {
|
|
4070
|
+
super.initialize(config);
|
|
4071
|
+
if (config.region) {
|
|
4072
|
+
this.streamingRegion = config.region;
|
|
4073
|
+
}
|
|
4074
|
+
}
|
|
4132
4075
|
/**
|
|
4133
4076
|
* Get axios config for generated API client functions
|
|
4134
4077
|
* Configures headers and base URL using Gladia's x-gladia-key header
|
|
@@ -4790,9 +4733,10 @@ var GladiaAdapter = class extends BaseAdapter {
|
|
|
4790
4733
|
async transcribeStream(options, callbacks) {
|
|
4791
4734
|
this.validateConfig();
|
|
4792
4735
|
const streamingRequest = this.buildStreamingRequest(options);
|
|
4736
|
+
const region = options?.region ?? this.streamingRegion;
|
|
4793
4737
|
const initResponse = await streamingControllerInitStreamingSessionV2(
|
|
4794
4738
|
streamingRequest,
|
|
4795
|
-
|
|
4739
|
+
region ? { region } : void 0,
|
|
4796
4740
|
this.getAxiosConfig()
|
|
4797
4741
|
);
|
|
4798
4742
|
const { id, url: apiWsUrl } = initResponse.data;
|
|
@@ -5339,12 +5283,20 @@ var EntityType = {
|
|
|
5339
5283
|
email_address: "email_address",
|
|
5340
5284
|
event: "event",
|
|
5341
5285
|
filename: "filename",
|
|
5286
|
+
gender: "gender",
|
|
5342
5287
|
gender_sexuality: "gender_sexuality",
|
|
5343
5288
|
healthcare_number: "healthcare_number",
|
|
5344
5289
|
injury: "injury",
|
|
5345
5290
|
ip_address: "ip_address",
|
|
5346
5291
|
language: "language",
|
|
5347
5292
|
location: "location",
|
|
5293
|
+
location_address: "location_address",
|
|
5294
|
+
location_address_street: "location_address_street",
|
|
5295
|
+
location_city: "location_city",
|
|
5296
|
+
location_coordinate: "location_coordinate",
|
|
5297
|
+
location_country: "location_country",
|
|
5298
|
+
location_state: "location_state",
|
|
5299
|
+
location_zip: "location_zip",
|
|
5348
5300
|
marital_status: "marital_status",
|
|
5349
5301
|
medical_condition: "medical_condition",
|
|
5350
5302
|
medical_process: "medical_process",
|
|
@@ -5353,6 +5305,7 @@ var EntityType = {
|
|
|
5353
5305
|
number_sequence: "number_sequence",
|
|
5354
5306
|
occupation: "occupation",
|
|
5355
5307
|
organization: "organization",
|
|
5308
|
+
organization_medical_facility: "organization_medical_facility",
|
|
5356
5309
|
passport_number: "passport_number",
|
|
5357
5310
|
password: "password",
|
|
5358
5311
|
person_age: "person_age",
|
|
@@ -5361,6 +5314,7 @@ var EntityType = {
|
|
|
5361
5314
|
physical_attribute: "physical_attribute",
|
|
5362
5315
|
political_affiliation: "political_affiliation",
|
|
5363
5316
|
religion: "religion",
|
|
5317
|
+
sexuality: "sexuality",
|
|
5364
5318
|
statistics: "statistics",
|
|
5365
5319
|
time: "time",
|
|
5366
5320
|
url: "url",
|
|
@@ -5387,12 +5341,20 @@ var PiiPolicy = {
|
|
|
5387
5341
|
email_address: "email_address",
|
|
5388
5342
|
event: "event",
|
|
5389
5343
|
filename: "filename",
|
|
5344
|
+
gender: "gender",
|
|
5390
5345
|
gender_sexuality: "gender_sexuality",
|
|
5391
5346
|
healthcare_number: "healthcare_number",
|
|
5392
5347
|
injury: "injury",
|
|
5393
5348
|
ip_address: "ip_address",
|
|
5394
5349
|
language: "language",
|
|
5395
5350
|
location: "location",
|
|
5351
|
+
location_address: "location_address",
|
|
5352
|
+
location_address_street: "location_address_street",
|
|
5353
|
+
location_city: "location_city",
|
|
5354
|
+
location_coordinate: "location_coordinate",
|
|
5355
|
+
location_country: "location_country",
|
|
5356
|
+
location_state: "location_state",
|
|
5357
|
+
location_zip: "location_zip",
|
|
5396
5358
|
marital_status: "marital_status",
|
|
5397
5359
|
medical_condition: "medical_condition",
|
|
5398
5360
|
medical_process: "medical_process",
|
|
@@ -5401,6 +5363,7 @@ var PiiPolicy = {
|
|
|
5401
5363
|
number_sequence: "number_sequence",
|
|
5402
5364
|
occupation: "occupation",
|
|
5403
5365
|
organization: "organization",
|
|
5366
|
+
organization_medical_facility: "organization_medical_facility",
|
|
5404
5367
|
passport_number: "passport_number",
|
|
5405
5368
|
password: "password",
|
|
5406
5369
|
person_age: "person_age",
|
|
@@ -5409,6 +5372,7 @@ var PiiPolicy = {
|
|
|
5409
5372
|
physical_attribute: "physical_attribute",
|
|
5410
5373
|
political_affiliation: "political_affiliation",
|
|
5411
5374
|
religion: "religion",
|
|
5375
|
+
sexuality: "sexuality",
|
|
5412
5376
|
statistics: "statistics",
|
|
5413
5377
|
time: "time",
|
|
5414
5378
|
url: "url",
|
|
@@ -5477,7 +5441,8 @@ var TranscriptOptionalParamsRedactPiiAudioOptionsOverrideAudioRedactionMethod =
|
|
|
5477
5441
|
|
|
5478
5442
|
// src/generated/assemblyai/schema/transcriptOptionalParamsRemoveAudioTags.ts
|
|
5479
5443
|
var TranscriptOptionalParamsRemoveAudioTags = {
|
|
5480
|
-
all: "all"
|
|
5444
|
+
all: "all",
|
|
5445
|
+
speaker: "speaker"
|
|
5481
5446
|
};
|
|
5482
5447
|
|
|
5483
5448
|
// src/generated/assemblyai/schema/transcriptRedactPiiAudioOptionsOverrideAudioRedactionMethod.ts
|
|
@@ -5487,7 +5452,8 @@ var TranscriptRedactPiiAudioOptionsOverrideAudioRedactionMethod = {
|
|
|
5487
5452
|
|
|
5488
5453
|
// src/generated/assemblyai/schema/transcriptRemoveAudioTags.ts
|
|
5489
5454
|
var TranscriptRemoveAudioTags = {
|
|
5490
|
-
all: "all"
|
|
5455
|
+
all: "all",
|
|
5456
|
+
speaker: "speaker"
|
|
5491
5457
|
};
|
|
5492
5458
|
|
|
5493
5459
|
// src/generated/assemblyai/api/assemblyAIAPI.ts
|
|
@@ -9379,15 +9345,18 @@ import axios9 from "axios";
|
|
|
9379
9345
|
// src/generated/soniox/schema/index.ts
|
|
9380
9346
|
var schema_exports4 = {};
|
|
9381
9347
|
__export(schema_exports4, {
|
|
9348
|
+
TTSVoiceGender: () => TTSVoiceGender,
|
|
9382
9349
|
TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
|
|
9383
9350
|
TranscriptionMode: () => TranscriptionMode,
|
|
9384
9351
|
TranscriptionStatus: () => TranscriptionStatus,
|
|
9385
|
-
TranslationConfigType: () => TranslationConfigType
|
|
9352
|
+
TranslationConfigType: () => TranslationConfigType,
|
|
9353
|
+
UsageLogsSort: () => UsageLogsSort
|
|
9386
9354
|
});
|
|
9387
9355
|
|
|
9388
9356
|
// src/generated/soniox/schema/temporaryApiKeyUsageType.ts
|
|
9389
9357
|
var TemporaryApiKeyUsageType = {
|
|
9390
|
-
transcribe_websocket: "transcribe_websocket"
|
|
9358
|
+
transcribe_websocket: "transcribe_websocket",
|
|
9359
|
+
tts_rt: "tts_rt"
|
|
9391
9360
|
};
|
|
9392
9361
|
|
|
9393
9362
|
// src/generated/soniox/schema/transcriptionMode.ts
|
|
@@ -9402,6 +9371,19 @@ var TranslationConfigType = {
|
|
|
9402
9371
|
two_way: "two_way"
|
|
9403
9372
|
};
|
|
9404
9373
|
|
|
9374
|
+
// src/generated/soniox/schema/tTSVoiceGender.ts
|
|
9375
|
+
var TTSVoiceGender = {
|
|
9376
|
+
male: "male",
|
|
9377
|
+
female: "female",
|
|
9378
|
+
neutral: "neutral"
|
|
9379
|
+
};
|
|
9380
|
+
|
|
9381
|
+
// src/generated/soniox/schema/usageLogsSort.ts
|
|
9382
|
+
var UsageLogsSort = {
|
|
9383
|
+
end_time_asc: "end_time_asc",
|
|
9384
|
+
end_time_desc: "end_time_desc"
|
|
9385
|
+
};
|
|
9386
|
+
|
|
9405
9387
|
// src/generated/soniox/api/sonioxPublicAPI.ts
|
|
9406
9388
|
var uploadFile = (uploadFileBody2, options) => {
|
|
9407
9389
|
const formData = new FormData();
|
|
@@ -10769,6 +10751,7 @@ __export(deepgramAPI_zod_exports, {
|
|
|
10769
10751
|
speakGenerateQueryMipOptOutDefault: () => speakGenerateQueryMipOptOutDefault,
|
|
10770
10752
|
speakGenerateQueryModelDefault: () => speakGenerateQueryModelDefault,
|
|
10771
10753
|
speakGenerateQueryParams: () => speakGenerateQueryParams,
|
|
10754
|
+
speakGenerateQuerySpeedDefault: () => speakGenerateQuerySpeedDefault,
|
|
10772
10755
|
speakGenerateResponse: () => speakGenerateResponse
|
|
10773
10756
|
});
|
|
10774
10757
|
import { z as zod } from "zod";
|
|
@@ -10823,6 +10806,9 @@ var listenTranscribeQueryParams = zod.object({
|
|
|
10823
10806
|
diarize: zod.boolean().optional().describe(
|
|
10824
10807
|
"Recognize speaker changes. Each word in the transcript will be assigned a speaker number starting at 0"
|
|
10825
10808
|
),
|
|
10809
|
+
diarize_model: zod.enum(["latest", "v1", "v2"]).optional().describe(
|
|
10810
|
+
"Select and enable a specific batch diarization model version. If specifying this parameter, you should not set the deprecated `diarize=true` parameter. Not accepted on streaming requests."
|
|
10811
|
+
),
|
|
10826
10812
|
dictation: zod.boolean().optional().describe("Dictation mode for controlling formatting with dictated speech"),
|
|
10827
10813
|
encoding: zod.enum(["linear16", "flac", "mulaw", "amr-nb", "amr-wb", "opus", "speex", "g729"]).optional().describe("Specify the expected encoding of your submitted audio"),
|
|
10828
10814
|
filler_words: zod.boolean().optional().describe('Filler Words can help transcribe interruptions in your audio, like "uh" and "um"'),
|
|
@@ -11088,6 +11074,7 @@ var listenTranscribeResponse = zod.object({
|
|
|
11088
11074
|
var speakGenerateQueryCallbackMethodDefault = "POST";
|
|
11089
11075
|
var speakGenerateQueryMipOptOutDefault = false;
|
|
11090
11076
|
var speakGenerateQueryModelDefault = "aura-asteria-en";
|
|
11077
|
+
var speakGenerateQuerySpeedDefault = 1;
|
|
11091
11078
|
var speakGenerateQueryParams = zod.object({
|
|
11092
11079
|
callback: zod.string().optional().describe("URL to which we'll make the callback request"),
|
|
11093
11080
|
callback_method: zod.enum(["POST", "PUT"]).default(speakGenerateQueryCallbackMethodDefault).describe("HTTP method by which the callback request will be made"),
|
|
@@ -11199,6 +11186,9 @@ var speakGenerateQueryParams = zod.object({
|
|
|
11199
11186
|
zod.enum(["22050"]).describe("Encoding - mp3. Sample rate is fixed and not configurable (22050 Hz).")
|
|
11200
11187
|
).or(zod.enum(["48000"]).describe("Encoding - opus. Sample rate is fixed at 48000 Hz.")).optional().describe(
|
|
11201
11188
|
"Sample Rate specifies the sample rate for the output audio. Based on the encoding, different sample rates are supported. For some encodings, the sample rate is not configurable"
|
|
11189
|
+
),
|
|
11190
|
+
speed: zod.number().default(speakGenerateQuerySpeedDefault).describe(
|
|
11191
|
+
"Speaking rate multiplier that adjusts the pace of generated speech while preserving natural prosody and voice quality. Not yet supported in all languages."
|
|
11202
11192
|
)
|
|
11203
11193
|
});
|
|
11204
11194
|
var speakGenerateHeader = zod.object({
|
|
@@ -11523,6 +11513,7 @@ __export(assemblyAIAPI_zod_exports, {
|
|
|
11523
11513
|
createTranscriptBodyRedactPiiAudioDefault: () => createTranscriptBodyRedactPiiAudioDefault,
|
|
11524
11514
|
createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault: () => createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault,
|
|
11525
11515
|
createTranscriptBodyRedactPiiDefault: () => createTranscriptBodyRedactPiiDefault,
|
|
11516
|
+
createTranscriptBodyRedactPiiReturnUnredactedDefault: () => createTranscriptBodyRedactPiiReturnUnredactedDefault,
|
|
11526
11517
|
createTranscriptBodySentimentAnalysisDefault: () => createTranscriptBodySentimentAnalysisDefault,
|
|
11527
11518
|
createTranscriptBodySpeakerLabelsDefault: () => createTranscriptBodySpeakerLabelsDefault,
|
|
11528
11519
|
createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault: () => createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault,
|
|
@@ -11593,6 +11584,7 @@ var createTranscriptBodyPunctuateDefault = true;
|
|
|
11593
11584
|
var createTranscriptBodyRedactPiiDefault = false;
|
|
11594
11585
|
var createTranscriptBodyRedactPiiAudioDefault = false;
|
|
11595
11586
|
var createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault = false;
|
|
11587
|
+
var createTranscriptBodyRedactPiiReturnUnredactedDefault = false;
|
|
11596
11588
|
var createTranscriptBodySentimentAnalysisDefault = false;
|
|
11597
11589
|
var createTranscriptBodySpeakerLabelsDefault = false;
|
|
11598
11590
|
var createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault = 1;
|
|
@@ -11631,7 +11623,7 @@ var createTranscriptBody = zod3.object({
|
|
|
11631
11623
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
11632
11624
|
),
|
|
11633
11625
|
disfluencies: zod3.boolean().optional().describe(
|
|
11634
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
11626
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
11635
11627
|
),
|
|
11636
11628
|
domain: zod3.string().nullish().describe(
|
|
11637
11629
|
'Enable domain-specific transcription models to improve accuracy for specialized terminology. Set to `"medical-v1"` to enable [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) for improved accuracy of medical terms such as medications, procedures, conditions, and dosages.\n\nSupported languages: English (`en`), Spanish (`es`), German (`de`), French (`fr`). If used with an unsupported language, the parameter is ignored and a warning is returned.\n'
|
|
@@ -11938,12 +11930,20 @@ var createTranscriptBody = zod3.object({
|
|
|
11938
11930
|
"email_address",
|
|
11939
11931
|
"event",
|
|
11940
11932
|
"filename",
|
|
11933
|
+
"gender",
|
|
11941
11934
|
"gender_sexuality",
|
|
11942
11935
|
"healthcare_number",
|
|
11943
11936
|
"injury",
|
|
11944
11937
|
"ip_address",
|
|
11945
11938
|
"language",
|
|
11946
11939
|
"location",
|
|
11940
|
+
"location_address",
|
|
11941
|
+
"location_address_street",
|
|
11942
|
+
"location_city",
|
|
11943
|
+
"location_coordinate",
|
|
11944
|
+
"location_country",
|
|
11945
|
+
"location_state",
|
|
11946
|
+
"location_zip",
|
|
11947
11947
|
"marital_status",
|
|
11948
11948
|
"medical_condition",
|
|
11949
11949
|
"medical_process",
|
|
@@ -11952,6 +11952,7 @@ var createTranscriptBody = zod3.object({
|
|
|
11952
11952
|
"number_sequence",
|
|
11953
11953
|
"occupation",
|
|
11954
11954
|
"organization",
|
|
11955
|
+
"organization_medical_facility",
|
|
11955
11956
|
"passport_number",
|
|
11956
11957
|
"password",
|
|
11957
11958
|
"person_age",
|
|
@@ -11960,6 +11961,7 @@ var createTranscriptBody = zod3.object({
|
|
|
11960
11961
|
"physical_attribute",
|
|
11961
11962
|
"political_affiliation",
|
|
11962
11963
|
"religion",
|
|
11964
|
+
"sexuality",
|
|
11963
11965
|
"statistics",
|
|
11964
11966
|
"time",
|
|
11965
11967
|
"url",
|
|
@@ -11967,15 +11969,20 @@ var createTranscriptBody = zod3.object({
|
|
|
11967
11969
|
"username",
|
|
11968
11970
|
"vehicle_id",
|
|
11969
11971
|
"zodiac_sign"
|
|
11970
|
-
]).describe(
|
|
11972
|
+
]).describe(
|
|
11973
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
11974
|
+
)
|
|
11971
11975
|
).optional().describe(
|
|
11972
11976
|
"The list of PII Redaction policies to enable. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
11973
11977
|
),
|
|
11974
11978
|
redact_pii_sub: zod3.enum(["entity_name", "hash"]).describe(
|
|
11975
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
11979
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
11976
11980
|
).or(zod3.null()).optional().describe(
|
|
11977
11981
|
"The replacement logic for detected PII, can be `entity_type` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
11978
11982
|
),
|
|
11983
|
+
redact_pii_return_unredacted: zod3.boolean().optional().describe(
|
|
11984
|
+
"When set to `true`, returns the original unredacted transcript alongside the redacted one in the same response. Requires `redact_pii` to be `true`, otherwise a 400 error is returned.\n\nWhen enabled, the response includes the additional fields `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. The existing `text`, `words`, and `utterances` fields remain fully redacted. When disabled (default), the response is unchanged and contains only the redacted transcript. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details.\n"
|
|
11985
|
+
),
|
|
11979
11986
|
sentiment_analysis: zod3.boolean().optional().describe(
|
|
11980
11987
|
"Enable [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech), can be true or false"
|
|
11981
11988
|
),
|
|
@@ -12073,10 +12080,10 @@ var createTranscriptBody = zod3.object({
|
|
|
12073
12080
|
),
|
|
12074
12081
|
summary_model: zod3.enum(["informative", "conversational", "catchy"]).optional().describe("The model to summarize the transcript"),
|
|
12075
12082
|
summary_type: zod3.enum(["bullets", "bullets_verbose", "gist", "headline", "paragraph"]).optional().describe("The type of summary"),
|
|
12076
|
-
remove_audio_tags: zod3.enum(["all"]).describe(
|
|
12077
|
-
'
|
|
12083
|
+
remove_audio_tags: zod3.enum(["all", "speaker"]).describe(
|
|
12084
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
12078
12085
|
).or(zod3.null()).optional().describe(
|
|
12079
|
-
'
|
|
12086
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
12080
12087
|
),
|
|
12081
12088
|
temperature: zod3.number().optional().describe(
|
|
12082
12089
|
"Control the amount of randomness injected into the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
@@ -12210,7 +12217,7 @@ var createTranscriptResponse = zod3.object({
|
|
|
12210
12217
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
12211
12218
|
),
|
|
12212
12219
|
disfluencies: zod3.boolean().nullish().describe(
|
|
12213
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
12220
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
12214
12221
|
),
|
|
12215
12222
|
domain: zod3.string().nullish().describe(
|
|
12216
12223
|
'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
|
|
@@ -12233,12 +12240,20 @@ var createTranscriptResponse = zod3.object({
|
|
|
12233
12240
|
"email_address",
|
|
12234
12241
|
"event",
|
|
12235
12242
|
"filename",
|
|
12243
|
+
"gender",
|
|
12236
12244
|
"gender_sexuality",
|
|
12237
12245
|
"healthcare_number",
|
|
12238
12246
|
"injury",
|
|
12239
12247
|
"ip_address",
|
|
12240
12248
|
"language",
|
|
12241
12249
|
"location",
|
|
12250
|
+
"location_address",
|
|
12251
|
+
"location_address_street",
|
|
12252
|
+
"location_city",
|
|
12253
|
+
"location_coordinate",
|
|
12254
|
+
"location_country",
|
|
12255
|
+
"location_state",
|
|
12256
|
+
"location_zip",
|
|
12242
12257
|
"marital_status",
|
|
12243
12258
|
"medical_condition",
|
|
12244
12259
|
"medical_process",
|
|
@@ -12247,6 +12262,7 @@ var createTranscriptResponse = zod3.object({
|
|
|
12247
12262
|
"number_sequence",
|
|
12248
12263
|
"occupation",
|
|
12249
12264
|
"organization",
|
|
12265
|
+
"organization_medical_facility",
|
|
12250
12266
|
"passport_number",
|
|
12251
12267
|
"password",
|
|
12252
12268
|
"person_age",
|
|
@@ -12255,6 +12271,7 @@ var createTranscriptResponse = zod3.object({
|
|
|
12255
12271
|
"physical_attribute",
|
|
12256
12272
|
"political_affiliation",
|
|
12257
12273
|
"religion",
|
|
12274
|
+
"sexuality",
|
|
12258
12275
|
"statistics",
|
|
12259
12276
|
"time",
|
|
12260
12277
|
"url",
|
|
@@ -12559,6 +12576,24 @@ var createTranscriptResponse = zod3.object({
|
|
|
12559
12576
|
}).optional().describe(
|
|
12560
12577
|
"Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
|
|
12561
12578
|
),
|
|
12579
|
+
metadata: zod3.object({
|
|
12580
|
+
domain_used: zod3.string().nullish().describe(
|
|
12581
|
+
'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
|
|
12582
|
+
),
|
|
12583
|
+
warnings: zod3.array(
|
|
12584
|
+
zod3.object({
|
|
12585
|
+
message: zod3.string().describe("A human-readable description of the warning.")
|
|
12586
|
+
}).describe(
|
|
12587
|
+
"A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
|
|
12588
|
+
)
|
|
12589
|
+
).optional().describe(
|
|
12590
|
+
"Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
|
|
12591
|
+
)
|
|
12592
|
+
}).describe(
|
|
12593
|
+
"Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
|
|
12594
|
+
).or(zod3.null()).optional().describe(
|
|
12595
|
+
"Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
|
|
12596
|
+
),
|
|
12562
12597
|
multichannel: zod3.boolean().nullish().describe(
|
|
12563
12598
|
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
|
|
12564
12599
|
),
|
|
@@ -12606,12 +12641,20 @@ var createTranscriptResponse = zod3.object({
|
|
|
12606
12641
|
"email_address",
|
|
12607
12642
|
"event",
|
|
12608
12643
|
"filename",
|
|
12644
|
+
"gender",
|
|
12609
12645
|
"gender_sexuality",
|
|
12610
12646
|
"healthcare_number",
|
|
12611
12647
|
"injury",
|
|
12612
12648
|
"ip_address",
|
|
12613
12649
|
"language",
|
|
12614
12650
|
"location",
|
|
12651
|
+
"location_address",
|
|
12652
|
+
"location_address_street",
|
|
12653
|
+
"location_city",
|
|
12654
|
+
"location_coordinate",
|
|
12655
|
+
"location_country",
|
|
12656
|
+
"location_state",
|
|
12657
|
+
"location_zip",
|
|
12615
12658
|
"marital_status",
|
|
12616
12659
|
"medical_condition",
|
|
12617
12660
|
"medical_process",
|
|
@@ -12620,6 +12663,7 @@ var createTranscriptResponse = zod3.object({
|
|
|
12620
12663
|
"number_sequence",
|
|
12621
12664
|
"occupation",
|
|
12622
12665
|
"organization",
|
|
12666
|
+
"organization_medical_facility",
|
|
12623
12667
|
"passport_number",
|
|
12624
12668
|
"password",
|
|
12625
12669
|
"person_age",
|
|
@@ -12628,6 +12672,7 @@ var createTranscriptResponse = zod3.object({
|
|
|
12628
12672
|
"physical_attribute",
|
|
12629
12673
|
"political_affiliation",
|
|
12630
12674
|
"religion",
|
|
12675
|
+
"sexuality",
|
|
12631
12676
|
"statistics",
|
|
12632
12677
|
"time",
|
|
12633
12678
|
"url",
|
|
@@ -12635,12 +12680,17 @@ var createTranscriptResponse = zod3.object({
|
|
|
12635
12680
|
"username",
|
|
12636
12681
|
"vehicle_id",
|
|
12637
12682
|
"zodiac_sign"
|
|
12638
|
-
]).describe(
|
|
12683
|
+
]).describe(
|
|
12684
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
12685
|
+
)
|
|
12639
12686
|
).nullish().describe(
|
|
12640
12687
|
"The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
12641
12688
|
),
|
|
12642
12689
|
redact_pii_sub: zod3.enum(["entity_name", "hash"]).optional().describe(
|
|
12643
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
12690
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
12691
|
+
),
|
|
12692
|
+
redact_pii_return_unredacted: zod3.boolean().nullish().describe(
|
|
12693
|
+
"Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
12644
12694
|
),
|
|
12645
12695
|
sentiment_analysis: zod3.boolean().nullish().describe(
|
|
12646
12696
|
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
|
|
@@ -12777,20 +12827,23 @@ var createTranscriptResponse = zod3.object({
|
|
|
12777
12827
|
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
12778
12828
|
),
|
|
12779
12829
|
summary_model: zod3.string().nullish().describe(
|
|
12780
|
-
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
12830
|
+
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
|
|
12781
12831
|
),
|
|
12782
12832
|
summary_type: zod3.string().nullish().describe(
|
|
12783
|
-
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
12833
|
+
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
12784
12834
|
),
|
|
12785
|
-
remove_audio_tags: zod3.enum(["all"]).describe(
|
|
12786
|
-
|
|
12835
|
+
remove_audio_tags: zod3.enum(["all", "speaker"]).describe(
|
|
12836
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
12787
12837
|
).or(zod3.null()).optional().describe(
|
|
12788
|
-
|
|
12838
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
12789
12839
|
),
|
|
12790
12840
|
temperature: zod3.number().nullish().describe(
|
|
12791
12841
|
"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
12792
12842
|
),
|
|
12793
12843
|
text: zod3.string().nullish().describe("The textual transcript of your media file"),
|
|
12844
|
+
unredacted_text: zod3.string().nullish().describe(
|
|
12845
|
+
"The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
12846
|
+
),
|
|
12794
12847
|
throttled: zod3.boolean().nullish().describe(
|
|
12795
12848
|
"True while a request is throttled and false when a request is no longer throttled"
|
|
12796
12849
|
),
|
|
@@ -12827,6 +12880,39 @@ var createTranscriptResponse = zod3.object({
|
|
|
12827
12880
|
).nullish().describe(
|
|
12828
12881
|
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
|
|
12829
12882
|
),
|
|
12883
|
+
unredacted_utterances: zod3.array(
|
|
12884
|
+
zod3.object({
|
|
12885
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this utterance"),
|
|
12886
|
+
start: zod3.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
|
|
12887
|
+
end: zod3.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
|
|
12888
|
+
text: zod3.string().describe("The text for this utterance"),
|
|
12889
|
+
words: zod3.array(
|
|
12890
|
+
zod3.object({
|
|
12891
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this word"),
|
|
12892
|
+
start: zod3.number().describe("The starting time, in milliseconds, for the word"),
|
|
12893
|
+
end: zod3.number().describe("The ending time, in milliseconds, for the word"),
|
|
12894
|
+
text: zod3.string().describe("The text of the word"),
|
|
12895
|
+
channel: zod3.string().nullish().describe(
|
|
12896
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
12897
|
+
),
|
|
12898
|
+
speaker: zod3.string().nullable().describe(
|
|
12899
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
12900
|
+
)
|
|
12901
|
+
})
|
|
12902
|
+
).describe("The words in the utterance."),
|
|
12903
|
+
channel: zod3.string().nullish().describe(
|
|
12904
|
+
"The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
12905
|
+
),
|
|
12906
|
+
speaker: zod3.string().describe(
|
|
12907
|
+
'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
|
|
12908
|
+
),
|
|
12909
|
+
translated_texts: zod3.record(zod3.string(), zod3.string()).optional().describe(
|
|
12910
|
+
'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
|
|
12911
|
+
)
|
|
12912
|
+
})
|
|
12913
|
+
).nullish().describe(
|
|
12914
|
+
"The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
12915
|
+
),
|
|
12830
12916
|
webhook_auth: zod3.boolean().describe(
|
|
12831
12917
|
"Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
|
|
12832
12918
|
),
|
|
@@ -12855,6 +12941,22 @@ var createTranscriptResponse = zod3.object({
|
|
|
12855
12941
|
).nullish().describe(
|
|
12856
12942
|
"An array of temporally-sequential word objects, one for each word in the transcript.\n"
|
|
12857
12943
|
),
|
|
12944
|
+
unredacted_words: zod3.array(
|
|
12945
|
+
zod3.object({
|
|
12946
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this word"),
|
|
12947
|
+
start: zod3.number().describe("The starting time, in milliseconds, for the word"),
|
|
12948
|
+
end: zod3.number().describe("The ending time, in milliseconds, for the word"),
|
|
12949
|
+
text: zod3.string().describe("The text of the word"),
|
|
12950
|
+
channel: zod3.string().nullish().describe(
|
|
12951
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
12952
|
+
),
|
|
12953
|
+
speaker: zod3.string().nullable().describe(
|
|
12954
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
12955
|
+
)
|
|
12956
|
+
})
|
|
12957
|
+
).nullish().describe(
|
|
12958
|
+
"The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
12959
|
+
),
|
|
12858
12960
|
acoustic_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
12859
12961
|
custom_topics: zod3.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
|
|
12860
12962
|
language_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
@@ -13030,7 +13132,7 @@ var getTranscriptResponse = zod3.object({
|
|
|
13030
13132
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
13031
13133
|
),
|
|
13032
13134
|
disfluencies: zod3.boolean().nullish().describe(
|
|
13033
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
13135
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
13034
13136
|
),
|
|
13035
13137
|
domain: zod3.string().nullish().describe(
|
|
13036
13138
|
'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
|
|
@@ -13053,12 +13155,20 @@ var getTranscriptResponse = zod3.object({
|
|
|
13053
13155
|
"email_address",
|
|
13054
13156
|
"event",
|
|
13055
13157
|
"filename",
|
|
13158
|
+
"gender",
|
|
13056
13159
|
"gender_sexuality",
|
|
13057
13160
|
"healthcare_number",
|
|
13058
13161
|
"injury",
|
|
13059
13162
|
"ip_address",
|
|
13060
13163
|
"language",
|
|
13061
13164
|
"location",
|
|
13165
|
+
"location_address",
|
|
13166
|
+
"location_address_street",
|
|
13167
|
+
"location_city",
|
|
13168
|
+
"location_coordinate",
|
|
13169
|
+
"location_country",
|
|
13170
|
+
"location_state",
|
|
13171
|
+
"location_zip",
|
|
13062
13172
|
"marital_status",
|
|
13063
13173
|
"medical_condition",
|
|
13064
13174
|
"medical_process",
|
|
@@ -13067,6 +13177,7 @@ var getTranscriptResponse = zod3.object({
|
|
|
13067
13177
|
"number_sequence",
|
|
13068
13178
|
"occupation",
|
|
13069
13179
|
"organization",
|
|
13180
|
+
"organization_medical_facility",
|
|
13070
13181
|
"passport_number",
|
|
13071
13182
|
"password",
|
|
13072
13183
|
"person_age",
|
|
@@ -13075,6 +13186,7 @@ var getTranscriptResponse = zod3.object({
|
|
|
13075
13186
|
"physical_attribute",
|
|
13076
13187
|
"political_affiliation",
|
|
13077
13188
|
"religion",
|
|
13189
|
+
"sexuality",
|
|
13078
13190
|
"statistics",
|
|
13079
13191
|
"time",
|
|
13080
13192
|
"url",
|
|
@@ -13379,6 +13491,24 @@ var getTranscriptResponse = zod3.object({
|
|
|
13379
13491
|
}).optional().describe(
|
|
13380
13492
|
"Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
|
|
13381
13493
|
),
|
|
13494
|
+
metadata: zod3.object({
|
|
13495
|
+
domain_used: zod3.string().nullish().describe(
|
|
13496
|
+
'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
|
|
13497
|
+
),
|
|
13498
|
+
warnings: zod3.array(
|
|
13499
|
+
zod3.object({
|
|
13500
|
+
message: zod3.string().describe("A human-readable description of the warning.")
|
|
13501
|
+
}).describe(
|
|
13502
|
+
"A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
|
|
13503
|
+
)
|
|
13504
|
+
).optional().describe(
|
|
13505
|
+
"Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
|
|
13506
|
+
)
|
|
13507
|
+
}).describe(
|
|
13508
|
+
"Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
|
|
13509
|
+
).or(zod3.null()).optional().describe(
|
|
13510
|
+
"Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
|
|
13511
|
+
),
|
|
13382
13512
|
multichannel: zod3.boolean().nullish().describe(
|
|
13383
13513
|
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
|
|
13384
13514
|
),
|
|
@@ -13426,12 +13556,20 @@ var getTranscriptResponse = zod3.object({
|
|
|
13426
13556
|
"email_address",
|
|
13427
13557
|
"event",
|
|
13428
13558
|
"filename",
|
|
13559
|
+
"gender",
|
|
13429
13560
|
"gender_sexuality",
|
|
13430
13561
|
"healthcare_number",
|
|
13431
13562
|
"injury",
|
|
13432
13563
|
"ip_address",
|
|
13433
13564
|
"language",
|
|
13434
13565
|
"location",
|
|
13566
|
+
"location_address",
|
|
13567
|
+
"location_address_street",
|
|
13568
|
+
"location_city",
|
|
13569
|
+
"location_coordinate",
|
|
13570
|
+
"location_country",
|
|
13571
|
+
"location_state",
|
|
13572
|
+
"location_zip",
|
|
13435
13573
|
"marital_status",
|
|
13436
13574
|
"medical_condition",
|
|
13437
13575
|
"medical_process",
|
|
@@ -13440,6 +13578,7 @@ var getTranscriptResponse = zod3.object({
|
|
|
13440
13578
|
"number_sequence",
|
|
13441
13579
|
"occupation",
|
|
13442
13580
|
"organization",
|
|
13581
|
+
"organization_medical_facility",
|
|
13443
13582
|
"passport_number",
|
|
13444
13583
|
"password",
|
|
13445
13584
|
"person_age",
|
|
@@ -13448,6 +13587,7 @@ var getTranscriptResponse = zod3.object({
|
|
|
13448
13587
|
"physical_attribute",
|
|
13449
13588
|
"political_affiliation",
|
|
13450
13589
|
"religion",
|
|
13590
|
+
"sexuality",
|
|
13451
13591
|
"statistics",
|
|
13452
13592
|
"time",
|
|
13453
13593
|
"url",
|
|
@@ -13455,12 +13595,17 @@ var getTranscriptResponse = zod3.object({
|
|
|
13455
13595
|
"username",
|
|
13456
13596
|
"vehicle_id",
|
|
13457
13597
|
"zodiac_sign"
|
|
13458
|
-
]).describe(
|
|
13598
|
+
]).describe(
|
|
13599
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
13600
|
+
)
|
|
13459
13601
|
).nullish().describe(
|
|
13460
13602
|
"The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13461
13603
|
),
|
|
13462
13604
|
redact_pii_sub: zod3.enum(["entity_name", "hash"]).optional().describe(
|
|
13463
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
13605
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
13606
|
+
),
|
|
13607
|
+
redact_pii_return_unredacted: zod3.boolean().nullish().describe(
|
|
13608
|
+
"Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13464
13609
|
),
|
|
13465
13610
|
sentiment_analysis: zod3.boolean().nullish().describe(
|
|
13466
13611
|
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
|
|
@@ -13597,20 +13742,23 @@ var getTranscriptResponse = zod3.object({
|
|
|
13597
13742
|
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
13598
13743
|
),
|
|
13599
13744
|
summary_model: zod3.string().nullish().describe(
|
|
13600
|
-
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
13745
|
+
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
|
|
13601
13746
|
),
|
|
13602
13747
|
summary_type: zod3.string().nullish().describe(
|
|
13603
|
-
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
13748
|
+
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
13604
13749
|
),
|
|
13605
|
-
remove_audio_tags: zod3.enum(["all"]).describe(
|
|
13606
|
-
|
|
13750
|
+
remove_audio_tags: zod3.enum(["all", "speaker"]).describe(
|
|
13751
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
13607
13752
|
).or(zod3.null()).optional().describe(
|
|
13608
|
-
|
|
13753
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
13609
13754
|
),
|
|
13610
13755
|
temperature: zod3.number().nullish().describe(
|
|
13611
13756
|
"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
13612
13757
|
),
|
|
13613
13758
|
text: zod3.string().nullish().describe("The textual transcript of your media file"),
|
|
13759
|
+
unredacted_text: zod3.string().nullish().describe(
|
|
13760
|
+
"The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13761
|
+
),
|
|
13614
13762
|
throttled: zod3.boolean().nullish().describe(
|
|
13615
13763
|
"True while a request is throttled and false when a request is no longer throttled"
|
|
13616
13764
|
),
|
|
@@ -13647,6 +13795,39 @@ var getTranscriptResponse = zod3.object({
|
|
|
13647
13795
|
).nullish().describe(
|
|
13648
13796
|
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
|
|
13649
13797
|
),
|
|
13798
|
+
unredacted_utterances: zod3.array(
|
|
13799
|
+
zod3.object({
|
|
13800
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this utterance"),
|
|
13801
|
+
start: zod3.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
|
|
13802
|
+
end: zod3.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
|
|
13803
|
+
text: zod3.string().describe("The text for this utterance"),
|
|
13804
|
+
words: zod3.array(
|
|
13805
|
+
zod3.object({
|
|
13806
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this word"),
|
|
13807
|
+
start: zod3.number().describe("The starting time, in milliseconds, for the word"),
|
|
13808
|
+
end: zod3.number().describe("The ending time, in milliseconds, for the word"),
|
|
13809
|
+
text: zod3.string().describe("The text of the word"),
|
|
13810
|
+
channel: zod3.string().nullish().describe(
|
|
13811
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
13812
|
+
),
|
|
13813
|
+
speaker: zod3.string().nullable().describe(
|
|
13814
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
13815
|
+
)
|
|
13816
|
+
})
|
|
13817
|
+
).describe("The words in the utterance."),
|
|
13818
|
+
channel: zod3.string().nullish().describe(
|
|
13819
|
+
"The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
13820
|
+
),
|
|
13821
|
+
speaker: zod3.string().describe(
|
|
13822
|
+
'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
|
|
13823
|
+
),
|
|
13824
|
+
translated_texts: zod3.record(zod3.string(), zod3.string()).optional().describe(
|
|
13825
|
+
'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
|
|
13826
|
+
)
|
|
13827
|
+
})
|
|
13828
|
+
).nullish().describe(
|
|
13829
|
+
"The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13830
|
+
),
|
|
13650
13831
|
webhook_auth: zod3.boolean().describe(
|
|
13651
13832
|
"Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
|
|
13652
13833
|
),
|
|
@@ -13675,6 +13856,22 @@ var getTranscriptResponse = zod3.object({
|
|
|
13675
13856
|
).nullish().describe(
|
|
13676
13857
|
"An array of temporally-sequential word objects, one for each word in the transcript.\n"
|
|
13677
13858
|
),
|
|
13859
|
+
unredacted_words: zod3.array(
|
|
13860
|
+
zod3.object({
|
|
13861
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this word"),
|
|
13862
|
+
start: zod3.number().describe("The starting time, in milliseconds, for the word"),
|
|
13863
|
+
end: zod3.number().describe("The ending time, in milliseconds, for the word"),
|
|
13864
|
+
text: zod3.string().describe("The text of the word"),
|
|
13865
|
+
channel: zod3.string().nullish().describe(
|
|
13866
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
13867
|
+
),
|
|
13868
|
+
speaker: zod3.string().nullable().describe(
|
|
13869
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
13870
|
+
)
|
|
13871
|
+
})
|
|
13872
|
+
).nullish().describe(
|
|
13873
|
+
"The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13874
|
+
),
|
|
13678
13875
|
acoustic_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
13679
13876
|
custom_topics: zod3.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
|
|
13680
13877
|
language_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
@@ -13810,7 +14007,7 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
13810
14007
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
13811
14008
|
),
|
|
13812
14009
|
disfluencies: zod3.boolean().nullish().describe(
|
|
13813
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
14010
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
13814
14011
|
),
|
|
13815
14012
|
domain: zod3.string().nullish().describe(
|
|
13816
14013
|
'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
|
|
@@ -13833,12 +14030,20 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
13833
14030
|
"email_address",
|
|
13834
14031
|
"event",
|
|
13835
14032
|
"filename",
|
|
14033
|
+
"gender",
|
|
13836
14034
|
"gender_sexuality",
|
|
13837
14035
|
"healthcare_number",
|
|
13838
14036
|
"injury",
|
|
13839
14037
|
"ip_address",
|
|
13840
14038
|
"language",
|
|
13841
14039
|
"location",
|
|
14040
|
+
"location_address",
|
|
14041
|
+
"location_address_street",
|
|
14042
|
+
"location_city",
|
|
14043
|
+
"location_coordinate",
|
|
14044
|
+
"location_country",
|
|
14045
|
+
"location_state",
|
|
14046
|
+
"location_zip",
|
|
13842
14047
|
"marital_status",
|
|
13843
14048
|
"medical_condition",
|
|
13844
14049
|
"medical_process",
|
|
@@ -13847,6 +14052,7 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
13847
14052
|
"number_sequence",
|
|
13848
14053
|
"occupation",
|
|
13849
14054
|
"organization",
|
|
14055
|
+
"organization_medical_facility",
|
|
13850
14056
|
"passport_number",
|
|
13851
14057
|
"password",
|
|
13852
14058
|
"person_age",
|
|
@@ -13855,6 +14061,7 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
13855
14061
|
"physical_attribute",
|
|
13856
14062
|
"political_affiliation",
|
|
13857
14063
|
"religion",
|
|
14064
|
+
"sexuality",
|
|
13858
14065
|
"statistics",
|
|
13859
14066
|
"time",
|
|
13860
14067
|
"url",
|
|
@@ -14159,6 +14366,24 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
14159
14366
|
}).optional().describe(
|
|
14160
14367
|
"Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
|
|
14161
14368
|
),
|
|
14369
|
+
metadata: zod3.object({
|
|
14370
|
+
domain_used: zod3.string().nullish().describe(
|
|
14371
|
+
'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
|
|
14372
|
+
),
|
|
14373
|
+
warnings: zod3.array(
|
|
14374
|
+
zod3.object({
|
|
14375
|
+
message: zod3.string().describe("A human-readable description of the warning.")
|
|
14376
|
+
}).describe(
|
|
14377
|
+
"A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
|
|
14378
|
+
)
|
|
14379
|
+
).optional().describe(
|
|
14380
|
+
"Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
|
|
14381
|
+
)
|
|
14382
|
+
}).describe(
|
|
14383
|
+
"Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
|
|
14384
|
+
).or(zod3.null()).optional().describe(
|
|
14385
|
+
"Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
|
|
14386
|
+
),
|
|
14162
14387
|
multichannel: zod3.boolean().nullish().describe(
|
|
14163
14388
|
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
|
|
14164
14389
|
),
|
|
@@ -14206,12 +14431,20 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
14206
14431
|
"email_address",
|
|
14207
14432
|
"event",
|
|
14208
14433
|
"filename",
|
|
14434
|
+
"gender",
|
|
14209
14435
|
"gender_sexuality",
|
|
14210
14436
|
"healthcare_number",
|
|
14211
14437
|
"injury",
|
|
14212
14438
|
"ip_address",
|
|
14213
14439
|
"language",
|
|
14214
14440
|
"location",
|
|
14441
|
+
"location_address",
|
|
14442
|
+
"location_address_street",
|
|
14443
|
+
"location_city",
|
|
14444
|
+
"location_coordinate",
|
|
14445
|
+
"location_country",
|
|
14446
|
+
"location_state",
|
|
14447
|
+
"location_zip",
|
|
14215
14448
|
"marital_status",
|
|
14216
14449
|
"medical_condition",
|
|
14217
14450
|
"medical_process",
|
|
@@ -14220,6 +14453,7 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
14220
14453
|
"number_sequence",
|
|
14221
14454
|
"occupation",
|
|
14222
14455
|
"organization",
|
|
14456
|
+
"organization_medical_facility",
|
|
14223
14457
|
"passport_number",
|
|
14224
14458
|
"password",
|
|
14225
14459
|
"person_age",
|
|
@@ -14228,6 +14462,7 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
14228
14462
|
"physical_attribute",
|
|
14229
14463
|
"political_affiliation",
|
|
14230
14464
|
"religion",
|
|
14465
|
+
"sexuality",
|
|
14231
14466
|
"statistics",
|
|
14232
14467
|
"time",
|
|
14233
14468
|
"url",
|
|
@@ -14235,12 +14470,17 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
14235
14470
|
"username",
|
|
14236
14471
|
"vehicle_id",
|
|
14237
14472
|
"zodiac_sign"
|
|
14238
|
-
]).describe(
|
|
14473
|
+
]).describe(
|
|
14474
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
14475
|
+
)
|
|
14239
14476
|
).nullish().describe(
|
|
14240
14477
|
"The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14241
14478
|
),
|
|
14242
14479
|
redact_pii_sub: zod3.enum(["entity_name", "hash"]).optional().describe(
|
|
14243
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
14480
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
14481
|
+
),
|
|
14482
|
+
redact_pii_return_unredacted: zod3.boolean().nullish().describe(
|
|
14483
|
+
"Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14244
14484
|
),
|
|
14245
14485
|
sentiment_analysis: zod3.boolean().nullish().describe(
|
|
14246
14486
|
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
|
|
@@ -14377,20 +14617,23 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
14377
14617
|
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
14378
14618
|
),
|
|
14379
14619
|
summary_model: zod3.string().nullish().describe(
|
|
14380
|
-
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
14620
|
+
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
|
|
14381
14621
|
),
|
|
14382
14622
|
summary_type: zod3.string().nullish().describe(
|
|
14383
|
-
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
14623
|
+
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
14384
14624
|
),
|
|
14385
|
-
remove_audio_tags: zod3.enum(["all"]).describe(
|
|
14386
|
-
|
|
14625
|
+
remove_audio_tags: zod3.enum(["all", "speaker"]).describe(
|
|
14626
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
14387
14627
|
).or(zod3.null()).optional().describe(
|
|
14388
|
-
|
|
14628
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
14389
14629
|
),
|
|
14390
14630
|
temperature: zod3.number().nullish().describe(
|
|
14391
14631
|
"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
14392
14632
|
),
|
|
14393
14633
|
text: zod3.string().nullish().describe("The textual transcript of your media file"),
|
|
14634
|
+
unredacted_text: zod3.string().nullish().describe(
|
|
14635
|
+
"The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14636
|
+
),
|
|
14394
14637
|
throttled: zod3.boolean().nullish().describe(
|
|
14395
14638
|
"True while a request is throttled and false when a request is no longer throttled"
|
|
14396
14639
|
),
|
|
@@ -14427,6 +14670,39 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
14427
14670
|
).nullish().describe(
|
|
14428
14671
|
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
|
|
14429
14672
|
),
|
|
14673
|
+
unredacted_utterances: zod3.array(
|
|
14674
|
+
zod3.object({
|
|
14675
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this utterance"),
|
|
14676
|
+
start: zod3.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
|
|
14677
|
+
end: zod3.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
|
|
14678
|
+
text: zod3.string().describe("The text for this utterance"),
|
|
14679
|
+
words: zod3.array(
|
|
14680
|
+
zod3.object({
|
|
14681
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this word"),
|
|
14682
|
+
start: zod3.number().describe("The starting time, in milliseconds, for the word"),
|
|
14683
|
+
end: zod3.number().describe("The ending time, in milliseconds, for the word"),
|
|
14684
|
+
text: zod3.string().describe("The text of the word"),
|
|
14685
|
+
channel: zod3.string().nullish().describe(
|
|
14686
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
14687
|
+
),
|
|
14688
|
+
speaker: zod3.string().nullable().describe(
|
|
14689
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
14690
|
+
)
|
|
14691
|
+
})
|
|
14692
|
+
).describe("The words in the utterance."),
|
|
14693
|
+
channel: zod3.string().nullish().describe(
|
|
14694
|
+
"The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
14695
|
+
),
|
|
14696
|
+
speaker: zod3.string().describe(
|
|
14697
|
+
'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
|
|
14698
|
+
),
|
|
14699
|
+
translated_texts: zod3.record(zod3.string(), zod3.string()).optional().describe(
|
|
14700
|
+
'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
|
|
14701
|
+
)
|
|
14702
|
+
})
|
|
14703
|
+
).nullish().describe(
|
|
14704
|
+
"The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14705
|
+
),
|
|
14430
14706
|
webhook_auth: zod3.boolean().describe(
|
|
14431
14707
|
"Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
|
|
14432
14708
|
),
|
|
@@ -14455,6 +14731,22 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
14455
14731
|
).nullish().describe(
|
|
14456
14732
|
"An array of temporally-sequential word objects, one for each word in the transcript.\n"
|
|
14457
14733
|
),
|
|
14734
|
+
unredacted_words: zod3.array(
|
|
14735
|
+
zod3.object({
|
|
14736
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this word"),
|
|
14737
|
+
start: zod3.number().describe("The starting time, in milliseconds, for the word"),
|
|
14738
|
+
end: zod3.number().describe("The ending time, in milliseconds, for the word"),
|
|
14739
|
+
text: zod3.string().describe("The text of the word"),
|
|
14740
|
+
channel: zod3.string().nullish().describe(
|
|
14741
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
14742
|
+
),
|
|
14743
|
+
speaker: zod3.string().nullable().describe(
|
|
14744
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
14745
|
+
)
|
|
14746
|
+
})
|
|
14747
|
+
).nullish().describe(
|
|
14748
|
+
"The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14749
|
+
),
|
|
14458
14750
|
acoustic_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
14459
14751
|
custom_topics: zod3.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
|
|
14460
14752
|
language_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
@@ -14610,7 +14902,21 @@ var streamingTranscriberParams = zod4.object({
|
|
|
14610
14902
|
inactivityTimeout: zod4.number().optional().describe("From SDK v3"),
|
|
14611
14903
|
speakerLabels: zod4.boolean().optional().describe("From SDK v3"),
|
|
14612
14904
|
maxSpeakers: zod4.number().optional().describe("From SDK v3"),
|
|
14613
|
-
|
|
14905
|
+
voiceFocus: zod4.unknown().optional().describe("From SDK v3"),
|
|
14906
|
+
voiceFocusThreshold: zod4.number().optional().describe("From SDK v3"),
|
|
14907
|
+
continuousPartials: zod4.boolean().optional().describe("From SDK v3"),
|
|
14908
|
+
interruptionDelay: zod4.number().optional().describe("From SDK v3"),
|
|
14909
|
+
turnLeftPadMs: zod4.number().optional().describe("From SDK v3"),
|
|
14910
|
+
customerSupportAudioCapture: zod4.boolean().optional().describe("From SDK v3"),
|
|
14911
|
+
includePartialTurns: zod4.boolean().optional().describe("From SDK v3"),
|
|
14912
|
+
redactPii: zod4.boolean().optional().describe("From SDK v3"),
|
|
14913
|
+
redactPiiPolicies: zod4.unknown().optional().describe("From SDK v3"),
|
|
14914
|
+
redactPiiSub: zod4.unknown().optional().describe("From SDK v3"),
|
|
14915
|
+
llmGateway: zod4.unknown().optional().describe("From SDK v3"),
|
|
14916
|
+
webhookUrl: zod4.string().optional().describe("From SDK v3"),
|
|
14917
|
+
webhookAuthHeaderName: zod4.string().optional().describe("From SDK v3"),
|
|
14918
|
+
webhookAuthHeaderValue: zod4.string().optional().describe("From SDK v3"),
|
|
14919
|
+
mode: zod4.unknown().describe("From SDK v3")
|
|
14614
14920
|
});
|
|
14615
14921
|
var streamingUpdateConfigParams = zod4.object({
|
|
14616
14922
|
end_utterance_silence_threshold: zod4.number().min(0).max(2e4).optional().describe("The duration threshold in milliseconds"),
|
|
@@ -14622,7 +14928,9 @@ var streamingUpdateConfigParams = zod4.object({
|
|
|
14622
14928
|
format_turns: zod4.boolean().optional().describe("From SDK v3"),
|
|
14623
14929
|
keyterms_prompt: zod4.array(zod4.string()).optional().describe("From SDK v3"),
|
|
14624
14930
|
prompt: zod4.string().optional().describe("From SDK v3"),
|
|
14625
|
-
filter_profanity: zod4.boolean().optional().describe("From SDK v3")
|
|
14931
|
+
filter_profanity: zod4.boolean().optional().describe("From SDK v3"),
|
|
14932
|
+
interruption_delay: zod4.number().optional().describe("From SDK v3"),
|
|
14933
|
+
turn_left_pad_ms: zod4.number().optional().describe("From SDK v3")
|
|
14626
14934
|
});
|
|
14627
14935
|
|
|
14628
14936
|
// src/generated/gladia/api/gladiaControlAPI.zod.ts
|
|
@@ -15371,7 +15679,7 @@ var preRecordedControllerInitPreRecordedJobV2BodyNamedEntityRecognitionDefault =
|
|
|
15371
15679
|
var preRecordedControllerInitPreRecordedJobV2BodyCustomSpellingDefault = false;
|
|
15372
15680
|
var preRecordedControllerInitPreRecordedJobV2BodySentimentAnalysisDefault = false;
|
|
15373
15681
|
var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmDefault = false;
|
|
15374
|
-
var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-
|
|
15682
|
+
var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
15375
15683
|
var preRecordedControllerInitPreRecordedJobV2BodyPiiRedactionDefault = false;
|
|
15376
15684
|
var preRecordedControllerInitPreRecordedJobV2BodySentencesDefault = false;
|
|
15377
15685
|
var preRecordedControllerInitPreRecordedJobV2BodyPunctuationEnhancedDefault = false;
|
|
@@ -15660,23 +15968,23 @@ var preRecordedControllerInitPreRecordedJobV2Body = zod5.object({
|
|
|
15660
15968
|
"Forces the translation to use informal language forms when available in the target language."
|
|
15661
15969
|
)
|
|
15662
15970
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
15663
|
-
summarization: zod5.boolean().optional().describe("
|
|
15971
|
+
summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
|
|
15664
15972
|
summarization_config: zod5.object({
|
|
15665
15973
|
type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(preRecordedControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault).describe("The type of summarization to apply")
|
|
15666
|
-
}).optional().describe("
|
|
15974
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
15667
15975
|
named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
15668
15976
|
custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
15669
15977
|
custom_spelling_config: zod5.object({
|
|
15670
15978
|
spelling_dictionary: zod5.record(zod5.string(), zod5.array(zod5.string())).describe("The list of spelling applied on the audio transcription")
|
|
15671
15979
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
15672
15980
|
sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
15673
|
-
audio_to_llm: zod5.boolean().optional().describe("
|
|
15981
|
+
audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
15674
15982
|
audio_to_llm_config: zod5.object({
|
|
15675
15983
|
prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
15676
15984
|
model: zod5.string().default(preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault).describe(
|
|
15677
15985
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
15678
15986
|
)
|
|
15679
|
-
}).optional().describe("
|
|
15987
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
15680
15988
|
pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
15681
15989
|
pii_redaction_config: zod5.object({
|
|
15682
15990
|
entity_types: zod5.enum([
|
|
@@ -15931,7 +16239,7 @@ var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsNamed
|
|
|
15931
16239
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsCustomSpellingDefault = false;
|
|
15932
16240
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
|
|
15933
16241
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmDefault = false;
|
|
15934
|
-
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
16242
|
+
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
15935
16243
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsPiiRedactionDefault = false;
|
|
15936
16244
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSentencesDefault = false;
|
|
15937
16245
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
|
|
@@ -16279,12 +16587,12 @@ var preRecordedControllerGetPreRecordedJobsV2Response = zod5.object({
|
|
|
16279
16587
|
"Forces the translation to use informal language forms when available in the target language."
|
|
16280
16588
|
)
|
|
16281
16589
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
16282
|
-
summarization: zod5.boolean().optional().describe("
|
|
16590
|
+
summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
|
|
16283
16591
|
summarization_config: zod5.object({
|
|
16284
16592
|
type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
16285
16593
|
preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
|
|
16286
16594
|
).describe("The type of summarization to apply")
|
|
16287
|
-
}).optional().describe("
|
|
16595
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
16288
16596
|
named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
16289
16597
|
custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
16290
16598
|
custom_spelling_config: zod5.object({
|
|
@@ -16293,7 +16601,7 @@ var preRecordedControllerGetPreRecordedJobsV2Response = zod5.object({
|
|
|
16293
16601
|
"**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
|
|
16294
16602
|
),
|
|
16295
16603
|
sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
16296
|
-
audio_to_llm: zod5.boolean().optional().describe("
|
|
16604
|
+
audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
16297
16605
|
audio_to_llm_config: zod5.object({
|
|
16298
16606
|
prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
16299
16607
|
model: zod5.string().default(
|
|
@@ -16301,7 +16609,7 @@ var preRecordedControllerGetPreRecordedJobsV2Response = zod5.object({
|
|
|
16301
16609
|
).describe(
|
|
16302
16610
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
16303
16611
|
)
|
|
16304
|
-
}).optional().describe("
|
|
16612
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
16305
16613
|
pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
16306
16614
|
pii_redaction_config: zod5.object({
|
|
16307
16615
|
entity_types: zod5.enum([
|
|
@@ -17438,7 +17746,7 @@ var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsNamedEntityReco
|
|
|
17438
17746
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsCustomSpellingDefault = false;
|
|
17439
17747
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSentimentAnalysisDefault = false;
|
|
17440
17748
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmDefault = false;
|
|
17441
|
-
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
17749
|
+
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
17442
17750
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsPiiRedactionDefault = false;
|
|
17443
17751
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSentencesDefault = false;
|
|
17444
17752
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsPunctuationEnhancedDefault = false;
|
|
@@ -17779,19 +18087,19 @@ var preRecordedControllerGetPreRecordedJobV2Response = zod5.object({
|
|
|
17779
18087
|
"Forces the translation to use informal language forms when available in the target language."
|
|
17780
18088
|
)
|
|
17781
18089
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
17782
|
-
summarization: zod5.boolean().optional().describe("
|
|
18090
|
+
summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
|
|
17783
18091
|
summarization_config: zod5.object({
|
|
17784
18092
|
type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
17785
18093
|
preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSummarizationConfigTypeDefault
|
|
17786
18094
|
).describe("The type of summarization to apply")
|
|
17787
|
-
}).optional().describe("
|
|
18095
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
17788
18096
|
named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
17789
18097
|
custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
17790
18098
|
custom_spelling_config: zod5.object({
|
|
17791
18099
|
spelling_dictionary: zod5.record(zod5.string(), zod5.array(zod5.string())).describe("The list of spelling applied on the audio transcription")
|
|
17792
18100
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
17793
18101
|
sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
17794
|
-
audio_to_llm: zod5.boolean().optional().describe("
|
|
18102
|
+
audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
17795
18103
|
audio_to_llm_config: zod5.object({
|
|
17796
18104
|
prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
17797
18105
|
model: zod5.string().default(
|
|
@@ -17799,7 +18107,7 @@ var preRecordedControllerGetPreRecordedJobV2Response = zod5.object({
|
|
|
17799
18107
|
).describe(
|
|
17800
18108
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
17801
18109
|
)
|
|
17802
|
-
}).optional().describe("
|
|
18110
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
17803
18111
|
pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
17804
18112
|
pii_redaction_config: zod5.object({
|
|
17805
18113
|
entity_types: zod5.enum([
|
|
@@ -18912,7 +19220,7 @@ var transcriptionControllerInitPreRecordedJobV2BodyNamedEntityRecognitionDefault
|
|
|
18912
19220
|
var transcriptionControllerInitPreRecordedJobV2BodyCustomSpellingDefault = false;
|
|
18913
19221
|
var transcriptionControllerInitPreRecordedJobV2BodySentimentAnalysisDefault = false;
|
|
18914
19222
|
var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmDefault = false;
|
|
18915
|
-
var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-
|
|
19223
|
+
var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
18916
19224
|
var transcriptionControllerInitPreRecordedJobV2BodyPiiRedactionDefault = false;
|
|
18917
19225
|
var transcriptionControllerInitPreRecordedJobV2BodySentencesDefault = false;
|
|
18918
19226
|
var transcriptionControllerInitPreRecordedJobV2BodyPunctuationEnhancedDefault = false;
|
|
@@ -19205,23 +19513,23 @@ var transcriptionControllerInitPreRecordedJobV2Body = zod5.object({
|
|
|
19205
19513
|
"Forces the translation to use informal language forms when available in the target language."
|
|
19206
19514
|
)
|
|
19207
19515
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
19208
|
-
summarization: zod5.boolean().optional().describe("
|
|
19516
|
+
summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
|
|
19209
19517
|
summarization_config: zod5.object({
|
|
19210
19518
|
type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(transcriptionControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault).describe("The type of summarization to apply")
|
|
19211
|
-
}).optional().describe("
|
|
19519
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
19212
19520
|
named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
19213
19521
|
custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
19214
19522
|
custom_spelling_config: zod5.object({
|
|
19215
19523
|
spelling_dictionary: zod5.record(zod5.string(), zod5.array(zod5.string())).describe("The list of spelling applied on the audio transcription")
|
|
19216
19524
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
19217
19525
|
sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
19218
|
-
audio_to_llm: zod5.boolean().optional().describe("
|
|
19526
|
+
audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
19219
19527
|
audio_to_llm_config: zod5.object({
|
|
19220
19528
|
prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
19221
19529
|
model: zod5.string().default(transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault).describe(
|
|
19222
19530
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
19223
19531
|
)
|
|
19224
|
-
}).optional().describe("
|
|
19532
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
19225
19533
|
pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
19226
19534
|
pii_redaction_config: zod5.object({
|
|
19227
19535
|
entity_types: zod5.enum([
|
|
@@ -19479,7 +19787,7 @@ var transcriptionControllerListV2ResponseItemsItemRequestParamsNamedEntityRecogn
|
|
|
19479
19787
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsCustomSpellingDefault = false;
|
|
19480
19788
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
|
|
19481
19789
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmDefault = false;
|
|
19482
|
-
var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
19790
|
+
var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
19483
19791
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsPiiRedactionDefault = false;
|
|
19484
19792
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsSentencesDefault = false;
|
|
19485
19793
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
|
|
@@ -19890,12 +20198,12 @@ var transcriptionControllerListV2Response = zod5.object({
|
|
|
19890
20198
|
"Forces the translation to use informal language forms when available in the target language."
|
|
19891
20199
|
)
|
|
19892
20200
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
19893
|
-
summarization: zod5.boolean().optional().describe("
|
|
20201
|
+
summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
|
|
19894
20202
|
summarization_config: zod5.object({
|
|
19895
20203
|
type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
19896
20204
|
transcriptionControllerListV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
|
|
19897
20205
|
).describe("The type of summarization to apply")
|
|
19898
|
-
}).optional().describe("
|
|
20206
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
19899
20207
|
named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
19900
20208
|
custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
19901
20209
|
custom_spelling_config: zod5.object({
|
|
@@ -19904,7 +20212,7 @@ var transcriptionControllerListV2Response = zod5.object({
|
|
|
19904
20212
|
"**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
|
|
19905
20213
|
),
|
|
19906
20214
|
sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
19907
|
-
audio_to_llm: zod5.boolean().optional().describe("
|
|
20215
|
+
audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
19908
20216
|
audio_to_llm_config: zod5.object({
|
|
19909
20217
|
prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
19910
20218
|
model: zod5.string().default(
|
|
@@ -19912,7 +20220,7 @@ var transcriptionControllerListV2Response = zod5.object({
|
|
|
19912
20220
|
).describe(
|
|
19913
20221
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
19914
20222
|
)
|
|
19915
|
-
}).optional().describe("
|
|
20223
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
19916
20224
|
pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
19917
20225
|
pii_redaction_config: zod5.object({
|
|
19918
20226
|
entity_types: zod5.enum([
|
|
@@ -22230,7 +22538,7 @@ var transcriptionControllerGetTranscriptV2ResponseRequestParamsNamedEntityRecogn
|
|
|
22230
22538
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsCustomSpellingDefault = false;
|
|
22231
22539
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsSentimentAnalysisDefault = false;
|
|
22232
22540
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmDefault = false;
|
|
22233
|
-
var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
22541
|
+
var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
22234
22542
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsPiiRedactionDefault = false;
|
|
22235
22543
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsSentencesDefault = false;
|
|
22236
22544
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsPunctuationEnhancedDefault = false;
|
|
@@ -22635,19 +22943,19 @@ var transcriptionControllerGetTranscriptV2Response = zod5.discriminatedUnion("ki
|
|
|
22635
22943
|
"Forces the translation to use informal language forms when available in the target language."
|
|
22636
22944
|
)
|
|
22637
22945
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
22638
|
-
summarization: zod5.boolean().optional().describe("
|
|
22946
|
+
summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
|
|
22639
22947
|
summarization_config: zod5.object({
|
|
22640
22948
|
type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
22641
22949
|
transcriptionControllerGetTranscriptV2ResponseRequestParamsSummarizationConfigTypeDefault
|
|
22642
22950
|
).describe("The type of summarization to apply")
|
|
22643
|
-
}).optional().describe("
|
|
22951
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
22644
22952
|
named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
22645
22953
|
custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
22646
22954
|
custom_spelling_config: zod5.object({
|
|
22647
22955
|
spelling_dictionary: zod5.record(zod5.string(), zod5.array(zod5.string())).describe("The list of spelling applied on the audio transcription")
|
|
22648
22956
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
22649
22957
|
sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
22650
|
-
audio_to_llm: zod5.boolean().optional().describe("
|
|
22958
|
+
audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
22651
22959
|
audio_to_llm_config: zod5.object({
|
|
22652
22960
|
prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
22653
22961
|
model: zod5.string().default(
|
|
@@ -22655,7 +22963,7 @@ var transcriptionControllerGetTranscriptV2Response = zod5.discriminatedUnion("ki
|
|
|
22655
22963
|
).describe(
|
|
22656
22964
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
22657
22965
|
)
|
|
22658
|
-
}).optional().describe("
|
|
22966
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
22659
22967
|
pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
22660
22968
|
pii_redaction_config: zod5.object({
|
|
22661
22969
|
entity_types: zod5.enum([
|
|
@@ -25367,7 +25675,7 @@ var historyControllerGetListV1ResponseItemsItemRequestParamsNamedEntityRecogniti
|
|
|
25367
25675
|
var historyControllerGetListV1ResponseItemsItemRequestParamsCustomSpellingDefault = false;
|
|
25368
25676
|
var historyControllerGetListV1ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
|
|
25369
25677
|
var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmDefault = false;
|
|
25370
|
-
var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
25678
|
+
var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
25371
25679
|
var historyControllerGetListV1ResponseItemsItemRequestParamsPiiRedactionDefault = false;
|
|
25372
25680
|
var historyControllerGetListV1ResponseItemsItemRequestParamsSentencesDefault = false;
|
|
25373
25681
|
var historyControllerGetListV1ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
|
|
@@ -25778,12 +26086,12 @@ var historyControllerGetListV1Response = zod5.object({
|
|
|
25778
26086
|
"Forces the translation to use informal language forms when available in the target language."
|
|
25779
26087
|
)
|
|
25780
26088
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
25781
|
-
summarization: zod5.boolean().optional().describe("
|
|
26089
|
+
summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
|
|
25782
26090
|
summarization_config: zod5.object({
|
|
25783
26091
|
type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
25784
26092
|
historyControllerGetListV1ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
|
|
25785
26093
|
).describe("The type of summarization to apply")
|
|
25786
|
-
}).optional().describe("
|
|
26094
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
25787
26095
|
named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
25788
26096
|
custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
25789
26097
|
custom_spelling_config: zod5.object({
|
|
@@ -25792,7 +26100,7 @@ var historyControllerGetListV1Response = zod5.object({
|
|
|
25792
26100
|
"**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
|
|
25793
26101
|
),
|
|
25794
26102
|
sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
25795
|
-
audio_to_llm: zod5.boolean().optional().describe("
|
|
26103
|
+
audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
25796
26104
|
audio_to_llm_config: zod5.object({
|
|
25797
26105
|
prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
25798
26106
|
model: zod5.string().default(
|
|
@@ -25800,7 +26108,7 @@ var historyControllerGetListV1Response = zod5.object({
|
|
|
25800
26108
|
).describe(
|
|
25801
26109
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
25802
26110
|
)
|
|
25803
|
-
}).optional().describe("
|
|
26111
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
25804
26112
|
pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
25805
26113
|
pii_redaction_config: zod5.object({
|
|
25806
26114
|
entity_types: zod5.enum([
|
|
@@ -31045,6 +31353,7 @@ __export(openAIAudioRealtimeAPI_zod_exports, {
|
|
|
31045
31353
|
createRealtimeClientSecretBodySessionPromptVariablesTypeDefault: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefault,
|
|
31046
31354
|
createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultOne: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultOne,
|
|
31047
31355
|
createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultTwo: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultTwo,
|
|
31356
|
+
createRealtimeClientSecretBodySessionReasoningEffortDefault: () => createRealtimeClientSecretBodySessionReasoningEffortDefault,
|
|
31048
31357
|
createRealtimeClientSecretBodySessionToolChoiceDefault: () => createRealtimeClientSecretBodySessionToolChoiceDefault,
|
|
31049
31358
|
createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne: () => createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne,
|
|
31050
31359
|
createRealtimeClientSecretBodySessionTracingDefault: () => createRealtimeClientSecretBodySessionTracingDefault,
|
|
@@ -31069,6 +31378,7 @@ __export(openAIAudioRealtimeAPI_zod_exports, {
|
|
|
31069
31378
|
createRealtimeClientSecretResponseSessionPromptVariablesTypeDefault: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefault,
|
|
31070
31379
|
createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultOne: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultOne,
|
|
31071
31380
|
createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo,
|
|
31381
|
+
createRealtimeClientSecretResponseSessionReasoningEffortDefault: () => createRealtimeClientSecretResponseSessionReasoningEffortDefault,
|
|
31072
31382
|
createRealtimeClientSecretResponseSessionToolChoiceDefault: () => createRealtimeClientSecretResponseSessionToolChoiceDefault,
|
|
31073
31383
|
createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne: () => createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne,
|
|
31074
31384
|
createRealtimeClientSecretResponseSessionTracingDefaultOne: () => createRealtimeClientSecretResponseSessionTracingDefaultOne,
|
|
@@ -31425,6 +31735,7 @@ var createRealtimeClientSecretBodySessionTracingDefaultOne = "auto";
|
|
|
31425
31735
|
var createRealtimeClientSecretBodySessionTracingDefault = null;
|
|
31426
31736
|
var createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne = "always";
|
|
31427
31737
|
var createRealtimeClientSecretBodySessionToolChoiceDefault = "auto";
|
|
31738
|
+
var createRealtimeClientSecretBodySessionReasoningEffortDefault = "low";
|
|
31428
31739
|
var createRealtimeClientSecretBodySessionTruncationRetentionRatioMin = 0;
|
|
31429
31740
|
var createRealtimeClientSecretBodySessionTruncationRetentionRatioMax = 1;
|
|
31430
31741
|
var createRealtimeClientSecretBodySessionTruncationTokenLimitsPostInstructionsMin = 0;
|
|
@@ -31460,6 +31771,7 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
31460
31771
|
zod6.enum([
|
|
31461
31772
|
"gpt-realtime",
|
|
31462
31773
|
"gpt-realtime-1.5",
|
|
31774
|
+
"gpt-realtime-2",
|
|
31463
31775
|
"gpt-realtime-2025-08-28",
|
|
31464
31776
|
"gpt-4o-realtime-preview",
|
|
31465
31777
|
"gpt-4o-realtime-preview-2024-10-01",
|
|
@@ -31500,16 +31812,20 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
31500
31812
|
"gpt-4o-mini-transcribe",
|
|
31501
31813
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
31502
31814
|
"gpt-4o-transcribe",
|
|
31503
|
-
"gpt-4o-transcribe-diarize"
|
|
31815
|
+
"gpt-4o-transcribe-diarize",
|
|
31816
|
+
"gpt-realtime-whisper"
|
|
31504
31817
|
])
|
|
31505
31818
|
).optional().describe(
|
|
31506
|
-
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`,
|
|
31819
|
+
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
|
|
31507
31820
|
),
|
|
31508
31821
|
language: zod6.string().optional().describe(
|
|
31509
31822
|
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
31510
31823
|
),
|
|
31511
31824
|
prompt: zod6.string().optional().describe(
|
|
31512
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
31825
|
+
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
|
|
31826
|
+
),
|
|
31827
|
+
delay: zod6.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
|
|
31828
|
+
"Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
|
|
31513
31829
|
)
|
|
31514
31830
|
}).optional(),
|
|
31515
31831
|
noise_reduction: zod6.object({
|
|
@@ -31576,7 +31892,7 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
31576
31892
|
"Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
|
|
31577
31893
|
)
|
|
31578
31894
|
]).describe(
|
|
31579
|
-
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
|
|
31895
|
+
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
|
|
31580
31896
|
).or(zod6.null()).optional()
|
|
31581
31897
|
}).optional(),
|
|
31582
31898
|
output: zod6.object({
|
|
@@ -31649,7 +31965,7 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
31649
31965
|
server_label: zod6.string().describe(
|
|
31650
31966
|
"A label for this MCP server, used to identify it in tool calls.\n"
|
|
31651
31967
|
),
|
|
31652
|
-
server_url: zod6.string().optional().describe(
|
|
31968
|
+
server_url: zod6.string().url().optional().describe(
|
|
31653
31969
|
"The URL for the MCP server. One of `server_url` or `connector_id` must be\nprovided.\n"
|
|
31654
31970
|
),
|
|
31655
31971
|
connector_id: zod6.enum([
|
|
@@ -31727,6 +32043,16 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
31727
32043
|
).default(createRealtimeClientSecretBodySessionToolChoiceDefault).describe(
|
|
31728
32044
|
"How the model chooses tools. Provide one of the string modes or force a specific\nfunction/MCP tool.\n"
|
|
31729
32045
|
),
|
|
32046
|
+
parallel_tool_calls: zod6.boolean().optional().describe(
|
|
32047
|
+
"Whether the model may call multiple tools in parallel. Only supported by\nreasoning Realtime models such as `gpt-realtime-2`.\n"
|
|
32048
|
+
),
|
|
32049
|
+
reasoning: zod6.object({
|
|
32050
|
+
effort: zod6.enum(["minimal", "low", "medium", "high", "xhigh"]).default(createRealtimeClientSecretBodySessionReasoningEffortDefault).describe(
|
|
32051
|
+
"Constrains effort on reasoning for reasoning-capable Realtime models such as\n`gpt-realtime-2`.\n"
|
|
32052
|
+
)
|
|
32053
|
+
}).optional().describe(
|
|
32054
|
+
"Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.\n"
|
|
32055
|
+
),
|
|
31730
32056
|
max_output_tokens: zod6.number().or(zod6.enum(["inf"])).optional().describe(
|
|
31731
32057
|
"Maximum number of output tokens for a single assistant response,\ninclusive of tool calls. Provide an integer between 1 and 4096 to\nlimit output tokens, or `inf` for the maximum available tokens for a\ngiven model. Defaults to `inf`.\n"
|
|
31732
32058
|
),
|
|
@@ -31766,7 +32092,7 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
31766
32092
|
).or(
|
|
31767
32093
|
zod6.object({
|
|
31768
32094
|
type: zod6.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
|
|
31769
|
-
image_url: zod6.string().describe(
|
|
32095
|
+
image_url: zod6.string().url().describe(
|
|
31770
32096
|
"The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
|
|
31771
32097
|
).or(zod6.null()).optional(),
|
|
31772
32098
|
file_id: zod6.string().describe("The ID of the file to be sent to the model.").or(zod6.null()).optional(),
|
|
@@ -31780,7 +32106,7 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
31780
32106
|
file_id: zod6.string().describe("The ID of the file to be sent to the model.").or(zod6.null()).optional(),
|
|
31781
32107
|
filename: zod6.string().optional().describe("The name of the file to be sent to the model."),
|
|
31782
32108
|
file_data: zod6.string().optional().describe("The content of the file to be sent to the model.\n"),
|
|
31783
|
-
file_url: zod6.string().optional().describe("The URL of the file to be sent to the model."),
|
|
32109
|
+
file_url: zod6.string().url().optional().describe("The URL of the file to be sent to the model."),
|
|
31784
32110
|
detail: zod6.enum(["low", "high"]).optional()
|
|
31785
32111
|
}).describe("A file input to the model.")
|
|
31786
32112
|
)
|
|
@@ -31816,16 +32142,20 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
31816
32142
|
"gpt-4o-mini-transcribe",
|
|
31817
32143
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
31818
32144
|
"gpt-4o-transcribe",
|
|
31819
|
-
"gpt-4o-transcribe-diarize"
|
|
32145
|
+
"gpt-4o-transcribe-diarize",
|
|
32146
|
+
"gpt-realtime-whisper"
|
|
31820
32147
|
])
|
|
31821
32148
|
).optional().describe(
|
|
31822
|
-
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`,
|
|
32149
|
+
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
|
|
31823
32150
|
),
|
|
31824
32151
|
language: zod6.string().optional().describe(
|
|
31825
32152
|
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
31826
32153
|
),
|
|
31827
32154
|
prompt: zod6.string().optional().describe(
|
|
31828
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
32155
|
+
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
|
|
32156
|
+
),
|
|
32157
|
+
delay: zod6.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
|
|
32158
|
+
"Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
|
|
31829
32159
|
)
|
|
31830
32160
|
}).optional(),
|
|
31831
32161
|
noise_reduction: zod6.object({
|
|
@@ -31892,7 +32222,7 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
31892
32222
|
"Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
|
|
31893
32223
|
)
|
|
31894
32224
|
]).describe(
|
|
31895
|
-
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
|
|
32225
|
+
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
|
|
31896
32226
|
).or(zod6.null()).optional()
|
|
31897
32227
|
}).optional()
|
|
31898
32228
|
}).optional().describe("Configuration for input and output audio.\n"),
|
|
@@ -31923,6 +32253,7 @@ var createRealtimeClientSecretResponseSessionTracingDefaultTwo = "auto";
|
|
|
31923
32253
|
var createRealtimeClientSecretResponseSessionTracingDefaultOne = null;
|
|
31924
32254
|
var createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne = "always";
|
|
31925
32255
|
var createRealtimeClientSecretResponseSessionToolChoiceDefault = "auto";
|
|
32256
|
+
var createRealtimeClientSecretResponseSessionReasoningEffortDefault = "low";
|
|
31926
32257
|
var createRealtimeClientSecretResponseSessionTruncationRetentionRatioMin = 0;
|
|
31927
32258
|
var createRealtimeClientSecretResponseSessionTruncationRetentionRatioMax = 1;
|
|
31928
32259
|
var createRealtimeClientSecretResponseSessionTruncationTokenLimitsPostInstructionsMin = 0;
|
|
@@ -31932,17 +32263,14 @@ var createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo = "in
|
|
|
31932
32263
|
var createRealtimeClientSecretResponse = zod6.object({
|
|
31933
32264
|
value: zod6.string().describe("The generated client secret value."),
|
|
31934
32265
|
expires_at: zod6.number().describe("Expiration timestamp for the client secret, in seconds since epoch."),
|
|
31935
|
-
session: zod6.
|
|
32266
|
+
session: zod6.union([
|
|
31936
32267
|
zod6.object({
|
|
31937
|
-
client_secret: zod6.object({
|
|
31938
|
-
value: zod6.string().describe(
|
|
31939
|
-
"Ephemeral key usable in client environments to authenticate connections to the Realtime API. Use this in client-side environments rather than a standard API token, which should only be used server-side.\n"
|
|
31940
|
-
),
|
|
31941
|
-
expires_at: zod6.number().describe(
|
|
31942
|
-
"Timestamp for when the token expires. Currently, all tokens expire\nafter one minute.\n"
|
|
31943
|
-
)
|
|
31944
|
-
}).describe("Ephemeral key returned by the API."),
|
|
31945
32268
|
type: zod6.enum(["realtime"]).describe("The type of session to create. Always `realtime` for the Realtime API.\n"),
|
|
32269
|
+
id: zod6.string().describe(
|
|
32270
|
+
"Unique identifier for the session that looks like `sess_1234567890abcdef`.\n"
|
|
32271
|
+
),
|
|
32272
|
+
object: zod6.enum(["realtime.session"]).describe("The object type. Always `realtime.session`."),
|
|
32273
|
+
expires_at: zod6.number().optional().describe("Expiration timestamp for the session, in seconds since epoch."),
|
|
31946
32274
|
output_modalities: zod6.array(zod6.enum(["text", "audio"])).default(createRealtimeClientSecretResponseSessionOutputModalitiesDefault).describe(
|
|
31947
32275
|
'The set of modalities the model can respond with. It defaults to `["audio"]`, indicating\nthat the model will respond with audio plus a transcript. `["text"]` can be used to make\nthe model respond with text only. It is not possible to request both `text` and `audio` at the same time.\n'
|
|
31948
32276
|
),
|
|
@@ -31950,6 +32278,7 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
31950
32278
|
zod6.enum([
|
|
31951
32279
|
"gpt-realtime",
|
|
31952
32280
|
"gpt-realtime-1.5",
|
|
32281
|
+
"gpt-realtime-2",
|
|
31953
32282
|
"gpt-realtime-2025-08-28",
|
|
31954
32283
|
"gpt-4o-realtime-preview",
|
|
31955
32284
|
"gpt-4o-realtime-preview-2024-10-01",
|
|
@@ -31972,15 +32301,15 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
31972
32301
|
audio: zod6.object({
|
|
31973
32302
|
input: zod6.object({
|
|
31974
32303
|
format: zod6.object({
|
|
31975
|
-
type: zod6.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
|
|
31976
|
-
rate: zod6.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
|
|
32304
|
+
type: zod6.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
|
|
32305
|
+
rate: zod6.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
|
|
31977
32306
|
}).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
|
|
31978
32307
|
zod6.object({
|
|
31979
|
-
type: zod6.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
|
|
32308
|
+
type: zod6.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
|
|
31980
32309
|
}).describe("The G.711 \u03BC-law format.")
|
|
31981
32310
|
).or(
|
|
31982
32311
|
zod6.object({
|
|
31983
|
-
type: zod6.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
|
|
32312
|
+
type: zod6.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
|
|
31984
32313
|
}).describe("The G.711 A-law format.")
|
|
31985
32314
|
).optional(),
|
|
31986
32315
|
transcription: zod6.object({
|
|
@@ -31990,20 +32319,19 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
31990
32319
|
"gpt-4o-mini-transcribe",
|
|
31991
32320
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
31992
32321
|
"gpt-4o-transcribe",
|
|
31993
|
-
"gpt-4o-transcribe-diarize"
|
|
32322
|
+
"gpt-4o-transcribe-diarize",
|
|
32323
|
+
"gpt-realtime-whisper"
|
|
31994
32324
|
])
|
|
31995
32325
|
).optional().describe(
|
|
31996
|
-
"The model
|
|
31997
|
-
),
|
|
31998
|
-
language: zod6.string().optional().describe(
|
|
31999
|
-
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
32326
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
32000
32327
|
),
|
|
32328
|
+
language: zod6.string().optional().describe("The language of the input audio.\n"),
|
|
32001
32329
|
prompt: zod6.string().optional().describe(
|
|
32002
|
-
|
|
32330
|
+
"The prompt configured for input audio transcription, when present.\n"
|
|
32003
32331
|
)
|
|
32004
32332
|
}).optional(),
|
|
32005
32333
|
noise_reduction: zod6.object({
|
|
32006
|
-
type: zod6.enum(["near_field", "far_field"]).describe(
|
|
32334
|
+
type: zod6.enum(["near_field", "far_field"]).optional().describe(
|
|
32007
32335
|
"Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones such as laptop or conference room microphones.\n"
|
|
32008
32336
|
)
|
|
32009
32337
|
}).optional().describe(
|
|
@@ -32066,20 +32394,20 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
32066
32394
|
"Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
|
|
32067
32395
|
)
|
|
32068
32396
|
]).describe(
|
|
32069
|
-
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
|
|
32397
|
+
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
|
|
32070
32398
|
).or(zod6.null()).optional()
|
|
32071
32399
|
}).optional(),
|
|
32072
32400
|
output: zod6.object({
|
|
32073
32401
|
format: zod6.object({
|
|
32074
|
-
type: zod6.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
|
|
32075
|
-
rate: zod6.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
|
|
32402
|
+
type: zod6.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
|
|
32403
|
+
rate: zod6.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
|
|
32076
32404
|
}).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
|
|
32077
32405
|
zod6.object({
|
|
32078
|
-
type: zod6.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
|
|
32406
|
+
type: zod6.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
|
|
32079
32407
|
}).describe("The G.711 \u03BC-law format.")
|
|
32080
32408
|
).or(
|
|
32081
32409
|
zod6.object({
|
|
32082
|
-
type: zod6.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
|
|
32410
|
+
type: zod6.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
|
|
32083
32411
|
}).describe("The G.711 A-law format.")
|
|
32084
32412
|
).optional(),
|
|
32085
32413
|
voice: zod6.string().or(
|
|
@@ -32123,7 +32451,7 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
32123
32451
|
).or(zod6.null()).optional(),
|
|
32124
32452
|
tools: zod6.array(
|
|
32125
32453
|
zod6.object({
|
|
32126
|
-
type: zod6.enum(["function"]).describe("The type of the tool, i.e. `function`."),
|
|
32454
|
+
type: zod6.enum(["function"]).optional().describe("The type of the tool, i.e. `function`."),
|
|
32127
32455
|
name: zod6.string().optional().describe("The name of the function."),
|
|
32128
32456
|
description: zod6.string().optional().describe(
|
|
32129
32457
|
"The description of the function, including guidance on when and how\nto call it, and guidance about what to tell the user when calling\n(if anything).\n"
|
|
@@ -32135,7 +32463,7 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
32135
32463
|
server_label: zod6.string().describe(
|
|
32136
32464
|
"A label for this MCP server, used to identify it in tool calls.\n"
|
|
32137
32465
|
),
|
|
32138
|
-
server_url: zod6.string().optional().describe(
|
|
32466
|
+
server_url: zod6.string().url().optional().describe(
|
|
32139
32467
|
"The URL for the MCP server. One of `server_url` or `connector_id` must be\nprovided.\n"
|
|
32140
32468
|
),
|
|
32141
32469
|
connector_id: zod6.enum([
|
|
@@ -32147,7 +32475,7 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
32147
32475
|
"connector_outlookcalendar",
|
|
32148
32476
|
"connector_outlookemail",
|
|
32149
32477
|
"connector_sharepoint"
|
|
32150
|
-
]).describe(
|
|
32478
|
+
]).optional().describe(
|
|
32151
32479
|
"Identifier for service connectors, like those available in ChatGPT. One of\n`server_url` or `connector_id` must be provided. Learn more about service\nconnectors [here](/docs/guides/tools-remote-mcp#connectors).\n\nCurrently supported `connector_id` values are:\n\n- Dropbox: `connector_dropbox`\n- Gmail: `connector_gmail`\n- Google Calendar: `connector_googlecalendar`\n- Google Drive: `connector_googledrive`\n- Microsoft Teams: `connector_microsoftteams`\n- Outlook Calendar: `connector_outlookcalendar`\n- Outlook Email: `connector_outlookemail`\n- SharePoint: `connector_sharepoint`\n"
|
|
32152
32480
|
),
|
|
32153
32481
|
authorization: zod6.string().optional().describe(
|
|
@@ -32213,6 +32541,13 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
32213
32541
|
).default(createRealtimeClientSecretResponseSessionToolChoiceDefault).describe(
|
|
32214
32542
|
"How the model chooses tools. Provide one of the string modes or force a specific\nfunction/MCP tool.\n"
|
|
32215
32543
|
),
|
|
32544
|
+
reasoning: zod6.object({
|
|
32545
|
+
effort: zod6.enum(["minimal", "low", "medium", "high", "xhigh"]).default(createRealtimeClientSecretResponseSessionReasoningEffortDefault).describe(
|
|
32546
|
+
"Constrains effort on reasoning for reasoning-capable Realtime models such as\n`gpt-realtime-2`.\n"
|
|
32547
|
+
)
|
|
32548
|
+
}).optional().describe(
|
|
32549
|
+
"Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.\n"
|
|
32550
|
+
),
|
|
32216
32551
|
max_output_tokens: zod6.number().or(zod6.enum(["inf"])).optional().describe(
|
|
32217
32552
|
"Maximum number of output tokens for a single assistant response,\ninclusive of tool calls. Provide an integer between 1 and 4096 to\nlimit output tokens, or `inf` for the maximum available tokens for a\ngiven model. Defaults to `inf`.\n"
|
|
32218
32553
|
),
|
|
@@ -32252,7 +32587,7 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
32252
32587
|
).or(
|
|
32253
32588
|
zod6.object({
|
|
32254
32589
|
type: zod6.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
|
|
32255
|
-
image_url: zod6.string().describe(
|
|
32590
|
+
image_url: zod6.string().url().describe(
|
|
32256
32591
|
"The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
|
|
32257
32592
|
).or(zod6.null()).optional(),
|
|
32258
32593
|
file_id: zod6.string().describe("The ID of the file to be sent to the model.").or(zod6.null()).optional(),
|
|
@@ -32266,8 +32601,8 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
32266
32601
|
file_id: zod6.string().describe("The ID of the file to be sent to the model.").or(zod6.null()).optional(),
|
|
32267
32602
|
filename: zod6.string().optional().describe("The name of the file to be sent to the model."),
|
|
32268
32603
|
file_data: zod6.string().optional().describe("The content of the file to be sent to the model.\n"),
|
|
32269
|
-
file_url: zod6.string().optional().describe("The URL of the file to be sent to the model."),
|
|
32270
|
-
detail: zod6.enum(["low", "high"])
|
|
32604
|
+
file_url: zod6.string().url().optional().describe("The URL of the file to be sent to the model."),
|
|
32605
|
+
detail: zod6.enum(["low", "high"]).optional()
|
|
32271
32606
|
}).describe("A file input to the model.")
|
|
32272
32607
|
)
|
|
32273
32608
|
).describe(
|
|
@@ -32276,9 +32611,7 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
32276
32611
|
}).describe(
|
|
32277
32612
|
"Reference to a prompt template and its variables.\n[Learn more](/docs/guides/text?api-mode=responses#reusable-prompts).\n"
|
|
32278
32613
|
).or(zod6.null()).optional()
|
|
32279
|
-
}).describe(
|
|
32280
|
-
"A new Realtime session configuration, with an ephemeral key. Default TTL\nfor keys is one minute.\n"
|
|
32281
|
-
),
|
|
32614
|
+
}).describe("A Realtime session configuration object.\n"),
|
|
32282
32615
|
zod6.object({
|
|
32283
32616
|
type: zod6.enum(["transcription"]).describe(
|
|
32284
32617
|
"The type of session. Always `transcription` for transcription sessions.\n"
|
|
@@ -32294,15 +32627,15 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
32294
32627
|
audio: zod6.object({
|
|
32295
32628
|
input: zod6.object({
|
|
32296
32629
|
format: zod6.object({
|
|
32297
|
-
type: zod6.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
|
|
32298
|
-
rate: zod6.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
|
|
32630
|
+
type: zod6.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
|
|
32631
|
+
rate: zod6.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
|
|
32299
32632
|
}).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
|
|
32300
32633
|
zod6.object({
|
|
32301
|
-
type: zod6.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
|
|
32634
|
+
type: zod6.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
|
|
32302
32635
|
}).describe("The G.711 \u03BC-law format.")
|
|
32303
32636
|
).or(
|
|
32304
32637
|
zod6.object({
|
|
32305
|
-
type: zod6.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
|
|
32638
|
+
type: zod6.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
|
|
32306
32639
|
}).describe("The G.711 A-law format.")
|
|
32307
32640
|
).optional(),
|
|
32308
32641
|
transcription: zod6.object({
|
|
@@ -32312,20 +32645,19 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
32312
32645
|
"gpt-4o-mini-transcribe",
|
|
32313
32646
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
32314
32647
|
"gpt-4o-transcribe",
|
|
32315
|
-
"gpt-4o-transcribe-diarize"
|
|
32648
|
+
"gpt-4o-transcribe-diarize",
|
|
32649
|
+
"gpt-realtime-whisper"
|
|
32316
32650
|
])
|
|
32317
32651
|
).optional().describe(
|
|
32318
|
-
"The model
|
|
32319
|
-
),
|
|
32320
|
-
language: zod6.string().optional().describe(
|
|
32321
|
-
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
32652
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
32322
32653
|
),
|
|
32654
|
+
language: zod6.string().optional().describe("The language of the input audio.\n"),
|
|
32323
32655
|
prompt: zod6.string().optional().describe(
|
|
32324
|
-
|
|
32656
|
+
"The prompt configured for input audio transcription, when present.\n"
|
|
32325
32657
|
)
|
|
32326
32658
|
}).optional(),
|
|
32327
32659
|
noise_reduction: zod6.object({
|
|
32328
|
-
type: zod6.enum(["near_field", "far_field"]).describe(
|
|
32660
|
+
type: zod6.enum(["near_field", "far_field"]).optional().describe(
|
|
32329
32661
|
"Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones such as laptop or conference room microphones.\n"
|
|
32330
32662
|
)
|
|
32331
32663
|
}).optional().describe("Configuration for input audio noise reduction.\n"),
|
|
@@ -32342,8 +32674,10 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
32342
32674
|
silence_duration_ms: zod6.number().optional().describe(
|
|
32343
32675
|
"Duration of silence to detect speech stop (in milliseconds). Defaults\nto 500ms. With shorter values the model will respond more quickly,\nbut may jump in on short pauses from the user.\n"
|
|
32344
32676
|
)
|
|
32345
|
-
}).
|
|
32346
|
-
"Configuration for turn detection. Can be set to `null` to turn off. Server\nVAD means that the model will detect the start and end of speech based on\naudio volume and respond at the end of user speech.\n"
|
|
32677
|
+
}).describe(
|
|
32678
|
+
"Configuration for turn detection. Can be set to `null` to turn off. Server\nVAD means that the model will detect the start and end of speech based on\naudio volume and respond at the end of user speech. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported.\n"
|
|
32679
|
+
).or(zod6.null()).optional().describe(
|
|
32680
|
+
"Configuration for turn detection. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported.\n"
|
|
32347
32681
|
)
|
|
32348
32682
|
}).optional()
|
|
32349
32683
|
}).optional().describe("Configuration for input audio for the session.\n")
|
|
@@ -32483,7 +32817,7 @@ var createRealtimeSessionBody = zod6.object({
|
|
|
32483
32817
|
).or(
|
|
32484
32818
|
zod6.object({
|
|
32485
32819
|
type: zod6.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
|
|
32486
|
-
image_url: zod6.string().describe(
|
|
32820
|
+
image_url: zod6.string().url().describe(
|
|
32487
32821
|
"The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
|
|
32488
32822
|
).or(zod6.null()).optional(),
|
|
32489
32823
|
file_id: zod6.string().describe("The ID of the file to be sent to the model.").or(zod6.null()).optional(),
|
|
@@ -32497,7 +32831,7 @@ var createRealtimeSessionBody = zod6.object({
|
|
|
32497
32831
|
file_id: zod6.string().describe("The ID of the file to be sent to the model.").or(zod6.null()).optional(),
|
|
32498
32832
|
filename: zod6.string().optional().describe("The name of the file to be sent to the model."),
|
|
32499
32833
|
file_data: zod6.string().optional().describe("The content of the file to be sent to the model.\n"),
|
|
32500
|
-
file_url: zod6.string().optional().describe("The URL of the file to be sent to the model."),
|
|
32834
|
+
file_url: zod6.string().url().optional().describe("The URL of the file to be sent to the model."),
|
|
32501
32835
|
detail: zod6.enum(["low", "high"]).optional()
|
|
32502
32836
|
}).describe("A file input to the model.")
|
|
32503
32837
|
)
|
|
@@ -32546,17 +32880,14 @@ var createRealtimeSessionResponse = zod6.object({
|
|
|
32546
32880
|
"gpt-4o-mini-transcribe",
|
|
32547
32881
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
32548
32882
|
"gpt-4o-transcribe",
|
|
32549
|
-
"gpt-4o-transcribe-diarize"
|
|
32883
|
+
"gpt-4o-transcribe-diarize",
|
|
32884
|
+
"gpt-realtime-whisper"
|
|
32550
32885
|
])
|
|
32551
32886
|
).optional().describe(
|
|
32552
|
-
"The model
|
|
32887
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
32553
32888
|
),
|
|
32554
|
-
language: zod6.string().optional().describe(
|
|
32555
|
-
|
|
32556
|
-
),
|
|
32557
|
-
prompt: zod6.string().optional().describe(
|
|
32558
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
32559
|
-
)
|
|
32889
|
+
language: zod6.string().optional().describe("The language of the input audio.\n"),
|
|
32890
|
+
prompt: zod6.string().optional().describe("The prompt configured for input audio transcription, when present.\n")
|
|
32560
32891
|
}).optional(),
|
|
32561
32892
|
noise_reduction: zod6.object({
|
|
32562
32893
|
type: zod6.enum(["near_field", "far_field"]).optional().describe(
|
|
@@ -32682,16 +33013,20 @@ var createRealtimeTranscriptionSessionBody = zod6.object({
|
|
|
32682
33013
|
"gpt-4o-mini-transcribe",
|
|
32683
33014
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
32684
33015
|
"gpt-4o-transcribe",
|
|
32685
|
-
"gpt-4o-transcribe-diarize"
|
|
33016
|
+
"gpt-4o-transcribe-diarize",
|
|
33017
|
+
"gpt-realtime-whisper"
|
|
32686
33018
|
])
|
|
32687
33019
|
).optional().describe(
|
|
32688
|
-
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`,
|
|
33020
|
+
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
|
|
32689
33021
|
),
|
|
32690
33022
|
language: zod6.string().optional().describe(
|
|
32691
33023
|
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
32692
33024
|
),
|
|
32693
33025
|
prompt: zod6.string().optional().describe(
|
|
32694
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
33026
|
+
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
|
|
33027
|
+
),
|
|
33028
|
+
delay: zod6.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
|
|
33029
|
+
"Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
|
|
32695
33030
|
)
|
|
32696
33031
|
}).optional(),
|
|
32697
33032
|
include: zod6.array(zod6.enum(["item.input_audio_transcription.logprobs"])).optional().describe(
|
|
@@ -32720,17 +33055,14 @@ var createRealtimeTranscriptionSessionResponse = zod6.object({
|
|
|
32720
33055
|
"gpt-4o-mini-transcribe",
|
|
32721
33056
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
32722
33057
|
"gpt-4o-transcribe",
|
|
32723
|
-
"gpt-4o-transcribe-diarize"
|
|
33058
|
+
"gpt-4o-transcribe-diarize",
|
|
33059
|
+
"gpt-realtime-whisper"
|
|
32724
33060
|
])
|
|
32725
33061
|
).optional().describe(
|
|
32726
|
-
"The model
|
|
33062
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
32727
33063
|
),
|
|
32728
|
-
language: zod6.string().optional().describe(
|
|
32729
|
-
|
|
32730
|
-
),
|
|
32731
|
-
prompt: zod6.string().optional().describe(
|
|
32732
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
32733
|
-
)
|
|
33064
|
+
language: zod6.string().optional().describe("The language of the input audio.\n"),
|
|
33065
|
+
prompt: zod6.string().optional().describe("The prompt configured for input audio transcription, when present.\n")
|
|
32734
33066
|
}).optional(),
|
|
32735
33067
|
turn_detection: zod6.object({
|
|
32736
33068
|
type: zod6.string().optional().describe("Type of turn detection, only `server_vad` is currently supported.\n"),
|
|
@@ -36115,6 +36447,7 @@ __export(sonioxPublicAPI_zod_exports, {
|
|
|
36115
36447
|
createTranscriptionBodyWebhookUrlRegExpOne: () => createTranscriptionBodyWebhookUrlRegExpOne,
|
|
36116
36448
|
deleteFileParams: () => deleteFileParams,
|
|
36117
36449
|
deleteTranscriptionParams: () => deleteTranscriptionParams,
|
|
36450
|
+
getConcurrencyLimitsResponse: () => getConcurrencyLimitsResponse,
|
|
36118
36451
|
getFileParams: () => getFileParams,
|
|
36119
36452
|
getFileResponse: () => getFileResponse,
|
|
36120
36453
|
getFilesCountResponse: () => getFilesCountResponse,
|
|
@@ -36132,6 +36465,12 @@ __export(sonioxPublicAPI_zod_exports, {
|
|
|
36132
36465
|
getTranscriptionsQueryLimitMax: () => getTranscriptionsQueryLimitMax,
|
|
36133
36466
|
getTranscriptionsQueryParams: () => getTranscriptionsQueryParams,
|
|
36134
36467
|
getTranscriptionsResponse: () => getTranscriptionsResponse,
|
|
36468
|
+
getTtsModelsResponse: () => getTtsModelsResponse,
|
|
36469
|
+
getUsageLogsQueryLimitDefault: () => getUsageLogsQueryLimitDefault,
|
|
36470
|
+
getUsageLogsQueryLimitMax: () => getUsageLogsQueryLimitMax,
|
|
36471
|
+
getUsageLogsQueryParams: () => getUsageLogsQueryParams,
|
|
36472
|
+
getUsageLogsQuerySortDefault: () => getUsageLogsQuerySortDefault,
|
|
36473
|
+
getUsageLogsResponse: () => getUsageLogsResponse,
|
|
36135
36474
|
uploadFileBody: () => uploadFileBody,
|
|
36136
36475
|
uploadFileBodyClientReferenceIdMaxOne: () => uploadFileBodyClientReferenceIdMaxOne
|
|
36137
36476
|
});
|
|
@@ -36382,11 +36721,73 @@ var getModelsResponse = zod10.object({
|
|
|
36382
36721
|
})
|
|
36383
36722
|
).describe("List of available models and their attributes.")
|
|
36384
36723
|
});
|
|
36724
|
+
var getTtsModelsResponse = zod10.object({
|
|
36725
|
+
models: zod10.array(
|
|
36726
|
+
zod10.object({
|
|
36727
|
+
id: zod10.string().describe("Unique identifier of the model."),
|
|
36728
|
+
aliased_model_id: zod10.string().or(zod10.null()).describe("If this is an alias, the id of the aliased model."),
|
|
36729
|
+
name: zod10.string().describe("Name of the model."),
|
|
36730
|
+
voices: zod10.array(
|
|
36731
|
+
zod10.object({
|
|
36732
|
+
id: zod10.string().describe("Unique identifier of the voice."),
|
|
36733
|
+
description: zod10.string().describe("Description of the TTS voice."),
|
|
36734
|
+
gender: zod10.enum(["male", "female", "neutral"])
|
|
36735
|
+
})
|
|
36736
|
+
).describe("List of available voices for this model."),
|
|
36737
|
+
languages: zod10.array(
|
|
36738
|
+
zod10.object({
|
|
36739
|
+
code: zod10.string().describe("2-letter language code."),
|
|
36740
|
+
name: zod10.string().describe("Language name.")
|
|
36741
|
+
})
|
|
36742
|
+
).describe("List of languages supported by the model.")
|
|
36743
|
+
})
|
|
36744
|
+
).describe("List of available TTS models and their attributes.")
|
|
36745
|
+
});
|
|
36746
|
+
var getUsageLogsQueryLimitDefault = 1e3;
|
|
36747
|
+
var getUsageLogsQueryLimitMax = 1e3;
|
|
36748
|
+
var getUsageLogsQuerySortDefault = "end_time_asc";
|
|
36749
|
+
var getUsageLogsQueryParams = zod10.object({
|
|
36750
|
+
start_time: zod10.string().describe("Start of the time window (inclusive). Filters by request end time."),
|
|
36751
|
+
end_time: zod10.string().describe("End of the time window (exclusive). Filters by request end time."),
|
|
36752
|
+
limit: zod10.number().min(1).max(getUsageLogsQueryLimitMax).default(getUsageLogsQueryLimitDefault).describe("Maximum number of usage log entries to return."),
|
|
36753
|
+
sort: zod10.enum(["end_time_asc", "end_time_desc"]).default(getUsageLogsQuerySortDefault).describe(
|
|
36754
|
+
"Sort order by end_time.Use `end_time_desc` to get the most recent entries first. When paginating, pass the same `sort` value alongside the cursor."
|
|
36755
|
+
),
|
|
36756
|
+
cursor: zod10.string().or(zod10.null()).optional().describe("Pagination cursor for the next page of results.")
|
|
36757
|
+
});
|
|
36758
|
+
var getUsageLogsResponse = zod10.object({
|
|
36759
|
+
usage_logs: zod10.array(
|
|
36760
|
+
zod10.object({
|
|
36761
|
+
uuid: zod10.string().uuid().describe("Unique identifier of the request."),
|
|
36762
|
+
request_scope: zod10.string().describe("Scope of the request (api / playground)."),
|
|
36763
|
+
client_reference_id: zod10.string().describe("Client reference ID supplied on the original request. Empty string if none."),
|
|
36764
|
+
model: zod10.string().describe("Model identifier."),
|
|
36765
|
+
start_time: zod10.string().datetime({}).describe("When the request started."),
|
|
36766
|
+
end_time: zod10.string().datetime({}).describe("When the request ended."),
|
|
36767
|
+
input_text_tokens: zod10.number(),
|
|
36768
|
+
input_audio_tokens: zod10.number(),
|
|
36769
|
+
input_audio_duration_ms: zod10.number(),
|
|
36770
|
+
output_text_tokens: zod10.number(),
|
|
36771
|
+
output_audio_tokens: zod10.number(),
|
|
36772
|
+
output_audio_duration_ms: zod10.number(),
|
|
36773
|
+
cost_usd: zod10.string(),
|
|
36774
|
+
input_cost_usd: zod10.string(),
|
|
36775
|
+
input_text_cost_usd: zod10.string(),
|
|
36776
|
+
input_audio_cost_usd: zod10.string(),
|
|
36777
|
+
output_cost_usd: zod10.string(),
|
|
36778
|
+
output_text_cost_usd: zod10.string(),
|
|
36779
|
+
output_audio_cost_usd: zod10.string()
|
|
36780
|
+
})
|
|
36781
|
+
).describe("Per-request usage log entries ordered by end_time, uuid (per `sort`)."),
|
|
36782
|
+
next_page_cursor: zod10.string().or(zod10.null()).optional().describe(
|
|
36783
|
+
"A pagination token that references the next page of results. When more data is available, this field contains a value to pass in the cursor parameter of a subsequent request. When null, no additional results are available."
|
|
36784
|
+
)
|
|
36785
|
+
});
|
|
36385
36786
|
var createTemporaryApiKeyBodyExpiresInSecondsMax = 3600;
|
|
36386
36787
|
var createTemporaryApiKeyBodyClientReferenceIdMaxOne = 256;
|
|
36387
36788
|
var createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne = 18e3;
|
|
36388
36789
|
var createTemporaryApiKeyBody = zod10.object({
|
|
36389
|
-
usage_type: zod10.enum(["transcribe_websocket"]),
|
|
36790
|
+
usage_type: zod10.enum(["transcribe_websocket", "tts_rt"]),
|
|
36390
36791
|
expires_in_seconds: zod10.number().min(1).max(createTemporaryApiKeyBodyExpiresInSecondsMax).describe("Duration in seconds until the temporary API key expires."),
|
|
36391
36792
|
client_reference_id: zod10.string().max(createTemporaryApiKeyBodyClientReferenceIdMaxOne).or(zod10.null()).optional().describe("Optional tracking identifier string. Does not need to be unique."),
|
|
36392
36793
|
single_use: zod10.boolean().or(zod10.null()).optional().describe("If true, the temporary API key can be used only once."),
|
|
@@ -36394,6 +36795,28 @@ var createTemporaryApiKeyBody = zod10.object({
|
|
|
36394
36795
|
"Maximum WebSocket connection duration in seconds. If exceeded, the connection will be dropped. If not set, no limit is applied."
|
|
36395
36796
|
)
|
|
36396
36797
|
});
|
|
36798
|
+
var getConcurrencyLimitsResponse = zod10.object({
|
|
36799
|
+
project: zod10.object({
|
|
36800
|
+
current: zod10.object({
|
|
36801
|
+
transcribe_concurrent: zod10.number(),
|
|
36802
|
+
tts_concurrent: zod10.number()
|
|
36803
|
+
}).describe("Live counts read from Redis"),
|
|
36804
|
+
limits: zod10.object({
|
|
36805
|
+
transcribe_concurrent: zod10.number().or(zod10.null()),
|
|
36806
|
+
tts_concurrent: zod10.number().or(zod10.null())
|
|
36807
|
+
}).describe("Configured limits")
|
|
36808
|
+
}),
|
|
36809
|
+
organization: zod10.object({
|
|
36810
|
+
current: zod10.object({
|
|
36811
|
+
transcribe_concurrent: zod10.number(),
|
|
36812
|
+
tts_concurrent: zod10.number()
|
|
36813
|
+
}).describe("Live counts read from Redis"),
|
|
36814
|
+
limits: zod10.object({
|
|
36815
|
+
transcribe_concurrent: zod10.number().or(zod10.null()),
|
|
36816
|
+
tts_concurrent: zod10.number().or(zod10.null())
|
|
36817
|
+
}).describe("Configured limits")
|
|
36818
|
+
})
|
|
36819
|
+
});
|
|
36397
36820
|
|
|
36398
36821
|
// src/generated/soniox/streaming-types.zod.ts
|
|
36399
36822
|
var streaming_types_zod_exports = {};
|
|
@@ -36478,10 +36901,10 @@ var sonioxStructuredContextSchema = zod11.object({
|
|
|
36478
36901
|
var sonioxContextSchema = zod11.union([sonioxStructuredContextSchema, zod11.string()]);
|
|
36479
36902
|
var sonioxRealtimeModelSchema = zod11.enum([
|
|
36480
36903
|
"stt-rt-v4",
|
|
36481
|
-
"stt-rt-v3",
|
|
36482
36904
|
"stt-rt-preview",
|
|
36483
36905
|
"stt-rt-v3-preview",
|
|
36484
|
-
"stt-rt-preview-v2"
|
|
36906
|
+
"stt-rt-preview-v2",
|
|
36907
|
+
"stt-rt-v3"
|
|
36485
36908
|
]);
|
|
36486
36909
|
var streamingTranscriberParams3 = zod11.object({
|
|
36487
36910
|
model: sonioxRealtimeModelSchema,
|
|
@@ -36489,12 +36912,16 @@ var streamingTranscriberParams3 = zod11.object({
|
|
|
36489
36912
|
sampleRate: zod11.number().optional(),
|
|
36490
36913
|
numChannels: zod11.number().optional(),
|
|
36491
36914
|
languageHints: zod11.array(zod11.string()).optional(),
|
|
36915
|
+
languageHintsStrict: zod11.boolean().optional(),
|
|
36492
36916
|
context: sonioxContextSchema.optional(),
|
|
36493
36917
|
enableSpeakerDiarization: zod11.boolean().optional(),
|
|
36494
36918
|
enableLanguageIdentification: zod11.boolean().optional(),
|
|
36495
36919
|
enableEndpointDetection: zod11.boolean().optional(),
|
|
36920
|
+
maxEndpointDelayMs: zod11.number().optional(),
|
|
36496
36921
|
translation: sonioxTranslationConfigSchema.optional(),
|
|
36497
|
-
clientReferenceId: zod11.string().optional()
|
|
36922
|
+
clientReferenceId: zod11.string().optional(),
|
|
36923
|
+
keepaliveIntervalMs: zod11.number().optional(),
|
|
36924
|
+
connectTimeoutMs: zod11.number().optional()
|
|
36498
36925
|
});
|
|
36499
36926
|
var sonioxTranslationStatusSchema = zod11.enum(["original", "translation", "none"]);
|
|
36500
36927
|
var sonioxTokenSchema = zod11.object({
|
|
@@ -37086,6 +37513,7 @@ __export(schema_exports5, {
|
|
|
37086
37513
|
V1ListenPostParametersCallbackMethod: () => V1ListenPostParametersCallbackMethod,
|
|
37087
37514
|
V1ListenPostParametersCustomIntentMode: () => V1ListenPostParametersCustomIntentMode,
|
|
37088
37515
|
V1ListenPostParametersCustomTopicMode: () => V1ListenPostParametersCustomTopicMode,
|
|
37516
|
+
V1ListenPostParametersDiarizeModel: () => V1ListenPostParametersDiarizeModel,
|
|
37089
37517
|
V1ListenPostParametersEncoding: () => V1ListenPostParametersEncoding,
|
|
37090
37518
|
V1ListenPostParametersModel0: () => V1ListenPostParametersModel0,
|
|
37091
37519
|
V1ListenPostParametersRedactSchemaOneOf1Items: () => V1ListenPostParametersRedactSchemaOneOf1Items,
|
|
@@ -37124,6 +37552,13 @@ __export(schema_exports5, {
|
|
|
37124
37552
|
V1SpeakPostParametersSampleRate4: () => V1SpeakPostParametersSampleRate4
|
|
37125
37553
|
});
|
|
37126
37554
|
|
|
37555
|
+
// src/generated/deepgram/schema/v1ListenPostParametersDiarizeModel.ts
|
|
37556
|
+
var V1ListenPostParametersDiarizeModel = {
|
|
37557
|
+
latest: "latest",
|
|
37558
|
+
v1: "v1",
|
|
37559
|
+
v2: "v2"
|
|
37560
|
+
};
|
|
37561
|
+
|
|
37127
37562
|
// src/generated/deepgram/schema/v1ListenPostParametersModel0.ts
|
|
37128
37563
|
var V1ListenPostParametersModel0 = {
|
|
37129
37564
|
"nova-3": "nova-3",
|
|
@@ -37340,6 +37775,7 @@ var V1SpeakPostParametersSampleRate = {
|
|
|
37340
37775
|
var schema_exports6 = {};
|
|
37341
37776
|
__export(schema_exports6, {
|
|
37342
37777
|
AudioResponseFormat: () => AudioResponseFormat,
|
|
37778
|
+
AudioTranscriptionDelay: () => AudioTranscriptionDelay,
|
|
37343
37779
|
CreateSpeechRequestResponseFormat: () => CreateSpeechRequestResponseFormat,
|
|
37344
37780
|
CreateSpeechRequestStreamFormat: () => CreateSpeechRequestStreamFormat,
|
|
37345
37781
|
CreateTranscriptionRequestTimestampGranularitiesItem: () => CreateTranscriptionRequestTimestampGranularitiesItem,
|
|
@@ -37359,12 +37795,14 @@ __export(schema_exports6, {
|
|
|
37359
37795
|
RealtimeAudioFormatsAnyOfType: () => RealtimeAudioFormatsAnyOfType,
|
|
37360
37796
|
RealtimeCreateClientSecretRequestExpiresAfterAnchor: () => RealtimeCreateClientSecretRequestExpiresAfterAnchor,
|
|
37361
37797
|
RealtimeFunctionToolType: () => RealtimeFunctionToolType,
|
|
37798
|
+
RealtimeReasoningEffort: () => RealtimeReasoningEffort,
|
|
37362
37799
|
RealtimeSessionCreateRequestGAIncludeItem: () => RealtimeSessionCreateRequestGAIncludeItem,
|
|
37363
37800
|
RealtimeSessionCreateRequestGAOutputModalitiesItem: () => RealtimeSessionCreateRequestGAOutputModalitiesItem,
|
|
37364
37801
|
RealtimeSessionCreateRequestGAType: () => RealtimeSessionCreateRequestGAType,
|
|
37365
37802
|
RealtimeSessionCreateRequestModalitiesItem: () => RealtimeSessionCreateRequestModalitiesItem,
|
|
37366
37803
|
RealtimeSessionCreateRequestToolsItemType: () => RealtimeSessionCreateRequestToolsItemType,
|
|
37367
37804
|
RealtimeSessionCreateResponseGAIncludeItem: () => RealtimeSessionCreateResponseGAIncludeItem,
|
|
37805
|
+
RealtimeSessionCreateResponseGAObject: () => RealtimeSessionCreateResponseGAObject,
|
|
37368
37806
|
RealtimeSessionCreateResponseGAOutputModalitiesItem: () => RealtimeSessionCreateResponseGAOutputModalitiesItem,
|
|
37369
37807
|
RealtimeSessionCreateResponseGAType: () => RealtimeSessionCreateResponseGAType,
|
|
37370
37808
|
RealtimeSessionCreateResponseIncludeItem: () => RealtimeSessionCreateResponseIncludeItem,
|
|
@@ -37395,6 +37833,15 @@ __export(schema_exports6, {
|
|
|
37395
37833
|
VoiceResourceObject: () => VoiceResourceObject
|
|
37396
37834
|
});
|
|
37397
37835
|
|
|
37836
|
+
// src/generated/openai/schema/audioTranscriptionDelay.ts
|
|
37837
|
+
var AudioTranscriptionDelay = {
|
|
37838
|
+
minimal: "minimal",
|
|
37839
|
+
low: "low",
|
|
37840
|
+
medium: "medium",
|
|
37841
|
+
high: "high",
|
|
37842
|
+
xhigh: "xhigh"
|
|
37843
|
+
};
|
|
37844
|
+
|
|
37398
37845
|
// src/generated/openai/schema/createSpeechRequestResponseFormat.ts
|
|
37399
37846
|
var CreateSpeechRequestResponseFormat = {
|
|
37400
37847
|
mp3: "mp3",
|
|
@@ -37507,6 +37954,15 @@ var RealtimeFunctionToolType = {
|
|
|
37507
37954
|
function: "function"
|
|
37508
37955
|
};
|
|
37509
37956
|
|
|
37957
|
+
// src/generated/openai/schema/realtimeReasoningEffort.ts
|
|
37958
|
+
var RealtimeReasoningEffort = {
|
|
37959
|
+
minimal: "minimal",
|
|
37960
|
+
low: "low",
|
|
37961
|
+
medium: "medium",
|
|
37962
|
+
high: "high",
|
|
37963
|
+
xhigh: "xhigh"
|
|
37964
|
+
};
|
|
37965
|
+
|
|
37510
37966
|
// src/generated/openai/schema/realtimeSessionCreateRequestGAIncludeItem.ts
|
|
37511
37967
|
var RealtimeSessionCreateRequestGAIncludeItem = {
|
|
37512
37968
|
iteminput_audio_transcriptionlogprobs: "item.input_audio_transcription.logprobs"
|
|
@@ -37539,6 +37995,11 @@ var RealtimeSessionCreateResponseGAIncludeItem = {
|
|
|
37539
37995
|
iteminput_audio_transcriptionlogprobs: "item.input_audio_transcription.logprobs"
|
|
37540
37996
|
};
|
|
37541
37997
|
|
|
37998
|
+
// src/generated/openai/schema/realtimeSessionCreateResponseGAObject.ts
|
|
37999
|
+
var RealtimeSessionCreateResponseGAObject = {
|
|
38000
|
+
realtimesession: "realtime.session"
|
|
38001
|
+
};
|
|
38002
|
+
|
|
37542
38003
|
// src/generated/openai/schema/realtimeSessionCreateResponseGAOutputModalitiesItem.ts
|
|
37543
38004
|
var RealtimeSessionCreateResponseGAOutputModalitiesItem = {
|
|
37544
38005
|
text: "text",
|
|
@@ -37683,6 +38144,7 @@ __export(schema_exports7, {
|
|
|
37683
38144
|
AutoChaptersResultErrorType: () => AutoChaptersResultErrorType,
|
|
37684
38145
|
ErrorResponseError: () => ErrorResponseError,
|
|
37685
38146
|
GetJobsJobidAlignmentTags: () => GetJobsJobidAlignmentTags,
|
|
38147
|
+
GetJobsJobidObjectUrlsUrlForItem: () => GetJobsJobidObjectUrlsUrlForItem,
|
|
37686
38148
|
GetJobsJobidTranscriptFormat: () => GetJobsJobidTranscriptFormat,
|
|
37687
38149
|
JobDetailsStatus: () => JobDetailsStatus,
|
|
37688
38150
|
JobMode: () => JobMode,
|
|
@@ -37752,6 +38214,13 @@ var GetJobsJobidAlignmentTags = {
|
|
|
37752
38214
|
one_per_line: "one_per_line"
|
|
37753
38215
|
};
|
|
37754
38216
|
|
|
38217
|
+
// src/generated/speechmatics/schema/getJobsJobidObjectUrlsUrlForItem.ts
|
|
38218
|
+
var GetJobsJobidObjectUrlsUrlForItem = {
|
|
38219
|
+
data: "data",
|
|
38220
|
+
audio_mp3: "audio_mp3",
|
|
38221
|
+
transcript: "transcript"
|
|
38222
|
+
};
|
|
38223
|
+
|
|
37755
38224
|
// src/generated/speechmatics/schema/getJobsJobidTranscriptFormat.ts
|
|
37756
38225
|
var GetJobsJobidTranscriptFormat = {
|
|
37757
38226
|
"json-v2": "json-v2",
|
|
@@ -37868,6 +38337,19 @@ var WrittenFormRecognitionResultType = {
|
|
|
37868
38337
|
word: "word"
|
|
37869
38338
|
};
|
|
37870
38339
|
|
|
38340
|
+
// src/generated/soniox/sdk-types.ts
|
|
38341
|
+
var sdk_types_exports = {};
|
|
38342
|
+
__export(sdk_types_exports, {
|
|
38343
|
+
RealtimeSttSession: () => RealtimeSttSession,
|
|
38344
|
+
SonioxFetchHttpClient: () => FetchHttpClient,
|
|
38345
|
+
SonioxNodeClient: () => SonioxNodeClient
|
|
38346
|
+
});
|
|
38347
|
+
import {
|
|
38348
|
+
FetchHttpClient,
|
|
38349
|
+
RealtimeSttSession,
|
|
38350
|
+
SonioxNodeClient
|
|
38351
|
+
} from "@soniox/node";
|
|
38352
|
+
|
|
37871
38353
|
// src/generated/elevenlabs/schema/index.ts
|
|
37872
38354
|
var schema_exports8 = {};
|
|
37873
38355
|
__export(schema_exports8, {
|
|
@@ -37945,6 +38427,10 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
|
|
|
37945
38427
|
deleteJobsJobidParams: () => deleteJobsJobidParams,
|
|
37946
38428
|
deleteJobsJobidQueryParams: () => deleteJobsJobidQueryParams,
|
|
37947
38429
|
deleteJobsJobidResponse: () => deleteJobsJobidResponse,
|
|
38430
|
+
deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault: () => deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault,
|
|
38431
|
+
deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault: () => deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault,
|
|
38432
|
+
deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
|
|
38433
|
+
deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
|
|
37948
38434
|
deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
|
|
37949
38435
|
deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
|
|
37950
38436
|
deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
|
|
@@ -37960,8 +38446,15 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
|
|
|
37960
38446
|
getJobsJobidDataResponse: () => getJobsJobidDataResponse,
|
|
37961
38447
|
getJobsJobidLogParams: () => getJobsJobidLogParams,
|
|
37962
38448
|
getJobsJobidLogResponse: () => getJobsJobidLogResponse,
|
|
38449
|
+
getJobsJobidObjectUrlsParams: () => getJobsJobidObjectUrlsParams,
|
|
38450
|
+
getJobsJobidObjectUrlsQueryParams: () => getJobsJobidObjectUrlsQueryParams,
|
|
38451
|
+
getJobsJobidObjectUrlsResponse: () => getJobsJobidObjectUrlsResponse,
|
|
37963
38452
|
getJobsJobidParams: () => getJobsJobidParams,
|
|
37964
38453
|
getJobsJobidResponse: () => getJobsJobidResponse,
|
|
38454
|
+
getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault: () => getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault,
|
|
38455
|
+
getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault: () => getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault,
|
|
38456
|
+
getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
|
|
38457
|
+
getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
|
|
37965
38458
|
getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
|
|
37966
38459
|
getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
|
|
37967
38460
|
getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
|
|
@@ -37976,6 +38469,8 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
|
|
|
37976
38469
|
getJobsJobidTranscriptQueryParams: () => getJobsJobidTranscriptQueryParams,
|
|
37977
38470
|
getJobsJobidTranscriptResponse: () => getJobsJobidTranscriptResponse,
|
|
37978
38471
|
getJobsJobidTranscriptResponseJobDurationMin: () => getJobsJobidTranscriptResponseJobDurationMin,
|
|
38472
|
+
getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
|
|
38473
|
+
getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
|
|
37979
38474
|
getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp,
|
|
37980
38475
|
getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
|
|
37981
38476
|
getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax,
|
|
@@ -37987,6 +38482,10 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
|
|
|
37987
38482
|
getJobsQueryLimitMax: () => getJobsQueryLimitMax,
|
|
37988
38483
|
getJobsQueryParams: () => getJobsQueryParams,
|
|
37989
38484
|
getJobsResponse: () => getJobsResponse,
|
|
38485
|
+
getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault: () => getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault,
|
|
38486
|
+
getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault: () => getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault,
|
|
38487
|
+
getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
|
|
38488
|
+
getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
|
|
37990
38489
|
getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
|
|
37991
38490
|
getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
|
|
37992
38491
|
getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
|
|
@@ -37997,12 +38496,18 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
|
|
|
37997
38496
|
getJobsResponseJobsItemDurationMin: () => getJobsResponseJobsItemDurationMin,
|
|
37998
38497
|
getUsageQueryParams: () => getUsageQueryParams,
|
|
37999
38498
|
getUsageResponse: () => getUsageResponse,
|
|
38000
|
-
postJobsBody: () => postJobsBody
|
|
38499
|
+
postJobsBody: () => postJobsBody,
|
|
38500
|
+
postJobsHeader: () => postJobsHeader
|
|
38001
38501
|
});
|
|
38002
38502
|
import { z as zod12 } from "zod";
|
|
38503
|
+
var postJobsHeader = zod12.object({
|
|
38504
|
+
"X-SM-Processing-Data": zod12.string().optional().describe(
|
|
38505
|
+
'**Note**: Only available for on-prem\nJSON dictionary of processing settings for the job worker. Currently supports `parallel_engines` (integer), which controls the number of engines the worker can use in parallel for this job, and `user_id` (string), which is the user id for this job. Example: `{"parallel_engines": 4}`'
|
|
38506
|
+
)
|
|
38507
|
+
});
|
|
38003
38508
|
var postJobsBody = zod12.object({
|
|
38004
38509
|
config: zod12.string().describe(
|
|
38005
|
-
"JSON containing a `JobConfig` model indicating the type and parameters for the recognition job."
|
|
38510
|
+
"JSON containing a [`JobConfig`](/speech-to-text/batch/input#jobconfig-schema) model indicating the type and parameters for the recognition job."
|
|
38006
38511
|
),
|
|
38007
38512
|
data_file: zod12.instanceof(File).optional().describe(
|
|
38008
38513
|
"The data file to be processed. Alternatively the data file can be fetched from a url specified in `JobConfig`."
|
|
@@ -38024,9 +38529,13 @@ var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitiv
|
|
|
38024
38529
|
var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
|
|
38025
38530
|
var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
|
|
38026
38531
|
var getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
|
|
38532
|
+
var getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
|
|
38533
|
+
var getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
|
|
38027
38534
|
var getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
|
|
38028
38535
|
var getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
|
|
38029
38536
|
var getJobsResponseJobsItemConfigTranslationConfigTargetLanguagesMax = 5;
|
|
38537
|
+
var getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault = "auto";
|
|
38538
|
+
var getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault = "brief";
|
|
38030
38539
|
var getJobsResponse = zod12.object({
|
|
38031
38540
|
jobs: zod12.array(
|
|
38032
38541
|
zod12.object({
|
|
@@ -38106,19 +38615,30 @@ var getJobsResponse = zod12.object({
|
|
|
38106
38615
|
max_delay_mode: zod12.enum(["fixed", "flexible"]).optional().describe(
|
|
38107
38616
|
"Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
|
|
38108
38617
|
),
|
|
38618
|
+
audio_filtering_config: zod12.object({
|
|
38619
|
+
volume_threshold: zod12.number().min(
|
|
38620
|
+
getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
|
|
38621
|
+
).max(
|
|
38622
|
+
getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
|
|
38623
|
+
).optional().describe(
|
|
38624
|
+
"Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
|
|
38625
|
+
)
|
|
38626
|
+
}).optional().describe("Configuration for limiting the transcription of quiet audio."),
|
|
38109
38627
|
transcript_filtering_config: zod12.object({
|
|
38110
38628
|
remove_disfluencies: zod12.boolean().optional().describe(
|
|
38111
|
-
"If true, words
|
|
38629
|
+
"If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
|
|
38112
38630
|
),
|
|
38113
38631
|
replacements: zod12.array(
|
|
38114
38632
|
zod12.object({
|
|
38115
|
-
from: zod12.string(),
|
|
38116
|
-
to: zod12.string()
|
|
38633
|
+
from: zod12.string().describe("The text or pattern identified to be replaced."),
|
|
38634
|
+
to: zod12.string().describe(
|
|
38635
|
+
"The corrected or formatted string to appear in the transcript."
|
|
38636
|
+
)
|
|
38117
38637
|
})
|
|
38118
38638
|
).optional().describe(
|
|
38119
|
-
|
|
38639
|
+
'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
|
|
38120
38640
|
)
|
|
38121
|
-
}).optional().describe("Configuration for applying filtering to the transcription"),
|
|
38641
|
+
}).optional().describe("Configuration for applying filtering to the transcription."),
|
|
38122
38642
|
speaker_diarization_config: zod12.object({
|
|
38123
38643
|
prefer_current_speaker: zod12.boolean().optional().describe(
|
|
38124
38644
|
'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
|
|
@@ -38129,6 +38649,19 @@ var getJobsResponse = zod12.object({
|
|
|
38129
38649
|
getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
|
|
38130
38650
|
).optional().describe(
|
|
38131
38651
|
"Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
|
|
38652
|
+
),
|
|
38653
|
+
get_speakers: zod12.boolean().optional().describe(
|
|
38654
|
+
"If true, speaker identifiers will be returned at the end of transcript."
|
|
38655
|
+
),
|
|
38656
|
+
speakers: zod12.array(
|
|
38657
|
+
zod12.object({
|
|
38658
|
+
label: zod12.string().min(1).describe(
|
|
38659
|
+
"Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
|
|
38660
|
+
),
|
|
38661
|
+
speaker_identifiers: zod12.array(zod12.string().describe("Speaker identifiers.")).min(1)
|
|
38662
|
+
})
|
|
38663
|
+
).optional().describe(
|
|
38664
|
+
"Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
|
|
38132
38665
|
)
|
|
38133
38666
|
}).optional().describe("Configuration for speaker diarization")
|
|
38134
38667
|
}).optional(),
|
|
@@ -38186,10 +38719,14 @@ var getJobsResponse = zod12.object({
|
|
|
38186
38719
|
default_language: zod12.string().optional()
|
|
38187
38720
|
}).optional(),
|
|
38188
38721
|
summarization_config: zod12.object({
|
|
38189
|
-
content_type: zod12.enum(["auto", "informative", "conversational"]).
|
|
38190
|
-
|
|
38722
|
+
content_type: zod12.enum(["auto", "informative", "conversational"]).default(getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault).describe(
|
|
38723
|
+
"Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
|
|
38724
|
+
),
|
|
38725
|
+
summary_length: zod12.enum(["brief", "detailed"]).default(getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault).describe(
|
|
38726
|
+
"Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
|
|
38727
|
+
),
|
|
38191
38728
|
summary_type: zod12.enum(["paragraphs", "bullets"]).optional()
|
|
38192
|
-
}).optional(),
|
|
38729
|
+
}).optional().describe("Configuration options for summarization."),
|
|
38193
38730
|
sentiment_analysis_config: zod12.object({}).optional(),
|
|
38194
38731
|
topic_detection_config: zod12.object({
|
|
38195
38732
|
topics: zod12.array(zod12.string()).optional()
|
|
@@ -38211,7 +38748,7 @@ var getJobsResponse = zod12.object({
|
|
|
38211
38748
|
"Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
|
|
38212
38749
|
)
|
|
38213
38750
|
}).describe(
|
|
38214
|
-
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs
|
|
38751
|
+
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
|
|
38215
38752
|
)
|
|
38216
38753
|
)
|
|
38217
38754
|
});
|
|
@@ -38223,9 +38760,13 @@ var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitiv
|
|
|
38223
38760
|
var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
|
|
38224
38761
|
var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
|
|
38225
38762
|
var getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
|
|
38763
|
+
var getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
|
|
38764
|
+
var getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
|
|
38226
38765
|
var getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
|
|
38227
38766
|
var getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
|
|
38228
38767
|
var getJobsJobidResponseJobConfigTranslationConfigTargetLanguagesMax = 5;
|
|
38768
|
+
var getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault = "auto";
|
|
38769
|
+
var getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault = "brief";
|
|
38229
38770
|
var getJobsJobidResponse = zod12.object({
|
|
38230
38771
|
job: zod12.object({
|
|
38231
38772
|
created_at: zod12.string().datetime({}).describe("The UTC date time the job was created."),
|
|
@@ -38302,19 +38843,30 @@ var getJobsJobidResponse = zod12.object({
|
|
|
38302
38843
|
max_delay_mode: zod12.enum(["fixed", "flexible"]).optional().describe(
|
|
38303
38844
|
"Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
|
|
38304
38845
|
),
|
|
38846
|
+
audio_filtering_config: zod12.object({
|
|
38847
|
+
volume_threshold: zod12.number().min(
|
|
38848
|
+
getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
|
|
38849
|
+
).max(
|
|
38850
|
+
getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
|
|
38851
|
+
).optional().describe(
|
|
38852
|
+
"Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
|
|
38853
|
+
)
|
|
38854
|
+
}).optional().describe("Configuration for limiting the transcription of quiet audio."),
|
|
38305
38855
|
transcript_filtering_config: zod12.object({
|
|
38306
38856
|
remove_disfluencies: zod12.boolean().optional().describe(
|
|
38307
|
-
"If true, words
|
|
38857
|
+
"If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
|
|
38308
38858
|
),
|
|
38309
38859
|
replacements: zod12.array(
|
|
38310
38860
|
zod12.object({
|
|
38311
|
-
from: zod12.string(),
|
|
38312
|
-
to: zod12.string()
|
|
38861
|
+
from: zod12.string().describe("The text or pattern identified to be replaced."),
|
|
38862
|
+
to: zod12.string().describe(
|
|
38863
|
+
"The corrected or formatted string to appear in the transcript."
|
|
38864
|
+
)
|
|
38313
38865
|
})
|
|
38314
38866
|
).optional().describe(
|
|
38315
|
-
|
|
38867
|
+
'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
|
|
38316
38868
|
)
|
|
38317
|
-
}).optional().describe("Configuration for applying filtering to the transcription"),
|
|
38869
|
+
}).optional().describe("Configuration for applying filtering to the transcription."),
|
|
38318
38870
|
speaker_diarization_config: zod12.object({
|
|
38319
38871
|
prefer_current_speaker: zod12.boolean().optional().describe(
|
|
38320
38872
|
'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
|
|
@@ -38325,6 +38877,19 @@ var getJobsJobidResponse = zod12.object({
|
|
|
38325
38877
|
getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
|
|
38326
38878
|
).optional().describe(
|
|
38327
38879
|
"Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
|
|
38880
|
+
),
|
|
38881
|
+
get_speakers: zod12.boolean().optional().describe(
|
|
38882
|
+
"If true, speaker identifiers will be returned at the end of transcript."
|
|
38883
|
+
),
|
|
38884
|
+
speakers: zod12.array(
|
|
38885
|
+
zod12.object({
|
|
38886
|
+
label: zod12.string().min(1).describe(
|
|
38887
|
+
"Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
|
|
38888
|
+
),
|
|
38889
|
+
speaker_identifiers: zod12.array(zod12.string().describe("Speaker identifiers.")).min(1)
|
|
38890
|
+
})
|
|
38891
|
+
).optional().describe(
|
|
38892
|
+
"Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
|
|
38328
38893
|
)
|
|
38329
38894
|
}).optional().describe("Configuration for speaker diarization")
|
|
38330
38895
|
}).optional(),
|
|
@@ -38380,10 +38945,14 @@ var getJobsJobidResponse = zod12.object({
|
|
|
38380
38945
|
default_language: zod12.string().optional()
|
|
38381
38946
|
}).optional(),
|
|
38382
38947
|
summarization_config: zod12.object({
|
|
38383
|
-
content_type: zod12.enum(["auto", "informative", "conversational"]).
|
|
38384
|
-
|
|
38948
|
+
content_type: zod12.enum(["auto", "informative", "conversational"]).default(getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault).describe(
|
|
38949
|
+
"Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
|
|
38950
|
+
),
|
|
38951
|
+
summary_length: zod12.enum(["brief", "detailed"]).default(getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault).describe(
|
|
38952
|
+
"Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
|
|
38953
|
+
),
|
|
38385
38954
|
summary_type: zod12.enum(["paragraphs", "bullets"]).optional()
|
|
38386
|
-
}).optional(),
|
|
38955
|
+
}).optional().describe("Configuration options for summarization."),
|
|
38387
38956
|
sentiment_analysis_config: zod12.object({}).optional(),
|
|
38388
38957
|
topic_detection_config: zod12.object({
|
|
38389
38958
|
topics: zod12.array(zod12.string()).optional()
|
|
@@ -38405,7 +38974,7 @@ var getJobsJobidResponse = zod12.object({
|
|
|
38405
38974
|
"Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
|
|
38406
38975
|
)
|
|
38407
38976
|
}).describe(
|
|
38408
|
-
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs
|
|
38977
|
+
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
|
|
38409
38978
|
)
|
|
38410
38979
|
});
|
|
38411
38980
|
var deleteJobsJobidParams = zod12.object({
|
|
@@ -38421,9 +38990,13 @@ var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensi
|
|
|
38421
38990
|
var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
|
|
38422
38991
|
var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
|
|
38423
38992
|
var deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
|
|
38993
|
+
var deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
|
|
38994
|
+
var deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
|
|
38424
38995
|
var deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
|
|
38425
38996
|
var deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
|
|
38426
38997
|
var deleteJobsJobidResponseJobConfigTranslationConfigTargetLanguagesMax = 5;
|
|
38998
|
+
var deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault = "auto";
|
|
38999
|
+
var deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault = "brief";
|
|
38427
39000
|
var deleteJobsJobidResponse = zod12.object({
|
|
38428
39001
|
job: zod12.object({
|
|
38429
39002
|
created_at: zod12.string().datetime({}).describe("The UTC date time the job was created."),
|
|
@@ -38500,19 +39073,30 @@ var deleteJobsJobidResponse = zod12.object({
|
|
|
38500
39073
|
max_delay_mode: zod12.enum(["fixed", "flexible"]).optional().describe(
|
|
38501
39074
|
"Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
|
|
38502
39075
|
),
|
|
39076
|
+
audio_filtering_config: zod12.object({
|
|
39077
|
+
volume_threshold: zod12.number().min(
|
|
39078
|
+
deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
|
|
39079
|
+
).max(
|
|
39080
|
+
deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
|
|
39081
|
+
).optional().describe(
|
|
39082
|
+
"Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
|
|
39083
|
+
)
|
|
39084
|
+
}).optional().describe("Configuration for limiting the transcription of quiet audio."),
|
|
38503
39085
|
transcript_filtering_config: zod12.object({
|
|
38504
39086
|
remove_disfluencies: zod12.boolean().optional().describe(
|
|
38505
|
-
"If true, words
|
|
39087
|
+
"If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
|
|
38506
39088
|
),
|
|
38507
39089
|
replacements: zod12.array(
|
|
38508
39090
|
zod12.object({
|
|
38509
|
-
from: zod12.string(),
|
|
38510
|
-
to: zod12.string()
|
|
39091
|
+
from: zod12.string().describe("The text or pattern identified to be replaced."),
|
|
39092
|
+
to: zod12.string().describe(
|
|
39093
|
+
"The corrected or formatted string to appear in the transcript."
|
|
39094
|
+
)
|
|
38511
39095
|
})
|
|
38512
39096
|
).optional().describe(
|
|
38513
|
-
|
|
39097
|
+
'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
|
|
38514
39098
|
)
|
|
38515
|
-
}).optional().describe("Configuration for applying filtering to the transcription"),
|
|
39099
|
+
}).optional().describe("Configuration for applying filtering to the transcription."),
|
|
38516
39100
|
speaker_diarization_config: zod12.object({
|
|
38517
39101
|
prefer_current_speaker: zod12.boolean().optional().describe(
|
|
38518
39102
|
'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
|
|
@@ -38523,6 +39107,19 @@ var deleteJobsJobidResponse = zod12.object({
|
|
|
38523
39107
|
deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
|
|
38524
39108
|
).optional().describe(
|
|
38525
39109
|
"Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
|
|
39110
|
+
),
|
|
39111
|
+
get_speakers: zod12.boolean().optional().describe(
|
|
39112
|
+
"If true, speaker identifiers will be returned at the end of transcript."
|
|
39113
|
+
),
|
|
39114
|
+
speakers: zod12.array(
|
|
39115
|
+
zod12.object({
|
|
39116
|
+
label: zod12.string().min(1).describe(
|
|
39117
|
+
"Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
|
|
39118
|
+
),
|
|
39119
|
+
speaker_identifiers: zod12.array(zod12.string().describe("Speaker identifiers.")).min(1)
|
|
39120
|
+
})
|
|
39121
|
+
).optional().describe(
|
|
39122
|
+
"Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
|
|
38526
39123
|
)
|
|
38527
39124
|
}).optional().describe("Configuration for speaker diarization")
|
|
38528
39125
|
}).optional(),
|
|
@@ -38578,10 +39175,14 @@ var deleteJobsJobidResponse = zod12.object({
|
|
|
38578
39175
|
default_language: zod12.string().optional()
|
|
38579
39176
|
}).optional(),
|
|
38580
39177
|
summarization_config: zod12.object({
|
|
38581
|
-
content_type: zod12.enum(["auto", "informative", "conversational"]).
|
|
38582
|
-
|
|
39178
|
+
content_type: zod12.enum(["auto", "informative", "conversational"]).default(deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault).describe(
|
|
39179
|
+
"Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
|
|
39180
|
+
),
|
|
39181
|
+
summary_length: zod12.enum(["brief", "detailed"]).default(deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault).describe(
|
|
39182
|
+
"Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
|
|
39183
|
+
),
|
|
38583
39184
|
summary_type: zod12.enum(["paragraphs", "bullets"]).optional()
|
|
38584
|
-
}).optional(),
|
|
39185
|
+
}).optional().describe("Configuration options for summarization."),
|
|
38585
39186
|
sentiment_analysis_config: zod12.object({}).optional(),
|
|
38586
39187
|
topic_detection_config: zod12.object({
|
|
38587
39188
|
topics: zod12.array(zod12.string()).optional()
|
|
@@ -38603,7 +39204,7 @@ var deleteJobsJobidResponse = zod12.object({
|
|
|
38603
39204
|
"Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
|
|
38604
39205
|
)
|
|
38605
39206
|
}).describe(
|
|
38606
|
-
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs
|
|
39207
|
+
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
|
|
38607
39208
|
)
|
|
38608
39209
|
});
|
|
38609
39210
|
var getJobsJobidDataParams = zod12.object({
|
|
@@ -38625,6 +39226,8 @@ var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverride
|
|
|
38625
39226
|
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
|
|
38626
39227
|
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
|
|
38627
39228
|
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
|
|
39229
|
+
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
|
|
39230
|
+
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
|
|
38628
39231
|
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
|
|
38629
39232
|
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
|
|
38630
39233
|
var getJobsJobidTranscriptResponseResultsItemVolumeMin = 0;
|
|
@@ -38696,19 +39299,28 @@ var getJobsJobidTranscriptResponse = zod12.object({
|
|
|
38696
39299
|
max_delay_mode: zod12.enum(["fixed", "flexible"]).optional().describe(
|
|
38697
39300
|
"Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
|
|
38698
39301
|
),
|
|
39302
|
+
audio_filtering_config: zod12.object({
|
|
39303
|
+
volume_threshold: zod12.number().min(
|
|
39304
|
+
getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
|
|
39305
|
+
).max(
|
|
39306
|
+
getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
|
|
39307
|
+
).optional().describe(
|
|
39308
|
+
"Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
|
|
39309
|
+
)
|
|
39310
|
+
}).optional().describe("Configuration for limiting the transcription of quiet audio."),
|
|
38699
39311
|
transcript_filtering_config: zod12.object({
|
|
38700
39312
|
remove_disfluencies: zod12.boolean().optional().describe(
|
|
38701
|
-
"If true, words
|
|
39313
|
+
"If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
|
|
38702
39314
|
),
|
|
38703
39315
|
replacements: zod12.array(
|
|
38704
39316
|
zod12.object({
|
|
38705
|
-
from: zod12.string(),
|
|
38706
|
-
to: zod12.string()
|
|
39317
|
+
from: zod12.string().describe("The text or pattern identified to be replaced."),
|
|
39318
|
+
to: zod12.string().describe("The corrected or formatted string to appear in the transcript.")
|
|
38707
39319
|
})
|
|
38708
39320
|
).optional().describe(
|
|
38709
|
-
|
|
39321
|
+
'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
|
|
38710
39322
|
)
|
|
38711
|
-
}).optional().describe("Configuration for applying filtering to the transcription"),
|
|
39323
|
+
}).optional().describe("Configuration for applying filtering to the transcription."),
|
|
38712
39324
|
speaker_diarization_config: zod12.object({
|
|
38713
39325
|
prefer_current_speaker: zod12.boolean().optional().describe(
|
|
38714
39326
|
'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
|
|
@@ -38719,9 +39331,23 @@ var getJobsJobidTranscriptResponse = zod12.object({
|
|
|
38719
39331
|
getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
|
|
38720
39332
|
).optional().describe(
|
|
38721
39333
|
"Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
|
|
39334
|
+
),
|
|
39335
|
+
get_speakers: zod12.boolean().optional().describe(
|
|
39336
|
+
"If true, speaker identifiers will be returned at the end of transcript."
|
|
39337
|
+
),
|
|
39338
|
+
speakers: zod12.array(
|
|
39339
|
+
zod12.object({
|
|
39340
|
+
label: zod12.string().min(1).describe(
|
|
39341
|
+
"Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
|
|
39342
|
+
),
|
|
39343
|
+
speaker_identifiers: zod12.array(zod12.string().describe("Speaker identifiers.")).min(1)
|
|
39344
|
+
})
|
|
39345
|
+
).optional().describe(
|
|
39346
|
+
"Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
|
|
38722
39347
|
)
|
|
38723
39348
|
}).optional().describe("Configuration for speaker diarization")
|
|
38724
39349
|
}).optional(),
|
|
39350
|
+
orchestrator_version: zod12.string().optional().describe("The engine version used to generate transcription output."),
|
|
38725
39351
|
translation_errors: zod12.array(
|
|
38726
39352
|
zod12.object({
|
|
38727
39353
|
type: zod12.enum(["translation_failed", "unsupported_translation_pair"]).optional(),
|
|
@@ -38799,10 +39425,7 @@ var getJobsJobidTranscriptResponse = zod12.object({
|
|
|
38799
39425
|
"OTHER"
|
|
38800
39426
|
]).optional(),
|
|
38801
39427
|
message: zod12.string().optional()
|
|
38802
|
-
}).optional()
|
|
38803
|
-
orchestrator_version: zod12.string().optional().describe(
|
|
38804
|
-
"Orchestrator version in PEP 440 Format or set to 'version_not_found' as default."
|
|
38805
|
-
)
|
|
39428
|
+
}).optional()
|
|
38806
39429
|
}).describe(
|
|
38807
39430
|
"Summary information about the output from an ASR job, comprising the job type and configuration parameters used when generating the output."
|
|
38808
39431
|
),
|
|
@@ -38885,6 +39508,12 @@ var getJobsJobidTranscriptResponse = zod12.object({
|
|
|
38885
39508
|
"An ASR job output item. The primary item types are `word` and `punctuation`. Other item types may be present, for example to provide semantic information of different forms."
|
|
38886
39509
|
)
|
|
38887
39510
|
),
|
|
39511
|
+
speakers: zod12.array(
|
|
39512
|
+
zod12.object({
|
|
39513
|
+
label: zod12.string().min(1).describe("Speaker label."),
|
|
39514
|
+
speaker_identifiers: zod12.array(zod12.string().describe("Speaker identifiers.")).min(1)
|
|
39515
|
+
})
|
|
39516
|
+
).optional().describe("List of unique speaker identifiers detected in the transcript."),
|
|
38888
39517
|
translations: zod12.record(
|
|
38889
39518
|
zod12.string(),
|
|
38890
39519
|
zod12.array(
|
|
@@ -38906,13 +39535,23 @@ var getJobsJobidTranscriptResponse = zod12.object({
|
|
|
38906
39535
|
sentiment_analysis: zod12.object({
|
|
38907
39536
|
segments: zod12.array(
|
|
38908
39537
|
zod12.object({
|
|
38909
|
-
text: zod12.string().optional(),
|
|
38910
|
-
|
|
38911
|
-
|
|
38912
|
-
|
|
38913
|
-
|
|
38914
|
-
|
|
38915
|
-
|
|
39538
|
+
text: zod12.string().optional().describe("Represents the transcript of the analysed segment"),
|
|
39539
|
+
sentiment: zod12.string().optional().describe(
|
|
39540
|
+
"The assigned sentiment to the segment, which can be positive, neutral or negative"
|
|
39541
|
+
),
|
|
39542
|
+
start_time: zod12.number().optional().describe(
|
|
39543
|
+
"The timestamp corresponding to the beginning of the transcription segment"
|
|
39544
|
+
),
|
|
39545
|
+
end_time: zod12.number().optional().describe(
|
|
39546
|
+
"The timestamp corresponding to the end of the transcription segment"
|
|
39547
|
+
),
|
|
39548
|
+
speaker: zod12.string().optional().describe(
|
|
39549
|
+
"The speaker label for the segment, if speaker diarization is enabled"
|
|
39550
|
+
),
|
|
39551
|
+
channel: zod12.string().optional().describe(
|
|
39552
|
+
"The channel label for the segment, if channel diarization is enabled"
|
|
39553
|
+
),
|
|
39554
|
+
confidence: zod12.number().optional().describe("A confidence score in the range of 0-1")
|
|
38916
39555
|
}).describe("Represents a segment of text and its associated sentiment.")
|
|
38917
39556
|
).optional().describe(
|
|
38918
39557
|
"An array of objects that represent a segment of text and its associated sentiment."
|
|
@@ -38971,10 +39610,10 @@ var getJobsJobidTranscriptResponse = zod12.object({
|
|
|
38971
39610
|
}).optional().describe("Main object that holds topic detection results."),
|
|
38972
39611
|
chapters: zod12.array(
|
|
38973
39612
|
zod12.object({
|
|
38974
|
-
title: zod12.string().optional(),
|
|
38975
|
-
summary: zod12.string().optional(),
|
|
38976
|
-
start_time: zod12.number().optional(),
|
|
38977
|
-
end_time: zod12.number().optional()
|
|
39613
|
+
title: zod12.string().optional().describe("The auto-generated title for the chapter"),
|
|
39614
|
+
summary: zod12.string().optional().describe("An auto-generated paragraph-style, short summary of the chapter"),
|
|
39615
|
+
start_time: zod12.number().optional().describe("The start time of the chapter in the audio file"),
|
|
39616
|
+
end_time: zod12.number().optional().describe("The end time of the chapter in the audio file")
|
|
38978
39617
|
})
|
|
38979
39618
|
).optional().describe("An array of objects that represent summarized chapters of the transcript"),
|
|
38980
39619
|
audio_events: zod12.array(
|
|
@@ -39019,6 +39658,18 @@ var getJobsJobidLogParams = zod12.object({
|
|
|
39019
39658
|
jobid: zod12.string().describe("ID of the job.")
|
|
39020
39659
|
});
|
|
39021
39660
|
var getJobsJobidLogResponse = zod12.instanceof(File);
|
|
39661
|
+
var getJobsJobidObjectUrlsParams = zod12.object({
|
|
39662
|
+
jobid: zod12.string().describe("ID of the job.")
|
|
39663
|
+
});
|
|
39664
|
+
var getJobsJobidObjectUrlsQueryParams = zod12.object({
|
|
39665
|
+
ttl: zod12.number().describe("Time to live in seconds for the signed URLs"),
|
|
39666
|
+
url_for: zod12.array(zod12.enum(["data", "audio_mp3", "transcript"]))
|
|
39667
|
+
});
|
|
39668
|
+
var getJobsJobidObjectUrlsResponse = zod12.object({
|
|
39669
|
+
data: zod12.string().optional(),
|
|
39670
|
+
audio_mp3: zod12.string().optional(),
|
|
39671
|
+
transcript: zod12.string().optional()
|
|
39672
|
+
});
|
|
39022
39673
|
var getUsageQueryParams = zod12.object({
|
|
39023
39674
|
since: zod12.string().date().optional().describe(
|
|
39024
39675
|
"Include usage after the given date (inclusive). This is a [ISO-8601](https://en.wikipedia.org/wiki/ISO_8601) calendar date format: `YYYY-MM-DD`."
|
|
@@ -39152,7 +39803,7 @@ var speechToTextBodyKeytermsDefault = [];
|
|
|
39152
39803
|
var speechToTextBody = zod13.object({
|
|
39153
39804
|
model_id: zod13.enum(["scribe_v1", "scribe_v2"]).describe("The ID of the model to use for transcription."),
|
|
39154
39805
|
file: zod13.instanceof(File).or(zod13.null()).optional().describe(
|
|
39155
|
-
"The file to transcribe (100ms minimum audio length). All major audio and video formats are supported. Exactly one of the file or cloud_storage_url parameters must be provided. The file size must be less than
|
|
39806
|
+
"The file to transcribe (100ms minimum audio length). All major audio and video formats are supported. Exactly one of the file or cloud_storage_url parameters must be provided. The file size must be less than 5.0GB."
|
|
39156
39807
|
),
|
|
39157
39808
|
language_code: zod13.string().or(zod13.null()).optional().describe(
|
|
39158
39809
|
"An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand. Defaults to null, in this case the language is predicted automatically."
|
|
@@ -39230,7 +39881,7 @@ var speechToTextBody = zod13.object({
|
|
|
39230
39881
|
"The format of input audio. Options are 'pcm_s16le_16' or 'other' For `pcm_s16le_16`, the input audio must be 16-bit PCM at a 16kHz sample rate, single channel (mono), and little-endian byte order. Latency will be lower than with passing an encoded waveform."
|
|
39231
39882
|
),
|
|
39232
39883
|
cloud_storage_url: zod13.string().or(zod13.null()).optional().describe(
|
|
39233
|
-
"The HTTPS URL of the file to transcribe. Exactly one of the file or cloud_storage_url parameters must be provided. The file must be accessible via HTTPS and the file size must be less than 2GB. Any valid HTTPS URL is accepted, including URLs from cloud storage providers (AWS S3, Google Cloud Storage, Cloudflare R2, etc.), CDNs, or any other HTTPS source. URLs can be pre-signed or include authentication tokens in query parameters."
|
|
39884
|
+
"[Deprecated] This parameter is deprecated and will be removed in the future. Use 'source_url' instead.The HTTPS URL of the file to transcribe. Exactly one of the file or cloud_storage_url parameters must be provided. The file must be accessible via HTTPS and the file size must be less than 2GB. Any valid HTTPS URL is accepted, including URLs from cloud storage providers (AWS S3, Google Cloud Storage, Cloudflare R2, etc.), CDNs, or any other HTTPS source. URLs can be pre-signed or include authentication tokens in query parameters."
|
|
39234
39885
|
),
|
|
39235
39886
|
source_url: zod13.string().or(zod13.null()).optional().describe(
|
|
39236
39887
|
"The URL of an audio or video file to transcribe. Supports hosted video or audio files, YouTube video URLs, TikTok video URLs, and other video hosting services."
|
|
@@ -39269,7 +39920,7 @@ var speechToTextBody = zod13.object({
|
|
|
39269
39920
|
"How to format redacted entities. 'redacted' replaces with {REDACTED}, 'entity_type' replaces with {ENTITY_TYPE}, 'enumerated_entity_type' replaces with {ENTITY_TYPE_N} where N enumerates each occurrence. Only used when entity_redaction is set."
|
|
39270
39921
|
),
|
|
39271
39922
|
keyterms: zod13.array(zod13.string()).default(speechToTextBodyKeytermsDefault).describe(
|
|
39272
|
-
'A list of keyterms to bias the transcription towards. The keyterms are words or phrases you want the model to recognise more accurately. The number of keyterms cannot exceed 1000. The length of each keyterm must be less than 50 characters. Keyterms can contain at most 5 words (after normalisation). For example ["hello", "world", "technical term"]. Usage of this parameter will incur an additional 20% surcharge on the base transcription cost. When more than 100 keyterms are provided, a minimum billable duration of 20 seconds applies per request.'
|
|
39923
|
+
'A list of keyterms to bias the transcription towards. The keyterms are words or phrases you want the model to recognise more accurately. The number of keyterms cannot exceed 1000. The length of each keyterm must be less than 50 characters. Keyterms can contain at most 5 words (after normalisation). For example ["hello", "world", "technical term"]. The following characters are not supported: `<`, `>`, `{`, `}`, `[`, `]`, `\\`. Usage of this parameter will incur an additional 20% surcharge on the base transcription cost. When more than 100 keyterms are provided, a minimum billable duration of 20 seconds applies per request.'
|
|
39273
39924
|
)
|
|
39274
39925
|
});
|
|
39275
39926
|
var speechToTextResponse = zod13.object({
|
|
@@ -39634,6 +40285,7 @@ export {
|
|
|
39634
40285
|
SonioxModels,
|
|
39635
40286
|
SonioxRealtimeModel,
|
|
39636
40287
|
SonioxRegion,
|
|
40288
|
+
sdk_types_exports as SonioxSDK,
|
|
39637
40289
|
SonioxStreamingSchema,
|
|
39638
40290
|
streaming_types_zod_exports as SonioxStreamingTypes,
|
|
39639
40291
|
SonioxStreamingUpdateSchema,
|