voice-router-dev 0.9.4 → 0.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +66 -0
- package/dist/constants.d.mts +11 -92
- package/dist/constants.d.ts +11 -92
- package/dist/constants.js +11 -88
- package/dist/constants.mjs +11 -88
- package/dist/{field-configs-BXXH2T3E.d.mts → field-configs-Bt2iLgt_.d.mts} +8982 -7900
- package/dist/{field-configs-BXXH2T3E.d.ts → field-configs-Bt2iLgt_.d.ts} +8982 -7900
- package/dist/field-configs.d.mts +1 -1
- package/dist/field-configs.d.ts +1 -1
- package/dist/field-configs.js +583 -150
- package/dist/field-configs.mjs +583 -150
- package/dist/index.d.mts +1578 -560
- package/dist/index.d.ts +1578 -560
- package/dist/index.js +922 -279
- package/dist/index.mjs +925 -279
- package/dist/{provider-metadata-D1d-9cng.d.ts → provider-metadata-B5SFlFb0.d.ts} +6 -6
- package/dist/{provider-metadata-BJ29OPW1.d.mts → provider-metadata-J8URl-3i.d.mts} +6 -6
- package/dist/provider-metadata.d.mts +1 -1
- package/dist/provider-metadata.d.ts +1 -1
- package/dist/provider-metadata.js +3 -66
- package/dist/provider-metadata.mjs +3 -66
- package/dist/{speechToTextChunkResponseModel-BY2lGyZ3.d.ts → speechToTextChunkResponseModel-DhC5T0u1.d.ts} +364 -39
- package/dist/{speechToTextChunkResponseModel-KayxDiZ7.d.mts → speechToTextChunkResponseModel-SgJKP7kZ.d.mts} +364 -39
- package/dist/webhooks.d.mts +3 -2
- package/dist/webhooks.d.ts +3 -2
- package/package.json +8 -3
package/dist/index.js
CHANGED
|
@@ -145,6 +145,7 @@ __export(src_exports, {
|
|
|
145
145
|
SonioxModels: () => SonioxModels,
|
|
146
146
|
SonioxRealtimeModel: () => SonioxRealtimeModel,
|
|
147
147
|
SonioxRegion: () => SonioxRegion,
|
|
148
|
+
SonioxSDK: () => sdk_types_exports,
|
|
148
149
|
SonioxStreamingSchema: () => SonioxStreamingSchema,
|
|
149
150
|
SonioxStreamingTypes: () => streaming_types_zod_exports,
|
|
150
151
|
SonioxStreamingUpdateSchema: () => SonioxStreamingUpdateSchema,
|
|
@@ -1333,7 +1334,6 @@ var AzureLocales = [
|
|
|
1333
1334
|
{ code: "ar-YE", name: "Arabic (Yemen)" },
|
|
1334
1335
|
{ code: "as-IN", name: "Assamese (India)" },
|
|
1335
1336
|
{ code: "az-AZ", name: "Azerbaijani (Azerbaijan)" },
|
|
1336
|
-
{ code: "be-BY", name: "Belarusian (Belarus)" },
|
|
1337
1337
|
{ code: "bg-BG", name: "Bulgarian (Bulgaria)" },
|
|
1338
1338
|
{ code: "bn-BD", name: "Bengali (Bangladesh)" },
|
|
1339
1339
|
{ code: "bn-IN", name: "Bengali (India)" },
|
|
@@ -1414,7 +1414,6 @@ var AzureLocales = [
|
|
|
1414
1414
|
{ code: "lo-LA", name: "Lao (Latin)" },
|
|
1415
1415
|
{ code: "lt-LT", name: "Lithuanian (Lithuania)" },
|
|
1416
1416
|
{ code: "lv-LV", name: "Latvian (Latvia)" },
|
|
1417
|
-
{ code: "mi-NZ", name: "Maori (New Zealand)" },
|
|
1418
1417
|
{ code: "mk-MK", name: "Macedonian (North Macedonia)" },
|
|
1419
1418
|
{ code: "ml-IN", name: "Malayalam (India)" },
|
|
1420
1419
|
{ code: "mn-MN", name: "Mongolian (Mongolia)" },
|
|
@@ -1490,7 +1489,6 @@ var AzureLocaleCodes = [
|
|
|
1490
1489
|
"ar-YE",
|
|
1491
1490
|
"as-IN",
|
|
1492
1491
|
"az-AZ",
|
|
1493
|
-
"be-BY",
|
|
1494
1492
|
"bg-BG",
|
|
1495
1493
|
"bn-BD",
|
|
1496
1494
|
"bn-IN",
|
|
@@ -1571,7 +1569,6 @@ var AzureLocaleCodes = [
|
|
|
1571
1569
|
"lo-LA",
|
|
1572
1570
|
"lt-LT",
|
|
1573
1571
|
"lv-LV",
|
|
1574
|
-
"mi-NZ",
|
|
1575
1572
|
"mk-MK",
|
|
1576
1573
|
"ml-IN",
|
|
1577
1574
|
"mn-MN",
|
|
@@ -1647,7 +1644,6 @@ var AzureLocaleLabels = {
|
|
|
1647
1644
|
"ar-YE": "Arabic (Yemen)",
|
|
1648
1645
|
"as-IN": "Assamese (India)",
|
|
1649
1646
|
"az-AZ": "Azerbaijani (Azerbaijan)",
|
|
1650
|
-
"be-BY": "Belarusian (Belarus)",
|
|
1651
1647
|
"bg-BG": "Bulgarian (Bulgaria)",
|
|
1652
1648
|
"bn-BD": "Bengali (Bangladesh)",
|
|
1653
1649
|
"bn-IN": "Bengali (India)",
|
|
@@ -1728,7 +1724,6 @@ var AzureLocaleLabels = {
|
|
|
1728
1724
|
"lo-LA": "Lao (Latin)",
|
|
1729
1725
|
"lt-LT": "Lithuanian (Lithuania)",
|
|
1730
1726
|
"lv-LV": "Latvian (Latvia)",
|
|
1731
|
-
"mi-NZ": "Maori (New Zealand)",
|
|
1732
1727
|
"mk-MK": "Macedonian (North Macedonia)",
|
|
1733
1728
|
"ml-IN": "Malayalam (India)",
|
|
1734
1729
|
"mn-MN": "Mongolian (Mongolia)",
|
|
@@ -1804,7 +1799,6 @@ var AzureLocale = {
|
|
|
1804
1799
|
"ar-YE": "ar-YE",
|
|
1805
1800
|
"as-IN": "as-IN",
|
|
1806
1801
|
"az-AZ": "az-AZ",
|
|
1807
|
-
"be-BY": "be-BY",
|
|
1808
1802
|
"bg-BG": "bg-BG",
|
|
1809
1803
|
"bn-BD": "bn-BD",
|
|
1810
1804
|
"bn-IN": "bn-IN",
|
|
@@ -1885,7 +1879,6 @@ var AzureLocale = {
|
|
|
1885
1879
|
"lo-LA": "lo-LA",
|
|
1886
1880
|
"lt-LT": "lt-LT",
|
|
1887
1881
|
"lv-LV": "lv-LV",
|
|
1888
|
-
"mi-NZ": "mi-NZ",
|
|
1889
1882
|
"mk-MK": "mk-MK",
|
|
1890
1883
|
"ml-IN": "ml-IN",
|
|
1891
1884
|
"mn-MN": "mn-MN",
|
|
@@ -1976,8 +1969,6 @@ var ElevenLabsLanguages = [
|
|
|
1976
1969
|
{ code: "hr", name: "Croatian" },
|
|
1977
1970
|
{ code: "bg", name: "Bulgarian" },
|
|
1978
1971
|
{ code: "lt", name: "Lithuanian" },
|
|
1979
|
-
{ code: "la", name: "Latin" },
|
|
1980
|
-
{ code: "mi", name: "Maori" },
|
|
1981
1972
|
{ code: "ml", name: "Malayalam" },
|
|
1982
1973
|
{ code: "cy", name: "Welsh" },
|
|
1983
1974
|
{ code: "sk", name: "Slovak" },
|
|
@@ -1991,20 +1982,16 @@ var ElevenLabsLanguages = [
|
|
|
1991
1982
|
{ code: "kn", name: "Kannada" },
|
|
1992
1983
|
{ code: "et", name: "Estonian" },
|
|
1993
1984
|
{ code: "mk", name: "Macedonian" },
|
|
1994
|
-
{ code: "br", name: "Breton" },
|
|
1995
|
-
{ code: "eu", name: "Basque" },
|
|
1996
1985
|
{ code: "is", name: "Icelandic" },
|
|
1997
1986
|
{ code: "hy", name: "Armenian" },
|
|
1998
1987
|
{ code: "ne", name: "Nepali" },
|
|
1999
1988
|
{ code: "mn", name: "Mongolian" },
|
|
2000
1989
|
{ code: "bs", name: "Bosnian" },
|
|
2001
1990
|
{ code: "kk", name: "Kazakh" },
|
|
2002
|
-
{ code: "sq", name: "Albanian" },
|
|
2003
1991
|
{ code: "sw", name: "Swahili" },
|
|
2004
1992
|
{ code: "gl", name: "Galician" },
|
|
2005
1993
|
{ code: "mr", name: "Marathi" },
|
|
2006
1994
|
{ code: "pa", name: "Punjabi" },
|
|
2007
|
-
{ code: "si", name: "Sinhala" },
|
|
2008
1995
|
{ code: "km", name: "Khmer" },
|
|
2009
1996
|
{ code: "sn", name: "Shona" },
|
|
2010
1997
|
{ code: "yo", name: "Yoruba" },
|
|
@@ -2017,29 +2004,16 @@ var ElevenLabsLanguages = [
|
|
|
2017
2004
|
{ code: "sd", name: "Sindhi" },
|
|
2018
2005
|
{ code: "gu", name: "Gujarati" },
|
|
2019
2006
|
{ code: "am", name: "Amharic" },
|
|
2020
|
-
{ code: "yi", name: "Yiddish" },
|
|
2021
2007
|
{ code: "lo", name: "Lao" },
|
|
2022
2008
|
{ code: "uz", name: "Uzbek" },
|
|
2023
|
-
{ code: "fo", name: "Faroese" },
|
|
2024
|
-
{ code: "ht", name: "Haitian Creole" },
|
|
2025
2009
|
{ code: "ps", name: "Pashto" },
|
|
2026
|
-
{ code: "tk", name: "Turkmen" },
|
|
2027
|
-
{ code: "nn", name: "Norwegian Nynorsk" },
|
|
2028
2010
|
{ code: "mt", name: "Maltese" },
|
|
2029
|
-
{ code: "sa", name: "Sanskrit" },
|
|
2030
2011
|
{ code: "lb", name: "Luxembourgish" },
|
|
2031
2012
|
{ code: "my", name: "Burmese" },
|
|
2032
|
-
{ code: "bo", name: "Tibetan" },
|
|
2033
|
-
{ code: "tl", name: "Tagalog" },
|
|
2034
|
-
{ code: "mg", name: "Malagasy" },
|
|
2035
2013
|
{ code: "as", name: "Assamese" },
|
|
2036
|
-
{ code: "tt", name: "Tatar" },
|
|
2037
|
-
{ code: "haw", name: "Hawaiian" },
|
|
2038
2014
|
{ code: "ln", name: "Lingala" },
|
|
2039
2015
|
{ code: "ha", name: "Hausa" },
|
|
2040
|
-
{ code: "
|
|
2041
|
-
{ code: "jw", name: "Javanese" },
|
|
2042
|
-
{ code: "su", name: "Sundanese" }
|
|
2016
|
+
{ code: "jw", name: "Javanese" }
|
|
2043
2017
|
];
|
|
2044
2018
|
var ElevenLabsLanguageCodes = [
|
|
2045
2019
|
"en",
|
|
@@ -2077,8 +2051,6 @@ var ElevenLabsLanguageCodes = [
|
|
|
2077
2051
|
"hr",
|
|
2078
2052
|
"bg",
|
|
2079
2053
|
"lt",
|
|
2080
|
-
"la",
|
|
2081
|
-
"mi",
|
|
2082
2054
|
"ml",
|
|
2083
2055
|
"cy",
|
|
2084
2056
|
"sk",
|
|
@@ -2092,20 +2064,16 @@ var ElevenLabsLanguageCodes = [
|
|
|
2092
2064
|
"kn",
|
|
2093
2065
|
"et",
|
|
2094
2066
|
"mk",
|
|
2095
|
-
"br",
|
|
2096
|
-
"eu",
|
|
2097
2067
|
"is",
|
|
2098
2068
|
"hy",
|
|
2099
2069
|
"ne",
|
|
2100
2070
|
"mn",
|
|
2101
2071
|
"bs",
|
|
2102
2072
|
"kk",
|
|
2103
|
-
"sq",
|
|
2104
2073
|
"sw",
|
|
2105
2074
|
"gl",
|
|
2106
2075
|
"mr",
|
|
2107
2076
|
"pa",
|
|
2108
|
-
"si",
|
|
2109
2077
|
"km",
|
|
2110
2078
|
"sn",
|
|
2111
2079
|
"yo",
|
|
@@ -2118,29 +2086,16 @@ var ElevenLabsLanguageCodes = [
|
|
|
2118
2086
|
"sd",
|
|
2119
2087
|
"gu",
|
|
2120
2088
|
"am",
|
|
2121
|
-
"yi",
|
|
2122
2089
|
"lo",
|
|
2123
2090
|
"uz",
|
|
2124
|
-
"fo",
|
|
2125
|
-
"ht",
|
|
2126
2091
|
"ps",
|
|
2127
|
-
"tk",
|
|
2128
|
-
"nn",
|
|
2129
2092
|
"mt",
|
|
2130
|
-
"sa",
|
|
2131
2093
|
"lb",
|
|
2132
2094
|
"my",
|
|
2133
|
-
"bo",
|
|
2134
|
-
"tl",
|
|
2135
|
-
"mg",
|
|
2136
2095
|
"as",
|
|
2137
|
-
"tt",
|
|
2138
|
-
"haw",
|
|
2139
2096
|
"ln",
|
|
2140
2097
|
"ha",
|
|
2141
|
-
"
|
|
2142
|
-
"jw",
|
|
2143
|
-
"su"
|
|
2098
|
+
"jw"
|
|
2144
2099
|
];
|
|
2145
2100
|
var ElevenLabsLanguageLabels = {
|
|
2146
2101
|
en: "English",
|
|
@@ -2178,8 +2133,6 @@ var ElevenLabsLanguageLabels = {
|
|
|
2178
2133
|
hr: "Croatian",
|
|
2179
2134
|
bg: "Bulgarian",
|
|
2180
2135
|
lt: "Lithuanian",
|
|
2181
|
-
la: "Latin",
|
|
2182
|
-
mi: "Maori",
|
|
2183
2136
|
ml: "Malayalam",
|
|
2184
2137
|
cy: "Welsh",
|
|
2185
2138
|
sk: "Slovak",
|
|
@@ -2193,20 +2146,16 @@ var ElevenLabsLanguageLabels = {
|
|
|
2193
2146
|
kn: "Kannada",
|
|
2194
2147
|
et: "Estonian",
|
|
2195
2148
|
mk: "Macedonian",
|
|
2196
|
-
br: "Breton",
|
|
2197
|
-
eu: "Basque",
|
|
2198
2149
|
is: "Icelandic",
|
|
2199
2150
|
hy: "Armenian",
|
|
2200
2151
|
ne: "Nepali",
|
|
2201
2152
|
mn: "Mongolian",
|
|
2202
2153
|
bs: "Bosnian",
|
|
2203
2154
|
kk: "Kazakh",
|
|
2204
|
-
sq: "Albanian",
|
|
2205
2155
|
sw: "Swahili",
|
|
2206
2156
|
gl: "Galician",
|
|
2207
2157
|
mr: "Marathi",
|
|
2208
2158
|
pa: "Punjabi",
|
|
2209
|
-
si: "Sinhala",
|
|
2210
2159
|
km: "Khmer",
|
|
2211
2160
|
sn: "Shona",
|
|
2212
2161
|
yo: "Yoruba",
|
|
@@ -2219,29 +2168,16 @@ var ElevenLabsLanguageLabels = {
|
|
|
2219
2168
|
sd: "Sindhi",
|
|
2220
2169
|
gu: "Gujarati",
|
|
2221
2170
|
am: "Amharic",
|
|
2222
|
-
yi: "Yiddish",
|
|
2223
2171
|
lo: "Lao",
|
|
2224
2172
|
uz: "Uzbek",
|
|
2225
|
-
fo: "Faroese",
|
|
2226
|
-
ht: "Haitian Creole",
|
|
2227
2173
|
ps: "Pashto",
|
|
2228
|
-
tk: "Turkmen",
|
|
2229
|
-
nn: "Norwegian Nynorsk",
|
|
2230
2174
|
mt: "Maltese",
|
|
2231
|
-
sa: "Sanskrit",
|
|
2232
2175
|
lb: "Luxembourgish",
|
|
2233
2176
|
my: "Burmese",
|
|
2234
|
-
bo: "Tibetan",
|
|
2235
|
-
tl: "Tagalog",
|
|
2236
|
-
mg: "Malagasy",
|
|
2237
2177
|
as: "Assamese",
|
|
2238
|
-
tt: "Tatar",
|
|
2239
|
-
haw: "Hawaiian",
|
|
2240
2178
|
ln: "Lingala",
|
|
2241
2179
|
ha: "Hausa",
|
|
2242
|
-
|
|
2243
|
-
jw: "Javanese",
|
|
2244
|
-
su: "Sundanese"
|
|
2180
|
+
jw: "Javanese"
|
|
2245
2181
|
};
|
|
2246
2182
|
|
|
2247
2183
|
// src/generated/gladia/schema/streamingSupportedBitDepthEnum.ts
|
|
@@ -2746,6 +2682,7 @@ var OpenAITranscriptionModel = {
|
|
|
2746
2682
|
"gpt-4o-mini-transcribe-2025-12-15": "gpt-4o-mini-transcribe-2025-12-15",
|
|
2747
2683
|
"gpt-4o-transcribe": "gpt-4o-transcribe",
|
|
2748
2684
|
"gpt-4o-transcribe-diarize": "gpt-4o-transcribe-diarize",
|
|
2685
|
+
"gpt-realtime-whisper": "gpt-realtime-whisper",
|
|
2749
2686
|
"whisper-1": "whisper-1"
|
|
2750
2687
|
};
|
|
2751
2688
|
var OpenAIRealtimeModel = {
|
|
@@ -2761,6 +2698,7 @@ var OpenAIRealtimeModel = {
|
|
|
2761
2698
|
"gpt-audio-mini-2025-12-15": "gpt-audio-mini-2025-12-15",
|
|
2762
2699
|
"gpt-realtime": "gpt-realtime",
|
|
2763
2700
|
"gpt-realtime-1.5": "gpt-realtime-1.5",
|
|
2701
|
+
"gpt-realtime-2": "gpt-realtime-2",
|
|
2764
2702
|
"gpt-realtime-2025-08-28": "gpt-realtime-2025-08-28",
|
|
2765
2703
|
"gpt-realtime-mini": "gpt-realtime-mini",
|
|
2766
2704
|
"gpt-realtime-mini-2025-10-06": "gpt-realtime-mini-2025-10-06",
|
|
@@ -5577,12 +5515,20 @@ var EntityType = {
|
|
|
5577
5515
|
email_address: "email_address",
|
|
5578
5516
|
event: "event",
|
|
5579
5517
|
filename: "filename",
|
|
5518
|
+
gender: "gender",
|
|
5580
5519
|
gender_sexuality: "gender_sexuality",
|
|
5581
5520
|
healthcare_number: "healthcare_number",
|
|
5582
5521
|
injury: "injury",
|
|
5583
5522
|
ip_address: "ip_address",
|
|
5584
5523
|
language: "language",
|
|
5585
5524
|
location: "location",
|
|
5525
|
+
location_address: "location_address",
|
|
5526
|
+
location_address_street: "location_address_street",
|
|
5527
|
+
location_city: "location_city",
|
|
5528
|
+
location_coordinate: "location_coordinate",
|
|
5529
|
+
location_country: "location_country",
|
|
5530
|
+
location_state: "location_state",
|
|
5531
|
+
location_zip: "location_zip",
|
|
5586
5532
|
marital_status: "marital_status",
|
|
5587
5533
|
medical_condition: "medical_condition",
|
|
5588
5534
|
medical_process: "medical_process",
|
|
@@ -5591,6 +5537,7 @@ var EntityType = {
|
|
|
5591
5537
|
number_sequence: "number_sequence",
|
|
5592
5538
|
occupation: "occupation",
|
|
5593
5539
|
organization: "organization",
|
|
5540
|
+
organization_medical_facility: "organization_medical_facility",
|
|
5594
5541
|
passport_number: "passport_number",
|
|
5595
5542
|
password: "password",
|
|
5596
5543
|
person_age: "person_age",
|
|
@@ -5599,6 +5546,7 @@ var EntityType = {
|
|
|
5599
5546
|
physical_attribute: "physical_attribute",
|
|
5600
5547
|
political_affiliation: "political_affiliation",
|
|
5601
5548
|
religion: "religion",
|
|
5549
|
+
sexuality: "sexuality",
|
|
5602
5550
|
statistics: "statistics",
|
|
5603
5551
|
time: "time",
|
|
5604
5552
|
url: "url",
|
|
@@ -5625,12 +5573,20 @@ var PiiPolicy = {
|
|
|
5625
5573
|
email_address: "email_address",
|
|
5626
5574
|
event: "event",
|
|
5627
5575
|
filename: "filename",
|
|
5576
|
+
gender: "gender",
|
|
5628
5577
|
gender_sexuality: "gender_sexuality",
|
|
5629
5578
|
healthcare_number: "healthcare_number",
|
|
5630
5579
|
injury: "injury",
|
|
5631
5580
|
ip_address: "ip_address",
|
|
5632
5581
|
language: "language",
|
|
5633
5582
|
location: "location",
|
|
5583
|
+
location_address: "location_address",
|
|
5584
|
+
location_address_street: "location_address_street",
|
|
5585
|
+
location_city: "location_city",
|
|
5586
|
+
location_coordinate: "location_coordinate",
|
|
5587
|
+
location_country: "location_country",
|
|
5588
|
+
location_state: "location_state",
|
|
5589
|
+
location_zip: "location_zip",
|
|
5634
5590
|
marital_status: "marital_status",
|
|
5635
5591
|
medical_condition: "medical_condition",
|
|
5636
5592
|
medical_process: "medical_process",
|
|
@@ -5639,6 +5595,7 @@ var PiiPolicy = {
|
|
|
5639
5595
|
number_sequence: "number_sequence",
|
|
5640
5596
|
occupation: "occupation",
|
|
5641
5597
|
organization: "organization",
|
|
5598
|
+
organization_medical_facility: "organization_medical_facility",
|
|
5642
5599
|
passport_number: "passport_number",
|
|
5643
5600
|
password: "password",
|
|
5644
5601
|
person_age: "person_age",
|
|
@@ -5647,6 +5604,7 @@ var PiiPolicy = {
|
|
|
5647
5604
|
physical_attribute: "physical_attribute",
|
|
5648
5605
|
political_affiliation: "political_affiliation",
|
|
5649
5606
|
religion: "religion",
|
|
5607
|
+
sexuality: "sexuality",
|
|
5650
5608
|
statistics: "statistics",
|
|
5651
5609
|
time: "time",
|
|
5652
5610
|
url: "url",
|
|
@@ -5715,7 +5673,8 @@ var TranscriptOptionalParamsRedactPiiAudioOptionsOverrideAudioRedactionMethod =
|
|
|
5715
5673
|
|
|
5716
5674
|
// src/generated/assemblyai/schema/transcriptOptionalParamsRemoveAudioTags.ts
|
|
5717
5675
|
var TranscriptOptionalParamsRemoveAudioTags = {
|
|
5718
|
-
all: "all"
|
|
5676
|
+
all: "all",
|
|
5677
|
+
speaker: "speaker"
|
|
5719
5678
|
};
|
|
5720
5679
|
|
|
5721
5680
|
// src/generated/assemblyai/schema/transcriptRedactPiiAudioOptionsOverrideAudioRedactionMethod.ts
|
|
@@ -5725,7 +5684,8 @@ var TranscriptRedactPiiAudioOptionsOverrideAudioRedactionMethod = {
|
|
|
5725
5684
|
|
|
5726
5685
|
// src/generated/assemblyai/schema/transcriptRemoveAudioTags.ts
|
|
5727
5686
|
var TranscriptRemoveAudioTags = {
|
|
5728
|
-
all: "all"
|
|
5687
|
+
all: "all",
|
|
5688
|
+
speaker: "speaker"
|
|
5729
5689
|
};
|
|
5730
5690
|
|
|
5731
5691
|
// src/generated/assemblyai/api/assemblyAIAPI.ts
|
|
@@ -9162,11 +9122,12 @@ var SpeechmaticsAdapter = class extends BaseAdapter {
|
|
|
9162
9122
|
requestBody = { config: JSON.stringify(jobConfig) };
|
|
9163
9123
|
headers = { "Content-Type": "application/json" };
|
|
9164
9124
|
} else if (audio.type === "file") {
|
|
9165
|
-
|
|
9166
|
-
|
|
9167
|
-
|
|
9168
|
-
|
|
9169
|
-
|
|
9125
|
+
const formData = new FormData();
|
|
9126
|
+
formData.append("config", JSON.stringify(jobConfig));
|
|
9127
|
+
const fileBlob = audio.file instanceof Blob ? audio.file : new Blob([audio.file]);
|
|
9128
|
+
formData.append("data_file", fileBlob, "audio");
|
|
9129
|
+
requestBody = formData;
|
|
9130
|
+
headers = {};
|
|
9170
9131
|
} else {
|
|
9171
9132
|
return {
|
|
9172
9133
|
success: false,
|
|
@@ -9617,15 +9578,18 @@ var import_axios9 = __toESM(require("axios"));
|
|
|
9617
9578
|
// src/generated/soniox/schema/index.ts
|
|
9618
9579
|
var schema_exports4 = {};
|
|
9619
9580
|
__export(schema_exports4, {
|
|
9581
|
+
TTSVoiceGender: () => TTSVoiceGender,
|
|
9620
9582
|
TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
|
|
9621
9583
|
TranscriptionMode: () => TranscriptionMode,
|
|
9622
9584
|
TranscriptionStatus: () => TranscriptionStatus,
|
|
9623
|
-
TranslationConfigType: () => TranslationConfigType
|
|
9585
|
+
TranslationConfigType: () => TranslationConfigType,
|
|
9586
|
+
UsageLogsSort: () => UsageLogsSort
|
|
9624
9587
|
});
|
|
9625
9588
|
|
|
9626
9589
|
// src/generated/soniox/schema/temporaryApiKeyUsageType.ts
|
|
9627
9590
|
var TemporaryApiKeyUsageType = {
|
|
9628
|
-
transcribe_websocket: "transcribe_websocket"
|
|
9591
|
+
transcribe_websocket: "transcribe_websocket",
|
|
9592
|
+
tts_rt: "tts_rt"
|
|
9629
9593
|
};
|
|
9630
9594
|
|
|
9631
9595
|
// src/generated/soniox/schema/transcriptionMode.ts
|
|
@@ -9640,6 +9604,19 @@ var TranslationConfigType = {
|
|
|
9640
9604
|
two_way: "two_way"
|
|
9641
9605
|
};
|
|
9642
9606
|
|
|
9607
|
+
// src/generated/soniox/schema/tTSVoiceGender.ts
|
|
9608
|
+
var TTSVoiceGender = {
|
|
9609
|
+
male: "male",
|
|
9610
|
+
female: "female",
|
|
9611
|
+
neutral: "neutral"
|
|
9612
|
+
};
|
|
9613
|
+
|
|
9614
|
+
// src/generated/soniox/schema/usageLogsSort.ts
|
|
9615
|
+
var UsageLogsSort = {
|
|
9616
|
+
end_time_asc: "end_time_asc",
|
|
9617
|
+
end_time_desc: "end_time_desc"
|
|
9618
|
+
};
|
|
9619
|
+
|
|
9643
9620
|
// src/generated/soniox/api/sonioxPublicAPI.ts
|
|
9644
9621
|
var uploadFile = (uploadFileBody2, options) => {
|
|
9645
9622
|
const formData = new FormData();
|
|
@@ -11007,6 +10984,7 @@ __export(deepgramAPI_zod_exports, {
|
|
|
11007
10984
|
speakGenerateQueryMipOptOutDefault: () => speakGenerateQueryMipOptOutDefault,
|
|
11008
10985
|
speakGenerateQueryModelDefault: () => speakGenerateQueryModelDefault,
|
|
11009
10986
|
speakGenerateQueryParams: () => speakGenerateQueryParams,
|
|
10987
|
+
speakGenerateQuerySpeedDefault: () => speakGenerateQuerySpeedDefault,
|
|
11010
10988
|
speakGenerateResponse: () => speakGenerateResponse
|
|
11011
10989
|
});
|
|
11012
10990
|
var import_zod = require("zod");
|
|
@@ -11061,6 +11039,9 @@ var listenTranscribeQueryParams = import_zod.z.object({
|
|
|
11061
11039
|
diarize: import_zod.z.boolean().optional().describe(
|
|
11062
11040
|
"Recognize speaker changes. Each word in the transcript will be assigned a speaker number starting at 0"
|
|
11063
11041
|
),
|
|
11042
|
+
diarize_model: import_zod.z.enum(["latest", "v1", "v2"]).optional().describe(
|
|
11043
|
+
"Select and enable a specific batch diarization model version. If specifying this parameter, you should not set the deprecated `diarize=true` parameter. Not accepted on streaming requests."
|
|
11044
|
+
),
|
|
11064
11045
|
dictation: import_zod.z.boolean().optional().describe("Dictation mode for controlling formatting with dictated speech"),
|
|
11065
11046
|
encoding: import_zod.z.enum(["linear16", "flac", "mulaw", "amr-nb", "amr-wb", "opus", "speex", "g729"]).optional().describe("Specify the expected encoding of your submitted audio"),
|
|
11066
11047
|
filler_words: import_zod.z.boolean().optional().describe('Filler Words can help transcribe interruptions in your audio, like "uh" and "um"'),
|
|
@@ -11326,6 +11307,7 @@ var listenTranscribeResponse = import_zod.z.object({
|
|
|
11326
11307
|
var speakGenerateQueryCallbackMethodDefault = "POST";
|
|
11327
11308
|
var speakGenerateQueryMipOptOutDefault = false;
|
|
11328
11309
|
var speakGenerateQueryModelDefault = "aura-asteria-en";
|
|
11310
|
+
var speakGenerateQuerySpeedDefault = 1;
|
|
11329
11311
|
var speakGenerateQueryParams = import_zod.z.object({
|
|
11330
11312
|
callback: import_zod.z.string().optional().describe("URL to which we'll make the callback request"),
|
|
11331
11313
|
callback_method: import_zod.z.enum(["POST", "PUT"]).default(speakGenerateQueryCallbackMethodDefault).describe("HTTP method by which the callback request will be made"),
|
|
@@ -11437,6 +11419,9 @@ var speakGenerateQueryParams = import_zod.z.object({
|
|
|
11437
11419
|
import_zod.z.enum(["22050"]).describe("Encoding - mp3. Sample rate is fixed and not configurable (22050 Hz).")
|
|
11438
11420
|
).or(import_zod.z.enum(["48000"]).describe("Encoding - opus. Sample rate is fixed at 48000 Hz.")).optional().describe(
|
|
11439
11421
|
"Sample Rate specifies the sample rate for the output audio. Based on the encoding, different sample rates are supported. For some encodings, the sample rate is not configurable"
|
|
11422
|
+
),
|
|
11423
|
+
speed: import_zod.z.number().default(speakGenerateQuerySpeedDefault).describe(
|
|
11424
|
+
"Speaking rate multiplier that adjusts the pace of generated speech while preserving natural prosody and voice quality. Not yet supported in all languages."
|
|
11440
11425
|
)
|
|
11441
11426
|
});
|
|
11442
11427
|
var speakGenerateHeader = import_zod.z.object({
|
|
@@ -11761,6 +11746,7 @@ __export(assemblyAIAPI_zod_exports, {
|
|
|
11761
11746
|
createTranscriptBodyRedactPiiAudioDefault: () => createTranscriptBodyRedactPiiAudioDefault,
|
|
11762
11747
|
createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault: () => createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault,
|
|
11763
11748
|
createTranscriptBodyRedactPiiDefault: () => createTranscriptBodyRedactPiiDefault,
|
|
11749
|
+
createTranscriptBodyRedactPiiReturnUnredactedDefault: () => createTranscriptBodyRedactPiiReturnUnredactedDefault,
|
|
11764
11750
|
createTranscriptBodySentimentAnalysisDefault: () => createTranscriptBodySentimentAnalysisDefault,
|
|
11765
11751
|
createTranscriptBodySpeakerLabelsDefault: () => createTranscriptBodySpeakerLabelsDefault,
|
|
11766
11752
|
createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault: () => createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault,
|
|
@@ -11831,6 +11817,7 @@ var createTranscriptBodyPunctuateDefault = true;
|
|
|
11831
11817
|
var createTranscriptBodyRedactPiiDefault = false;
|
|
11832
11818
|
var createTranscriptBodyRedactPiiAudioDefault = false;
|
|
11833
11819
|
var createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault = false;
|
|
11820
|
+
var createTranscriptBodyRedactPiiReturnUnredactedDefault = false;
|
|
11834
11821
|
var createTranscriptBodySentimentAnalysisDefault = false;
|
|
11835
11822
|
var createTranscriptBodySpeakerLabelsDefault = false;
|
|
11836
11823
|
var createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault = 1;
|
|
@@ -11869,7 +11856,7 @@ var createTranscriptBody = import_zod3.z.object({
|
|
|
11869
11856
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
11870
11857
|
),
|
|
11871
11858
|
disfluencies: import_zod3.z.boolean().optional().describe(
|
|
11872
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
11859
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
11873
11860
|
),
|
|
11874
11861
|
domain: import_zod3.z.string().nullish().describe(
|
|
11875
11862
|
'Enable domain-specific transcription models to improve accuracy for specialized terminology. Set to `"medical-v1"` to enable [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) for improved accuracy of medical terms such as medications, procedures, conditions, and dosages.\n\nSupported languages: English (`en`), Spanish (`es`), German (`de`), French (`fr`). If used with an unsupported language, the parameter is ignored and a warning is returned.\n'
|
|
@@ -12176,12 +12163,20 @@ var createTranscriptBody = import_zod3.z.object({
|
|
|
12176
12163
|
"email_address",
|
|
12177
12164
|
"event",
|
|
12178
12165
|
"filename",
|
|
12166
|
+
"gender",
|
|
12179
12167
|
"gender_sexuality",
|
|
12180
12168
|
"healthcare_number",
|
|
12181
12169
|
"injury",
|
|
12182
12170
|
"ip_address",
|
|
12183
12171
|
"language",
|
|
12184
12172
|
"location",
|
|
12173
|
+
"location_address",
|
|
12174
|
+
"location_address_street",
|
|
12175
|
+
"location_city",
|
|
12176
|
+
"location_coordinate",
|
|
12177
|
+
"location_country",
|
|
12178
|
+
"location_state",
|
|
12179
|
+
"location_zip",
|
|
12185
12180
|
"marital_status",
|
|
12186
12181
|
"medical_condition",
|
|
12187
12182
|
"medical_process",
|
|
@@ -12190,6 +12185,7 @@ var createTranscriptBody = import_zod3.z.object({
|
|
|
12190
12185
|
"number_sequence",
|
|
12191
12186
|
"occupation",
|
|
12192
12187
|
"organization",
|
|
12188
|
+
"organization_medical_facility",
|
|
12193
12189
|
"passport_number",
|
|
12194
12190
|
"password",
|
|
12195
12191
|
"person_age",
|
|
@@ -12198,6 +12194,7 @@ var createTranscriptBody = import_zod3.z.object({
|
|
|
12198
12194
|
"physical_attribute",
|
|
12199
12195
|
"political_affiliation",
|
|
12200
12196
|
"religion",
|
|
12197
|
+
"sexuality",
|
|
12201
12198
|
"statistics",
|
|
12202
12199
|
"time",
|
|
12203
12200
|
"url",
|
|
@@ -12205,15 +12202,20 @@ var createTranscriptBody = import_zod3.z.object({
|
|
|
12205
12202
|
"username",
|
|
12206
12203
|
"vehicle_id",
|
|
12207
12204
|
"zodiac_sign"
|
|
12208
|
-
]).describe(
|
|
12205
|
+
]).describe(
|
|
12206
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
12207
|
+
)
|
|
12209
12208
|
).optional().describe(
|
|
12210
12209
|
"The list of PII Redaction policies to enable. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
12211
12210
|
),
|
|
12212
12211
|
redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).describe(
|
|
12213
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
12212
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
12214
12213
|
).or(import_zod3.z.null()).optional().describe(
|
|
12215
12214
|
"The replacement logic for detected PII, can be `entity_type` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
12216
12215
|
),
|
|
12216
|
+
redact_pii_return_unredacted: import_zod3.z.boolean().optional().describe(
|
|
12217
|
+
"When set to `true`, returns the original unredacted transcript alongside the redacted one in the same response. Requires `redact_pii` to be `true`, otherwise a 400 error is returned.\n\nWhen enabled, the response includes the additional fields `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. The existing `text`, `words`, and `utterances` fields remain fully redacted. When disabled (default), the response is unchanged and contains only the redacted transcript. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details.\n"
|
|
12218
|
+
),
|
|
12217
12219
|
sentiment_analysis: import_zod3.z.boolean().optional().describe(
|
|
12218
12220
|
"Enable [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech), can be true or false"
|
|
12219
12221
|
),
|
|
@@ -12311,10 +12313,10 @@ var createTranscriptBody = import_zod3.z.object({
|
|
|
12311
12313
|
),
|
|
12312
12314
|
summary_model: import_zod3.z.enum(["informative", "conversational", "catchy"]).optional().describe("The model to summarize the transcript"),
|
|
12313
12315
|
summary_type: import_zod3.z.enum(["bullets", "bullets_verbose", "gist", "headline", "paragraph"]).optional().describe("The type of summary"),
|
|
12314
|
-
remove_audio_tags: import_zod3.z.enum(["all"]).describe(
|
|
12315
|
-
'
|
|
12316
|
+
remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
|
|
12317
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
12316
12318
|
).or(import_zod3.z.null()).optional().describe(
|
|
12317
|
-
'
|
|
12319
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
12318
12320
|
),
|
|
12319
12321
|
temperature: import_zod3.z.number().optional().describe(
|
|
12320
12322
|
"Control the amount of randomness injected into the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
@@ -12448,7 +12450,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12448
12450
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
12449
12451
|
),
|
|
12450
12452
|
disfluencies: import_zod3.z.boolean().nullish().describe(
|
|
12451
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
12453
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
12452
12454
|
),
|
|
12453
12455
|
domain: import_zod3.z.string().nullish().describe(
|
|
12454
12456
|
'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
|
|
@@ -12471,12 +12473,20 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12471
12473
|
"email_address",
|
|
12472
12474
|
"event",
|
|
12473
12475
|
"filename",
|
|
12476
|
+
"gender",
|
|
12474
12477
|
"gender_sexuality",
|
|
12475
12478
|
"healthcare_number",
|
|
12476
12479
|
"injury",
|
|
12477
12480
|
"ip_address",
|
|
12478
12481
|
"language",
|
|
12479
12482
|
"location",
|
|
12483
|
+
"location_address",
|
|
12484
|
+
"location_address_street",
|
|
12485
|
+
"location_city",
|
|
12486
|
+
"location_coordinate",
|
|
12487
|
+
"location_country",
|
|
12488
|
+
"location_state",
|
|
12489
|
+
"location_zip",
|
|
12480
12490
|
"marital_status",
|
|
12481
12491
|
"medical_condition",
|
|
12482
12492
|
"medical_process",
|
|
@@ -12485,6 +12495,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12485
12495
|
"number_sequence",
|
|
12486
12496
|
"occupation",
|
|
12487
12497
|
"organization",
|
|
12498
|
+
"organization_medical_facility",
|
|
12488
12499
|
"passport_number",
|
|
12489
12500
|
"password",
|
|
12490
12501
|
"person_age",
|
|
@@ -12493,6 +12504,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12493
12504
|
"physical_attribute",
|
|
12494
12505
|
"political_affiliation",
|
|
12495
12506
|
"religion",
|
|
12507
|
+
"sexuality",
|
|
12496
12508
|
"statistics",
|
|
12497
12509
|
"time",
|
|
12498
12510
|
"url",
|
|
@@ -12797,6 +12809,24 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12797
12809
|
}).optional().describe(
|
|
12798
12810
|
"Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
|
|
12799
12811
|
),
|
|
12812
|
+
metadata: import_zod3.z.object({
|
|
12813
|
+
domain_used: import_zod3.z.string().nullish().describe(
|
|
12814
|
+
'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
|
|
12815
|
+
),
|
|
12816
|
+
warnings: import_zod3.z.array(
|
|
12817
|
+
import_zod3.z.object({
|
|
12818
|
+
message: import_zod3.z.string().describe("A human-readable description of the warning.")
|
|
12819
|
+
}).describe(
|
|
12820
|
+
"A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
|
|
12821
|
+
)
|
|
12822
|
+
).optional().describe(
|
|
12823
|
+
"Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
|
|
12824
|
+
)
|
|
12825
|
+
}).describe(
|
|
12826
|
+
"Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
|
|
12827
|
+
).or(import_zod3.z.null()).optional().describe(
|
|
12828
|
+
"Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
|
|
12829
|
+
),
|
|
12800
12830
|
multichannel: import_zod3.z.boolean().nullish().describe(
|
|
12801
12831
|
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
|
|
12802
12832
|
),
|
|
@@ -12844,12 +12874,20 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12844
12874
|
"email_address",
|
|
12845
12875
|
"event",
|
|
12846
12876
|
"filename",
|
|
12877
|
+
"gender",
|
|
12847
12878
|
"gender_sexuality",
|
|
12848
12879
|
"healthcare_number",
|
|
12849
12880
|
"injury",
|
|
12850
12881
|
"ip_address",
|
|
12851
12882
|
"language",
|
|
12852
12883
|
"location",
|
|
12884
|
+
"location_address",
|
|
12885
|
+
"location_address_street",
|
|
12886
|
+
"location_city",
|
|
12887
|
+
"location_coordinate",
|
|
12888
|
+
"location_country",
|
|
12889
|
+
"location_state",
|
|
12890
|
+
"location_zip",
|
|
12853
12891
|
"marital_status",
|
|
12854
12892
|
"medical_condition",
|
|
12855
12893
|
"medical_process",
|
|
@@ -12858,6 +12896,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12858
12896
|
"number_sequence",
|
|
12859
12897
|
"occupation",
|
|
12860
12898
|
"organization",
|
|
12899
|
+
"organization_medical_facility",
|
|
12861
12900
|
"passport_number",
|
|
12862
12901
|
"password",
|
|
12863
12902
|
"person_age",
|
|
@@ -12866,6 +12905,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12866
12905
|
"physical_attribute",
|
|
12867
12906
|
"political_affiliation",
|
|
12868
12907
|
"religion",
|
|
12908
|
+
"sexuality",
|
|
12869
12909
|
"statistics",
|
|
12870
12910
|
"time",
|
|
12871
12911
|
"url",
|
|
@@ -12873,12 +12913,17 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12873
12913
|
"username",
|
|
12874
12914
|
"vehicle_id",
|
|
12875
12915
|
"zodiac_sign"
|
|
12876
|
-
]).describe(
|
|
12916
|
+
]).describe(
|
|
12917
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
12918
|
+
)
|
|
12877
12919
|
).nullish().describe(
|
|
12878
12920
|
"The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
12879
12921
|
),
|
|
12880
12922
|
redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
|
|
12881
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
12923
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
12924
|
+
),
|
|
12925
|
+
redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
|
|
12926
|
+
"Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
12882
12927
|
),
|
|
12883
12928
|
sentiment_analysis: import_zod3.z.boolean().nullish().describe(
|
|
12884
12929
|
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
|
|
@@ -13015,20 +13060,23 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
13015
13060
|
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
13016
13061
|
),
|
|
13017
13062
|
summary_model: import_zod3.z.string().nullish().describe(
|
|
13018
|
-
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
13063
|
+
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
|
|
13019
13064
|
),
|
|
13020
13065
|
summary_type: import_zod3.z.string().nullish().describe(
|
|
13021
|
-
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
13066
|
+
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
13022
13067
|
),
|
|
13023
|
-
remove_audio_tags: import_zod3.z.enum(["all"]).describe(
|
|
13024
|
-
|
|
13068
|
+
remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
|
|
13069
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
13025
13070
|
).or(import_zod3.z.null()).optional().describe(
|
|
13026
|
-
|
|
13071
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
13027
13072
|
),
|
|
13028
13073
|
temperature: import_zod3.z.number().nullish().describe(
|
|
13029
13074
|
"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
13030
13075
|
),
|
|
13031
13076
|
text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
|
|
13077
|
+
unredacted_text: import_zod3.z.string().nullish().describe(
|
|
13078
|
+
"The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13079
|
+
),
|
|
13032
13080
|
throttled: import_zod3.z.boolean().nullish().describe(
|
|
13033
13081
|
"True while a request is throttled and false when a request is no longer throttled"
|
|
13034
13082
|
),
|
|
@@ -13065,6 +13113,39 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
13065
13113
|
).nullish().describe(
|
|
13066
13114
|
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
|
|
13067
13115
|
),
|
|
13116
|
+
unredacted_utterances: import_zod3.z.array(
|
|
13117
|
+
import_zod3.z.object({
|
|
13118
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
|
|
13119
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
|
|
13120
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
|
|
13121
|
+
text: import_zod3.z.string().describe("The text for this utterance"),
|
|
13122
|
+
words: import_zod3.z.array(
|
|
13123
|
+
import_zod3.z.object({
|
|
13124
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
|
|
13125
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
|
|
13126
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
|
|
13127
|
+
text: import_zod3.z.string().describe("The text of the word"),
|
|
13128
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
13129
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
13130
|
+
),
|
|
13131
|
+
speaker: import_zod3.z.string().nullable().describe(
|
|
13132
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
13133
|
+
)
|
|
13134
|
+
})
|
|
13135
|
+
).describe("The words in the utterance."),
|
|
13136
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
13137
|
+
"The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
13138
|
+
),
|
|
13139
|
+
speaker: import_zod3.z.string().describe(
|
|
13140
|
+
'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
|
|
13141
|
+
),
|
|
13142
|
+
translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
|
|
13143
|
+
'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
|
|
13144
|
+
)
|
|
13145
|
+
})
|
|
13146
|
+
).nullish().describe(
|
|
13147
|
+
"The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13148
|
+
),
|
|
13068
13149
|
webhook_auth: import_zod3.z.boolean().describe(
|
|
13069
13150
|
"Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
|
|
13070
13151
|
),
|
|
@@ -13093,6 +13174,22 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
13093
13174
|
).nullish().describe(
|
|
13094
13175
|
"An array of temporally-sequential word objects, one for each word in the transcript.\n"
|
|
13095
13176
|
),
|
|
13177
|
+
unredacted_words: import_zod3.z.array(
|
|
13178
|
+
import_zod3.z.object({
|
|
13179
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
|
|
13180
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
|
|
13181
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
|
|
13182
|
+
text: import_zod3.z.string().describe("The text of the word"),
|
|
13183
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
13184
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
13185
|
+
),
|
|
13186
|
+
speaker: import_zod3.z.string().nullable().describe(
|
|
13187
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
13188
|
+
)
|
|
13189
|
+
})
|
|
13190
|
+
).nullish().describe(
|
|
13191
|
+
"The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13192
|
+
),
|
|
13096
13193
|
acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
13097
13194
|
custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
|
|
13098
13195
|
language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
@@ -13268,7 +13365,7 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13268
13365
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
13269
13366
|
),
|
|
13270
13367
|
disfluencies: import_zod3.z.boolean().nullish().describe(
|
|
13271
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
13368
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
13272
13369
|
),
|
|
13273
13370
|
domain: import_zod3.z.string().nullish().describe(
|
|
13274
13371
|
'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
|
|
@@ -13291,12 +13388,20 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13291
13388
|
"email_address",
|
|
13292
13389
|
"event",
|
|
13293
13390
|
"filename",
|
|
13391
|
+
"gender",
|
|
13294
13392
|
"gender_sexuality",
|
|
13295
13393
|
"healthcare_number",
|
|
13296
13394
|
"injury",
|
|
13297
13395
|
"ip_address",
|
|
13298
13396
|
"language",
|
|
13299
13397
|
"location",
|
|
13398
|
+
"location_address",
|
|
13399
|
+
"location_address_street",
|
|
13400
|
+
"location_city",
|
|
13401
|
+
"location_coordinate",
|
|
13402
|
+
"location_country",
|
|
13403
|
+
"location_state",
|
|
13404
|
+
"location_zip",
|
|
13300
13405
|
"marital_status",
|
|
13301
13406
|
"medical_condition",
|
|
13302
13407
|
"medical_process",
|
|
@@ -13305,6 +13410,7 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13305
13410
|
"number_sequence",
|
|
13306
13411
|
"occupation",
|
|
13307
13412
|
"organization",
|
|
13413
|
+
"organization_medical_facility",
|
|
13308
13414
|
"passport_number",
|
|
13309
13415
|
"password",
|
|
13310
13416
|
"person_age",
|
|
@@ -13313,6 +13419,7 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13313
13419
|
"physical_attribute",
|
|
13314
13420
|
"political_affiliation",
|
|
13315
13421
|
"religion",
|
|
13422
|
+
"sexuality",
|
|
13316
13423
|
"statistics",
|
|
13317
13424
|
"time",
|
|
13318
13425
|
"url",
|
|
@@ -13617,6 +13724,24 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13617
13724
|
}).optional().describe(
|
|
13618
13725
|
"Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
|
|
13619
13726
|
),
|
|
13727
|
+
metadata: import_zod3.z.object({
|
|
13728
|
+
domain_used: import_zod3.z.string().nullish().describe(
|
|
13729
|
+
'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
|
|
13730
|
+
),
|
|
13731
|
+
warnings: import_zod3.z.array(
|
|
13732
|
+
import_zod3.z.object({
|
|
13733
|
+
message: import_zod3.z.string().describe("A human-readable description of the warning.")
|
|
13734
|
+
}).describe(
|
|
13735
|
+
"A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
|
|
13736
|
+
)
|
|
13737
|
+
).optional().describe(
|
|
13738
|
+
"Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
|
|
13739
|
+
)
|
|
13740
|
+
}).describe(
|
|
13741
|
+
"Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
|
|
13742
|
+
).or(import_zod3.z.null()).optional().describe(
|
|
13743
|
+
"Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
|
|
13744
|
+
),
|
|
13620
13745
|
multichannel: import_zod3.z.boolean().nullish().describe(
|
|
13621
13746
|
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
|
|
13622
13747
|
),
|
|
@@ -13664,12 +13789,20 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13664
13789
|
"email_address",
|
|
13665
13790
|
"event",
|
|
13666
13791
|
"filename",
|
|
13792
|
+
"gender",
|
|
13667
13793
|
"gender_sexuality",
|
|
13668
13794
|
"healthcare_number",
|
|
13669
13795
|
"injury",
|
|
13670
13796
|
"ip_address",
|
|
13671
13797
|
"language",
|
|
13672
13798
|
"location",
|
|
13799
|
+
"location_address",
|
|
13800
|
+
"location_address_street",
|
|
13801
|
+
"location_city",
|
|
13802
|
+
"location_coordinate",
|
|
13803
|
+
"location_country",
|
|
13804
|
+
"location_state",
|
|
13805
|
+
"location_zip",
|
|
13673
13806
|
"marital_status",
|
|
13674
13807
|
"medical_condition",
|
|
13675
13808
|
"medical_process",
|
|
@@ -13678,6 +13811,7 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13678
13811
|
"number_sequence",
|
|
13679
13812
|
"occupation",
|
|
13680
13813
|
"organization",
|
|
13814
|
+
"organization_medical_facility",
|
|
13681
13815
|
"passport_number",
|
|
13682
13816
|
"password",
|
|
13683
13817
|
"person_age",
|
|
@@ -13686,6 +13820,7 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13686
13820
|
"physical_attribute",
|
|
13687
13821
|
"political_affiliation",
|
|
13688
13822
|
"religion",
|
|
13823
|
+
"sexuality",
|
|
13689
13824
|
"statistics",
|
|
13690
13825
|
"time",
|
|
13691
13826
|
"url",
|
|
@@ -13693,12 +13828,17 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13693
13828
|
"username",
|
|
13694
13829
|
"vehicle_id",
|
|
13695
13830
|
"zodiac_sign"
|
|
13696
|
-
]).describe(
|
|
13831
|
+
]).describe(
|
|
13832
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
13833
|
+
)
|
|
13697
13834
|
).nullish().describe(
|
|
13698
13835
|
"The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13699
13836
|
),
|
|
13700
13837
|
redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
|
|
13701
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
13838
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
13839
|
+
),
|
|
13840
|
+
redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
|
|
13841
|
+
"Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13702
13842
|
),
|
|
13703
13843
|
sentiment_analysis: import_zod3.z.boolean().nullish().describe(
|
|
13704
13844
|
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
|
|
@@ -13835,20 +13975,23 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13835
13975
|
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
13836
13976
|
),
|
|
13837
13977
|
summary_model: import_zod3.z.string().nullish().describe(
|
|
13838
|
-
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
13978
|
+
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
|
|
13839
13979
|
),
|
|
13840
13980
|
summary_type: import_zod3.z.string().nullish().describe(
|
|
13841
|
-
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
13981
|
+
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
13842
13982
|
),
|
|
13843
|
-
remove_audio_tags: import_zod3.z.enum(["all"]).describe(
|
|
13844
|
-
|
|
13983
|
+
remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
|
|
13984
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
13845
13985
|
).or(import_zod3.z.null()).optional().describe(
|
|
13846
|
-
|
|
13986
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
13847
13987
|
),
|
|
13848
13988
|
temperature: import_zod3.z.number().nullish().describe(
|
|
13849
13989
|
"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
13850
13990
|
),
|
|
13851
13991
|
text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
|
|
13992
|
+
unredacted_text: import_zod3.z.string().nullish().describe(
|
|
13993
|
+
"The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13994
|
+
),
|
|
13852
13995
|
throttled: import_zod3.z.boolean().nullish().describe(
|
|
13853
13996
|
"True while a request is throttled and false when a request is no longer throttled"
|
|
13854
13997
|
),
|
|
@@ -13885,6 +14028,39 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13885
14028
|
).nullish().describe(
|
|
13886
14029
|
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
|
|
13887
14030
|
),
|
|
14031
|
+
unredacted_utterances: import_zod3.z.array(
|
|
14032
|
+
import_zod3.z.object({
|
|
14033
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
|
|
14034
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
|
|
14035
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
|
|
14036
|
+
text: import_zod3.z.string().describe("The text for this utterance"),
|
|
14037
|
+
words: import_zod3.z.array(
|
|
14038
|
+
import_zod3.z.object({
|
|
14039
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
|
|
14040
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
|
|
14041
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
|
|
14042
|
+
text: import_zod3.z.string().describe("The text of the word"),
|
|
14043
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
14044
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
14045
|
+
),
|
|
14046
|
+
speaker: import_zod3.z.string().nullable().describe(
|
|
14047
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
14048
|
+
)
|
|
14049
|
+
})
|
|
14050
|
+
).describe("The words in the utterance."),
|
|
14051
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
14052
|
+
"The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
14053
|
+
),
|
|
14054
|
+
speaker: import_zod3.z.string().describe(
|
|
14055
|
+
'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
|
|
14056
|
+
),
|
|
14057
|
+
translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
|
|
14058
|
+
'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
|
|
14059
|
+
)
|
|
14060
|
+
})
|
|
14061
|
+
).nullish().describe(
|
|
14062
|
+
"The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14063
|
+
),
|
|
13888
14064
|
webhook_auth: import_zod3.z.boolean().describe(
|
|
13889
14065
|
"Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
|
|
13890
14066
|
),
|
|
@@ -13913,6 +14089,22 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13913
14089
|
).nullish().describe(
|
|
13914
14090
|
"An array of temporally-sequential word objects, one for each word in the transcript.\n"
|
|
13915
14091
|
),
|
|
14092
|
+
unredacted_words: import_zod3.z.array(
|
|
14093
|
+
import_zod3.z.object({
|
|
14094
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
|
|
14095
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
|
|
14096
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
|
|
14097
|
+
text: import_zod3.z.string().describe("The text of the word"),
|
|
14098
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
14099
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
14100
|
+
),
|
|
14101
|
+
speaker: import_zod3.z.string().nullable().describe(
|
|
14102
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
14103
|
+
)
|
|
14104
|
+
})
|
|
14105
|
+
).nullish().describe(
|
|
14106
|
+
"The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14107
|
+
),
|
|
13916
14108
|
acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
13917
14109
|
custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
|
|
13918
14110
|
language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
@@ -14048,7 +14240,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14048
14240
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
14049
14241
|
),
|
|
14050
14242
|
disfluencies: import_zod3.z.boolean().nullish().describe(
|
|
14051
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
14243
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
14052
14244
|
),
|
|
14053
14245
|
domain: import_zod3.z.string().nullish().describe(
|
|
14054
14246
|
'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
|
|
@@ -14071,12 +14263,20 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14071
14263
|
"email_address",
|
|
14072
14264
|
"event",
|
|
14073
14265
|
"filename",
|
|
14266
|
+
"gender",
|
|
14074
14267
|
"gender_sexuality",
|
|
14075
14268
|
"healthcare_number",
|
|
14076
14269
|
"injury",
|
|
14077
14270
|
"ip_address",
|
|
14078
14271
|
"language",
|
|
14079
14272
|
"location",
|
|
14273
|
+
"location_address",
|
|
14274
|
+
"location_address_street",
|
|
14275
|
+
"location_city",
|
|
14276
|
+
"location_coordinate",
|
|
14277
|
+
"location_country",
|
|
14278
|
+
"location_state",
|
|
14279
|
+
"location_zip",
|
|
14080
14280
|
"marital_status",
|
|
14081
14281
|
"medical_condition",
|
|
14082
14282
|
"medical_process",
|
|
@@ -14085,6 +14285,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14085
14285
|
"number_sequence",
|
|
14086
14286
|
"occupation",
|
|
14087
14287
|
"organization",
|
|
14288
|
+
"organization_medical_facility",
|
|
14088
14289
|
"passport_number",
|
|
14089
14290
|
"password",
|
|
14090
14291
|
"person_age",
|
|
@@ -14093,6 +14294,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14093
14294
|
"physical_attribute",
|
|
14094
14295
|
"political_affiliation",
|
|
14095
14296
|
"religion",
|
|
14297
|
+
"sexuality",
|
|
14096
14298
|
"statistics",
|
|
14097
14299
|
"time",
|
|
14098
14300
|
"url",
|
|
@@ -14397,6 +14599,24 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14397
14599
|
}).optional().describe(
|
|
14398
14600
|
"Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
|
|
14399
14601
|
),
|
|
14602
|
+
metadata: import_zod3.z.object({
|
|
14603
|
+
domain_used: import_zod3.z.string().nullish().describe(
|
|
14604
|
+
'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
|
|
14605
|
+
),
|
|
14606
|
+
warnings: import_zod3.z.array(
|
|
14607
|
+
import_zod3.z.object({
|
|
14608
|
+
message: import_zod3.z.string().describe("A human-readable description of the warning.")
|
|
14609
|
+
}).describe(
|
|
14610
|
+
"A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
|
|
14611
|
+
)
|
|
14612
|
+
).optional().describe(
|
|
14613
|
+
"Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
|
|
14614
|
+
)
|
|
14615
|
+
}).describe(
|
|
14616
|
+
"Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
|
|
14617
|
+
).or(import_zod3.z.null()).optional().describe(
|
|
14618
|
+
"Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
|
|
14619
|
+
),
|
|
14400
14620
|
multichannel: import_zod3.z.boolean().nullish().describe(
|
|
14401
14621
|
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
|
|
14402
14622
|
),
|
|
@@ -14444,12 +14664,20 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14444
14664
|
"email_address",
|
|
14445
14665
|
"event",
|
|
14446
14666
|
"filename",
|
|
14667
|
+
"gender",
|
|
14447
14668
|
"gender_sexuality",
|
|
14448
14669
|
"healthcare_number",
|
|
14449
14670
|
"injury",
|
|
14450
14671
|
"ip_address",
|
|
14451
14672
|
"language",
|
|
14452
14673
|
"location",
|
|
14674
|
+
"location_address",
|
|
14675
|
+
"location_address_street",
|
|
14676
|
+
"location_city",
|
|
14677
|
+
"location_coordinate",
|
|
14678
|
+
"location_country",
|
|
14679
|
+
"location_state",
|
|
14680
|
+
"location_zip",
|
|
14453
14681
|
"marital_status",
|
|
14454
14682
|
"medical_condition",
|
|
14455
14683
|
"medical_process",
|
|
@@ -14458,6 +14686,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14458
14686
|
"number_sequence",
|
|
14459
14687
|
"occupation",
|
|
14460
14688
|
"organization",
|
|
14689
|
+
"organization_medical_facility",
|
|
14461
14690
|
"passport_number",
|
|
14462
14691
|
"password",
|
|
14463
14692
|
"person_age",
|
|
@@ -14466,6 +14695,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14466
14695
|
"physical_attribute",
|
|
14467
14696
|
"political_affiliation",
|
|
14468
14697
|
"religion",
|
|
14698
|
+
"sexuality",
|
|
14469
14699
|
"statistics",
|
|
14470
14700
|
"time",
|
|
14471
14701
|
"url",
|
|
@@ -14473,12 +14703,17 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14473
14703
|
"username",
|
|
14474
14704
|
"vehicle_id",
|
|
14475
14705
|
"zodiac_sign"
|
|
14476
|
-
]).describe(
|
|
14706
|
+
]).describe(
|
|
14707
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
14708
|
+
)
|
|
14477
14709
|
).nullish().describe(
|
|
14478
14710
|
"The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14479
14711
|
),
|
|
14480
14712
|
redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
|
|
14481
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
14713
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
14714
|
+
),
|
|
14715
|
+
redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
|
|
14716
|
+
"Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14482
14717
|
),
|
|
14483
14718
|
sentiment_analysis: import_zod3.z.boolean().nullish().describe(
|
|
14484
14719
|
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
|
|
@@ -14615,20 +14850,23 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14615
14850
|
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
14616
14851
|
),
|
|
14617
14852
|
summary_model: import_zod3.z.string().nullish().describe(
|
|
14618
|
-
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
14853
|
+
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
|
|
14619
14854
|
),
|
|
14620
14855
|
summary_type: import_zod3.z.string().nullish().describe(
|
|
14621
|
-
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
14856
|
+
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
14622
14857
|
),
|
|
14623
|
-
remove_audio_tags: import_zod3.z.enum(["all"]).describe(
|
|
14624
|
-
|
|
14858
|
+
remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
|
|
14859
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
14625
14860
|
).or(import_zod3.z.null()).optional().describe(
|
|
14626
|
-
|
|
14861
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
14627
14862
|
),
|
|
14628
14863
|
temperature: import_zod3.z.number().nullish().describe(
|
|
14629
14864
|
"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
14630
14865
|
),
|
|
14631
14866
|
text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
|
|
14867
|
+
unredacted_text: import_zod3.z.string().nullish().describe(
|
|
14868
|
+
"The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14869
|
+
),
|
|
14632
14870
|
throttled: import_zod3.z.boolean().nullish().describe(
|
|
14633
14871
|
"True while a request is throttled and false when a request is no longer throttled"
|
|
14634
14872
|
),
|
|
@@ -14665,6 +14903,39 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14665
14903
|
).nullish().describe(
|
|
14666
14904
|
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
|
|
14667
14905
|
),
|
|
14906
|
+
unredacted_utterances: import_zod3.z.array(
|
|
14907
|
+
import_zod3.z.object({
|
|
14908
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
|
|
14909
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
|
|
14910
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
|
|
14911
|
+
text: import_zod3.z.string().describe("The text for this utterance"),
|
|
14912
|
+
words: import_zod3.z.array(
|
|
14913
|
+
import_zod3.z.object({
|
|
14914
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
|
|
14915
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
|
|
14916
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
|
|
14917
|
+
text: import_zod3.z.string().describe("The text of the word"),
|
|
14918
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
14919
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
14920
|
+
),
|
|
14921
|
+
speaker: import_zod3.z.string().nullable().describe(
|
|
14922
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
14923
|
+
)
|
|
14924
|
+
})
|
|
14925
|
+
).describe("The words in the utterance."),
|
|
14926
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
14927
|
+
"The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
14928
|
+
),
|
|
14929
|
+
speaker: import_zod3.z.string().describe(
|
|
14930
|
+
'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
|
|
14931
|
+
),
|
|
14932
|
+
translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
|
|
14933
|
+
'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
|
|
14934
|
+
)
|
|
14935
|
+
})
|
|
14936
|
+
).nullish().describe(
|
|
14937
|
+
"The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14938
|
+
),
|
|
14668
14939
|
webhook_auth: import_zod3.z.boolean().describe(
|
|
14669
14940
|
"Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
|
|
14670
14941
|
),
|
|
@@ -14693,6 +14964,22 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14693
14964
|
).nullish().describe(
|
|
14694
14965
|
"An array of temporally-sequential word objects, one for each word in the transcript.\n"
|
|
14695
14966
|
),
|
|
14967
|
+
unredacted_words: import_zod3.z.array(
|
|
14968
|
+
import_zod3.z.object({
|
|
14969
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
|
|
14970
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
|
|
14971
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
|
|
14972
|
+
text: import_zod3.z.string().describe("The text of the word"),
|
|
14973
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
14974
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
14975
|
+
),
|
|
14976
|
+
speaker: import_zod3.z.string().nullable().describe(
|
|
14977
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
14978
|
+
)
|
|
14979
|
+
})
|
|
14980
|
+
).nullish().describe(
|
|
14981
|
+
"The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14982
|
+
),
|
|
14696
14983
|
acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
14697
14984
|
custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
|
|
14698
14985
|
language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
@@ -14848,7 +15135,21 @@ var streamingTranscriberParams = import_zod4.z.object({
|
|
|
14848
15135
|
inactivityTimeout: import_zod4.z.number().optional().describe("From SDK v3"),
|
|
14849
15136
|
speakerLabels: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
14850
15137
|
maxSpeakers: import_zod4.z.number().optional().describe("From SDK v3"),
|
|
14851
|
-
|
|
15138
|
+
voiceFocus: import_zod4.z.unknown().optional().describe("From SDK v3"),
|
|
15139
|
+
voiceFocusThreshold: import_zod4.z.number().optional().describe("From SDK v3"),
|
|
15140
|
+
continuousPartials: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
15141
|
+
interruptionDelay: import_zod4.z.number().optional().describe("From SDK v3"),
|
|
15142
|
+
turnLeftPadMs: import_zod4.z.number().optional().describe("From SDK v3"),
|
|
15143
|
+
customerSupportAudioCapture: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
15144
|
+
includePartialTurns: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
15145
|
+
redactPii: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
15146
|
+
redactPiiPolicies: import_zod4.z.unknown().optional().describe("From SDK v3"),
|
|
15147
|
+
redactPiiSub: import_zod4.z.unknown().optional().describe("From SDK v3"),
|
|
15148
|
+
llmGateway: import_zod4.z.unknown().optional().describe("From SDK v3"),
|
|
15149
|
+
webhookUrl: import_zod4.z.string().optional().describe("From SDK v3"),
|
|
15150
|
+
webhookAuthHeaderName: import_zod4.z.string().optional().describe("From SDK v3"),
|
|
15151
|
+
webhookAuthHeaderValue: import_zod4.z.string().optional().describe("From SDK v3"),
|
|
15152
|
+
mode: import_zod4.z.unknown().describe("From SDK v3")
|
|
14852
15153
|
});
|
|
14853
15154
|
var streamingUpdateConfigParams = import_zod4.z.object({
|
|
14854
15155
|
end_utterance_silence_threshold: import_zod4.z.number().min(0).max(2e4).optional().describe("The duration threshold in milliseconds"),
|
|
@@ -14860,7 +15161,9 @@ var streamingUpdateConfigParams = import_zod4.z.object({
|
|
|
14860
15161
|
format_turns: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
14861
15162
|
keyterms_prompt: import_zod4.z.array(import_zod4.z.string()).optional().describe("From SDK v3"),
|
|
14862
15163
|
prompt: import_zod4.z.string().optional().describe("From SDK v3"),
|
|
14863
|
-
filter_profanity: import_zod4.z.boolean().optional().describe("From SDK v3")
|
|
15164
|
+
filter_profanity: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
15165
|
+
interruption_delay: import_zod4.z.number().optional().describe("From SDK v3"),
|
|
15166
|
+
turn_left_pad_ms: import_zod4.z.number().optional().describe("From SDK v3")
|
|
14864
15167
|
});
|
|
14865
15168
|
|
|
14866
15169
|
// src/generated/gladia/api/gladiaControlAPI.zod.ts
|
|
@@ -15609,7 +15912,7 @@ var preRecordedControllerInitPreRecordedJobV2BodyNamedEntityRecognitionDefault =
|
|
|
15609
15912
|
var preRecordedControllerInitPreRecordedJobV2BodyCustomSpellingDefault = false;
|
|
15610
15913
|
var preRecordedControllerInitPreRecordedJobV2BodySentimentAnalysisDefault = false;
|
|
15611
15914
|
var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmDefault = false;
|
|
15612
|
-
var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-
|
|
15915
|
+
var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
15613
15916
|
var preRecordedControllerInitPreRecordedJobV2BodyPiiRedactionDefault = false;
|
|
15614
15917
|
var preRecordedControllerInitPreRecordedJobV2BodySentencesDefault = false;
|
|
15615
15918
|
var preRecordedControllerInitPreRecordedJobV2BodyPunctuationEnhancedDefault = false;
|
|
@@ -15898,23 +16201,23 @@ var preRecordedControllerInitPreRecordedJobV2Body = import_zod5.z.object({
|
|
|
15898
16201
|
"Forces the translation to use informal language forms when available in the target language."
|
|
15899
16202
|
)
|
|
15900
16203
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
15901
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
16204
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
15902
16205
|
summarization_config: import_zod5.z.object({
|
|
15903
16206
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(preRecordedControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault).describe("The type of summarization to apply")
|
|
15904
|
-
}).optional().describe("
|
|
16207
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
15905
16208
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
15906
16209
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
15907
16210
|
custom_spelling_config: import_zod5.z.object({
|
|
15908
16211
|
spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
|
|
15909
16212
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
15910
16213
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
15911
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
16214
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
15912
16215
|
audio_to_llm_config: import_zod5.z.object({
|
|
15913
16216
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
15914
16217
|
model: import_zod5.z.string().default(preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault).describe(
|
|
15915
16218
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
15916
16219
|
)
|
|
15917
|
-
}).optional().describe("
|
|
16220
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
15918
16221
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
15919
16222
|
pii_redaction_config: import_zod5.z.object({
|
|
15920
16223
|
entity_types: import_zod5.z.enum([
|
|
@@ -16169,7 +16472,7 @@ var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsNamed
|
|
|
16169
16472
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsCustomSpellingDefault = false;
|
|
16170
16473
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
|
|
16171
16474
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmDefault = false;
|
|
16172
|
-
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
16475
|
+
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
16173
16476
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsPiiRedactionDefault = false;
|
|
16174
16477
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSentencesDefault = false;
|
|
16175
16478
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
|
|
@@ -16517,12 +16820,12 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
|
|
|
16517
16820
|
"Forces the translation to use informal language forms when available in the target language."
|
|
16518
16821
|
)
|
|
16519
16822
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
16520
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
16823
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
16521
16824
|
summarization_config: import_zod5.z.object({
|
|
16522
16825
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
16523
16826
|
preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
|
|
16524
16827
|
).describe("The type of summarization to apply")
|
|
16525
|
-
}).optional().describe("
|
|
16828
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
16526
16829
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
16527
16830
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
16528
16831
|
custom_spelling_config: import_zod5.z.object({
|
|
@@ -16531,7 +16834,7 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
|
|
|
16531
16834
|
"**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
|
|
16532
16835
|
),
|
|
16533
16836
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
16534
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
16837
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
16535
16838
|
audio_to_llm_config: import_zod5.z.object({
|
|
16536
16839
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
16537
16840
|
model: import_zod5.z.string().default(
|
|
@@ -16539,7 +16842,7 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
|
|
|
16539
16842
|
).describe(
|
|
16540
16843
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
16541
16844
|
)
|
|
16542
|
-
}).optional().describe("
|
|
16845
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
16543
16846
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
16544
16847
|
pii_redaction_config: import_zod5.z.object({
|
|
16545
16848
|
entity_types: import_zod5.z.enum([
|
|
@@ -17676,7 +17979,7 @@ var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsNamedEntityReco
|
|
|
17676
17979
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsCustomSpellingDefault = false;
|
|
17677
17980
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSentimentAnalysisDefault = false;
|
|
17678
17981
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmDefault = false;
|
|
17679
|
-
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
17982
|
+
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
17680
17983
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsPiiRedactionDefault = false;
|
|
17681
17984
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSentencesDefault = false;
|
|
17682
17985
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsPunctuationEnhancedDefault = false;
|
|
@@ -18017,19 +18320,19 @@ var preRecordedControllerGetPreRecordedJobV2Response = import_zod5.z.object({
|
|
|
18017
18320
|
"Forces the translation to use informal language forms when available in the target language."
|
|
18018
18321
|
)
|
|
18019
18322
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
18020
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
18323
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
18021
18324
|
summarization_config: import_zod5.z.object({
|
|
18022
18325
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
18023
18326
|
preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSummarizationConfigTypeDefault
|
|
18024
18327
|
).describe("The type of summarization to apply")
|
|
18025
|
-
}).optional().describe("
|
|
18328
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
18026
18329
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
18027
18330
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
18028
18331
|
custom_spelling_config: import_zod5.z.object({
|
|
18029
18332
|
spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
|
|
18030
18333
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
18031
18334
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
18032
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
18335
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
18033
18336
|
audio_to_llm_config: import_zod5.z.object({
|
|
18034
18337
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
18035
18338
|
model: import_zod5.z.string().default(
|
|
@@ -18037,7 +18340,7 @@ var preRecordedControllerGetPreRecordedJobV2Response = import_zod5.z.object({
|
|
|
18037
18340
|
).describe(
|
|
18038
18341
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
18039
18342
|
)
|
|
18040
|
-
}).optional().describe("
|
|
18343
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
18041
18344
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
18042
18345
|
pii_redaction_config: import_zod5.z.object({
|
|
18043
18346
|
entity_types: import_zod5.z.enum([
|
|
@@ -19150,7 +19453,7 @@ var transcriptionControllerInitPreRecordedJobV2BodyNamedEntityRecognitionDefault
|
|
|
19150
19453
|
var transcriptionControllerInitPreRecordedJobV2BodyCustomSpellingDefault = false;
|
|
19151
19454
|
var transcriptionControllerInitPreRecordedJobV2BodySentimentAnalysisDefault = false;
|
|
19152
19455
|
var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmDefault = false;
|
|
19153
|
-
var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-
|
|
19456
|
+
var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
19154
19457
|
var transcriptionControllerInitPreRecordedJobV2BodyPiiRedactionDefault = false;
|
|
19155
19458
|
var transcriptionControllerInitPreRecordedJobV2BodySentencesDefault = false;
|
|
19156
19459
|
var transcriptionControllerInitPreRecordedJobV2BodyPunctuationEnhancedDefault = false;
|
|
@@ -19443,23 +19746,23 @@ var transcriptionControllerInitPreRecordedJobV2Body = import_zod5.z.object({
|
|
|
19443
19746
|
"Forces the translation to use informal language forms when available in the target language."
|
|
19444
19747
|
)
|
|
19445
19748
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
19446
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
19749
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
19447
19750
|
summarization_config: import_zod5.z.object({
|
|
19448
19751
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(transcriptionControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault).describe("The type of summarization to apply")
|
|
19449
|
-
}).optional().describe("
|
|
19752
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
19450
19753
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
19451
19754
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
19452
19755
|
custom_spelling_config: import_zod5.z.object({
|
|
19453
19756
|
spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
|
|
19454
19757
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
19455
19758
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
19456
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
19759
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
19457
19760
|
audio_to_llm_config: import_zod5.z.object({
|
|
19458
19761
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
19459
19762
|
model: import_zod5.z.string().default(transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault).describe(
|
|
19460
19763
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
19461
19764
|
)
|
|
19462
|
-
}).optional().describe("
|
|
19765
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
19463
19766
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
19464
19767
|
pii_redaction_config: import_zod5.z.object({
|
|
19465
19768
|
entity_types: import_zod5.z.enum([
|
|
@@ -19717,7 +20020,7 @@ var transcriptionControllerListV2ResponseItemsItemRequestParamsNamedEntityRecogn
|
|
|
19717
20020
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsCustomSpellingDefault = false;
|
|
19718
20021
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
|
|
19719
20022
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmDefault = false;
|
|
19720
|
-
var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
20023
|
+
var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
19721
20024
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsPiiRedactionDefault = false;
|
|
19722
20025
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsSentencesDefault = false;
|
|
19723
20026
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
|
|
@@ -20128,12 +20431,12 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
|
|
|
20128
20431
|
"Forces the translation to use informal language forms when available in the target language."
|
|
20129
20432
|
)
|
|
20130
20433
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
20131
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
20434
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
20132
20435
|
summarization_config: import_zod5.z.object({
|
|
20133
20436
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
20134
20437
|
transcriptionControllerListV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
|
|
20135
20438
|
).describe("The type of summarization to apply")
|
|
20136
|
-
}).optional().describe("
|
|
20439
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
20137
20440
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
20138
20441
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
20139
20442
|
custom_spelling_config: import_zod5.z.object({
|
|
@@ -20142,7 +20445,7 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
|
|
|
20142
20445
|
"**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
|
|
20143
20446
|
),
|
|
20144
20447
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
20145
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
20448
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
20146
20449
|
audio_to_llm_config: import_zod5.z.object({
|
|
20147
20450
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
20148
20451
|
model: import_zod5.z.string().default(
|
|
@@ -20150,7 +20453,7 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
|
|
|
20150
20453
|
).describe(
|
|
20151
20454
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
20152
20455
|
)
|
|
20153
|
-
}).optional().describe("
|
|
20456
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
20154
20457
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
20155
20458
|
pii_redaction_config: import_zod5.z.object({
|
|
20156
20459
|
entity_types: import_zod5.z.enum([
|
|
@@ -22468,7 +22771,7 @@ var transcriptionControllerGetTranscriptV2ResponseRequestParamsNamedEntityRecogn
|
|
|
22468
22771
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsCustomSpellingDefault = false;
|
|
22469
22772
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsSentimentAnalysisDefault = false;
|
|
22470
22773
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmDefault = false;
|
|
22471
|
-
var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
22774
|
+
var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
22472
22775
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsPiiRedactionDefault = false;
|
|
22473
22776
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsSentencesDefault = false;
|
|
22474
22777
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsPunctuationEnhancedDefault = false;
|
|
@@ -22873,19 +23176,19 @@ var transcriptionControllerGetTranscriptV2Response = import_zod5.z.discriminated
|
|
|
22873
23176
|
"Forces the translation to use informal language forms when available in the target language."
|
|
22874
23177
|
)
|
|
22875
23178
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
22876
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
23179
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
22877
23180
|
summarization_config: import_zod5.z.object({
|
|
22878
23181
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
22879
23182
|
transcriptionControllerGetTranscriptV2ResponseRequestParamsSummarizationConfigTypeDefault
|
|
22880
23183
|
).describe("The type of summarization to apply")
|
|
22881
|
-
}).optional().describe("
|
|
23184
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
22882
23185
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
22883
23186
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
22884
23187
|
custom_spelling_config: import_zod5.z.object({
|
|
22885
23188
|
spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
|
|
22886
23189
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
22887
23190
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
22888
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
23191
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
22889
23192
|
audio_to_llm_config: import_zod5.z.object({
|
|
22890
23193
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
22891
23194
|
model: import_zod5.z.string().default(
|
|
@@ -22893,7 +23196,7 @@ var transcriptionControllerGetTranscriptV2Response = import_zod5.z.discriminated
|
|
|
22893
23196
|
).describe(
|
|
22894
23197
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
22895
23198
|
)
|
|
22896
|
-
}).optional().describe("
|
|
23199
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
22897
23200
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
22898
23201
|
pii_redaction_config: import_zod5.z.object({
|
|
22899
23202
|
entity_types: import_zod5.z.enum([
|
|
@@ -25605,7 +25908,7 @@ var historyControllerGetListV1ResponseItemsItemRequestParamsNamedEntityRecogniti
|
|
|
25605
25908
|
var historyControllerGetListV1ResponseItemsItemRequestParamsCustomSpellingDefault = false;
|
|
25606
25909
|
var historyControllerGetListV1ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
|
|
25607
25910
|
var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmDefault = false;
|
|
25608
|
-
var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
25911
|
+
var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
25609
25912
|
var historyControllerGetListV1ResponseItemsItemRequestParamsPiiRedactionDefault = false;
|
|
25610
25913
|
var historyControllerGetListV1ResponseItemsItemRequestParamsSentencesDefault = false;
|
|
25611
25914
|
var historyControllerGetListV1ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
|
|
@@ -26016,12 +26319,12 @@ var historyControllerGetListV1Response = import_zod5.z.object({
|
|
|
26016
26319
|
"Forces the translation to use informal language forms when available in the target language."
|
|
26017
26320
|
)
|
|
26018
26321
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
26019
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
26322
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
26020
26323
|
summarization_config: import_zod5.z.object({
|
|
26021
26324
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
26022
26325
|
historyControllerGetListV1ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
|
|
26023
26326
|
).describe("The type of summarization to apply")
|
|
26024
|
-
}).optional().describe("
|
|
26327
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
26025
26328
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
26026
26329
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
26027
26330
|
custom_spelling_config: import_zod5.z.object({
|
|
@@ -26030,7 +26333,7 @@ var historyControllerGetListV1Response = import_zod5.z.object({
|
|
|
26030
26333
|
"**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
|
|
26031
26334
|
),
|
|
26032
26335
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
26033
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
26336
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
26034
26337
|
audio_to_llm_config: import_zod5.z.object({
|
|
26035
26338
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
26036
26339
|
model: import_zod5.z.string().default(
|
|
@@ -26038,7 +26341,7 @@ var historyControllerGetListV1Response = import_zod5.z.object({
|
|
|
26038
26341
|
).describe(
|
|
26039
26342
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
26040
26343
|
)
|
|
26041
|
-
}).optional().describe("
|
|
26344
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
26042
26345
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
26043
26346
|
pii_redaction_config: import_zod5.z.object({
|
|
26044
26347
|
entity_types: import_zod5.z.enum([
|
|
@@ -31283,6 +31586,7 @@ __export(openAIAudioRealtimeAPI_zod_exports, {
|
|
|
31283
31586
|
createRealtimeClientSecretBodySessionPromptVariablesTypeDefault: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefault,
|
|
31284
31587
|
createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultOne: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultOne,
|
|
31285
31588
|
createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultTwo: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultTwo,
|
|
31589
|
+
createRealtimeClientSecretBodySessionReasoningEffortDefault: () => createRealtimeClientSecretBodySessionReasoningEffortDefault,
|
|
31286
31590
|
createRealtimeClientSecretBodySessionToolChoiceDefault: () => createRealtimeClientSecretBodySessionToolChoiceDefault,
|
|
31287
31591
|
createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne: () => createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne,
|
|
31288
31592
|
createRealtimeClientSecretBodySessionTracingDefault: () => createRealtimeClientSecretBodySessionTracingDefault,
|
|
@@ -31307,6 +31611,7 @@ __export(openAIAudioRealtimeAPI_zod_exports, {
|
|
|
31307
31611
|
createRealtimeClientSecretResponseSessionPromptVariablesTypeDefault: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefault,
|
|
31308
31612
|
createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultOne: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultOne,
|
|
31309
31613
|
createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo,
|
|
31614
|
+
createRealtimeClientSecretResponseSessionReasoningEffortDefault: () => createRealtimeClientSecretResponseSessionReasoningEffortDefault,
|
|
31310
31615
|
createRealtimeClientSecretResponseSessionToolChoiceDefault: () => createRealtimeClientSecretResponseSessionToolChoiceDefault,
|
|
31311
31616
|
createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne: () => createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne,
|
|
31312
31617
|
createRealtimeClientSecretResponseSessionTracingDefaultOne: () => createRealtimeClientSecretResponseSessionTracingDefaultOne,
|
|
@@ -31663,6 +31968,7 @@ var createRealtimeClientSecretBodySessionTracingDefaultOne = "auto";
|
|
|
31663
31968
|
var createRealtimeClientSecretBodySessionTracingDefault = null;
|
|
31664
31969
|
var createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne = "always";
|
|
31665
31970
|
var createRealtimeClientSecretBodySessionToolChoiceDefault = "auto";
|
|
31971
|
+
var createRealtimeClientSecretBodySessionReasoningEffortDefault = "low";
|
|
31666
31972
|
var createRealtimeClientSecretBodySessionTruncationRetentionRatioMin = 0;
|
|
31667
31973
|
var createRealtimeClientSecretBodySessionTruncationRetentionRatioMax = 1;
|
|
31668
31974
|
var createRealtimeClientSecretBodySessionTruncationTokenLimitsPostInstructionsMin = 0;
|
|
@@ -31698,6 +32004,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
31698
32004
|
import_zod6.z.enum([
|
|
31699
32005
|
"gpt-realtime",
|
|
31700
32006
|
"gpt-realtime-1.5",
|
|
32007
|
+
"gpt-realtime-2",
|
|
31701
32008
|
"gpt-realtime-2025-08-28",
|
|
31702
32009
|
"gpt-4o-realtime-preview",
|
|
31703
32010
|
"gpt-4o-realtime-preview-2024-10-01",
|
|
@@ -31738,16 +32045,20 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
31738
32045
|
"gpt-4o-mini-transcribe",
|
|
31739
32046
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
31740
32047
|
"gpt-4o-transcribe",
|
|
31741
|
-
"gpt-4o-transcribe-diarize"
|
|
32048
|
+
"gpt-4o-transcribe-diarize",
|
|
32049
|
+
"gpt-realtime-whisper"
|
|
31742
32050
|
])
|
|
31743
32051
|
).optional().describe(
|
|
31744
|
-
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`,
|
|
32052
|
+
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
|
|
31745
32053
|
),
|
|
31746
32054
|
language: import_zod6.z.string().optional().describe(
|
|
31747
32055
|
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
31748
32056
|
),
|
|
31749
32057
|
prompt: import_zod6.z.string().optional().describe(
|
|
31750
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
32058
|
+
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
|
|
32059
|
+
),
|
|
32060
|
+
delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
|
|
32061
|
+
"Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
|
|
31751
32062
|
)
|
|
31752
32063
|
}).optional(),
|
|
31753
32064
|
noise_reduction: import_zod6.z.object({
|
|
@@ -31814,7 +32125,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
31814
32125
|
"Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
|
|
31815
32126
|
)
|
|
31816
32127
|
]).describe(
|
|
31817
|
-
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
|
|
32128
|
+
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
|
|
31818
32129
|
).or(import_zod6.z.null()).optional()
|
|
31819
32130
|
}).optional(),
|
|
31820
32131
|
output: import_zod6.z.object({
|
|
@@ -31887,7 +32198,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
31887
32198
|
server_label: import_zod6.z.string().describe(
|
|
31888
32199
|
"A label for this MCP server, used to identify it in tool calls.\n"
|
|
31889
32200
|
),
|
|
31890
|
-
server_url: import_zod6.z.string().optional().describe(
|
|
32201
|
+
server_url: import_zod6.z.string().url().optional().describe(
|
|
31891
32202
|
"The URL for the MCP server. One of `server_url` or `connector_id` must be\nprovided.\n"
|
|
31892
32203
|
),
|
|
31893
32204
|
connector_id: import_zod6.z.enum([
|
|
@@ -31965,6 +32276,16 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
31965
32276
|
).default(createRealtimeClientSecretBodySessionToolChoiceDefault).describe(
|
|
31966
32277
|
"How the model chooses tools. Provide one of the string modes or force a specific\nfunction/MCP tool.\n"
|
|
31967
32278
|
),
|
|
32279
|
+
parallel_tool_calls: import_zod6.z.boolean().optional().describe(
|
|
32280
|
+
"Whether the model may call multiple tools in parallel. Only supported by\nreasoning Realtime models such as `gpt-realtime-2`.\n"
|
|
32281
|
+
),
|
|
32282
|
+
reasoning: import_zod6.z.object({
|
|
32283
|
+
effort: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).default(createRealtimeClientSecretBodySessionReasoningEffortDefault).describe(
|
|
32284
|
+
"Constrains effort on reasoning for reasoning-capable Realtime models such as\n`gpt-realtime-2`.\n"
|
|
32285
|
+
)
|
|
32286
|
+
}).optional().describe(
|
|
32287
|
+
"Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.\n"
|
|
32288
|
+
),
|
|
31968
32289
|
max_output_tokens: import_zod6.z.number().or(import_zod6.z.enum(["inf"])).optional().describe(
|
|
31969
32290
|
"Maximum number of output tokens for a single assistant response,\ninclusive of tool calls. Provide an integer between 1 and 4096 to\nlimit output tokens, or `inf` for the maximum available tokens for a\ngiven model. Defaults to `inf`.\n"
|
|
31970
32291
|
),
|
|
@@ -32004,7 +32325,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
32004
32325
|
).or(
|
|
32005
32326
|
import_zod6.z.object({
|
|
32006
32327
|
type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
|
|
32007
|
-
image_url: import_zod6.z.string().describe(
|
|
32328
|
+
image_url: import_zod6.z.string().url().describe(
|
|
32008
32329
|
"The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
|
|
32009
32330
|
).or(import_zod6.z.null()).optional(),
|
|
32010
32331
|
file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
|
|
@@ -32018,7 +32339,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
32018
32339
|
file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
|
|
32019
32340
|
filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
|
|
32020
32341
|
file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
|
|
32021
|
-
file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
|
|
32342
|
+
file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
|
|
32022
32343
|
detail: import_zod6.z.enum(["low", "high"]).optional()
|
|
32023
32344
|
}).describe("A file input to the model.")
|
|
32024
32345
|
)
|
|
@@ -32054,16 +32375,20 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
32054
32375
|
"gpt-4o-mini-transcribe",
|
|
32055
32376
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
32056
32377
|
"gpt-4o-transcribe",
|
|
32057
|
-
"gpt-4o-transcribe-diarize"
|
|
32378
|
+
"gpt-4o-transcribe-diarize",
|
|
32379
|
+
"gpt-realtime-whisper"
|
|
32058
32380
|
])
|
|
32059
32381
|
).optional().describe(
|
|
32060
|
-
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`,
|
|
32382
|
+
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
|
|
32061
32383
|
),
|
|
32062
32384
|
language: import_zod6.z.string().optional().describe(
|
|
32063
32385
|
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
32064
32386
|
),
|
|
32065
32387
|
prompt: import_zod6.z.string().optional().describe(
|
|
32066
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
32388
|
+
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
|
|
32389
|
+
),
|
|
32390
|
+
delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
|
|
32391
|
+
"Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
|
|
32067
32392
|
)
|
|
32068
32393
|
}).optional(),
|
|
32069
32394
|
noise_reduction: import_zod6.z.object({
|
|
@@ -32130,7 +32455,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
32130
32455
|
"Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
|
|
32131
32456
|
)
|
|
32132
32457
|
]).describe(
|
|
32133
|
-
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
|
|
32458
|
+
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
|
|
32134
32459
|
).or(import_zod6.z.null()).optional()
|
|
32135
32460
|
}).optional()
|
|
32136
32461
|
}).optional().describe("Configuration for input and output audio.\n"),
|
|
@@ -32161,6 +32486,7 @@ var createRealtimeClientSecretResponseSessionTracingDefaultTwo = "auto";
|
|
|
32161
32486
|
var createRealtimeClientSecretResponseSessionTracingDefaultOne = null;
|
|
32162
32487
|
var createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne = "always";
|
|
32163
32488
|
var createRealtimeClientSecretResponseSessionToolChoiceDefault = "auto";
|
|
32489
|
+
var createRealtimeClientSecretResponseSessionReasoningEffortDefault = "low";
|
|
32164
32490
|
var createRealtimeClientSecretResponseSessionTruncationRetentionRatioMin = 0;
|
|
32165
32491
|
var createRealtimeClientSecretResponseSessionTruncationRetentionRatioMax = 1;
|
|
32166
32492
|
var createRealtimeClientSecretResponseSessionTruncationTokenLimitsPostInstructionsMin = 0;
|
|
@@ -32170,17 +32496,14 @@ var createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo = "in
|
|
|
32170
32496
|
var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
32171
32497
|
value: import_zod6.z.string().describe("The generated client secret value."),
|
|
32172
32498
|
expires_at: import_zod6.z.number().describe("Expiration timestamp for the client secret, in seconds since epoch."),
|
|
32173
|
-
session: import_zod6.z.
|
|
32499
|
+
session: import_zod6.z.union([
|
|
32174
32500
|
import_zod6.z.object({
|
|
32175
|
-
client_secret: import_zod6.z.object({
|
|
32176
|
-
value: import_zod6.z.string().describe(
|
|
32177
|
-
"Ephemeral key usable in client environments to authenticate connections to the Realtime API. Use this in client-side environments rather than a standard API token, which should only be used server-side.\n"
|
|
32178
|
-
),
|
|
32179
|
-
expires_at: import_zod6.z.number().describe(
|
|
32180
|
-
"Timestamp for when the token expires. Currently, all tokens expire\nafter one minute.\n"
|
|
32181
|
-
)
|
|
32182
|
-
}).describe("Ephemeral key returned by the API."),
|
|
32183
32501
|
type: import_zod6.z.enum(["realtime"]).describe("The type of session to create. Always `realtime` for the Realtime API.\n"),
|
|
32502
|
+
id: import_zod6.z.string().describe(
|
|
32503
|
+
"Unique identifier for the session that looks like `sess_1234567890abcdef`.\n"
|
|
32504
|
+
),
|
|
32505
|
+
object: import_zod6.z.enum(["realtime.session"]).describe("The object type. Always `realtime.session`."),
|
|
32506
|
+
expires_at: import_zod6.z.number().optional().describe("Expiration timestamp for the session, in seconds since epoch."),
|
|
32184
32507
|
output_modalities: import_zod6.z.array(import_zod6.z.enum(["text", "audio"])).default(createRealtimeClientSecretResponseSessionOutputModalitiesDefault).describe(
|
|
32185
32508
|
'The set of modalities the model can respond with. It defaults to `["audio"]`, indicating\nthat the model will respond with audio plus a transcript. `["text"]` can be used to make\nthe model respond with text only. It is not possible to request both `text` and `audio` at the same time.\n'
|
|
32186
32509
|
),
|
|
@@ -32188,6 +32511,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32188
32511
|
import_zod6.z.enum([
|
|
32189
32512
|
"gpt-realtime",
|
|
32190
32513
|
"gpt-realtime-1.5",
|
|
32514
|
+
"gpt-realtime-2",
|
|
32191
32515
|
"gpt-realtime-2025-08-28",
|
|
32192
32516
|
"gpt-4o-realtime-preview",
|
|
32193
32517
|
"gpt-4o-realtime-preview-2024-10-01",
|
|
@@ -32210,15 +32534,15 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32210
32534
|
audio: import_zod6.z.object({
|
|
32211
32535
|
input: import_zod6.z.object({
|
|
32212
32536
|
format: import_zod6.z.object({
|
|
32213
|
-
type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
|
|
32214
|
-
rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
|
|
32537
|
+
type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
|
|
32538
|
+
rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
|
|
32215
32539
|
}).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
|
|
32216
32540
|
import_zod6.z.object({
|
|
32217
|
-
type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
|
|
32541
|
+
type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
|
|
32218
32542
|
}).describe("The G.711 \u03BC-law format.")
|
|
32219
32543
|
).or(
|
|
32220
32544
|
import_zod6.z.object({
|
|
32221
|
-
type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
|
|
32545
|
+
type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
|
|
32222
32546
|
}).describe("The G.711 A-law format.")
|
|
32223
32547
|
).optional(),
|
|
32224
32548
|
transcription: import_zod6.z.object({
|
|
@@ -32228,20 +32552,19 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32228
32552
|
"gpt-4o-mini-transcribe",
|
|
32229
32553
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
32230
32554
|
"gpt-4o-transcribe",
|
|
32231
|
-
"gpt-4o-transcribe-diarize"
|
|
32555
|
+
"gpt-4o-transcribe-diarize",
|
|
32556
|
+
"gpt-realtime-whisper"
|
|
32232
32557
|
])
|
|
32233
32558
|
).optional().describe(
|
|
32234
|
-
"The model
|
|
32235
|
-
),
|
|
32236
|
-
language: import_zod6.z.string().optional().describe(
|
|
32237
|
-
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
32559
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
32238
32560
|
),
|
|
32561
|
+
language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
|
|
32239
32562
|
prompt: import_zod6.z.string().optional().describe(
|
|
32240
|
-
|
|
32563
|
+
"The prompt configured for input audio transcription, when present.\n"
|
|
32241
32564
|
)
|
|
32242
32565
|
}).optional(),
|
|
32243
32566
|
noise_reduction: import_zod6.z.object({
|
|
32244
|
-
type: import_zod6.z.enum(["near_field", "far_field"]).describe(
|
|
32567
|
+
type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
|
|
32245
32568
|
"Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones such as laptop or conference room microphones.\n"
|
|
32246
32569
|
)
|
|
32247
32570
|
}).optional().describe(
|
|
@@ -32304,20 +32627,20 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32304
32627
|
"Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
|
|
32305
32628
|
)
|
|
32306
32629
|
]).describe(
|
|
32307
|
-
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
|
|
32630
|
+
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
|
|
32308
32631
|
).or(import_zod6.z.null()).optional()
|
|
32309
32632
|
}).optional(),
|
|
32310
32633
|
output: import_zod6.z.object({
|
|
32311
32634
|
format: import_zod6.z.object({
|
|
32312
|
-
type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
|
|
32313
|
-
rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
|
|
32635
|
+
type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
|
|
32636
|
+
rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
|
|
32314
32637
|
}).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
|
|
32315
32638
|
import_zod6.z.object({
|
|
32316
|
-
type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
|
|
32639
|
+
type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
|
|
32317
32640
|
}).describe("The G.711 \u03BC-law format.")
|
|
32318
32641
|
).or(
|
|
32319
32642
|
import_zod6.z.object({
|
|
32320
|
-
type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
|
|
32643
|
+
type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
|
|
32321
32644
|
}).describe("The G.711 A-law format.")
|
|
32322
32645
|
).optional(),
|
|
32323
32646
|
voice: import_zod6.z.string().or(
|
|
@@ -32361,7 +32684,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32361
32684
|
).or(import_zod6.z.null()).optional(),
|
|
32362
32685
|
tools: import_zod6.z.array(
|
|
32363
32686
|
import_zod6.z.object({
|
|
32364
|
-
type: import_zod6.z.enum(["function"]).describe("The type of the tool, i.e. `function`."),
|
|
32687
|
+
type: import_zod6.z.enum(["function"]).optional().describe("The type of the tool, i.e. `function`."),
|
|
32365
32688
|
name: import_zod6.z.string().optional().describe("The name of the function."),
|
|
32366
32689
|
description: import_zod6.z.string().optional().describe(
|
|
32367
32690
|
"The description of the function, including guidance on when and how\nto call it, and guidance about what to tell the user when calling\n(if anything).\n"
|
|
@@ -32373,7 +32696,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32373
32696
|
server_label: import_zod6.z.string().describe(
|
|
32374
32697
|
"A label for this MCP server, used to identify it in tool calls.\n"
|
|
32375
32698
|
),
|
|
32376
|
-
server_url: import_zod6.z.string().optional().describe(
|
|
32699
|
+
server_url: import_zod6.z.string().url().optional().describe(
|
|
32377
32700
|
"The URL for the MCP server. One of `server_url` or `connector_id` must be\nprovided.\n"
|
|
32378
32701
|
),
|
|
32379
32702
|
connector_id: import_zod6.z.enum([
|
|
@@ -32385,7 +32708,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32385
32708
|
"connector_outlookcalendar",
|
|
32386
32709
|
"connector_outlookemail",
|
|
32387
32710
|
"connector_sharepoint"
|
|
32388
|
-
]).describe(
|
|
32711
|
+
]).optional().describe(
|
|
32389
32712
|
"Identifier for service connectors, like those available in ChatGPT. One of\n`server_url` or `connector_id` must be provided. Learn more about service\nconnectors [here](/docs/guides/tools-remote-mcp#connectors).\n\nCurrently supported `connector_id` values are:\n\n- Dropbox: `connector_dropbox`\n- Gmail: `connector_gmail`\n- Google Calendar: `connector_googlecalendar`\n- Google Drive: `connector_googledrive`\n- Microsoft Teams: `connector_microsoftteams`\n- Outlook Calendar: `connector_outlookcalendar`\n- Outlook Email: `connector_outlookemail`\n- SharePoint: `connector_sharepoint`\n"
|
|
32390
32713
|
),
|
|
32391
32714
|
authorization: import_zod6.z.string().optional().describe(
|
|
@@ -32451,6 +32774,13 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32451
32774
|
).default(createRealtimeClientSecretResponseSessionToolChoiceDefault).describe(
|
|
32452
32775
|
"How the model chooses tools. Provide one of the string modes or force a specific\nfunction/MCP tool.\n"
|
|
32453
32776
|
),
|
|
32777
|
+
reasoning: import_zod6.z.object({
|
|
32778
|
+
effort: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).default(createRealtimeClientSecretResponseSessionReasoningEffortDefault).describe(
|
|
32779
|
+
"Constrains effort on reasoning for reasoning-capable Realtime models such as\n`gpt-realtime-2`.\n"
|
|
32780
|
+
)
|
|
32781
|
+
}).optional().describe(
|
|
32782
|
+
"Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.\n"
|
|
32783
|
+
),
|
|
32454
32784
|
max_output_tokens: import_zod6.z.number().or(import_zod6.z.enum(["inf"])).optional().describe(
|
|
32455
32785
|
"Maximum number of output tokens for a single assistant response,\ninclusive of tool calls. Provide an integer between 1 and 4096 to\nlimit output tokens, or `inf` for the maximum available tokens for a\ngiven model. Defaults to `inf`.\n"
|
|
32456
32786
|
),
|
|
@@ -32490,7 +32820,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32490
32820
|
).or(
|
|
32491
32821
|
import_zod6.z.object({
|
|
32492
32822
|
type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
|
|
32493
|
-
image_url: import_zod6.z.string().describe(
|
|
32823
|
+
image_url: import_zod6.z.string().url().describe(
|
|
32494
32824
|
"The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
|
|
32495
32825
|
).or(import_zod6.z.null()).optional(),
|
|
32496
32826
|
file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
|
|
@@ -32504,8 +32834,8 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32504
32834
|
file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
|
|
32505
32835
|
filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
|
|
32506
32836
|
file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
|
|
32507
|
-
file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
|
|
32508
|
-
detail: import_zod6.z.enum(["low", "high"])
|
|
32837
|
+
file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
|
|
32838
|
+
detail: import_zod6.z.enum(["low", "high"]).optional()
|
|
32509
32839
|
}).describe("A file input to the model.")
|
|
32510
32840
|
)
|
|
32511
32841
|
).describe(
|
|
@@ -32514,9 +32844,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32514
32844
|
}).describe(
|
|
32515
32845
|
"Reference to a prompt template and its variables.\n[Learn more](/docs/guides/text?api-mode=responses#reusable-prompts).\n"
|
|
32516
32846
|
).or(import_zod6.z.null()).optional()
|
|
32517
|
-
}).describe(
|
|
32518
|
-
"A new Realtime session configuration, with an ephemeral key. Default TTL\nfor keys is one minute.\n"
|
|
32519
|
-
),
|
|
32847
|
+
}).describe("A Realtime session configuration object.\n"),
|
|
32520
32848
|
import_zod6.z.object({
|
|
32521
32849
|
type: import_zod6.z.enum(["transcription"]).describe(
|
|
32522
32850
|
"The type of session. Always `transcription` for transcription sessions.\n"
|
|
@@ -32532,15 +32860,15 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32532
32860
|
audio: import_zod6.z.object({
|
|
32533
32861
|
input: import_zod6.z.object({
|
|
32534
32862
|
format: import_zod6.z.object({
|
|
32535
|
-
type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
|
|
32536
|
-
rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
|
|
32863
|
+
type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
|
|
32864
|
+
rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
|
|
32537
32865
|
}).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
|
|
32538
32866
|
import_zod6.z.object({
|
|
32539
|
-
type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
|
|
32867
|
+
type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
|
|
32540
32868
|
}).describe("The G.711 \u03BC-law format.")
|
|
32541
32869
|
).or(
|
|
32542
32870
|
import_zod6.z.object({
|
|
32543
|
-
type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
|
|
32871
|
+
type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
|
|
32544
32872
|
}).describe("The G.711 A-law format.")
|
|
32545
32873
|
).optional(),
|
|
32546
32874
|
transcription: import_zod6.z.object({
|
|
@@ -32550,20 +32878,19 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32550
32878
|
"gpt-4o-mini-transcribe",
|
|
32551
32879
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
32552
32880
|
"gpt-4o-transcribe",
|
|
32553
|
-
"gpt-4o-transcribe-diarize"
|
|
32881
|
+
"gpt-4o-transcribe-diarize",
|
|
32882
|
+
"gpt-realtime-whisper"
|
|
32554
32883
|
])
|
|
32555
32884
|
).optional().describe(
|
|
32556
|
-
"The model
|
|
32557
|
-
),
|
|
32558
|
-
language: import_zod6.z.string().optional().describe(
|
|
32559
|
-
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
32885
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
32560
32886
|
),
|
|
32887
|
+
language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
|
|
32561
32888
|
prompt: import_zod6.z.string().optional().describe(
|
|
32562
|
-
|
|
32889
|
+
"The prompt configured for input audio transcription, when present.\n"
|
|
32563
32890
|
)
|
|
32564
32891
|
}).optional(),
|
|
32565
32892
|
noise_reduction: import_zod6.z.object({
|
|
32566
|
-
type: import_zod6.z.enum(["near_field", "far_field"]).describe(
|
|
32893
|
+
type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
|
|
32567
32894
|
"Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones such as laptop or conference room microphones.\n"
|
|
32568
32895
|
)
|
|
32569
32896
|
}).optional().describe("Configuration for input audio noise reduction.\n"),
|
|
@@ -32580,8 +32907,10 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32580
32907
|
silence_duration_ms: import_zod6.z.number().optional().describe(
|
|
32581
32908
|
"Duration of silence to detect speech stop (in milliseconds). Defaults\nto 500ms. With shorter values the model will respond more quickly,\nbut may jump in on short pauses from the user.\n"
|
|
32582
32909
|
)
|
|
32583
|
-
}).
|
|
32584
|
-
"Configuration for turn detection. Can be set to `null` to turn off. Server\nVAD means that the model will detect the start and end of speech based on\naudio volume and respond at the end of user speech.\n"
|
|
32910
|
+
}).describe(
|
|
32911
|
+
"Configuration for turn detection. Can be set to `null` to turn off. Server\nVAD means that the model will detect the start and end of speech based on\naudio volume and respond at the end of user speech. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported.\n"
|
|
32912
|
+
).or(import_zod6.z.null()).optional().describe(
|
|
32913
|
+
"Configuration for turn detection. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported.\n"
|
|
32585
32914
|
)
|
|
32586
32915
|
}).optional()
|
|
32587
32916
|
}).optional().describe("Configuration for input audio for the session.\n")
|
|
@@ -32721,7 +33050,7 @@ var createRealtimeSessionBody = import_zod6.z.object({
|
|
|
32721
33050
|
).or(
|
|
32722
33051
|
import_zod6.z.object({
|
|
32723
33052
|
type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
|
|
32724
|
-
image_url: import_zod6.z.string().describe(
|
|
33053
|
+
image_url: import_zod6.z.string().url().describe(
|
|
32725
33054
|
"The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
|
|
32726
33055
|
).or(import_zod6.z.null()).optional(),
|
|
32727
33056
|
file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
|
|
@@ -32735,7 +33064,7 @@ var createRealtimeSessionBody = import_zod6.z.object({
|
|
|
32735
33064
|
file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
|
|
32736
33065
|
filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
|
|
32737
33066
|
file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
|
|
32738
|
-
file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
|
|
33067
|
+
file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
|
|
32739
33068
|
detail: import_zod6.z.enum(["low", "high"]).optional()
|
|
32740
33069
|
}).describe("A file input to the model.")
|
|
32741
33070
|
)
|
|
@@ -32784,17 +33113,14 @@ var createRealtimeSessionResponse = import_zod6.z.object({
|
|
|
32784
33113
|
"gpt-4o-mini-transcribe",
|
|
32785
33114
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
32786
33115
|
"gpt-4o-transcribe",
|
|
32787
|
-
"gpt-4o-transcribe-diarize"
|
|
33116
|
+
"gpt-4o-transcribe-diarize",
|
|
33117
|
+
"gpt-realtime-whisper"
|
|
32788
33118
|
])
|
|
32789
33119
|
).optional().describe(
|
|
32790
|
-
"The model
|
|
32791
|
-
),
|
|
32792
|
-
language: import_zod6.z.string().optional().describe(
|
|
32793
|
-
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
33120
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
32794
33121
|
),
|
|
32795
|
-
|
|
32796
|
-
|
|
32797
|
-
)
|
|
33122
|
+
language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
|
|
33123
|
+
prompt: import_zod6.z.string().optional().describe("The prompt configured for input audio transcription, when present.\n")
|
|
32798
33124
|
}).optional(),
|
|
32799
33125
|
noise_reduction: import_zod6.z.object({
|
|
32800
33126
|
type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
|
|
@@ -32920,16 +33246,20 @@ var createRealtimeTranscriptionSessionBody = import_zod6.z.object({
|
|
|
32920
33246
|
"gpt-4o-mini-transcribe",
|
|
32921
33247
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
32922
33248
|
"gpt-4o-transcribe",
|
|
32923
|
-
"gpt-4o-transcribe-diarize"
|
|
33249
|
+
"gpt-4o-transcribe-diarize",
|
|
33250
|
+
"gpt-realtime-whisper"
|
|
32924
33251
|
])
|
|
32925
33252
|
).optional().describe(
|
|
32926
|
-
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`,
|
|
33253
|
+
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
|
|
32927
33254
|
),
|
|
32928
33255
|
language: import_zod6.z.string().optional().describe(
|
|
32929
33256
|
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
32930
33257
|
),
|
|
32931
33258
|
prompt: import_zod6.z.string().optional().describe(
|
|
32932
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
33259
|
+
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
|
|
33260
|
+
),
|
|
33261
|
+
delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
|
|
33262
|
+
"Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
|
|
32933
33263
|
)
|
|
32934
33264
|
}).optional(),
|
|
32935
33265
|
include: import_zod6.z.array(import_zod6.z.enum(["item.input_audio_transcription.logprobs"])).optional().describe(
|
|
@@ -32958,17 +33288,14 @@ var createRealtimeTranscriptionSessionResponse = import_zod6.z.object({
|
|
|
32958
33288
|
"gpt-4o-mini-transcribe",
|
|
32959
33289
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
32960
33290
|
"gpt-4o-transcribe",
|
|
32961
|
-
"gpt-4o-transcribe-diarize"
|
|
33291
|
+
"gpt-4o-transcribe-diarize",
|
|
33292
|
+
"gpt-realtime-whisper"
|
|
32962
33293
|
])
|
|
32963
33294
|
).optional().describe(
|
|
32964
|
-
"The model
|
|
32965
|
-
),
|
|
32966
|
-
language: import_zod6.z.string().optional().describe(
|
|
32967
|
-
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
33295
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
32968
33296
|
),
|
|
32969
|
-
|
|
32970
|
-
|
|
32971
|
-
)
|
|
33297
|
+
language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
|
|
33298
|
+
prompt: import_zod6.z.string().optional().describe("The prompt configured for input audio transcription, when present.\n")
|
|
32972
33299
|
}).optional(),
|
|
32973
33300
|
turn_detection: import_zod6.z.object({
|
|
32974
33301
|
type: import_zod6.z.string().optional().describe("Type of turn detection, only `server_vad` is currently supported.\n"),
|
|
@@ -36353,6 +36680,7 @@ __export(sonioxPublicAPI_zod_exports, {
|
|
|
36353
36680
|
createTranscriptionBodyWebhookUrlRegExpOne: () => createTranscriptionBodyWebhookUrlRegExpOne,
|
|
36354
36681
|
deleteFileParams: () => deleteFileParams,
|
|
36355
36682
|
deleteTranscriptionParams: () => deleteTranscriptionParams,
|
|
36683
|
+
getConcurrencyLimitsResponse: () => getConcurrencyLimitsResponse,
|
|
36356
36684
|
getFileParams: () => getFileParams,
|
|
36357
36685
|
getFileResponse: () => getFileResponse,
|
|
36358
36686
|
getFilesCountResponse: () => getFilesCountResponse,
|
|
@@ -36370,6 +36698,12 @@ __export(sonioxPublicAPI_zod_exports, {
|
|
|
36370
36698
|
getTranscriptionsQueryLimitMax: () => getTranscriptionsQueryLimitMax,
|
|
36371
36699
|
getTranscriptionsQueryParams: () => getTranscriptionsQueryParams,
|
|
36372
36700
|
getTranscriptionsResponse: () => getTranscriptionsResponse,
|
|
36701
|
+
getTtsModelsResponse: () => getTtsModelsResponse,
|
|
36702
|
+
getUsageLogsQueryLimitDefault: () => getUsageLogsQueryLimitDefault,
|
|
36703
|
+
getUsageLogsQueryLimitMax: () => getUsageLogsQueryLimitMax,
|
|
36704
|
+
getUsageLogsQueryParams: () => getUsageLogsQueryParams,
|
|
36705
|
+
getUsageLogsQuerySortDefault: () => getUsageLogsQuerySortDefault,
|
|
36706
|
+
getUsageLogsResponse: () => getUsageLogsResponse,
|
|
36373
36707
|
uploadFileBody: () => uploadFileBody,
|
|
36374
36708
|
uploadFileBodyClientReferenceIdMaxOne: () => uploadFileBodyClientReferenceIdMaxOne
|
|
36375
36709
|
});
|
|
@@ -36620,11 +36954,73 @@ var getModelsResponse = import_zod10.z.object({
|
|
|
36620
36954
|
})
|
|
36621
36955
|
).describe("List of available models and their attributes.")
|
|
36622
36956
|
});
|
|
36957
|
+
var getTtsModelsResponse = import_zod10.z.object({
|
|
36958
|
+
models: import_zod10.z.array(
|
|
36959
|
+
import_zod10.z.object({
|
|
36960
|
+
id: import_zod10.z.string().describe("Unique identifier of the model."),
|
|
36961
|
+
aliased_model_id: import_zod10.z.string().or(import_zod10.z.null()).describe("If this is an alias, the id of the aliased model."),
|
|
36962
|
+
name: import_zod10.z.string().describe("Name of the model."),
|
|
36963
|
+
voices: import_zod10.z.array(
|
|
36964
|
+
import_zod10.z.object({
|
|
36965
|
+
id: import_zod10.z.string().describe("Unique identifier of the voice."),
|
|
36966
|
+
description: import_zod10.z.string().describe("Description of the TTS voice."),
|
|
36967
|
+
gender: import_zod10.z.enum(["male", "female", "neutral"])
|
|
36968
|
+
})
|
|
36969
|
+
).describe("List of available voices for this model."),
|
|
36970
|
+
languages: import_zod10.z.array(
|
|
36971
|
+
import_zod10.z.object({
|
|
36972
|
+
code: import_zod10.z.string().describe("2-letter language code."),
|
|
36973
|
+
name: import_zod10.z.string().describe("Language name.")
|
|
36974
|
+
})
|
|
36975
|
+
).describe("List of languages supported by the model.")
|
|
36976
|
+
})
|
|
36977
|
+
).describe("List of available TTS models and their attributes.")
|
|
36978
|
+
});
|
|
36979
|
+
var getUsageLogsQueryLimitDefault = 1e3;
|
|
36980
|
+
var getUsageLogsQueryLimitMax = 1e3;
|
|
36981
|
+
var getUsageLogsQuerySortDefault = "end_time_asc";
|
|
36982
|
+
var getUsageLogsQueryParams = import_zod10.z.object({
|
|
36983
|
+
start_time: import_zod10.z.string().describe("Start of the time window (inclusive). Filters by request end time."),
|
|
36984
|
+
end_time: import_zod10.z.string().describe("End of the time window (exclusive). Filters by request end time."),
|
|
36985
|
+
limit: import_zod10.z.number().min(1).max(getUsageLogsQueryLimitMax).default(getUsageLogsQueryLimitDefault).describe("Maximum number of usage log entries to return."),
|
|
36986
|
+
sort: import_zod10.z.enum(["end_time_asc", "end_time_desc"]).default(getUsageLogsQuerySortDefault).describe(
|
|
36987
|
+
"Sort order by end_time.Use `end_time_desc` to get the most recent entries first. When paginating, pass the same `sort` value alongside the cursor."
|
|
36988
|
+
),
|
|
36989
|
+
cursor: import_zod10.z.string().or(import_zod10.z.null()).optional().describe("Pagination cursor for the next page of results.")
|
|
36990
|
+
});
|
|
36991
|
+
var getUsageLogsResponse = import_zod10.z.object({
|
|
36992
|
+
usage_logs: import_zod10.z.array(
|
|
36993
|
+
import_zod10.z.object({
|
|
36994
|
+
uuid: import_zod10.z.string().uuid().describe("Unique identifier of the request."),
|
|
36995
|
+
request_scope: import_zod10.z.string().describe("Scope of the request (api / playground)."),
|
|
36996
|
+
client_reference_id: import_zod10.z.string().describe("Client reference ID supplied on the original request. Empty string if none."),
|
|
36997
|
+
model: import_zod10.z.string().describe("Model identifier."),
|
|
36998
|
+
start_time: import_zod10.z.string().datetime({}).describe("When the request started."),
|
|
36999
|
+
end_time: import_zod10.z.string().datetime({}).describe("When the request ended."),
|
|
37000
|
+
input_text_tokens: import_zod10.z.number(),
|
|
37001
|
+
input_audio_tokens: import_zod10.z.number(),
|
|
37002
|
+
input_audio_duration_ms: import_zod10.z.number(),
|
|
37003
|
+
output_text_tokens: import_zod10.z.number(),
|
|
37004
|
+
output_audio_tokens: import_zod10.z.number(),
|
|
37005
|
+
output_audio_duration_ms: import_zod10.z.number(),
|
|
37006
|
+
cost_usd: import_zod10.z.string(),
|
|
37007
|
+
input_cost_usd: import_zod10.z.string(),
|
|
37008
|
+
input_text_cost_usd: import_zod10.z.string(),
|
|
37009
|
+
input_audio_cost_usd: import_zod10.z.string(),
|
|
37010
|
+
output_cost_usd: import_zod10.z.string(),
|
|
37011
|
+
output_text_cost_usd: import_zod10.z.string(),
|
|
37012
|
+
output_audio_cost_usd: import_zod10.z.string()
|
|
37013
|
+
})
|
|
37014
|
+
).describe("Per-request usage log entries ordered by end_time, uuid (per `sort`)."),
|
|
37015
|
+
next_page_cursor: import_zod10.z.string().or(import_zod10.z.null()).optional().describe(
|
|
37016
|
+
"A pagination token that references the next page of results. When more data is available, this field contains a value to pass in the cursor parameter of a subsequent request. When null, no additional results are available."
|
|
37017
|
+
)
|
|
37018
|
+
});
|
|
36623
37019
|
var createTemporaryApiKeyBodyExpiresInSecondsMax = 3600;
|
|
36624
37020
|
var createTemporaryApiKeyBodyClientReferenceIdMaxOne = 256;
|
|
36625
37021
|
var createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne = 18e3;
|
|
36626
37022
|
var createTemporaryApiKeyBody = import_zod10.z.object({
|
|
36627
|
-
usage_type: import_zod10.z.enum(["transcribe_websocket"]),
|
|
37023
|
+
usage_type: import_zod10.z.enum(["transcribe_websocket", "tts_rt"]),
|
|
36628
37024
|
expires_in_seconds: import_zod10.z.number().min(1).max(createTemporaryApiKeyBodyExpiresInSecondsMax).describe("Duration in seconds until the temporary API key expires."),
|
|
36629
37025
|
client_reference_id: import_zod10.z.string().max(createTemporaryApiKeyBodyClientReferenceIdMaxOne).or(import_zod10.z.null()).optional().describe("Optional tracking identifier string. Does not need to be unique."),
|
|
36630
37026
|
single_use: import_zod10.z.boolean().or(import_zod10.z.null()).optional().describe("If true, the temporary API key can be used only once."),
|
|
@@ -36632,6 +37028,28 @@ var createTemporaryApiKeyBody = import_zod10.z.object({
|
|
|
36632
37028
|
"Maximum WebSocket connection duration in seconds. If exceeded, the connection will be dropped. If not set, no limit is applied."
|
|
36633
37029
|
)
|
|
36634
37030
|
});
|
|
37031
|
+
var getConcurrencyLimitsResponse = import_zod10.z.object({
|
|
37032
|
+
project: import_zod10.z.object({
|
|
37033
|
+
current: import_zod10.z.object({
|
|
37034
|
+
transcribe_concurrent: import_zod10.z.number(),
|
|
37035
|
+
tts_concurrent: import_zod10.z.number()
|
|
37036
|
+
}).describe("Live counts read from Redis"),
|
|
37037
|
+
limits: import_zod10.z.object({
|
|
37038
|
+
transcribe_concurrent: import_zod10.z.number().or(import_zod10.z.null()),
|
|
37039
|
+
tts_concurrent: import_zod10.z.number().or(import_zod10.z.null())
|
|
37040
|
+
}).describe("Configured limits")
|
|
37041
|
+
}),
|
|
37042
|
+
organization: import_zod10.z.object({
|
|
37043
|
+
current: import_zod10.z.object({
|
|
37044
|
+
transcribe_concurrent: import_zod10.z.number(),
|
|
37045
|
+
tts_concurrent: import_zod10.z.number()
|
|
37046
|
+
}).describe("Live counts read from Redis"),
|
|
37047
|
+
limits: import_zod10.z.object({
|
|
37048
|
+
transcribe_concurrent: import_zod10.z.number().or(import_zod10.z.null()),
|
|
37049
|
+
tts_concurrent: import_zod10.z.number().or(import_zod10.z.null())
|
|
37050
|
+
}).describe("Configured limits")
|
|
37051
|
+
})
|
|
37052
|
+
});
|
|
36635
37053
|
|
|
36636
37054
|
// src/generated/soniox/streaming-types.zod.ts
|
|
36637
37055
|
var streaming_types_zod_exports = {};
|
|
@@ -36716,10 +37134,10 @@ var sonioxStructuredContextSchema = import_zod11.z.object({
|
|
|
36716
37134
|
var sonioxContextSchema = import_zod11.z.union([sonioxStructuredContextSchema, import_zod11.z.string()]);
|
|
36717
37135
|
var sonioxRealtimeModelSchema = import_zod11.z.enum([
|
|
36718
37136
|
"stt-rt-v4",
|
|
36719
|
-
"stt-rt-v3",
|
|
36720
37137
|
"stt-rt-preview",
|
|
36721
37138
|
"stt-rt-v3-preview",
|
|
36722
|
-
"stt-rt-preview-v2"
|
|
37139
|
+
"stt-rt-preview-v2",
|
|
37140
|
+
"stt-rt-v3"
|
|
36723
37141
|
]);
|
|
36724
37142
|
var streamingTranscriberParams3 = import_zod11.z.object({
|
|
36725
37143
|
model: sonioxRealtimeModelSchema,
|
|
@@ -36727,12 +37145,16 @@ var streamingTranscriberParams3 = import_zod11.z.object({
|
|
|
36727
37145
|
sampleRate: import_zod11.z.number().optional(),
|
|
36728
37146
|
numChannels: import_zod11.z.number().optional(),
|
|
36729
37147
|
languageHints: import_zod11.z.array(import_zod11.z.string()).optional(),
|
|
37148
|
+
languageHintsStrict: import_zod11.z.boolean().optional(),
|
|
36730
37149
|
context: sonioxContextSchema.optional(),
|
|
36731
37150
|
enableSpeakerDiarization: import_zod11.z.boolean().optional(),
|
|
36732
37151
|
enableLanguageIdentification: import_zod11.z.boolean().optional(),
|
|
36733
37152
|
enableEndpointDetection: import_zod11.z.boolean().optional(),
|
|
37153
|
+
maxEndpointDelayMs: import_zod11.z.number().optional(),
|
|
36734
37154
|
translation: sonioxTranslationConfigSchema.optional(),
|
|
36735
|
-
clientReferenceId: import_zod11.z.string().optional()
|
|
37155
|
+
clientReferenceId: import_zod11.z.string().optional(),
|
|
37156
|
+
keepaliveIntervalMs: import_zod11.z.number().optional(),
|
|
37157
|
+
connectTimeoutMs: import_zod11.z.number().optional()
|
|
36736
37158
|
});
|
|
36737
37159
|
var sonioxTranslationStatusSchema = import_zod11.z.enum(["original", "translation", "none"]);
|
|
36738
37160
|
var sonioxTokenSchema = import_zod11.z.object({
|
|
@@ -37324,6 +37746,7 @@ __export(schema_exports5, {
|
|
|
37324
37746
|
V1ListenPostParametersCallbackMethod: () => V1ListenPostParametersCallbackMethod,
|
|
37325
37747
|
V1ListenPostParametersCustomIntentMode: () => V1ListenPostParametersCustomIntentMode,
|
|
37326
37748
|
V1ListenPostParametersCustomTopicMode: () => V1ListenPostParametersCustomTopicMode,
|
|
37749
|
+
V1ListenPostParametersDiarizeModel: () => V1ListenPostParametersDiarizeModel,
|
|
37327
37750
|
V1ListenPostParametersEncoding: () => V1ListenPostParametersEncoding,
|
|
37328
37751
|
V1ListenPostParametersModel0: () => V1ListenPostParametersModel0,
|
|
37329
37752
|
V1ListenPostParametersRedactSchemaOneOf1Items: () => V1ListenPostParametersRedactSchemaOneOf1Items,
|
|
@@ -37362,6 +37785,13 @@ __export(schema_exports5, {
|
|
|
37362
37785
|
V1SpeakPostParametersSampleRate4: () => V1SpeakPostParametersSampleRate4
|
|
37363
37786
|
});
|
|
37364
37787
|
|
|
37788
|
+
// src/generated/deepgram/schema/v1ListenPostParametersDiarizeModel.ts
|
|
37789
|
+
var V1ListenPostParametersDiarizeModel = {
|
|
37790
|
+
latest: "latest",
|
|
37791
|
+
v1: "v1",
|
|
37792
|
+
v2: "v2"
|
|
37793
|
+
};
|
|
37794
|
+
|
|
37365
37795
|
// src/generated/deepgram/schema/v1ListenPostParametersModel0.ts
|
|
37366
37796
|
var V1ListenPostParametersModel0 = {
|
|
37367
37797
|
"nova-3": "nova-3",
|
|
@@ -37578,6 +38008,7 @@ var V1SpeakPostParametersSampleRate = {
|
|
|
37578
38008
|
var schema_exports6 = {};
|
|
37579
38009
|
__export(schema_exports6, {
|
|
37580
38010
|
AudioResponseFormat: () => AudioResponseFormat,
|
|
38011
|
+
AudioTranscriptionDelay: () => AudioTranscriptionDelay,
|
|
37581
38012
|
CreateSpeechRequestResponseFormat: () => CreateSpeechRequestResponseFormat,
|
|
37582
38013
|
CreateSpeechRequestStreamFormat: () => CreateSpeechRequestStreamFormat,
|
|
37583
38014
|
CreateTranscriptionRequestTimestampGranularitiesItem: () => CreateTranscriptionRequestTimestampGranularitiesItem,
|
|
@@ -37597,12 +38028,14 @@ __export(schema_exports6, {
|
|
|
37597
38028
|
RealtimeAudioFormatsAnyOfType: () => RealtimeAudioFormatsAnyOfType,
|
|
37598
38029
|
RealtimeCreateClientSecretRequestExpiresAfterAnchor: () => RealtimeCreateClientSecretRequestExpiresAfterAnchor,
|
|
37599
38030
|
RealtimeFunctionToolType: () => RealtimeFunctionToolType,
|
|
38031
|
+
RealtimeReasoningEffort: () => RealtimeReasoningEffort,
|
|
37600
38032
|
RealtimeSessionCreateRequestGAIncludeItem: () => RealtimeSessionCreateRequestGAIncludeItem,
|
|
37601
38033
|
RealtimeSessionCreateRequestGAOutputModalitiesItem: () => RealtimeSessionCreateRequestGAOutputModalitiesItem,
|
|
37602
38034
|
RealtimeSessionCreateRequestGAType: () => RealtimeSessionCreateRequestGAType,
|
|
37603
38035
|
RealtimeSessionCreateRequestModalitiesItem: () => RealtimeSessionCreateRequestModalitiesItem,
|
|
37604
38036
|
RealtimeSessionCreateRequestToolsItemType: () => RealtimeSessionCreateRequestToolsItemType,
|
|
37605
38037
|
RealtimeSessionCreateResponseGAIncludeItem: () => RealtimeSessionCreateResponseGAIncludeItem,
|
|
38038
|
+
RealtimeSessionCreateResponseGAObject: () => RealtimeSessionCreateResponseGAObject,
|
|
37606
38039
|
RealtimeSessionCreateResponseGAOutputModalitiesItem: () => RealtimeSessionCreateResponseGAOutputModalitiesItem,
|
|
37607
38040
|
RealtimeSessionCreateResponseGAType: () => RealtimeSessionCreateResponseGAType,
|
|
37608
38041
|
RealtimeSessionCreateResponseIncludeItem: () => RealtimeSessionCreateResponseIncludeItem,
|
|
@@ -37633,6 +38066,15 @@ __export(schema_exports6, {
|
|
|
37633
38066
|
VoiceResourceObject: () => VoiceResourceObject
|
|
37634
38067
|
});
|
|
37635
38068
|
|
|
38069
|
+
// src/generated/openai/schema/audioTranscriptionDelay.ts
|
|
38070
|
+
var AudioTranscriptionDelay = {
|
|
38071
|
+
minimal: "minimal",
|
|
38072
|
+
low: "low",
|
|
38073
|
+
medium: "medium",
|
|
38074
|
+
high: "high",
|
|
38075
|
+
xhigh: "xhigh"
|
|
38076
|
+
};
|
|
38077
|
+
|
|
37636
38078
|
// src/generated/openai/schema/createSpeechRequestResponseFormat.ts
|
|
37637
38079
|
var CreateSpeechRequestResponseFormat = {
|
|
37638
38080
|
mp3: "mp3",
|
|
@@ -37745,6 +38187,15 @@ var RealtimeFunctionToolType = {
|
|
|
37745
38187
|
function: "function"
|
|
37746
38188
|
};
|
|
37747
38189
|
|
|
38190
|
+
// src/generated/openai/schema/realtimeReasoningEffort.ts
|
|
38191
|
+
var RealtimeReasoningEffort = {
|
|
38192
|
+
minimal: "minimal",
|
|
38193
|
+
low: "low",
|
|
38194
|
+
medium: "medium",
|
|
38195
|
+
high: "high",
|
|
38196
|
+
xhigh: "xhigh"
|
|
38197
|
+
};
|
|
38198
|
+
|
|
37748
38199
|
// src/generated/openai/schema/realtimeSessionCreateRequestGAIncludeItem.ts
|
|
37749
38200
|
var RealtimeSessionCreateRequestGAIncludeItem = {
|
|
37750
38201
|
iteminput_audio_transcriptionlogprobs: "item.input_audio_transcription.logprobs"
|
|
@@ -37777,6 +38228,11 @@ var RealtimeSessionCreateResponseGAIncludeItem = {
|
|
|
37777
38228
|
iteminput_audio_transcriptionlogprobs: "item.input_audio_transcription.logprobs"
|
|
37778
38229
|
};
|
|
37779
38230
|
|
|
38231
|
+
// src/generated/openai/schema/realtimeSessionCreateResponseGAObject.ts
|
|
38232
|
+
var RealtimeSessionCreateResponseGAObject = {
|
|
38233
|
+
realtimesession: "realtime.session"
|
|
38234
|
+
};
|
|
38235
|
+
|
|
37780
38236
|
// src/generated/openai/schema/realtimeSessionCreateResponseGAOutputModalitiesItem.ts
|
|
37781
38237
|
var RealtimeSessionCreateResponseGAOutputModalitiesItem = {
|
|
37782
38238
|
text: "text",
|
|
@@ -37921,6 +38377,7 @@ __export(schema_exports7, {
|
|
|
37921
38377
|
AutoChaptersResultErrorType: () => AutoChaptersResultErrorType,
|
|
37922
38378
|
ErrorResponseError: () => ErrorResponseError,
|
|
37923
38379
|
GetJobsJobidAlignmentTags: () => GetJobsJobidAlignmentTags,
|
|
38380
|
+
GetJobsJobidObjectUrlsUrlForItem: () => GetJobsJobidObjectUrlsUrlForItem,
|
|
37924
38381
|
GetJobsJobidTranscriptFormat: () => GetJobsJobidTranscriptFormat,
|
|
37925
38382
|
JobDetailsStatus: () => JobDetailsStatus,
|
|
37926
38383
|
JobMode: () => JobMode,
|
|
@@ -37990,6 +38447,13 @@ var GetJobsJobidAlignmentTags = {
|
|
|
37990
38447
|
one_per_line: "one_per_line"
|
|
37991
38448
|
};
|
|
37992
38449
|
|
|
38450
|
+
// src/generated/speechmatics/schema/getJobsJobidObjectUrlsUrlForItem.ts
|
|
38451
|
+
var GetJobsJobidObjectUrlsUrlForItem = {
|
|
38452
|
+
data: "data",
|
|
38453
|
+
audio_mp3: "audio_mp3",
|
|
38454
|
+
transcript: "transcript"
|
|
38455
|
+
};
|
|
38456
|
+
|
|
37993
38457
|
// src/generated/speechmatics/schema/getJobsJobidTranscriptFormat.ts
|
|
37994
38458
|
var GetJobsJobidTranscriptFormat = {
|
|
37995
38459
|
"json-v2": "json-v2",
|
|
@@ -38106,6 +38570,15 @@ var WrittenFormRecognitionResultType = {
|
|
|
38106
38570
|
word: "word"
|
|
38107
38571
|
};
|
|
38108
38572
|
|
|
38573
|
+
// src/generated/soniox/sdk-types.ts
|
|
38574
|
+
var sdk_types_exports = {};
|
|
38575
|
+
__export(sdk_types_exports, {
|
|
38576
|
+
RealtimeSttSession: () => import_node.RealtimeSttSession,
|
|
38577
|
+
SonioxFetchHttpClient: () => import_node.FetchHttpClient,
|
|
38578
|
+
SonioxNodeClient: () => import_node.SonioxNodeClient
|
|
38579
|
+
});
|
|
38580
|
+
var import_node = require("@soniox/node");
|
|
38581
|
+
|
|
38109
38582
|
// src/generated/elevenlabs/schema/index.ts
|
|
38110
38583
|
var schema_exports8 = {};
|
|
38111
38584
|
__export(schema_exports8, {
|
|
@@ -38183,6 +38656,10 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
|
|
|
38183
38656
|
deleteJobsJobidParams: () => deleteJobsJobidParams,
|
|
38184
38657
|
deleteJobsJobidQueryParams: () => deleteJobsJobidQueryParams,
|
|
38185
38658
|
deleteJobsJobidResponse: () => deleteJobsJobidResponse,
|
|
38659
|
+
deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault: () => deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault,
|
|
38660
|
+
deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault: () => deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault,
|
|
38661
|
+
deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
|
|
38662
|
+
deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
|
|
38186
38663
|
deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
|
|
38187
38664
|
deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
|
|
38188
38665
|
deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
|
|
@@ -38198,8 +38675,15 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
|
|
|
38198
38675
|
getJobsJobidDataResponse: () => getJobsJobidDataResponse,
|
|
38199
38676
|
getJobsJobidLogParams: () => getJobsJobidLogParams,
|
|
38200
38677
|
getJobsJobidLogResponse: () => getJobsJobidLogResponse,
|
|
38678
|
+
getJobsJobidObjectUrlsParams: () => getJobsJobidObjectUrlsParams,
|
|
38679
|
+
getJobsJobidObjectUrlsQueryParams: () => getJobsJobidObjectUrlsQueryParams,
|
|
38680
|
+
getJobsJobidObjectUrlsResponse: () => getJobsJobidObjectUrlsResponse,
|
|
38201
38681
|
getJobsJobidParams: () => getJobsJobidParams,
|
|
38202
38682
|
getJobsJobidResponse: () => getJobsJobidResponse,
|
|
38683
|
+
getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault: () => getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault,
|
|
38684
|
+
getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault: () => getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault,
|
|
38685
|
+
getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
|
|
38686
|
+
getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
|
|
38203
38687
|
getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
|
|
38204
38688
|
getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
|
|
38205
38689
|
getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
|
|
@@ -38214,6 +38698,8 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
|
|
|
38214
38698
|
getJobsJobidTranscriptQueryParams: () => getJobsJobidTranscriptQueryParams,
|
|
38215
38699
|
getJobsJobidTranscriptResponse: () => getJobsJobidTranscriptResponse,
|
|
38216
38700
|
getJobsJobidTranscriptResponseJobDurationMin: () => getJobsJobidTranscriptResponseJobDurationMin,
|
|
38701
|
+
getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
|
|
38702
|
+
getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
|
|
38217
38703
|
getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp,
|
|
38218
38704
|
getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
|
|
38219
38705
|
getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax,
|
|
@@ -38225,6 +38711,10 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
|
|
|
38225
38711
|
getJobsQueryLimitMax: () => getJobsQueryLimitMax,
|
|
38226
38712
|
getJobsQueryParams: () => getJobsQueryParams,
|
|
38227
38713
|
getJobsResponse: () => getJobsResponse,
|
|
38714
|
+
getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault: () => getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault,
|
|
38715
|
+
getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault: () => getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault,
|
|
38716
|
+
getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
|
|
38717
|
+
getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
|
|
38228
38718
|
getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
|
|
38229
38719
|
getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
|
|
38230
38720
|
getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
|
|
@@ -38235,12 +38725,18 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
|
|
|
38235
38725
|
getJobsResponseJobsItemDurationMin: () => getJobsResponseJobsItemDurationMin,
|
|
38236
38726
|
getUsageQueryParams: () => getUsageQueryParams,
|
|
38237
38727
|
getUsageResponse: () => getUsageResponse,
|
|
38238
|
-
postJobsBody: () => postJobsBody
|
|
38728
|
+
postJobsBody: () => postJobsBody,
|
|
38729
|
+
postJobsHeader: () => postJobsHeader
|
|
38239
38730
|
});
|
|
38240
38731
|
var import_zod12 = require("zod");
|
|
38732
|
+
var postJobsHeader = import_zod12.z.object({
|
|
38733
|
+
"X-SM-Processing-Data": import_zod12.z.string().optional().describe(
|
|
38734
|
+
'**Note**: Only available for on-prem\nJSON dictionary of processing settings for the job worker. Currently supports `parallel_engines` (integer), which controls the number of engines the worker can use in parallel for this job, and `user_id` (string), which is the user id for this job. Example: `{"parallel_engines": 4}`'
|
|
38735
|
+
)
|
|
38736
|
+
});
|
|
38241
38737
|
var postJobsBody = import_zod12.z.object({
|
|
38242
38738
|
config: import_zod12.z.string().describe(
|
|
38243
|
-
"JSON containing a `JobConfig` model indicating the type and parameters for the recognition job."
|
|
38739
|
+
"JSON containing a [`JobConfig`](/speech-to-text/batch/input#jobconfig-schema) model indicating the type and parameters for the recognition job."
|
|
38244
38740
|
),
|
|
38245
38741
|
data_file: import_zod12.z.instanceof(File).optional().describe(
|
|
38246
38742
|
"The data file to be processed. Alternatively the data file can be fetched from a url specified in `JobConfig`."
|
|
@@ -38262,9 +38758,13 @@ var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitiv
|
|
|
38262
38758
|
var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
|
|
38263
38759
|
var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
|
|
38264
38760
|
var getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
|
|
38761
|
+
var getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
|
|
38762
|
+
var getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
|
|
38265
38763
|
var getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
|
|
38266
38764
|
var getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
|
|
38267
38765
|
var getJobsResponseJobsItemConfigTranslationConfigTargetLanguagesMax = 5;
|
|
38766
|
+
var getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault = "auto";
|
|
38767
|
+
var getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault = "brief";
|
|
38268
38768
|
var getJobsResponse = import_zod12.z.object({
|
|
38269
38769
|
jobs: import_zod12.z.array(
|
|
38270
38770
|
import_zod12.z.object({
|
|
@@ -38344,19 +38844,30 @@ var getJobsResponse = import_zod12.z.object({
|
|
|
38344
38844
|
max_delay_mode: import_zod12.z.enum(["fixed", "flexible"]).optional().describe(
|
|
38345
38845
|
"Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
|
|
38346
38846
|
),
|
|
38847
|
+
audio_filtering_config: import_zod12.z.object({
|
|
38848
|
+
volume_threshold: import_zod12.z.number().min(
|
|
38849
|
+
getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
|
|
38850
|
+
).max(
|
|
38851
|
+
getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
|
|
38852
|
+
).optional().describe(
|
|
38853
|
+
"Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
|
|
38854
|
+
)
|
|
38855
|
+
}).optional().describe("Configuration for limiting the transcription of quiet audio."),
|
|
38347
38856
|
transcript_filtering_config: import_zod12.z.object({
|
|
38348
38857
|
remove_disfluencies: import_zod12.z.boolean().optional().describe(
|
|
38349
|
-
"If true, words
|
|
38858
|
+
"If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
|
|
38350
38859
|
),
|
|
38351
38860
|
replacements: import_zod12.z.array(
|
|
38352
38861
|
import_zod12.z.object({
|
|
38353
|
-
from: import_zod12.z.string(),
|
|
38354
|
-
to: import_zod12.z.string()
|
|
38862
|
+
from: import_zod12.z.string().describe("The text or pattern identified to be replaced."),
|
|
38863
|
+
to: import_zod12.z.string().describe(
|
|
38864
|
+
"The corrected or formatted string to appear in the transcript."
|
|
38865
|
+
)
|
|
38355
38866
|
})
|
|
38356
38867
|
).optional().describe(
|
|
38357
|
-
|
|
38868
|
+
'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
|
|
38358
38869
|
)
|
|
38359
|
-
}).optional().describe("Configuration for applying filtering to the transcription"),
|
|
38870
|
+
}).optional().describe("Configuration for applying filtering to the transcription."),
|
|
38360
38871
|
speaker_diarization_config: import_zod12.z.object({
|
|
38361
38872
|
prefer_current_speaker: import_zod12.z.boolean().optional().describe(
|
|
38362
38873
|
'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
|
|
@@ -38367,6 +38878,19 @@ var getJobsResponse = import_zod12.z.object({
|
|
|
38367
38878
|
getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
|
|
38368
38879
|
).optional().describe(
|
|
38369
38880
|
"Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
|
|
38881
|
+
),
|
|
38882
|
+
get_speakers: import_zod12.z.boolean().optional().describe(
|
|
38883
|
+
"If true, speaker identifiers will be returned at the end of transcript."
|
|
38884
|
+
),
|
|
38885
|
+
speakers: import_zod12.z.array(
|
|
38886
|
+
import_zod12.z.object({
|
|
38887
|
+
label: import_zod12.z.string().min(1).describe(
|
|
38888
|
+
"Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
|
|
38889
|
+
),
|
|
38890
|
+
speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
|
|
38891
|
+
})
|
|
38892
|
+
).optional().describe(
|
|
38893
|
+
"Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
|
|
38370
38894
|
)
|
|
38371
38895
|
}).optional().describe("Configuration for speaker diarization")
|
|
38372
38896
|
}).optional(),
|
|
@@ -38424,10 +38948,14 @@ var getJobsResponse = import_zod12.z.object({
|
|
|
38424
38948
|
default_language: import_zod12.z.string().optional()
|
|
38425
38949
|
}).optional(),
|
|
38426
38950
|
summarization_config: import_zod12.z.object({
|
|
38427
|
-
content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).
|
|
38428
|
-
|
|
38951
|
+
content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).default(getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault).describe(
|
|
38952
|
+
"Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
|
|
38953
|
+
),
|
|
38954
|
+
summary_length: import_zod12.z.enum(["brief", "detailed"]).default(getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault).describe(
|
|
38955
|
+
"Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
|
|
38956
|
+
),
|
|
38429
38957
|
summary_type: import_zod12.z.enum(["paragraphs", "bullets"]).optional()
|
|
38430
|
-
}).optional(),
|
|
38958
|
+
}).optional().describe("Configuration options for summarization."),
|
|
38431
38959
|
sentiment_analysis_config: import_zod12.z.object({}).optional(),
|
|
38432
38960
|
topic_detection_config: import_zod12.z.object({
|
|
38433
38961
|
topics: import_zod12.z.array(import_zod12.z.string()).optional()
|
|
@@ -38449,7 +38977,7 @@ var getJobsResponse = import_zod12.z.object({
|
|
|
38449
38977
|
"Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
|
|
38450
38978
|
)
|
|
38451
38979
|
}).describe(
|
|
38452
|
-
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs
|
|
38980
|
+
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
|
|
38453
38981
|
)
|
|
38454
38982
|
)
|
|
38455
38983
|
});
|
|
@@ -38461,9 +38989,13 @@ var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitiv
|
|
|
38461
38989
|
var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
|
|
38462
38990
|
var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
|
|
38463
38991
|
var getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
|
|
38992
|
+
var getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
|
|
38993
|
+
var getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
|
|
38464
38994
|
var getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
|
|
38465
38995
|
var getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
|
|
38466
38996
|
var getJobsJobidResponseJobConfigTranslationConfigTargetLanguagesMax = 5;
|
|
38997
|
+
var getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault = "auto";
|
|
38998
|
+
var getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault = "brief";
|
|
38467
38999
|
var getJobsJobidResponse = import_zod12.z.object({
|
|
38468
39000
|
job: import_zod12.z.object({
|
|
38469
39001
|
created_at: import_zod12.z.string().datetime({}).describe("The UTC date time the job was created."),
|
|
@@ -38540,19 +39072,30 @@ var getJobsJobidResponse = import_zod12.z.object({
|
|
|
38540
39072
|
max_delay_mode: import_zod12.z.enum(["fixed", "flexible"]).optional().describe(
|
|
38541
39073
|
"Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
|
|
38542
39074
|
),
|
|
39075
|
+
audio_filtering_config: import_zod12.z.object({
|
|
39076
|
+
volume_threshold: import_zod12.z.number().min(
|
|
39077
|
+
getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
|
|
39078
|
+
).max(
|
|
39079
|
+
getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
|
|
39080
|
+
).optional().describe(
|
|
39081
|
+
"Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
|
|
39082
|
+
)
|
|
39083
|
+
}).optional().describe("Configuration for limiting the transcription of quiet audio."),
|
|
38543
39084
|
transcript_filtering_config: import_zod12.z.object({
|
|
38544
39085
|
remove_disfluencies: import_zod12.z.boolean().optional().describe(
|
|
38545
|
-
"If true, words
|
|
39086
|
+
"If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
|
|
38546
39087
|
),
|
|
38547
39088
|
replacements: import_zod12.z.array(
|
|
38548
39089
|
import_zod12.z.object({
|
|
38549
|
-
from: import_zod12.z.string(),
|
|
38550
|
-
to: import_zod12.z.string()
|
|
39090
|
+
from: import_zod12.z.string().describe("The text or pattern identified to be replaced."),
|
|
39091
|
+
to: import_zod12.z.string().describe(
|
|
39092
|
+
"The corrected or formatted string to appear in the transcript."
|
|
39093
|
+
)
|
|
38551
39094
|
})
|
|
38552
39095
|
).optional().describe(
|
|
38553
|
-
|
|
39096
|
+
'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
|
|
38554
39097
|
)
|
|
38555
|
-
}).optional().describe("Configuration for applying filtering to the transcription"),
|
|
39098
|
+
}).optional().describe("Configuration for applying filtering to the transcription."),
|
|
38556
39099
|
speaker_diarization_config: import_zod12.z.object({
|
|
38557
39100
|
prefer_current_speaker: import_zod12.z.boolean().optional().describe(
|
|
38558
39101
|
'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
|
|
@@ -38563,6 +39106,19 @@ var getJobsJobidResponse = import_zod12.z.object({
|
|
|
38563
39106
|
getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
|
|
38564
39107
|
).optional().describe(
|
|
38565
39108
|
"Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
|
|
39109
|
+
),
|
|
39110
|
+
get_speakers: import_zod12.z.boolean().optional().describe(
|
|
39111
|
+
"If true, speaker identifiers will be returned at the end of transcript."
|
|
39112
|
+
),
|
|
39113
|
+
speakers: import_zod12.z.array(
|
|
39114
|
+
import_zod12.z.object({
|
|
39115
|
+
label: import_zod12.z.string().min(1).describe(
|
|
39116
|
+
"Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
|
|
39117
|
+
),
|
|
39118
|
+
speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
|
|
39119
|
+
})
|
|
39120
|
+
).optional().describe(
|
|
39121
|
+
"Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
|
|
38566
39122
|
)
|
|
38567
39123
|
}).optional().describe("Configuration for speaker diarization")
|
|
38568
39124
|
}).optional(),
|
|
@@ -38618,10 +39174,14 @@ var getJobsJobidResponse = import_zod12.z.object({
|
|
|
38618
39174
|
default_language: import_zod12.z.string().optional()
|
|
38619
39175
|
}).optional(),
|
|
38620
39176
|
summarization_config: import_zod12.z.object({
|
|
38621
|
-
content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).
|
|
38622
|
-
|
|
39177
|
+
content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).default(getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault).describe(
|
|
39178
|
+
"Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
|
|
39179
|
+
),
|
|
39180
|
+
summary_length: import_zod12.z.enum(["brief", "detailed"]).default(getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault).describe(
|
|
39181
|
+
"Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
|
|
39182
|
+
),
|
|
38623
39183
|
summary_type: import_zod12.z.enum(["paragraphs", "bullets"]).optional()
|
|
38624
|
-
}).optional(),
|
|
39184
|
+
}).optional().describe("Configuration options for summarization."),
|
|
38625
39185
|
sentiment_analysis_config: import_zod12.z.object({}).optional(),
|
|
38626
39186
|
topic_detection_config: import_zod12.z.object({
|
|
38627
39187
|
topics: import_zod12.z.array(import_zod12.z.string()).optional()
|
|
@@ -38643,7 +39203,7 @@ var getJobsJobidResponse = import_zod12.z.object({
|
|
|
38643
39203
|
"Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
|
|
38644
39204
|
)
|
|
38645
39205
|
}).describe(
|
|
38646
|
-
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs
|
|
39206
|
+
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
|
|
38647
39207
|
)
|
|
38648
39208
|
});
|
|
38649
39209
|
var deleteJobsJobidParams = import_zod12.z.object({
|
|
@@ -38659,9 +39219,13 @@ var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensi
|
|
|
38659
39219
|
var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
|
|
38660
39220
|
var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
|
|
38661
39221
|
var deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
|
|
39222
|
+
var deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
|
|
39223
|
+
var deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
|
|
38662
39224
|
var deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
|
|
38663
39225
|
var deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
|
|
38664
39226
|
var deleteJobsJobidResponseJobConfigTranslationConfigTargetLanguagesMax = 5;
|
|
39227
|
+
var deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault = "auto";
|
|
39228
|
+
var deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault = "brief";
|
|
38665
39229
|
var deleteJobsJobidResponse = import_zod12.z.object({
|
|
38666
39230
|
job: import_zod12.z.object({
|
|
38667
39231
|
created_at: import_zod12.z.string().datetime({}).describe("The UTC date time the job was created."),
|
|
@@ -38738,19 +39302,30 @@ var deleteJobsJobidResponse = import_zod12.z.object({
|
|
|
38738
39302
|
max_delay_mode: import_zod12.z.enum(["fixed", "flexible"]).optional().describe(
|
|
38739
39303
|
"Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
|
|
38740
39304
|
),
|
|
39305
|
+
audio_filtering_config: import_zod12.z.object({
|
|
39306
|
+
volume_threshold: import_zod12.z.number().min(
|
|
39307
|
+
deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
|
|
39308
|
+
).max(
|
|
39309
|
+
deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
|
|
39310
|
+
).optional().describe(
|
|
39311
|
+
"Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
|
|
39312
|
+
)
|
|
39313
|
+
}).optional().describe("Configuration for limiting the transcription of quiet audio."),
|
|
38741
39314
|
transcript_filtering_config: import_zod12.z.object({
|
|
38742
39315
|
remove_disfluencies: import_zod12.z.boolean().optional().describe(
|
|
38743
|
-
"If true, words
|
|
39316
|
+
"If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
|
|
38744
39317
|
),
|
|
38745
39318
|
replacements: import_zod12.z.array(
|
|
38746
39319
|
import_zod12.z.object({
|
|
38747
|
-
from: import_zod12.z.string(),
|
|
38748
|
-
to: import_zod12.z.string()
|
|
39320
|
+
from: import_zod12.z.string().describe("The text or pattern identified to be replaced."),
|
|
39321
|
+
to: import_zod12.z.string().describe(
|
|
39322
|
+
"The corrected or formatted string to appear in the transcript."
|
|
39323
|
+
)
|
|
38749
39324
|
})
|
|
38750
39325
|
).optional().describe(
|
|
38751
|
-
|
|
39326
|
+
'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
|
|
38752
39327
|
)
|
|
38753
|
-
}).optional().describe("Configuration for applying filtering to the transcription"),
|
|
39328
|
+
}).optional().describe("Configuration for applying filtering to the transcription."),
|
|
38754
39329
|
speaker_diarization_config: import_zod12.z.object({
|
|
38755
39330
|
prefer_current_speaker: import_zod12.z.boolean().optional().describe(
|
|
38756
39331
|
'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
|
|
@@ -38761,6 +39336,19 @@ var deleteJobsJobidResponse = import_zod12.z.object({
|
|
|
38761
39336
|
deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
|
|
38762
39337
|
).optional().describe(
|
|
38763
39338
|
"Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
|
|
39339
|
+
),
|
|
39340
|
+
get_speakers: import_zod12.z.boolean().optional().describe(
|
|
39341
|
+
"If true, speaker identifiers will be returned at the end of transcript."
|
|
39342
|
+
),
|
|
39343
|
+
speakers: import_zod12.z.array(
|
|
39344
|
+
import_zod12.z.object({
|
|
39345
|
+
label: import_zod12.z.string().min(1).describe(
|
|
39346
|
+
"Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
|
|
39347
|
+
),
|
|
39348
|
+
speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
|
|
39349
|
+
})
|
|
39350
|
+
).optional().describe(
|
|
39351
|
+
"Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
|
|
38764
39352
|
)
|
|
38765
39353
|
}).optional().describe("Configuration for speaker diarization")
|
|
38766
39354
|
}).optional(),
|
|
@@ -38816,10 +39404,14 @@ var deleteJobsJobidResponse = import_zod12.z.object({
|
|
|
38816
39404
|
default_language: import_zod12.z.string().optional()
|
|
38817
39405
|
}).optional(),
|
|
38818
39406
|
summarization_config: import_zod12.z.object({
|
|
38819
|
-
content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).
|
|
38820
|
-
|
|
39407
|
+
content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).default(deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault).describe(
|
|
39408
|
+
"Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
|
|
39409
|
+
),
|
|
39410
|
+
summary_length: import_zod12.z.enum(["brief", "detailed"]).default(deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault).describe(
|
|
39411
|
+
"Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
|
|
39412
|
+
),
|
|
38821
39413
|
summary_type: import_zod12.z.enum(["paragraphs", "bullets"]).optional()
|
|
38822
|
-
}).optional(),
|
|
39414
|
+
}).optional().describe("Configuration options for summarization."),
|
|
38823
39415
|
sentiment_analysis_config: import_zod12.z.object({}).optional(),
|
|
38824
39416
|
topic_detection_config: import_zod12.z.object({
|
|
38825
39417
|
topics: import_zod12.z.array(import_zod12.z.string()).optional()
|
|
@@ -38841,7 +39433,7 @@ var deleteJobsJobidResponse = import_zod12.z.object({
|
|
|
38841
39433
|
"Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
|
|
38842
39434
|
)
|
|
38843
39435
|
}).describe(
|
|
38844
|
-
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs
|
|
39436
|
+
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
|
|
38845
39437
|
)
|
|
38846
39438
|
});
|
|
38847
39439
|
var getJobsJobidDataParams = import_zod12.z.object({
|
|
@@ -38863,6 +39455,8 @@ var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverride
|
|
|
38863
39455
|
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
|
|
38864
39456
|
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
|
|
38865
39457
|
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
|
|
39458
|
+
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
|
|
39459
|
+
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
|
|
38866
39460
|
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
|
|
38867
39461
|
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
|
|
38868
39462
|
var getJobsJobidTranscriptResponseResultsItemVolumeMin = 0;
|
|
@@ -38934,19 +39528,28 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
|
|
|
38934
39528
|
max_delay_mode: import_zod12.z.enum(["fixed", "flexible"]).optional().describe(
|
|
38935
39529
|
"Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
|
|
38936
39530
|
),
|
|
39531
|
+
audio_filtering_config: import_zod12.z.object({
|
|
39532
|
+
volume_threshold: import_zod12.z.number().min(
|
|
39533
|
+
getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
|
|
39534
|
+
).max(
|
|
39535
|
+
getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
|
|
39536
|
+
).optional().describe(
|
|
39537
|
+
"Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
|
|
39538
|
+
)
|
|
39539
|
+
}).optional().describe("Configuration for limiting the transcription of quiet audio."),
|
|
38937
39540
|
transcript_filtering_config: import_zod12.z.object({
|
|
38938
39541
|
remove_disfluencies: import_zod12.z.boolean().optional().describe(
|
|
38939
|
-
"If true, words
|
|
39542
|
+
"If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
|
|
38940
39543
|
),
|
|
38941
39544
|
replacements: import_zod12.z.array(
|
|
38942
39545
|
import_zod12.z.object({
|
|
38943
|
-
from: import_zod12.z.string(),
|
|
38944
|
-
to: import_zod12.z.string()
|
|
39546
|
+
from: import_zod12.z.string().describe("The text or pattern identified to be replaced."),
|
|
39547
|
+
to: import_zod12.z.string().describe("The corrected or formatted string to appear in the transcript.")
|
|
38945
39548
|
})
|
|
38946
39549
|
).optional().describe(
|
|
38947
|
-
|
|
39550
|
+
'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
|
|
38948
39551
|
)
|
|
38949
|
-
}).optional().describe("Configuration for applying filtering to the transcription"),
|
|
39552
|
+
}).optional().describe("Configuration for applying filtering to the transcription."),
|
|
38950
39553
|
speaker_diarization_config: import_zod12.z.object({
|
|
38951
39554
|
prefer_current_speaker: import_zod12.z.boolean().optional().describe(
|
|
38952
39555
|
'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
|
|
@@ -38957,9 +39560,23 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
|
|
|
38957
39560
|
getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
|
|
38958
39561
|
).optional().describe(
|
|
38959
39562
|
"Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
|
|
39563
|
+
),
|
|
39564
|
+
get_speakers: import_zod12.z.boolean().optional().describe(
|
|
39565
|
+
"If true, speaker identifiers will be returned at the end of transcript."
|
|
39566
|
+
),
|
|
39567
|
+
speakers: import_zod12.z.array(
|
|
39568
|
+
import_zod12.z.object({
|
|
39569
|
+
label: import_zod12.z.string().min(1).describe(
|
|
39570
|
+
"Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
|
|
39571
|
+
),
|
|
39572
|
+
speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
|
|
39573
|
+
})
|
|
39574
|
+
).optional().describe(
|
|
39575
|
+
"Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
|
|
38960
39576
|
)
|
|
38961
39577
|
}).optional().describe("Configuration for speaker diarization")
|
|
38962
39578
|
}).optional(),
|
|
39579
|
+
orchestrator_version: import_zod12.z.string().optional().describe("The engine version used to generate transcription output."),
|
|
38963
39580
|
translation_errors: import_zod12.z.array(
|
|
38964
39581
|
import_zod12.z.object({
|
|
38965
39582
|
type: import_zod12.z.enum(["translation_failed", "unsupported_translation_pair"]).optional(),
|
|
@@ -39037,10 +39654,7 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
|
|
|
39037
39654
|
"OTHER"
|
|
39038
39655
|
]).optional(),
|
|
39039
39656
|
message: import_zod12.z.string().optional()
|
|
39040
|
-
}).optional()
|
|
39041
|
-
orchestrator_version: import_zod12.z.string().optional().describe(
|
|
39042
|
-
"Orchestrator version in PEP 440 Format or set to 'version_not_found' as default."
|
|
39043
|
-
)
|
|
39657
|
+
}).optional()
|
|
39044
39658
|
}).describe(
|
|
39045
39659
|
"Summary information about the output from an ASR job, comprising the job type and configuration parameters used when generating the output."
|
|
39046
39660
|
),
|
|
@@ -39123,6 +39737,12 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
|
|
|
39123
39737
|
"An ASR job output item. The primary item types are `word` and `punctuation`. Other item types may be present, for example to provide semantic information of different forms."
|
|
39124
39738
|
)
|
|
39125
39739
|
),
|
|
39740
|
+
speakers: import_zod12.z.array(
|
|
39741
|
+
import_zod12.z.object({
|
|
39742
|
+
label: import_zod12.z.string().min(1).describe("Speaker label."),
|
|
39743
|
+
speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
|
|
39744
|
+
})
|
|
39745
|
+
).optional().describe("List of unique speaker identifiers detected in the transcript."),
|
|
39126
39746
|
translations: import_zod12.z.record(
|
|
39127
39747
|
import_zod12.z.string(),
|
|
39128
39748
|
import_zod12.z.array(
|
|
@@ -39144,13 +39764,23 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
|
|
|
39144
39764
|
sentiment_analysis: import_zod12.z.object({
|
|
39145
39765
|
segments: import_zod12.z.array(
|
|
39146
39766
|
import_zod12.z.object({
|
|
39147
|
-
text: import_zod12.z.string().optional(),
|
|
39148
|
-
|
|
39149
|
-
|
|
39150
|
-
|
|
39151
|
-
|
|
39152
|
-
|
|
39153
|
-
|
|
39767
|
+
text: import_zod12.z.string().optional().describe("Represents the transcript of the analysed segment"),
|
|
39768
|
+
sentiment: import_zod12.z.string().optional().describe(
|
|
39769
|
+
"The assigned sentiment to the segment, which can be positive, neutral or negative"
|
|
39770
|
+
),
|
|
39771
|
+
start_time: import_zod12.z.number().optional().describe(
|
|
39772
|
+
"The timestamp corresponding to the beginning of the transcription segment"
|
|
39773
|
+
),
|
|
39774
|
+
end_time: import_zod12.z.number().optional().describe(
|
|
39775
|
+
"The timestamp corresponding to the end of the transcription segment"
|
|
39776
|
+
),
|
|
39777
|
+
speaker: import_zod12.z.string().optional().describe(
|
|
39778
|
+
"The speaker label for the segment, if speaker diarization is enabled"
|
|
39779
|
+
),
|
|
39780
|
+
channel: import_zod12.z.string().optional().describe(
|
|
39781
|
+
"The channel label for the segment, if channel diarization is enabled"
|
|
39782
|
+
),
|
|
39783
|
+
confidence: import_zod12.z.number().optional().describe("A confidence score in the range of 0-1")
|
|
39154
39784
|
}).describe("Represents a segment of text and its associated sentiment.")
|
|
39155
39785
|
).optional().describe(
|
|
39156
39786
|
"An array of objects that represent a segment of text and its associated sentiment."
|
|
@@ -39209,10 +39839,10 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
|
|
|
39209
39839
|
}).optional().describe("Main object that holds topic detection results."),
|
|
39210
39840
|
chapters: import_zod12.z.array(
|
|
39211
39841
|
import_zod12.z.object({
|
|
39212
|
-
title: import_zod12.z.string().optional(),
|
|
39213
|
-
summary: import_zod12.z.string().optional(),
|
|
39214
|
-
start_time: import_zod12.z.number().optional(),
|
|
39215
|
-
end_time: import_zod12.z.number().optional()
|
|
39842
|
+
title: import_zod12.z.string().optional().describe("The auto-generated title for the chapter"),
|
|
39843
|
+
summary: import_zod12.z.string().optional().describe("An auto-generated paragraph-style, short summary of the chapter"),
|
|
39844
|
+
start_time: import_zod12.z.number().optional().describe("The start time of the chapter in the audio file"),
|
|
39845
|
+
end_time: import_zod12.z.number().optional().describe("The end time of the chapter in the audio file")
|
|
39216
39846
|
})
|
|
39217
39847
|
).optional().describe("An array of objects that represent summarized chapters of the transcript"),
|
|
39218
39848
|
audio_events: import_zod12.z.array(
|
|
@@ -39257,6 +39887,18 @@ var getJobsJobidLogParams = import_zod12.z.object({
|
|
|
39257
39887
|
jobid: import_zod12.z.string().describe("ID of the job.")
|
|
39258
39888
|
});
|
|
39259
39889
|
var getJobsJobidLogResponse = import_zod12.z.instanceof(File);
|
|
39890
|
+
var getJobsJobidObjectUrlsParams = import_zod12.z.object({
|
|
39891
|
+
jobid: import_zod12.z.string().describe("ID of the job.")
|
|
39892
|
+
});
|
|
39893
|
+
var getJobsJobidObjectUrlsQueryParams = import_zod12.z.object({
|
|
39894
|
+
ttl: import_zod12.z.number().describe("Time to live in seconds for the signed URLs"),
|
|
39895
|
+
url_for: import_zod12.z.array(import_zod12.z.enum(["data", "audio_mp3", "transcript"]))
|
|
39896
|
+
});
|
|
39897
|
+
var getJobsJobidObjectUrlsResponse = import_zod12.z.object({
|
|
39898
|
+
data: import_zod12.z.string().optional(),
|
|
39899
|
+
audio_mp3: import_zod12.z.string().optional(),
|
|
39900
|
+
transcript: import_zod12.z.string().optional()
|
|
39901
|
+
});
|
|
39260
39902
|
var getUsageQueryParams = import_zod12.z.object({
|
|
39261
39903
|
since: import_zod12.z.string().date().optional().describe(
|
|
39262
39904
|
"Include usage after the given date (inclusive). This is a [ISO-8601](https://en.wikipedia.org/wiki/ISO_8601) calendar date format: `YYYY-MM-DD`."
|
|
@@ -39390,7 +40032,7 @@ var speechToTextBodyKeytermsDefault = [];
|
|
|
39390
40032
|
var speechToTextBody = import_zod13.z.object({
|
|
39391
40033
|
model_id: import_zod13.z.enum(["scribe_v1", "scribe_v2"]).describe("The ID of the model to use for transcription."),
|
|
39392
40034
|
file: import_zod13.z.instanceof(File).or(import_zod13.z.null()).optional().describe(
|
|
39393
|
-
"The file to transcribe (100ms minimum audio length). All major audio and video formats are supported. Exactly one of the file or cloud_storage_url parameters must be provided. The file size must be less than
|
|
40035
|
+
"The file to transcribe (100ms minimum audio length). All major audio and video formats are supported. Exactly one of the file or cloud_storage_url parameters must be provided. The file size must be less than 5.0GB."
|
|
39394
40036
|
),
|
|
39395
40037
|
language_code: import_zod13.z.string().or(import_zod13.z.null()).optional().describe(
|
|
39396
40038
|
"An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand. Defaults to null, in this case the language is predicted automatically."
|
|
@@ -39468,7 +40110,7 @@ var speechToTextBody = import_zod13.z.object({
|
|
|
39468
40110
|
"The format of input audio. Options are 'pcm_s16le_16' or 'other' For `pcm_s16le_16`, the input audio must be 16-bit PCM at a 16kHz sample rate, single channel (mono), and little-endian byte order. Latency will be lower than with passing an encoded waveform."
|
|
39469
40111
|
),
|
|
39470
40112
|
cloud_storage_url: import_zod13.z.string().or(import_zod13.z.null()).optional().describe(
|
|
39471
|
-
"The HTTPS URL of the file to transcribe. Exactly one of the file or cloud_storage_url parameters must be provided. The file must be accessible via HTTPS and the file size must be less than 2GB. Any valid HTTPS URL is accepted, including URLs from cloud storage providers (AWS S3, Google Cloud Storage, Cloudflare R2, etc.), CDNs, or any other HTTPS source. URLs can be pre-signed or include authentication tokens in query parameters."
|
|
40113
|
+
"[Deprecated] This parameter is deprecated and will be removed in the future. Use 'source_url' instead.The HTTPS URL of the file to transcribe. Exactly one of the file or cloud_storage_url parameters must be provided. The file must be accessible via HTTPS and the file size must be less than 2GB. Any valid HTTPS URL is accepted, including URLs from cloud storage providers (AWS S3, Google Cloud Storage, Cloudflare R2, etc.), CDNs, or any other HTTPS source. URLs can be pre-signed or include authentication tokens in query parameters."
|
|
39472
40114
|
),
|
|
39473
40115
|
source_url: import_zod13.z.string().or(import_zod13.z.null()).optional().describe(
|
|
39474
40116
|
"The URL of an audio or video file to transcribe. Supports hosted video or audio files, YouTube video URLs, TikTok video URLs, and other video hosting services."
|
|
@@ -39507,7 +40149,7 @@ var speechToTextBody = import_zod13.z.object({
|
|
|
39507
40149
|
"How to format redacted entities. 'redacted' replaces with {REDACTED}, 'entity_type' replaces with {ENTITY_TYPE}, 'enumerated_entity_type' replaces with {ENTITY_TYPE_N} where N enumerates each occurrence. Only used when entity_redaction is set."
|
|
39508
40150
|
),
|
|
39509
40151
|
keyterms: import_zod13.z.array(import_zod13.z.string()).default(speechToTextBodyKeytermsDefault).describe(
|
|
39510
|
-
'A list of keyterms to bias the transcription towards. The keyterms are words or phrases you want the model to recognise more accurately. The number of keyterms cannot exceed 1000. The length of each keyterm must be less than 50 characters. Keyterms can contain at most 5 words (after normalisation). For example ["hello", "world", "technical term"]. Usage of this parameter will incur an additional 20% surcharge on the base transcription cost. When more than 100 keyterms are provided, a minimum billable duration of 20 seconds applies per request.'
|
|
40152
|
+
'A list of keyterms to bias the transcription towards. The keyterms are words or phrases you want the model to recognise more accurately. The number of keyterms cannot exceed 1000. The length of each keyterm must be less than 50 characters. Keyterms can contain at most 5 words (after normalisation). For example ["hello", "world", "technical term"]. The following characters are not supported: `<`, `>`, `{`, `}`, `[`, `]`, `\\`. Usage of this parameter will incur an additional 20% surcharge on the base transcription cost. When more than 100 keyterms are provided, a minimum billable duration of 20 seconds applies per request.'
|
|
39511
40153
|
)
|
|
39512
40154
|
});
|
|
39513
40155
|
var speechToTextResponse = import_zod13.z.object({
|
|
@@ -39873,6 +40515,7 @@ var deleteTranscriptByIdResponse = import_zod13.z.any();
|
|
|
39873
40515
|
SonioxModels,
|
|
39874
40516
|
SonioxRealtimeModel,
|
|
39875
40517
|
SonioxRegion,
|
|
40518
|
+
SonioxSDK,
|
|
39876
40519
|
SonioxStreamingSchema,
|
|
39877
40520
|
SonioxStreamingTypes,
|
|
39878
40521
|
SonioxStreamingUpdateSchema,
|