voice-router-dev 0.9.4 → 0.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/constants.d.mts +11 -92
- package/dist/constants.d.ts +11 -92
- package/dist/constants.js +11 -88
- package/dist/constants.mjs +11 -88
- package/dist/{field-configs-BXXH2T3E.d.mts → field-configs-BVOZQiG3.d.mts} +8854 -7772
- package/dist/{field-configs-BXXH2T3E.d.ts → field-configs-BVOZQiG3.d.ts} +8854 -7772
- package/dist/field-configs.d.mts +1 -1
- package/dist/field-configs.d.ts +1 -1
- package/dist/field-configs.js +583 -150
- package/dist/field-configs.mjs +583 -150
- package/dist/index.d.mts +1577 -559
- package/dist/index.d.ts +1577 -559
- package/dist/index.js +916 -274
- package/dist/index.mjs +919 -274
- package/dist/{provider-metadata-D1d-9cng.d.ts → provider-metadata-CiSA4fWP.d.ts} +2 -2
- package/dist/{provider-metadata-BJ29OPW1.d.mts → provider-metadata-oxzd1q6t.d.mts} +2 -2
- package/dist/provider-metadata.d.mts +1 -1
- package/dist/provider-metadata.d.ts +1 -1
- package/dist/provider-metadata.js +3 -66
- package/dist/provider-metadata.mjs +3 -66
- package/dist/{speechToTextChunkResponseModel-BY2lGyZ3.d.ts → speechToTextChunkResponseModel-Dns0Ma9x.d.ts} +364 -39
- package/dist/{speechToTextChunkResponseModel-KayxDiZ7.d.mts → speechToTextChunkResponseModel-_ZvHTD4e.d.mts} +364 -39
- package/dist/webhooks.d.mts +3 -2
- package/dist/webhooks.d.ts +3 -2
- package/package.json +8 -3
package/dist/index.js
CHANGED
|
@@ -145,6 +145,7 @@ __export(src_exports, {
|
|
|
145
145
|
SonioxModels: () => SonioxModels,
|
|
146
146
|
SonioxRealtimeModel: () => SonioxRealtimeModel,
|
|
147
147
|
SonioxRegion: () => SonioxRegion,
|
|
148
|
+
SonioxSDK: () => sdk_types_exports,
|
|
148
149
|
SonioxStreamingSchema: () => SonioxStreamingSchema,
|
|
149
150
|
SonioxStreamingTypes: () => streaming_types_zod_exports,
|
|
150
151
|
SonioxStreamingUpdateSchema: () => SonioxStreamingUpdateSchema,
|
|
@@ -1333,7 +1334,6 @@ var AzureLocales = [
|
|
|
1333
1334
|
{ code: "ar-YE", name: "Arabic (Yemen)" },
|
|
1334
1335
|
{ code: "as-IN", name: "Assamese (India)" },
|
|
1335
1336
|
{ code: "az-AZ", name: "Azerbaijani (Azerbaijan)" },
|
|
1336
|
-
{ code: "be-BY", name: "Belarusian (Belarus)" },
|
|
1337
1337
|
{ code: "bg-BG", name: "Bulgarian (Bulgaria)" },
|
|
1338
1338
|
{ code: "bn-BD", name: "Bengali (Bangladesh)" },
|
|
1339
1339
|
{ code: "bn-IN", name: "Bengali (India)" },
|
|
@@ -1414,7 +1414,6 @@ var AzureLocales = [
|
|
|
1414
1414
|
{ code: "lo-LA", name: "Lao (Latin)" },
|
|
1415
1415
|
{ code: "lt-LT", name: "Lithuanian (Lithuania)" },
|
|
1416
1416
|
{ code: "lv-LV", name: "Latvian (Latvia)" },
|
|
1417
|
-
{ code: "mi-NZ", name: "Maori (New Zealand)" },
|
|
1418
1417
|
{ code: "mk-MK", name: "Macedonian (North Macedonia)" },
|
|
1419
1418
|
{ code: "ml-IN", name: "Malayalam (India)" },
|
|
1420
1419
|
{ code: "mn-MN", name: "Mongolian (Mongolia)" },
|
|
@@ -1490,7 +1489,6 @@ var AzureLocaleCodes = [
|
|
|
1490
1489
|
"ar-YE",
|
|
1491
1490
|
"as-IN",
|
|
1492
1491
|
"az-AZ",
|
|
1493
|
-
"be-BY",
|
|
1494
1492
|
"bg-BG",
|
|
1495
1493
|
"bn-BD",
|
|
1496
1494
|
"bn-IN",
|
|
@@ -1571,7 +1569,6 @@ var AzureLocaleCodes = [
|
|
|
1571
1569
|
"lo-LA",
|
|
1572
1570
|
"lt-LT",
|
|
1573
1571
|
"lv-LV",
|
|
1574
|
-
"mi-NZ",
|
|
1575
1572
|
"mk-MK",
|
|
1576
1573
|
"ml-IN",
|
|
1577
1574
|
"mn-MN",
|
|
@@ -1647,7 +1644,6 @@ var AzureLocaleLabels = {
|
|
|
1647
1644
|
"ar-YE": "Arabic (Yemen)",
|
|
1648
1645
|
"as-IN": "Assamese (India)",
|
|
1649
1646
|
"az-AZ": "Azerbaijani (Azerbaijan)",
|
|
1650
|
-
"be-BY": "Belarusian (Belarus)",
|
|
1651
1647
|
"bg-BG": "Bulgarian (Bulgaria)",
|
|
1652
1648
|
"bn-BD": "Bengali (Bangladesh)",
|
|
1653
1649
|
"bn-IN": "Bengali (India)",
|
|
@@ -1728,7 +1724,6 @@ var AzureLocaleLabels = {
|
|
|
1728
1724
|
"lo-LA": "Lao (Latin)",
|
|
1729
1725
|
"lt-LT": "Lithuanian (Lithuania)",
|
|
1730
1726
|
"lv-LV": "Latvian (Latvia)",
|
|
1731
|
-
"mi-NZ": "Maori (New Zealand)",
|
|
1732
1727
|
"mk-MK": "Macedonian (North Macedonia)",
|
|
1733
1728
|
"ml-IN": "Malayalam (India)",
|
|
1734
1729
|
"mn-MN": "Mongolian (Mongolia)",
|
|
@@ -1804,7 +1799,6 @@ var AzureLocale = {
|
|
|
1804
1799
|
"ar-YE": "ar-YE",
|
|
1805
1800
|
"as-IN": "as-IN",
|
|
1806
1801
|
"az-AZ": "az-AZ",
|
|
1807
|
-
"be-BY": "be-BY",
|
|
1808
1802
|
"bg-BG": "bg-BG",
|
|
1809
1803
|
"bn-BD": "bn-BD",
|
|
1810
1804
|
"bn-IN": "bn-IN",
|
|
@@ -1885,7 +1879,6 @@ var AzureLocale = {
|
|
|
1885
1879
|
"lo-LA": "lo-LA",
|
|
1886
1880
|
"lt-LT": "lt-LT",
|
|
1887
1881
|
"lv-LV": "lv-LV",
|
|
1888
|
-
"mi-NZ": "mi-NZ",
|
|
1889
1882
|
"mk-MK": "mk-MK",
|
|
1890
1883
|
"ml-IN": "ml-IN",
|
|
1891
1884
|
"mn-MN": "mn-MN",
|
|
@@ -1976,8 +1969,6 @@ var ElevenLabsLanguages = [
|
|
|
1976
1969
|
{ code: "hr", name: "Croatian" },
|
|
1977
1970
|
{ code: "bg", name: "Bulgarian" },
|
|
1978
1971
|
{ code: "lt", name: "Lithuanian" },
|
|
1979
|
-
{ code: "la", name: "Latin" },
|
|
1980
|
-
{ code: "mi", name: "Maori" },
|
|
1981
1972
|
{ code: "ml", name: "Malayalam" },
|
|
1982
1973
|
{ code: "cy", name: "Welsh" },
|
|
1983
1974
|
{ code: "sk", name: "Slovak" },
|
|
@@ -1991,20 +1982,16 @@ var ElevenLabsLanguages = [
|
|
|
1991
1982
|
{ code: "kn", name: "Kannada" },
|
|
1992
1983
|
{ code: "et", name: "Estonian" },
|
|
1993
1984
|
{ code: "mk", name: "Macedonian" },
|
|
1994
|
-
{ code: "br", name: "Breton" },
|
|
1995
|
-
{ code: "eu", name: "Basque" },
|
|
1996
1985
|
{ code: "is", name: "Icelandic" },
|
|
1997
1986
|
{ code: "hy", name: "Armenian" },
|
|
1998
1987
|
{ code: "ne", name: "Nepali" },
|
|
1999
1988
|
{ code: "mn", name: "Mongolian" },
|
|
2000
1989
|
{ code: "bs", name: "Bosnian" },
|
|
2001
1990
|
{ code: "kk", name: "Kazakh" },
|
|
2002
|
-
{ code: "sq", name: "Albanian" },
|
|
2003
1991
|
{ code: "sw", name: "Swahili" },
|
|
2004
1992
|
{ code: "gl", name: "Galician" },
|
|
2005
1993
|
{ code: "mr", name: "Marathi" },
|
|
2006
1994
|
{ code: "pa", name: "Punjabi" },
|
|
2007
|
-
{ code: "si", name: "Sinhala" },
|
|
2008
1995
|
{ code: "km", name: "Khmer" },
|
|
2009
1996
|
{ code: "sn", name: "Shona" },
|
|
2010
1997
|
{ code: "yo", name: "Yoruba" },
|
|
@@ -2017,29 +2004,16 @@ var ElevenLabsLanguages = [
|
|
|
2017
2004
|
{ code: "sd", name: "Sindhi" },
|
|
2018
2005
|
{ code: "gu", name: "Gujarati" },
|
|
2019
2006
|
{ code: "am", name: "Amharic" },
|
|
2020
|
-
{ code: "yi", name: "Yiddish" },
|
|
2021
2007
|
{ code: "lo", name: "Lao" },
|
|
2022
2008
|
{ code: "uz", name: "Uzbek" },
|
|
2023
|
-
{ code: "fo", name: "Faroese" },
|
|
2024
|
-
{ code: "ht", name: "Haitian Creole" },
|
|
2025
2009
|
{ code: "ps", name: "Pashto" },
|
|
2026
|
-
{ code: "tk", name: "Turkmen" },
|
|
2027
|
-
{ code: "nn", name: "Norwegian Nynorsk" },
|
|
2028
2010
|
{ code: "mt", name: "Maltese" },
|
|
2029
|
-
{ code: "sa", name: "Sanskrit" },
|
|
2030
2011
|
{ code: "lb", name: "Luxembourgish" },
|
|
2031
2012
|
{ code: "my", name: "Burmese" },
|
|
2032
|
-
{ code: "bo", name: "Tibetan" },
|
|
2033
|
-
{ code: "tl", name: "Tagalog" },
|
|
2034
|
-
{ code: "mg", name: "Malagasy" },
|
|
2035
2013
|
{ code: "as", name: "Assamese" },
|
|
2036
|
-
{ code: "tt", name: "Tatar" },
|
|
2037
|
-
{ code: "haw", name: "Hawaiian" },
|
|
2038
2014
|
{ code: "ln", name: "Lingala" },
|
|
2039
2015
|
{ code: "ha", name: "Hausa" },
|
|
2040
|
-
{ code: "ba", name: "Bashkir" },
|
|
2041
|
-
{ code: "jw", name: "Javanese" },
|
|
2042
|
-
{ code: "su", name: "Sundanese" }
|
|
2016
|
+
{ code: "jw", name: "Javanese" }
|
|
2043
2017
|
];
|
|
2044
2018
|
var ElevenLabsLanguageCodes = [
|
|
2045
2019
|
"en",
|
|
@@ -2077,8 +2051,6 @@ var ElevenLabsLanguageCodes = [
|
|
|
2077
2051
|
"hr",
|
|
2078
2052
|
"bg",
|
|
2079
2053
|
"lt",
|
|
2080
|
-
"la",
|
|
2081
|
-
"mi",
|
|
2082
2054
|
"ml",
|
|
2083
2055
|
"cy",
|
|
2084
2056
|
"sk",
|
|
@@ -2092,20 +2064,16 @@ var ElevenLabsLanguageCodes = [
|
|
|
2092
2064
|
"kn",
|
|
2093
2065
|
"et",
|
|
2094
2066
|
"mk",
|
|
2095
|
-
"br",
|
|
2096
|
-
"eu",
|
|
2097
2067
|
"is",
|
|
2098
2068
|
"hy",
|
|
2099
2069
|
"ne",
|
|
2100
2070
|
"mn",
|
|
2101
2071
|
"bs",
|
|
2102
2072
|
"kk",
|
|
2103
|
-
"sq",
|
|
2104
2073
|
"sw",
|
|
2105
2074
|
"gl",
|
|
2106
2075
|
"mr",
|
|
2107
2076
|
"pa",
|
|
2108
|
-
"si",
|
|
2109
2077
|
"km",
|
|
2110
2078
|
"sn",
|
|
2111
2079
|
"yo",
|
|
@@ -2118,29 +2086,16 @@ var ElevenLabsLanguageCodes = [
|
|
|
2118
2086
|
"sd",
|
|
2119
2087
|
"gu",
|
|
2120
2088
|
"am",
|
|
2121
|
-
"yi",
|
|
2122
2089
|
"lo",
|
|
2123
2090
|
"uz",
|
|
2124
|
-
"fo",
|
|
2125
|
-
"ht",
|
|
2126
2091
|
"ps",
|
|
2127
|
-
"tk",
|
|
2128
|
-
"nn",
|
|
2129
2092
|
"mt",
|
|
2130
|
-
"sa",
|
|
2131
2093
|
"lb",
|
|
2132
2094
|
"my",
|
|
2133
|
-
"bo",
|
|
2134
|
-
"tl",
|
|
2135
|
-
"mg",
|
|
2136
2095
|
"as",
|
|
2137
|
-
"tt",
|
|
2138
|
-
"haw",
|
|
2139
2096
|
"ln",
|
|
2140
2097
|
"ha",
|
|
2141
|
-
"ba",
|
|
2142
|
-
"jw",
|
|
2143
|
-
"su"
|
|
2098
|
+
"jw"
|
|
2144
2099
|
];
|
|
2145
2100
|
var ElevenLabsLanguageLabels = {
|
|
2146
2101
|
en: "English",
|
|
@@ -2178,8 +2133,6 @@ var ElevenLabsLanguageLabels = {
|
|
|
2178
2133
|
hr: "Croatian",
|
|
2179
2134
|
bg: "Bulgarian",
|
|
2180
2135
|
lt: "Lithuanian",
|
|
2181
|
-
la: "Latin",
|
|
2182
|
-
mi: "Maori",
|
|
2183
2136
|
ml: "Malayalam",
|
|
2184
2137
|
cy: "Welsh",
|
|
2185
2138
|
sk: "Slovak",
|
|
@@ -2193,20 +2146,16 @@ var ElevenLabsLanguageLabels = {
|
|
|
2193
2146
|
kn: "Kannada",
|
|
2194
2147
|
et: "Estonian",
|
|
2195
2148
|
mk: "Macedonian",
|
|
2196
|
-
br: "Breton",
|
|
2197
|
-
eu: "Basque",
|
|
2198
2149
|
is: "Icelandic",
|
|
2199
2150
|
hy: "Armenian",
|
|
2200
2151
|
ne: "Nepali",
|
|
2201
2152
|
mn: "Mongolian",
|
|
2202
2153
|
bs: "Bosnian",
|
|
2203
2154
|
kk: "Kazakh",
|
|
2204
|
-
sq: "Albanian",
|
|
2205
2155
|
sw: "Swahili",
|
|
2206
2156
|
gl: "Galician",
|
|
2207
2157
|
mr: "Marathi",
|
|
2208
2158
|
pa: "Punjabi",
|
|
2209
|
-
si: "Sinhala",
|
|
2210
2159
|
km: "Khmer",
|
|
2211
2160
|
sn: "Shona",
|
|
2212
2161
|
yo: "Yoruba",
|
|
@@ -2219,29 +2168,16 @@ var ElevenLabsLanguageLabels = {
|
|
|
2219
2168
|
sd: "Sindhi",
|
|
2220
2169
|
gu: "Gujarati",
|
|
2221
2170
|
am: "Amharic",
|
|
2222
|
-
yi: "Yiddish",
|
|
2223
2171
|
lo: "Lao",
|
|
2224
2172
|
uz: "Uzbek",
|
|
2225
|
-
fo: "Faroese",
|
|
2226
|
-
ht: "Haitian Creole",
|
|
2227
2173
|
ps: "Pashto",
|
|
2228
|
-
tk: "Turkmen",
|
|
2229
|
-
nn: "Norwegian Nynorsk",
|
|
2230
2174
|
mt: "Maltese",
|
|
2231
|
-
sa: "Sanskrit",
|
|
2232
2175
|
lb: "Luxembourgish",
|
|
2233
2176
|
my: "Burmese",
|
|
2234
|
-
bo: "Tibetan",
|
|
2235
|
-
tl: "Tagalog",
|
|
2236
|
-
mg: "Malagasy",
|
|
2237
2177
|
as: "Assamese",
|
|
2238
|
-
tt: "Tatar",
|
|
2239
|
-
haw: "Hawaiian",
|
|
2240
2178
|
ln: "Lingala",
|
|
2241
2179
|
ha: "Hausa",
|
|
2242
|
-
ba: "Bashkir",
|
|
2243
|
-
jw: "Javanese",
|
|
2244
|
-
su: "Sundanese"
|
|
2180
|
+
jw: "Javanese"
|
|
2245
2181
|
};
|
|
2246
2182
|
|
|
2247
2183
|
// src/generated/gladia/schema/streamingSupportedBitDepthEnum.ts
|
|
@@ -2746,6 +2682,7 @@ var OpenAITranscriptionModel = {
|
|
|
2746
2682
|
"gpt-4o-mini-transcribe-2025-12-15": "gpt-4o-mini-transcribe-2025-12-15",
|
|
2747
2683
|
"gpt-4o-transcribe": "gpt-4o-transcribe",
|
|
2748
2684
|
"gpt-4o-transcribe-diarize": "gpt-4o-transcribe-diarize",
|
|
2685
|
+
"gpt-realtime-whisper": "gpt-realtime-whisper",
|
|
2749
2686
|
"whisper-1": "whisper-1"
|
|
2750
2687
|
};
|
|
2751
2688
|
var OpenAIRealtimeModel = {
|
|
@@ -2761,6 +2698,7 @@ var OpenAIRealtimeModel = {
|
|
|
2761
2698
|
"gpt-audio-mini-2025-12-15": "gpt-audio-mini-2025-12-15",
|
|
2762
2699
|
"gpt-realtime": "gpt-realtime",
|
|
2763
2700
|
"gpt-realtime-1.5": "gpt-realtime-1.5",
|
|
2701
|
+
"gpt-realtime-2": "gpt-realtime-2",
|
|
2764
2702
|
"gpt-realtime-2025-08-28": "gpt-realtime-2025-08-28",
|
|
2765
2703
|
"gpt-realtime-mini": "gpt-realtime-mini",
|
|
2766
2704
|
"gpt-realtime-mini-2025-10-06": "gpt-realtime-mini-2025-10-06",
|
|
@@ -5577,12 +5515,20 @@ var EntityType = {
|
|
|
5577
5515
|
email_address: "email_address",
|
|
5578
5516
|
event: "event",
|
|
5579
5517
|
filename: "filename",
|
|
5518
|
+
gender: "gender",
|
|
5580
5519
|
gender_sexuality: "gender_sexuality",
|
|
5581
5520
|
healthcare_number: "healthcare_number",
|
|
5582
5521
|
injury: "injury",
|
|
5583
5522
|
ip_address: "ip_address",
|
|
5584
5523
|
language: "language",
|
|
5585
5524
|
location: "location",
|
|
5525
|
+
location_address: "location_address",
|
|
5526
|
+
location_address_street: "location_address_street",
|
|
5527
|
+
location_city: "location_city",
|
|
5528
|
+
location_coordinate: "location_coordinate",
|
|
5529
|
+
location_country: "location_country",
|
|
5530
|
+
location_state: "location_state",
|
|
5531
|
+
location_zip: "location_zip",
|
|
5586
5532
|
marital_status: "marital_status",
|
|
5587
5533
|
medical_condition: "medical_condition",
|
|
5588
5534
|
medical_process: "medical_process",
|
|
@@ -5591,6 +5537,7 @@ var EntityType = {
|
|
|
5591
5537
|
number_sequence: "number_sequence",
|
|
5592
5538
|
occupation: "occupation",
|
|
5593
5539
|
organization: "organization",
|
|
5540
|
+
organization_medical_facility: "organization_medical_facility",
|
|
5594
5541
|
passport_number: "passport_number",
|
|
5595
5542
|
password: "password",
|
|
5596
5543
|
person_age: "person_age",
|
|
@@ -5599,6 +5546,7 @@ var EntityType = {
|
|
|
5599
5546
|
physical_attribute: "physical_attribute",
|
|
5600
5547
|
political_affiliation: "political_affiliation",
|
|
5601
5548
|
religion: "religion",
|
|
5549
|
+
sexuality: "sexuality",
|
|
5602
5550
|
statistics: "statistics",
|
|
5603
5551
|
time: "time",
|
|
5604
5552
|
url: "url",
|
|
@@ -5625,12 +5573,20 @@ var PiiPolicy = {
|
|
|
5625
5573
|
email_address: "email_address",
|
|
5626
5574
|
event: "event",
|
|
5627
5575
|
filename: "filename",
|
|
5576
|
+
gender: "gender",
|
|
5628
5577
|
gender_sexuality: "gender_sexuality",
|
|
5629
5578
|
healthcare_number: "healthcare_number",
|
|
5630
5579
|
injury: "injury",
|
|
5631
5580
|
ip_address: "ip_address",
|
|
5632
5581
|
language: "language",
|
|
5633
5582
|
location: "location",
|
|
5583
|
+
location_address: "location_address",
|
|
5584
|
+
location_address_street: "location_address_street",
|
|
5585
|
+
location_city: "location_city",
|
|
5586
|
+
location_coordinate: "location_coordinate",
|
|
5587
|
+
location_country: "location_country",
|
|
5588
|
+
location_state: "location_state",
|
|
5589
|
+
location_zip: "location_zip",
|
|
5634
5590
|
marital_status: "marital_status",
|
|
5635
5591
|
medical_condition: "medical_condition",
|
|
5636
5592
|
medical_process: "medical_process",
|
|
@@ -5639,6 +5595,7 @@ var PiiPolicy = {
|
|
|
5639
5595
|
number_sequence: "number_sequence",
|
|
5640
5596
|
occupation: "occupation",
|
|
5641
5597
|
organization: "organization",
|
|
5598
|
+
organization_medical_facility: "organization_medical_facility",
|
|
5642
5599
|
passport_number: "passport_number",
|
|
5643
5600
|
password: "password",
|
|
5644
5601
|
person_age: "person_age",
|
|
@@ -5647,6 +5604,7 @@ var PiiPolicy = {
|
|
|
5647
5604
|
physical_attribute: "physical_attribute",
|
|
5648
5605
|
political_affiliation: "political_affiliation",
|
|
5649
5606
|
religion: "religion",
|
|
5607
|
+
sexuality: "sexuality",
|
|
5650
5608
|
statistics: "statistics",
|
|
5651
5609
|
time: "time",
|
|
5652
5610
|
url: "url",
|
|
@@ -5715,7 +5673,8 @@ var TranscriptOptionalParamsRedactPiiAudioOptionsOverrideAudioRedactionMethod =
|
|
|
5715
5673
|
|
|
5716
5674
|
// src/generated/assemblyai/schema/transcriptOptionalParamsRemoveAudioTags.ts
|
|
5717
5675
|
var TranscriptOptionalParamsRemoveAudioTags = {
|
|
5718
|
-
all: "all"
|
|
5676
|
+
all: "all",
|
|
5677
|
+
speaker: "speaker"
|
|
5719
5678
|
};
|
|
5720
5679
|
|
|
5721
5680
|
// src/generated/assemblyai/schema/transcriptRedactPiiAudioOptionsOverrideAudioRedactionMethod.ts
|
|
@@ -5725,7 +5684,8 @@ var TranscriptRedactPiiAudioOptionsOverrideAudioRedactionMethod = {
|
|
|
5725
5684
|
|
|
5726
5685
|
// src/generated/assemblyai/schema/transcriptRemoveAudioTags.ts
|
|
5727
5686
|
var TranscriptRemoveAudioTags = {
|
|
5728
|
-
all: "all"
|
|
5687
|
+
all: "all",
|
|
5688
|
+
speaker: "speaker"
|
|
5729
5689
|
};
|
|
5730
5690
|
|
|
5731
5691
|
// src/generated/assemblyai/api/assemblyAIAPI.ts
|
|
@@ -9617,15 +9577,18 @@ var import_axios9 = __toESM(require("axios"));
|
|
|
9617
9577
|
// src/generated/soniox/schema/index.ts
|
|
9618
9578
|
var schema_exports4 = {};
|
|
9619
9579
|
__export(schema_exports4, {
|
|
9580
|
+
TTSVoiceGender: () => TTSVoiceGender,
|
|
9620
9581
|
TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
|
|
9621
9582
|
TranscriptionMode: () => TranscriptionMode,
|
|
9622
9583
|
TranscriptionStatus: () => TranscriptionStatus,
|
|
9623
|
-
TranslationConfigType: () => TranslationConfigType
|
|
9584
|
+
TranslationConfigType: () => TranslationConfigType,
|
|
9585
|
+
UsageLogsSort: () => UsageLogsSort
|
|
9624
9586
|
});
|
|
9625
9587
|
|
|
9626
9588
|
// src/generated/soniox/schema/temporaryApiKeyUsageType.ts
|
|
9627
9589
|
var TemporaryApiKeyUsageType = {
|
|
9628
|
-
transcribe_websocket: "transcribe_websocket"
|
|
9590
|
+
transcribe_websocket: "transcribe_websocket",
|
|
9591
|
+
tts_rt: "tts_rt"
|
|
9629
9592
|
};
|
|
9630
9593
|
|
|
9631
9594
|
// src/generated/soniox/schema/transcriptionMode.ts
|
|
@@ -9640,6 +9603,19 @@ var TranslationConfigType = {
|
|
|
9640
9603
|
two_way: "two_way"
|
|
9641
9604
|
};
|
|
9642
9605
|
|
|
9606
|
+
// src/generated/soniox/schema/tTSVoiceGender.ts
|
|
9607
|
+
var TTSVoiceGender = {
|
|
9608
|
+
male: "male",
|
|
9609
|
+
female: "female",
|
|
9610
|
+
neutral: "neutral"
|
|
9611
|
+
};
|
|
9612
|
+
|
|
9613
|
+
// src/generated/soniox/schema/usageLogsSort.ts
|
|
9614
|
+
var UsageLogsSort = {
|
|
9615
|
+
end_time_asc: "end_time_asc",
|
|
9616
|
+
end_time_desc: "end_time_desc"
|
|
9617
|
+
};
|
|
9618
|
+
|
|
9643
9619
|
// src/generated/soniox/api/sonioxPublicAPI.ts
|
|
9644
9620
|
var uploadFile = (uploadFileBody2, options) => {
|
|
9645
9621
|
const formData = new FormData();
|
|
@@ -11007,6 +10983,7 @@ __export(deepgramAPI_zod_exports, {
|
|
|
11007
10983
|
speakGenerateQueryMipOptOutDefault: () => speakGenerateQueryMipOptOutDefault,
|
|
11008
10984
|
speakGenerateQueryModelDefault: () => speakGenerateQueryModelDefault,
|
|
11009
10985
|
speakGenerateQueryParams: () => speakGenerateQueryParams,
|
|
10986
|
+
speakGenerateQuerySpeedDefault: () => speakGenerateQuerySpeedDefault,
|
|
11010
10987
|
speakGenerateResponse: () => speakGenerateResponse
|
|
11011
10988
|
});
|
|
11012
10989
|
var import_zod = require("zod");
|
|
@@ -11061,6 +11038,9 @@ var listenTranscribeQueryParams = import_zod.z.object({
|
|
|
11061
11038
|
diarize: import_zod.z.boolean().optional().describe(
|
|
11062
11039
|
"Recognize speaker changes. Each word in the transcript will be assigned a speaker number starting at 0"
|
|
11063
11040
|
),
|
|
11041
|
+
diarize_model: import_zod.z.enum(["latest", "v1", "v2"]).optional().describe(
|
|
11042
|
+
"Select and enable a specific batch diarization model version. If specifying this parameter, you should not set the deprecated `diarize=true` parameter. Not accepted on streaming requests."
|
|
11043
|
+
),
|
|
11064
11044
|
dictation: import_zod.z.boolean().optional().describe("Dictation mode for controlling formatting with dictated speech"),
|
|
11065
11045
|
encoding: import_zod.z.enum(["linear16", "flac", "mulaw", "amr-nb", "amr-wb", "opus", "speex", "g729"]).optional().describe("Specify the expected encoding of your submitted audio"),
|
|
11066
11046
|
filler_words: import_zod.z.boolean().optional().describe('Filler Words can help transcribe interruptions in your audio, like "uh" and "um"'),
|
|
@@ -11326,6 +11306,7 @@ var listenTranscribeResponse = import_zod.z.object({
|
|
|
11326
11306
|
var speakGenerateQueryCallbackMethodDefault = "POST";
|
|
11327
11307
|
var speakGenerateQueryMipOptOutDefault = false;
|
|
11328
11308
|
var speakGenerateQueryModelDefault = "aura-asteria-en";
|
|
11309
|
+
var speakGenerateQuerySpeedDefault = 1;
|
|
11329
11310
|
var speakGenerateQueryParams = import_zod.z.object({
|
|
11330
11311
|
callback: import_zod.z.string().optional().describe("URL to which we'll make the callback request"),
|
|
11331
11312
|
callback_method: import_zod.z.enum(["POST", "PUT"]).default(speakGenerateQueryCallbackMethodDefault).describe("HTTP method by which the callback request will be made"),
|
|
@@ -11437,6 +11418,9 @@ var speakGenerateQueryParams = import_zod.z.object({
|
|
|
11437
11418
|
import_zod.z.enum(["22050"]).describe("Encoding - mp3. Sample rate is fixed and not configurable (22050 Hz).")
|
|
11438
11419
|
).or(import_zod.z.enum(["48000"]).describe("Encoding - opus. Sample rate is fixed at 48000 Hz.")).optional().describe(
|
|
11439
11420
|
"Sample Rate specifies the sample rate for the output audio. Based on the encoding, different sample rates are supported. For some encodings, the sample rate is not configurable"
|
|
11421
|
+
),
|
|
11422
|
+
speed: import_zod.z.number().default(speakGenerateQuerySpeedDefault).describe(
|
|
11423
|
+
"Speaking rate multiplier that adjusts the pace of generated speech while preserving natural prosody and voice quality. Not yet supported in all languages."
|
|
11440
11424
|
)
|
|
11441
11425
|
});
|
|
11442
11426
|
var speakGenerateHeader = import_zod.z.object({
|
|
@@ -11761,6 +11745,7 @@ __export(assemblyAIAPI_zod_exports, {
|
|
|
11761
11745
|
createTranscriptBodyRedactPiiAudioDefault: () => createTranscriptBodyRedactPiiAudioDefault,
|
|
11762
11746
|
createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault: () => createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault,
|
|
11763
11747
|
createTranscriptBodyRedactPiiDefault: () => createTranscriptBodyRedactPiiDefault,
|
|
11748
|
+
createTranscriptBodyRedactPiiReturnUnredactedDefault: () => createTranscriptBodyRedactPiiReturnUnredactedDefault,
|
|
11764
11749
|
createTranscriptBodySentimentAnalysisDefault: () => createTranscriptBodySentimentAnalysisDefault,
|
|
11765
11750
|
createTranscriptBodySpeakerLabelsDefault: () => createTranscriptBodySpeakerLabelsDefault,
|
|
11766
11751
|
createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault: () => createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault,
|
|
@@ -11831,6 +11816,7 @@ var createTranscriptBodyPunctuateDefault = true;
|
|
|
11831
11816
|
var createTranscriptBodyRedactPiiDefault = false;
|
|
11832
11817
|
var createTranscriptBodyRedactPiiAudioDefault = false;
|
|
11833
11818
|
var createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault = false;
|
|
11819
|
+
var createTranscriptBodyRedactPiiReturnUnredactedDefault = false;
|
|
11834
11820
|
var createTranscriptBodySentimentAnalysisDefault = false;
|
|
11835
11821
|
var createTranscriptBodySpeakerLabelsDefault = false;
|
|
11836
11822
|
var createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault = 1;
|
|
@@ -11869,7 +11855,7 @@ var createTranscriptBody = import_zod3.z.object({
|
|
|
11869
11855
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
11870
11856
|
),
|
|
11871
11857
|
disfluencies: import_zod3.z.boolean().optional().describe(
|
|
11872
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
11858
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
11873
11859
|
),
|
|
11874
11860
|
domain: import_zod3.z.string().nullish().describe(
|
|
11875
11861
|
'Enable domain-specific transcription models to improve accuracy for specialized terminology. Set to `"medical-v1"` to enable [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) for improved accuracy of medical terms such as medications, procedures, conditions, and dosages.\n\nSupported languages: English (`en`), Spanish (`es`), German (`de`), French (`fr`). If used with an unsupported language, the parameter is ignored and a warning is returned.\n'
|
|
@@ -12176,12 +12162,20 @@ var createTranscriptBody = import_zod3.z.object({
|
|
|
12176
12162
|
"email_address",
|
|
12177
12163
|
"event",
|
|
12178
12164
|
"filename",
|
|
12165
|
+
"gender",
|
|
12179
12166
|
"gender_sexuality",
|
|
12180
12167
|
"healthcare_number",
|
|
12181
12168
|
"injury",
|
|
12182
12169
|
"ip_address",
|
|
12183
12170
|
"language",
|
|
12184
12171
|
"location",
|
|
12172
|
+
"location_address",
|
|
12173
|
+
"location_address_street",
|
|
12174
|
+
"location_city",
|
|
12175
|
+
"location_coordinate",
|
|
12176
|
+
"location_country",
|
|
12177
|
+
"location_state",
|
|
12178
|
+
"location_zip",
|
|
12185
12179
|
"marital_status",
|
|
12186
12180
|
"medical_condition",
|
|
12187
12181
|
"medical_process",
|
|
@@ -12190,6 +12184,7 @@ var createTranscriptBody = import_zod3.z.object({
|
|
|
12190
12184
|
"number_sequence",
|
|
12191
12185
|
"occupation",
|
|
12192
12186
|
"organization",
|
|
12187
|
+
"organization_medical_facility",
|
|
12193
12188
|
"passport_number",
|
|
12194
12189
|
"password",
|
|
12195
12190
|
"person_age",
|
|
@@ -12198,6 +12193,7 @@ var createTranscriptBody = import_zod3.z.object({
|
|
|
12198
12193
|
"physical_attribute",
|
|
12199
12194
|
"political_affiliation",
|
|
12200
12195
|
"religion",
|
|
12196
|
+
"sexuality",
|
|
12201
12197
|
"statistics",
|
|
12202
12198
|
"time",
|
|
12203
12199
|
"url",
|
|
@@ -12205,15 +12201,20 @@ var createTranscriptBody = import_zod3.z.object({
|
|
|
12205
12201
|
"username",
|
|
12206
12202
|
"vehicle_id",
|
|
12207
12203
|
"zodiac_sign"
|
|
12208
|
-
]).describe(
|
|
12204
|
+
]).describe(
|
|
12205
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
12206
|
+
)
|
|
12209
12207
|
).optional().describe(
|
|
12210
12208
|
"The list of PII Redaction policies to enable. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
12211
12209
|
),
|
|
12212
12210
|
redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).describe(
|
|
12213
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
12211
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
12214
12212
|
).or(import_zod3.z.null()).optional().describe(
|
|
12215
12213
|
"The replacement logic for detected PII, can be `entity_type` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
12216
12214
|
),
|
|
12215
|
+
redact_pii_return_unredacted: import_zod3.z.boolean().optional().describe(
|
|
12216
|
+
"When set to `true`, returns the original unredacted transcript alongside the redacted one in the same response. Requires `redact_pii` to be `true`, otherwise a 400 error is returned.\n\nWhen enabled, the response includes the additional fields `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. The existing `text`, `words`, and `utterances` fields remain fully redacted. When disabled (default), the response is unchanged and contains only the redacted transcript. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details.\n"
|
|
12217
|
+
),
|
|
12217
12218
|
sentiment_analysis: import_zod3.z.boolean().optional().describe(
|
|
12218
12219
|
"Enable [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech), can be true or false"
|
|
12219
12220
|
),
|
|
@@ -12311,10 +12312,10 @@ var createTranscriptBody = import_zod3.z.object({
|
|
|
12311
12312
|
),
|
|
12312
12313
|
summary_model: import_zod3.z.enum(["informative", "conversational", "catchy"]).optional().describe("The model to summarize the transcript"),
|
|
12313
12314
|
summary_type: import_zod3.z.enum(["bullets", "bullets_verbose", "gist", "headline", "paragraph"]).optional().describe("The type of summary"),
|
|
12314
|
-
remove_audio_tags: import_zod3.z.enum(["all"]).describe(
|
|
12315
|
-
'
|
|
12315
|
+
remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
|
|
12316
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
12316
12317
|
).or(import_zod3.z.null()).optional().describe(
|
|
12317
|
-
'
|
|
12318
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
12318
12319
|
),
|
|
12319
12320
|
temperature: import_zod3.z.number().optional().describe(
|
|
12320
12321
|
"Control the amount of randomness injected into the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
@@ -12448,7 +12449,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12448
12449
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
12449
12450
|
),
|
|
12450
12451
|
disfluencies: import_zod3.z.boolean().nullish().describe(
|
|
12451
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
12452
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
12452
12453
|
),
|
|
12453
12454
|
domain: import_zod3.z.string().nullish().describe(
|
|
12454
12455
|
'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
|
|
@@ -12471,12 +12472,20 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12471
12472
|
"email_address",
|
|
12472
12473
|
"event",
|
|
12473
12474
|
"filename",
|
|
12475
|
+
"gender",
|
|
12474
12476
|
"gender_sexuality",
|
|
12475
12477
|
"healthcare_number",
|
|
12476
12478
|
"injury",
|
|
12477
12479
|
"ip_address",
|
|
12478
12480
|
"language",
|
|
12479
12481
|
"location",
|
|
12482
|
+
"location_address",
|
|
12483
|
+
"location_address_street",
|
|
12484
|
+
"location_city",
|
|
12485
|
+
"location_coordinate",
|
|
12486
|
+
"location_country",
|
|
12487
|
+
"location_state",
|
|
12488
|
+
"location_zip",
|
|
12480
12489
|
"marital_status",
|
|
12481
12490
|
"medical_condition",
|
|
12482
12491
|
"medical_process",
|
|
@@ -12485,6 +12494,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12485
12494
|
"number_sequence",
|
|
12486
12495
|
"occupation",
|
|
12487
12496
|
"organization",
|
|
12497
|
+
"organization_medical_facility",
|
|
12488
12498
|
"passport_number",
|
|
12489
12499
|
"password",
|
|
12490
12500
|
"person_age",
|
|
@@ -12493,6 +12503,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12493
12503
|
"physical_attribute",
|
|
12494
12504
|
"political_affiliation",
|
|
12495
12505
|
"religion",
|
|
12506
|
+
"sexuality",
|
|
12496
12507
|
"statistics",
|
|
12497
12508
|
"time",
|
|
12498
12509
|
"url",
|
|
@@ -12797,6 +12808,24 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12797
12808
|
}).optional().describe(
|
|
12798
12809
|
"Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
|
|
12799
12810
|
),
|
|
12811
|
+
metadata: import_zod3.z.object({
|
|
12812
|
+
domain_used: import_zod3.z.string().nullish().describe(
|
|
12813
|
+
'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
|
|
12814
|
+
),
|
|
12815
|
+
warnings: import_zod3.z.array(
|
|
12816
|
+
import_zod3.z.object({
|
|
12817
|
+
message: import_zod3.z.string().describe("A human-readable description of the warning.")
|
|
12818
|
+
}).describe(
|
|
12819
|
+
"A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
|
|
12820
|
+
)
|
|
12821
|
+
).optional().describe(
|
|
12822
|
+
"Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
|
|
12823
|
+
)
|
|
12824
|
+
}).describe(
|
|
12825
|
+
"Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
|
|
12826
|
+
).or(import_zod3.z.null()).optional().describe(
|
|
12827
|
+
"Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
|
|
12828
|
+
),
|
|
12800
12829
|
multichannel: import_zod3.z.boolean().nullish().describe(
|
|
12801
12830
|
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
|
|
12802
12831
|
),
|
|
@@ -12844,12 +12873,20 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12844
12873
|
"email_address",
|
|
12845
12874
|
"event",
|
|
12846
12875
|
"filename",
|
|
12876
|
+
"gender",
|
|
12847
12877
|
"gender_sexuality",
|
|
12848
12878
|
"healthcare_number",
|
|
12849
12879
|
"injury",
|
|
12850
12880
|
"ip_address",
|
|
12851
12881
|
"language",
|
|
12852
12882
|
"location",
|
|
12883
|
+
"location_address",
|
|
12884
|
+
"location_address_street",
|
|
12885
|
+
"location_city",
|
|
12886
|
+
"location_coordinate",
|
|
12887
|
+
"location_country",
|
|
12888
|
+
"location_state",
|
|
12889
|
+
"location_zip",
|
|
12853
12890
|
"marital_status",
|
|
12854
12891
|
"medical_condition",
|
|
12855
12892
|
"medical_process",
|
|
@@ -12858,6 +12895,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12858
12895
|
"number_sequence",
|
|
12859
12896
|
"occupation",
|
|
12860
12897
|
"organization",
|
|
12898
|
+
"organization_medical_facility",
|
|
12861
12899
|
"passport_number",
|
|
12862
12900
|
"password",
|
|
12863
12901
|
"person_age",
|
|
@@ -12866,6 +12904,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12866
12904
|
"physical_attribute",
|
|
12867
12905
|
"political_affiliation",
|
|
12868
12906
|
"religion",
|
|
12907
|
+
"sexuality",
|
|
12869
12908
|
"statistics",
|
|
12870
12909
|
"time",
|
|
12871
12910
|
"url",
|
|
@@ -12873,12 +12912,17 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12873
12912
|
"username",
|
|
12874
12913
|
"vehicle_id",
|
|
12875
12914
|
"zodiac_sign"
|
|
12876
|
-
]).describe(
|
|
12915
|
+
]).describe(
|
|
12916
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
12917
|
+
)
|
|
12877
12918
|
).nullish().describe(
|
|
12878
12919
|
"The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
12879
12920
|
),
|
|
12880
12921
|
redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
|
|
12881
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
12922
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
12923
|
+
),
|
|
12924
|
+
redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
|
|
12925
|
+
"Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
12882
12926
|
),
|
|
12883
12927
|
sentiment_analysis: import_zod3.z.boolean().nullish().describe(
|
|
12884
12928
|
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
|
|
@@ -13015,20 +13059,23 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
13015
13059
|
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
13016
13060
|
),
|
|
13017
13061
|
summary_model: import_zod3.z.string().nullish().describe(
|
|
13018
|
-
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
13062
|
+
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
|
|
13019
13063
|
),
|
|
13020
13064
|
summary_type: import_zod3.z.string().nullish().describe(
|
|
13021
|
-
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
13065
|
+
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
13022
13066
|
),
|
|
13023
|
-
remove_audio_tags: import_zod3.z.enum(["all"]).describe(
|
|
13024
|
-
|
|
13067
|
+
remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
|
|
13068
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
13025
13069
|
).or(import_zod3.z.null()).optional().describe(
|
|
13026
|
-
|
|
13070
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
13027
13071
|
),
|
|
13028
13072
|
temperature: import_zod3.z.number().nullish().describe(
|
|
13029
13073
|
"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
13030
13074
|
),
|
|
13031
13075
|
text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
|
|
13076
|
+
unredacted_text: import_zod3.z.string().nullish().describe(
|
|
13077
|
+
"The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13078
|
+
),
|
|
13032
13079
|
throttled: import_zod3.z.boolean().nullish().describe(
|
|
13033
13080
|
"True while a request is throttled and false when a request is no longer throttled"
|
|
13034
13081
|
),
|
|
@@ -13065,6 +13112,39 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
13065
13112
|
).nullish().describe(
|
|
13066
13113
|
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
|
|
13067
13114
|
),
|
|
13115
|
+
unredacted_utterances: import_zod3.z.array(
|
|
13116
|
+
import_zod3.z.object({
|
|
13117
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
|
|
13118
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
|
|
13119
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
|
|
13120
|
+
text: import_zod3.z.string().describe("The text for this utterance"),
|
|
13121
|
+
words: import_zod3.z.array(
|
|
13122
|
+
import_zod3.z.object({
|
|
13123
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
|
|
13124
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
|
|
13125
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
|
|
13126
|
+
text: import_zod3.z.string().describe("The text of the word"),
|
|
13127
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
13128
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
13129
|
+
),
|
|
13130
|
+
speaker: import_zod3.z.string().nullable().describe(
|
|
13131
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
13132
|
+
)
|
|
13133
|
+
})
|
|
13134
|
+
).describe("The words in the utterance."),
|
|
13135
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
13136
|
+
"The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
13137
|
+
),
|
|
13138
|
+
speaker: import_zod3.z.string().describe(
|
|
13139
|
+
'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
|
|
13140
|
+
),
|
|
13141
|
+
translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
|
|
13142
|
+
'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
|
|
13143
|
+
)
|
|
13144
|
+
})
|
|
13145
|
+
).nullish().describe(
|
|
13146
|
+
"The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13147
|
+
),
|
|
13068
13148
|
webhook_auth: import_zod3.z.boolean().describe(
|
|
13069
13149
|
"Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
|
|
13070
13150
|
),
|
|
@@ -13093,6 +13173,22 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
13093
13173
|
).nullish().describe(
|
|
13094
13174
|
"An array of temporally-sequential word objects, one for each word in the transcript.\n"
|
|
13095
13175
|
),
|
|
13176
|
+
unredacted_words: import_zod3.z.array(
|
|
13177
|
+
import_zod3.z.object({
|
|
13178
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
|
|
13179
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
|
|
13180
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
|
|
13181
|
+
text: import_zod3.z.string().describe("The text of the word"),
|
|
13182
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
13183
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
13184
|
+
),
|
|
13185
|
+
speaker: import_zod3.z.string().nullable().describe(
|
|
13186
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
13187
|
+
)
|
|
13188
|
+
})
|
|
13189
|
+
).nullish().describe(
|
|
13190
|
+
"The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13191
|
+
),
|
|
13096
13192
|
acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
13097
13193
|
custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
|
|
13098
13194
|
language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
@@ -13268,7 +13364,7 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13268
13364
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
13269
13365
|
),
|
|
13270
13366
|
disfluencies: import_zod3.z.boolean().nullish().describe(
|
|
13271
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
13367
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
13272
13368
|
),
|
|
13273
13369
|
domain: import_zod3.z.string().nullish().describe(
|
|
13274
13370
|
'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
|
|
@@ -13291,12 +13387,20 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13291
13387
|
"email_address",
|
|
13292
13388
|
"event",
|
|
13293
13389
|
"filename",
|
|
13390
|
+
"gender",
|
|
13294
13391
|
"gender_sexuality",
|
|
13295
13392
|
"healthcare_number",
|
|
13296
13393
|
"injury",
|
|
13297
13394
|
"ip_address",
|
|
13298
13395
|
"language",
|
|
13299
13396
|
"location",
|
|
13397
|
+
"location_address",
|
|
13398
|
+
"location_address_street",
|
|
13399
|
+
"location_city",
|
|
13400
|
+
"location_coordinate",
|
|
13401
|
+
"location_country",
|
|
13402
|
+
"location_state",
|
|
13403
|
+
"location_zip",
|
|
13300
13404
|
"marital_status",
|
|
13301
13405
|
"medical_condition",
|
|
13302
13406
|
"medical_process",
|
|
@@ -13305,6 +13409,7 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13305
13409
|
"number_sequence",
|
|
13306
13410
|
"occupation",
|
|
13307
13411
|
"organization",
|
|
13412
|
+
"organization_medical_facility",
|
|
13308
13413
|
"passport_number",
|
|
13309
13414
|
"password",
|
|
13310
13415
|
"person_age",
|
|
@@ -13313,6 +13418,7 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13313
13418
|
"physical_attribute",
|
|
13314
13419
|
"political_affiliation",
|
|
13315
13420
|
"religion",
|
|
13421
|
+
"sexuality",
|
|
13316
13422
|
"statistics",
|
|
13317
13423
|
"time",
|
|
13318
13424
|
"url",
|
|
@@ -13617,6 +13723,24 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13617
13723
|
}).optional().describe(
|
|
13618
13724
|
"Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
|
|
13619
13725
|
),
|
|
13726
|
+
metadata: import_zod3.z.object({
|
|
13727
|
+
domain_used: import_zod3.z.string().nullish().describe(
|
|
13728
|
+
'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
|
|
13729
|
+
),
|
|
13730
|
+
warnings: import_zod3.z.array(
|
|
13731
|
+
import_zod3.z.object({
|
|
13732
|
+
message: import_zod3.z.string().describe("A human-readable description of the warning.")
|
|
13733
|
+
}).describe(
|
|
13734
|
+
"A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
|
|
13735
|
+
)
|
|
13736
|
+
).optional().describe(
|
|
13737
|
+
"Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
|
|
13738
|
+
)
|
|
13739
|
+
}).describe(
|
|
13740
|
+
"Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
|
|
13741
|
+
).or(import_zod3.z.null()).optional().describe(
|
|
13742
|
+
"Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
|
|
13743
|
+
),
|
|
13620
13744
|
multichannel: import_zod3.z.boolean().nullish().describe(
|
|
13621
13745
|
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
|
|
13622
13746
|
),
|
|
@@ -13664,12 +13788,20 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13664
13788
|
"email_address",
|
|
13665
13789
|
"event",
|
|
13666
13790
|
"filename",
|
|
13791
|
+
"gender",
|
|
13667
13792
|
"gender_sexuality",
|
|
13668
13793
|
"healthcare_number",
|
|
13669
13794
|
"injury",
|
|
13670
13795
|
"ip_address",
|
|
13671
13796
|
"language",
|
|
13672
13797
|
"location",
|
|
13798
|
+
"location_address",
|
|
13799
|
+
"location_address_street",
|
|
13800
|
+
"location_city",
|
|
13801
|
+
"location_coordinate",
|
|
13802
|
+
"location_country",
|
|
13803
|
+
"location_state",
|
|
13804
|
+
"location_zip",
|
|
13673
13805
|
"marital_status",
|
|
13674
13806
|
"medical_condition",
|
|
13675
13807
|
"medical_process",
|
|
@@ -13678,6 +13810,7 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13678
13810
|
"number_sequence",
|
|
13679
13811
|
"occupation",
|
|
13680
13812
|
"organization",
|
|
13813
|
+
"organization_medical_facility",
|
|
13681
13814
|
"passport_number",
|
|
13682
13815
|
"password",
|
|
13683
13816
|
"person_age",
|
|
@@ -13686,6 +13819,7 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13686
13819
|
"physical_attribute",
|
|
13687
13820
|
"political_affiliation",
|
|
13688
13821
|
"religion",
|
|
13822
|
+
"sexuality",
|
|
13689
13823
|
"statistics",
|
|
13690
13824
|
"time",
|
|
13691
13825
|
"url",
|
|
@@ -13693,12 +13827,17 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13693
13827
|
"username",
|
|
13694
13828
|
"vehicle_id",
|
|
13695
13829
|
"zodiac_sign"
|
|
13696
|
-
]).describe(
|
|
13830
|
+
]).describe(
|
|
13831
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
13832
|
+
)
|
|
13697
13833
|
).nullish().describe(
|
|
13698
13834
|
"The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13699
13835
|
),
|
|
13700
13836
|
redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
|
|
13701
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
13837
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
13838
|
+
),
|
|
13839
|
+
redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
|
|
13840
|
+
"Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13702
13841
|
),
|
|
13703
13842
|
sentiment_analysis: import_zod3.z.boolean().nullish().describe(
|
|
13704
13843
|
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
|
|
@@ -13835,20 +13974,23 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13835
13974
|
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
13836
13975
|
),
|
|
13837
13976
|
summary_model: import_zod3.z.string().nullish().describe(
|
|
13838
|
-
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
13977
|
+
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
|
|
13839
13978
|
),
|
|
13840
13979
|
summary_type: import_zod3.z.string().nullish().describe(
|
|
13841
|
-
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
13980
|
+
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
13842
13981
|
),
|
|
13843
|
-
remove_audio_tags: import_zod3.z.enum(["all"]).describe(
|
|
13844
|
-
|
|
13982
|
+
remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
|
|
13983
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
13845
13984
|
).or(import_zod3.z.null()).optional().describe(
|
|
13846
|
-
|
|
13985
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
13847
13986
|
),
|
|
13848
13987
|
temperature: import_zod3.z.number().nullish().describe(
|
|
13849
13988
|
"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
13850
13989
|
),
|
|
13851
13990
|
text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
|
|
13991
|
+
unredacted_text: import_zod3.z.string().nullish().describe(
|
|
13992
|
+
"The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13993
|
+
),
|
|
13852
13994
|
throttled: import_zod3.z.boolean().nullish().describe(
|
|
13853
13995
|
"True while a request is throttled and false when a request is no longer throttled"
|
|
13854
13996
|
),
|
|
@@ -13885,6 +14027,39 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13885
14027
|
).nullish().describe(
|
|
13886
14028
|
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
|
|
13887
14029
|
),
|
|
14030
|
+
unredacted_utterances: import_zod3.z.array(
|
|
14031
|
+
import_zod3.z.object({
|
|
14032
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
|
|
14033
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
|
|
14034
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
|
|
14035
|
+
text: import_zod3.z.string().describe("The text for this utterance"),
|
|
14036
|
+
words: import_zod3.z.array(
|
|
14037
|
+
import_zod3.z.object({
|
|
14038
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
|
|
14039
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
|
|
14040
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
|
|
14041
|
+
text: import_zod3.z.string().describe("The text of the word"),
|
|
14042
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
14043
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
14044
|
+
),
|
|
14045
|
+
speaker: import_zod3.z.string().nullable().describe(
|
|
14046
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
14047
|
+
)
|
|
14048
|
+
})
|
|
14049
|
+
).describe("The words in the utterance."),
|
|
14050
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
14051
|
+
"The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
14052
|
+
),
|
|
14053
|
+
speaker: import_zod3.z.string().describe(
|
|
14054
|
+
'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
|
|
14055
|
+
),
|
|
14056
|
+
translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
|
|
14057
|
+
'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
|
|
14058
|
+
)
|
|
14059
|
+
})
|
|
14060
|
+
).nullish().describe(
|
|
14061
|
+
"The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14062
|
+
),
|
|
13888
14063
|
webhook_auth: import_zod3.z.boolean().describe(
|
|
13889
14064
|
"Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
|
|
13890
14065
|
),
|
|
@@ -13913,6 +14088,22 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13913
14088
|
).nullish().describe(
|
|
13914
14089
|
"An array of temporally-sequential word objects, one for each word in the transcript.\n"
|
|
13915
14090
|
),
|
|
14091
|
+
unredacted_words: import_zod3.z.array(
|
|
14092
|
+
import_zod3.z.object({
|
|
14093
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
|
|
14094
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
|
|
14095
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
|
|
14096
|
+
text: import_zod3.z.string().describe("The text of the word"),
|
|
14097
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
14098
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
14099
|
+
),
|
|
14100
|
+
speaker: import_zod3.z.string().nullable().describe(
|
|
14101
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
14102
|
+
)
|
|
14103
|
+
})
|
|
14104
|
+
).nullish().describe(
|
|
14105
|
+
"The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14106
|
+
),
|
|
13916
14107
|
acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
13917
14108
|
custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
|
|
13918
14109
|
language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
@@ -14048,7 +14239,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14048
14239
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
14049
14240
|
),
|
|
14050
14241
|
disfluencies: import_zod3.z.boolean().nullish().describe(
|
|
14051
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
14242
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
14052
14243
|
),
|
|
14053
14244
|
domain: import_zod3.z.string().nullish().describe(
|
|
14054
14245
|
'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
|
|
@@ -14071,12 +14262,20 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14071
14262
|
"email_address",
|
|
14072
14263
|
"event",
|
|
14073
14264
|
"filename",
|
|
14265
|
+
"gender",
|
|
14074
14266
|
"gender_sexuality",
|
|
14075
14267
|
"healthcare_number",
|
|
14076
14268
|
"injury",
|
|
14077
14269
|
"ip_address",
|
|
14078
14270
|
"language",
|
|
14079
14271
|
"location",
|
|
14272
|
+
"location_address",
|
|
14273
|
+
"location_address_street",
|
|
14274
|
+
"location_city",
|
|
14275
|
+
"location_coordinate",
|
|
14276
|
+
"location_country",
|
|
14277
|
+
"location_state",
|
|
14278
|
+
"location_zip",
|
|
14080
14279
|
"marital_status",
|
|
14081
14280
|
"medical_condition",
|
|
14082
14281
|
"medical_process",
|
|
@@ -14085,6 +14284,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14085
14284
|
"number_sequence",
|
|
14086
14285
|
"occupation",
|
|
14087
14286
|
"organization",
|
|
14287
|
+
"organization_medical_facility",
|
|
14088
14288
|
"passport_number",
|
|
14089
14289
|
"password",
|
|
14090
14290
|
"person_age",
|
|
@@ -14093,6 +14293,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14093
14293
|
"physical_attribute",
|
|
14094
14294
|
"political_affiliation",
|
|
14095
14295
|
"religion",
|
|
14296
|
+
"sexuality",
|
|
14096
14297
|
"statistics",
|
|
14097
14298
|
"time",
|
|
14098
14299
|
"url",
|
|
@@ -14397,6 +14598,24 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14397
14598
|
}).optional().describe(
|
|
14398
14599
|
"Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
|
|
14399
14600
|
),
|
|
14601
|
+
metadata: import_zod3.z.object({
|
|
14602
|
+
domain_used: import_zod3.z.string().nullish().describe(
|
|
14603
|
+
'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
|
|
14604
|
+
),
|
|
14605
|
+
warnings: import_zod3.z.array(
|
|
14606
|
+
import_zod3.z.object({
|
|
14607
|
+
message: import_zod3.z.string().describe("A human-readable description of the warning.")
|
|
14608
|
+
}).describe(
|
|
14609
|
+
"A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
|
|
14610
|
+
)
|
|
14611
|
+
).optional().describe(
|
|
14612
|
+
"Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
|
|
14613
|
+
)
|
|
14614
|
+
}).describe(
|
|
14615
|
+
"Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
|
|
14616
|
+
).or(import_zod3.z.null()).optional().describe(
|
|
14617
|
+
"Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
|
|
14618
|
+
),
|
|
14400
14619
|
multichannel: import_zod3.z.boolean().nullish().describe(
|
|
14401
14620
|
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
|
|
14402
14621
|
),
|
|
@@ -14444,12 +14663,20 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14444
14663
|
"email_address",
|
|
14445
14664
|
"event",
|
|
14446
14665
|
"filename",
|
|
14666
|
+
"gender",
|
|
14447
14667
|
"gender_sexuality",
|
|
14448
14668
|
"healthcare_number",
|
|
14449
14669
|
"injury",
|
|
14450
14670
|
"ip_address",
|
|
14451
14671
|
"language",
|
|
14452
14672
|
"location",
|
|
14673
|
+
"location_address",
|
|
14674
|
+
"location_address_street",
|
|
14675
|
+
"location_city",
|
|
14676
|
+
"location_coordinate",
|
|
14677
|
+
"location_country",
|
|
14678
|
+
"location_state",
|
|
14679
|
+
"location_zip",
|
|
14453
14680
|
"marital_status",
|
|
14454
14681
|
"medical_condition",
|
|
14455
14682
|
"medical_process",
|
|
@@ -14458,6 +14685,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14458
14685
|
"number_sequence",
|
|
14459
14686
|
"occupation",
|
|
14460
14687
|
"organization",
|
|
14688
|
+
"organization_medical_facility",
|
|
14461
14689
|
"passport_number",
|
|
14462
14690
|
"password",
|
|
14463
14691
|
"person_age",
|
|
@@ -14466,6 +14694,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14466
14694
|
"physical_attribute",
|
|
14467
14695
|
"political_affiliation",
|
|
14468
14696
|
"religion",
|
|
14697
|
+
"sexuality",
|
|
14469
14698
|
"statistics",
|
|
14470
14699
|
"time",
|
|
14471
14700
|
"url",
|
|
@@ -14473,12 +14702,17 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14473
14702
|
"username",
|
|
14474
14703
|
"vehicle_id",
|
|
14475
14704
|
"zodiac_sign"
|
|
14476
|
-
]).describe(
|
|
14705
|
+
]).describe(
|
|
14706
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
14707
|
+
)
|
|
14477
14708
|
).nullish().describe(
|
|
14478
14709
|
"The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14479
14710
|
),
|
|
14480
14711
|
redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
|
|
14481
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
14712
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
14713
|
+
),
|
|
14714
|
+
redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
|
|
14715
|
+
"Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14482
14716
|
),
|
|
14483
14717
|
sentiment_analysis: import_zod3.z.boolean().nullish().describe(
|
|
14484
14718
|
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
|
|
@@ -14615,20 +14849,23 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14615
14849
|
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
14616
14850
|
),
|
|
14617
14851
|
summary_model: import_zod3.z.string().nullish().describe(
|
|
14618
|
-
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
14852
|
+
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
|
|
14619
14853
|
),
|
|
14620
14854
|
summary_type: import_zod3.z.string().nullish().describe(
|
|
14621
|
-
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
14855
|
+
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
14622
14856
|
),
|
|
14623
|
-
remove_audio_tags: import_zod3.z.enum(["all"]).describe(
|
|
14624
|
-
|
|
14857
|
+
remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
|
|
14858
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
14625
14859
|
).or(import_zod3.z.null()).optional().describe(
|
|
14626
|
-
|
|
14860
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
14627
14861
|
),
|
|
14628
14862
|
temperature: import_zod3.z.number().nullish().describe(
|
|
14629
14863
|
"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
14630
14864
|
),
|
|
14631
14865
|
text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
|
|
14866
|
+
unredacted_text: import_zod3.z.string().nullish().describe(
|
|
14867
|
+
"The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14868
|
+
),
|
|
14632
14869
|
throttled: import_zod3.z.boolean().nullish().describe(
|
|
14633
14870
|
"True while a request is throttled and false when a request is no longer throttled"
|
|
14634
14871
|
),
|
|
@@ -14665,6 +14902,39 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14665
14902
|
).nullish().describe(
|
|
14666
14903
|
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
|
|
14667
14904
|
),
|
|
14905
|
+
unredacted_utterances: import_zod3.z.array(
|
|
14906
|
+
import_zod3.z.object({
|
|
14907
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
|
|
14908
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
|
|
14909
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
|
|
14910
|
+
text: import_zod3.z.string().describe("The text for this utterance"),
|
|
14911
|
+
words: import_zod3.z.array(
|
|
14912
|
+
import_zod3.z.object({
|
|
14913
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
|
|
14914
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
|
|
14915
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
|
|
14916
|
+
text: import_zod3.z.string().describe("The text of the word"),
|
|
14917
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
14918
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
14919
|
+
),
|
|
14920
|
+
speaker: import_zod3.z.string().nullable().describe(
|
|
14921
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
14922
|
+
)
|
|
14923
|
+
})
|
|
14924
|
+
).describe("The words in the utterance."),
|
|
14925
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
14926
|
+
"The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
14927
|
+
),
|
|
14928
|
+
speaker: import_zod3.z.string().describe(
|
|
14929
|
+
'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
|
|
14930
|
+
),
|
|
14931
|
+
translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
|
|
14932
|
+
'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
|
|
14933
|
+
)
|
|
14934
|
+
})
|
|
14935
|
+
).nullish().describe(
|
|
14936
|
+
"The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14937
|
+
),
|
|
14668
14938
|
webhook_auth: import_zod3.z.boolean().describe(
|
|
14669
14939
|
"Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
|
|
14670
14940
|
),
|
|
@@ -14693,6 +14963,22 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14693
14963
|
).nullish().describe(
|
|
14694
14964
|
"An array of temporally-sequential word objects, one for each word in the transcript.\n"
|
|
14695
14965
|
),
|
|
14966
|
+
unredacted_words: import_zod3.z.array(
|
|
14967
|
+
import_zod3.z.object({
|
|
14968
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
|
|
14969
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
|
|
14970
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
|
|
14971
|
+
text: import_zod3.z.string().describe("The text of the word"),
|
|
14972
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
14973
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
14974
|
+
),
|
|
14975
|
+
speaker: import_zod3.z.string().nullable().describe(
|
|
14976
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
14977
|
+
)
|
|
14978
|
+
})
|
|
14979
|
+
).nullish().describe(
|
|
14980
|
+
"The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14981
|
+
),
|
|
14696
14982
|
acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
14697
14983
|
custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
|
|
14698
14984
|
language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
@@ -14848,7 +15134,21 @@ var streamingTranscriberParams = import_zod4.z.object({
|
|
|
14848
15134
|
inactivityTimeout: import_zod4.z.number().optional().describe("From SDK v3"),
|
|
14849
15135
|
speakerLabels: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
14850
15136
|
maxSpeakers: import_zod4.z.number().optional().describe("From SDK v3"),
|
|
14851
|
-
|
|
15137
|
+
voiceFocus: import_zod4.z.unknown().optional().describe("From SDK v3"),
|
|
15138
|
+
voiceFocusThreshold: import_zod4.z.number().optional().describe("From SDK v3"),
|
|
15139
|
+
continuousPartials: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
15140
|
+
interruptionDelay: import_zod4.z.number().optional().describe("From SDK v3"),
|
|
15141
|
+
turnLeftPadMs: import_zod4.z.number().optional().describe("From SDK v3"),
|
|
15142
|
+
customerSupportAudioCapture: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
15143
|
+
includePartialTurns: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
15144
|
+
redactPii: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
15145
|
+
redactPiiPolicies: import_zod4.z.unknown().optional().describe("From SDK v3"),
|
|
15146
|
+
redactPiiSub: import_zod4.z.unknown().optional().describe("From SDK v3"),
|
|
15147
|
+
llmGateway: import_zod4.z.unknown().optional().describe("From SDK v3"),
|
|
15148
|
+
webhookUrl: import_zod4.z.string().optional().describe("From SDK v3"),
|
|
15149
|
+
webhookAuthHeaderName: import_zod4.z.string().optional().describe("From SDK v3"),
|
|
15150
|
+
webhookAuthHeaderValue: import_zod4.z.string().optional().describe("From SDK v3"),
|
|
15151
|
+
mode: import_zod4.z.unknown().describe("From SDK v3")
|
|
14852
15152
|
});
|
|
14853
15153
|
var streamingUpdateConfigParams = import_zod4.z.object({
|
|
14854
15154
|
end_utterance_silence_threshold: import_zod4.z.number().min(0).max(2e4).optional().describe("The duration threshold in milliseconds"),
|
|
@@ -14860,7 +15160,9 @@ var streamingUpdateConfigParams = import_zod4.z.object({
|
|
|
14860
15160
|
format_turns: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
14861
15161
|
keyterms_prompt: import_zod4.z.array(import_zod4.z.string()).optional().describe("From SDK v3"),
|
|
14862
15162
|
prompt: import_zod4.z.string().optional().describe("From SDK v3"),
|
|
14863
|
-
filter_profanity: import_zod4.z.boolean().optional().describe("From SDK v3")
|
|
15163
|
+
filter_profanity: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
15164
|
+
interruption_delay: import_zod4.z.number().optional().describe("From SDK v3"),
|
|
15165
|
+
turn_left_pad_ms: import_zod4.z.number().optional().describe("From SDK v3")
|
|
14864
15166
|
});
|
|
14865
15167
|
|
|
14866
15168
|
// src/generated/gladia/api/gladiaControlAPI.zod.ts
|
|
@@ -15609,7 +15911,7 @@ var preRecordedControllerInitPreRecordedJobV2BodyNamedEntityRecognitionDefault =
|
|
|
15609
15911
|
var preRecordedControllerInitPreRecordedJobV2BodyCustomSpellingDefault = false;
|
|
15610
15912
|
var preRecordedControllerInitPreRecordedJobV2BodySentimentAnalysisDefault = false;
|
|
15611
15913
|
var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmDefault = false;
|
|
15612
|
-
var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-
|
|
15914
|
+
var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
15613
15915
|
var preRecordedControllerInitPreRecordedJobV2BodyPiiRedactionDefault = false;
|
|
15614
15916
|
var preRecordedControllerInitPreRecordedJobV2BodySentencesDefault = false;
|
|
15615
15917
|
var preRecordedControllerInitPreRecordedJobV2BodyPunctuationEnhancedDefault = false;
|
|
@@ -15898,23 +16200,23 @@ var preRecordedControllerInitPreRecordedJobV2Body = import_zod5.z.object({
|
|
|
15898
16200
|
"Forces the translation to use informal language forms when available in the target language."
|
|
15899
16201
|
)
|
|
15900
16202
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
15901
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
16203
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
15902
16204
|
summarization_config: import_zod5.z.object({
|
|
15903
16205
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(preRecordedControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault).describe("The type of summarization to apply")
|
|
15904
|
-
}).optional().describe("
|
|
16206
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
15905
16207
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
15906
16208
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
15907
16209
|
custom_spelling_config: import_zod5.z.object({
|
|
15908
16210
|
spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
|
|
15909
16211
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
15910
16212
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
15911
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
16213
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
15912
16214
|
audio_to_llm_config: import_zod5.z.object({
|
|
15913
16215
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
15914
16216
|
model: import_zod5.z.string().default(preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault).describe(
|
|
15915
16217
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
15916
16218
|
)
|
|
15917
|
-
}).optional().describe("
|
|
16219
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
15918
16220
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
15919
16221
|
pii_redaction_config: import_zod5.z.object({
|
|
15920
16222
|
entity_types: import_zod5.z.enum([
|
|
@@ -16169,7 +16471,7 @@ var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsNamed
|
|
|
16169
16471
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsCustomSpellingDefault = false;
|
|
16170
16472
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
|
|
16171
16473
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmDefault = false;
|
|
16172
|
-
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
16474
|
+
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
16173
16475
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsPiiRedactionDefault = false;
|
|
16174
16476
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSentencesDefault = false;
|
|
16175
16477
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
|
|
@@ -16517,12 +16819,12 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
|
|
|
16517
16819
|
"Forces the translation to use informal language forms when available in the target language."
|
|
16518
16820
|
)
|
|
16519
16821
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
16520
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
16822
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
16521
16823
|
summarization_config: import_zod5.z.object({
|
|
16522
16824
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
16523
16825
|
preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
|
|
16524
16826
|
).describe("The type of summarization to apply")
|
|
16525
|
-
}).optional().describe("
|
|
16827
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
16526
16828
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
16527
16829
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
16528
16830
|
custom_spelling_config: import_zod5.z.object({
|
|
@@ -16531,7 +16833,7 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
|
|
|
16531
16833
|
"**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
|
|
16532
16834
|
),
|
|
16533
16835
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
16534
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
16836
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
16535
16837
|
audio_to_llm_config: import_zod5.z.object({
|
|
16536
16838
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
16537
16839
|
model: import_zod5.z.string().default(
|
|
@@ -16539,7 +16841,7 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
|
|
|
16539
16841
|
).describe(
|
|
16540
16842
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
16541
16843
|
)
|
|
16542
|
-
}).optional().describe("
|
|
16844
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
16543
16845
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
16544
16846
|
pii_redaction_config: import_zod5.z.object({
|
|
16545
16847
|
entity_types: import_zod5.z.enum([
|
|
@@ -17676,7 +17978,7 @@ var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsNamedEntityReco
|
|
|
17676
17978
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsCustomSpellingDefault = false;
|
|
17677
17979
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSentimentAnalysisDefault = false;
|
|
17678
17980
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmDefault = false;
|
|
17679
|
-
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
17981
|
+
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
17680
17982
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsPiiRedactionDefault = false;
|
|
17681
17983
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSentencesDefault = false;
|
|
17682
17984
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsPunctuationEnhancedDefault = false;
|
|
@@ -18017,19 +18319,19 @@ var preRecordedControllerGetPreRecordedJobV2Response = import_zod5.z.object({
|
|
|
18017
18319
|
"Forces the translation to use informal language forms when available in the target language."
|
|
18018
18320
|
)
|
|
18019
18321
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
18020
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
18322
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
18021
18323
|
summarization_config: import_zod5.z.object({
|
|
18022
18324
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
18023
18325
|
preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSummarizationConfigTypeDefault
|
|
18024
18326
|
).describe("The type of summarization to apply")
|
|
18025
|
-
}).optional().describe("
|
|
18327
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
18026
18328
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
18027
18329
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
18028
18330
|
custom_spelling_config: import_zod5.z.object({
|
|
18029
18331
|
spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
|
|
18030
18332
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
18031
18333
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
18032
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
18334
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
18033
18335
|
audio_to_llm_config: import_zod5.z.object({
|
|
18034
18336
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
18035
18337
|
model: import_zod5.z.string().default(
|
|
@@ -18037,7 +18339,7 @@ var preRecordedControllerGetPreRecordedJobV2Response = import_zod5.z.object({
|
|
|
18037
18339
|
).describe(
|
|
18038
18340
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
18039
18341
|
)
|
|
18040
|
-
}).optional().describe("
|
|
18342
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
18041
18343
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
18042
18344
|
pii_redaction_config: import_zod5.z.object({
|
|
18043
18345
|
entity_types: import_zod5.z.enum([
|
|
@@ -19150,7 +19452,7 @@ var transcriptionControllerInitPreRecordedJobV2BodyNamedEntityRecognitionDefault
|
|
|
19150
19452
|
var transcriptionControllerInitPreRecordedJobV2BodyCustomSpellingDefault = false;
|
|
19151
19453
|
var transcriptionControllerInitPreRecordedJobV2BodySentimentAnalysisDefault = false;
|
|
19152
19454
|
var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmDefault = false;
|
|
19153
|
-
var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-
|
|
19455
|
+
var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
19154
19456
|
var transcriptionControllerInitPreRecordedJobV2BodyPiiRedactionDefault = false;
|
|
19155
19457
|
var transcriptionControllerInitPreRecordedJobV2BodySentencesDefault = false;
|
|
19156
19458
|
var transcriptionControllerInitPreRecordedJobV2BodyPunctuationEnhancedDefault = false;
|
|
@@ -19443,23 +19745,23 @@ var transcriptionControllerInitPreRecordedJobV2Body = import_zod5.z.object({
|
|
|
19443
19745
|
"Forces the translation to use informal language forms when available in the target language."
|
|
19444
19746
|
)
|
|
19445
19747
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
19446
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
19748
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
19447
19749
|
summarization_config: import_zod5.z.object({
|
|
19448
19750
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(transcriptionControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault).describe("The type of summarization to apply")
|
|
19449
|
-
}).optional().describe("
|
|
19751
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
19450
19752
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
19451
19753
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
19452
19754
|
custom_spelling_config: import_zod5.z.object({
|
|
19453
19755
|
spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
|
|
19454
19756
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
19455
19757
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
19456
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
19758
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
19457
19759
|
audio_to_llm_config: import_zod5.z.object({
|
|
19458
19760
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
19459
19761
|
model: import_zod5.z.string().default(transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault).describe(
|
|
19460
19762
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
19461
19763
|
)
|
|
19462
|
-
}).optional().describe("
|
|
19764
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
19463
19765
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
19464
19766
|
pii_redaction_config: import_zod5.z.object({
|
|
19465
19767
|
entity_types: import_zod5.z.enum([
|
|
@@ -19717,7 +20019,7 @@ var transcriptionControllerListV2ResponseItemsItemRequestParamsNamedEntityRecogn
|
|
|
19717
20019
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsCustomSpellingDefault = false;
|
|
19718
20020
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
|
|
19719
20021
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmDefault = false;
|
|
19720
|
-
var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
20022
|
+
var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
19721
20023
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsPiiRedactionDefault = false;
|
|
19722
20024
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsSentencesDefault = false;
|
|
19723
20025
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
|
|
@@ -20128,12 +20430,12 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
|
|
|
20128
20430
|
"Forces the translation to use informal language forms when available in the target language."
|
|
20129
20431
|
)
|
|
20130
20432
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
20131
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
20433
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
20132
20434
|
summarization_config: import_zod5.z.object({
|
|
20133
20435
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
20134
20436
|
transcriptionControllerListV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
|
|
20135
20437
|
).describe("The type of summarization to apply")
|
|
20136
|
-
}).optional().describe("
|
|
20438
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
20137
20439
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
20138
20440
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
20139
20441
|
custom_spelling_config: import_zod5.z.object({
|
|
@@ -20142,7 +20444,7 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
|
|
|
20142
20444
|
"**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
|
|
20143
20445
|
),
|
|
20144
20446
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
20145
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
20447
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
20146
20448
|
audio_to_llm_config: import_zod5.z.object({
|
|
20147
20449
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
20148
20450
|
model: import_zod5.z.string().default(
|
|
@@ -20150,7 +20452,7 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
|
|
|
20150
20452
|
).describe(
|
|
20151
20453
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
20152
20454
|
)
|
|
20153
|
-
}).optional().describe("
|
|
20455
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
20154
20456
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
20155
20457
|
pii_redaction_config: import_zod5.z.object({
|
|
20156
20458
|
entity_types: import_zod5.z.enum([
|
|
@@ -22468,7 +22770,7 @@ var transcriptionControllerGetTranscriptV2ResponseRequestParamsNamedEntityRecogn
|
|
|
22468
22770
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsCustomSpellingDefault = false;
|
|
22469
22771
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsSentimentAnalysisDefault = false;
|
|
22470
22772
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmDefault = false;
|
|
22471
|
-
var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
22773
|
+
var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
22472
22774
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsPiiRedactionDefault = false;
|
|
22473
22775
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsSentencesDefault = false;
|
|
22474
22776
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsPunctuationEnhancedDefault = false;
|
|
@@ -22873,19 +23175,19 @@ var transcriptionControllerGetTranscriptV2Response = import_zod5.z.discriminated
|
|
|
22873
23175
|
"Forces the translation to use informal language forms when available in the target language."
|
|
22874
23176
|
)
|
|
22875
23177
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
22876
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
23178
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
22877
23179
|
summarization_config: import_zod5.z.object({
|
|
22878
23180
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
22879
23181
|
transcriptionControllerGetTranscriptV2ResponseRequestParamsSummarizationConfigTypeDefault
|
|
22880
23182
|
).describe("The type of summarization to apply")
|
|
22881
|
-
}).optional().describe("
|
|
23183
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
22882
23184
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
22883
23185
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
22884
23186
|
custom_spelling_config: import_zod5.z.object({
|
|
22885
23187
|
spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
|
|
22886
23188
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
22887
23189
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
22888
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
23190
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
22889
23191
|
audio_to_llm_config: import_zod5.z.object({
|
|
22890
23192
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
22891
23193
|
model: import_zod5.z.string().default(
|
|
@@ -22893,7 +23195,7 @@ var transcriptionControllerGetTranscriptV2Response = import_zod5.z.discriminated
|
|
|
22893
23195
|
).describe(
|
|
22894
23196
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
22895
23197
|
)
|
|
22896
|
-
}).optional().describe("
|
|
23198
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
22897
23199
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
22898
23200
|
pii_redaction_config: import_zod5.z.object({
|
|
22899
23201
|
entity_types: import_zod5.z.enum([
|
|
@@ -25605,7 +25907,7 @@ var historyControllerGetListV1ResponseItemsItemRequestParamsNamedEntityRecogniti
|
|
|
25605
25907
|
var historyControllerGetListV1ResponseItemsItemRequestParamsCustomSpellingDefault = false;
|
|
25606
25908
|
var historyControllerGetListV1ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
|
|
25607
25909
|
var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmDefault = false;
|
|
25608
|
-
var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
25910
|
+
var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
25609
25911
|
var historyControllerGetListV1ResponseItemsItemRequestParamsPiiRedactionDefault = false;
|
|
25610
25912
|
var historyControllerGetListV1ResponseItemsItemRequestParamsSentencesDefault = false;
|
|
25611
25913
|
var historyControllerGetListV1ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
|
|
@@ -26016,12 +26318,12 @@ var historyControllerGetListV1Response = import_zod5.z.object({
|
|
|
26016
26318
|
"Forces the translation to use informal language forms when available in the target language."
|
|
26017
26319
|
)
|
|
26018
26320
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
26019
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
26321
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
26020
26322
|
summarization_config: import_zod5.z.object({
|
|
26021
26323
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
26022
26324
|
historyControllerGetListV1ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
|
|
26023
26325
|
).describe("The type of summarization to apply")
|
|
26024
|
-
}).optional().describe("
|
|
26326
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
26025
26327
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
26026
26328
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
26027
26329
|
custom_spelling_config: import_zod5.z.object({
|
|
@@ -26030,7 +26332,7 @@ var historyControllerGetListV1Response = import_zod5.z.object({
|
|
|
26030
26332
|
"**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
|
|
26031
26333
|
),
|
|
26032
26334
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
26033
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
26335
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
26034
26336
|
audio_to_llm_config: import_zod5.z.object({
|
|
26035
26337
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
26036
26338
|
model: import_zod5.z.string().default(
|
|
@@ -26038,7 +26340,7 @@ var historyControllerGetListV1Response = import_zod5.z.object({
|
|
|
26038
26340
|
).describe(
|
|
26039
26341
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
26040
26342
|
)
|
|
26041
|
-
}).optional().describe("
|
|
26343
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
26042
26344
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
26043
26345
|
pii_redaction_config: import_zod5.z.object({
|
|
26044
26346
|
entity_types: import_zod5.z.enum([
|
|
@@ -31283,6 +31585,7 @@ __export(openAIAudioRealtimeAPI_zod_exports, {
|
|
|
31283
31585
|
createRealtimeClientSecretBodySessionPromptVariablesTypeDefault: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefault,
|
|
31284
31586
|
createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultOne: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultOne,
|
|
31285
31587
|
createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultTwo: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultTwo,
|
|
31588
|
+
createRealtimeClientSecretBodySessionReasoningEffortDefault: () => createRealtimeClientSecretBodySessionReasoningEffortDefault,
|
|
31286
31589
|
createRealtimeClientSecretBodySessionToolChoiceDefault: () => createRealtimeClientSecretBodySessionToolChoiceDefault,
|
|
31287
31590
|
createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne: () => createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne,
|
|
31288
31591
|
createRealtimeClientSecretBodySessionTracingDefault: () => createRealtimeClientSecretBodySessionTracingDefault,
|
|
@@ -31307,6 +31610,7 @@ __export(openAIAudioRealtimeAPI_zod_exports, {
|
|
|
31307
31610
|
createRealtimeClientSecretResponseSessionPromptVariablesTypeDefault: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefault,
|
|
31308
31611
|
createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultOne: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultOne,
|
|
31309
31612
|
createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo,
|
|
31613
|
+
createRealtimeClientSecretResponseSessionReasoningEffortDefault: () => createRealtimeClientSecretResponseSessionReasoningEffortDefault,
|
|
31310
31614
|
createRealtimeClientSecretResponseSessionToolChoiceDefault: () => createRealtimeClientSecretResponseSessionToolChoiceDefault,
|
|
31311
31615
|
createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne: () => createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne,
|
|
31312
31616
|
createRealtimeClientSecretResponseSessionTracingDefaultOne: () => createRealtimeClientSecretResponseSessionTracingDefaultOne,
|
|
@@ -31663,6 +31967,7 @@ var createRealtimeClientSecretBodySessionTracingDefaultOne = "auto";
|
|
|
31663
31967
|
var createRealtimeClientSecretBodySessionTracingDefault = null;
|
|
31664
31968
|
var createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne = "always";
|
|
31665
31969
|
var createRealtimeClientSecretBodySessionToolChoiceDefault = "auto";
|
|
31970
|
+
var createRealtimeClientSecretBodySessionReasoningEffortDefault = "low";
|
|
31666
31971
|
var createRealtimeClientSecretBodySessionTruncationRetentionRatioMin = 0;
|
|
31667
31972
|
var createRealtimeClientSecretBodySessionTruncationRetentionRatioMax = 1;
|
|
31668
31973
|
var createRealtimeClientSecretBodySessionTruncationTokenLimitsPostInstructionsMin = 0;
|
|
@@ -31698,6 +32003,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
31698
32003
|
import_zod6.z.enum([
|
|
31699
32004
|
"gpt-realtime",
|
|
31700
32005
|
"gpt-realtime-1.5",
|
|
32006
|
+
"gpt-realtime-2",
|
|
31701
32007
|
"gpt-realtime-2025-08-28",
|
|
31702
32008
|
"gpt-4o-realtime-preview",
|
|
31703
32009
|
"gpt-4o-realtime-preview-2024-10-01",
|
|
@@ -31738,16 +32044,20 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
31738
32044
|
"gpt-4o-mini-transcribe",
|
|
31739
32045
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
31740
32046
|
"gpt-4o-transcribe",
|
|
31741
|
-
"gpt-4o-transcribe-diarize"
|
|
32047
|
+
"gpt-4o-transcribe-diarize",
|
|
32048
|
+
"gpt-realtime-whisper"
|
|
31742
32049
|
])
|
|
31743
32050
|
).optional().describe(
|
|
31744
|
-
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`,
|
|
32051
|
+
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
|
|
31745
32052
|
),
|
|
31746
32053
|
language: import_zod6.z.string().optional().describe(
|
|
31747
32054
|
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
31748
32055
|
),
|
|
31749
32056
|
prompt: import_zod6.z.string().optional().describe(
|
|
31750
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
32057
|
+
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
|
|
32058
|
+
),
|
|
32059
|
+
delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
|
|
32060
|
+
"Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
|
|
31751
32061
|
)
|
|
31752
32062
|
}).optional(),
|
|
31753
32063
|
noise_reduction: import_zod6.z.object({
|
|
@@ -31814,7 +32124,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
31814
32124
|
"Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
|
|
31815
32125
|
)
|
|
31816
32126
|
]).describe(
|
|
31817
|
-
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
|
|
32127
|
+
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
|
|
31818
32128
|
).or(import_zod6.z.null()).optional()
|
|
31819
32129
|
}).optional(),
|
|
31820
32130
|
output: import_zod6.z.object({
|
|
@@ -31887,7 +32197,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
31887
32197
|
server_label: import_zod6.z.string().describe(
|
|
31888
32198
|
"A label for this MCP server, used to identify it in tool calls.\n"
|
|
31889
32199
|
),
|
|
31890
|
-
server_url: import_zod6.z.string().optional().describe(
|
|
32200
|
+
server_url: import_zod6.z.string().url().optional().describe(
|
|
31891
32201
|
"The URL for the MCP server. One of `server_url` or `connector_id` must be\nprovided.\n"
|
|
31892
32202
|
),
|
|
31893
32203
|
connector_id: import_zod6.z.enum([
|
|
@@ -31965,6 +32275,16 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
31965
32275
|
).default(createRealtimeClientSecretBodySessionToolChoiceDefault).describe(
|
|
31966
32276
|
"How the model chooses tools. Provide one of the string modes or force a specific\nfunction/MCP tool.\n"
|
|
31967
32277
|
),
|
|
32278
|
+
parallel_tool_calls: import_zod6.z.boolean().optional().describe(
|
|
32279
|
+
"Whether the model may call multiple tools in parallel. Only supported by\nreasoning Realtime models such as `gpt-realtime-2`.\n"
|
|
32280
|
+
),
|
|
32281
|
+
reasoning: import_zod6.z.object({
|
|
32282
|
+
effort: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).default(createRealtimeClientSecretBodySessionReasoningEffortDefault).describe(
|
|
32283
|
+
"Constrains effort on reasoning for reasoning-capable Realtime models such as\n`gpt-realtime-2`.\n"
|
|
32284
|
+
)
|
|
32285
|
+
}).optional().describe(
|
|
32286
|
+
"Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.\n"
|
|
32287
|
+
),
|
|
31968
32288
|
max_output_tokens: import_zod6.z.number().or(import_zod6.z.enum(["inf"])).optional().describe(
|
|
31969
32289
|
"Maximum number of output tokens for a single assistant response,\ninclusive of tool calls. Provide an integer between 1 and 4096 to\nlimit output tokens, or `inf` for the maximum available tokens for a\ngiven model. Defaults to `inf`.\n"
|
|
31970
32290
|
),
|
|
@@ -32004,7 +32324,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
32004
32324
|
).or(
|
|
32005
32325
|
import_zod6.z.object({
|
|
32006
32326
|
type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
|
|
32007
|
-
image_url: import_zod6.z.string().describe(
|
|
32327
|
+
image_url: import_zod6.z.string().url().describe(
|
|
32008
32328
|
"The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
|
|
32009
32329
|
).or(import_zod6.z.null()).optional(),
|
|
32010
32330
|
file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
|
|
@@ -32018,7 +32338,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
32018
32338
|
file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
|
|
32019
32339
|
filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
|
|
32020
32340
|
file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
|
|
32021
|
-
file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
|
|
32341
|
+
file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
|
|
32022
32342
|
detail: import_zod6.z.enum(["low", "high"]).optional()
|
|
32023
32343
|
}).describe("A file input to the model.")
|
|
32024
32344
|
)
|
|
@@ -32054,16 +32374,20 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
32054
32374
|
"gpt-4o-mini-transcribe",
|
|
32055
32375
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
32056
32376
|
"gpt-4o-transcribe",
|
|
32057
|
-
"gpt-4o-transcribe-diarize"
|
|
32377
|
+
"gpt-4o-transcribe-diarize",
|
|
32378
|
+
"gpt-realtime-whisper"
|
|
32058
32379
|
])
|
|
32059
32380
|
).optional().describe(
|
|
32060
|
-
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`,
|
|
32381
|
+
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
|
|
32061
32382
|
),
|
|
32062
32383
|
language: import_zod6.z.string().optional().describe(
|
|
32063
32384
|
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
32064
32385
|
),
|
|
32065
32386
|
prompt: import_zod6.z.string().optional().describe(
|
|
32066
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
32387
|
+
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
|
|
32388
|
+
),
|
|
32389
|
+
delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
|
|
32390
|
+
"Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
|
|
32067
32391
|
)
|
|
32068
32392
|
}).optional(),
|
|
32069
32393
|
noise_reduction: import_zod6.z.object({
|
|
@@ -32130,7 +32454,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
32130
32454
|
"Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
|
|
32131
32455
|
)
|
|
32132
32456
|
]).describe(
|
|
32133
|
-
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
|
|
32457
|
+
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
|
|
32134
32458
|
).or(import_zod6.z.null()).optional()
|
|
32135
32459
|
}).optional()
|
|
32136
32460
|
}).optional().describe("Configuration for input and output audio.\n"),
|
|
@@ -32161,6 +32485,7 @@ var createRealtimeClientSecretResponseSessionTracingDefaultTwo = "auto";
|
|
|
32161
32485
|
var createRealtimeClientSecretResponseSessionTracingDefaultOne = null;
|
|
32162
32486
|
var createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne = "always";
|
|
32163
32487
|
var createRealtimeClientSecretResponseSessionToolChoiceDefault = "auto";
|
|
32488
|
+
var createRealtimeClientSecretResponseSessionReasoningEffortDefault = "low";
|
|
32164
32489
|
var createRealtimeClientSecretResponseSessionTruncationRetentionRatioMin = 0;
|
|
32165
32490
|
var createRealtimeClientSecretResponseSessionTruncationRetentionRatioMax = 1;
|
|
32166
32491
|
var createRealtimeClientSecretResponseSessionTruncationTokenLimitsPostInstructionsMin = 0;
|
|
@@ -32170,17 +32495,14 @@ var createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo = "in
|
|
|
32170
32495
|
var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
32171
32496
|
value: import_zod6.z.string().describe("The generated client secret value."),
|
|
32172
32497
|
expires_at: import_zod6.z.number().describe("Expiration timestamp for the client secret, in seconds since epoch."),
|
|
32173
|
-
session: import_zod6.z.
|
|
32498
|
+
session: import_zod6.z.union([
|
|
32174
32499
|
import_zod6.z.object({
|
|
32175
|
-
client_secret: import_zod6.z.object({
|
|
32176
|
-
value: import_zod6.z.string().describe(
|
|
32177
|
-
"Ephemeral key usable in client environments to authenticate connections to the Realtime API. Use this in client-side environments rather than a standard API token, which should only be used server-side.\n"
|
|
32178
|
-
),
|
|
32179
|
-
expires_at: import_zod6.z.number().describe(
|
|
32180
|
-
"Timestamp for when the token expires. Currently, all tokens expire\nafter one minute.\n"
|
|
32181
|
-
)
|
|
32182
|
-
}).describe("Ephemeral key returned by the API."),
|
|
32183
32500
|
type: import_zod6.z.enum(["realtime"]).describe("The type of session to create. Always `realtime` for the Realtime API.\n"),
|
|
32501
|
+
id: import_zod6.z.string().describe(
|
|
32502
|
+
"Unique identifier for the session that looks like `sess_1234567890abcdef`.\n"
|
|
32503
|
+
),
|
|
32504
|
+
object: import_zod6.z.enum(["realtime.session"]).describe("The object type. Always `realtime.session`."),
|
|
32505
|
+
expires_at: import_zod6.z.number().optional().describe("Expiration timestamp for the session, in seconds since epoch."),
|
|
32184
32506
|
output_modalities: import_zod6.z.array(import_zod6.z.enum(["text", "audio"])).default(createRealtimeClientSecretResponseSessionOutputModalitiesDefault).describe(
|
|
32185
32507
|
'The set of modalities the model can respond with. It defaults to `["audio"]`, indicating\nthat the model will respond with audio plus a transcript. `["text"]` can be used to make\nthe model respond with text only. It is not possible to request both `text` and `audio` at the same time.\n'
|
|
32186
32508
|
),
|
|
@@ -32188,6 +32510,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32188
32510
|
import_zod6.z.enum([
|
|
32189
32511
|
"gpt-realtime",
|
|
32190
32512
|
"gpt-realtime-1.5",
|
|
32513
|
+
"gpt-realtime-2",
|
|
32191
32514
|
"gpt-realtime-2025-08-28",
|
|
32192
32515
|
"gpt-4o-realtime-preview",
|
|
32193
32516
|
"gpt-4o-realtime-preview-2024-10-01",
|
|
@@ -32210,15 +32533,15 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32210
32533
|
audio: import_zod6.z.object({
|
|
32211
32534
|
input: import_zod6.z.object({
|
|
32212
32535
|
format: import_zod6.z.object({
|
|
32213
|
-
type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
|
|
32214
|
-
rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
|
|
32536
|
+
type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
|
|
32537
|
+
rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
|
|
32215
32538
|
}).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
|
|
32216
32539
|
import_zod6.z.object({
|
|
32217
|
-
type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
|
|
32540
|
+
type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
|
|
32218
32541
|
}).describe("The G.711 \u03BC-law format.")
|
|
32219
32542
|
).or(
|
|
32220
32543
|
import_zod6.z.object({
|
|
32221
|
-
type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
|
|
32544
|
+
type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
|
|
32222
32545
|
}).describe("The G.711 A-law format.")
|
|
32223
32546
|
).optional(),
|
|
32224
32547
|
transcription: import_zod6.z.object({
|
|
@@ -32228,20 +32551,19 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32228
32551
|
"gpt-4o-mini-transcribe",
|
|
32229
32552
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
32230
32553
|
"gpt-4o-transcribe",
|
|
32231
|
-
"gpt-4o-transcribe-diarize"
|
|
32554
|
+
"gpt-4o-transcribe-diarize",
|
|
32555
|
+
"gpt-realtime-whisper"
|
|
32232
32556
|
])
|
|
32233
32557
|
).optional().describe(
|
|
32234
|
-
"The model
|
|
32235
|
-
),
|
|
32236
|
-
language: import_zod6.z.string().optional().describe(
|
|
32237
|
-
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
32558
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
32238
32559
|
),
|
|
32560
|
+
language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
|
|
32239
32561
|
prompt: import_zod6.z.string().optional().describe(
|
|
32240
|
-
|
|
32562
|
+
"The prompt configured for input audio transcription, when present.\n"
|
|
32241
32563
|
)
|
|
32242
32564
|
}).optional(),
|
|
32243
32565
|
noise_reduction: import_zod6.z.object({
|
|
32244
|
-
type: import_zod6.z.enum(["near_field", "far_field"]).describe(
|
|
32566
|
+
type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
|
|
32245
32567
|
"Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones such as laptop or conference room microphones.\n"
|
|
32246
32568
|
)
|
|
32247
32569
|
}).optional().describe(
|
|
@@ -32304,20 +32626,20 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32304
32626
|
"Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
|
|
32305
32627
|
)
|
|
32306
32628
|
]).describe(
|
|
32307
|
-
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
|
|
32629
|
+
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
|
|
32308
32630
|
).or(import_zod6.z.null()).optional()
|
|
32309
32631
|
}).optional(),
|
|
32310
32632
|
output: import_zod6.z.object({
|
|
32311
32633
|
format: import_zod6.z.object({
|
|
32312
|
-
type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
|
|
32313
|
-
rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
|
|
32634
|
+
type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
|
|
32635
|
+
rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
|
|
32314
32636
|
}).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
|
|
32315
32637
|
import_zod6.z.object({
|
|
32316
|
-
type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
|
|
32638
|
+
type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
|
|
32317
32639
|
}).describe("The G.711 \u03BC-law format.")
|
|
32318
32640
|
).or(
|
|
32319
32641
|
import_zod6.z.object({
|
|
32320
|
-
type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
|
|
32642
|
+
type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
|
|
32321
32643
|
}).describe("The G.711 A-law format.")
|
|
32322
32644
|
).optional(),
|
|
32323
32645
|
voice: import_zod6.z.string().or(
|
|
@@ -32361,7 +32683,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32361
32683
|
).or(import_zod6.z.null()).optional(),
|
|
32362
32684
|
tools: import_zod6.z.array(
|
|
32363
32685
|
import_zod6.z.object({
|
|
32364
|
-
type: import_zod6.z.enum(["function"]).describe("The type of the tool, i.e. `function`."),
|
|
32686
|
+
type: import_zod6.z.enum(["function"]).optional().describe("The type of the tool, i.e. `function`."),
|
|
32365
32687
|
name: import_zod6.z.string().optional().describe("The name of the function."),
|
|
32366
32688
|
description: import_zod6.z.string().optional().describe(
|
|
32367
32689
|
"The description of the function, including guidance on when and how\nto call it, and guidance about what to tell the user when calling\n(if anything).\n"
|
|
@@ -32373,7 +32695,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32373
32695
|
server_label: import_zod6.z.string().describe(
|
|
32374
32696
|
"A label for this MCP server, used to identify it in tool calls.\n"
|
|
32375
32697
|
),
|
|
32376
|
-
server_url: import_zod6.z.string().optional().describe(
|
|
32698
|
+
server_url: import_zod6.z.string().url().optional().describe(
|
|
32377
32699
|
"The URL for the MCP server. One of `server_url` or `connector_id` must be\nprovided.\n"
|
|
32378
32700
|
),
|
|
32379
32701
|
connector_id: import_zod6.z.enum([
|
|
@@ -32385,7 +32707,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32385
32707
|
"connector_outlookcalendar",
|
|
32386
32708
|
"connector_outlookemail",
|
|
32387
32709
|
"connector_sharepoint"
|
|
32388
|
-
]).describe(
|
|
32710
|
+
]).optional().describe(
|
|
32389
32711
|
"Identifier for service connectors, like those available in ChatGPT. One of\n`server_url` or `connector_id` must be provided. Learn more about service\nconnectors [here](/docs/guides/tools-remote-mcp#connectors).\n\nCurrently supported `connector_id` values are:\n\n- Dropbox: `connector_dropbox`\n- Gmail: `connector_gmail`\n- Google Calendar: `connector_googlecalendar`\n- Google Drive: `connector_googledrive`\n- Microsoft Teams: `connector_microsoftteams`\n- Outlook Calendar: `connector_outlookcalendar`\n- Outlook Email: `connector_outlookemail`\n- SharePoint: `connector_sharepoint`\n"
|
|
32390
32712
|
),
|
|
32391
32713
|
authorization: import_zod6.z.string().optional().describe(
|
|
@@ -32451,6 +32773,13 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32451
32773
|
).default(createRealtimeClientSecretResponseSessionToolChoiceDefault).describe(
|
|
32452
32774
|
"How the model chooses tools. Provide one of the string modes or force a specific\nfunction/MCP tool.\n"
|
|
32453
32775
|
),
|
|
32776
|
+
reasoning: import_zod6.z.object({
|
|
32777
|
+
effort: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).default(createRealtimeClientSecretResponseSessionReasoningEffortDefault).describe(
|
|
32778
|
+
"Constrains effort on reasoning for reasoning-capable Realtime models such as\n`gpt-realtime-2`.\n"
|
|
32779
|
+
)
|
|
32780
|
+
}).optional().describe(
|
|
32781
|
+
"Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.\n"
|
|
32782
|
+
),
|
|
32454
32783
|
max_output_tokens: import_zod6.z.number().or(import_zod6.z.enum(["inf"])).optional().describe(
|
|
32455
32784
|
"Maximum number of output tokens for a single assistant response,\ninclusive of tool calls. Provide an integer between 1 and 4096 to\nlimit output tokens, or `inf` for the maximum available tokens for a\ngiven model. Defaults to `inf`.\n"
|
|
32456
32785
|
),
|
|
@@ -32490,7 +32819,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32490
32819
|
).or(
|
|
32491
32820
|
import_zod6.z.object({
|
|
32492
32821
|
type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
|
|
32493
|
-
image_url: import_zod6.z.string().describe(
|
|
32822
|
+
image_url: import_zod6.z.string().url().describe(
|
|
32494
32823
|
"The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
|
|
32495
32824
|
).or(import_zod6.z.null()).optional(),
|
|
32496
32825
|
file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
|
|
@@ -32504,8 +32833,8 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32504
32833
|
file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
|
|
32505
32834
|
filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
|
|
32506
32835
|
file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
|
|
32507
|
-
file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
|
|
32508
|
-
detail: import_zod6.z.enum(["low", "high"])
|
|
32836
|
+
file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
|
|
32837
|
+
detail: import_zod6.z.enum(["low", "high"]).optional()
|
|
32509
32838
|
}).describe("A file input to the model.")
|
|
32510
32839
|
)
|
|
32511
32840
|
).describe(
|
|
@@ -32514,9 +32843,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32514
32843
|
}).describe(
|
|
32515
32844
|
"Reference to a prompt template and its variables.\n[Learn more](/docs/guides/text?api-mode=responses#reusable-prompts).\n"
|
|
32516
32845
|
).or(import_zod6.z.null()).optional()
|
|
32517
|
-
}).describe(
|
|
32518
|
-
"A new Realtime session configuration, with an ephemeral key. Default TTL\nfor keys is one minute.\n"
|
|
32519
|
-
),
|
|
32846
|
+
}).describe("A Realtime session configuration object.\n"),
|
|
32520
32847
|
import_zod6.z.object({
|
|
32521
32848
|
type: import_zod6.z.enum(["transcription"]).describe(
|
|
32522
32849
|
"The type of session. Always `transcription` for transcription sessions.\n"
|
|
@@ -32532,15 +32859,15 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32532
32859
|
audio: import_zod6.z.object({
|
|
32533
32860
|
input: import_zod6.z.object({
|
|
32534
32861
|
format: import_zod6.z.object({
|
|
32535
|
-
type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
|
|
32536
|
-
rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
|
|
32862
|
+
type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
|
|
32863
|
+
rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
|
|
32537
32864
|
}).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
|
|
32538
32865
|
import_zod6.z.object({
|
|
32539
|
-
type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
|
|
32866
|
+
type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
|
|
32540
32867
|
}).describe("The G.711 \u03BC-law format.")
|
|
32541
32868
|
).or(
|
|
32542
32869
|
import_zod6.z.object({
|
|
32543
|
-
type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
|
|
32870
|
+
type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
|
|
32544
32871
|
}).describe("The G.711 A-law format.")
|
|
32545
32872
|
).optional(),
|
|
32546
32873
|
transcription: import_zod6.z.object({
|
|
@@ -32550,20 +32877,19 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32550
32877
|
"gpt-4o-mini-transcribe",
|
|
32551
32878
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
32552
32879
|
"gpt-4o-transcribe",
|
|
32553
|
-
"gpt-4o-transcribe-diarize"
|
|
32880
|
+
"gpt-4o-transcribe-diarize",
|
|
32881
|
+
"gpt-realtime-whisper"
|
|
32554
32882
|
])
|
|
32555
32883
|
).optional().describe(
|
|
32556
|
-
"The model
|
|
32557
|
-
),
|
|
32558
|
-
language: import_zod6.z.string().optional().describe(
|
|
32559
|
-
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
32884
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
32560
32885
|
),
|
|
32886
|
+
language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
|
|
32561
32887
|
prompt: import_zod6.z.string().optional().describe(
|
|
32562
|
-
|
|
32888
|
+
"The prompt configured for input audio transcription, when present.\n"
|
|
32563
32889
|
)
|
|
32564
32890
|
}).optional(),
|
|
32565
32891
|
noise_reduction: import_zod6.z.object({
|
|
32566
|
-
type: import_zod6.z.enum(["near_field", "far_field"]).describe(
|
|
32892
|
+
type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
|
|
32567
32893
|
"Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones such as laptop or conference room microphones.\n"
|
|
32568
32894
|
)
|
|
32569
32895
|
}).optional().describe("Configuration for input audio noise reduction.\n"),
|
|
@@ -32580,8 +32906,10 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32580
32906
|
silence_duration_ms: import_zod6.z.number().optional().describe(
|
|
32581
32907
|
"Duration of silence to detect speech stop (in milliseconds). Defaults\nto 500ms. With shorter values the model will respond more quickly,\nbut may jump in on short pauses from the user.\n"
|
|
32582
32908
|
)
|
|
32583
|
-
}).
|
|
32584
|
-
"Configuration for turn detection. Can be set to `null` to turn off. Server\nVAD means that the model will detect the start and end of speech based on\naudio volume and respond at the end of user speech.\n"
|
|
32909
|
+
}).describe(
|
|
32910
|
+
"Configuration for turn detection. Can be set to `null` to turn off. Server\nVAD means that the model will detect the start and end of speech based on\naudio volume and respond at the end of user speech. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported.\n"
|
|
32911
|
+
).or(import_zod6.z.null()).optional().describe(
|
|
32912
|
+
"Configuration for turn detection. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported.\n"
|
|
32585
32913
|
)
|
|
32586
32914
|
}).optional()
|
|
32587
32915
|
}).optional().describe("Configuration for input audio for the session.\n")
|
|
@@ -32721,7 +33049,7 @@ var createRealtimeSessionBody = import_zod6.z.object({
|
|
|
32721
33049
|
).or(
|
|
32722
33050
|
import_zod6.z.object({
|
|
32723
33051
|
type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
|
|
32724
|
-
image_url: import_zod6.z.string().describe(
|
|
33052
|
+
image_url: import_zod6.z.string().url().describe(
|
|
32725
33053
|
"The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
|
|
32726
33054
|
).or(import_zod6.z.null()).optional(),
|
|
32727
33055
|
file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
|
|
@@ -32735,7 +33063,7 @@ var createRealtimeSessionBody = import_zod6.z.object({
|
|
|
32735
33063
|
file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
|
|
32736
33064
|
filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
|
|
32737
33065
|
file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
|
|
32738
|
-
file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
|
|
33066
|
+
file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
|
|
32739
33067
|
detail: import_zod6.z.enum(["low", "high"]).optional()
|
|
32740
33068
|
}).describe("A file input to the model.")
|
|
32741
33069
|
)
|
|
@@ -32784,17 +33112,14 @@ var createRealtimeSessionResponse = import_zod6.z.object({
|
|
|
32784
33112
|
"gpt-4o-mini-transcribe",
|
|
32785
33113
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
32786
33114
|
"gpt-4o-transcribe",
|
|
32787
|
-
"gpt-4o-transcribe-diarize"
|
|
33115
|
+
"gpt-4o-transcribe-diarize",
|
|
33116
|
+
"gpt-realtime-whisper"
|
|
32788
33117
|
])
|
|
32789
33118
|
).optional().describe(
|
|
32790
|
-
"The model
|
|
32791
|
-
),
|
|
32792
|
-
language: import_zod6.z.string().optional().describe(
|
|
32793
|
-
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
33119
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
32794
33120
|
),
|
|
32795
|
-
|
|
32796
|
-
|
|
32797
|
-
)
|
|
33121
|
+
language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
|
|
33122
|
+
prompt: import_zod6.z.string().optional().describe("The prompt configured for input audio transcription, when present.\n")
|
|
32798
33123
|
}).optional(),
|
|
32799
33124
|
noise_reduction: import_zod6.z.object({
|
|
32800
33125
|
type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
|
|
@@ -32920,16 +33245,20 @@ var createRealtimeTranscriptionSessionBody = import_zod6.z.object({
|
|
|
32920
33245
|
"gpt-4o-mini-transcribe",
|
|
32921
33246
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
32922
33247
|
"gpt-4o-transcribe",
|
|
32923
|
-
"gpt-4o-transcribe-diarize"
|
|
33248
|
+
"gpt-4o-transcribe-diarize",
|
|
33249
|
+
"gpt-realtime-whisper"
|
|
32924
33250
|
])
|
|
32925
33251
|
).optional().describe(
|
|
32926
|
-
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`,
|
|
33252
|
+
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
|
|
32927
33253
|
),
|
|
32928
33254
|
language: import_zod6.z.string().optional().describe(
|
|
32929
33255
|
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
32930
33256
|
),
|
|
32931
33257
|
prompt: import_zod6.z.string().optional().describe(
|
|
32932
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
33258
|
+
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
|
|
33259
|
+
),
|
|
33260
|
+
delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
|
|
33261
|
+
"Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
|
|
32933
33262
|
)
|
|
32934
33263
|
}).optional(),
|
|
32935
33264
|
include: import_zod6.z.array(import_zod6.z.enum(["item.input_audio_transcription.logprobs"])).optional().describe(
|
|
@@ -32958,17 +33287,14 @@ var createRealtimeTranscriptionSessionResponse = import_zod6.z.object({
|
|
|
32958
33287
|
"gpt-4o-mini-transcribe",
|
|
32959
33288
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
32960
33289
|
"gpt-4o-transcribe",
|
|
32961
|
-
"gpt-4o-transcribe-diarize"
|
|
33290
|
+
"gpt-4o-transcribe-diarize",
|
|
33291
|
+
"gpt-realtime-whisper"
|
|
32962
33292
|
])
|
|
32963
33293
|
).optional().describe(
|
|
32964
|
-
"The model
|
|
33294
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
32965
33295
|
),
|
|
32966
|
-
language: import_zod6.z.string().optional().describe(
|
|
32967
|
-
|
|
32968
|
-
),
|
|
32969
|
-
prompt: import_zod6.z.string().optional().describe(
|
|
32970
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
32971
|
-
)
|
|
33296
|
+
language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
|
|
33297
|
+
prompt: import_zod6.z.string().optional().describe("The prompt configured for input audio transcription, when present.\n")
|
|
32972
33298
|
}).optional(),
|
|
32973
33299
|
turn_detection: import_zod6.z.object({
|
|
32974
33300
|
type: import_zod6.z.string().optional().describe("Type of turn detection, only `server_vad` is currently supported.\n"),
|
|
@@ -36353,6 +36679,7 @@ __export(sonioxPublicAPI_zod_exports, {
|
|
|
36353
36679
|
createTranscriptionBodyWebhookUrlRegExpOne: () => createTranscriptionBodyWebhookUrlRegExpOne,
|
|
36354
36680
|
deleteFileParams: () => deleteFileParams,
|
|
36355
36681
|
deleteTranscriptionParams: () => deleteTranscriptionParams,
|
|
36682
|
+
getConcurrencyLimitsResponse: () => getConcurrencyLimitsResponse,
|
|
36356
36683
|
getFileParams: () => getFileParams,
|
|
36357
36684
|
getFileResponse: () => getFileResponse,
|
|
36358
36685
|
getFilesCountResponse: () => getFilesCountResponse,
|
|
@@ -36370,6 +36697,12 @@ __export(sonioxPublicAPI_zod_exports, {
|
|
|
36370
36697
|
getTranscriptionsQueryLimitMax: () => getTranscriptionsQueryLimitMax,
|
|
36371
36698
|
getTranscriptionsQueryParams: () => getTranscriptionsQueryParams,
|
|
36372
36699
|
getTranscriptionsResponse: () => getTranscriptionsResponse,
|
|
36700
|
+
getTtsModelsResponse: () => getTtsModelsResponse,
|
|
36701
|
+
getUsageLogsQueryLimitDefault: () => getUsageLogsQueryLimitDefault,
|
|
36702
|
+
getUsageLogsQueryLimitMax: () => getUsageLogsQueryLimitMax,
|
|
36703
|
+
getUsageLogsQueryParams: () => getUsageLogsQueryParams,
|
|
36704
|
+
getUsageLogsQuerySortDefault: () => getUsageLogsQuerySortDefault,
|
|
36705
|
+
getUsageLogsResponse: () => getUsageLogsResponse,
|
|
36373
36706
|
uploadFileBody: () => uploadFileBody,
|
|
36374
36707
|
uploadFileBodyClientReferenceIdMaxOne: () => uploadFileBodyClientReferenceIdMaxOne
|
|
36375
36708
|
});
|
|
@@ -36620,11 +36953,73 @@ var getModelsResponse = import_zod10.z.object({
|
|
|
36620
36953
|
})
|
|
36621
36954
|
).describe("List of available models and their attributes.")
|
|
36622
36955
|
});
|
|
36956
|
+
var getTtsModelsResponse = import_zod10.z.object({
|
|
36957
|
+
models: import_zod10.z.array(
|
|
36958
|
+
import_zod10.z.object({
|
|
36959
|
+
id: import_zod10.z.string().describe("Unique identifier of the model."),
|
|
36960
|
+
aliased_model_id: import_zod10.z.string().or(import_zod10.z.null()).describe("If this is an alias, the id of the aliased model."),
|
|
36961
|
+
name: import_zod10.z.string().describe("Name of the model."),
|
|
36962
|
+
voices: import_zod10.z.array(
|
|
36963
|
+
import_zod10.z.object({
|
|
36964
|
+
id: import_zod10.z.string().describe("Unique identifier of the voice."),
|
|
36965
|
+
description: import_zod10.z.string().describe("Description of the TTS voice."),
|
|
36966
|
+
gender: import_zod10.z.enum(["male", "female", "neutral"])
|
|
36967
|
+
})
|
|
36968
|
+
).describe("List of available voices for this model."),
|
|
36969
|
+
languages: import_zod10.z.array(
|
|
36970
|
+
import_zod10.z.object({
|
|
36971
|
+
code: import_zod10.z.string().describe("2-letter language code."),
|
|
36972
|
+
name: import_zod10.z.string().describe("Language name.")
|
|
36973
|
+
})
|
|
36974
|
+
).describe("List of languages supported by the model.")
|
|
36975
|
+
})
|
|
36976
|
+
).describe("List of available TTS models and their attributes.")
|
|
36977
|
+
});
|
|
36978
|
+
var getUsageLogsQueryLimitDefault = 1e3;
|
|
36979
|
+
var getUsageLogsQueryLimitMax = 1e3;
|
|
36980
|
+
var getUsageLogsQuerySortDefault = "end_time_asc";
|
|
36981
|
+
var getUsageLogsQueryParams = import_zod10.z.object({
|
|
36982
|
+
start_time: import_zod10.z.string().describe("Start of the time window (inclusive). Filters by request end time."),
|
|
36983
|
+
end_time: import_zod10.z.string().describe("End of the time window (exclusive). Filters by request end time."),
|
|
36984
|
+
limit: import_zod10.z.number().min(1).max(getUsageLogsQueryLimitMax).default(getUsageLogsQueryLimitDefault).describe("Maximum number of usage log entries to return."),
|
|
36985
|
+
sort: import_zod10.z.enum(["end_time_asc", "end_time_desc"]).default(getUsageLogsQuerySortDefault).describe(
|
|
36986
|
+
"Sort order by end_time.Use `end_time_desc` to get the most recent entries first. When paginating, pass the same `sort` value alongside the cursor."
|
|
36987
|
+
),
|
|
36988
|
+
cursor: import_zod10.z.string().or(import_zod10.z.null()).optional().describe("Pagination cursor for the next page of results.")
|
|
36989
|
+
});
|
|
36990
|
+
var getUsageLogsResponse = import_zod10.z.object({
|
|
36991
|
+
usage_logs: import_zod10.z.array(
|
|
36992
|
+
import_zod10.z.object({
|
|
36993
|
+
uuid: import_zod10.z.string().uuid().describe("Unique identifier of the request."),
|
|
36994
|
+
request_scope: import_zod10.z.string().describe("Scope of the request (api / playground)."),
|
|
36995
|
+
client_reference_id: import_zod10.z.string().describe("Client reference ID supplied on the original request. Empty string if none."),
|
|
36996
|
+
model: import_zod10.z.string().describe("Model identifier."),
|
|
36997
|
+
start_time: import_zod10.z.string().datetime({}).describe("When the request started."),
|
|
36998
|
+
end_time: import_zod10.z.string().datetime({}).describe("When the request ended."),
|
|
36999
|
+
input_text_tokens: import_zod10.z.number(),
|
|
37000
|
+
input_audio_tokens: import_zod10.z.number(),
|
|
37001
|
+
input_audio_duration_ms: import_zod10.z.number(),
|
|
37002
|
+
output_text_tokens: import_zod10.z.number(),
|
|
37003
|
+
output_audio_tokens: import_zod10.z.number(),
|
|
37004
|
+
output_audio_duration_ms: import_zod10.z.number(),
|
|
37005
|
+
cost_usd: import_zod10.z.string(),
|
|
37006
|
+
input_cost_usd: import_zod10.z.string(),
|
|
37007
|
+
input_text_cost_usd: import_zod10.z.string(),
|
|
37008
|
+
input_audio_cost_usd: import_zod10.z.string(),
|
|
37009
|
+
output_cost_usd: import_zod10.z.string(),
|
|
37010
|
+
output_text_cost_usd: import_zod10.z.string(),
|
|
37011
|
+
output_audio_cost_usd: import_zod10.z.string()
|
|
37012
|
+
})
|
|
37013
|
+
).describe("Per-request usage log entries ordered by end_time, uuid (per `sort`)."),
|
|
37014
|
+
next_page_cursor: import_zod10.z.string().or(import_zod10.z.null()).optional().describe(
|
|
37015
|
+
"A pagination token that references the next page of results. When more data is available, this field contains a value to pass in the cursor parameter of a subsequent request. When null, no additional results are available."
|
|
37016
|
+
)
|
|
37017
|
+
});
|
|
36623
37018
|
var createTemporaryApiKeyBodyExpiresInSecondsMax = 3600;
|
|
36624
37019
|
var createTemporaryApiKeyBodyClientReferenceIdMaxOne = 256;
|
|
36625
37020
|
var createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne = 18e3;
|
|
36626
37021
|
var createTemporaryApiKeyBody = import_zod10.z.object({
|
|
36627
|
-
usage_type: import_zod10.z.enum(["transcribe_websocket"]),
|
|
37022
|
+
usage_type: import_zod10.z.enum(["transcribe_websocket", "tts_rt"]),
|
|
36628
37023
|
expires_in_seconds: import_zod10.z.number().min(1).max(createTemporaryApiKeyBodyExpiresInSecondsMax).describe("Duration in seconds until the temporary API key expires."),
|
|
36629
37024
|
client_reference_id: import_zod10.z.string().max(createTemporaryApiKeyBodyClientReferenceIdMaxOne).or(import_zod10.z.null()).optional().describe("Optional tracking identifier string. Does not need to be unique."),
|
|
36630
37025
|
single_use: import_zod10.z.boolean().or(import_zod10.z.null()).optional().describe("If true, the temporary API key can be used only once."),
|
|
@@ -36632,6 +37027,28 @@ var createTemporaryApiKeyBody = import_zod10.z.object({
|
|
|
36632
37027
|
"Maximum WebSocket connection duration in seconds. If exceeded, the connection will be dropped. If not set, no limit is applied."
|
|
36633
37028
|
)
|
|
36634
37029
|
});
|
|
37030
|
+
var getConcurrencyLimitsResponse = import_zod10.z.object({
|
|
37031
|
+
project: import_zod10.z.object({
|
|
37032
|
+
current: import_zod10.z.object({
|
|
37033
|
+
transcribe_concurrent: import_zod10.z.number(),
|
|
37034
|
+
tts_concurrent: import_zod10.z.number()
|
|
37035
|
+
}).describe("Live counts read from Redis"),
|
|
37036
|
+
limits: import_zod10.z.object({
|
|
37037
|
+
transcribe_concurrent: import_zod10.z.number().or(import_zod10.z.null()),
|
|
37038
|
+
tts_concurrent: import_zod10.z.number().or(import_zod10.z.null())
|
|
37039
|
+
}).describe("Configured limits")
|
|
37040
|
+
}),
|
|
37041
|
+
organization: import_zod10.z.object({
|
|
37042
|
+
current: import_zod10.z.object({
|
|
37043
|
+
transcribe_concurrent: import_zod10.z.number(),
|
|
37044
|
+
tts_concurrent: import_zod10.z.number()
|
|
37045
|
+
}).describe("Live counts read from Redis"),
|
|
37046
|
+
limits: import_zod10.z.object({
|
|
37047
|
+
transcribe_concurrent: import_zod10.z.number().or(import_zod10.z.null()),
|
|
37048
|
+
tts_concurrent: import_zod10.z.number().or(import_zod10.z.null())
|
|
37049
|
+
}).describe("Configured limits")
|
|
37050
|
+
})
|
|
37051
|
+
});
|
|
36635
37052
|
|
|
36636
37053
|
// src/generated/soniox/streaming-types.zod.ts
|
|
36637
37054
|
var streaming_types_zod_exports = {};
|
|
@@ -36716,10 +37133,10 @@ var sonioxStructuredContextSchema = import_zod11.z.object({
|
|
|
36716
37133
|
var sonioxContextSchema = import_zod11.z.union([sonioxStructuredContextSchema, import_zod11.z.string()]);
|
|
36717
37134
|
var sonioxRealtimeModelSchema = import_zod11.z.enum([
|
|
36718
37135
|
"stt-rt-v4",
|
|
36719
|
-
"stt-rt-v3",
|
|
36720
37136
|
"stt-rt-preview",
|
|
36721
37137
|
"stt-rt-v3-preview",
|
|
36722
|
-
"stt-rt-preview-v2"
|
|
37138
|
+
"stt-rt-preview-v2",
|
|
37139
|
+
"stt-rt-v3"
|
|
36723
37140
|
]);
|
|
36724
37141
|
var streamingTranscriberParams3 = import_zod11.z.object({
|
|
36725
37142
|
model: sonioxRealtimeModelSchema,
|
|
@@ -36727,12 +37144,16 @@ var streamingTranscriberParams3 = import_zod11.z.object({
|
|
|
36727
37144
|
sampleRate: import_zod11.z.number().optional(),
|
|
36728
37145
|
numChannels: import_zod11.z.number().optional(),
|
|
36729
37146
|
languageHints: import_zod11.z.array(import_zod11.z.string()).optional(),
|
|
37147
|
+
languageHintsStrict: import_zod11.z.boolean().optional(),
|
|
36730
37148
|
context: sonioxContextSchema.optional(),
|
|
36731
37149
|
enableSpeakerDiarization: import_zod11.z.boolean().optional(),
|
|
36732
37150
|
enableLanguageIdentification: import_zod11.z.boolean().optional(),
|
|
36733
37151
|
enableEndpointDetection: import_zod11.z.boolean().optional(),
|
|
37152
|
+
maxEndpointDelayMs: import_zod11.z.number().optional(),
|
|
36734
37153
|
translation: sonioxTranslationConfigSchema.optional(),
|
|
36735
|
-
clientReferenceId: import_zod11.z.string().optional()
|
|
37154
|
+
clientReferenceId: import_zod11.z.string().optional(),
|
|
37155
|
+
keepaliveIntervalMs: import_zod11.z.number().optional(),
|
|
37156
|
+
connectTimeoutMs: import_zod11.z.number().optional()
|
|
36736
37157
|
});
|
|
36737
37158
|
var sonioxTranslationStatusSchema = import_zod11.z.enum(["original", "translation", "none"]);
|
|
36738
37159
|
var sonioxTokenSchema = import_zod11.z.object({
|
|
@@ -37324,6 +37745,7 @@ __export(schema_exports5, {
|
|
|
37324
37745
|
V1ListenPostParametersCallbackMethod: () => V1ListenPostParametersCallbackMethod,
|
|
37325
37746
|
V1ListenPostParametersCustomIntentMode: () => V1ListenPostParametersCustomIntentMode,
|
|
37326
37747
|
V1ListenPostParametersCustomTopicMode: () => V1ListenPostParametersCustomTopicMode,
|
|
37748
|
+
V1ListenPostParametersDiarizeModel: () => V1ListenPostParametersDiarizeModel,
|
|
37327
37749
|
V1ListenPostParametersEncoding: () => V1ListenPostParametersEncoding,
|
|
37328
37750
|
V1ListenPostParametersModel0: () => V1ListenPostParametersModel0,
|
|
37329
37751
|
V1ListenPostParametersRedactSchemaOneOf1Items: () => V1ListenPostParametersRedactSchemaOneOf1Items,
|
|
@@ -37362,6 +37784,13 @@ __export(schema_exports5, {
|
|
|
37362
37784
|
V1SpeakPostParametersSampleRate4: () => V1SpeakPostParametersSampleRate4
|
|
37363
37785
|
});
|
|
37364
37786
|
|
|
37787
|
+
// src/generated/deepgram/schema/v1ListenPostParametersDiarizeModel.ts
|
|
37788
|
+
var V1ListenPostParametersDiarizeModel = {
|
|
37789
|
+
latest: "latest",
|
|
37790
|
+
v1: "v1",
|
|
37791
|
+
v2: "v2"
|
|
37792
|
+
};
|
|
37793
|
+
|
|
37365
37794
|
// src/generated/deepgram/schema/v1ListenPostParametersModel0.ts
|
|
37366
37795
|
var V1ListenPostParametersModel0 = {
|
|
37367
37796
|
"nova-3": "nova-3",
|
|
@@ -37578,6 +38007,7 @@ var V1SpeakPostParametersSampleRate = {
|
|
|
37578
38007
|
var schema_exports6 = {};
|
|
37579
38008
|
__export(schema_exports6, {
|
|
37580
38009
|
AudioResponseFormat: () => AudioResponseFormat,
|
|
38010
|
+
AudioTranscriptionDelay: () => AudioTranscriptionDelay,
|
|
37581
38011
|
CreateSpeechRequestResponseFormat: () => CreateSpeechRequestResponseFormat,
|
|
37582
38012
|
CreateSpeechRequestStreamFormat: () => CreateSpeechRequestStreamFormat,
|
|
37583
38013
|
CreateTranscriptionRequestTimestampGranularitiesItem: () => CreateTranscriptionRequestTimestampGranularitiesItem,
|
|
@@ -37597,12 +38027,14 @@ __export(schema_exports6, {
|
|
|
37597
38027
|
RealtimeAudioFormatsAnyOfType: () => RealtimeAudioFormatsAnyOfType,
|
|
37598
38028
|
RealtimeCreateClientSecretRequestExpiresAfterAnchor: () => RealtimeCreateClientSecretRequestExpiresAfterAnchor,
|
|
37599
38029
|
RealtimeFunctionToolType: () => RealtimeFunctionToolType,
|
|
38030
|
+
RealtimeReasoningEffort: () => RealtimeReasoningEffort,
|
|
37600
38031
|
RealtimeSessionCreateRequestGAIncludeItem: () => RealtimeSessionCreateRequestGAIncludeItem,
|
|
37601
38032
|
RealtimeSessionCreateRequestGAOutputModalitiesItem: () => RealtimeSessionCreateRequestGAOutputModalitiesItem,
|
|
37602
38033
|
RealtimeSessionCreateRequestGAType: () => RealtimeSessionCreateRequestGAType,
|
|
37603
38034
|
RealtimeSessionCreateRequestModalitiesItem: () => RealtimeSessionCreateRequestModalitiesItem,
|
|
37604
38035
|
RealtimeSessionCreateRequestToolsItemType: () => RealtimeSessionCreateRequestToolsItemType,
|
|
37605
38036
|
RealtimeSessionCreateResponseGAIncludeItem: () => RealtimeSessionCreateResponseGAIncludeItem,
|
|
38037
|
+
RealtimeSessionCreateResponseGAObject: () => RealtimeSessionCreateResponseGAObject,
|
|
37606
38038
|
RealtimeSessionCreateResponseGAOutputModalitiesItem: () => RealtimeSessionCreateResponseGAOutputModalitiesItem,
|
|
37607
38039
|
RealtimeSessionCreateResponseGAType: () => RealtimeSessionCreateResponseGAType,
|
|
37608
38040
|
RealtimeSessionCreateResponseIncludeItem: () => RealtimeSessionCreateResponseIncludeItem,
|
|
@@ -37633,6 +38065,15 @@ __export(schema_exports6, {
|
|
|
37633
38065
|
VoiceResourceObject: () => VoiceResourceObject
|
|
37634
38066
|
});
|
|
37635
38067
|
|
|
38068
|
+
// src/generated/openai/schema/audioTranscriptionDelay.ts
|
|
38069
|
+
var AudioTranscriptionDelay = {
|
|
38070
|
+
minimal: "minimal",
|
|
38071
|
+
low: "low",
|
|
38072
|
+
medium: "medium",
|
|
38073
|
+
high: "high",
|
|
38074
|
+
xhigh: "xhigh"
|
|
38075
|
+
};
|
|
38076
|
+
|
|
37636
38077
|
// src/generated/openai/schema/createSpeechRequestResponseFormat.ts
|
|
37637
38078
|
var CreateSpeechRequestResponseFormat = {
|
|
37638
38079
|
mp3: "mp3",
|
|
@@ -37745,6 +38186,15 @@ var RealtimeFunctionToolType = {
|
|
|
37745
38186
|
function: "function"
|
|
37746
38187
|
};
|
|
37747
38188
|
|
|
38189
|
+
// src/generated/openai/schema/realtimeReasoningEffort.ts
|
|
38190
|
+
var RealtimeReasoningEffort = {
|
|
38191
|
+
minimal: "minimal",
|
|
38192
|
+
low: "low",
|
|
38193
|
+
medium: "medium",
|
|
38194
|
+
high: "high",
|
|
38195
|
+
xhigh: "xhigh"
|
|
38196
|
+
};
|
|
38197
|
+
|
|
37748
38198
|
// src/generated/openai/schema/realtimeSessionCreateRequestGAIncludeItem.ts
|
|
37749
38199
|
var RealtimeSessionCreateRequestGAIncludeItem = {
|
|
37750
38200
|
iteminput_audio_transcriptionlogprobs: "item.input_audio_transcription.logprobs"
|
|
@@ -37777,6 +38227,11 @@ var RealtimeSessionCreateResponseGAIncludeItem = {
|
|
|
37777
38227
|
iteminput_audio_transcriptionlogprobs: "item.input_audio_transcription.logprobs"
|
|
37778
38228
|
};
|
|
37779
38229
|
|
|
38230
|
+
// src/generated/openai/schema/realtimeSessionCreateResponseGAObject.ts
|
|
38231
|
+
var RealtimeSessionCreateResponseGAObject = {
|
|
38232
|
+
realtimesession: "realtime.session"
|
|
38233
|
+
};
|
|
38234
|
+
|
|
37780
38235
|
// src/generated/openai/schema/realtimeSessionCreateResponseGAOutputModalitiesItem.ts
|
|
37781
38236
|
var RealtimeSessionCreateResponseGAOutputModalitiesItem = {
|
|
37782
38237
|
text: "text",
|
|
@@ -37921,6 +38376,7 @@ __export(schema_exports7, {
|
|
|
37921
38376
|
AutoChaptersResultErrorType: () => AutoChaptersResultErrorType,
|
|
37922
38377
|
ErrorResponseError: () => ErrorResponseError,
|
|
37923
38378
|
GetJobsJobidAlignmentTags: () => GetJobsJobidAlignmentTags,
|
|
38379
|
+
GetJobsJobidObjectUrlsUrlForItem: () => GetJobsJobidObjectUrlsUrlForItem,
|
|
37924
38380
|
GetJobsJobidTranscriptFormat: () => GetJobsJobidTranscriptFormat,
|
|
37925
38381
|
JobDetailsStatus: () => JobDetailsStatus,
|
|
37926
38382
|
JobMode: () => JobMode,
|
|
@@ -37990,6 +38446,13 @@ var GetJobsJobidAlignmentTags = {
|
|
|
37990
38446
|
one_per_line: "one_per_line"
|
|
37991
38447
|
};
|
|
37992
38448
|
|
|
38449
|
+
// src/generated/speechmatics/schema/getJobsJobidObjectUrlsUrlForItem.ts
|
|
38450
|
+
var GetJobsJobidObjectUrlsUrlForItem = {
|
|
38451
|
+
data: "data",
|
|
38452
|
+
audio_mp3: "audio_mp3",
|
|
38453
|
+
transcript: "transcript"
|
|
38454
|
+
};
|
|
38455
|
+
|
|
37993
38456
|
// src/generated/speechmatics/schema/getJobsJobidTranscriptFormat.ts
|
|
37994
38457
|
var GetJobsJobidTranscriptFormat = {
|
|
37995
38458
|
"json-v2": "json-v2",
|
|
@@ -38106,6 +38569,15 @@ var WrittenFormRecognitionResultType = {
|
|
|
38106
38569
|
word: "word"
|
|
38107
38570
|
};
|
|
38108
38571
|
|
|
38572
|
+
// src/generated/soniox/sdk-types.ts
|
|
38573
|
+
var sdk_types_exports = {};
|
|
38574
|
+
__export(sdk_types_exports, {
|
|
38575
|
+
RealtimeSttSession: () => import_node.RealtimeSttSession,
|
|
38576
|
+
SonioxFetchHttpClient: () => import_node.FetchHttpClient,
|
|
38577
|
+
SonioxNodeClient: () => import_node.SonioxNodeClient
|
|
38578
|
+
});
|
|
38579
|
+
var import_node = require("@soniox/node");
|
|
38580
|
+
|
|
38109
38581
|
// src/generated/elevenlabs/schema/index.ts
|
|
38110
38582
|
var schema_exports8 = {};
|
|
38111
38583
|
__export(schema_exports8, {
|
|
@@ -38183,6 +38655,10 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
|
|
|
38183
38655
|
deleteJobsJobidParams: () => deleteJobsJobidParams,
|
|
38184
38656
|
deleteJobsJobidQueryParams: () => deleteJobsJobidQueryParams,
|
|
38185
38657
|
deleteJobsJobidResponse: () => deleteJobsJobidResponse,
|
|
38658
|
+
deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault: () => deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault,
|
|
38659
|
+
deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault: () => deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault,
|
|
38660
|
+
deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
|
|
38661
|
+
deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
|
|
38186
38662
|
deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
|
|
38187
38663
|
deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
|
|
38188
38664
|
deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
|
|
@@ -38198,8 +38674,15 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
|
|
|
38198
38674
|
getJobsJobidDataResponse: () => getJobsJobidDataResponse,
|
|
38199
38675
|
getJobsJobidLogParams: () => getJobsJobidLogParams,
|
|
38200
38676
|
getJobsJobidLogResponse: () => getJobsJobidLogResponse,
|
|
38677
|
+
getJobsJobidObjectUrlsParams: () => getJobsJobidObjectUrlsParams,
|
|
38678
|
+
getJobsJobidObjectUrlsQueryParams: () => getJobsJobidObjectUrlsQueryParams,
|
|
38679
|
+
getJobsJobidObjectUrlsResponse: () => getJobsJobidObjectUrlsResponse,
|
|
38201
38680
|
getJobsJobidParams: () => getJobsJobidParams,
|
|
38202
38681
|
getJobsJobidResponse: () => getJobsJobidResponse,
|
|
38682
|
+
getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault: () => getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault,
|
|
38683
|
+
getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault: () => getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault,
|
|
38684
|
+
getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
|
|
38685
|
+
getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
|
|
38203
38686
|
getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
|
|
38204
38687
|
getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
|
|
38205
38688
|
getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
|
|
@@ -38214,6 +38697,8 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
|
|
|
38214
38697
|
getJobsJobidTranscriptQueryParams: () => getJobsJobidTranscriptQueryParams,
|
|
38215
38698
|
getJobsJobidTranscriptResponse: () => getJobsJobidTranscriptResponse,
|
|
38216
38699
|
getJobsJobidTranscriptResponseJobDurationMin: () => getJobsJobidTranscriptResponseJobDurationMin,
|
|
38700
|
+
getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
|
|
38701
|
+
getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
|
|
38217
38702
|
getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp,
|
|
38218
38703
|
getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
|
|
38219
38704
|
getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax,
|
|
@@ -38225,6 +38710,10 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
|
|
|
38225
38710
|
getJobsQueryLimitMax: () => getJobsQueryLimitMax,
|
|
38226
38711
|
getJobsQueryParams: () => getJobsQueryParams,
|
|
38227
38712
|
getJobsResponse: () => getJobsResponse,
|
|
38713
|
+
getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault: () => getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault,
|
|
38714
|
+
getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault: () => getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault,
|
|
38715
|
+
getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
|
|
38716
|
+
getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
|
|
38228
38717
|
getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
|
|
38229
38718
|
getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
|
|
38230
38719
|
getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
|
|
@@ -38235,12 +38724,18 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
|
|
|
38235
38724
|
getJobsResponseJobsItemDurationMin: () => getJobsResponseJobsItemDurationMin,
|
|
38236
38725
|
getUsageQueryParams: () => getUsageQueryParams,
|
|
38237
38726
|
getUsageResponse: () => getUsageResponse,
|
|
38238
|
-
postJobsBody: () => postJobsBody
|
|
38727
|
+
postJobsBody: () => postJobsBody,
|
|
38728
|
+
postJobsHeader: () => postJobsHeader
|
|
38239
38729
|
});
|
|
38240
38730
|
var import_zod12 = require("zod");
|
|
38731
|
+
var postJobsHeader = import_zod12.z.object({
|
|
38732
|
+
"X-SM-Processing-Data": import_zod12.z.string().optional().describe(
|
|
38733
|
+
'**Note**: Only available for on-prem\nJSON dictionary of processing settings for the job worker. Currently supports `parallel_engines` (integer), which controls the number of engines the worker can use in parallel for this job, and `user_id` (string), which is the user id for this job. Example: `{"parallel_engines": 4}`'
|
|
38734
|
+
)
|
|
38735
|
+
});
|
|
38241
38736
|
var postJobsBody = import_zod12.z.object({
|
|
38242
38737
|
config: import_zod12.z.string().describe(
|
|
38243
|
-
"JSON containing a `JobConfig` model indicating the type and parameters for the recognition job."
|
|
38738
|
+
"JSON containing a [`JobConfig`](/speech-to-text/batch/input#jobconfig-schema) model indicating the type and parameters for the recognition job."
|
|
38244
38739
|
),
|
|
38245
38740
|
data_file: import_zod12.z.instanceof(File).optional().describe(
|
|
38246
38741
|
"The data file to be processed. Alternatively the data file can be fetched from a url specified in `JobConfig`."
|
|
@@ -38262,9 +38757,13 @@ var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitiv
|
|
|
38262
38757
|
var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
|
|
38263
38758
|
var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
|
|
38264
38759
|
var getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
|
|
38760
|
+
var getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
|
|
38761
|
+
var getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
|
|
38265
38762
|
var getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
|
|
38266
38763
|
var getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
|
|
38267
38764
|
var getJobsResponseJobsItemConfigTranslationConfigTargetLanguagesMax = 5;
|
|
38765
|
+
var getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault = "auto";
|
|
38766
|
+
var getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault = "brief";
|
|
38268
38767
|
var getJobsResponse = import_zod12.z.object({
|
|
38269
38768
|
jobs: import_zod12.z.array(
|
|
38270
38769
|
import_zod12.z.object({
|
|
@@ -38344,19 +38843,30 @@ var getJobsResponse = import_zod12.z.object({
|
|
|
38344
38843
|
max_delay_mode: import_zod12.z.enum(["fixed", "flexible"]).optional().describe(
|
|
38345
38844
|
"Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
|
|
38346
38845
|
),
|
|
38846
|
+
audio_filtering_config: import_zod12.z.object({
|
|
38847
|
+
volume_threshold: import_zod12.z.number().min(
|
|
38848
|
+
getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
|
|
38849
|
+
).max(
|
|
38850
|
+
getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
|
|
38851
|
+
).optional().describe(
|
|
38852
|
+
"Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
|
|
38853
|
+
)
|
|
38854
|
+
}).optional().describe("Configuration for limiting the transcription of quiet audio."),
|
|
38347
38855
|
transcript_filtering_config: import_zod12.z.object({
|
|
38348
38856
|
remove_disfluencies: import_zod12.z.boolean().optional().describe(
|
|
38349
|
-
"If true, words
|
|
38857
|
+
"If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
|
|
38350
38858
|
),
|
|
38351
38859
|
replacements: import_zod12.z.array(
|
|
38352
38860
|
import_zod12.z.object({
|
|
38353
|
-
from: import_zod12.z.string(),
|
|
38354
|
-
to: import_zod12.z.string()
|
|
38861
|
+
from: import_zod12.z.string().describe("The text or pattern identified to be replaced."),
|
|
38862
|
+
to: import_zod12.z.string().describe(
|
|
38863
|
+
"The corrected or formatted string to appear in the transcript."
|
|
38864
|
+
)
|
|
38355
38865
|
})
|
|
38356
38866
|
).optional().describe(
|
|
38357
|
-
|
|
38867
|
+
'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
|
|
38358
38868
|
)
|
|
38359
|
-
}).optional().describe("Configuration for applying filtering to the transcription"),
|
|
38869
|
+
}).optional().describe("Configuration for applying filtering to the transcription."),
|
|
38360
38870
|
speaker_diarization_config: import_zod12.z.object({
|
|
38361
38871
|
prefer_current_speaker: import_zod12.z.boolean().optional().describe(
|
|
38362
38872
|
'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
|
|
@@ -38367,6 +38877,19 @@ var getJobsResponse = import_zod12.z.object({
|
|
|
38367
38877
|
getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
|
|
38368
38878
|
).optional().describe(
|
|
38369
38879
|
"Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
|
|
38880
|
+
),
|
|
38881
|
+
get_speakers: import_zod12.z.boolean().optional().describe(
|
|
38882
|
+
"If true, speaker identifiers will be returned at the end of transcript."
|
|
38883
|
+
),
|
|
38884
|
+
speakers: import_zod12.z.array(
|
|
38885
|
+
import_zod12.z.object({
|
|
38886
|
+
label: import_zod12.z.string().min(1).describe(
|
|
38887
|
+
"Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
|
|
38888
|
+
),
|
|
38889
|
+
speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
|
|
38890
|
+
})
|
|
38891
|
+
).optional().describe(
|
|
38892
|
+
"Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
|
|
38370
38893
|
)
|
|
38371
38894
|
}).optional().describe("Configuration for speaker diarization")
|
|
38372
38895
|
}).optional(),
|
|
@@ -38424,10 +38947,14 @@ var getJobsResponse = import_zod12.z.object({
|
|
|
38424
38947
|
default_language: import_zod12.z.string().optional()
|
|
38425
38948
|
}).optional(),
|
|
38426
38949
|
summarization_config: import_zod12.z.object({
|
|
38427
|
-
content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).
|
|
38428
|
-
|
|
38950
|
+
content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).default(getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault).describe(
|
|
38951
|
+
"Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
|
|
38952
|
+
),
|
|
38953
|
+
summary_length: import_zod12.z.enum(["brief", "detailed"]).default(getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault).describe(
|
|
38954
|
+
"Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
|
|
38955
|
+
),
|
|
38429
38956
|
summary_type: import_zod12.z.enum(["paragraphs", "bullets"]).optional()
|
|
38430
|
-
}).optional(),
|
|
38957
|
+
}).optional().describe("Configuration options for summarization."),
|
|
38431
38958
|
sentiment_analysis_config: import_zod12.z.object({}).optional(),
|
|
38432
38959
|
topic_detection_config: import_zod12.z.object({
|
|
38433
38960
|
topics: import_zod12.z.array(import_zod12.z.string()).optional()
|
|
@@ -38449,7 +38976,7 @@ var getJobsResponse = import_zod12.z.object({
|
|
|
38449
38976
|
"Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
|
|
38450
38977
|
)
|
|
38451
38978
|
}).describe(
|
|
38452
|
-
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs
|
|
38979
|
+
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
|
|
38453
38980
|
)
|
|
38454
38981
|
)
|
|
38455
38982
|
});
|
|
@@ -38461,9 +38988,13 @@ var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitiv
|
|
|
38461
38988
|
var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
|
|
38462
38989
|
var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
|
|
38463
38990
|
var getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
|
|
38991
|
+
var getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
|
|
38992
|
+
var getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
|
|
38464
38993
|
var getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
|
|
38465
38994
|
var getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
|
|
38466
38995
|
var getJobsJobidResponseJobConfigTranslationConfigTargetLanguagesMax = 5;
|
|
38996
|
+
var getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault = "auto";
|
|
38997
|
+
var getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault = "brief";
|
|
38467
38998
|
var getJobsJobidResponse = import_zod12.z.object({
|
|
38468
38999
|
job: import_zod12.z.object({
|
|
38469
39000
|
created_at: import_zod12.z.string().datetime({}).describe("The UTC date time the job was created."),
|
|
@@ -38540,19 +39071,30 @@ var getJobsJobidResponse = import_zod12.z.object({
|
|
|
38540
39071
|
max_delay_mode: import_zod12.z.enum(["fixed", "flexible"]).optional().describe(
|
|
38541
39072
|
"Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
|
|
38542
39073
|
),
|
|
39074
|
+
audio_filtering_config: import_zod12.z.object({
|
|
39075
|
+
volume_threshold: import_zod12.z.number().min(
|
|
39076
|
+
getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
|
|
39077
|
+
).max(
|
|
39078
|
+
getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
|
|
39079
|
+
).optional().describe(
|
|
39080
|
+
"Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
|
|
39081
|
+
)
|
|
39082
|
+
}).optional().describe("Configuration for limiting the transcription of quiet audio."),
|
|
38543
39083
|
transcript_filtering_config: import_zod12.z.object({
|
|
38544
39084
|
remove_disfluencies: import_zod12.z.boolean().optional().describe(
|
|
38545
|
-
"If true, words
|
|
39085
|
+
"If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
|
|
38546
39086
|
),
|
|
38547
39087
|
replacements: import_zod12.z.array(
|
|
38548
39088
|
import_zod12.z.object({
|
|
38549
|
-
from: import_zod12.z.string(),
|
|
38550
|
-
to: import_zod12.z.string()
|
|
39089
|
+
from: import_zod12.z.string().describe("The text or pattern identified to be replaced."),
|
|
39090
|
+
to: import_zod12.z.string().describe(
|
|
39091
|
+
"The corrected or formatted string to appear in the transcript."
|
|
39092
|
+
)
|
|
38551
39093
|
})
|
|
38552
39094
|
).optional().describe(
|
|
38553
|
-
|
|
39095
|
+
'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
|
|
38554
39096
|
)
|
|
38555
|
-
}).optional().describe("Configuration for applying filtering to the transcription"),
|
|
39097
|
+
}).optional().describe("Configuration for applying filtering to the transcription."),
|
|
38556
39098
|
speaker_diarization_config: import_zod12.z.object({
|
|
38557
39099
|
prefer_current_speaker: import_zod12.z.boolean().optional().describe(
|
|
38558
39100
|
'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
|
|
@@ -38563,6 +39105,19 @@ var getJobsJobidResponse = import_zod12.z.object({
|
|
|
38563
39105
|
getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
|
|
38564
39106
|
).optional().describe(
|
|
38565
39107
|
"Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
|
|
39108
|
+
),
|
|
39109
|
+
get_speakers: import_zod12.z.boolean().optional().describe(
|
|
39110
|
+
"If true, speaker identifiers will be returned at the end of transcript."
|
|
39111
|
+
),
|
|
39112
|
+
speakers: import_zod12.z.array(
|
|
39113
|
+
import_zod12.z.object({
|
|
39114
|
+
label: import_zod12.z.string().min(1).describe(
|
|
39115
|
+
"Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
|
|
39116
|
+
),
|
|
39117
|
+
speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
|
|
39118
|
+
})
|
|
39119
|
+
).optional().describe(
|
|
39120
|
+
"Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
|
|
38566
39121
|
)
|
|
38567
39122
|
}).optional().describe("Configuration for speaker diarization")
|
|
38568
39123
|
}).optional(),
|
|
@@ -38618,10 +39173,14 @@ var getJobsJobidResponse = import_zod12.z.object({
|
|
|
38618
39173
|
default_language: import_zod12.z.string().optional()
|
|
38619
39174
|
}).optional(),
|
|
38620
39175
|
summarization_config: import_zod12.z.object({
|
|
38621
|
-
content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).
|
|
38622
|
-
|
|
39176
|
+
content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).default(getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault).describe(
|
|
39177
|
+
"Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
|
|
39178
|
+
),
|
|
39179
|
+
summary_length: import_zod12.z.enum(["brief", "detailed"]).default(getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault).describe(
|
|
39180
|
+
"Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
|
|
39181
|
+
),
|
|
38623
39182
|
summary_type: import_zod12.z.enum(["paragraphs", "bullets"]).optional()
|
|
38624
|
-
}).optional(),
|
|
39183
|
+
}).optional().describe("Configuration options for summarization."),
|
|
38625
39184
|
sentiment_analysis_config: import_zod12.z.object({}).optional(),
|
|
38626
39185
|
topic_detection_config: import_zod12.z.object({
|
|
38627
39186
|
topics: import_zod12.z.array(import_zod12.z.string()).optional()
|
|
@@ -38643,7 +39202,7 @@ var getJobsJobidResponse = import_zod12.z.object({
|
|
|
38643
39202
|
"Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
|
|
38644
39203
|
)
|
|
38645
39204
|
}).describe(
|
|
38646
|
-
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs
|
|
39205
|
+
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
|
|
38647
39206
|
)
|
|
38648
39207
|
});
|
|
38649
39208
|
var deleteJobsJobidParams = import_zod12.z.object({
|
|
@@ -38659,9 +39218,13 @@ var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensi
|
|
|
38659
39218
|
var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
|
|
38660
39219
|
var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
|
|
38661
39220
|
var deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
|
|
39221
|
+
var deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
|
|
39222
|
+
var deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
|
|
38662
39223
|
var deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
|
|
38663
39224
|
var deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
|
|
38664
39225
|
var deleteJobsJobidResponseJobConfigTranslationConfigTargetLanguagesMax = 5;
|
|
39226
|
+
var deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault = "auto";
|
|
39227
|
+
var deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault = "brief";
|
|
38665
39228
|
var deleteJobsJobidResponse = import_zod12.z.object({
|
|
38666
39229
|
job: import_zod12.z.object({
|
|
38667
39230
|
created_at: import_zod12.z.string().datetime({}).describe("The UTC date time the job was created."),
|
|
@@ -38738,19 +39301,30 @@ var deleteJobsJobidResponse = import_zod12.z.object({
|
|
|
38738
39301
|
max_delay_mode: import_zod12.z.enum(["fixed", "flexible"]).optional().describe(
|
|
38739
39302
|
"Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
|
|
38740
39303
|
),
|
|
39304
|
+
audio_filtering_config: import_zod12.z.object({
|
|
39305
|
+
volume_threshold: import_zod12.z.number().min(
|
|
39306
|
+
deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
|
|
39307
|
+
).max(
|
|
39308
|
+
deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
|
|
39309
|
+
).optional().describe(
|
|
39310
|
+
"Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
|
|
39311
|
+
)
|
|
39312
|
+
}).optional().describe("Configuration for limiting the transcription of quiet audio."),
|
|
38741
39313
|
transcript_filtering_config: import_zod12.z.object({
|
|
38742
39314
|
remove_disfluencies: import_zod12.z.boolean().optional().describe(
|
|
38743
|
-
"If true, words
|
|
39315
|
+
"If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
|
|
38744
39316
|
),
|
|
38745
39317
|
replacements: import_zod12.z.array(
|
|
38746
39318
|
import_zod12.z.object({
|
|
38747
|
-
from: import_zod12.z.string(),
|
|
38748
|
-
to: import_zod12.z.string()
|
|
39319
|
+
from: import_zod12.z.string().describe("The text or pattern identified to be replaced."),
|
|
39320
|
+
to: import_zod12.z.string().describe(
|
|
39321
|
+
"The corrected or formatted string to appear in the transcript."
|
|
39322
|
+
)
|
|
38749
39323
|
})
|
|
38750
39324
|
).optional().describe(
|
|
38751
|
-
|
|
39325
|
+
'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
|
|
38752
39326
|
)
|
|
38753
|
-
}).optional().describe("Configuration for applying filtering to the transcription"),
|
|
39327
|
+
}).optional().describe("Configuration for applying filtering to the transcription."),
|
|
38754
39328
|
speaker_diarization_config: import_zod12.z.object({
|
|
38755
39329
|
prefer_current_speaker: import_zod12.z.boolean().optional().describe(
|
|
38756
39330
|
'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
|
|
@@ -38761,6 +39335,19 @@ var deleteJobsJobidResponse = import_zod12.z.object({
|
|
|
38761
39335
|
deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
|
|
38762
39336
|
).optional().describe(
|
|
38763
39337
|
"Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
|
|
39338
|
+
),
|
|
39339
|
+
get_speakers: import_zod12.z.boolean().optional().describe(
|
|
39340
|
+
"If true, speaker identifiers will be returned at the end of transcript."
|
|
39341
|
+
),
|
|
39342
|
+
speakers: import_zod12.z.array(
|
|
39343
|
+
import_zod12.z.object({
|
|
39344
|
+
label: import_zod12.z.string().min(1).describe(
|
|
39345
|
+
"Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
|
|
39346
|
+
),
|
|
39347
|
+
speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
|
|
39348
|
+
})
|
|
39349
|
+
).optional().describe(
|
|
39350
|
+
"Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
|
|
38764
39351
|
)
|
|
38765
39352
|
}).optional().describe("Configuration for speaker diarization")
|
|
38766
39353
|
}).optional(),
|
|
@@ -38816,10 +39403,14 @@ var deleteJobsJobidResponse = import_zod12.z.object({
|
|
|
38816
39403
|
default_language: import_zod12.z.string().optional()
|
|
38817
39404
|
}).optional(),
|
|
38818
39405
|
summarization_config: import_zod12.z.object({
|
|
38819
|
-
content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).
|
|
38820
|
-
|
|
39406
|
+
content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).default(deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault).describe(
|
|
39407
|
+
"Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
|
|
39408
|
+
),
|
|
39409
|
+
summary_length: import_zod12.z.enum(["brief", "detailed"]).default(deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault).describe(
|
|
39410
|
+
"Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
|
|
39411
|
+
),
|
|
38821
39412
|
summary_type: import_zod12.z.enum(["paragraphs", "bullets"]).optional()
|
|
38822
|
-
}).optional(),
|
|
39413
|
+
}).optional().describe("Configuration options for summarization."),
|
|
38823
39414
|
sentiment_analysis_config: import_zod12.z.object({}).optional(),
|
|
38824
39415
|
topic_detection_config: import_zod12.z.object({
|
|
38825
39416
|
topics: import_zod12.z.array(import_zod12.z.string()).optional()
|
|
@@ -38841,7 +39432,7 @@ var deleteJobsJobidResponse = import_zod12.z.object({
|
|
|
38841
39432
|
"Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
|
|
38842
39433
|
)
|
|
38843
39434
|
}).describe(
|
|
38844
|
-
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs
|
|
39435
|
+
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
|
|
38845
39436
|
)
|
|
38846
39437
|
});
|
|
38847
39438
|
var getJobsJobidDataParams = import_zod12.z.object({
|
|
@@ -38863,6 +39454,8 @@ var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverride
|
|
|
38863
39454
|
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
|
|
38864
39455
|
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
|
|
38865
39456
|
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
|
|
39457
|
+
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
|
|
39458
|
+
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
|
|
38866
39459
|
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
|
|
38867
39460
|
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
|
|
38868
39461
|
var getJobsJobidTranscriptResponseResultsItemVolumeMin = 0;
|
|
@@ -38934,19 +39527,28 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
|
|
|
38934
39527
|
max_delay_mode: import_zod12.z.enum(["fixed", "flexible"]).optional().describe(
|
|
38935
39528
|
"Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
|
|
38936
39529
|
),
|
|
39530
|
+
audio_filtering_config: import_zod12.z.object({
|
|
39531
|
+
volume_threshold: import_zod12.z.number().min(
|
|
39532
|
+
getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
|
|
39533
|
+
).max(
|
|
39534
|
+
getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
|
|
39535
|
+
).optional().describe(
|
|
39536
|
+
"Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
|
|
39537
|
+
)
|
|
39538
|
+
}).optional().describe("Configuration for limiting the transcription of quiet audio."),
|
|
38937
39539
|
transcript_filtering_config: import_zod12.z.object({
|
|
38938
39540
|
remove_disfluencies: import_zod12.z.boolean().optional().describe(
|
|
38939
|
-
"If true, words
|
|
39541
|
+
"If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
|
|
38940
39542
|
),
|
|
38941
39543
|
replacements: import_zod12.z.array(
|
|
38942
39544
|
import_zod12.z.object({
|
|
38943
|
-
from: import_zod12.z.string(),
|
|
38944
|
-
to: import_zod12.z.string()
|
|
39545
|
+
from: import_zod12.z.string().describe("The text or pattern identified to be replaced."),
|
|
39546
|
+
to: import_zod12.z.string().describe("The corrected or formatted string to appear in the transcript.")
|
|
38945
39547
|
})
|
|
38946
39548
|
).optional().describe(
|
|
38947
|
-
|
|
39549
|
+
'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
|
|
38948
39550
|
)
|
|
38949
|
-
}).optional().describe("Configuration for applying filtering to the transcription"),
|
|
39551
|
+
}).optional().describe("Configuration for applying filtering to the transcription."),
|
|
38950
39552
|
speaker_diarization_config: import_zod12.z.object({
|
|
38951
39553
|
prefer_current_speaker: import_zod12.z.boolean().optional().describe(
|
|
38952
39554
|
'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
|
|
@@ -38957,9 +39559,23 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
|
|
|
38957
39559
|
getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
|
|
38958
39560
|
).optional().describe(
|
|
38959
39561
|
"Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
|
|
39562
|
+
),
|
|
39563
|
+
get_speakers: import_zod12.z.boolean().optional().describe(
|
|
39564
|
+
"If true, speaker identifiers will be returned at the end of transcript."
|
|
39565
|
+
),
|
|
39566
|
+
speakers: import_zod12.z.array(
|
|
39567
|
+
import_zod12.z.object({
|
|
39568
|
+
label: import_zod12.z.string().min(1).describe(
|
|
39569
|
+
"Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
|
|
39570
|
+
),
|
|
39571
|
+
speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
|
|
39572
|
+
})
|
|
39573
|
+
).optional().describe(
|
|
39574
|
+
"Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
|
|
38960
39575
|
)
|
|
38961
39576
|
}).optional().describe("Configuration for speaker diarization")
|
|
38962
39577
|
}).optional(),
|
|
39578
|
+
orchestrator_version: import_zod12.z.string().optional().describe("The engine version used to generate transcription output."),
|
|
38963
39579
|
translation_errors: import_zod12.z.array(
|
|
38964
39580
|
import_zod12.z.object({
|
|
38965
39581
|
type: import_zod12.z.enum(["translation_failed", "unsupported_translation_pair"]).optional(),
|
|
@@ -39037,10 +39653,7 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
|
|
|
39037
39653
|
"OTHER"
|
|
39038
39654
|
]).optional(),
|
|
39039
39655
|
message: import_zod12.z.string().optional()
|
|
39040
|
-
}).optional()
|
|
39041
|
-
orchestrator_version: import_zod12.z.string().optional().describe(
|
|
39042
|
-
"Orchestrator version in PEP 440 Format or set to 'version_not_found' as default."
|
|
39043
|
-
)
|
|
39656
|
+
}).optional()
|
|
39044
39657
|
}).describe(
|
|
39045
39658
|
"Summary information about the output from an ASR job, comprising the job type and configuration parameters used when generating the output."
|
|
39046
39659
|
),
|
|
@@ -39123,6 +39736,12 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
|
|
|
39123
39736
|
"An ASR job output item. The primary item types are `word` and `punctuation`. Other item types may be present, for example to provide semantic information of different forms."
|
|
39124
39737
|
)
|
|
39125
39738
|
),
|
|
39739
|
+
speakers: import_zod12.z.array(
|
|
39740
|
+
import_zod12.z.object({
|
|
39741
|
+
label: import_zod12.z.string().min(1).describe("Speaker label."),
|
|
39742
|
+
speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
|
|
39743
|
+
})
|
|
39744
|
+
).optional().describe("List of unique speaker identifiers detected in the transcript."),
|
|
39126
39745
|
translations: import_zod12.z.record(
|
|
39127
39746
|
import_zod12.z.string(),
|
|
39128
39747
|
import_zod12.z.array(
|
|
@@ -39144,13 +39763,23 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
|
|
|
39144
39763
|
sentiment_analysis: import_zod12.z.object({
|
|
39145
39764
|
segments: import_zod12.z.array(
|
|
39146
39765
|
import_zod12.z.object({
|
|
39147
|
-
text: import_zod12.z.string().optional(),
|
|
39148
|
-
|
|
39149
|
-
|
|
39150
|
-
|
|
39151
|
-
|
|
39152
|
-
|
|
39153
|
-
|
|
39766
|
+
text: import_zod12.z.string().optional().describe("Represents the transcript of the analysed segment"),
|
|
39767
|
+
sentiment: import_zod12.z.string().optional().describe(
|
|
39768
|
+
"The assigned sentiment to the segment, which can be positive, neutral or negative"
|
|
39769
|
+
),
|
|
39770
|
+
start_time: import_zod12.z.number().optional().describe(
|
|
39771
|
+
"The timestamp corresponding to the beginning of the transcription segment"
|
|
39772
|
+
),
|
|
39773
|
+
end_time: import_zod12.z.number().optional().describe(
|
|
39774
|
+
"The timestamp corresponding to the end of the transcription segment"
|
|
39775
|
+
),
|
|
39776
|
+
speaker: import_zod12.z.string().optional().describe(
|
|
39777
|
+
"The speaker label for the segment, if speaker diarization is enabled"
|
|
39778
|
+
),
|
|
39779
|
+
channel: import_zod12.z.string().optional().describe(
|
|
39780
|
+
"The channel label for the segment, if channel diarization is enabled"
|
|
39781
|
+
),
|
|
39782
|
+
confidence: import_zod12.z.number().optional().describe("A confidence score in the range of 0-1")
|
|
39154
39783
|
}).describe("Represents a segment of text and its associated sentiment.")
|
|
39155
39784
|
).optional().describe(
|
|
39156
39785
|
"An array of objects that represent a segment of text and its associated sentiment."
|
|
@@ -39209,10 +39838,10 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
|
|
|
39209
39838
|
}).optional().describe("Main object that holds topic detection results."),
|
|
39210
39839
|
chapters: import_zod12.z.array(
|
|
39211
39840
|
import_zod12.z.object({
|
|
39212
|
-
title: import_zod12.z.string().optional(),
|
|
39213
|
-
summary: import_zod12.z.string().optional(),
|
|
39214
|
-
start_time: import_zod12.z.number().optional(),
|
|
39215
|
-
end_time: import_zod12.z.number().optional()
|
|
39841
|
+
title: import_zod12.z.string().optional().describe("The auto-generated title for the chapter"),
|
|
39842
|
+
summary: import_zod12.z.string().optional().describe("An auto-generated paragraph-style, short summary of the chapter"),
|
|
39843
|
+
start_time: import_zod12.z.number().optional().describe("The start time of the chapter in the audio file"),
|
|
39844
|
+
end_time: import_zod12.z.number().optional().describe("The end time of the chapter in the audio file")
|
|
39216
39845
|
})
|
|
39217
39846
|
).optional().describe("An array of objects that represent summarized chapters of the transcript"),
|
|
39218
39847
|
audio_events: import_zod12.z.array(
|
|
@@ -39257,6 +39886,18 @@ var getJobsJobidLogParams = import_zod12.z.object({
|
|
|
39257
39886
|
jobid: import_zod12.z.string().describe("ID of the job.")
|
|
39258
39887
|
});
|
|
39259
39888
|
var getJobsJobidLogResponse = import_zod12.z.instanceof(File);
|
|
39889
|
+
var getJobsJobidObjectUrlsParams = import_zod12.z.object({
|
|
39890
|
+
jobid: import_zod12.z.string().describe("ID of the job.")
|
|
39891
|
+
});
|
|
39892
|
+
var getJobsJobidObjectUrlsQueryParams = import_zod12.z.object({
|
|
39893
|
+
ttl: import_zod12.z.number().describe("Time to live in seconds for the signed URLs"),
|
|
39894
|
+
url_for: import_zod12.z.array(import_zod12.z.enum(["data", "audio_mp3", "transcript"]))
|
|
39895
|
+
});
|
|
39896
|
+
var getJobsJobidObjectUrlsResponse = import_zod12.z.object({
|
|
39897
|
+
data: import_zod12.z.string().optional(),
|
|
39898
|
+
audio_mp3: import_zod12.z.string().optional(),
|
|
39899
|
+
transcript: import_zod12.z.string().optional()
|
|
39900
|
+
});
|
|
39260
39901
|
var getUsageQueryParams = import_zod12.z.object({
|
|
39261
39902
|
since: import_zod12.z.string().date().optional().describe(
|
|
39262
39903
|
"Include usage after the given date (inclusive). This is a [ISO-8601](https://en.wikipedia.org/wiki/ISO_8601) calendar date format: `YYYY-MM-DD`."
|
|
@@ -39390,7 +40031,7 @@ var speechToTextBodyKeytermsDefault = [];
|
|
|
39390
40031
|
var speechToTextBody = import_zod13.z.object({
|
|
39391
40032
|
model_id: import_zod13.z.enum(["scribe_v1", "scribe_v2"]).describe("The ID of the model to use for transcription."),
|
|
39392
40033
|
file: import_zod13.z.instanceof(File).or(import_zod13.z.null()).optional().describe(
|
|
39393
|
-
"The file to transcribe (100ms minimum audio length). All major audio and video formats are supported. Exactly one of the file or cloud_storage_url parameters must be provided. The file size must be less than
|
|
40034
|
+
"The file to transcribe (100ms minimum audio length). All major audio and video formats are supported. Exactly one of the file or cloud_storage_url parameters must be provided. The file size must be less than 5.0GB."
|
|
39394
40035
|
),
|
|
39395
40036
|
language_code: import_zod13.z.string().or(import_zod13.z.null()).optional().describe(
|
|
39396
40037
|
"An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand. Defaults to null, in this case the language is predicted automatically."
|
|
@@ -39468,7 +40109,7 @@ var speechToTextBody = import_zod13.z.object({
|
|
|
39468
40109
|
"The format of input audio. Options are 'pcm_s16le_16' or 'other' For `pcm_s16le_16`, the input audio must be 16-bit PCM at a 16kHz sample rate, single channel (mono), and little-endian byte order. Latency will be lower than with passing an encoded waveform."
|
|
39469
40110
|
),
|
|
39470
40111
|
cloud_storage_url: import_zod13.z.string().or(import_zod13.z.null()).optional().describe(
|
|
39471
|
-
"The HTTPS URL of the file to transcribe. Exactly one of the file or cloud_storage_url parameters must be provided. The file must be accessible via HTTPS and the file size must be less than 2GB. Any valid HTTPS URL is accepted, including URLs from cloud storage providers (AWS S3, Google Cloud Storage, Cloudflare R2, etc.), CDNs, or any other HTTPS source. URLs can be pre-signed or include authentication tokens in query parameters."
|
|
40112
|
+
"[Deprecated] This parameter is deprecated and will be removed in the future. Use 'source_url' instead.The HTTPS URL of the file to transcribe. Exactly one of the file or cloud_storage_url parameters must be provided. The file must be accessible via HTTPS and the file size must be less than 2GB. Any valid HTTPS URL is accepted, including URLs from cloud storage providers (AWS S3, Google Cloud Storage, Cloudflare R2, etc.), CDNs, or any other HTTPS source. URLs can be pre-signed or include authentication tokens in query parameters."
|
|
39472
40113
|
),
|
|
39473
40114
|
source_url: import_zod13.z.string().or(import_zod13.z.null()).optional().describe(
|
|
39474
40115
|
"The URL of an audio or video file to transcribe. Supports hosted video or audio files, YouTube video URLs, TikTok video URLs, and other video hosting services."
|
|
@@ -39507,7 +40148,7 @@ var speechToTextBody = import_zod13.z.object({
|
|
|
39507
40148
|
"How to format redacted entities. 'redacted' replaces with {REDACTED}, 'entity_type' replaces with {ENTITY_TYPE}, 'enumerated_entity_type' replaces with {ENTITY_TYPE_N} where N enumerates each occurrence. Only used when entity_redaction is set."
|
|
39508
40149
|
),
|
|
39509
40150
|
keyterms: import_zod13.z.array(import_zod13.z.string()).default(speechToTextBodyKeytermsDefault).describe(
|
|
39510
|
-
'A list of keyterms to bias the transcription towards. The keyterms are words or phrases you want the model to recognise more accurately. The number of keyterms cannot exceed 1000. The length of each keyterm must be less than 50 characters. Keyterms can contain at most 5 words (after normalisation). For example ["hello", "world", "technical term"]. Usage of this parameter will incur an additional 20% surcharge on the base transcription cost. When more than 100 keyterms are provided, a minimum billable duration of 20 seconds applies per request.'
|
|
40151
|
+
'A list of keyterms to bias the transcription towards. The keyterms are words or phrases you want the model to recognise more accurately. The number of keyterms cannot exceed 1000. The length of each keyterm must be less than 50 characters. Keyterms can contain at most 5 words (after normalisation). For example ["hello", "world", "technical term"]. The following characters are not supported: `<`, `>`, `{`, `}`, `[`, `]`, `\\`. Usage of this parameter will incur an additional 20% surcharge on the base transcription cost. When more than 100 keyterms are provided, a minimum billable duration of 20 seconds applies per request.'
|
|
39511
40152
|
)
|
|
39512
40153
|
});
|
|
39513
40154
|
var speechToTextResponse = import_zod13.z.object({
|
|
@@ -39873,6 +40514,7 @@ var deleteTranscriptByIdResponse = import_zod13.z.any();
|
|
|
39873
40514
|
SonioxModels,
|
|
39874
40515
|
SonioxRealtimeModel,
|
|
39875
40516
|
SonioxRegion,
|
|
40517
|
+
SonioxSDK,
|
|
39876
40518
|
SonioxStreamingSchema,
|
|
39877
40519
|
SonioxStreamingTypes,
|
|
39878
40520
|
SonioxStreamingUpdateSchema,
|