voice-router-dev 0.9.3 → 0.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/constants.d.mts +11 -92
- package/dist/constants.d.ts +11 -92
- package/dist/constants.js +11 -88
- package/dist/constants.mjs +11 -88
- package/dist/{field-configs-FbtCPxzs.d.mts → field-configs-BVOZQiG3.d.mts} +4855 -3773
- package/dist/{field-configs-FbtCPxzs.d.ts → field-configs-BVOZQiG3.d.ts} +4855 -3773
- package/dist/field-configs.d.mts +1 -1
- package/dist/field-configs.d.ts +1 -1
- package/dist/field-configs.js +583 -150
- package/dist/field-configs.mjs +583 -150
- package/dist/index.d.mts +1211 -162
- package/dist/index.d.ts +1211 -162
- package/dist/index.js +924 -275
- package/dist/index.mjs +927 -275
- package/dist/{provider-metadata-D1d-9cng.d.ts → provider-metadata-CiSA4fWP.d.ts} +2 -2
- package/dist/{provider-metadata-BJ29OPW1.d.mts → provider-metadata-oxzd1q6t.d.mts} +2 -2
- package/dist/provider-metadata.d.mts +1 -1
- package/dist/provider-metadata.d.ts +1 -1
- package/dist/provider-metadata.js +3 -66
- package/dist/provider-metadata.mjs +3 -66
- package/dist/{speechToTextChunkResponseModel-BY2lGyZ3.d.ts → speechToTextChunkResponseModel-Dns0Ma9x.d.ts} +364 -39
- package/dist/{speechToTextChunkResponseModel-KayxDiZ7.d.mts → speechToTextChunkResponseModel-_ZvHTD4e.d.mts} +364 -39
- package/dist/webhooks.d.mts +3 -2
- package/dist/webhooks.d.ts +3 -2
- package/package.json +8 -3
package/dist/index.js
CHANGED
|
@@ -145,6 +145,7 @@ __export(src_exports, {
|
|
|
145
145
|
SonioxModels: () => SonioxModels,
|
|
146
146
|
SonioxRealtimeModel: () => SonioxRealtimeModel,
|
|
147
147
|
SonioxRegion: () => SonioxRegion,
|
|
148
|
+
SonioxSDK: () => sdk_types_exports,
|
|
148
149
|
SonioxStreamingSchema: () => SonioxStreamingSchema,
|
|
149
150
|
SonioxStreamingTypes: () => streaming_types_zod_exports,
|
|
150
151
|
SonioxStreamingUpdateSchema: () => SonioxStreamingUpdateSchema,
|
|
@@ -1333,7 +1334,6 @@ var AzureLocales = [
|
|
|
1333
1334
|
{ code: "ar-YE", name: "Arabic (Yemen)" },
|
|
1334
1335
|
{ code: "as-IN", name: "Assamese (India)" },
|
|
1335
1336
|
{ code: "az-AZ", name: "Azerbaijani (Azerbaijan)" },
|
|
1336
|
-
{ code: "be-BY", name: "Belarusian (Belarus)" },
|
|
1337
1337
|
{ code: "bg-BG", name: "Bulgarian (Bulgaria)" },
|
|
1338
1338
|
{ code: "bn-BD", name: "Bengali (Bangladesh)" },
|
|
1339
1339
|
{ code: "bn-IN", name: "Bengali (India)" },
|
|
@@ -1414,7 +1414,6 @@ var AzureLocales = [
|
|
|
1414
1414
|
{ code: "lo-LA", name: "Lao (Latin)" },
|
|
1415
1415
|
{ code: "lt-LT", name: "Lithuanian (Lithuania)" },
|
|
1416
1416
|
{ code: "lv-LV", name: "Latvian (Latvia)" },
|
|
1417
|
-
{ code: "mi-NZ", name: "Maori (New Zealand)" },
|
|
1418
1417
|
{ code: "mk-MK", name: "Macedonian (North Macedonia)" },
|
|
1419
1418
|
{ code: "ml-IN", name: "Malayalam (India)" },
|
|
1420
1419
|
{ code: "mn-MN", name: "Mongolian (Mongolia)" },
|
|
@@ -1490,7 +1489,6 @@ var AzureLocaleCodes = [
|
|
|
1490
1489
|
"ar-YE",
|
|
1491
1490
|
"as-IN",
|
|
1492
1491
|
"az-AZ",
|
|
1493
|
-
"be-BY",
|
|
1494
1492
|
"bg-BG",
|
|
1495
1493
|
"bn-BD",
|
|
1496
1494
|
"bn-IN",
|
|
@@ -1571,7 +1569,6 @@ var AzureLocaleCodes = [
|
|
|
1571
1569
|
"lo-LA",
|
|
1572
1570
|
"lt-LT",
|
|
1573
1571
|
"lv-LV",
|
|
1574
|
-
"mi-NZ",
|
|
1575
1572
|
"mk-MK",
|
|
1576
1573
|
"ml-IN",
|
|
1577
1574
|
"mn-MN",
|
|
@@ -1647,7 +1644,6 @@ var AzureLocaleLabels = {
|
|
|
1647
1644
|
"ar-YE": "Arabic (Yemen)",
|
|
1648
1645
|
"as-IN": "Assamese (India)",
|
|
1649
1646
|
"az-AZ": "Azerbaijani (Azerbaijan)",
|
|
1650
|
-
"be-BY": "Belarusian (Belarus)",
|
|
1651
1647
|
"bg-BG": "Bulgarian (Bulgaria)",
|
|
1652
1648
|
"bn-BD": "Bengali (Bangladesh)",
|
|
1653
1649
|
"bn-IN": "Bengali (India)",
|
|
@@ -1728,7 +1724,6 @@ var AzureLocaleLabels = {
|
|
|
1728
1724
|
"lo-LA": "Lao (Latin)",
|
|
1729
1725
|
"lt-LT": "Lithuanian (Lithuania)",
|
|
1730
1726
|
"lv-LV": "Latvian (Latvia)",
|
|
1731
|
-
"mi-NZ": "Maori (New Zealand)",
|
|
1732
1727
|
"mk-MK": "Macedonian (North Macedonia)",
|
|
1733
1728
|
"ml-IN": "Malayalam (India)",
|
|
1734
1729
|
"mn-MN": "Mongolian (Mongolia)",
|
|
@@ -1804,7 +1799,6 @@ var AzureLocale = {
|
|
|
1804
1799
|
"ar-YE": "ar-YE",
|
|
1805
1800
|
"as-IN": "as-IN",
|
|
1806
1801
|
"az-AZ": "az-AZ",
|
|
1807
|
-
"be-BY": "be-BY",
|
|
1808
1802
|
"bg-BG": "bg-BG",
|
|
1809
1803
|
"bn-BD": "bn-BD",
|
|
1810
1804
|
"bn-IN": "bn-IN",
|
|
@@ -1885,7 +1879,6 @@ var AzureLocale = {
|
|
|
1885
1879
|
"lo-LA": "lo-LA",
|
|
1886
1880
|
"lt-LT": "lt-LT",
|
|
1887
1881
|
"lv-LV": "lv-LV",
|
|
1888
|
-
"mi-NZ": "mi-NZ",
|
|
1889
1882
|
"mk-MK": "mk-MK",
|
|
1890
1883
|
"ml-IN": "ml-IN",
|
|
1891
1884
|
"mn-MN": "mn-MN",
|
|
@@ -1976,8 +1969,6 @@ var ElevenLabsLanguages = [
|
|
|
1976
1969
|
{ code: "hr", name: "Croatian" },
|
|
1977
1970
|
{ code: "bg", name: "Bulgarian" },
|
|
1978
1971
|
{ code: "lt", name: "Lithuanian" },
|
|
1979
|
-
{ code: "la", name: "Latin" },
|
|
1980
|
-
{ code: "mi", name: "Maori" },
|
|
1981
1972
|
{ code: "ml", name: "Malayalam" },
|
|
1982
1973
|
{ code: "cy", name: "Welsh" },
|
|
1983
1974
|
{ code: "sk", name: "Slovak" },
|
|
@@ -1991,20 +1982,16 @@ var ElevenLabsLanguages = [
|
|
|
1991
1982
|
{ code: "kn", name: "Kannada" },
|
|
1992
1983
|
{ code: "et", name: "Estonian" },
|
|
1993
1984
|
{ code: "mk", name: "Macedonian" },
|
|
1994
|
-
{ code: "br", name: "Breton" },
|
|
1995
|
-
{ code: "eu", name: "Basque" },
|
|
1996
1985
|
{ code: "is", name: "Icelandic" },
|
|
1997
1986
|
{ code: "hy", name: "Armenian" },
|
|
1998
1987
|
{ code: "ne", name: "Nepali" },
|
|
1999
1988
|
{ code: "mn", name: "Mongolian" },
|
|
2000
1989
|
{ code: "bs", name: "Bosnian" },
|
|
2001
1990
|
{ code: "kk", name: "Kazakh" },
|
|
2002
|
-
{ code: "sq", name: "Albanian" },
|
|
2003
1991
|
{ code: "sw", name: "Swahili" },
|
|
2004
1992
|
{ code: "gl", name: "Galician" },
|
|
2005
1993
|
{ code: "mr", name: "Marathi" },
|
|
2006
1994
|
{ code: "pa", name: "Punjabi" },
|
|
2007
|
-
{ code: "si", name: "Sinhala" },
|
|
2008
1995
|
{ code: "km", name: "Khmer" },
|
|
2009
1996
|
{ code: "sn", name: "Shona" },
|
|
2010
1997
|
{ code: "yo", name: "Yoruba" },
|
|
@@ -2017,29 +2004,16 @@ var ElevenLabsLanguages = [
|
|
|
2017
2004
|
{ code: "sd", name: "Sindhi" },
|
|
2018
2005
|
{ code: "gu", name: "Gujarati" },
|
|
2019
2006
|
{ code: "am", name: "Amharic" },
|
|
2020
|
-
{ code: "yi", name: "Yiddish" },
|
|
2021
2007
|
{ code: "lo", name: "Lao" },
|
|
2022
2008
|
{ code: "uz", name: "Uzbek" },
|
|
2023
|
-
{ code: "fo", name: "Faroese" },
|
|
2024
|
-
{ code: "ht", name: "Haitian Creole" },
|
|
2025
2009
|
{ code: "ps", name: "Pashto" },
|
|
2026
|
-
{ code: "tk", name: "Turkmen" },
|
|
2027
|
-
{ code: "nn", name: "Norwegian Nynorsk" },
|
|
2028
2010
|
{ code: "mt", name: "Maltese" },
|
|
2029
|
-
{ code: "sa", name: "Sanskrit" },
|
|
2030
2011
|
{ code: "lb", name: "Luxembourgish" },
|
|
2031
2012
|
{ code: "my", name: "Burmese" },
|
|
2032
|
-
{ code: "bo", name: "Tibetan" },
|
|
2033
|
-
{ code: "tl", name: "Tagalog" },
|
|
2034
|
-
{ code: "mg", name: "Malagasy" },
|
|
2035
2013
|
{ code: "as", name: "Assamese" },
|
|
2036
|
-
{ code: "tt", name: "Tatar" },
|
|
2037
|
-
{ code: "haw", name: "Hawaiian" },
|
|
2038
2014
|
{ code: "ln", name: "Lingala" },
|
|
2039
2015
|
{ code: "ha", name: "Hausa" },
|
|
2040
|
-
{ code: "ba", name: "Bashkir" },
|
|
2041
|
-
{ code: "jw", name: "Javanese" },
|
|
2042
|
-
{ code: "su", name: "Sundanese" }
|
|
2016
|
+
{ code: "jw", name: "Javanese" }
|
|
2043
2017
|
];
|
|
2044
2018
|
var ElevenLabsLanguageCodes = [
|
|
2045
2019
|
"en",
|
|
@@ -2077,8 +2051,6 @@ var ElevenLabsLanguageCodes = [
|
|
|
2077
2051
|
"hr",
|
|
2078
2052
|
"bg",
|
|
2079
2053
|
"lt",
|
|
2080
|
-
"la",
|
|
2081
|
-
"mi",
|
|
2082
2054
|
"ml",
|
|
2083
2055
|
"cy",
|
|
2084
2056
|
"sk",
|
|
@@ -2092,20 +2064,16 @@ var ElevenLabsLanguageCodes = [
|
|
|
2092
2064
|
"kn",
|
|
2093
2065
|
"et",
|
|
2094
2066
|
"mk",
|
|
2095
|
-
"br",
|
|
2096
|
-
"eu",
|
|
2097
2067
|
"is",
|
|
2098
2068
|
"hy",
|
|
2099
2069
|
"ne",
|
|
2100
2070
|
"mn",
|
|
2101
2071
|
"bs",
|
|
2102
2072
|
"kk",
|
|
2103
|
-
"sq",
|
|
2104
2073
|
"sw",
|
|
2105
2074
|
"gl",
|
|
2106
2075
|
"mr",
|
|
2107
2076
|
"pa",
|
|
2108
|
-
"si",
|
|
2109
2077
|
"km",
|
|
2110
2078
|
"sn",
|
|
2111
2079
|
"yo",
|
|
@@ -2118,29 +2086,16 @@ var ElevenLabsLanguageCodes = [
|
|
|
2118
2086
|
"sd",
|
|
2119
2087
|
"gu",
|
|
2120
2088
|
"am",
|
|
2121
|
-
"yi",
|
|
2122
2089
|
"lo",
|
|
2123
2090
|
"uz",
|
|
2124
|
-
"fo",
|
|
2125
|
-
"ht",
|
|
2126
2091
|
"ps",
|
|
2127
|
-
"tk",
|
|
2128
|
-
"nn",
|
|
2129
2092
|
"mt",
|
|
2130
|
-
"sa",
|
|
2131
2093
|
"lb",
|
|
2132
2094
|
"my",
|
|
2133
|
-
"bo",
|
|
2134
|
-
"tl",
|
|
2135
|
-
"mg",
|
|
2136
2095
|
"as",
|
|
2137
|
-
"tt",
|
|
2138
|
-
"haw",
|
|
2139
2096
|
"ln",
|
|
2140
2097
|
"ha",
|
|
2141
|
-
"ba",
|
|
2142
|
-
"jw",
|
|
2143
|
-
"su"
|
|
2098
|
+
"jw"
|
|
2144
2099
|
];
|
|
2145
2100
|
var ElevenLabsLanguageLabels = {
|
|
2146
2101
|
en: "English",
|
|
@@ -2178,8 +2133,6 @@ var ElevenLabsLanguageLabels = {
|
|
|
2178
2133
|
hr: "Croatian",
|
|
2179
2134
|
bg: "Bulgarian",
|
|
2180
2135
|
lt: "Lithuanian",
|
|
2181
|
-
la: "Latin",
|
|
2182
|
-
mi: "Maori",
|
|
2183
2136
|
ml: "Malayalam",
|
|
2184
2137
|
cy: "Welsh",
|
|
2185
2138
|
sk: "Slovak",
|
|
@@ -2193,20 +2146,16 @@ var ElevenLabsLanguageLabels = {
|
|
|
2193
2146
|
kn: "Kannada",
|
|
2194
2147
|
et: "Estonian",
|
|
2195
2148
|
mk: "Macedonian",
|
|
2196
|
-
br: "Breton",
|
|
2197
|
-
eu: "Basque",
|
|
2198
2149
|
is: "Icelandic",
|
|
2199
2150
|
hy: "Armenian",
|
|
2200
2151
|
ne: "Nepali",
|
|
2201
2152
|
mn: "Mongolian",
|
|
2202
2153
|
bs: "Bosnian",
|
|
2203
2154
|
kk: "Kazakh",
|
|
2204
|
-
sq: "Albanian",
|
|
2205
2155
|
sw: "Swahili",
|
|
2206
2156
|
gl: "Galician",
|
|
2207
2157
|
mr: "Marathi",
|
|
2208
2158
|
pa: "Punjabi",
|
|
2209
|
-
si: "Sinhala",
|
|
2210
2159
|
km: "Khmer",
|
|
2211
2160
|
sn: "Shona",
|
|
2212
2161
|
yo: "Yoruba",
|
|
@@ -2219,29 +2168,16 @@ var ElevenLabsLanguageLabels = {
|
|
|
2219
2168
|
sd: "Sindhi",
|
|
2220
2169
|
gu: "Gujarati",
|
|
2221
2170
|
am: "Amharic",
|
|
2222
|
-
yi: "Yiddish",
|
|
2223
2171
|
lo: "Lao",
|
|
2224
2172
|
uz: "Uzbek",
|
|
2225
|
-
fo: "Faroese",
|
|
2226
|
-
ht: "Haitian Creole",
|
|
2227
2173
|
ps: "Pashto",
|
|
2228
|
-
tk: "Turkmen",
|
|
2229
|
-
nn: "Norwegian Nynorsk",
|
|
2230
2174
|
mt: "Maltese",
|
|
2231
|
-
sa: "Sanskrit",
|
|
2232
2175
|
lb: "Luxembourgish",
|
|
2233
2176
|
my: "Burmese",
|
|
2234
|
-
bo: "Tibetan",
|
|
2235
|
-
tl: "Tagalog",
|
|
2236
|
-
mg: "Malagasy",
|
|
2237
2177
|
as: "Assamese",
|
|
2238
|
-
tt: "Tatar",
|
|
2239
|
-
haw: "Hawaiian",
|
|
2240
2178
|
ln: "Lingala",
|
|
2241
2179
|
ha: "Hausa",
|
|
2242
|
-
ba: "Bashkir",
|
|
2243
|
-
jw: "Javanese",
|
|
2244
|
-
su: "Sundanese"
|
|
2180
|
+
jw: "Javanese"
|
|
2245
2181
|
};
|
|
2246
2182
|
|
|
2247
2183
|
// src/generated/gladia/schema/streamingSupportedBitDepthEnum.ts
|
|
@@ -2746,6 +2682,7 @@ var OpenAITranscriptionModel = {
|
|
|
2746
2682
|
"gpt-4o-mini-transcribe-2025-12-15": "gpt-4o-mini-transcribe-2025-12-15",
|
|
2747
2683
|
"gpt-4o-transcribe": "gpt-4o-transcribe",
|
|
2748
2684
|
"gpt-4o-transcribe-diarize": "gpt-4o-transcribe-diarize",
|
|
2685
|
+
"gpt-realtime-whisper": "gpt-realtime-whisper",
|
|
2749
2686
|
"whisper-1": "whisper-1"
|
|
2750
2687
|
};
|
|
2751
2688
|
var OpenAIRealtimeModel = {
|
|
@@ -2761,6 +2698,7 @@ var OpenAIRealtimeModel = {
|
|
|
2761
2698
|
"gpt-audio-mini-2025-12-15": "gpt-audio-mini-2025-12-15",
|
|
2762
2699
|
"gpt-realtime": "gpt-realtime",
|
|
2763
2700
|
"gpt-realtime-1.5": "gpt-realtime-1.5",
|
|
2701
|
+
"gpt-realtime-2": "gpt-realtime-2",
|
|
2764
2702
|
"gpt-realtime-2025-08-28": "gpt-realtime-2025-08-28",
|
|
2765
2703
|
"gpt-realtime-mini": "gpt-realtime-mini",
|
|
2766
2704
|
"gpt-realtime-mini-2025-10-06": "gpt-realtime-mini-2025-10-06",
|
|
@@ -4360,6 +4298,12 @@ var GladiaAdapter = class extends BaseAdapter {
|
|
|
4360
4298
|
};
|
|
4361
4299
|
this.baseUrl = "https://api.gladia.io";
|
|
4362
4300
|
}
|
|
4301
|
+
initialize(config) {
|
|
4302
|
+
super.initialize(config);
|
|
4303
|
+
if (config.region) {
|
|
4304
|
+
this.streamingRegion = config.region;
|
|
4305
|
+
}
|
|
4306
|
+
}
|
|
4363
4307
|
/**
|
|
4364
4308
|
* Get axios config for generated API client functions
|
|
4365
4309
|
* Configures headers and base URL using Gladia's x-gladia-key header
|
|
@@ -5021,9 +4965,10 @@ var GladiaAdapter = class extends BaseAdapter {
|
|
|
5021
4965
|
async transcribeStream(options, callbacks) {
|
|
5022
4966
|
this.validateConfig();
|
|
5023
4967
|
const streamingRequest = this.buildStreamingRequest(options);
|
|
4968
|
+
const region = options?.region ?? this.streamingRegion;
|
|
5024
4969
|
const initResponse = await streamingControllerInitStreamingSessionV2(
|
|
5025
4970
|
streamingRequest,
|
|
5026
|
-
|
|
4971
|
+
region ? { region } : void 0,
|
|
5027
4972
|
this.getAxiosConfig()
|
|
5028
4973
|
);
|
|
5029
4974
|
const { id, url: apiWsUrl } = initResponse.data;
|
|
@@ -5570,12 +5515,20 @@ var EntityType = {
|
|
|
5570
5515
|
email_address: "email_address",
|
|
5571
5516
|
event: "event",
|
|
5572
5517
|
filename: "filename",
|
|
5518
|
+
gender: "gender",
|
|
5573
5519
|
gender_sexuality: "gender_sexuality",
|
|
5574
5520
|
healthcare_number: "healthcare_number",
|
|
5575
5521
|
injury: "injury",
|
|
5576
5522
|
ip_address: "ip_address",
|
|
5577
5523
|
language: "language",
|
|
5578
5524
|
location: "location",
|
|
5525
|
+
location_address: "location_address",
|
|
5526
|
+
location_address_street: "location_address_street",
|
|
5527
|
+
location_city: "location_city",
|
|
5528
|
+
location_coordinate: "location_coordinate",
|
|
5529
|
+
location_country: "location_country",
|
|
5530
|
+
location_state: "location_state",
|
|
5531
|
+
location_zip: "location_zip",
|
|
5579
5532
|
marital_status: "marital_status",
|
|
5580
5533
|
medical_condition: "medical_condition",
|
|
5581
5534
|
medical_process: "medical_process",
|
|
@@ -5584,6 +5537,7 @@ var EntityType = {
|
|
|
5584
5537
|
number_sequence: "number_sequence",
|
|
5585
5538
|
occupation: "occupation",
|
|
5586
5539
|
organization: "organization",
|
|
5540
|
+
organization_medical_facility: "organization_medical_facility",
|
|
5587
5541
|
passport_number: "passport_number",
|
|
5588
5542
|
password: "password",
|
|
5589
5543
|
person_age: "person_age",
|
|
@@ -5592,6 +5546,7 @@ var EntityType = {
|
|
|
5592
5546
|
physical_attribute: "physical_attribute",
|
|
5593
5547
|
political_affiliation: "political_affiliation",
|
|
5594
5548
|
religion: "religion",
|
|
5549
|
+
sexuality: "sexuality",
|
|
5595
5550
|
statistics: "statistics",
|
|
5596
5551
|
time: "time",
|
|
5597
5552
|
url: "url",
|
|
@@ -5618,12 +5573,20 @@ var PiiPolicy = {
|
|
|
5618
5573
|
email_address: "email_address",
|
|
5619
5574
|
event: "event",
|
|
5620
5575
|
filename: "filename",
|
|
5576
|
+
gender: "gender",
|
|
5621
5577
|
gender_sexuality: "gender_sexuality",
|
|
5622
5578
|
healthcare_number: "healthcare_number",
|
|
5623
5579
|
injury: "injury",
|
|
5624
5580
|
ip_address: "ip_address",
|
|
5625
5581
|
language: "language",
|
|
5626
5582
|
location: "location",
|
|
5583
|
+
location_address: "location_address",
|
|
5584
|
+
location_address_street: "location_address_street",
|
|
5585
|
+
location_city: "location_city",
|
|
5586
|
+
location_coordinate: "location_coordinate",
|
|
5587
|
+
location_country: "location_country",
|
|
5588
|
+
location_state: "location_state",
|
|
5589
|
+
location_zip: "location_zip",
|
|
5627
5590
|
marital_status: "marital_status",
|
|
5628
5591
|
medical_condition: "medical_condition",
|
|
5629
5592
|
medical_process: "medical_process",
|
|
@@ -5632,6 +5595,7 @@ var PiiPolicy = {
|
|
|
5632
5595
|
number_sequence: "number_sequence",
|
|
5633
5596
|
occupation: "occupation",
|
|
5634
5597
|
organization: "organization",
|
|
5598
|
+
organization_medical_facility: "organization_medical_facility",
|
|
5635
5599
|
passport_number: "passport_number",
|
|
5636
5600
|
password: "password",
|
|
5637
5601
|
person_age: "person_age",
|
|
@@ -5640,6 +5604,7 @@ var PiiPolicy = {
|
|
|
5640
5604
|
physical_attribute: "physical_attribute",
|
|
5641
5605
|
political_affiliation: "political_affiliation",
|
|
5642
5606
|
religion: "religion",
|
|
5607
|
+
sexuality: "sexuality",
|
|
5643
5608
|
statistics: "statistics",
|
|
5644
5609
|
time: "time",
|
|
5645
5610
|
url: "url",
|
|
@@ -5708,7 +5673,8 @@ var TranscriptOptionalParamsRedactPiiAudioOptionsOverrideAudioRedactionMethod =
|
|
|
5708
5673
|
|
|
5709
5674
|
// src/generated/assemblyai/schema/transcriptOptionalParamsRemoveAudioTags.ts
|
|
5710
5675
|
var TranscriptOptionalParamsRemoveAudioTags = {
|
|
5711
|
-
all: "all"
|
|
5676
|
+
all: "all",
|
|
5677
|
+
speaker: "speaker"
|
|
5712
5678
|
};
|
|
5713
5679
|
|
|
5714
5680
|
// src/generated/assemblyai/schema/transcriptRedactPiiAudioOptionsOverrideAudioRedactionMethod.ts
|
|
@@ -5718,7 +5684,8 @@ var TranscriptRedactPiiAudioOptionsOverrideAudioRedactionMethod = {
|
|
|
5718
5684
|
|
|
5719
5685
|
// src/generated/assemblyai/schema/transcriptRemoveAudioTags.ts
|
|
5720
5686
|
var TranscriptRemoveAudioTags = {
|
|
5721
|
-
all: "all"
|
|
5687
|
+
all: "all",
|
|
5688
|
+
speaker: "speaker"
|
|
5722
5689
|
};
|
|
5723
5690
|
|
|
5724
5691
|
// src/generated/assemblyai/api/assemblyAIAPI.ts
|
|
@@ -9610,15 +9577,18 @@ var import_axios9 = __toESM(require("axios"));
|
|
|
9610
9577
|
// src/generated/soniox/schema/index.ts
|
|
9611
9578
|
var schema_exports4 = {};
|
|
9612
9579
|
__export(schema_exports4, {
|
|
9580
|
+
TTSVoiceGender: () => TTSVoiceGender,
|
|
9613
9581
|
TemporaryApiKeyUsageType: () => TemporaryApiKeyUsageType,
|
|
9614
9582
|
TranscriptionMode: () => TranscriptionMode,
|
|
9615
9583
|
TranscriptionStatus: () => TranscriptionStatus,
|
|
9616
|
-
TranslationConfigType: () => TranslationConfigType
|
|
9584
|
+
TranslationConfigType: () => TranslationConfigType,
|
|
9585
|
+
UsageLogsSort: () => UsageLogsSort
|
|
9617
9586
|
});
|
|
9618
9587
|
|
|
9619
9588
|
// src/generated/soniox/schema/temporaryApiKeyUsageType.ts
|
|
9620
9589
|
var TemporaryApiKeyUsageType = {
|
|
9621
|
-
transcribe_websocket: "transcribe_websocket"
|
|
9590
|
+
transcribe_websocket: "transcribe_websocket",
|
|
9591
|
+
tts_rt: "tts_rt"
|
|
9622
9592
|
};
|
|
9623
9593
|
|
|
9624
9594
|
// src/generated/soniox/schema/transcriptionMode.ts
|
|
@@ -9633,6 +9603,19 @@ var TranslationConfigType = {
|
|
|
9633
9603
|
two_way: "two_way"
|
|
9634
9604
|
};
|
|
9635
9605
|
|
|
9606
|
+
// src/generated/soniox/schema/tTSVoiceGender.ts
|
|
9607
|
+
var TTSVoiceGender = {
|
|
9608
|
+
male: "male",
|
|
9609
|
+
female: "female",
|
|
9610
|
+
neutral: "neutral"
|
|
9611
|
+
};
|
|
9612
|
+
|
|
9613
|
+
// src/generated/soniox/schema/usageLogsSort.ts
|
|
9614
|
+
var UsageLogsSort = {
|
|
9615
|
+
end_time_asc: "end_time_asc",
|
|
9616
|
+
end_time_desc: "end_time_desc"
|
|
9617
|
+
};
|
|
9618
|
+
|
|
9636
9619
|
// src/generated/soniox/api/sonioxPublicAPI.ts
|
|
9637
9620
|
var uploadFile = (uploadFileBody2, options) => {
|
|
9638
9621
|
const formData = new FormData();
|
|
@@ -11000,6 +10983,7 @@ __export(deepgramAPI_zod_exports, {
|
|
|
11000
10983
|
speakGenerateQueryMipOptOutDefault: () => speakGenerateQueryMipOptOutDefault,
|
|
11001
10984
|
speakGenerateQueryModelDefault: () => speakGenerateQueryModelDefault,
|
|
11002
10985
|
speakGenerateQueryParams: () => speakGenerateQueryParams,
|
|
10986
|
+
speakGenerateQuerySpeedDefault: () => speakGenerateQuerySpeedDefault,
|
|
11003
10987
|
speakGenerateResponse: () => speakGenerateResponse
|
|
11004
10988
|
});
|
|
11005
10989
|
var import_zod = require("zod");
|
|
@@ -11054,6 +11038,9 @@ var listenTranscribeQueryParams = import_zod.z.object({
|
|
|
11054
11038
|
diarize: import_zod.z.boolean().optional().describe(
|
|
11055
11039
|
"Recognize speaker changes. Each word in the transcript will be assigned a speaker number starting at 0"
|
|
11056
11040
|
),
|
|
11041
|
+
diarize_model: import_zod.z.enum(["latest", "v1", "v2"]).optional().describe(
|
|
11042
|
+
"Select and enable a specific batch diarization model version. If specifying this parameter, you should not set the deprecated `diarize=true` parameter. Not accepted on streaming requests."
|
|
11043
|
+
),
|
|
11057
11044
|
dictation: import_zod.z.boolean().optional().describe("Dictation mode for controlling formatting with dictated speech"),
|
|
11058
11045
|
encoding: import_zod.z.enum(["linear16", "flac", "mulaw", "amr-nb", "amr-wb", "opus", "speex", "g729"]).optional().describe("Specify the expected encoding of your submitted audio"),
|
|
11059
11046
|
filler_words: import_zod.z.boolean().optional().describe('Filler Words can help transcribe interruptions in your audio, like "uh" and "um"'),
|
|
@@ -11319,6 +11306,7 @@ var listenTranscribeResponse = import_zod.z.object({
|
|
|
11319
11306
|
var speakGenerateQueryCallbackMethodDefault = "POST";
|
|
11320
11307
|
var speakGenerateQueryMipOptOutDefault = false;
|
|
11321
11308
|
var speakGenerateQueryModelDefault = "aura-asteria-en";
|
|
11309
|
+
var speakGenerateQuerySpeedDefault = 1;
|
|
11322
11310
|
var speakGenerateQueryParams = import_zod.z.object({
|
|
11323
11311
|
callback: import_zod.z.string().optional().describe("URL to which we'll make the callback request"),
|
|
11324
11312
|
callback_method: import_zod.z.enum(["POST", "PUT"]).default(speakGenerateQueryCallbackMethodDefault).describe("HTTP method by which the callback request will be made"),
|
|
@@ -11430,6 +11418,9 @@ var speakGenerateQueryParams = import_zod.z.object({
|
|
|
11430
11418
|
import_zod.z.enum(["22050"]).describe("Encoding - mp3. Sample rate is fixed and not configurable (22050 Hz).")
|
|
11431
11419
|
).or(import_zod.z.enum(["48000"]).describe("Encoding - opus. Sample rate is fixed at 48000 Hz.")).optional().describe(
|
|
11432
11420
|
"Sample Rate specifies the sample rate for the output audio. Based on the encoding, different sample rates are supported. For some encodings, the sample rate is not configurable"
|
|
11421
|
+
),
|
|
11422
|
+
speed: import_zod.z.number().default(speakGenerateQuerySpeedDefault).describe(
|
|
11423
|
+
"Speaking rate multiplier that adjusts the pace of generated speech while preserving natural prosody and voice quality. Not yet supported in all languages."
|
|
11433
11424
|
)
|
|
11434
11425
|
});
|
|
11435
11426
|
var speakGenerateHeader = import_zod.z.object({
|
|
@@ -11754,6 +11745,7 @@ __export(assemblyAIAPI_zod_exports, {
|
|
|
11754
11745
|
createTranscriptBodyRedactPiiAudioDefault: () => createTranscriptBodyRedactPiiAudioDefault,
|
|
11755
11746
|
createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault: () => createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault,
|
|
11756
11747
|
createTranscriptBodyRedactPiiDefault: () => createTranscriptBodyRedactPiiDefault,
|
|
11748
|
+
createTranscriptBodyRedactPiiReturnUnredactedDefault: () => createTranscriptBodyRedactPiiReturnUnredactedDefault,
|
|
11757
11749
|
createTranscriptBodySentimentAnalysisDefault: () => createTranscriptBodySentimentAnalysisDefault,
|
|
11758
11750
|
createTranscriptBodySpeakerLabelsDefault: () => createTranscriptBodySpeakerLabelsDefault,
|
|
11759
11751
|
createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault: () => createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault,
|
|
@@ -11824,6 +11816,7 @@ var createTranscriptBodyPunctuateDefault = true;
|
|
|
11824
11816
|
var createTranscriptBodyRedactPiiDefault = false;
|
|
11825
11817
|
var createTranscriptBodyRedactPiiAudioDefault = false;
|
|
11826
11818
|
var createTranscriptBodyRedactPiiAudioOptionsReturnRedactedNoSpeechAudioDefault = false;
|
|
11819
|
+
var createTranscriptBodyRedactPiiReturnUnredactedDefault = false;
|
|
11827
11820
|
var createTranscriptBodySentimentAnalysisDefault = false;
|
|
11828
11821
|
var createTranscriptBodySpeakerLabelsDefault = false;
|
|
11829
11822
|
var createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault = 1;
|
|
@@ -11862,7 +11855,7 @@ var createTranscriptBody = import_zod3.z.object({
|
|
|
11862
11855
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
11863
11856
|
),
|
|
11864
11857
|
disfluencies: import_zod3.z.boolean().optional().describe(
|
|
11865
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
11858
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
11866
11859
|
),
|
|
11867
11860
|
domain: import_zod3.z.string().nullish().describe(
|
|
11868
11861
|
'Enable domain-specific transcription models to improve accuracy for specialized terminology. Set to `"medical-v1"` to enable [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) for improved accuracy of medical terms such as medications, procedures, conditions, and dosages.\n\nSupported languages: English (`en`), Spanish (`es`), German (`de`), French (`fr`). If used with an unsupported language, the parameter is ignored and a warning is returned.\n'
|
|
@@ -12169,12 +12162,20 @@ var createTranscriptBody = import_zod3.z.object({
|
|
|
12169
12162
|
"email_address",
|
|
12170
12163
|
"event",
|
|
12171
12164
|
"filename",
|
|
12165
|
+
"gender",
|
|
12172
12166
|
"gender_sexuality",
|
|
12173
12167
|
"healthcare_number",
|
|
12174
12168
|
"injury",
|
|
12175
12169
|
"ip_address",
|
|
12176
12170
|
"language",
|
|
12177
12171
|
"location",
|
|
12172
|
+
"location_address",
|
|
12173
|
+
"location_address_street",
|
|
12174
|
+
"location_city",
|
|
12175
|
+
"location_coordinate",
|
|
12176
|
+
"location_country",
|
|
12177
|
+
"location_state",
|
|
12178
|
+
"location_zip",
|
|
12178
12179
|
"marital_status",
|
|
12179
12180
|
"medical_condition",
|
|
12180
12181
|
"medical_process",
|
|
@@ -12183,6 +12184,7 @@ var createTranscriptBody = import_zod3.z.object({
|
|
|
12183
12184
|
"number_sequence",
|
|
12184
12185
|
"occupation",
|
|
12185
12186
|
"organization",
|
|
12187
|
+
"organization_medical_facility",
|
|
12186
12188
|
"passport_number",
|
|
12187
12189
|
"password",
|
|
12188
12190
|
"person_age",
|
|
@@ -12191,6 +12193,7 @@ var createTranscriptBody = import_zod3.z.object({
|
|
|
12191
12193
|
"physical_attribute",
|
|
12192
12194
|
"political_affiliation",
|
|
12193
12195
|
"religion",
|
|
12196
|
+
"sexuality",
|
|
12194
12197
|
"statistics",
|
|
12195
12198
|
"time",
|
|
12196
12199
|
"url",
|
|
@@ -12198,15 +12201,20 @@ var createTranscriptBody = import_zod3.z.object({
|
|
|
12198
12201
|
"username",
|
|
12199
12202
|
"vehicle_id",
|
|
12200
12203
|
"zodiac_sign"
|
|
12201
|
-
]).describe(
|
|
12204
|
+
]).describe(
|
|
12205
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
12206
|
+
)
|
|
12202
12207
|
).optional().describe(
|
|
12203
12208
|
"The list of PII Redaction policies to enable. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
12204
12209
|
),
|
|
12205
12210
|
redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).describe(
|
|
12206
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
12211
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
12207
12212
|
).or(import_zod3.z.null()).optional().describe(
|
|
12208
12213
|
"The replacement logic for detected PII, can be `entity_type` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
12209
12214
|
),
|
|
12215
|
+
redact_pii_return_unredacted: import_zod3.z.boolean().optional().describe(
|
|
12216
|
+
"When set to `true`, returns the original unredacted transcript alongside the redacted one in the same response. Requires `redact_pii` to be `true`, otherwise a 400 error is returned.\n\nWhen enabled, the response includes the additional fields `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. The existing `text`, `words`, and `utterances` fields remain fully redacted. When disabled (default), the response is unchanged and contains only the redacted transcript. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details.\n"
|
|
12217
|
+
),
|
|
12210
12218
|
sentiment_analysis: import_zod3.z.boolean().optional().describe(
|
|
12211
12219
|
"Enable [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech), can be true or false"
|
|
12212
12220
|
),
|
|
@@ -12304,10 +12312,10 @@ var createTranscriptBody = import_zod3.z.object({
|
|
|
12304
12312
|
),
|
|
12305
12313
|
summary_model: import_zod3.z.enum(["informative", "conversational", "catchy"]).optional().describe("The model to summarize the transcript"),
|
|
12306
12314
|
summary_type: import_zod3.z.enum(["bullets", "bullets_verbose", "gist", "headline", "paragraph"]).optional().describe("The type of summary"),
|
|
12307
|
-
remove_audio_tags: import_zod3.z.enum(["all"]).describe(
|
|
12308
|
-
'
|
|
12315
|
+
remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
|
|
12316
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
12309
12317
|
).or(import_zod3.z.null()).optional().describe(
|
|
12310
|
-
'
|
|
12318
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
12311
12319
|
),
|
|
12312
12320
|
temperature: import_zod3.z.number().optional().describe(
|
|
12313
12321
|
"Control the amount of randomness injected into the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
@@ -12441,7 +12449,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12441
12449
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
12442
12450
|
),
|
|
12443
12451
|
disfluencies: import_zod3.z.boolean().nullish().describe(
|
|
12444
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
12452
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
12445
12453
|
),
|
|
12446
12454
|
domain: import_zod3.z.string().nullish().describe(
|
|
12447
12455
|
'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
|
|
@@ -12464,12 +12472,20 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12464
12472
|
"email_address",
|
|
12465
12473
|
"event",
|
|
12466
12474
|
"filename",
|
|
12475
|
+
"gender",
|
|
12467
12476
|
"gender_sexuality",
|
|
12468
12477
|
"healthcare_number",
|
|
12469
12478
|
"injury",
|
|
12470
12479
|
"ip_address",
|
|
12471
12480
|
"language",
|
|
12472
12481
|
"location",
|
|
12482
|
+
"location_address",
|
|
12483
|
+
"location_address_street",
|
|
12484
|
+
"location_city",
|
|
12485
|
+
"location_coordinate",
|
|
12486
|
+
"location_country",
|
|
12487
|
+
"location_state",
|
|
12488
|
+
"location_zip",
|
|
12473
12489
|
"marital_status",
|
|
12474
12490
|
"medical_condition",
|
|
12475
12491
|
"medical_process",
|
|
@@ -12478,6 +12494,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12478
12494
|
"number_sequence",
|
|
12479
12495
|
"occupation",
|
|
12480
12496
|
"organization",
|
|
12497
|
+
"organization_medical_facility",
|
|
12481
12498
|
"passport_number",
|
|
12482
12499
|
"password",
|
|
12483
12500
|
"person_age",
|
|
@@ -12486,6 +12503,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12486
12503
|
"physical_attribute",
|
|
12487
12504
|
"political_affiliation",
|
|
12488
12505
|
"religion",
|
|
12506
|
+
"sexuality",
|
|
12489
12507
|
"statistics",
|
|
12490
12508
|
"time",
|
|
12491
12509
|
"url",
|
|
@@ -12790,6 +12808,24 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12790
12808
|
}).optional().describe(
|
|
12791
12809
|
"Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
|
|
12792
12810
|
),
|
|
12811
|
+
metadata: import_zod3.z.object({
|
|
12812
|
+
domain_used: import_zod3.z.string().nullish().describe(
|
|
12813
|
+
'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
|
|
12814
|
+
),
|
|
12815
|
+
warnings: import_zod3.z.array(
|
|
12816
|
+
import_zod3.z.object({
|
|
12817
|
+
message: import_zod3.z.string().describe("A human-readable description of the warning.")
|
|
12818
|
+
}).describe(
|
|
12819
|
+
"A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
|
|
12820
|
+
)
|
|
12821
|
+
).optional().describe(
|
|
12822
|
+
"Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
|
|
12823
|
+
)
|
|
12824
|
+
}).describe(
|
|
12825
|
+
"Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
|
|
12826
|
+
).or(import_zod3.z.null()).optional().describe(
|
|
12827
|
+
"Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
|
|
12828
|
+
),
|
|
12793
12829
|
multichannel: import_zod3.z.boolean().nullish().describe(
|
|
12794
12830
|
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
|
|
12795
12831
|
),
|
|
@@ -12837,12 +12873,20 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12837
12873
|
"email_address",
|
|
12838
12874
|
"event",
|
|
12839
12875
|
"filename",
|
|
12876
|
+
"gender",
|
|
12840
12877
|
"gender_sexuality",
|
|
12841
12878
|
"healthcare_number",
|
|
12842
12879
|
"injury",
|
|
12843
12880
|
"ip_address",
|
|
12844
12881
|
"language",
|
|
12845
12882
|
"location",
|
|
12883
|
+
"location_address",
|
|
12884
|
+
"location_address_street",
|
|
12885
|
+
"location_city",
|
|
12886
|
+
"location_coordinate",
|
|
12887
|
+
"location_country",
|
|
12888
|
+
"location_state",
|
|
12889
|
+
"location_zip",
|
|
12846
12890
|
"marital_status",
|
|
12847
12891
|
"medical_condition",
|
|
12848
12892
|
"medical_process",
|
|
@@ -12851,6 +12895,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12851
12895
|
"number_sequence",
|
|
12852
12896
|
"occupation",
|
|
12853
12897
|
"organization",
|
|
12898
|
+
"organization_medical_facility",
|
|
12854
12899
|
"passport_number",
|
|
12855
12900
|
"password",
|
|
12856
12901
|
"person_age",
|
|
@@ -12859,6 +12904,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12859
12904
|
"physical_attribute",
|
|
12860
12905
|
"political_affiliation",
|
|
12861
12906
|
"religion",
|
|
12907
|
+
"sexuality",
|
|
12862
12908
|
"statistics",
|
|
12863
12909
|
"time",
|
|
12864
12910
|
"url",
|
|
@@ -12866,12 +12912,17 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
12866
12912
|
"username",
|
|
12867
12913
|
"vehicle_id",
|
|
12868
12914
|
"zodiac_sign"
|
|
12869
|
-
]).describe(
|
|
12915
|
+
]).describe(
|
|
12916
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
12917
|
+
)
|
|
12870
12918
|
).nullish().describe(
|
|
12871
12919
|
"The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
12872
12920
|
),
|
|
12873
12921
|
redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
|
|
12874
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
12922
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
12923
|
+
),
|
|
12924
|
+
redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
|
|
12925
|
+
"Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
12875
12926
|
),
|
|
12876
12927
|
sentiment_analysis: import_zod3.z.boolean().nullish().describe(
|
|
12877
12928
|
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
|
|
@@ -13008,20 +13059,23 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
13008
13059
|
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
13009
13060
|
),
|
|
13010
13061
|
summary_model: import_zod3.z.string().nullish().describe(
|
|
13011
|
-
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
13062
|
+
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
|
|
13012
13063
|
),
|
|
13013
13064
|
summary_type: import_zod3.z.string().nullish().describe(
|
|
13014
|
-
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
13065
|
+
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
13015
13066
|
),
|
|
13016
|
-
remove_audio_tags: import_zod3.z.enum(["all"]).describe(
|
|
13017
|
-
|
|
13067
|
+
remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
|
|
13068
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
13018
13069
|
).or(import_zod3.z.null()).optional().describe(
|
|
13019
|
-
|
|
13070
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
13020
13071
|
),
|
|
13021
13072
|
temperature: import_zod3.z.number().nullish().describe(
|
|
13022
13073
|
"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
13023
13074
|
),
|
|
13024
13075
|
text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
|
|
13076
|
+
unredacted_text: import_zod3.z.string().nullish().describe(
|
|
13077
|
+
"The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13078
|
+
),
|
|
13025
13079
|
throttled: import_zod3.z.boolean().nullish().describe(
|
|
13026
13080
|
"True while a request is throttled and false when a request is no longer throttled"
|
|
13027
13081
|
),
|
|
@@ -13058,6 +13112,39 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
13058
13112
|
).nullish().describe(
|
|
13059
13113
|
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
|
|
13060
13114
|
),
|
|
13115
|
+
unredacted_utterances: import_zod3.z.array(
|
|
13116
|
+
import_zod3.z.object({
|
|
13117
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
|
|
13118
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
|
|
13119
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
|
|
13120
|
+
text: import_zod3.z.string().describe("The text for this utterance"),
|
|
13121
|
+
words: import_zod3.z.array(
|
|
13122
|
+
import_zod3.z.object({
|
|
13123
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
|
|
13124
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
|
|
13125
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
|
|
13126
|
+
text: import_zod3.z.string().describe("The text of the word"),
|
|
13127
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
13128
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
13129
|
+
),
|
|
13130
|
+
speaker: import_zod3.z.string().nullable().describe(
|
|
13131
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
13132
|
+
)
|
|
13133
|
+
})
|
|
13134
|
+
).describe("The words in the utterance."),
|
|
13135
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
13136
|
+
"The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
13137
|
+
),
|
|
13138
|
+
speaker: import_zod3.z.string().describe(
|
|
13139
|
+
'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
|
|
13140
|
+
),
|
|
13141
|
+
translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
|
|
13142
|
+
'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
|
|
13143
|
+
)
|
|
13144
|
+
})
|
|
13145
|
+
).nullish().describe(
|
|
13146
|
+
"The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13147
|
+
),
|
|
13061
13148
|
webhook_auth: import_zod3.z.boolean().describe(
|
|
13062
13149
|
"Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
|
|
13063
13150
|
),
|
|
@@ -13086,6 +13173,22 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
13086
13173
|
).nullish().describe(
|
|
13087
13174
|
"An array of temporally-sequential word objects, one for each word in the transcript.\n"
|
|
13088
13175
|
),
|
|
13176
|
+
unredacted_words: import_zod3.z.array(
|
|
13177
|
+
import_zod3.z.object({
|
|
13178
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
|
|
13179
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
|
|
13180
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
|
|
13181
|
+
text: import_zod3.z.string().describe("The text of the word"),
|
|
13182
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
13183
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
13184
|
+
),
|
|
13185
|
+
speaker: import_zod3.z.string().nullable().describe(
|
|
13186
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
13187
|
+
)
|
|
13188
|
+
})
|
|
13189
|
+
).nullish().describe(
|
|
13190
|
+
"The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13191
|
+
),
|
|
13089
13192
|
acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
13090
13193
|
custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
|
|
13091
13194
|
language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
@@ -13261,7 +13364,7 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13261
13364
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
13262
13365
|
),
|
|
13263
13366
|
disfluencies: import_zod3.z.boolean().nullish().describe(
|
|
13264
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
13367
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
13265
13368
|
),
|
|
13266
13369
|
domain: import_zod3.z.string().nullish().describe(
|
|
13267
13370
|
'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
|
|
@@ -13284,12 +13387,20 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13284
13387
|
"email_address",
|
|
13285
13388
|
"event",
|
|
13286
13389
|
"filename",
|
|
13390
|
+
"gender",
|
|
13287
13391
|
"gender_sexuality",
|
|
13288
13392
|
"healthcare_number",
|
|
13289
13393
|
"injury",
|
|
13290
13394
|
"ip_address",
|
|
13291
13395
|
"language",
|
|
13292
13396
|
"location",
|
|
13397
|
+
"location_address",
|
|
13398
|
+
"location_address_street",
|
|
13399
|
+
"location_city",
|
|
13400
|
+
"location_coordinate",
|
|
13401
|
+
"location_country",
|
|
13402
|
+
"location_state",
|
|
13403
|
+
"location_zip",
|
|
13293
13404
|
"marital_status",
|
|
13294
13405
|
"medical_condition",
|
|
13295
13406
|
"medical_process",
|
|
@@ -13298,6 +13409,7 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13298
13409
|
"number_sequence",
|
|
13299
13410
|
"occupation",
|
|
13300
13411
|
"organization",
|
|
13412
|
+
"organization_medical_facility",
|
|
13301
13413
|
"passport_number",
|
|
13302
13414
|
"password",
|
|
13303
13415
|
"person_age",
|
|
@@ -13306,6 +13418,7 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13306
13418
|
"physical_attribute",
|
|
13307
13419
|
"political_affiliation",
|
|
13308
13420
|
"religion",
|
|
13421
|
+
"sexuality",
|
|
13309
13422
|
"statistics",
|
|
13310
13423
|
"time",
|
|
13311
13424
|
"url",
|
|
@@ -13610,6 +13723,24 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13610
13723
|
}).optional().describe(
|
|
13611
13724
|
"Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
|
|
13612
13725
|
),
|
|
13726
|
+
metadata: import_zod3.z.object({
|
|
13727
|
+
domain_used: import_zod3.z.string().nullish().describe(
|
|
13728
|
+
'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
|
|
13729
|
+
),
|
|
13730
|
+
warnings: import_zod3.z.array(
|
|
13731
|
+
import_zod3.z.object({
|
|
13732
|
+
message: import_zod3.z.string().describe("A human-readable description of the warning.")
|
|
13733
|
+
}).describe(
|
|
13734
|
+
"A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
|
|
13735
|
+
)
|
|
13736
|
+
).optional().describe(
|
|
13737
|
+
"Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
|
|
13738
|
+
)
|
|
13739
|
+
}).describe(
|
|
13740
|
+
"Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
|
|
13741
|
+
).or(import_zod3.z.null()).optional().describe(
|
|
13742
|
+
"Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
|
|
13743
|
+
),
|
|
13613
13744
|
multichannel: import_zod3.z.boolean().nullish().describe(
|
|
13614
13745
|
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
|
|
13615
13746
|
),
|
|
@@ -13657,12 +13788,20 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13657
13788
|
"email_address",
|
|
13658
13789
|
"event",
|
|
13659
13790
|
"filename",
|
|
13791
|
+
"gender",
|
|
13660
13792
|
"gender_sexuality",
|
|
13661
13793
|
"healthcare_number",
|
|
13662
13794
|
"injury",
|
|
13663
13795
|
"ip_address",
|
|
13664
13796
|
"language",
|
|
13665
13797
|
"location",
|
|
13798
|
+
"location_address",
|
|
13799
|
+
"location_address_street",
|
|
13800
|
+
"location_city",
|
|
13801
|
+
"location_coordinate",
|
|
13802
|
+
"location_country",
|
|
13803
|
+
"location_state",
|
|
13804
|
+
"location_zip",
|
|
13666
13805
|
"marital_status",
|
|
13667
13806
|
"medical_condition",
|
|
13668
13807
|
"medical_process",
|
|
@@ -13671,6 +13810,7 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13671
13810
|
"number_sequence",
|
|
13672
13811
|
"occupation",
|
|
13673
13812
|
"organization",
|
|
13813
|
+
"organization_medical_facility",
|
|
13674
13814
|
"passport_number",
|
|
13675
13815
|
"password",
|
|
13676
13816
|
"person_age",
|
|
@@ -13679,6 +13819,7 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13679
13819
|
"physical_attribute",
|
|
13680
13820
|
"political_affiliation",
|
|
13681
13821
|
"religion",
|
|
13822
|
+
"sexuality",
|
|
13682
13823
|
"statistics",
|
|
13683
13824
|
"time",
|
|
13684
13825
|
"url",
|
|
@@ -13686,12 +13827,17 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13686
13827
|
"username",
|
|
13687
13828
|
"vehicle_id",
|
|
13688
13829
|
"zodiac_sign"
|
|
13689
|
-
]).describe(
|
|
13830
|
+
]).describe(
|
|
13831
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
13832
|
+
)
|
|
13690
13833
|
).nullish().describe(
|
|
13691
13834
|
"The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13692
13835
|
),
|
|
13693
13836
|
redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
|
|
13694
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
13837
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
13838
|
+
),
|
|
13839
|
+
redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
|
|
13840
|
+
"Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13695
13841
|
),
|
|
13696
13842
|
sentiment_analysis: import_zod3.z.boolean().nullish().describe(
|
|
13697
13843
|
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
|
|
@@ -13828,20 +13974,23 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13828
13974
|
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
13829
13975
|
),
|
|
13830
13976
|
summary_model: import_zod3.z.string().nullish().describe(
|
|
13831
|
-
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
13977
|
+
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
|
|
13832
13978
|
),
|
|
13833
13979
|
summary_type: import_zod3.z.string().nullish().describe(
|
|
13834
|
-
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
13980
|
+
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
13835
13981
|
),
|
|
13836
|
-
remove_audio_tags: import_zod3.z.enum(["all"]).describe(
|
|
13837
|
-
|
|
13982
|
+
remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
|
|
13983
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
13838
13984
|
).or(import_zod3.z.null()).optional().describe(
|
|
13839
|
-
|
|
13985
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
13840
13986
|
),
|
|
13841
13987
|
temperature: import_zod3.z.number().nullish().describe(
|
|
13842
13988
|
"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
13843
13989
|
),
|
|
13844
13990
|
text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
|
|
13991
|
+
unredacted_text: import_zod3.z.string().nullish().describe(
|
|
13992
|
+
"The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
13993
|
+
),
|
|
13845
13994
|
throttled: import_zod3.z.boolean().nullish().describe(
|
|
13846
13995
|
"True while a request is throttled and false when a request is no longer throttled"
|
|
13847
13996
|
),
|
|
@@ -13878,6 +14027,39 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13878
14027
|
).nullish().describe(
|
|
13879
14028
|
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
|
|
13880
14029
|
),
|
|
14030
|
+
unredacted_utterances: import_zod3.z.array(
|
|
14031
|
+
import_zod3.z.object({
|
|
14032
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
|
|
14033
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
|
|
14034
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
|
|
14035
|
+
text: import_zod3.z.string().describe("The text for this utterance"),
|
|
14036
|
+
words: import_zod3.z.array(
|
|
14037
|
+
import_zod3.z.object({
|
|
14038
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
|
|
14039
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
|
|
14040
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
|
|
14041
|
+
text: import_zod3.z.string().describe("The text of the word"),
|
|
14042
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
14043
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
14044
|
+
),
|
|
14045
|
+
speaker: import_zod3.z.string().nullable().describe(
|
|
14046
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
14047
|
+
)
|
|
14048
|
+
})
|
|
14049
|
+
).describe("The words in the utterance."),
|
|
14050
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
14051
|
+
"The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
14052
|
+
),
|
|
14053
|
+
speaker: import_zod3.z.string().describe(
|
|
14054
|
+
'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
|
|
14055
|
+
),
|
|
14056
|
+
translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
|
|
14057
|
+
'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
|
|
14058
|
+
)
|
|
14059
|
+
})
|
|
14060
|
+
).nullish().describe(
|
|
14061
|
+
"The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14062
|
+
),
|
|
13881
14063
|
webhook_auth: import_zod3.z.boolean().describe(
|
|
13882
14064
|
"Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
|
|
13883
14065
|
),
|
|
@@ -13906,6 +14088,22 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
13906
14088
|
).nullish().describe(
|
|
13907
14089
|
"An array of temporally-sequential word objects, one for each word in the transcript.\n"
|
|
13908
14090
|
),
|
|
14091
|
+
unredacted_words: import_zod3.z.array(
|
|
14092
|
+
import_zod3.z.object({
|
|
14093
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
|
|
14094
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
|
|
14095
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
|
|
14096
|
+
text: import_zod3.z.string().describe("The text of the word"),
|
|
14097
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
14098
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
14099
|
+
),
|
|
14100
|
+
speaker: import_zod3.z.string().nullable().describe(
|
|
14101
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
14102
|
+
)
|
|
14103
|
+
})
|
|
14104
|
+
).nullish().describe(
|
|
14105
|
+
"The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14106
|
+
),
|
|
13909
14107
|
acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
13910
14108
|
custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
|
|
13911
14109
|
language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
@@ -14041,7 +14239,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14041
14239
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
14042
14240
|
),
|
|
14043
14241
|
disfluencies: import_zod3.z.boolean().nullish().describe(
|
|
14044
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
14242
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
14045
14243
|
),
|
|
14046
14244
|
domain: import_zod3.z.string().nullish().describe(
|
|
14047
14245
|
'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
|
|
@@ -14064,12 +14262,20 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14064
14262
|
"email_address",
|
|
14065
14263
|
"event",
|
|
14066
14264
|
"filename",
|
|
14265
|
+
"gender",
|
|
14067
14266
|
"gender_sexuality",
|
|
14068
14267
|
"healthcare_number",
|
|
14069
14268
|
"injury",
|
|
14070
14269
|
"ip_address",
|
|
14071
14270
|
"language",
|
|
14072
14271
|
"location",
|
|
14272
|
+
"location_address",
|
|
14273
|
+
"location_address_street",
|
|
14274
|
+
"location_city",
|
|
14275
|
+
"location_coordinate",
|
|
14276
|
+
"location_country",
|
|
14277
|
+
"location_state",
|
|
14278
|
+
"location_zip",
|
|
14073
14279
|
"marital_status",
|
|
14074
14280
|
"medical_condition",
|
|
14075
14281
|
"medical_process",
|
|
@@ -14078,6 +14284,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14078
14284
|
"number_sequence",
|
|
14079
14285
|
"occupation",
|
|
14080
14286
|
"organization",
|
|
14287
|
+
"organization_medical_facility",
|
|
14081
14288
|
"passport_number",
|
|
14082
14289
|
"password",
|
|
14083
14290
|
"person_age",
|
|
@@ -14086,6 +14293,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14086
14293
|
"physical_attribute",
|
|
14087
14294
|
"political_affiliation",
|
|
14088
14295
|
"religion",
|
|
14296
|
+
"sexuality",
|
|
14089
14297
|
"statistics",
|
|
14090
14298
|
"time",
|
|
14091
14299
|
"url",
|
|
@@ -14390,6 +14598,24 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14390
14598
|
}).optional().describe(
|
|
14391
14599
|
"Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
|
|
14392
14600
|
),
|
|
14601
|
+
metadata: import_zod3.z.object({
|
|
14602
|
+
domain_used: import_zod3.z.string().nullish().describe(
|
|
14603
|
+
'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
|
|
14604
|
+
),
|
|
14605
|
+
warnings: import_zod3.z.array(
|
|
14606
|
+
import_zod3.z.object({
|
|
14607
|
+
message: import_zod3.z.string().describe("A human-readable description of the warning.")
|
|
14608
|
+
}).describe(
|
|
14609
|
+
"A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
|
|
14610
|
+
)
|
|
14611
|
+
).optional().describe(
|
|
14612
|
+
"Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
|
|
14613
|
+
)
|
|
14614
|
+
}).describe(
|
|
14615
|
+
"Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
|
|
14616
|
+
).or(import_zod3.z.null()).optional().describe(
|
|
14617
|
+
"Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
|
|
14618
|
+
),
|
|
14393
14619
|
multichannel: import_zod3.z.boolean().nullish().describe(
|
|
14394
14620
|
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
|
|
14395
14621
|
),
|
|
@@ -14437,12 +14663,20 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14437
14663
|
"email_address",
|
|
14438
14664
|
"event",
|
|
14439
14665
|
"filename",
|
|
14666
|
+
"gender",
|
|
14440
14667
|
"gender_sexuality",
|
|
14441
14668
|
"healthcare_number",
|
|
14442
14669
|
"injury",
|
|
14443
14670
|
"ip_address",
|
|
14444
14671
|
"language",
|
|
14445
14672
|
"location",
|
|
14673
|
+
"location_address",
|
|
14674
|
+
"location_address_street",
|
|
14675
|
+
"location_city",
|
|
14676
|
+
"location_coordinate",
|
|
14677
|
+
"location_country",
|
|
14678
|
+
"location_state",
|
|
14679
|
+
"location_zip",
|
|
14446
14680
|
"marital_status",
|
|
14447
14681
|
"medical_condition",
|
|
14448
14682
|
"medical_process",
|
|
@@ -14451,6 +14685,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14451
14685
|
"number_sequence",
|
|
14452
14686
|
"occupation",
|
|
14453
14687
|
"organization",
|
|
14688
|
+
"organization_medical_facility",
|
|
14454
14689
|
"passport_number",
|
|
14455
14690
|
"password",
|
|
14456
14691
|
"person_age",
|
|
@@ -14459,6 +14694,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14459
14694
|
"physical_attribute",
|
|
14460
14695
|
"political_affiliation",
|
|
14461
14696
|
"religion",
|
|
14697
|
+
"sexuality",
|
|
14462
14698
|
"statistics",
|
|
14463
14699
|
"time",
|
|
14464
14700
|
"url",
|
|
@@ -14466,12 +14702,17 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14466
14702
|
"username",
|
|
14467
14703
|
"vehicle_id",
|
|
14468
14704
|
"zodiac_sign"
|
|
14469
|
-
]).describe(
|
|
14705
|
+
]).describe(
|
|
14706
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
14707
|
+
)
|
|
14470
14708
|
).nullish().describe(
|
|
14471
14709
|
"The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14472
14710
|
),
|
|
14473
14711
|
redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
|
|
14474
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
14712
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
14713
|
+
),
|
|
14714
|
+
redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
|
|
14715
|
+
"Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14475
14716
|
),
|
|
14476
14717
|
sentiment_analysis: import_zod3.z.boolean().nullish().describe(
|
|
14477
14718
|
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
|
|
@@ -14608,20 +14849,23 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14608
14849
|
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
14609
14850
|
),
|
|
14610
14851
|
summary_model: import_zod3.z.string().nullish().describe(
|
|
14611
|
-
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
14852
|
+
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
|
|
14612
14853
|
),
|
|
14613
14854
|
summary_type: import_zod3.z.string().nullish().describe(
|
|
14614
|
-
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
14855
|
+
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
14615
14856
|
),
|
|
14616
|
-
remove_audio_tags: import_zod3.z.enum(["all"]).describe(
|
|
14617
|
-
|
|
14857
|
+
remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
|
|
14858
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
14618
14859
|
).or(import_zod3.z.null()).optional().describe(
|
|
14619
|
-
|
|
14860
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
14620
14861
|
),
|
|
14621
14862
|
temperature: import_zod3.z.number().nullish().describe(
|
|
14622
14863
|
"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
14623
14864
|
),
|
|
14624
14865
|
text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
|
|
14866
|
+
unredacted_text: import_zod3.z.string().nullish().describe(
|
|
14867
|
+
"The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14868
|
+
),
|
|
14625
14869
|
throttled: import_zod3.z.boolean().nullish().describe(
|
|
14626
14870
|
"True while a request is throttled and false when a request is no longer throttled"
|
|
14627
14871
|
),
|
|
@@ -14658,6 +14902,39 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14658
14902
|
).nullish().describe(
|
|
14659
14903
|
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
|
|
14660
14904
|
),
|
|
14905
|
+
unredacted_utterances: import_zod3.z.array(
|
|
14906
|
+
import_zod3.z.object({
|
|
14907
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
|
|
14908
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
|
|
14909
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
|
|
14910
|
+
text: import_zod3.z.string().describe("The text for this utterance"),
|
|
14911
|
+
words: import_zod3.z.array(
|
|
14912
|
+
import_zod3.z.object({
|
|
14913
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
|
|
14914
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
|
|
14915
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
|
|
14916
|
+
text: import_zod3.z.string().describe("The text of the word"),
|
|
14917
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
14918
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
14919
|
+
),
|
|
14920
|
+
speaker: import_zod3.z.string().nullable().describe(
|
|
14921
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
14922
|
+
)
|
|
14923
|
+
})
|
|
14924
|
+
).describe("The words in the utterance."),
|
|
14925
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
14926
|
+
"The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
14927
|
+
),
|
|
14928
|
+
speaker: import_zod3.z.string().describe(
|
|
14929
|
+
'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
|
|
14930
|
+
),
|
|
14931
|
+
translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
|
|
14932
|
+
'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
|
|
14933
|
+
)
|
|
14934
|
+
})
|
|
14935
|
+
).nullish().describe(
|
|
14936
|
+
"The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14937
|
+
),
|
|
14661
14938
|
webhook_auth: import_zod3.z.boolean().describe(
|
|
14662
14939
|
"Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
|
|
14663
14940
|
),
|
|
@@ -14686,6 +14963,22 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
14686
14963
|
).nullish().describe(
|
|
14687
14964
|
"An array of temporally-sequential word objects, one for each word in the transcript.\n"
|
|
14688
14965
|
),
|
|
14966
|
+
unredacted_words: import_zod3.z.array(
|
|
14967
|
+
import_zod3.z.object({
|
|
14968
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
|
|
14969
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
|
|
14970
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
|
|
14971
|
+
text: import_zod3.z.string().describe("The text of the word"),
|
|
14972
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
14973
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
14974
|
+
),
|
|
14975
|
+
speaker: import_zod3.z.string().nullable().describe(
|
|
14976
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
14977
|
+
)
|
|
14978
|
+
})
|
|
14979
|
+
).nullish().describe(
|
|
14980
|
+
"The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
14981
|
+
),
|
|
14689
14982
|
acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
14690
14983
|
custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
|
|
14691
14984
|
language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
@@ -14841,7 +15134,21 @@ var streamingTranscriberParams = import_zod4.z.object({
|
|
|
14841
15134
|
inactivityTimeout: import_zod4.z.number().optional().describe("From SDK v3"),
|
|
14842
15135
|
speakerLabels: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
14843
15136
|
maxSpeakers: import_zod4.z.number().optional().describe("From SDK v3"),
|
|
14844
|
-
|
|
15137
|
+
voiceFocus: import_zod4.z.unknown().optional().describe("From SDK v3"),
|
|
15138
|
+
voiceFocusThreshold: import_zod4.z.number().optional().describe("From SDK v3"),
|
|
15139
|
+
continuousPartials: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
15140
|
+
interruptionDelay: import_zod4.z.number().optional().describe("From SDK v3"),
|
|
15141
|
+
turnLeftPadMs: import_zod4.z.number().optional().describe("From SDK v3"),
|
|
15142
|
+
customerSupportAudioCapture: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
15143
|
+
includePartialTurns: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
15144
|
+
redactPii: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
15145
|
+
redactPiiPolicies: import_zod4.z.unknown().optional().describe("From SDK v3"),
|
|
15146
|
+
redactPiiSub: import_zod4.z.unknown().optional().describe("From SDK v3"),
|
|
15147
|
+
llmGateway: import_zod4.z.unknown().optional().describe("From SDK v3"),
|
|
15148
|
+
webhookUrl: import_zod4.z.string().optional().describe("From SDK v3"),
|
|
15149
|
+
webhookAuthHeaderName: import_zod4.z.string().optional().describe("From SDK v3"),
|
|
15150
|
+
webhookAuthHeaderValue: import_zod4.z.string().optional().describe("From SDK v3"),
|
|
15151
|
+
mode: import_zod4.z.unknown().describe("From SDK v3")
|
|
14845
15152
|
});
|
|
14846
15153
|
var streamingUpdateConfigParams = import_zod4.z.object({
|
|
14847
15154
|
end_utterance_silence_threshold: import_zod4.z.number().min(0).max(2e4).optional().describe("The duration threshold in milliseconds"),
|
|
@@ -14853,7 +15160,9 @@ var streamingUpdateConfigParams = import_zod4.z.object({
|
|
|
14853
15160
|
format_turns: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
14854
15161
|
keyterms_prompt: import_zod4.z.array(import_zod4.z.string()).optional().describe("From SDK v3"),
|
|
14855
15162
|
prompt: import_zod4.z.string().optional().describe("From SDK v3"),
|
|
14856
|
-
filter_profanity: import_zod4.z.boolean().optional().describe("From SDK v3")
|
|
15163
|
+
filter_profanity: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
15164
|
+
interruption_delay: import_zod4.z.number().optional().describe("From SDK v3"),
|
|
15165
|
+
turn_left_pad_ms: import_zod4.z.number().optional().describe("From SDK v3")
|
|
14857
15166
|
});
|
|
14858
15167
|
|
|
14859
15168
|
// src/generated/gladia/api/gladiaControlAPI.zod.ts
|
|
@@ -15602,7 +15911,7 @@ var preRecordedControllerInitPreRecordedJobV2BodyNamedEntityRecognitionDefault =
|
|
|
15602
15911
|
var preRecordedControllerInitPreRecordedJobV2BodyCustomSpellingDefault = false;
|
|
15603
15912
|
var preRecordedControllerInitPreRecordedJobV2BodySentimentAnalysisDefault = false;
|
|
15604
15913
|
var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmDefault = false;
|
|
15605
|
-
var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-
|
|
15914
|
+
var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
15606
15915
|
var preRecordedControllerInitPreRecordedJobV2BodyPiiRedactionDefault = false;
|
|
15607
15916
|
var preRecordedControllerInitPreRecordedJobV2BodySentencesDefault = false;
|
|
15608
15917
|
var preRecordedControllerInitPreRecordedJobV2BodyPunctuationEnhancedDefault = false;
|
|
@@ -15891,23 +16200,23 @@ var preRecordedControllerInitPreRecordedJobV2Body = import_zod5.z.object({
|
|
|
15891
16200
|
"Forces the translation to use informal language forms when available in the target language."
|
|
15892
16201
|
)
|
|
15893
16202
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
15894
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
16203
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
15895
16204
|
summarization_config: import_zod5.z.object({
|
|
15896
16205
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(preRecordedControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault).describe("The type of summarization to apply")
|
|
15897
|
-
}).optional().describe("
|
|
16206
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
15898
16207
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
15899
16208
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
15900
16209
|
custom_spelling_config: import_zod5.z.object({
|
|
15901
16210
|
spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
|
|
15902
16211
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
15903
16212
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
15904
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
16213
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
15905
16214
|
audio_to_llm_config: import_zod5.z.object({
|
|
15906
16215
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
15907
16216
|
model: import_zod5.z.string().default(preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault).describe(
|
|
15908
16217
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
15909
16218
|
)
|
|
15910
|
-
}).optional().describe("
|
|
16219
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
15911
16220
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
15912
16221
|
pii_redaction_config: import_zod5.z.object({
|
|
15913
16222
|
entity_types: import_zod5.z.enum([
|
|
@@ -16162,7 +16471,7 @@ var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsNamed
|
|
|
16162
16471
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsCustomSpellingDefault = false;
|
|
16163
16472
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
|
|
16164
16473
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmDefault = false;
|
|
16165
|
-
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
16474
|
+
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
16166
16475
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsPiiRedactionDefault = false;
|
|
16167
16476
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSentencesDefault = false;
|
|
16168
16477
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
|
|
@@ -16510,12 +16819,12 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
|
|
|
16510
16819
|
"Forces the translation to use informal language forms when available in the target language."
|
|
16511
16820
|
)
|
|
16512
16821
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
16513
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
16822
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
16514
16823
|
summarization_config: import_zod5.z.object({
|
|
16515
16824
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
16516
16825
|
preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
|
|
16517
16826
|
).describe("The type of summarization to apply")
|
|
16518
|
-
}).optional().describe("
|
|
16827
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
16519
16828
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
16520
16829
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
16521
16830
|
custom_spelling_config: import_zod5.z.object({
|
|
@@ -16524,7 +16833,7 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
|
|
|
16524
16833
|
"**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
|
|
16525
16834
|
),
|
|
16526
16835
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
16527
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
16836
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
16528
16837
|
audio_to_llm_config: import_zod5.z.object({
|
|
16529
16838
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
16530
16839
|
model: import_zod5.z.string().default(
|
|
@@ -16532,7 +16841,7 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
|
|
|
16532
16841
|
).describe(
|
|
16533
16842
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
16534
16843
|
)
|
|
16535
|
-
}).optional().describe("
|
|
16844
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
16536
16845
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
16537
16846
|
pii_redaction_config: import_zod5.z.object({
|
|
16538
16847
|
entity_types: import_zod5.z.enum([
|
|
@@ -17669,7 +17978,7 @@ var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsNamedEntityReco
|
|
|
17669
17978
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsCustomSpellingDefault = false;
|
|
17670
17979
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSentimentAnalysisDefault = false;
|
|
17671
17980
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmDefault = false;
|
|
17672
|
-
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
17981
|
+
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
17673
17982
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsPiiRedactionDefault = false;
|
|
17674
17983
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSentencesDefault = false;
|
|
17675
17984
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsPunctuationEnhancedDefault = false;
|
|
@@ -18010,19 +18319,19 @@ var preRecordedControllerGetPreRecordedJobV2Response = import_zod5.z.object({
|
|
|
18010
18319
|
"Forces the translation to use informal language forms when available in the target language."
|
|
18011
18320
|
)
|
|
18012
18321
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
18013
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
18322
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
18014
18323
|
summarization_config: import_zod5.z.object({
|
|
18015
18324
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
18016
18325
|
preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSummarizationConfigTypeDefault
|
|
18017
18326
|
).describe("The type of summarization to apply")
|
|
18018
|
-
}).optional().describe("
|
|
18327
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
18019
18328
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
18020
18329
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
18021
18330
|
custom_spelling_config: import_zod5.z.object({
|
|
18022
18331
|
spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
|
|
18023
18332
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
18024
18333
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
18025
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
18334
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
18026
18335
|
audio_to_llm_config: import_zod5.z.object({
|
|
18027
18336
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
18028
18337
|
model: import_zod5.z.string().default(
|
|
@@ -18030,7 +18339,7 @@ var preRecordedControllerGetPreRecordedJobV2Response = import_zod5.z.object({
|
|
|
18030
18339
|
).describe(
|
|
18031
18340
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
18032
18341
|
)
|
|
18033
|
-
}).optional().describe("
|
|
18342
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
18034
18343
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
18035
18344
|
pii_redaction_config: import_zod5.z.object({
|
|
18036
18345
|
entity_types: import_zod5.z.enum([
|
|
@@ -19143,7 +19452,7 @@ var transcriptionControllerInitPreRecordedJobV2BodyNamedEntityRecognitionDefault
|
|
|
19143
19452
|
var transcriptionControllerInitPreRecordedJobV2BodyCustomSpellingDefault = false;
|
|
19144
19453
|
var transcriptionControllerInitPreRecordedJobV2BodySentimentAnalysisDefault = false;
|
|
19145
19454
|
var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmDefault = false;
|
|
19146
|
-
var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-
|
|
19455
|
+
var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
19147
19456
|
var transcriptionControllerInitPreRecordedJobV2BodyPiiRedactionDefault = false;
|
|
19148
19457
|
var transcriptionControllerInitPreRecordedJobV2BodySentencesDefault = false;
|
|
19149
19458
|
var transcriptionControllerInitPreRecordedJobV2BodyPunctuationEnhancedDefault = false;
|
|
@@ -19436,23 +19745,23 @@ var transcriptionControllerInitPreRecordedJobV2Body = import_zod5.z.object({
|
|
|
19436
19745
|
"Forces the translation to use informal language forms when available in the target language."
|
|
19437
19746
|
)
|
|
19438
19747
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
19439
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
19748
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
19440
19749
|
summarization_config: import_zod5.z.object({
|
|
19441
19750
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(transcriptionControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault).describe("The type of summarization to apply")
|
|
19442
|
-
}).optional().describe("
|
|
19751
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
19443
19752
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
19444
19753
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
19445
19754
|
custom_spelling_config: import_zod5.z.object({
|
|
19446
19755
|
spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
|
|
19447
19756
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
19448
19757
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
19449
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
19758
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
19450
19759
|
audio_to_llm_config: import_zod5.z.object({
|
|
19451
19760
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
19452
19761
|
model: import_zod5.z.string().default(transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault).describe(
|
|
19453
19762
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
19454
19763
|
)
|
|
19455
|
-
}).optional().describe("
|
|
19764
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
19456
19765
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
19457
19766
|
pii_redaction_config: import_zod5.z.object({
|
|
19458
19767
|
entity_types: import_zod5.z.enum([
|
|
@@ -19710,7 +20019,7 @@ var transcriptionControllerListV2ResponseItemsItemRequestParamsNamedEntityRecogn
|
|
|
19710
20019
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsCustomSpellingDefault = false;
|
|
19711
20020
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
|
|
19712
20021
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmDefault = false;
|
|
19713
|
-
var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
20022
|
+
var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
19714
20023
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsPiiRedactionDefault = false;
|
|
19715
20024
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsSentencesDefault = false;
|
|
19716
20025
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
|
|
@@ -20121,12 +20430,12 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
|
|
|
20121
20430
|
"Forces the translation to use informal language forms when available in the target language."
|
|
20122
20431
|
)
|
|
20123
20432
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
20124
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
20433
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
20125
20434
|
summarization_config: import_zod5.z.object({
|
|
20126
20435
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
20127
20436
|
transcriptionControllerListV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
|
|
20128
20437
|
).describe("The type of summarization to apply")
|
|
20129
|
-
}).optional().describe("
|
|
20438
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
20130
20439
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
20131
20440
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
20132
20441
|
custom_spelling_config: import_zod5.z.object({
|
|
@@ -20135,7 +20444,7 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
|
|
|
20135
20444
|
"**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
|
|
20136
20445
|
),
|
|
20137
20446
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
20138
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
20447
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
20139
20448
|
audio_to_llm_config: import_zod5.z.object({
|
|
20140
20449
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
20141
20450
|
model: import_zod5.z.string().default(
|
|
@@ -20143,7 +20452,7 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
|
|
|
20143
20452
|
).describe(
|
|
20144
20453
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
20145
20454
|
)
|
|
20146
|
-
}).optional().describe("
|
|
20455
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
20147
20456
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
20148
20457
|
pii_redaction_config: import_zod5.z.object({
|
|
20149
20458
|
entity_types: import_zod5.z.enum([
|
|
@@ -22461,7 +22770,7 @@ var transcriptionControllerGetTranscriptV2ResponseRequestParamsNamedEntityRecogn
|
|
|
22461
22770
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsCustomSpellingDefault = false;
|
|
22462
22771
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsSentimentAnalysisDefault = false;
|
|
22463
22772
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmDefault = false;
|
|
22464
|
-
var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
22773
|
+
var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
22465
22774
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsPiiRedactionDefault = false;
|
|
22466
22775
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsSentencesDefault = false;
|
|
22467
22776
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsPunctuationEnhancedDefault = false;
|
|
@@ -22866,19 +23175,19 @@ var transcriptionControllerGetTranscriptV2Response = import_zod5.z.discriminated
|
|
|
22866
23175
|
"Forces the translation to use informal language forms when available in the target language."
|
|
22867
23176
|
)
|
|
22868
23177
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
22869
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
23178
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
22870
23179
|
summarization_config: import_zod5.z.object({
|
|
22871
23180
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
22872
23181
|
transcriptionControllerGetTranscriptV2ResponseRequestParamsSummarizationConfigTypeDefault
|
|
22873
23182
|
).describe("The type of summarization to apply")
|
|
22874
|
-
}).optional().describe("
|
|
23183
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
22875
23184
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
22876
23185
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
22877
23186
|
custom_spelling_config: import_zod5.z.object({
|
|
22878
23187
|
spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
|
|
22879
23188
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
22880
23189
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
22881
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
23190
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
22882
23191
|
audio_to_llm_config: import_zod5.z.object({
|
|
22883
23192
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
22884
23193
|
model: import_zod5.z.string().default(
|
|
@@ -22886,7 +23195,7 @@ var transcriptionControllerGetTranscriptV2Response = import_zod5.z.discriminated
|
|
|
22886
23195
|
).describe(
|
|
22887
23196
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
22888
23197
|
)
|
|
22889
|
-
}).optional().describe("
|
|
23198
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
22890
23199
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
22891
23200
|
pii_redaction_config: import_zod5.z.object({
|
|
22892
23201
|
entity_types: import_zod5.z.enum([
|
|
@@ -25598,7 +25907,7 @@ var historyControllerGetListV1ResponseItemsItemRequestParamsNamedEntityRecogniti
|
|
|
25598
25907
|
var historyControllerGetListV1ResponseItemsItemRequestParamsCustomSpellingDefault = false;
|
|
25599
25908
|
var historyControllerGetListV1ResponseItemsItemRequestParamsSentimentAnalysisDefault = false;
|
|
25600
25909
|
var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmDefault = false;
|
|
25601
|
-
var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
25910
|
+
var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
25602
25911
|
var historyControllerGetListV1ResponseItemsItemRequestParamsPiiRedactionDefault = false;
|
|
25603
25912
|
var historyControllerGetListV1ResponseItemsItemRequestParamsSentencesDefault = false;
|
|
25604
25913
|
var historyControllerGetListV1ResponseItemsItemRequestParamsPunctuationEnhancedDefault = false;
|
|
@@ -26009,12 +26318,12 @@ var historyControllerGetListV1Response = import_zod5.z.object({
|
|
|
26009
26318
|
"Forces the translation to use informal language forms when available in the target language."
|
|
26010
26319
|
)
|
|
26011
26320
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
26012
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
26321
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
26013
26322
|
summarization_config: import_zod5.z.object({
|
|
26014
26323
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
26015
26324
|
historyControllerGetListV1ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
|
|
26016
26325
|
).describe("The type of summarization to apply")
|
|
26017
|
-
}).optional().describe("
|
|
26326
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
26018
26327
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
26019
26328
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
26020
26329
|
custom_spelling_config: import_zod5.z.object({
|
|
@@ -26023,7 +26332,7 @@ var historyControllerGetListV1Response = import_zod5.z.object({
|
|
|
26023
26332
|
"**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
|
|
26024
26333
|
),
|
|
26025
26334
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
26026
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
26335
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
26027
26336
|
audio_to_llm_config: import_zod5.z.object({
|
|
26028
26337
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
26029
26338
|
model: import_zod5.z.string().default(
|
|
@@ -26031,7 +26340,7 @@ var historyControllerGetListV1Response = import_zod5.z.object({
|
|
|
26031
26340
|
).describe(
|
|
26032
26341
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
26033
26342
|
)
|
|
26034
|
-
}).optional().describe("
|
|
26343
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
26035
26344
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
26036
26345
|
pii_redaction_config: import_zod5.z.object({
|
|
26037
26346
|
entity_types: import_zod5.z.enum([
|
|
@@ -31276,6 +31585,7 @@ __export(openAIAudioRealtimeAPI_zod_exports, {
|
|
|
31276
31585
|
createRealtimeClientSecretBodySessionPromptVariablesTypeDefault: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefault,
|
|
31277
31586
|
createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultOne: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultOne,
|
|
31278
31587
|
createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultTwo: () => createRealtimeClientSecretBodySessionPromptVariablesTypeDefaultTwo,
|
|
31588
|
+
createRealtimeClientSecretBodySessionReasoningEffortDefault: () => createRealtimeClientSecretBodySessionReasoningEffortDefault,
|
|
31279
31589
|
createRealtimeClientSecretBodySessionToolChoiceDefault: () => createRealtimeClientSecretBodySessionToolChoiceDefault,
|
|
31280
31590
|
createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne: () => createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne,
|
|
31281
31591
|
createRealtimeClientSecretBodySessionTracingDefault: () => createRealtimeClientSecretBodySessionTracingDefault,
|
|
@@ -31300,6 +31610,7 @@ __export(openAIAudioRealtimeAPI_zod_exports, {
|
|
|
31300
31610
|
createRealtimeClientSecretResponseSessionPromptVariablesTypeDefault: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefault,
|
|
31301
31611
|
createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultOne: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultOne,
|
|
31302
31612
|
createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo: () => createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo,
|
|
31613
|
+
createRealtimeClientSecretResponseSessionReasoningEffortDefault: () => createRealtimeClientSecretResponseSessionReasoningEffortDefault,
|
|
31303
31614
|
createRealtimeClientSecretResponseSessionToolChoiceDefault: () => createRealtimeClientSecretResponseSessionToolChoiceDefault,
|
|
31304
31615
|
createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne: () => createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne,
|
|
31305
31616
|
createRealtimeClientSecretResponseSessionTracingDefaultOne: () => createRealtimeClientSecretResponseSessionTracingDefaultOne,
|
|
@@ -31656,6 +31967,7 @@ var createRealtimeClientSecretBodySessionTracingDefaultOne = "auto";
|
|
|
31656
31967
|
var createRealtimeClientSecretBodySessionTracingDefault = null;
|
|
31657
31968
|
var createRealtimeClientSecretBodySessionToolsItemRequireApprovalDefaultOne = "always";
|
|
31658
31969
|
var createRealtimeClientSecretBodySessionToolChoiceDefault = "auto";
|
|
31970
|
+
var createRealtimeClientSecretBodySessionReasoningEffortDefault = "low";
|
|
31659
31971
|
var createRealtimeClientSecretBodySessionTruncationRetentionRatioMin = 0;
|
|
31660
31972
|
var createRealtimeClientSecretBodySessionTruncationRetentionRatioMax = 1;
|
|
31661
31973
|
var createRealtimeClientSecretBodySessionTruncationTokenLimitsPostInstructionsMin = 0;
|
|
@@ -31691,6 +32003,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
31691
32003
|
import_zod6.z.enum([
|
|
31692
32004
|
"gpt-realtime",
|
|
31693
32005
|
"gpt-realtime-1.5",
|
|
32006
|
+
"gpt-realtime-2",
|
|
31694
32007
|
"gpt-realtime-2025-08-28",
|
|
31695
32008
|
"gpt-4o-realtime-preview",
|
|
31696
32009
|
"gpt-4o-realtime-preview-2024-10-01",
|
|
@@ -31731,16 +32044,20 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
31731
32044
|
"gpt-4o-mini-transcribe",
|
|
31732
32045
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
31733
32046
|
"gpt-4o-transcribe",
|
|
31734
|
-
"gpt-4o-transcribe-diarize"
|
|
32047
|
+
"gpt-4o-transcribe-diarize",
|
|
32048
|
+
"gpt-realtime-whisper"
|
|
31735
32049
|
])
|
|
31736
32050
|
).optional().describe(
|
|
31737
|
-
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`,
|
|
32051
|
+
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
|
|
31738
32052
|
),
|
|
31739
32053
|
language: import_zod6.z.string().optional().describe(
|
|
31740
32054
|
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
31741
32055
|
),
|
|
31742
32056
|
prompt: import_zod6.z.string().optional().describe(
|
|
31743
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
32057
|
+
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
|
|
32058
|
+
),
|
|
32059
|
+
delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
|
|
32060
|
+
"Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
|
|
31744
32061
|
)
|
|
31745
32062
|
}).optional(),
|
|
31746
32063
|
noise_reduction: import_zod6.z.object({
|
|
@@ -31807,7 +32124,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
31807
32124
|
"Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
|
|
31808
32125
|
)
|
|
31809
32126
|
]).describe(
|
|
31810
|
-
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
|
|
32127
|
+
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
|
|
31811
32128
|
).or(import_zod6.z.null()).optional()
|
|
31812
32129
|
}).optional(),
|
|
31813
32130
|
output: import_zod6.z.object({
|
|
@@ -31880,7 +32197,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
31880
32197
|
server_label: import_zod6.z.string().describe(
|
|
31881
32198
|
"A label for this MCP server, used to identify it in tool calls.\n"
|
|
31882
32199
|
),
|
|
31883
|
-
server_url: import_zod6.z.string().optional().describe(
|
|
32200
|
+
server_url: import_zod6.z.string().url().optional().describe(
|
|
31884
32201
|
"The URL for the MCP server. One of `server_url` or `connector_id` must be\nprovided.\n"
|
|
31885
32202
|
),
|
|
31886
32203
|
connector_id: import_zod6.z.enum([
|
|
@@ -31958,6 +32275,16 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
31958
32275
|
).default(createRealtimeClientSecretBodySessionToolChoiceDefault).describe(
|
|
31959
32276
|
"How the model chooses tools. Provide one of the string modes or force a specific\nfunction/MCP tool.\n"
|
|
31960
32277
|
),
|
|
32278
|
+
parallel_tool_calls: import_zod6.z.boolean().optional().describe(
|
|
32279
|
+
"Whether the model may call multiple tools in parallel. Only supported by\nreasoning Realtime models such as `gpt-realtime-2`.\n"
|
|
32280
|
+
),
|
|
32281
|
+
reasoning: import_zod6.z.object({
|
|
32282
|
+
effort: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).default(createRealtimeClientSecretBodySessionReasoningEffortDefault).describe(
|
|
32283
|
+
"Constrains effort on reasoning for reasoning-capable Realtime models such as\n`gpt-realtime-2`.\n"
|
|
32284
|
+
)
|
|
32285
|
+
}).optional().describe(
|
|
32286
|
+
"Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.\n"
|
|
32287
|
+
),
|
|
31961
32288
|
max_output_tokens: import_zod6.z.number().or(import_zod6.z.enum(["inf"])).optional().describe(
|
|
31962
32289
|
"Maximum number of output tokens for a single assistant response,\ninclusive of tool calls. Provide an integer between 1 and 4096 to\nlimit output tokens, or `inf` for the maximum available tokens for a\ngiven model. Defaults to `inf`.\n"
|
|
31963
32290
|
),
|
|
@@ -31997,7 +32324,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
31997
32324
|
).or(
|
|
31998
32325
|
import_zod6.z.object({
|
|
31999
32326
|
type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
|
|
32000
|
-
image_url: import_zod6.z.string().describe(
|
|
32327
|
+
image_url: import_zod6.z.string().url().describe(
|
|
32001
32328
|
"The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
|
|
32002
32329
|
).or(import_zod6.z.null()).optional(),
|
|
32003
32330
|
file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
|
|
@@ -32011,7 +32338,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
32011
32338
|
file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
|
|
32012
32339
|
filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
|
|
32013
32340
|
file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
|
|
32014
|
-
file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
|
|
32341
|
+
file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
|
|
32015
32342
|
detail: import_zod6.z.enum(["low", "high"]).optional()
|
|
32016
32343
|
}).describe("A file input to the model.")
|
|
32017
32344
|
)
|
|
@@ -32047,16 +32374,20 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
32047
32374
|
"gpt-4o-mini-transcribe",
|
|
32048
32375
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
32049
32376
|
"gpt-4o-transcribe",
|
|
32050
|
-
"gpt-4o-transcribe-diarize"
|
|
32377
|
+
"gpt-4o-transcribe-diarize",
|
|
32378
|
+
"gpt-realtime-whisper"
|
|
32051
32379
|
])
|
|
32052
32380
|
).optional().describe(
|
|
32053
|
-
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`,
|
|
32381
|
+
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
|
|
32054
32382
|
),
|
|
32055
32383
|
language: import_zod6.z.string().optional().describe(
|
|
32056
32384
|
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
32057
32385
|
),
|
|
32058
32386
|
prompt: import_zod6.z.string().optional().describe(
|
|
32059
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
32387
|
+
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
|
|
32388
|
+
),
|
|
32389
|
+
delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
|
|
32390
|
+
"Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
|
|
32060
32391
|
)
|
|
32061
32392
|
}).optional(),
|
|
32062
32393
|
noise_reduction: import_zod6.z.object({
|
|
@@ -32123,7 +32454,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
32123
32454
|
"Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
|
|
32124
32455
|
)
|
|
32125
32456
|
]).describe(
|
|
32126
|
-
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
|
|
32457
|
+
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
|
|
32127
32458
|
).or(import_zod6.z.null()).optional()
|
|
32128
32459
|
}).optional()
|
|
32129
32460
|
}).optional().describe("Configuration for input and output audio.\n"),
|
|
@@ -32154,6 +32485,7 @@ var createRealtimeClientSecretResponseSessionTracingDefaultTwo = "auto";
|
|
|
32154
32485
|
var createRealtimeClientSecretResponseSessionTracingDefaultOne = null;
|
|
32155
32486
|
var createRealtimeClientSecretResponseSessionToolsItemRequireApprovalDefaultOne = "always";
|
|
32156
32487
|
var createRealtimeClientSecretResponseSessionToolChoiceDefault = "auto";
|
|
32488
|
+
var createRealtimeClientSecretResponseSessionReasoningEffortDefault = "low";
|
|
32157
32489
|
var createRealtimeClientSecretResponseSessionTruncationRetentionRatioMin = 0;
|
|
32158
32490
|
var createRealtimeClientSecretResponseSessionTruncationRetentionRatioMax = 1;
|
|
32159
32491
|
var createRealtimeClientSecretResponseSessionTruncationTokenLimitsPostInstructionsMin = 0;
|
|
@@ -32163,17 +32495,14 @@ var createRealtimeClientSecretResponseSessionPromptVariablesTypeDefaultTwo = "in
|
|
|
32163
32495
|
var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
32164
32496
|
value: import_zod6.z.string().describe("The generated client secret value."),
|
|
32165
32497
|
expires_at: import_zod6.z.number().describe("Expiration timestamp for the client secret, in seconds since epoch."),
|
|
32166
|
-
session: import_zod6.z.
|
|
32498
|
+
session: import_zod6.z.union([
|
|
32167
32499
|
import_zod6.z.object({
|
|
32168
|
-
client_secret: import_zod6.z.object({
|
|
32169
|
-
value: import_zod6.z.string().describe(
|
|
32170
|
-
"Ephemeral key usable in client environments to authenticate connections to the Realtime API. Use this in client-side environments rather than a standard API token, which should only be used server-side.\n"
|
|
32171
|
-
),
|
|
32172
|
-
expires_at: import_zod6.z.number().describe(
|
|
32173
|
-
"Timestamp for when the token expires. Currently, all tokens expire\nafter one minute.\n"
|
|
32174
|
-
)
|
|
32175
|
-
}).describe("Ephemeral key returned by the API."),
|
|
32176
32500
|
type: import_zod6.z.enum(["realtime"]).describe("The type of session to create. Always `realtime` for the Realtime API.\n"),
|
|
32501
|
+
id: import_zod6.z.string().describe(
|
|
32502
|
+
"Unique identifier for the session that looks like `sess_1234567890abcdef`.\n"
|
|
32503
|
+
),
|
|
32504
|
+
object: import_zod6.z.enum(["realtime.session"]).describe("The object type. Always `realtime.session`."),
|
|
32505
|
+
expires_at: import_zod6.z.number().optional().describe("Expiration timestamp for the session, in seconds since epoch."),
|
|
32177
32506
|
output_modalities: import_zod6.z.array(import_zod6.z.enum(["text", "audio"])).default(createRealtimeClientSecretResponseSessionOutputModalitiesDefault).describe(
|
|
32178
32507
|
'The set of modalities the model can respond with. It defaults to `["audio"]`, indicating\nthat the model will respond with audio plus a transcript. `["text"]` can be used to make\nthe model respond with text only. It is not possible to request both `text` and `audio` at the same time.\n'
|
|
32179
32508
|
),
|
|
@@ -32181,6 +32510,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32181
32510
|
import_zod6.z.enum([
|
|
32182
32511
|
"gpt-realtime",
|
|
32183
32512
|
"gpt-realtime-1.5",
|
|
32513
|
+
"gpt-realtime-2",
|
|
32184
32514
|
"gpt-realtime-2025-08-28",
|
|
32185
32515
|
"gpt-4o-realtime-preview",
|
|
32186
32516
|
"gpt-4o-realtime-preview-2024-10-01",
|
|
@@ -32203,15 +32533,15 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32203
32533
|
audio: import_zod6.z.object({
|
|
32204
32534
|
input: import_zod6.z.object({
|
|
32205
32535
|
format: import_zod6.z.object({
|
|
32206
|
-
type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
|
|
32207
|
-
rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
|
|
32536
|
+
type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
|
|
32537
|
+
rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
|
|
32208
32538
|
}).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
|
|
32209
32539
|
import_zod6.z.object({
|
|
32210
|
-
type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
|
|
32540
|
+
type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
|
|
32211
32541
|
}).describe("The G.711 \u03BC-law format.")
|
|
32212
32542
|
).or(
|
|
32213
32543
|
import_zod6.z.object({
|
|
32214
|
-
type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
|
|
32544
|
+
type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
|
|
32215
32545
|
}).describe("The G.711 A-law format.")
|
|
32216
32546
|
).optional(),
|
|
32217
32547
|
transcription: import_zod6.z.object({
|
|
@@ -32221,20 +32551,19 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32221
32551
|
"gpt-4o-mini-transcribe",
|
|
32222
32552
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
32223
32553
|
"gpt-4o-transcribe",
|
|
32224
|
-
"gpt-4o-transcribe-diarize"
|
|
32554
|
+
"gpt-4o-transcribe-diarize",
|
|
32555
|
+
"gpt-realtime-whisper"
|
|
32225
32556
|
])
|
|
32226
32557
|
).optional().describe(
|
|
32227
|
-
"The model
|
|
32228
|
-
),
|
|
32229
|
-
language: import_zod6.z.string().optional().describe(
|
|
32230
|
-
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
32558
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
32231
32559
|
),
|
|
32560
|
+
language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
|
|
32232
32561
|
prompt: import_zod6.z.string().optional().describe(
|
|
32233
|
-
|
|
32562
|
+
"The prompt configured for input audio transcription, when present.\n"
|
|
32234
32563
|
)
|
|
32235
32564
|
}).optional(),
|
|
32236
32565
|
noise_reduction: import_zod6.z.object({
|
|
32237
|
-
type: import_zod6.z.enum(["near_field", "far_field"]).describe(
|
|
32566
|
+
type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
|
|
32238
32567
|
"Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones such as laptop or conference room microphones.\n"
|
|
32239
32568
|
)
|
|
32240
32569
|
}).optional().describe(
|
|
@@ -32297,20 +32626,20 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32297
32626
|
"Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
|
|
32298
32627
|
)
|
|
32299
32628
|
]).describe(
|
|
32300
|
-
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
|
|
32629
|
+
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
|
|
32301
32630
|
).or(import_zod6.z.null()).optional()
|
|
32302
32631
|
}).optional(),
|
|
32303
32632
|
output: import_zod6.z.object({
|
|
32304
32633
|
format: import_zod6.z.object({
|
|
32305
|
-
type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
|
|
32306
|
-
rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
|
|
32634
|
+
type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
|
|
32635
|
+
rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
|
|
32307
32636
|
}).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
|
|
32308
32637
|
import_zod6.z.object({
|
|
32309
|
-
type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
|
|
32638
|
+
type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
|
|
32310
32639
|
}).describe("The G.711 \u03BC-law format.")
|
|
32311
32640
|
).or(
|
|
32312
32641
|
import_zod6.z.object({
|
|
32313
|
-
type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
|
|
32642
|
+
type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
|
|
32314
32643
|
}).describe("The G.711 A-law format.")
|
|
32315
32644
|
).optional(),
|
|
32316
32645
|
voice: import_zod6.z.string().or(
|
|
@@ -32354,7 +32683,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32354
32683
|
).or(import_zod6.z.null()).optional(),
|
|
32355
32684
|
tools: import_zod6.z.array(
|
|
32356
32685
|
import_zod6.z.object({
|
|
32357
|
-
type: import_zod6.z.enum(["function"]).describe("The type of the tool, i.e. `function`."),
|
|
32686
|
+
type: import_zod6.z.enum(["function"]).optional().describe("The type of the tool, i.e. `function`."),
|
|
32358
32687
|
name: import_zod6.z.string().optional().describe("The name of the function."),
|
|
32359
32688
|
description: import_zod6.z.string().optional().describe(
|
|
32360
32689
|
"The description of the function, including guidance on when and how\nto call it, and guidance about what to tell the user when calling\n(if anything).\n"
|
|
@@ -32366,7 +32695,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32366
32695
|
server_label: import_zod6.z.string().describe(
|
|
32367
32696
|
"A label for this MCP server, used to identify it in tool calls.\n"
|
|
32368
32697
|
),
|
|
32369
|
-
server_url: import_zod6.z.string().optional().describe(
|
|
32698
|
+
server_url: import_zod6.z.string().url().optional().describe(
|
|
32370
32699
|
"The URL for the MCP server. One of `server_url` or `connector_id` must be\nprovided.\n"
|
|
32371
32700
|
),
|
|
32372
32701
|
connector_id: import_zod6.z.enum([
|
|
@@ -32378,7 +32707,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32378
32707
|
"connector_outlookcalendar",
|
|
32379
32708
|
"connector_outlookemail",
|
|
32380
32709
|
"connector_sharepoint"
|
|
32381
|
-
]).describe(
|
|
32710
|
+
]).optional().describe(
|
|
32382
32711
|
"Identifier for service connectors, like those available in ChatGPT. One of\n`server_url` or `connector_id` must be provided. Learn more about service\nconnectors [here](/docs/guides/tools-remote-mcp#connectors).\n\nCurrently supported `connector_id` values are:\n\n- Dropbox: `connector_dropbox`\n- Gmail: `connector_gmail`\n- Google Calendar: `connector_googlecalendar`\n- Google Drive: `connector_googledrive`\n- Microsoft Teams: `connector_microsoftteams`\n- Outlook Calendar: `connector_outlookcalendar`\n- Outlook Email: `connector_outlookemail`\n- SharePoint: `connector_sharepoint`\n"
|
|
32383
32712
|
),
|
|
32384
32713
|
authorization: import_zod6.z.string().optional().describe(
|
|
@@ -32444,6 +32773,13 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32444
32773
|
).default(createRealtimeClientSecretResponseSessionToolChoiceDefault).describe(
|
|
32445
32774
|
"How the model chooses tools. Provide one of the string modes or force a specific\nfunction/MCP tool.\n"
|
|
32446
32775
|
),
|
|
32776
|
+
reasoning: import_zod6.z.object({
|
|
32777
|
+
effort: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).default(createRealtimeClientSecretResponseSessionReasoningEffortDefault).describe(
|
|
32778
|
+
"Constrains effort on reasoning for reasoning-capable Realtime models such as\n`gpt-realtime-2`.\n"
|
|
32779
|
+
)
|
|
32780
|
+
}).optional().describe(
|
|
32781
|
+
"Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.\n"
|
|
32782
|
+
),
|
|
32447
32783
|
max_output_tokens: import_zod6.z.number().or(import_zod6.z.enum(["inf"])).optional().describe(
|
|
32448
32784
|
"Maximum number of output tokens for a single assistant response,\ninclusive of tool calls. Provide an integer between 1 and 4096 to\nlimit output tokens, or `inf` for the maximum available tokens for a\ngiven model. Defaults to `inf`.\n"
|
|
32449
32785
|
),
|
|
@@ -32483,7 +32819,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32483
32819
|
).or(
|
|
32484
32820
|
import_zod6.z.object({
|
|
32485
32821
|
type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
|
|
32486
|
-
image_url: import_zod6.z.string().describe(
|
|
32822
|
+
image_url: import_zod6.z.string().url().describe(
|
|
32487
32823
|
"The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
|
|
32488
32824
|
).or(import_zod6.z.null()).optional(),
|
|
32489
32825
|
file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
|
|
@@ -32497,8 +32833,8 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32497
32833
|
file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
|
|
32498
32834
|
filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
|
|
32499
32835
|
file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
|
|
32500
|
-
file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
|
|
32501
|
-
detail: import_zod6.z.enum(["low", "high"])
|
|
32836
|
+
file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
|
|
32837
|
+
detail: import_zod6.z.enum(["low", "high"]).optional()
|
|
32502
32838
|
}).describe("A file input to the model.")
|
|
32503
32839
|
)
|
|
32504
32840
|
).describe(
|
|
@@ -32507,9 +32843,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32507
32843
|
}).describe(
|
|
32508
32844
|
"Reference to a prompt template and its variables.\n[Learn more](/docs/guides/text?api-mode=responses#reusable-prompts).\n"
|
|
32509
32845
|
).or(import_zod6.z.null()).optional()
|
|
32510
|
-
}).describe(
|
|
32511
|
-
"A new Realtime session configuration, with an ephemeral key. Default TTL\nfor keys is one minute.\n"
|
|
32512
|
-
),
|
|
32846
|
+
}).describe("A Realtime session configuration object.\n"),
|
|
32513
32847
|
import_zod6.z.object({
|
|
32514
32848
|
type: import_zod6.z.enum(["transcription"]).describe(
|
|
32515
32849
|
"The type of session. Always `transcription` for transcription sessions.\n"
|
|
@@ -32525,15 +32859,15 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32525
32859
|
audio: import_zod6.z.object({
|
|
32526
32860
|
input: import_zod6.z.object({
|
|
32527
32861
|
format: import_zod6.z.object({
|
|
32528
|
-
type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
|
|
32529
|
-
rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
|
|
32862
|
+
type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
|
|
32863
|
+
rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
|
|
32530
32864
|
}).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
|
|
32531
32865
|
import_zod6.z.object({
|
|
32532
|
-
type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
|
|
32866
|
+
type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
|
|
32533
32867
|
}).describe("The G.711 \u03BC-law format.")
|
|
32534
32868
|
).or(
|
|
32535
32869
|
import_zod6.z.object({
|
|
32536
|
-
type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
|
|
32870
|
+
type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
|
|
32537
32871
|
}).describe("The G.711 A-law format.")
|
|
32538
32872
|
).optional(),
|
|
32539
32873
|
transcription: import_zod6.z.object({
|
|
@@ -32543,20 +32877,19 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32543
32877
|
"gpt-4o-mini-transcribe",
|
|
32544
32878
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
32545
32879
|
"gpt-4o-transcribe",
|
|
32546
|
-
"gpt-4o-transcribe-diarize"
|
|
32880
|
+
"gpt-4o-transcribe-diarize",
|
|
32881
|
+
"gpt-realtime-whisper"
|
|
32547
32882
|
])
|
|
32548
32883
|
).optional().describe(
|
|
32549
|
-
"The model
|
|
32550
|
-
),
|
|
32551
|
-
language: import_zod6.z.string().optional().describe(
|
|
32552
|
-
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
32884
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
32553
32885
|
),
|
|
32886
|
+
language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
|
|
32554
32887
|
prompt: import_zod6.z.string().optional().describe(
|
|
32555
|
-
|
|
32888
|
+
"The prompt configured for input audio transcription, when present.\n"
|
|
32556
32889
|
)
|
|
32557
32890
|
}).optional(),
|
|
32558
32891
|
noise_reduction: import_zod6.z.object({
|
|
32559
|
-
type: import_zod6.z.enum(["near_field", "far_field"]).describe(
|
|
32892
|
+
type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
|
|
32560
32893
|
"Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones such as laptop or conference room microphones.\n"
|
|
32561
32894
|
)
|
|
32562
32895
|
}).optional().describe("Configuration for input audio noise reduction.\n"),
|
|
@@ -32573,8 +32906,10 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
32573
32906
|
silence_duration_ms: import_zod6.z.number().optional().describe(
|
|
32574
32907
|
"Duration of silence to detect speech stop (in milliseconds). Defaults\nto 500ms. With shorter values the model will respond more quickly,\nbut may jump in on short pauses from the user.\n"
|
|
32575
32908
|
)
|
|
32576
|
-
}).
|
|
32577
|
-
"Configuration for turn detection. Can be set to `null` to turn off. Server\nVAD means that the model will detect the start and end of speech based on\naudio volume and respond at the end of user speech.\n"
|
|
32909
|
+
}).describe(
|
|
32910
|
+
"Configuration for turn detection. Can be set to `null` to turn off. Server\nVAD means that the model will detect the start and end of speech based on\naudio volume and respond at the end of user speech. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported.\n"
|
|
32911
|
+
).or(import_zod6.z.null()).optional().describe(
|
|
32912
|
+
"Configuration for turn detection. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported.\n"
|
|
32578
32913
|
)
|
|
32579
32914
|
}).optional()
|
|
32580
32915
|
}).optional().describe("Configuration for input audio for the session.\n")
|
|
@@ -32714,7 +33049,7 @@ var createRealtimeSessionBody = import_zod6.z.object({
|
|
|
32714
33049
|
).or(
|
|
32715
33050
|
import_zod6.z.object({
|
|
32716
33051
|
type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
|
|
32717
|
-
image_url: import_zod6.z.string().describe(
|
|
33052
|
+
image_url: import_zod6.z.string().url().describe(
|
|
32718
33053
|
"The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
|
|
32719
33054
|
).or(import_zod6.z.null()).optional(),
|
|
32720
33055
|
file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
|
|
@@ -32728,7 +33063,7 @@ var createRealtimeSessionBody = import_zod6.z.object({
|
|
|
32728
33063
|
file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
|
|
32729
33064
|
filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
|
|
32730
33065
|
file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
|
|
32731
|
-
file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
|
|
33066
|
+
file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
|
|
32732
33067
|
detail: import_zod6.z.enum(["low", "high"]).optional()
|
|
32733
33068
|
}).describe("A file input to the model.")
|
|
32734
33069
|
)
|
|
@@ -32777,17 +33112,14 @@ var createRealtimeSessionResponse = import_zod6.z.object({
|
|
|
32777
33112
|
"gpt-4o-mini-transcribe",
|
|
32778
33113
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
32779
33114
|
"gpt-4o-transcribe",
|
|
32780
|
-
"gpt-4o-transcribe-diarize"
|
|
33115
|
+
"gpt-4o-transcribe-diarize",
|
|
33116
|
+
"gpt-realtime-whisper"
|
|
32781
33117
|
])
|
|
32782
33118
|
).optional().describe(
|
|
32783
|
-
"The model
|
|
33119
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
32784
33120
|
),
|
|
32785
|
-
language: import_zod6.z.string().optional().describe(
|
|
32786
|
-
|
|
32787
|
-
),
|
|
32788
|
-
prompt: import_zod6.z.string().optional().describe(
|
|
32789
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
32790
|
-
)
|
|
33121
|
+
language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
|
|
33122
|
+
prompt: import_zod6.z.string().optional().describe("The prompt configured for input audio transcription, when present.\n")
|
|
32791
33123
|
}).optional(),
|
|
32792
33124
|
noise_reduction: import_zod6.z.object({
|
|
32793
33125
|
type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
|
|
@@ -32913,16 +33245,20 @@ var createRealtimeTranscriptionSessionBody = import_zod6.z.object({
|
|
|
32913
33245
|
"gpt-4o-mini-transcribe",
|
|
32914
33246
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
32915
33247
|
"gpt-4o-transcribe",
|
|
32916
|
-
"gpt-4o-transcribe-diarize"
|
|
33248
|
+
"gpt-4o-transcribe-diarize",
|
|
33249
|
+
"gpt-realtime-whisper"
|
|
32917
33250
|
])
|
|
32918
33251
|
).optional().describe(
|
|
32919
|
-
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`,
|
|
33252
|
+
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
|
|
32920
33253
|
),
|
|
32921
33254
|
language: import_zod6.z.string().optional().describe(
|
|
32922
33255
|
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
32923
33256
|
),
|
|
32924
33257
|
prompt: import_zod6.z.string().optional().describe(
|
|
32925
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
33258
|
+
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
|
|
33259
|
+
),
|
|
33260
|
+
delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
|
|
33261
|
+
"Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
|
|
32926
33262
|
)
|
|
32927
33263
|
}).optional(),
|
|
32928
33264
|
include: import_zod6.z.array(import_zod6.z.enum(["item.input_audio_transcription.logprobs"])).optional().describe(
|
|
@@ -32951,17 +33287,14 @@ var createRealtimeTranscriptionSessionResponse = import_zod6.z.object({
|
|
|
32951
33287
|
"gpt-4o-mini-transcribe",
|
|
32952
33288
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
32953
33289
|
"gpt-4o-transcribe",
|
|
32954
|
-
"gpt-4o-transcribe-diarize"
|
|
33290
|
+
"gpt-4o-transcribe-diarize",
|
|
33291
|
+
"gpt-realtime-whisper"
|
|
32955
33292
|
])
|
|
32956
33293
|
).optional().describe(
|
|
32957
|
-
"The model
|
|
33294
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
32958
33295
|
),
|
|
32959
|
-
language: import_zod6.z.string().optional().describe(
|
|
32960
|
-
|
|
32961
|
-
),
|
|
32962
|
-
prompt: import_zod6.z.string().optional().describe(
|
|
32963
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
32964
|
-
)
|
|
33296
|
+
language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
|
|
33297
|
+
prompt: import_zod6.z.string().optional().describe("The prompt configured for input audio transcription, when present.\n")
|
|
32965
33298
|
}).optional(),
|
|
32966
33299
|
turn_detection: import_zod6.z.object({
|
|
32967
33300
|
type: import_zod6.z.string().optional().describe("Type of turn detection, only `server_vad` is currently supported.\n"),
|
|
@@ -36346,6 +36679,7 @@ __export(sonioxPublicAPI_zod_exports, {
|
|
|
36346
36679
|
createTranscriptionBodyWebhookUrlRegExpOne: () => createTranscriptionBodyWebhookUrlRegExpOne,
|
|
36347
36680
|
deleteFileParams: () => deleteFileParams,
|
|
36348
36681
|
deleteTranscriptionParams: () => deleteTranscriptionParams,
|
|
36682
|
+
getConcurrencyLimitsResponse: () => getConcurrencyLimitsResponse,
|
|
36349
36683
|
getFileParams: () => getFileParams,
|
|
36350
36684
|
getFileResponse: () => getFileResponse,
|
|
36351
36685
|
getFilesCountResponse: () => getFilesCountResponse,
|
|
@@ -36363,6 +36697,12 @@ __export(sonioxPublicAPI_zod_exports, {
|
|
|
36363
36697
|
getTranscriptionsQueryLimitMax: () => getTranscriptionsQueryLimitMax,
|
|
36364
36698
|
getTranscriptionsQueryParams: () => getTranscriptionsQueryParams,
|
|
36365
36699
|
getTranscriptionsResponse: () => getTranscriptionsResponse,
|
|
36700
|
+
getTtsModelsResponse: () => getTtsModelsResponse,
|
|
36701
|
+
getUsageLogsQueryLimitDefault: () => getUsageLogsQueryLimitDefault,
|
|
36702
|
+
getUsageLogsQueryLimitMax: () => getUsageLogsQueryLimitMax,
|
|
36703
|
+
getUsageLogsQueryParams: () => getUsageLogsQueryParams,
|
|
36704
|
+
getUsageLogsQuerySortDefault: () => getUsageLogsQuerySortDefault,
|
|
36705
|
+
getUsageLogsResponse: () => getUsageLogsResponse,
|
|
36366
36706
|
uploadFileBody: () => uploadFileBody,
|
|
36367
36707
|
uploadFileBodyClientReferenceIdMaxOne: () => uploadFileBodyClientReferenceIdMaxOne
|
|
36368
36708
|
});
|
|
@@ -36613,11 +36953,73 @@ var getModelsResponse = import_zod10.z.object({
|
|
|
36613
36953
|
})
|
|
36614
36954
|
).describe("List of available models and their attributes.")
|
|
36615
36955
|
});
|
|
36956
|
+
var getTtsModelsResponse = import_zod10.z.object({
|
|
36957
|
+
models: import_zod10.z.array(
|
|
36958
|
+
import_zod10.z.object({
|
|
36959
|
+
id: import_zod10.z.string().describe("Unique identifier of the model."),
|
|
36960
|
+
aliased_model_id: import_zod10.z.string().or(import_zod10.z.null()).describe("If this is an alias, the id of the aliased model."),
|
|
36961
|
+
name: import_zod10.z.string().describe("Name of the model."),
|
|
36962
|
+
voices: import_zod10.z.array(
|
|
36963
|
+
import_zod10.z.object({
|
|
36964
|
+
id: import_zod10.z.string().describe("Unique identifier of the voice."),
|
|
36965
|
+
description: import_zod10.z.string().describe("Description of the TTS voice."),
|
|
36966
|
+
gender: import_zod10.z.enum(["male", "female", "neutral"])
|
|
36967
|
+
})
|
|
36968
|
+
).describe("List of available voices for this model."),
|
|
36969
|
+
languages: import_zod10.z.array(
|
|
36970
|
+
import_zod10.z.object({
|
|
36971
|
+
code: import_zod10.z.string().describe("2-letter language code."),
|
|
36972
|
+
name: import_zod10.z.string().describe("Language name.")
|
|
36973
|
+
})
|
|
36974
|
+
).describe("List of languages supported by the model.")
|
|
36975
|
+
})
|
|
36976
|
+
).describe("List of available TTS models and their attributes.")
|
|
36977
|
+
});
|
|
36978
|
+
var getUsageLogsQueryLimitDefault = 1e3;
|
|
36979
|
+
var getUsageLogsQueryLimitMax = 1e3;
|
|
36980
|
+
var getUsageLogsQuerySortDefault = "end_time_asc";
|
|
36981
|
+
var getUsageLogsQueryParams = import_zod10.z.object({
|
|
36982
|
+
start_time: import_zod10.z.string().describe("Start of the time window (inclusive). Filters by request end time."),
|
|
36983
|
+
end_time: import_zod10.z.string().describe("End of the time window (exclusive). Filters by request end time."),
|
|
36984
|
+
limit: import_zod10.z.number().min(1).max(getUsageLogsQueryLimitMax).default(getUsageLogsQueryLimitDefault).describe("Maximum number of usage log entries to return."),
|
|
36985
|
+
sort: import_zod10.z.enum(["end_time_asc", "end_time_desc"]).default(getUsageLogsQuerySortDefault).describe(
|
|
36986
|
+
"Sort order by end_time.Use `end_time_desc` to get the most recent entries first. When paginating, pass the same `sort` value alongside the cursor."
|
|
36987
|
+
),
|
|
36988
|
+
cursor: import_zod10.z.string().or(import_zod10.z.null()).optional().describe("Pagination cursor for the next page of results.")
|
|
36989
|
+
});
|
|
36990
|
+
var getUsageLogsResponse = import_zod10.z.object({
|
|
36991
|
+
usage_logs: import_zod10.z.array(
|
|
36992
|
+
import_zod10.z.object({
|
|
36993
|
+
uuid: import_zod10.z.string().uuid().describe("Unique identifier of the request."),
|
|
36994
|
+
request_scope: import_zod10.z.string().describe("Scope of the request (api / playground)."),
|
|
36995
|
+
client_reference_id: import_zod10.z.string().describe("Client reference ID supplied on the original request. Empty string if none."),
|
|
36996
|
+
model: import_zod10.z.string().describe("Model identifier."),
|
|
36997
|
+
start_time: import_zod10.z.string().datetime({}).describe("When the request started."),
|
|
36998
|
+
end_time: import_zod10.z.string().datetime({}).describe("When the request ended."),
|
|
36999
|
+
input_text_tokens: import_zod10.z.number(),
|
|
37000
|
+
input_audio_tokens: import_zod10.z.number(),
|
|
37001
|
+
input_audio_duration_ms: import_zod10.z.number(),
|
|
37002
|
+
output_text_tokens: import_zod10.z.number(),
|
|
37003
|
+
output_audio_tokens: import_zod10.z.number(),
|
|
37004
|
+
output_audio_duration_ms: import_zod10.z.number(),
|
|
37005
|
+
cost_usd: import_zod10.z.string(),
|
|
37006
|
+
input_cost_usd: import_zod10.z.string(),
|
|
37007
|
+
input_text_cost_usd: import_zod10.z.string(),
|
|
37008
|
+
input_audio_cost_usd: import_zod10.z.string(),
|
|
37009
|
+
output_cost_usd: import_zod10.z.string(),
|
|
37010
|
+
output_text_cost_usd: import_zod10.z.string(),
|
|
37011
|
+
output_audio_cost_usd: import_zod10.z.string()
|
|
37012
|
+
})
|
|
37013
|
+
).describe("Per-request usage log entries ordered by end_time, uuid (per `sort`)."),
|
|
37014
|
+
next_page_cursor: import_zod10.z.string().or(import_zod10.z.null()).optional().describe(
|
|
37015
|
+
"A pagination token that references the next page of results. When more data is available, this field contains a value to pass in the cursor parameter of a subsequent request. When null, no additional results are available."
|
|
37016
|
+
)
|
|
37017
|
+
});
|
|
36616
37018
|
var createTemporaryApiKeyBodyExpiresInSecondsMax = 3600;
|
|
36617
37019
|
var createTemporaryApiKeyBodyClientReferenceIdMaxOne = 256;
|
|
36618
37020
|
var createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne = 18e3;
|
|
36619
37021
|
var createTemporaryApiKeyBody = import_zod10.z.object({
|
|
36620
|
-
usage_type: import_zod10.z.enum(["transcribe_websocket"]),
|
|
37022
|
+
usage_type: import_zod10.z.enum(["transcribe_websocket", "tts_rt"]),
|
|
36621
37023
|
expires_in_seconds: import_zod10.z.number().min(1).max(createTemporaryApiKeyBodyExpiresInSecondsMax).describe("Duration in seconds until the temporary API key expires."),
|
|
36622
37024
|
client_reference_id: import_zod10.z.string().max(createTemporaryApiKeyBodyClientReferenceIdMaxOne).or(import_zod10.z.null()).optional().describe("Optional tracking identifier string. Does not need to be unique."),
|
|
36623
37025
|
single_use: import_zod10.z.boolean().or(import_zod10.z.null()).optional().describe("If true, the temporary API key can be used only once."),
|
|
@@ -36625,6 +37027,28 @@ var createTemporaryApiKeyBody = import_zod10.z.object({
|
|
|
36625
37027
|
"Maximum WebSocket connection duration in seconds. If exceeded, the connection will be dropped. If not set, no limit is applied."
|
|
36626
37028
|
)
|
|
36627
37029
|
});
|
|
37030
|
+
var getConcurrencyLimitsResponse = import_zod10.z.object({
|
|
37031
|
+
project: import_zod10.z.object({
|
|
37032
|
+
current: import_zod10.z.object({
|
|
37033
|
+
transcribe_concurrent: import_zod10.z.number(),
|
|
37034
|
+
tts_concurrent: import_zod10.z.number()
|
|
37035
|
+
}).describe("Live counts read from Redis"),
|
|
37036
|
+
limits: import_zod10.z.object({
|
|
37037
|
+
transcribe_concurrent: import_zod10.z.number().or(import_zod10.z.null()),
|
|
37038
|
+
tts_concurrent: import_zod10.z.number().or(import_zod10.z.null())
|
|
37039
|
+
}).describe("Configured limits")
|
|
37040
|
+
}),
|
|
37041
|
+
organization: import_zod10.z.object({
|
|
37042
|
+
current: import_zod10.z.object({
|
|
37043
|
+
transcribe_concurrent: import_zod10.z.number(),
|
|
37044
|
+
tts_concurrent: import_zod10.z.number()
|
|
37045
|
+
}).describe("Live counts read from Redis"),
|
|
37046
|
+
limits: import_zod10.z.object({
|
|
37047
|
+
transcribe_concurrent: import_zod10.z.number().or(import_zod10.z.null()),
|
|
37048
|
+
tts_concurrent: import_zod10.z.number().or(import_zod10.z.null())
|
|
37049
|
+
}).describe("Configured limits")
|
|
37050
|
+
})
|
|
37051
|
+
});
|
|
36628
37052
|
|
|
36629
37053
|
// src/generated/soniox/streaming-types.zod.ts
|
|
36630
37054
|
var streaming_types_zod_exports = {};
|
|
@@ -36709,10 +37133,10 @@ var sonioxStructuredContextSchema = import_zod11.z.object({
|
|
|
36709
37133
|
var sonioxContextSchema = import_zod11.z.union([sonioxStructuredContextSchema, import_zod11.z.string()]);
|
|
36710
37134
|
var sonioxRealtimeModelSchema = import_zod11.z.enum([
|
|
36711
37135
|
"stt-rt-v4",
|
|
36712
|
-
"stt-rt-v3",
|
|
36713
37136
|
"stt-rt-preview",
|
|
36714
37137
|
"stt-rt-v3-preview",
|
|
36715
|
-
"stt-rt-preview-v2"
|
|
37138
|
+
"stt-rt-preview-v2",
|
|
37139
|
+
"stt-rt-v3"
|
|
36716
37140
|
]);
|
|
36717
37141
|
var streamingTranscriberParams3 = import_zod11.z.object({
|
|
36718
37142
|
model: sonioxRealtimeModelSchema,
|
|
@@ -36720,12 +37144,16 @@ var streamingTranscriberParams3 = import_zod11.z.object({
|
|
|
36720
37144
|
sampleRate: import_zod11.z.number().optional(),
|
|
36721
37145
|
numChannels: import_zod11.z.number().optional(),
|
|
36722
37146
|
languageHints: import_zod11.z.array(import_zod11.z.string()).optional(),
|
|
37147
|
+
languageHintsStrict: import_zod11.z.boolean().optional(),
|
|
36723
37148
|
context: sonioxContextSchema.optional(),
|
|
36724
37149
|
enableSpeakerDiarization: import_zod11.z.boolean().optional(),
|
|
36725
37150
|
enableLanguageIdentification: import_zod11.z.boolean().optional(),
|
|
36726
37151
|
enableEndpointDetection: import_zod11.z.boolean().optional(),
|
|
37152
|
+
maxEndpointDelayMs: import_zod11.z.number().optional(),
|
|
36727
37153
|
translation: sonioxTranslationConfigSchema.optional(),
|
|
36728
|
-
clientReferenceId: import_zod11.z.string().optional()
|
|
37154
|
+
clientReferenceId: import_zod11.z.string().optional(),
|
|
37155
|
+
keepaliveIntervalMs: import_zod11.z.number().optional(),
|
|
37156
|
+
connectTimeoutMs: import_zod11.z.number().optional()
|
|
36729
37157
|
});
|
|
36730
37158
|
var sonioxTranslationStatusSchema = import_zod11.z.enum(["original", "translation", "none"]);
|
|
36731
37159
|
var sonioxTokenSchema = import_zod11.z.object({
|
|
@@ -37317,6 +37745,7 @@ __export(schema_exports5, {
|
|
|
37317
37745
|
V1ListenPostParametersCallbackMethod: () => V1ListenPostParametersCallbackMethod,
|
|
37318
37746
|
V1ListenPostParametersCustomIntentMode: () => V1ListenPostParametersCustomIntentMode,
|
|
37319
37747
|
V1ListenPostParametersCustomTopicMode: () => V1ListenPostParametersCustomTopicMode,
|
|
37748
|
+
V1ListenPostParametersDiarizeModel: () => V1ListenPostParametersDiarizeModel,
|
|
37320
37749
|
V1ListenPostParametersEncoding: () => V1ListenPostParametersEncoding,
|
|
37321
37750
|
V1ListenPostParametersModel0: () => V1ListenPostParametersModel0,
|
|
37322
37751
|
V1ListenPostParametersRedactSchemaOneOf1Items: () => V1ListenPostParametersRedactSchemaOneOf1Items,
|
|
@@ -37355,6 +37784,13 @@ __export(schema_exports5, {
|
|
|
37355
37784
|
V1SpeakPostParametersSampleRate4: () => V1SpeakPostParametersSampleRate4
|
|
37356
37785
|
});
|
|
37357
37786
|
|
|
37787
|
+
// src/generated/deepgram/schema/v1ListenPostParametersDiarizeModel.ts
|
|
37788
|
+
var V1ListenPostParametersDiarizeModel = {
|
|
37789
|
+
latest: "latest",
|
|
37790
|
+
v1: "v1",
|
|
37791
|
+
v2: "v2"
|
|
37792
|
+
};
|
|
37793
|
+
|
|
37358
37794
|
// src/generated/deepgram/schema/v1ListenPostParametersModel0.ts
|
|
37359
37795
|
var V1ListenPostParametersModel0 = {
|
|
37360
37796
|
"nova-3": "nova-3",
|
|
@@ -37571,6 +38007,7 @@ var V1SpeakPostParametersSampleRate = {
|
|
|
37571
38007
|
var schema_exports6 = {};
|
|
37572
38008
|
__export(schema_exports6, {
|
|
37573
38009
|
AudioResponseFormat: () => AudioResponseFormat,
|
|
38010
|
+
AudioTranscriptionDelay: () => AudioTranscriptionDelay,
|
|
37574
38011
|
CreateSpeechRequestResponseFormat: () => CreateSpeechRequestResponseFormat,
|
|
37575
38012
|
CreateSpeechRequestStreamFormat: () => CreateSpeechRequestStreamFormat,
|
|
37576
38013
|
CreateTranscriptionRequestTimestampGranularitiesItem: () => CreateTranscriptionRequestTimestampGranularitiesItem,
|
|
@@ -37590,12 +38027,14 @@ __export(schema_exports6, {
|
|
|
37590
38027
|
RealtimeAudioFormatsAnyOfType: () => RealtimeAudioFormatsAnyOfType,
|
|
37591
38028
|
RealtimeCreateClientSecretRequestExpiresAfterAnchor: () => RealtimeCreateClientSecretRequestExpiresAfterAnchor,
|
|
37592
38029
|
RealtimeFunctionToolType: () => RealtimeFunctionToolType,
|
|
38030
|
+
RealtimeReasoningEffort: () => RealtimeReasoningEffort,
|
|
37593
38031
|
RealtimeSessionCreateRequestGAIncludeItem: () => RealtimeSessionCreateRequestGAIncludeItem,
|
|
37594
38032
|
RealtimeSessionCreateRequestGAOutputModalitiesItem: () => RealtimeSessionCreateRequestGAOutputModalitiesItem,
|
|
37595
38033
|
RealtimeSessionCreateRequestGAType: () => RealtimeSessionCreateRequestGAType,
|
|
37596
38034
|
RealtimeSessionCreateRequestModalitiesItem: () => RealtimeSessionCreateRequestModalitiesItem,
|
|
37597
38035
|
RealtimeSessionCreateRequestToolsItemType: () => RealtimeSessionCreateRequestToolsItemType,
|
|
37598
38036
|
RealtimeSessionCreateResponseGAIncludeItem: () => RealtimeSessionCreateResponseGAIncludeItem,
|
|
38037
|
+
RealtimeSessionCreateResponseGAObject: () => RealtimeSessionCreateResponseGAObject,
|
|
37599
38038
|
RealtimeSessionCreateResponseGAOutputModalitiesItem: () => RealtimeSessionCreateResponseGAOutputModalitiesItem,
|
|
37600
38039
|
RealtimeSessionCreateResponseGAType: () => RealtimeSessionCreateResponseGAType,
|
|
37601
38040
|
RealtimeSessionCreateResponseIncludeItem: () => RealtimeSessionCreateResponseIncludeItem,
|
|
@@ -37626,6 +38065,15 @@ __export(schema_exports6, {
|
|
|
37626
38065
|
VoiceResourceObject: () => VoiceResourceObject
|
|
37627
38066
|
});
|
|
37628
38067
|
|
|
38068
|
+
// src/generated/openai/schema/audioTranscriptionDelay.ts
|
|
38069
|
+
var AudioTranscriptionDelay = {
|
|
38070
|
+
minimal: "minimal",
|
|
38071
|
+
low: "low",
|
|
38072
|
+
medium: "medium",
|
|
38073
|
+
high: "high",
|
|
38074
|
+
xhigh: "xhigh"
|
|
38075
|
+
};
|
|
38076
|
+
|
|
37629
38077
|
// src/generated/openai/schema/createSpeechRequestResponseFormat.ts
|
|
37630
38078
|
var CreateSpeechRequestResponseFormat = {
|
|
37631
38079
|
mp3: "mp3",
|
|
@@ -37738,6 +38186,15 @@ var RealtimeFunctionToolType = {
|
|
|
37738
38186
|
function: "function"
|
|
37739
38187
|
};
|
|
37740
38188
|
|
|
38189
|
+
// src/generated/openai/schema/realtimeReasoningEffort.ts
|
|
38190
|
+
var RealtimeReasoningEffort = {
|
|
38191
|
+
minimal: "minimal",
|
|
38192
|
+
low: "low",
|
|
38193
|
+
medium: "medium",
|
|
38194
|
+
high: "high",
|
|
38195
|
+
xhigh: "xhigh"
|
|
38196
|
+
};
|
|
38197
|
+
|
|
37741
38198
|
// src/generated/openai/schema/realtimeSessionCreateRequestGAIncludeItem.ts
|
|
37742
38199
|
var RealtimeSessionCreateRequestGAIncludeItem = {
|
|
37743
38200
|
iteminput_audio_transcriptionlogprobs: "item.input_audio_transcription.logprobs"
|
|
@@ -37770,6 +38227,11 @@ var RealtimeSessionCreateResponseGAIncludeItem = {
|
|
|
37770
38227
|
iteminput_audio_transcriptionlogprobs: "item.input_audio_transcription.logprobs"
|
|
37771
38228
|
};
|
|
37772
38229
|
|
|
38230
|
+
// src/generated/openai/schema/realtimeSessionCreateResponseGAObject.ts
|
|
38231
|
+
var RealtimeSessionCreateResponseGAObject = {
|
|
38232
|
+
realtimesession: "realtime.session"
|
|
38233
|
+
};
|
|
38234
|
+
|
|
37773
38235
|
// src/generated/openai/schema/realtimeSessionCreateResponseGAOutputModalitiesItem.ts
|
|
37774
38236
|
var RealtimeSessionCreateResponseGAOutputModalitiesItem = {
|
|
37775
38237
|
text: "text",
|
|
@@ -37914,6 +38376,7 @@ __export(schema_exports7, {
|
|
|
37914
38376
|
AutoChaptersResultErrorType: () => AutoChaptersResultErrorType,
|
|
37915
38377
|
ErrorResponseError: () => ErrorResponseError,
|
|
37916
38378
|
GetJobsJobidAlignmentTags: () => GetJobsJobidAlignmentTags,
|
|
38379
|
+
GetJobsJobidObjectUrlsUrlForItem: () => GetJobsJobidObjectUrlsUrlForItem,
|
|
37917
38380
|
GetJobsJobidTranscriptFormat: () => GetJobsJobidTranscriptFormat,
|
|
37918
38381
|
JobDetailsStatus: () => JobDetailsStatus,
|
|
37919
38382
|
JobMode: () => JobMode,
|
|
@@ -37983,6 +38446,13 @@ var GetJobsJobidAlignmentTags = {
|
|
|
37983
38446
|
one_per_line: "one_per_line"
|
|
37984
38447
|
};
|
|
37985
38448
|
|
|
38449
|
+
// src/generated/speechmatics/schema/getJobsJobidObjectUrlsUrlForItem.ts
|
|
38450
|
+
var GetJobsJobidObjectUrlsUrlForItem = {
|
|
38451
|
+
data: "data",
|
|
38452
|
+
audio_mp3: "audio_mp3",
|
|
38453
|
+
transcript: "transcript"
|
|
38454
|
+
};
|
|
38455
|
+
|
|
37986
38456
|
// src/generated/speechmatics/schema/getJobsJobidTranscriptFormat.ts
|
|
37987
38457
|
var GetJobsJobidTranscriptFormat = {
|
|
37988
38458
|
"json-v2": "json-v2",
|
|
@@ -38099,6 +38569,15 @@ var WrittenFormRecognitionResultType = {
|
|
|
38099
38569
|
word: "word"
|
|
38100
38570
|
};
|
|
38101
38571
|
|
|
38572
|
+
// src/generated/soniox/sdk-types.ts
|
|
38573
|
+
var sdk_types_exports = {};
|
|
38574
|
+
__export(sdk_types_exports, {
|
|
38575
|
+
RealtimeSttSession: () => import_node.RealtimeSttSession,
|
|
38576
|
+
SonioxFetchHttpClient: () => import_node.FetchHttpClient,
|
|
38577
|
+
SonioxNodeClient: () => import_node.SonioxNodeClient
|
|
38578
|
+
});
|
|
38579
|
+
var import_node = require("@soniox/node");
|
|
38580
|
+
|
|
38102
38581
|
// src/generated/elevenlabs/schema/index.ts
|
|
38103
38582
|
var schema_exports8 = {};
|
|
38104
38583
|
__export(schema_exports8, {
|
|
@@ -38176,6 +38655,10 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
|
|
|
38176
38655
|
deleteJobsJobidParams: () => deleteJobsJobidParams,
|
|
38177
38656
|
deleteJobsJobidQueryParams: () => deleteJobsJobidQueryParams,
|
|
38178
38657
|
deleteJobsJobidResponse: () => deleteJobsJobidResponse,
|
|
38658
|
+
deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault: () => deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault,
|
|
38659
|
+
deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault: () => deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault,
|
|
38660
|
+
deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
|
|
38661
|
+
deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
|
|
38179
38662
|
deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
|
|
38180
38663
|
deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
|
|
38181
38664
|
deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
|
|
@@ -38191,8 +38674,15 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
|
|
|
38191
38674
|
getJobsJobidDataResponse: () => getJobsJobidDataResponse,
|
|
38192
38675
|
getJobsJobidLogParams: () => getJobsJobidLogParams,
|
|
38193
38676
|
getJobsJobidLogResponse: () => getJobsJobidLogResponse,
|
|
38677
|
+
getJobsJobidObjectUrlsParams: () => getJobsJobidObjectUrlsParams,
|
|
38678
|
+
getJobsJobidObjectUrlsQueryParams: () => getJobsJobidObjectUrlsQueryParams,
|
|
38679
|
+
getJobsJobidObjectUrlsResponse: () => getJobsJobidObjectUrlsResponse,
|
|
38194
38680
|
getJobsJobidParams: () => getJobsJobidParams,
|
|
38195
38681
|
getJobsJobidResponse: () => getJobsJobidResponse,
|
|
38682
|
+
getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault: () => getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault,
|
|
38683
|
+
getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault: () => getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault,
|
|
38684
|
+
getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
|
|
38685
|
+
getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
|
|
38196
38686
|
getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
|
|
38197
38687
|
getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
|
|
38198
38688
|
getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
|
|
@@ -38207,6 +38697,8 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
|
|
|
38207
38697
|
getJobsJobidTranscriptQueryParams: () => getJobsJobidTranscriptQueryParams,
|
|
38208
38698
|
getJobsJobidTranscriptResponse: () => getJobsJobidTranscriptResponse,
|
|
38209
38699
|
getJobsJobidTranscriptResponseJobDurationMin: () => getJobsJobidTranscriptResponseJobDurationMin,
|
|
38700
|
+
getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
|
|
38701
|
+
getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
|
|
38210
38702
|
getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp,
|
|
38211
38703
|
getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
|
|
38212
38704
|
getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax,
|
|
@@ -38218,6 +38710,10 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
|
|
|
38218
38710
|
getJobsQueryLimitMax: () => getJobsQueryLimitMax,
|
|
38219
38711
|
getJobsQueryParams: () => getJobsQueryParams,
|
|
38220
38712
|
getJobsResponse: () => getJobsResponse,
|
|
38713
|
+
getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault: () => getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault,
|
|
38714
|
+
getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault: () => getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault,
|
|
38715
|
+
getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax: () => getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax,
|
|
38716
|
+
getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin: () => getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin,
|
|
38221
38717
|
getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp: () => getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp,
|
|
38222
38718
|
getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp: () => getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp,
|
|
38223
38719
|
getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax: () => getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax,
|
|
@@ -38228,12 +38724,18 @@ __export(speechmaticsASRRESTAPI_zod_exports, {
|
|
|
38228
38724
|
getJobsResponseJobsItemDurationMin: () => getJobsResponseJobsItemDurationMin,
|
|
38229
38725
|
getUsageQueryParams: () => getUsageQueryParams,
|
|
38230
38726
|
getUsageResponse: () => getUsageResponse,
|
|
38231
|
-
postJobsBody: () => postJobsBody
|
|
38727
|
+
postJobsBody: () => postJobsBody,
|
|
38728
|
+
postJobsHeader: () => postJobsHeader
|
|
38232
38729
|
});
|
|
38233
38730
|
var import_zod12 = require("zod");
|
|
38731
|
+
var postJobsHeader = import_zod12.z.object({
|
|
38732
|
+
"X-SM-Processing-Data": import_zod12.z.string().optional().describe(
|
|
38733
|
+
'**Note**: Only available for on-prem\nJSON dictionary of processing settings for the job worker. Currently supports `parallel_engines` (integer), which controls the number of engines the worker can use in parallel for this job, and `user_id` (string), which is the user id for this job. Example: `{"parallel_engines": 4}`'
|
|
38734
|
+
)
|
|
38735
|
+
});
|
|
38234
38736
|
var postJobsBody = import_zod12.z.object({
|
|
38235
38737
|
config: import_zod12.z.string().describe(
|
|
38236
|
-
"JSON containing a `JobConfig` model indicating the type and parameters for the recognition job."
|
|
38738
|
+
"JSON containing a [`JobConfig`](/speech-to-text/batch/input#jobconfig-schema) model indicating the type and parameters for the recognition job."
|
|
38237
38739
|
),
|
|
38238
38740
|
data_file: import_zod12.z.instanceof(File).optional().describe(
|
|
38239
38741
|
"The data file to be processed. Alternatively the data file can be fetched from a url specified in `JobConfig`."
|
|
@@ -38255,9 +38757,13 @@ var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitiv
|
|
|
38255
38757
|
var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
|
|
38256
38758
|
var getJobsResponseJobsItemConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
|
|
38257
38759
|
var getJobsResponseJobsItemConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
|
|
38760
|
+
var getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
|
|
38761
|
+
var getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
|
|
38258
38762
|
var getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
|
|
38259
38763
|
var getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
|
|
38260
38764
|
var getJobsResponseJobsItemConfigTranslationConfigTargetLanguagesMax = 5;
|
|
38765
|
+
var getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault = "auto";
|
|
38766
|
+
var getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault = "brief";
|
|
38261
38767
|
var getJobsResponse = import_zod12.z.object({
|
|
38262
38768
|
jobs: import_zod12.z.array(
|
|
38263
38769
|
import_zod12.z.object({
|
|
@@ -38337,19 +38843,30 @@ var getJobsResponse = import_zod12.z.object({
|
|
|
38337
38843
|
max_delay_mode: import_zod12.z.enum(["fixed", "flexible"]).optional().describe(
|
|
38338
38844
|
"Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
|
|
38339
38845
|
),
|
|
38846
|
+
audio_filtering_config: import_zod12.z.object({
|
|
38847
|
+
volume_threshold: import_zod12.z.number().min(
|
|
38848
|
+
getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
|
|
38849
|
+
).max(
|
|
38850
|
+
getJobsResponseJobsItemConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
|
|
38851
|
+
).optional().describe(
|
|
38852
|
+
"Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
|
|
38853
|
+
)
|
|
38854
|
+
}).optional().describe("Configuration for limiting the transcription of quiet audio."),
|
|
38340
38855
|
transcript_filtering_config: import_zod12.z.object({
|
|
38341
38856
|
remove_disfluencies: import_zod12.z.boolean().optional().describe(
|
|
38342
|
-
"If true, words
|
|
38857
|
+
"If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
|
|
38343
38858
|
),
|
|
38344
38859
|
replacements: import_zod12.z.array(
|
|
38345
38860
|
import_zod12.z.object({
|
|
38346
|
-
from: import_zod12.z.string(),
|
|
38347
|
-
to: import_zod12.z.string()
|
|
38861
|
+
from: import_zod12.z.string().describe("The text or pattern identified to be replaced."),
|
|
38862
|
+
to: import_zod12.z.string().describe(
|
|
38863
|
+
"The corrected or formatted string to appear in the transcript."
|
|
38864
|
+
)
|
|
38348
38865
|
})
|
|
38349
38866
|
).optional().describe(
|
|
38350
|
-
|
|
38867
|
+
'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
|
|
38351
38868
|
)
|
|
38352
|
-
}).optional().describe("Configuration for applying filtering to the transcription"),
|
|
38869
|
+
}).optional().describe("Configuration for applying filtering to the transcription."),
|
|
38353
38870
|
speaker_diarization_config: import_zod12.z.object({
|
|
38354
38871
|
prefer_current_speaker: import_zod12.z.boolean().optional().describe(
|
|
38355
38872
|
'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
|
|
@@ -38360,6 +38877,19 @@ var getJobsResponse = import_zod12.z.object({
|
|
|
38360
38877
|
getJobsResponseJobsItemConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
|
|
38361
38878
|
).optional().describe(
|
|
38362
38879
|
"Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
|
|
38880
|
+
),
|
|
38881
|
+
get_speakers: import_zod12.z.boolean().optional().describe(
|
|
38882
|
+
"If true, speaker identifiers will be returned at the end of transcript."
|
|
38883
|
+
),
|
|
38884
|
+
speakers: import_zod12.z.array(
|
|
38885
|
+
import_zod12.z.object({
|
|
38886
|
+
label: import_zod12.z.string().min(1).describe(
|
|
38887
|
+
"Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
|
|
38888
|
+
),
|
|
38889
|
+
speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
|
|
38890
|
+
})
|
|
38891
|
+
).optional().describe(
|
|
38892
|
+
"Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
|
|
38363
38893
|
)
|
|
38364
38894
|
}).optional().describe("Configuration for speaker diarization")
|
|
38365
38895
|
}).optional(),
|
|
@@ -38417,10 +38947,14 @@ var getJobsResponse = import_zod12.z.object({
|
|
|
38417
38947
|
default_language: import_zod12.z.string().optional()
|
|
38418
38948
|
}).optional(),
|
|
38419
38949
|
summarization_config: import_zod12.z.object({
|
|
38420
|
-
content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).
|
|
38421
|
-
|
|
38950
|
+
content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).default(getJobsResponseJobsItemConfigSummarizationConfigContentTypeDefault).describe(
|
|
38951
|
+
"Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
|
|
38952
|
+
),
|
|
38953
|
+
summary_length: import_zod12.z.enum(["brief", "detailed"]).default(getJobsResponseJobsItemConfigSummarizationConfigSummaryLengthDefault).describe(
|
|
38954
|
+
"Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
|
|
38955
|
+
),
|
|
38422
38956
|
summary_type: import_zod12.z.enum(["paragraphs", "bullets"]).optional()
|
|
38423
|
-
}).optional(),
|
|
38957
|
+
}).optional().describe("Configuration options for summarization."),
|
|
38424
38958
|
sentiment_analysis_config: import_zod12.z.object({}).optional(),
|
|
38425
38959
|
topic_detection_config: import_zod12.z.object({
|
|
38426
38960
|
topics: import_zod12.z.array(import_zod12.z.string()).optional()
|
|
@@ -38442,7 +38976,7 @@ var getJobsResponse = import_zod12.z.object({
|
|
|
38442
38976
|
"Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
|
|
38443
38977
|
)
|
|
38444
38978
|
}).describe(
|
|
38445
|
-
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs
|
|
38979
|
+
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
|
|
38446
38980
|
)
|
|
38447
38981
|
)
|
|
38448
38982
|
});
|
|
@@ -38454,9 +38988,13 @@ var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitiv
|
|
|
38454
38988
|
var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
|
|
38455
38989
|
var getJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
|
|
38456
38990
|
var getJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
|
|
38991
|
+
var getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
|
|
38992
|
+
var getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
|
|
38457
38993
|
var getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
|
|
38458
38994
|
var getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
|
|
38459
38995
|
var getJobsJobidResponseJobConfigTranslationConfigTargetLanguagesMax = 5;
|
|
38996
|
+
var getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault = "auto";
|
|
38997
|
+
var getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault = "brief";
|
|
38460
38998
|
var getJobsJobidResponse = import_zod12.z.object({
|
|
38461
38999
|
job: import_zod12.z.object({
|
|
38462
39000
|
created_at: import_zod12.z.string().datetime({}).describe("The UTC date time the job was created."),
|
|
@@ -38533,19 +39071,30 @@ var getJobsJobidResponse = import_zod12.z.object({
|
|
|
38533
39071
|
max_delay_mode: import_zod12.z.enum(["fixed", "flexible"]).optional().describe(
|
|
38534
39072
|
"Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
|
|
38535
39073
|
),
|
|
39074
|
+
audio_filtering_config: import_zod12.z.object({
|
|
39075
|
+
volume_threshold: import_zod12.z.number().min(
|
|
39076
|
+
getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
|
|
39077
|
+
).max(
|
|
39078
|
+
getJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
|
|
39079
|
+
).optional().describe(
|
|
39080
|
+
"Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
|
|
39081
|
+
)
|
|
39082
|
+
}).optional().describe("Configuration for limiting the transcription of quiet audio."),
|
|
38536
39083
|
transcript_filtering_config: import_zod12.z.object({
|
|
38537
39084
|
remove_disfluencies: import_zod12.z.boolean().optional().describe(
|
|
38538
|
-
"If true, words
|
|
39085
|
+
"If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
|
|
38539
39086
|
),
|
|
38540
39087
|
replacements: import_zod12.z.array(
|
|
38541
39088
|
import_zod12.z.object({
|
|
38542
|
-
from: import_zod12.z.string(),
|
|
38543
|
-
to: import_zod12.z.string()
|
|
39089
|
+
from: import_zod12.z.string().describe("The text or pattern identified to be replaced."),
|
|
39090
|
+
to: import_zod12.z.string().describe(
|
|
39091
|
+
"The corrected or formatted string to appear in the transcript."
|
|
39092
|
+
)
|
|
38544
39093
|
})
|
|
38545
39094
|
).optional().describe(
|
|
38546
|
-
|
|
39095
|
+
'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
|
|
38547
39096
|
)
|
|
38548
|
-
}).optional().describe("Configuration for applying filtering to the transcription"),
|
|
39097
|
+
}).optional().describe("Configuration for applying filtering to the transcription."),
|
|
38549
39098
|
speaker_diarization_config: import_zod12.z.object({
|
|
38550
39099
|
prefer_current_speaker: import_zod12.z.boolean().optional().describe(
|
|
38551
39100
|
'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
|
|
@@ -38556,6 +39105,19 @@ var getJobsJobidResponse = import_zod12.z.object({
|
|
|
38556
39105
|
getJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
|
|
38557
39106
|
).optional().describe(
|
|
38558
39107
|
"Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
|
|
39108
|
+
),
|
|
39109
|
+
get_speakers: import_zod12.z.boolean().optional().describe(
|
|
39110
|
+
"If true, speaker identifiers will be returned at the end of transcript."
|
|
39111
|
+
),
|
|
39112
|
+
speakers: import_zod12.z.array(
|
|
39113
|
+
import_zod12.z.object({
|
|
39114
|
+
label: import_zod12.z.string().min(1).describe(
|
|
39115
|
+
"Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
|
|
39116
|
+
),
|
|
39117
|
+
speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
|
|
39118
|
+
})
|
|
39119
|
+
).optional().describe(
|
|
39120
|
+
"Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
|
|
38559
39121
|
)
|
|
38560
39122
|
}).optional().describe("Configuration for speaker diarization")
|
|
38561
39123
|
}).optional(),
|
|
@@ -38611,10 +39173,14 @@ var getJobsJobidResponse = import_zod12.z.object({
|
|
|
38611
39173
|
default_language: import_zod12.z.string().optional()
|
|
38612
39174
|
}).optional(),
|
|
38613
39175
|
summarization_config: import_zod12.z.object({
|
|
38614
|
-
content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).
|
|
38615
|
-
|
|
39176
|
+
content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).default(getJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault).describe(
|
|
39177
|
+
"Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
|
|
39178
|
+
),
|
|
39179
|
+
summary_length: import_zod12.z.enum(["brief", "detailed"]).default(getJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault).describe(
|
|
39180
|
+
"Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
|
|
39181
|
+
),
|
|
38616
39182
|
summary_type: import_zod12.z.enum(["paragraphs", "bullets"]).optional()
|
|
38617
|
-
}).optional(),
|
|
39183
|
+
}).optional().describe("Configuration options for summarization."),
|
|
38618
39184
|
sentiment_analysis_config: import_zod12.z.object({}).optional(),
|
|
38619
39185
|
topic_detection_config: import_zod12.z.object({
|
|
38620
39186
|
topics: import_zod12.z.array(import_zod12.z.string()).optional()
|
|
@@ -38636,7 +39202,7 @@ var getJobsJobidResponse = import_zod12.z.object({
|
|
|
38636
39202
|
"Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
|
|
38637
39203
|
)
|
|
38638
39204
|
}).describe(
|
|
38639
|
-
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs
|
|
39205
|
+
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
|
|
38640
39206
|
)
|
|
38641
39207
|
});
|
|
38642
39208
|
var deleteJobsJobidParams = import_zod12.z.object({
|
|
@@ -38652,9 +39218,13 @@ var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensi
|
|
|
38652
39218
|
var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
|
|
38653
39219
|
var deleteJobsJobidResponseJobConfigTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
|
|
38654
39220
|
var deleteJobsJobidResponseJobConfigTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
|
|
39221
|
+
var deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
|
|
39222
|
+
var deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
|
|
38655
39223
|
var deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
|
|
38656
39224
|
var deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
|
|
38657
39225
|
var deleteJobsJobidResponseJobConfigTranslationConfigTargetLanguagesMax = 5;
|
|
39226
|
+
var deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault = "auto";
|
|
39227
|
+
var deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault = "brief";
|
|
38658
39228
|
var deleteJobsJobidResponse = import_zod12.z.object({
|
|
38659
39229
|
job: import_zod12.z.object({
|
|
38660
39230
|
created_at: import_zod12.z.string().datetime({}).describe("The UTC date time the job was created."),
|
|
@@ -38731,19 +39301,30 @@ var deleteJobsJobidResponse = import_zod12.z.object({
|
|
|
38731
39301
|
max_delay_mode: import_zod12.z.enum(["fixed", "flexible"]).optional().describe(
|
|
38732
39302
|
"Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
|
|
38733
39303
|
),
|
|
39304
|
+
audio_filtering_config: import_zod12.z.object({
|
|
39305
|
+
volume_threshold: import_zod12.z.number().min(
|
|
39306
|
+
deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
|
|
39307
|
+
).max(
|
|
39308
|
+
deleteJobsJobidResponseJobConfigTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
|
|
39309
|
+
).optional().describe(
|
|
39310
|
+
"Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
|
|
39311
|
+
)
|
|
39312
|
+
}).optional().describe("Configuration for limiting the transcription of quiet audio."),
|
|
38734
39313
|
transcript_filtering_config: import_zod12.z.object({
|
|
38735
39314
|
remove_disfluencies: import_zod12.z.boolean().optional().describe(
|
|
38736
|
-
"If true, words
|
|
39315
|
+
"If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
|
|
38737
39316
|
),
|
|
38738
39317
|
replacements: import_zod12.z.array(
|
|
38739
39318
|
import_zod12.z.object({
|
|
38740
|
-
from: import_zod12.z.string(),
|
|
38741
|
-
to: import_zod12.z.string()
|
|
39319
|
+
from: import_zod12.z.string().describe("The text or pattern identified to be replaced."),
|
|
39320
|
+
to: import_zod12.z.string().describe(
|
|
39321
|
+
"The corrected or formatted string to appear in the transcript."
|
|
39322
|
+
)
|
|
38742
39323
|
})
|
|
38743
39324
|
).optional().describe(
|
|
38744
|
-
|
|
39325
|
+
'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
|
|
38745
39326
|
)
|
|
38746
|
-
}).optional().describe("Configuration for applying filtering to the transcription"),
|
|
39327
|
+
}).optional().describe("Configuration for applying filtering to the transcription."),
|
|
38747
39328
|
speaker_diarization_config: import_zod12.z.object({
|
|
38748
39329
|
prefer_current_speaker: import_zod12.z.boolean().optional().describe(
|
|
38749
39330
|
'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
|
|
@@ -38754,6 +39335,19 @@ var deleteJobsJobidResponse = import_zod12.z.object({
|
|
|
38754
39335
|
deleteJobsJobidResponseJobConfigTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
|
|
38755
39336
|
).optional().describe(
|
|
38756
39337
|
"Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
|
|
39338
|
+
),
|
|
39339
|
+
get_speakers: import_zod12.z.boolean().optional().describe(
|
|
39340
|
+
"If true, speaker identifiers will be returned at the end of transcript."
|
|
39341
|
+
),
|
|
39342
|
+
speakers: import_zod12.z.array(
|
|
39343
|
+
import_zod12.z.object({
|
|
39344
|
+
label: import_zod12.z.string().min(1).describe(
|
|
39345
|
+
"Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
|
|
39346
|
+
),
|
|
39347
|
+
speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
|
|
39348
|
+
})
|
|
39349
|
+
).optional().describe(
|
|
39350
|
+
"Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
|
|
38757
39351
|
)
|
|
38758
39352
|
}).optional().describe("Configuration for speaker diarization")
|
|
38759
39353
|
}).optional(),
|
|
@@ -38809,10 +39403,14 @@ var deleteJobsJobidResponse = import_zod12.z.object({
|
|
|
38809
39403
|
default_language: import_zod12.z.string().optional()
|
|
38810
39404
|
}).optional(),
|
|
38811
39405
|
summarization_config: import_zod12.z.object({
|
|
38812
|
-
content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).
|
|
38813
|
-
|
|
39406
|
+
content_type: import_zod12.z.enum(["auto", "informative", "conversational"]).default(deleteJobsJobidResponseJobConfigSummarizationConfigContentTypeDefault).describe(
|
|
39407
|
+
"Choose from three options:\n- `conversational` - Best suited for dialogues involving multiple participants, such as calls, meetings or discussions. It focuses on summarizing key points of the conversation.\n- `informative` - Recommended for more structured information delivered by one or more people, making it ideal for videos, podcasts, lectures, and presentations.\n- `auto` - Automatically selects the most appropriate content type based on an analysis of the transcript.\n"
|
|
39408
|
+
),
|
|
39409
|
+
summary_length: import_zod12.z.enum(["brief", "detailed"]).default(deleteJobsJobidResponseJobConfigSummarizationConfigSummaryLengthDefault).describe(
|
|
39410
|
+
"Determines the depth of the summary:\n- `brief` - Provides a succinct summary, condensing the content into just a few sentences.\n- `detailed` - Provide a longer, structured summary. For _conversational_ content, it includes key topics and a summary of the entire conversation. For _informative_ content, it logically divides the audio into sections and provides a summary for each."
|
|
39411
|
+
),
|
|
38814
39412
|
summary_type: import_zod12.z.enum(["paragraphs", "bullets"]).optional()
|
|
38815
|
-
}).optional(),
|
|
39413
|
+
}).optional().describe("Configuration options for summarization."),
|
|
38816
39414
|
sentiment_analysis_config: import_zod12.z.object({}).optional(),
|
|
38817
39415
|
topic_detection_config: import_zod12.z.object({
|
|
38818
39416
|
topics: import_zod12.z.array(import_zod12.z.string()).optional()
|
|
@@ -38834,7 +39432,7 @@ var deleteJobsJobidResponse = import_zod12.z.object({
|
|
|
38834
39432
|
"Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent."
|
|
38835
39433
|
)
|
|
38836
39434
|
}).describe(
|
|
38837
|
-
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs
|
|
39435
|
+
"Document describing a job. JobConfig will be present in JobDetails returned for GET jobs/{id} request in SaaS and in Batch Appliance, but it will not be present in JobDetails returned as item in RetrieveJobsResponse in case of Batch Appliance."
|
|
38838
39436
|
)
|
|
38839
39437
|
});
|
|
38840
39438
|
var getJobsJobidDataParams = import_zod12.z.object({
|
|
@@ -38856,6 +39454,8 @@ var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverride
|
|
|
38856
39454
|
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesSensitivityMax = 1;
|
|
38857
39455
|
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigPunctuationOverridesPermittedMarksItemRegExp = /^(.|all)$/;
|
|
38858
39456
|
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigChannelDiarizationLabelsItemRegExp = /^[A-Za-z0-9._]+$/;
|
|
39457
|
+
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin = 0;
|
|
39458
|
+
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax = 100;
|
|
38859
39459
|
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMin = 0;
|
|
38860
39460
|
var getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax = 1;
|
|
38861
39461
|
var getJobsJobidTranscriptResponseResultsItemVolumeMin = 0;
|
|
@@ -38927,19 +39527,28 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
|
|
|
38927
39527
|
max_delay_mode: import_zod12.z.enum(["fixed", "flexible"]).optional().describe(
|
|
38928
39528
|
"Whether or not to enable flexible endpointing and allow the entity to continue to be spoken."
|
|
38929
39529
|
),
|
|
39530
|
+
audio_filtering_config: import_zod12.z.object({
|
|
39531
|
+
volume_threshold: import_zod12.z.number().min(
|
|
39532
|
+
getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMin
|
|
39533
|
+
).max(
|
|
39534
|
+
getJobsJobidTranscriptResponseMetadataTranscriptionConfigAudioFilteringConfigVolumeThresholdMax
|
|
39535
|
+
).optional().describe(
|
|
39536
|
+
"Controls the lower limit of audio volume at which speech and audio events will be transcribed. If the volume limit is very low, then most sound will be passed to the speech recognition engine. Higher numbers will cut out increasing amounts of sound."
|
|
39537
|
+
)
|
|
39538
|
+
}).optional().describe("Configuration for limiting the transcription of quiet audio."),
|
|
38930
39539
|
transcript_filtering_config: import_zod12.z.object({
|
|
38931
39540
|
remove_disfluencies: import_zod12.z.boolean().optional().describe(
|
|
38932
|
-
"If true, words
|
|
39541
|
+
"If true, words identified as disfluencies (e.g., 'um', 'uh') will be removed from the transcript. If false (default), they are tagged in the transcript as 'disfluency'."
|
|
38933
39542
|
),
|
|
38934
39543
|
replacements: import_zod12.z.array(
|
|
38935
39544
|
import_zod12.z.object({
|
|
38936
|
-
from: import_zod12.z.string(),
|
|
38937
|
-
to: import_zod12.z.string()
|
|
39545
|
+
from: import_zod12.z.string().describe("The text or pattern identified to be replaced."),
|
|
39546
|
+
to: import_zod12.z.string().describe("The corrected or formatted string to appear in the transcript.")
|
|
38938
39547
|
})
|
|
38939
39548
|
).optional().describe(
|
|
38940
|
-
|
|
39549
|
+
'An array of objects defining custom replacements. Each replacement contains a pair of strings: the text to find ("from:") and the text to replace it with ("to:").'
|
|
38941
39550
|
)
|
|
38942
|
-
}).optional().describe("Configuration for applying filtering to the transcription"),
|
|
39551
|
+
}).optional().describe("Configuration for applying filtering to the transcription."),
|
|
38943
39552
|
speaker_diarization_config: import_zod12.z.object({
|
|
38944
39553
|
prefer_current_speaker: import_zod12.z.boolean().optional().describe(
|
|
38945
39554
|
'If true, the algorithm will prefer to stay with the current active speaker if it is a close enough match, even if other speakers may be closer. This is useful for cases where we can flip incorrectly between similar speakers during a single speaker section."'
|
|
@@ -38950,9 +39559,23 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
|
|
|
38950
39559
|
getJobsJobidTranscriptResponseMetadataTranscriptionConfigSpeakerDiarizationConfigSpeakerSensitivityMax
|
|
38951
39560
|
).optional().describe(
|
|
38952
39561
|
"Controls how sensitive the algorithm is in terms of keeping similar speakers separate, as opposed to combining them into a single speaker. Higher values will typically lead to more speakers, as the degree of difference between speakers in order to allow them to remain distinct will be lower. A lower value for this parameter will conversely guide the algorithm towards being less sensitive in terms of retaining similar speakers, and as such may lead to fewer speakers overall. The default is 0.5."
|
|
39562
|
+
),
|
|
39563
|
+
get_speakers: import_zod12.z.boolean().optional().describe(
|
|
39564
|
+
"If true, speaker identifiers will be returned at the end of transcript."
|
|
39565
|
+
),
|
|
39566
|
+
speakers: import_zod12.z.array(
|
|
39567
|
+
import_zod12.z.object({
|
|
39568
|
+
label: import_zod12.z.string().min(1).describe(
|
|
39569
|
+
"Speaker label, which must not match the format used internally (e.g. S1, S2, etc)"
|
|
39570
|
+
),
|
|
39571
|
+
speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
|
|
39572
|
+
})
|
|
39573
|
+
).optional().describe(
|
|
39574
|
+
"Use this option to provide speaker labels linked to their speaker identifiers. When passed, the transcription system will tag spoken words in the transcript with the provided speaker labels whenever any of the specified speakers is detected in the audio. A maximum of 50 speakers identifiers across all speakers can be provided."
|
|
38953
39575
|
)
|
|
38954
39576
|
}).optional().describe("Configuration for speaker diarization")
|
|
38955
39577
|
}).optional(),
|
|
39578
|
+
orchestrator_version: import_zod12.z.string().optional().describe("The engine version used to generate transcription output."),
|
|
38956
39579
|
translation_errors: import_zod12.z.array(
|
|
38957
39580
|
import_zod12.z.object({
|
|
38958
39581
|
type: import_zod12.z.enum(["translation_failed", "unsupported_translation_pair"]).optional(),
|
|
@@ -39030,10 +39653,7 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
|
|
|
39030
39653
|
"OTHER"
|
|
39031
39654
|
]).optional(),
|
|
39032
39655
|
message: import_zod12.z.string().optional()
|
|
39033
|
-
}).optional()
|
|
39034
|
-
orchestrator_version: import_zod12.z.string().optional().describe(
|
|
39035
|
-
"Orchestrator version in PEP 440 Format or set to 'version_not_found' as default."
|
|
39036
|
-
)
|
|
39656
|
+
}).optional()
|
|
39037
39657
|
}).describe(
|
|
39038
39658
|
"Summary information about the output from an ASR job, comprising the job type and configuration parameters used when generating the output."
|
|
39039
39659
|
),
|
|
@@ -39116,6 +39736,12 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
|
|
|
39116
39736
|
"An ASR job output item. The primary item types are `word` and `punctuation`. Other item types may be present, for example to provide semantic information of different forms."
|
|
39117
39737
|
)
|
|
39118
39738
|
),
|
|
39739
|
+
speakers: import_zod12.z.array(
|
|
39740
|
+
import_zod12.z.object({
|
|
39741
|
+
label: import_zod12.z.string().min(1).describe("Speaker label."),
|
|
39742
|
+
speaker_identifiers: import_zod12.z.array(import_zod12.z.string().describe("Speaker identifiers.")).min(1)
|
|
39743
|
+
})
|
|
39744
|
+
).optional().describe("List of unique speaker identifiers detected in the transcript."),
|
|
39119
39745
|
translations: import_zod12.z.record(
|
|
39120
39746
|
import_zod12.z.string(),
|
|
39121
39747
|
import_zod12.z.array(
|
|
@@ -39137,13 +39763,23 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
|
|
|
39137
39763
|
sentiment_analysis: import_zod12.z.object({
|
|
39138
39764
|
segments: import_zod12.z.array(
|
|
39139
39765
|
import_zod12.z.object({
|
|
39140
|
-
text: import_zod12.z.string().optional(),
|
|
39141
|
-
|
|
39142
|
-
|
|
39143
|
-
|
|
39144
|
-
|
|
39145
|
-
|
|
39146
|
-
|
|
39766
|
+
text: import_zod12.z.string().optional().describe("Represents the transcript of the analysed segment"),
|
|
39767
|
+
sentiment: import_zod12.z.string().optional().describe(
|
|
39768
|
+
"The assigned sentiment to the segment, which can be positive, neutral or negative"
|
|
39769
|
+
),
|
|
39770
|
+
start_time: import_zod12.z.number().optional().describe(
|
|
39771
|
+
"The timestamp corresponding to the beginning of the transcription segment"
|
|
39772
|
+
),
|
|
39773
|
+
end_time: import_zod12.z.number().optional().describe(
|
|
39774
|
+
"The timestamp corresponding to the end of the transcription segment"
|
|
39775
|
+
),
|
|
39776
|
+
speaker: import_zod12.z.string().optional().describe(
|
|
39777
|
+
"The speaker label for the segment, if speaker diarization is enabled"
|
|
39778
|
+
),
|
|
39779
|
+
channel: import_zod12.z.string().optional().describe(
|
|
39780
|
+
"The channel label for the segment, if channel diarization is enabled"
|
|
39781
|
+
),
|
|
39782
|
+
confidence: import_zod12.z.number().optional().describe("A confidence score in the range of 0-1")
|
|
39147
39783
|
}).describe("Represents a segment of text and its associated sentiment.")
|
|
39148
39784
|
).optional().describe(
|
|
39149
39785
|
"An array of objects that represent a segment of text and its associated sentiment."
|
|
@@ -39202,10 +39838,10 @@ var getJobsJobidTranscriptResponse = import_zod12.z.object({
|
|
|
39202
39838
|
}).optional().describe("Main object that holds topic detection results."),
|
|
39203
39839
|
chapters: import_zod12.z.array(
|
|
39204
39840
|
import_zod12.z.object({
|
|
39205
|
-
title: import_zod12.z.string().optional(),
|
|
39206
|
-
summary: import_zod12.z.string().optional(),
|
|
39207
|
-
start_time: import_zod12.z.number().optional(),
|
|
39208
|
-
end_time: import_zod12.z.number().optional()
|
|
39841
|
+
title: import_zod12.z.string().optional().describe("The auto-generated title for the chapter"),
|
|
39842
|
+
summary: import_zod12.z.string().optional().describe("An auto-generated paragraph-style, short summary of the chapter"),
|
|
39843
|
+
start_time: import_zod12.z.number().optional().describe("The start time of the chapter in the audio file"),
|
|
39844
|
+
end_time: import_zod12.z.number().optional().describe("The end time of the chapter in the audio file")
|
|
39209
39845
|
})
|
|
39210
39846
|
).optional().describe("An array of objects that represent summarized chapters of the transcript"),
|
|
39211
39847
|
audio_events: import_zod12.z.array(
|
|
@@ -39250,6 +39886,18 @@ var getJobsJobidLogParams = import_zod12.z.object({
|
|
|
39250
39886
|
jobid: import_zod12.z.string().describe("ID of the job.")
|
|
39251
39887
|
});
|
|
39252
39888
|
var getJobsJobidLogResponse = import_zod12.z.instanceof(File);
|
|
39889
|
+
var getJobsJobidObjectUrlsParams = import_zod12.z.object({
|
|
39890
|
+
jobid: import_zod12.z.string().describe("ID of the job.")
|
|
39891
|
+
});
|
|
39892
|
+
var getJobsJobidObjectUrlsQueryParams = import_zod12.z.object({
|
|
39893
|
+
ttl: import_zod12.z.number().describe("Time to live in seconds for the signed URLs"),
|
|
39894
|
+
url_for: import_zod12.z.array(import_zod12.z.enum(["data", "audio_mp3", "transcript"]))
|
|
39895
|
+
});
|
|
39896
|
+
var getJobsJobidObjectUrlsResponse = import_zod12.z.object({
|
|
39897
|
+
data: import_zod12.z.string().optional(),
|
|
39898
|
+
audio_mp3: import_zod12.z.string().optional(),
|
|
39899
|
+
transcript: import_zod12.z.string().optional()
|
|
39900
|
+
});
|
|
39253
39901
|
var getUsageQueryParams = import_zod12.z.object({
|
|
39254
39902
|
since: import_zod12.z.string().date().optional().describe(
|
|
39255
39903
|
"Include usage after the given date (inclusive). This is a [ISO-8601](https://en.wikipedia.org/wiki/ISO_8601) calendar date format: `YYYY-MM-DD`."
|
|
@@ -39383,7 +40031,7 @@ var speechToTextBodyKeytermsDefault = [];
|
|
|
39383
40031
|
var speechToTextBody = import_zod13.z.object({
|
|
39384
40032
|
model_id: import_zod13.z.enum(["scribe_v1", "scribe_v2"]).describe("The ID of the model to use for transcription."),
|
|
39385
40033
|
file: import_zod13.z.instanceof(File).or(import_zod13.z.null()).optional().describe(
|
|
39386
|
-
"The file to transcribe (100ms minimum audio length). All major audio and video formats are supported. Exactly one of the file or cloud_storage_url parameters must be provided. The file size must be less than
|
|
40034
|
+
"The file to transcribe (100ms minimum audio length). All major audio and video formats are supported. Exactly one of the file or cloud_storage_url parameters must be provided. The file size must be less than 5.0GB."
|
|
39387
40035
|
),
|
|
39388
40036
|
language_code: import_zod13.z.string().or(import_zod13.z.null()).optional().describe(
|
|
39389
40037
|
"An ISO-639-1 or ISO-639-3 language_code corresponding to the language of the audio file. Can sometimes improve transcription performance if known beforehand. Defaults to null, in this case the language is predicted automatically."
|
|
@@ -39461,7 +40109,7 @@ var speechToTextBody = import_zod13.z.object({
|
|
|
39461
40109
|
"The format of input audio. Options are 'pcm_s16le_16' or 'other' For `pcm_s16le_16`, the input audio must be 16-bit PCM at a 16kHz sample rate, single channel (mono), and little-endian byte order. Latency will be lower than with passing an encoded waveform."
|
|
39462
40110
|
),
|
|
39463
40111
|
cloud_storage_url: import_zod13.z.string().or(import_zod13.z.null()).optional().describe(
|
|
39464
|
-
"The HTTPS URL of the file to transcribe. Exactly one of the file or cloud_storage_url parameters must be provided. The file must be accessible via HTTPS and the file size must be less than 2GB. Any valid HTTPS URL is accepted, including URLs from cloud storage providers (AWS S3, Google Cloud Storage, Cloudflare R2, etc.), CDNs, or any other HTTPS source. URLs can be pre-signed or include authentication tokens in query parameters."
|
|
40112
|
+
"[Deprecated] This parameter is deprecated and will be removed in the future. Use 'source_url' instead.The HTTPS URL of the file to transcribe. Exactly one of the file or cloud_storage_url parameters must be provided. The file must be accessible via HTTPS and the file size must be less than 2GB. Any valid HTTPS URL is accepted, including URLs from cloud storage providers (AWS S3, Google Cloud Storage, Cloudflare R2, etc.), CDNs, or any other HTTPS source. URLs can be pre-signed or include authentication tokens in query parameters."
|
|
39465
40113
|
),
|
|
39466
40114
|
source_url: import_zod13.z.string().or(import_zod13.z.null()).optional().describe(
|
|
39467
40115
|
"The URL of an audio or video file to transcribe. Supports hosted video or audio files, YouTube video URLs, TikTok video URLs, and other video hosting services."
|
|
@@ -39500,7 +40148,7 @@ var speechToTextBody = import_zod13.z.object({
|
|
|
39500
40148
|
"How to format redacted entities. 'redacted' replaces with {REDACTED}, 'entity_type' replaces with {ENTITY_TYPE}, 'enumerated_entity_type' replaces with {ENTITY_TYPE_N} where N enumerates each occurrence. Only used when entity_redaction is set."
|
|
39501
40149
|
),
|
|
39502
40150
|
keyterms: import_zod13.z.array(import_zod13.z.string()).default(speechToTextBodyKeytermsDefault).describe(
|
|
39503
|
-
'A list of keyterms to bias the transcription towards. The keyterms are words or phrases you want the model to recognise more accurately. The number of keyterms cannot exceed 1000. The length of each keyterm must be less than 50 characters. Keyterms can contain at most 5 words (after normalisation). For example ["hello", "world", "technical term"]. Usage of this parameter will incur an additional 20% surcharge on the base transcription cost. When more than 100 keyterms are provided, a minimum billable duration of 20 seconds applies per request.'
|
|
40151
|
+
'A list of keyterms to bias the transcription towards. The keyterms are words or phrases you want the model to recognise more accurately. The number of keyterms cannot exceed 1000. The length of each keyterm must be less than 50 characters. Keyterms can contain at most 5 words (after normalisation). For example ["hello", "world", "technical term"]. The following characters are not supported: `<`, `>`, `{`, `}`, `[`, `]`, `\\`. Usage of this parameter will incur an additional 20% surcharge on the base transcription cost. When more than 100 keyterms are provided, a minimum billable duration of 20 seconds applies per request.'
|
|
39504
40152
|
)
|
|
39505
40153
|
});
|
|
39506
40154
|
var speechToTextResponse = import_zod13.z.object({
|
|
@@ -39866,6 +40514,7 @@ var deleteTranscriptByIdResponse = import_zod13.z.any();
|
|
|
39866
40514
|
SonioxModels,
|
|
39867
40515
|
SonioxRealtimeModel,
|
|
39868
40516
|
SonioxRegion,
|
|
40517
|
+
SonioxSDK,
|
|
39869
40518
|
SonioxStreamingSchema,
|
|
39870
40519
|
SonioxStreamingTypes,
|
|
39871
40520
|
SonioxStreamingUpdateSchema,
|