voice-router-dev 0.9.3 → 0.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/constants.d.mts +11 -92
- package/dist/constants.d.ts +11 -92
- package/dist/constants.js +11 -88
- package/dist/constants.mjs +11 -88
- package/dist/{field-configs-FbtCPxzs.d.mts → field-configs-BVOZQiG3.d.mts} +4855 -3773
- package/dist/{field-configs-FbtCPxzs.d.ts → field-configs-BVOZQiG3.d.ts} +4855 -3773
- package/dist/field-configs.d.mts +1 -1
- package/dist/field-configs.d.ts +1 -1
- package/dist/field-configs.js +583 -150
- package/dist/field-configs.mjs +583 -150
- package/dist/index.d.mts +1211 -162
- package/dist/index.d.ts +1211 -162
- package/dist/index.js +924 -275
- package/dist/index.mjs +927 -275
- package/dist/{provider-metadata-D1d-9cng.d.ts → provider-metadata-CiSA4fWP.d.ts} +2 -2
- package/dist/{provider-metadata-BJ29OPW1.d.mts → provider-metadata-oxzd1q6t.d.mts} +2 -2
- package/dist/provider-metadata.d.mts +1 -1
- package/dist/provider-metadata.d.ts +1 -1
- package/dist/provider-metadata.js +3 -66
- package/dist/provider-metadata.mjs +3 -66
- package/dist/{speechToTextChunkResponseModel-BY2lGyZ3.d.ts → speechToTextChunkResponseModel-Dns0Ma9x.d.ts} +364 -39
- package/dist/{speechToTextChunkResponseModel-KayxDiZ7.d.mts → speechToTextChunkResponseModel-_ZvHTD4e.d.mts} +364 -39
- package/dist/webhooks.d.mts +3 -2
- package/dist/webhooks.d.ts +3 -2
- package/package.json +8 -3
package/dist/field-configs.mjs
CHANGED
|
@@ -223,6 +223,9 @@ var listenTranscribeQueryParams = zod.object({
|
|
|
223
223
|
diarize: zod.boolean().optional().describe(
|
|
224
224
|
"Recognize speaker changes. Each word in the transcript will be assigned a speaker number starting at 0"
|
|
225
225
|
),
|
|
226
|
+
diarize_model: zod.enum(["latest", "v1", "v2"]).optional().describe(
|
|
227
|
+
"Select and enable a specific batch diarization model version. If specifying this parameter, you should not set the deprecated `diarize=true` parameter. Not accepted on streaming requests."
|
|
228
|
+
),
|
|
226
229
|
dictation: zod.boolean().optional().describe("Dictation mode for controlling formatting with dictated speech"),
|
|
227
230
|
encoding: zod.enum(["linear16", "flac", "mulaw", "amr-nb", "amr-wb", "opus", "speex", "g729"]).optional().describe("Specify the expected encoding of your submitted audio"),
|
|
228
231
|
filler_words: zod.boolean().optional().describe('Filler Words can help transcribe interruptions in your audio, like "uh" and "um"'),
|
|
@@ -487,6 +490,7 @@ var listenTranscribeResponse = zod.object({
|
|
|
487
490
|
);
|
|
488
491
|
var speakGenerateQueryCallbackMethodDefault = "POST";
|
|
489
492
|
var speakGenerateQueryModelDefault = "aura-asteria-en";
|
|
493
|
+
var speakGenerateQuerySpeedDefault = 1;
|
|
490
494
|
var speakGenerateQueryParams = zod.object({
|
|
491
495
|
callback: zod.string().optional().describe("URL to which we'll make the callback request"),
|
|
492
496
|
callback_method: zod.enum(["POST", "PUT"]).default(speakGenerateQueryCallbackMethodDefault).describe("HTTP method by which the callback request will be made"),
|
|
@@ -598,6 +602,9 @@ var speakGenerateQueryParams = zod.object({
|
|
|
598
602
|
zod.enum(["22050"]).describe("Encoding - mp3. Sample rate is fixed and not configurable (22050 Hz).")
|
|
599
603
|
).or(zod.enum(["48000"]).describe("Encoding - opus. Sample rate is fixed at 48000 Hz.")).optional().describe(
|
|
600
604
|
"Sample Rate specifies the sample rate for the output audio. Based on the encoding, different sample rates are supported. For some encodings, the sample rate is not configurable"
|
|
605
|
+
),
|
|
606
|
+
speed: zod.number().default(speakGenerateQuerySpeedDefault).describe(
|
|
607
|
+
"Speaking rate multiplier that adjusts the pace of generated speech while preserving natural prosody and voice quality. Not yet supported in all languages."
|
|
601
608
|
)
|
|
602
609
|
});
|
|
603
610
|
var speakGenerateHeader = zod.object({
|
|
@@ -937,7 +944,7 @@ var createTranscriptBody = zod3.object({
|
|
|
937
944
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
938
945
|
),
|
|
939
946
|
disfluencies: zod3.boolean().optional().describe(
|
|
940
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
947
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
941
948
|
),
|
|
942
949
|
domain: zod3.string().nullish().describe(
|
|
943
950
|
'Enable domain-specific transcription models to improve accuracy for specialized terminology. Set to `"medical-v1"` to enable [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) for improved accuracy of medical terms such as medications, procedures, conditions, and dosages.\n\nSupported languages: English (`en`), Spanish (`es`), German (`de`), French (`fr`). If used with an unsupported language, the parameter is ignored and a warning is returned.\n'
|
|
@@ -1244,12 +1251,20 @@ var createTranscriptBody = zod3.object({
|
|
|
1244
1251
|
"email_address",
|
|
1245
1252
|
"event",
|
|
1246
1253
|
"filename",
|
|
1254
|
+
"gender",
|
|
1247
1255
|
"gender_sexuality",
|
|
1248
1256
|
"healthcare_number",
|
|
1249
1257
|
"injury",
|
|
1250
1258
|
"ip_address",
|
|
1251
1259
|
"language",
|
|
1252
1260
|
"location",
|
|
1261
|
+
"location_address",
|
|
1262
|
+
"location_address_street",
|
|
1263
|
+
"location_city",
|
|
1264
|
+
"location_coordinate",
|
|
1265
|
+
"location_country",
|
|
1266
|
+
"location_state",
|
|
1267
|
+
"location_zip",
|
|
1253
1268
|
"marital_status",
|
|
1254
1269
|
"medical_condition",
|
|
1255
1270
|
"medical_process",
|
|
@@ -1258,6 +1273,7 @@ var createTranscriptBody = zod3.object({
|
|
|
1258
1273
|
"number_sequence",
|
|
1259
1274
|
"occupation",
|
|
1260
1275
|
"organization",
|
|
1276
|
+
"organization_medical_facility",
|
|
1261
1277
|
"passport_number",
|
|
1262
1278
|
"password",
|
|
1263
1279
|
"person_age",
|
|
@@ -1266,6 +1282,7 @@ var createTranscriptBody = zod3.object({
|
|
|
1266
1282
|
"physical_attribute",
|
|
1267
1283
|
"political_affiliation",
|
|
1268
1284
|
"religion",
|
|
1285
|
+
"sexuality",
|
|
1269
1286
|
"statistics",
|
|
1270
1287
|
"time",
|
|
1271
1288
|
"url",
|
|
@@ -1273,15 +1290,20 @@ var createTranscriptBody = zod3.object({
|
|
|
1273
1290
|
"username",
|
|
1274
1291
|
"vehicle_id",
|
|
1275
1292
|
"zodiac_sign"
|
|
1276
|
-
]).describe(
|
|
1293
|
+
]).describe(
|
|
1294
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
1295
|
+
)
|
|
1277
1296
|
).optional().describe(
|
|
1278
1297
|
"The list of PII Redaction policies to enable. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
1279
1298
|
),
|
|
1280
1299
|
redact_pii_sub: zod3.enum(["entity_name", "hash"]).describe(
|
|
1281
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
1300
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
1282
1301
|
).or(zod3.null()).optional().describe(
|
|
1283
1302
|
"The replacement logic for detected PII, can be `entity_type` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
1284
1303
|
),
|
|
1304
|
+
redact_pii_return_unredacted: zod3.boolean().optional().describe(
|
|
1305
|
+
"When set to `true`, returns the original unredacted transcript alongside the redacted one in the same response. Requires `redact_pii` to be `true`, otherwise a 400 error is returned.\n\nWhen enabled, the response includes the additional fields `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. The existing `text`, `words`, and `utterances` fields remain fully redacted. When disabled (default), the response is unchanged and contains only the redacted transcript. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details.\n"
|
|
1306
|
+
),
|
|
1285
1307
|
sentiment_analysis: zod3.boolean().optional().describe(
|
|
1286
1308
|
"Enable [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech), can be true or false"
|
|
1287
1309
|
),
|
|
@@ -1379,10 +1401,10 @@ var createTranscriptBody = zod3.object({
|
|
|
1379
1401
|
),
|
|
1380
1402
|
summary_model: zod3.enum(["informative", "conversational", "catchy"]).optional().describe("The model to summarize the transcript"),
|
|
1381
1403
|
summary_type: zod3.enum(["bullets", "bullets_verbose", "gist", "headline", "paragraph"]).optional().describe("The type of summary"),
|
|
1382
|
-
remove_audio_tags: zod3.enum(["all"]).describe(
|
|
1383
|
-
'
|
|
1404
|
+
remove_audio_tags: zod3.enum(["all", "speaker"]).describe(
|
|
1405
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
1384
1406
|
).or(zod3.null()).optional().describe(
|
|
1385
|
-
'
|
|
1407
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
1386
1408
|
),
|
|
1387
1409
|
temperature: zod3.number().optional().describe(
|
|
1388
1410
|
"Control the amount of randomness injected into the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
@@ -1513,7 +1535,7 @@ var createTranscriptResponse = zod3.object({
|
|
|
1513
1535
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
1514
1536
|
),
|
|
1515
1537
|
disfluencies: zod3.boolean().nullish().describe(
|
|
1516
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
1538
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
1517
1539
|
),
|
|
1518
1540
|
domain: zod3.string().nullish().describe(
|
|
1519
1541
|
'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
|
|
@@ -1536,12 +1558,20 @@ var createTranscriptResponse = zod3.object({
|
|
|
1536
1558
|
"email_address",
|
|
1537
1559
|
"event",
|
|
1538
1560
|
"filename",
|
|
1561
|
+
"gender",
|
|
1539
1562
|
"gender_sexuality",
|
|
1540
1563
|
"healthcare_number",
|
|
1541
1564
|
"injury",
|
|
1542
1565
|
"ip_address",
|
|
1543
1566
|
"language",
|
|
1544
1567
|
"location",
|
|
1568
|
+
"location_address",
|
|
1569
|
+
"location_address_street",
|
|
1570
|
+
"location_city",
|
|
1571
|
+
"location_coordinate",
|
|
1572
|
+
"location_country",
|
|
1573
|
+
"location_state",
|
|
1574
|
+
"location_zip",
|
|
1545
1575
|
"marital_status",
|
|
1546
1576
|
"medical_condition",
|
|
1547
1577
|
"medical_process",
|
|
@@ -1550,6 +1580,7 @@ var createTranscriptResponse = zod3.object({
|
|
|
1550
1580
|
"number_sequence",
|
|
1551
1581
|
"occupation",
|
|
1552
1582
|
"organization",
|
|
1583
|
+
"organization_medical_facility",
|
|
1553
1584
|
"passport_number",
|
|
1554
1585
|
"password",
|
|
1555
1586
|
"person_age",
|
|
@@ -1558,6 +1589,7 @@ var createTranscriptResponse = zod3.object({
|
|
|
1558
1589
|
"physical_attribute",
|
|
1559
1590
|
"political_affiliation",
|
|
1560
1591
|
"religion",
|
|
1592
|
+
"sexuality",
|
|
1561
1593
|
"statistics",
|
|
1562
1594
|
"time",
|
|
1563
1595
|
"url",
|
|
@@ -1862,6 +1894,24 @@ var createTranscriptResponse = zod3.object({
|
|
|
1862
1894
|
}).optional().describe(
|
|
1863
1895
|
"Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
|
|
1864
1896
|
),
|
|
1897
|
+
metadata: zod3.object({
|
|
1898
|
+
domain_used: zod3.string().nullish().describe(
|
|
1899
|
+
'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
|
|
1900
|
+
),
|
|
1901
|
+
warnings: zod3.array(
|
|
1902
|
+
zod3.object({
|
|
1903
|
+
message: zod3.string().describe("A human-readable description of the warning.")
|
|
1904
|
+
}).describe(
|
|
1905
|
+
"A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
|
|
1906
|
+
)
|
|
1907
|
+
).optional().describe(
|
|
1908
|
+
"Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
|
|
1909
|
+
)
|
|
1910
|
+
}).describe(
|
|
1911
|
+
"Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
|
|
1912
|
+
).or(zod3.null()).optional().describe(
|
|
1913
|
+
"Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
|
|
1914
|
+
),
|
|
1865
1915
|
multichannel: zod3.boolean().nullish().describe(
|
|
1866
1916
|
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
|
|
1867
1917
|
),
|
|
@@ -1909,12 +1959,20 @@ var createTranscriptResponse = zod3.object({
|
|
|
1909
1959
|
"email_address",
|
|
1910
1960
|
"event",
|
|
1911
1961
|
"filename",
|
|
1962
|
+
"gender",
|
|
1912
1963
|
"gender_sexuality",
|
|
1913
1964
|
"healthcare_number",
|
|
1914
1965
|
"injury",
|
|
1915
1966
|
"ip_address",
|
|
1916
1967
|
"language",
|
|
1917
1968
|
"location",
|
|
1969
|
+
"location_address",
|
|
1970
|
+
"location_address_street",
|
|
1971
|
+
"location_city",
|
|
1972
|
+
"location_coordinate",
|
|
1973
|
+
"location_country",
|
|
1974
|
+
"location_state",
|
|
1975
|
+
"location_zip",
|
|
1918
1976
|
"marital_status",
|
|
1919
1977
|
"medical_condition",
|
|
1920
1978
|
"medical_process",
|
|
@@ -1923,6 +1981,7 @@ var createTranscriptResponse = zod3.object({
|
|
|
1923
1981
|
"number_sequence",
|
|
1924
1982
|
"occupation",
|
|
1925
1983
|
"organization",
|
|
1984
|
+
"organization_medical_facility",
|
|
1926
1985
|
"passport_number",
|
|
1927
1986
|
"password",
|
|
1928
1987
|
"person_age",
|
|
@@ -1931,6 +1990,7 @@ var createTranscriptResponse = zod3.object({
|
|
|
1931
1990
|
"physical_attribute",
|
|
1932
1991
|
"political_affiliation",
|
|
1933
1992
|
"religion",
|
|
1993
|
+
"sexuality",
|
|
1934
1994
|
"statistics",
|
|
1935
1995
|
"time",
|
|
1936
1996
|
"url",
|
|
@@ -1938,12 +1998,17 @@ var createTranscriptResponse = zod3.object({
|
|
|
1938
1998
|
"username",
|
|
1939
1999
|
"vehicle_id",
|
|
1940
2000
|
"zodiac_sign"
|
|
1941
|
-
]).describe(
|
|
2001
|
+
]).describe(
|
|
2002
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
2003
|
+
)
|
|
1942
2004
|
).nullish().describe(
|
|
1943
2005
|
"The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
1944
2006
|
),
|
|
1945
2007
|
redact_pii_sub: zod3.enum(["entity_name", "hash"]).optional().describe(
|
|
1946
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
2008
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
2009
|
+
),
|
|
2010
|
+
redact_pii_return_unredacted: zod3.boolean().nullish().describe(
|
|
2011
|
+
"Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
1947
2012
|
),
|
|
1948
2013
|
sentiment_analysis: zod3.boolean().nullish().describe(
|
|
1949
2014
|
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
|
|
@@ -2080,20 +2145,23 @@ var createTranscriptResponse = zod3.object({
|
|
|
2080
2145
|
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
2081
2146
|
),
|
|
2082
2147
|
summary_model: zod3.string().nullish().describe(
|
|
2083
|
-
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
2148
|
+
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
|
|
2084
2149
|
),
|
|
2085
2150
|
summary_type: zod3.string().nullish().describe(
|
|
2086
|
-
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
2151
|
+
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
2087
2152
|
),
|
|
2088
|
-
remove_audio_tags: zod3.enum(["all"]).describe(
|
|
2089
|
-
|
|
2153
|
+
remove_audio_tags: zod3.enum(["all", "speaker"]).describe(
|
|
2154
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
2090
2155
|
).or(zod3.null()).optional().describe(
|
|
2091
|
-
|
|
2156
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
2092
2157
|
),
|
|
2093
2158
|
temperature: zod3.number().nullish().describe(
|
|
2094
2159
|
"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
2095
2160
|
),
|
|
2096
2161
|
text: zod3.string().nullish().describe("The textual transcript of your media file"),
|
|
2162
|
+
unredacted_text: zod3.string().nullish().describe(
|
|
2163
|
+
"The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
2164
|
+
),
|
|
2097
2165
|
throttled: zod3.boolean().nullish().describe(
|
|
2098
2166
|
"True while a request is throttled and false when a request is no longer throttled"
|
|
2099
2167
|
),
|
|
@@ -2130,6 +2198,39 @@ var createTranscriptResponse = zod3.object({
|
|
|
2130
2198
|
).nullish().describe(
|
|
2131
2199
|
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
|
|
2132
2200
|
),
|
|
2201
|
+
unredacted_utterances: zod3.array(
|
|
2202
|
+
zod3.object({
|
|
2203
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this utterance"),
|
|
2204
|
+
start: zod3.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
|
|
2205
|
+
end: zod3.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
|
|
2206
|
+
text: zod3.string().describe("The text for this utterance"),
|
|
2207
|
+
words: zod3.array(
|
|
2208
|
+
zod3.object({
|
|
2209
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this word"),
|
|
2210
|
+
start: zod3.number().describe("The starting time, in milliseconds, for the word"),
|
|
2211
|
+
end: zod3.number().describe("The ending time, in milliseconds, for the word"),
|
|
2212
|
+
text: zod3.string().describe("The text of the word"),
|
|
2213
|
+
channel: zod3.string().nullish().describe(
|
|
2214
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
2215
|
+
),
|
|
2216
|
+
speaker: zod3.string().nullable().describe(
|
|
2217
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
2218
|
+
)
|
|
2219
|
+
})
|
|
2220
|
+
).describe("The words in the utterance."),
|
|
2221
|
+
channel: zod3.string().nullish().describe(
|
|
2222
|
+
"The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
2223
|
+
),
|
|
2224
|
+
speaker: zod3.string().describe(
|
|
2225
|
+
'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
|
|
2226
|
+
),
|
|
2227
|
+
translated_texts: zod3.record(zod3.string(), zod3.string()).optional().describe(
|
|
2228
|
+
'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
|
|
2229
|
+
)
|
|
2230
|
+
})
|
|
2231
|
+
).nullish().describe(
|
|
2232
|
+
"The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
2233
|
+
),
|
|
2133
2234
|
webhook_auth: zod3.boolean().describe(
|
|
2134
2235
|
"Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
|
|
2135
2236
|
),
|
|
@@ -2158,6 +2259,22 @@ var createTranscriptResponse = zod3.object({
|
|
|
2158
2259
|
).nullish().describe(
|
|
2159
2260
|
"An array of temporally-sequential word objects, one for each word in the transcript.\n"
|
|
2160
2261
|
),
|
|
2262
|
+
unredacted_words: zod3.array(
|
|
2263
|
+
zod3.object({
|
|
2264
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this word"),
|
|
2265
|
+
start: zod3.number().describe("The starting time, in milliseconds, for the word"),
|
|
2266
|
+
end: zod3.number().describe("The ending time, in milliseconds, for the word"),
|
|
2267
|
+
text: zod3.string().describe("The text of the word"),
|
|
2268
|
+
channel: zod3.string().nullish().describe(
|
|
2269
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
2270
|
+
),
|
|
2271
|
+
speaker: zod3.string().nullable().describe(
|
|
2272
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
2273
|
+
)
|
|
2274
|
+
})
|
|
2275
|
+
).nullish().describe(
|
|
2276
|
+
"The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
2277
|
+
),
|
|
2161
2278
|
acoustic_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
2162
2279
|
custom_topics: zod3.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
|
|
2163
2280
|
language_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
@@ -2329,7 +2446,7 @@ var getTranscriptResponse = zod3.object({
|
|
|
2329
2446
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
2330
2447
|
),
|
|
2331
2448
|
disfluencies: zod3.boolean().nullish().describe(
|
|
2332
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
2449
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
2333
2450
|
),
|
|
2334
2451
|
domain: zod3.string().nullish().describe(
|
|
2335
2452
|
'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
|
|
@@ -2352,12 +2469,20 @@ var getTranscriptResponse = zod3.object({
|
|
|
2352
2469
|
"email_address",
|
|
2353
2470
|
"event",
|
|
2354
2471
|
"filename",
|
|
2472
|
+
"gender",
|
|
2355
2473
|
"gender_sexuality",
|
|
2356
2474
|
"healthcare_number",
|
|
2357
2475
|
"injury",
|
|
2358
2476
|
"ip_address",
|
|
2359
2477
|
"language",
|
|
2360
2478
|
"location",
|
|
2479
|
+
"location_address",
|
|
2480
|
+
"location_address_street",
|
|
2481
|
+
"location_city",
|
|
2482
|
+
"location_coordinate",
|
|
2483
|
+
"location_country",
|
|
2484
|
+
"location_state",
|
|
2485
|
+
"location_zip",
|
|
2361
2486
|
"marital_status",
|
|
2362
2487
|
"medical_condition",
|
|
2363
2488
|
"medical_process",
|
|
@@ -2366,6 +2491,7 @@ var getTranscriptResponse = zod3.object({
|
|
|
2366
2491
|
"number_sequence",
|
|
2367
2492
|
"occupation",
|
|
2368
2493
|
"organization",
|
|
2494
|
+
"organization_medical_facility",
|
|
2369
2495
|
"passport_number",
|
|
2370
2496
|
"password",
|
|
2371
2497
|
"person_age",
|
|
@@ -2374,6 +2500,7 @@ var getTranscriptResponse = zod3.object({
|
|
|
2374
2500
|
"physical_attribute",
|
|
2375
2501
|
"political_affiliation",
|
|
2376
2502
|
"religion",
|
|
2503
|
+
"sexuality",
|
|
2377
2504
|
"statistics",
|
|
2378
2505
|
"time",
|
|
2379
2506
|
"url",
|
|
@@ -2678,6 +2805,24 @@ var getTranscriptResponse = zod3.object({
|
|
|
2678
2805
|
}).optional().describe(
|
|
2679
2806
|
"Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
|
|
2680
2807
|
),
|
|
2808
|
+
metadata: zod3.object({
|
|
2809
|
+
domain_used: zod3.string().nullish().describe(
|
|
2810
|
+
'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
|
|
2811
|
+
),
|
|
2812
|
+
warnings: zod3.array(
|
|
2813
|
+
zod3.object({
|
|
2814
|
+
message: zod3.string().describe("A human-readable description of the warning.")
|
|
2815
|
+
}).describe(
|
|
2816
|
+
"A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
|
|
2817
|
+
)
|
|
2818
|
+
).optional().describe(
|
|
2819
|
+
"Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
|
|
2820
|
+
)
|
|
2821
|
+
}).describe(
|
|
2822
|
+
"Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
|
|
2823
|
+
).or(zod3.null()).optional().describe(
|
|
2824
|
+
"Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
|
|
2825
|
+
),
|
|
2681
2826
|
multichannel: zod3.boolean().nullish().describe(
|
|
2682
2827
|
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
|
|
2683
2828
|
),
|
|
@@ -2725,12 +2870,20 @@ var getTranscriptResponse = zod3.object({
|
|
|
2725
2870
|
"email_address",
|
|
2726
2871
|
"event",
|
|
2727
2872
|
"filename",
|
|
2873
|
+
"gender",
|
|
2728
2874
|
"gender_sexuality",
|
|
2729
2875
|
"healthcare_number",
|
|
2730
2876
|
"injury",
|
|
2731
2877
|
"ip_address",
|
|
2732
2878
|
"language",
|
|
2733
2879
|
"location",
|
|
2880
|
+
"location_address",
|
|
2881
|
+
"location_address_street",
|
|
2882
|
+
"location_city",
|
|
2883
|
+
"location_coordinate",
|
|
2884
|
+
"location_country",
|
|
2885
|
+
"location_state",
|
|
2886
|
+
"location_zip",
|
|
2734
2887
|
"marital_status",
|
|
2735
2888
|
"medical_condition",
|
|
2736
2889
|
"medical_process",
|
|
@@ -2739,6 +2892,7 @@ var getTranscriptResponse = zod3.object({
|
|
|
2739
2892
|
"number_sequence",
|
|
2740
2893
|
"occupation",
|
|
2741
2894
|
"organization",
|
|
2895
|
+
"organization_medical_facility",
|
|
2742
2896
|
"passport_number",
|
|
2743
2897
|
"password",
|
|
2744
2898
|
"person_age",
|
|
@@ -2747,6 +2901,7 @@ var getTranscriptResponse = zod3.object({
|
|
|
2747
2901
|
"physical_attribute",
|
|
2748
2902
|
"political_affiliation",
|
|
2749
2903
|
"religion",
|
|
2904
|
+
"sexuality",
|
|
2750
2905
|
"statistics",
|
|
2751
2906
|
"time",
|
|
2752
2907
|
"url",
|
|
@@ -2754,12 +2909,17 @@ var getTranscriptResponse = zod3.object({
|
|
|
2754
2909
|
"username",
|
|
2755
2910
|
"vehicle_id",
|
|
2756
2911
|
"zodiac_sign"
|
|
2757
|
-
]).describe(
|
|
2912
|
+
]).describe(
|
|
2913
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
2914
|
+
)
|
|
2758
2915
|
).nullish().describe(
|
|
2759
2916
|
"The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
2760
2917
|
),
|
|
2761
2918
|
redact_pii_sub: zod3.enum(["entity_name", "hash"]).optional().describe(
|
|
2762
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
2919
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
2920
|
+
),
|
|
2921
|
+
redact_pii_return_unredacted: zod3.boolean().nullish().describe(
|
|
2922
|
+
"Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
2763
2923
|
),
|
|
2764
2924
|
sentiment_analysis: zod3.boolean().nullish().describe(
|
|
2765
2925
|
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
|
|
@@ -2896,20 +3056,23 @@ var getTranscriptResponse = zod3.object({
|
|
|
2896
3056
|
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
2897
3057
|
),
|
|
2898
3058
|
summary_model: zod3.string().nullish().describe(
|
|
2899
|
-
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
3059
|
+
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
|
|
2900
3060
|
),
|
|
2901
3061
|
summary_type: zod3.string().nullish().describe(
|
|
2902
|
-
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
3062
|
+
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
2903
3063
|
),
|
|
2904
|
-
remove_audio_tags: zod3.enum(["all"]).describe(
|
|
2905
|
-
|
|
3064
|
+
remove_audio_tags: zod3.enum(["all", "speaker"]).describe(
|
|
3065
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
2906
3066
|
).or(zod3.null()).optional().describe(
|
|
2907
|
-
|
|
3067
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
2908
3068
|
),
|
|
2909
3069
|
temperature: zod3.number().nullish().describe(
|
|
2910
3070
|
"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
2911
3071
|
),
|
|
2912
3072
|
text: zod3.string().nullish().describe("The textual transcript of your media file"),
|
|
3073
|
+
unredacted_text: zod3.string().nullish().describe(
|
|
3074
|
+
"The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
3075
|
+
),
|
|
2913
3076
|
throttled: zod3.boolean().nullish().describe(
|
|
2914
3077
|
"True while a request is throttled and false when a request is no longer throttled"
|
|
2915
3078
|
),
|
|
@@ -2946,6 +3109,39 @@ var getTranscriptResponse = zod3.object({
|
|
|
2946
3109
|
).nullish().describe(
|
|
2947
3110
|
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
|
|
2948
3111
|
),
|
|
3112
|
+
unredacted_utterances: zod3.array(
|
|
3113
|
+
zod3.object({
|
|
3114
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this utterance"),
|
|
3115
|
+
start: zod3.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
|
|
3116
|
+
end: zod3.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
|
|
3117
|
+
text: zod3.string().describe("The text for this utterance"),
|
|
3118
|
+
words: zod3.array(
|
|
3119
|
+
zod3.object({
|
|
3120
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this word"),
|
|
3121
|
+
start: zod3.number().describe("The starting time, in milliseconds, for the word"),
|
|
3122
|
+
end: zod3.number().describe("The ending time, in milliseconds, for the word"),
|
|
3123
|
+
text: zod3.string().describe("The text of the word"),
|
|
3124
|
+
channel: zod3.string().nullish().describe(
|
|
3125
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
3126
|
+
),
|
|
3127
|
+
speaker: zod3.string().nullable().describe(
|
|
3128
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
3129
|
+
)
|
|
3130
|
+
})
|
|
3131
|
+
).describe("The words in the utterance."),
|
|
3132
|
+
channel: zod3.string().nullish().describe(
|
|
3133
|
+
"The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
3134
|
+
),
|
|
3135
|
+
speaker: zod3.string().describe(
|
|
3136
|
+
'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
|
|
3137
|
+
),
|
|
3138
|
+
translated_texts: zod3.record(zod3.string(), zod3.string()).optional().describe(
|
|
3139
|
+
'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
|
|
3140
|
+
)
|
|
3141
|
+
})
|
|
3142
|
+
).nullish().describe(
|
|
3143
|
+
"The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
3144
|
+
),
|
|
2949
3145
|
webhook_auth: zod3.boolean().describe(
|
|
2950
3146
|
"Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
|
|
2951
3147
|
),
|
|
@@ -2974,6 +3170,22 @@ var getTranscriptResponse = zod3.object({
|
|
|
2974
3170
|
).nullish().describe(
|
|
2975
3171
|
"An array of temporally-sequential word objects, one for each word in the transcript.\n"
|
|
2976
3172
|
),
|
|
3173
|
+
unredacted_words: zod3.array(
|
|
3174
|
+
zod3.object({
|
|
3175
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this word"),
|
|
3176
|
+
start: zod3.number().describe("The starting time, in milliseconds, for the word"),
|
|
3177
|
+
end: zod3.number().describe("The ending time, in milliseconds, for the word"),
|
|
3178
|
+
text: zod3.string().describe("The text of the word"),
|
|
3179
|
+
channel: zod3.string().nullish().describe(
|
|
3180
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
3181
|
+
),
|
|
3182
|
+
speaker: zod3.string().nullable().describe(
|
|
3183
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
3184
|
+
)
|
|
3185
|
+
})
|
|
3186
|
+
).nullish().describe(
|
|
3187
|
+
"The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
3188
|
+
),
|
|
2977
3189
|
acoustic_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
2978
3190
|
custom_topics: zod3.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
|
|
2979
3191
|
language_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
@@ -3106,7 +3318,7 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
3106
3318
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
3107
3319
|
),
|
|
3108
3320
|
disfluencies: zod3.boolean().nullish().describe(
|
|
3109
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
3321
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
3110
3322
|
),
|
|
3111
3323
|
domain: zod3.string().nullish().describe(
|
|
3112
3324
|
'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
|
|
@@ -3129,12 +3341,20 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
3129
3341
|
"email_address",
|
|
3130
3342
|
"event",
|
|
3131
3343
|
"filename",
|
|
3344
|
+
"gender",
|
|
3132
3345
|
"gender_sexuality",
|
|
3133
3346
|
"healthcare_number",
|
|
3134
3347
|
"injury",
|
|
3135
3348
|
"ip_address",
|
|
3136
3349
|
"language",
|
|
3137
3350
|
"location",
|
|
3351
|
+
"location_address",
|
|
3352
|
+
"location_address_street",
|
|
3353
|
+
"location_city",
|
|
3354
|
+
"location_coordinate",
|
|
3355
|
+
"location_country",
|
|
3356
|
+
"location_state",
|
|
3357
|
+
"location_zip",
|
|
3138
3358
|
"marital_status",
|
|
3139
3359
|
"medical_condition",
|
|
3140
3360
|
"medical_process",
|
|
@@ -3143,6 +3363,7 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
3143
3363
|
"number_sequence",
|
|
3144
3364
|
"occupation",
|
|
3145
3365
|
"organization",
|
|
3366
|
+
"organization_medical_facility",
|
|
3146
3367
|
"passport_number",
|
|
3147
3368
|
"password",
|
|
3148
3369
|
"person_age",
|
|
@@ -3151,6 +3372,7 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
3151
3372
|
"physical_attribute",
|
|
3152
3373
|
"political_affiliation",
|
|
3153
3374
|
"religion",
|
|
3375
|
+
"sexuality",
|
|
3154
3376
|
"statistics",
|
|
3155
3377
|
"time",
|
|
3156
3378
|
"url",
|
|
@@ -3455,6 +3677,24 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
3455
3677
|
}).optional().describe(
|
|
3456
3678
|
"Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
|
|
3457
3679
|
),
|
|
3680
|
+
metadata: zod3.object({
|
|
3681
|
+
domain_used: zod3.string().nullish().describe(
|
|
3682
|
+
'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
|
|
3683
|
+
),
|
|
3684
|
+
warnings: zod3.array(
|
|
3685
|
+
zod3.object({
|
|
3686
|
+
message: zod3.string().describe("A human-readable description of the warning.")
|
|
3687
|
+
}).describe(
|
|
3688
|
+
"A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
|
|
3689
|
+
)
|
|
3690
|
+
).optional().describe(
|
|
3691
|
+
"Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
|
|
3692
|
+
)
|
|
3693
|
+
}).describe(
|
|
3694
|
+
"Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
|
|
3695
|
+
).or(zod3.null()).optional().describe(
|
|
3696
|
+
"Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
|
|
3697
|
+
),
|
|
3458
3698
|
multichannel: zod3.boolean().nullish().describe(
|
|
3459
3699
|
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
|
|
3460
3700
|
),
|
|
@@ -3502,12 +3742,20 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
3502
3742
|
"email_address",
|
|
3503
3743
|
"event",
|
|
3504
3744
|
"filename",
|
|
3745
|
+
"gender",
|
|
3505
3746
|
"gender_sexuality",
|
|
3506
3747
|
"healthcare_number",
|
|
3507
3748
|
"injury",
|
|
3508
3749
|
"ip_address",
|
|
3509
3750
|
"language",
|
|
3510
3751
|
"location",
|
|
3752
|
+
"location_address",
|
|
3753
|
+
"location_address_street",
|
|
3754
|
+
"location_city",
|
|
3755
|
+
"location_coordinate",
|
|
3756
|
+
"location_country",
|
|
3757
|
+
"location_state",
|
|
3758
|
+
"location_zip",
|
|
3511
3759
|
"marital_status",
|
|
3512
3760
|
"medical_condition",
|
|
3513
3761
|
"medical_process",
|
|
@@ -3516,6 +3764,7 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
3516
3764
|
"number_sequence",
|
|
3517
3765
|
"occupation",
|
|
3518
3766
|
"organization",
|
|
3767
|
+
"organization_medical_facility",
|
|
3519
3768
|
"passport_number",
|
|
3520
3769
|
"password",
|
|
3521
3770
|
"person_age",
|
|
@@ -3524,6 +3773,7 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
3524
3773
|
"physical_attribute",
|
|
3525
3774
|
"political_affiliation",
|
|
3526
3775
|
"religion",
|
|
3776
|
+
"sexuality",
|
|
3527
3777
|
"statistics",
|
|
3528
3778
|
"time",
|
|
3529
3779
|
"url",
|
|
@@ -3531,12 +3781,17 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
3531
3781
|
"username",
|
|
3532
3782
|
"vehicle_id",
|
|
3533
3783
|
"zodiac_sign"
|
|
3534
|
-
]).describe(
|
|
3784
|
+
]).describe(
|
|
3785
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
3786
|
+
)
|
|
3535
3787
|
).nullish().describe(
|
|
3536
3788
|
"The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
3537
3789
|
),
|
|
3538
3790
|
redact_pii_sub: zod3.enum(["entity_name", "hash"]).optional().describe(
|
|
3539
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
3791
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
3792
|
+
),
|
|
3793
|
+
redact_pii_return_unredacted: zod3.boolean().nullish().describe(
|
|
3794
|
+
"Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
3540
3795
|
),
|
|
3541
3796
|
sentiment_analysis: zod3.boolean().nullish().describe(
|
|
3542
3797
|
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
|
|
@@ -3673,20 +3928,23 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
3673
3928
|
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
3674
3929
|
),
|
|
3675
3930
|
summary_model: zod3.string().nullish().describe(
|
|
3676
|
-
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
3931
|
+
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
|
|
3677
3932
|
),
|
|
3678
3933
|
summary_type: zod3.string().nullish().describe(
|
|
3679
|
-
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
3934
|
+
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
3680
3935
|
),
|
|
3681
|
-
remove_audio_tags: zod3.enum(["all"]).describe(
|
|
3682
|
-
|
|
3936
|
+
remove_audio_tags: zod3.enum(["all", "speaker"]).describe(
|
|
3937
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
3683
3938
|
).or(zod3.null()).optional().describe(
|
|
3684
|
-
|
|
3939
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
3685
3940
|
),
|
|
3686
3941
|
temperature: zod3.number().nullish().describe(
|
|
3687
3942
|
"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
3688
3943
|
),
|
|
3689
3944
|
text: zod3.string().nullish().describe("The textual transcript of your media file"),
|
|
3945
|
+
unredacted_text: zod3.string().nullish().describe(
|
|
3946
|
+
"The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
3947
|
+
),
|
|
3690
3948
|
throttled: zod3.boolean().nullish().describe(
|
|
3691
3949
|
"True while a request is throttled and false when a request is no longer throttled"
|
|
3692
3950
|
),
|
|
@@ -3723,6 +3981,39 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
3723
3981
|
).nullish().describe(
|
|
3724
3982
|
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
|
|
3725
3983
|
),
|
|
3984
|
+
unredacted_utterances: zod3.array(
|
|
3985
|
+
zod3.object({
|
|
3986
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this utterance"),
|
|
3987
|
+
start: zod3.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
|
|
3988
|
+
end: zod3.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
|
|
3989
|
+
text: zod3.string().describe("The text for this utterance"),
|
|
3990
|
+
words: zod3.array(
|
|
3991
|
+
zod3.object({
|
|
3992
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this word"),
|
|
3993
|
+
start: zod3.number().describe("The starting time, in milliseconds, for the word"),
|
|
3994
|
+
end: zod3.number().describe("The ending time, in milliseconds, for the word"),
|
|
3995
|
+
text: zod3.string().describe("The text of the word"),
|
|
3996
|
+
channel: zod3.string().nullish().describe(
|
|
3997
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
3998
|
+
),
|
|
3999
|
+
speaker: zod3.string().nullable().describe(
|
|
4000
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
4001
|
+
)
|
|
4002
|
+
})
|
|
4003
|
+
).describe("The words in the utterance."),
|
|
4004
|
+
channel: zod3.string().nullish().describe(
|
|
4005
|
+
"The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
4006
|
+
),
|
|
4007
|
+
speaker: zod3.string().describe(
|
|
4008
|
+
'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
|
|
4009
|
+
),
|
|
4010
|
+
translated_texts: zod3.record(zod3.string(), zod3.string()).optional().describe(
|
|
4011
|
+
'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
|
|
4012
|
+
)
|
|
4013
|
+
})
|
|
4014
|
+
).nullish().describe(
|
|
4015
|
+
"The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
4016
|
+
),
|
|
3726
4017
|
webhook_auth: zod3.boolean().describe(
|
|
3727
4018
|
"Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
|
|
3728
4019
|
),
|
|
@@ -3751,6 +4042,22 @@ var deleteTranscriptResponse = zod3.object({
|
|
|
3751
4042
|
).nullish().describe(
|
|
3752
4043
|
"An array of temporally-sequential word objects, one for each word in the transcript.\n"
|
|
3753
4044
|
),
|
|
4045
|
+
unredacted_words: zod3.array(
|
|
4046
|
+
zod3.object({
|
|
4047
|
+
confidence: zod3.number().describe("The confidence score for the transcript of this word"),
|
|
4048
|
+
start: zod3.number().describe("The starting time, in milliseconds, for the word"),
|
|
4049
|
+
end: zod3.number().describe("The ending time, in milliseconds, for the word"),
|
|
4050
|
+
text: zod3.string().describe("The text of the word"),
|
|
4051
|
+
channel: zod3.string().nullish().describe(
|
|
4052
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
4053
|
+
),
|
|
4054
|
+
speaker: zod3.string().nullable().describe(
|
|
4055
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
4056
|
+
)
|
|
4057
|
+
})
|
|
4058
|
+
).nullish().describe(
|
|
4059
|
+
"The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
4060
|
+
),
|
|
3754
4061
|
acoustic_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
3755
4062
|
custom_topics: zod3.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
|
|
3756
4063
|
language_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
@@ -3906,7 +4213,21 @@ var streamingTranscriberParams = zod4.object({
|
|
|
3906
4213
|
inactivityTimeout: zod4.number().optional().describe("From SDK v3"),
|
|
3907
4214
|
speakerLabels: zod4.boolean().optional().describe("From SDK v3"),
|
|
3908
4215
|
maxSpeakers: zod4.number().optional().describe("From SDK v3"),
|
|
3909
|
-
|
|
4216
|
+
voiceFocus: zod4.unknown().optional().describe("From SDK v3"),
|
|
4217
|
+
voiceFocusThreshold: zod4.number().optional().describe("From SDK v3"),
|
|
4218
|
+
continuousPartials: zod4.boolean().optional().describe("From SDK v3"),
|
|
4219
|
+
interruptionDelay: zod4.number().optional().describe("From SDK v3"),
|
|
4220
|
+
turnLeftPadMs: zod4.number().optional().describe("From SDK v3"),
|
|
4221
|
+
customerSupportAudioCapture: zod4.boolean().optional().describe("From SDK v3"),
|
|
4222
|
+
includePartialTurns: zod4.boolean().optional().describe("From SDK v3"),
|
|
4223
|
+
redactPii: zod4.boolean().optional().describe("From SDK v3"),
|
|
4224
|
+
redactPiiPolicies: zod4.unknown().optional().describe("From SDK v3"),
|
|
4225
|
+
redactPiiSub: zod4.unknown().optional().describe("From SDK v3"),
|
|
4226
|
+
llmGateway: zod4.unknown().optional().describe("From SDK v3"),
|
|
4227
|
+
webhookUrl: zod4.string().optional().describe("From SDK v3"),
|
|
4228
|
+
webhookAuthHeaderName: zod4.string().optional().describe("From SDK v3"),
|
|
4229
|
+
webhookAuthHeaderValue: zod4.string().optional().describe("From SDK v3"),
|
|
4230
|
+
mode: zod4.unknown().describe("From SDK v3")
|
|
3910
4231
|
});
|
|
3911
4232
|
var streamingUpdateConfigParams = zod4.object({
|
|
3912
4233
|
end_utterance_silence_threshold: zod4.number().min(0).max(2e4).optional().describe("The duration threshold in milliseconds"),
|
|
@@ -3918,7 +4239,9 @@ var streamingUpdateConfigParams = zod4.object({
|
|
|
3918
4239
|
format_turns: zod4.boolean().optional().describe("From SDK v3"),
|
|
3919
4240
|
keyterms_prompt: zod4.array(zod4.string()).optional().describe("From SDK v3"),
|
|
3920
4241
|
prompt: zod4.string().optional().describe("From SDK v3"),
|
|
3921
|
-
filter_profanity: zod4.boolean().optional().describe("From SDK v3")
|
|
4242
|
+
filter_profanity: zod4.boolean().optional().describe("From SDK v3"),
|
|
4243
|
+
interruption_delay: zod4.number().optional().describe("From SDK v3"),
|
|
4244
|
+
turn_left_pad_ms: zod4.number().optional().describe("From SDK v3")
|
|
3922
4245
|
});
|
|
3923
4246
|
|
|
3924
4247
|
// src/generated/gladia/api/gladiaControlAPI.zod.ts
|
|
@@ -3955,7 +4278,7 @@ var preRecordedControllerInitPreRecordedJobV2BodyTranslationConfigMatchOriginalU
|
|
|
3955
4278
|
var preRecordedControllerInitPreRecordedJobV2BodyTranslationConfigLipsyncDefault = true;
|
|
3956
4279
|
var preRecordedControllerInitPreRecordedJobV2BodyTranslationConfigContextAdaptationDefault = true;
|
|
3957
4280
|
var preRecordedControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault = "general";
|
|
3958
|
-
var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-
|
|
4281
|
+
var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
3959
4282
|
var preRecordedControllerInitPreRecordedJobV2BodyLanguageConfigLanguagesDefault = [];
|
|
3960
4283
|
var preRecordedControllerInitPreRecordedJobV2Body = zod5.object({
|
|
3961
4284
|
custom_vocabulary: zod5.boolean().optional().describe(
|
|
@@ -4240,23 +4563,23 @@ var preRecordedControllerInitPreRecordedJobV2Body = zod5.object({
|
|
|
4240
4563
|
"Forces the translation to use informal language forms when available in the target language."
|
|
4241
4564
|
)
|
|
4242
4565
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
4243
|
-
summarization: zod5.boolean().optional().describe("
|
|
4566
|
+
summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
|
|
4244
4567
|
summarization_config: zod5.object({
|
|
4245
4568
|
type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(preRecordedControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault).describe("The type of summarization to apply")
|
|
4246
|
-
}).optional().describe("
|
|
4569
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
4247
4570
|
named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
4248
4571
|
custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
4249
4572
|
custom_spelling_config: zod5.object({
|
|
4250
4573
|
spelling_dictionary: zod5.record(zod5.string(), zod5.array(zod5.string())).describe("The list of spelling applied on the audio transcription")
|
|
4251
4574
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
4252
4575
|
sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
4253
|
-
audio_to_llm: zod5.boolean().optional().describe("
|
|
4576
|
+
audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
4254
4577
|
audio_to_llm_config: zod5.object({
|
|
4255
4578
|
prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
4256
4579
|
model: zod5.string().default(preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault).describe(
|
|
4257
4580
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
4258
4581
|
)
|
|
4259
|
-
}).optional().describe("
|
|
4582
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
4260
4583
|
pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
4261
4584
|
pii_redaction_config: zod5.object({
|
|
4262
4585
|
entity_types: zod5.enum([
|
|
@@ -4498,7 +4821,7 @@ var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsTrans
|
|
|
4498
4821
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsTranslationConfigLipsyncDefault = true;
|
|
4499
4822
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsTranslationConfigContextAdaptationDefault = true;
|
|
4500
4823
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault = "general";
|
|
4501
|
-
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
4824
|
+
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
4502
4825
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsLanguageConfigLanguagesDefault = [];
|
|
4503
4826
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemResultTranscriptionUtterancesItemChannelMin = 0;
|
|
4504
4827
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemResultTranscriptionUtterancesItemSpeakerMin = 0;
|
|
@@ -4842,12 +5165,12 @@ var preRecordedControllerGetPreRecordedJobsV2Response = zod5.object({
|
|
|
4842
5165
|
"Forces the translation to use informal language forms when available in the target language."
|
|
4843
5166
|
)
|
|
4844
5167
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
4845
|
-
summarization: zod5.boolean().optional().describe("
|
|
5168
|
+
summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
|
|
4846
5169
|
summarization_config: zod5.object({
|
|
4847
5170
|
type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
4848
5171
|
preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
|
|
4849
5172
|
).describe("The type of summarization to apply")
|
|
4850
|
-
}).optional().describe("
|
|
5173
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
4851
5174
|
named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
4852
5175
|
custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
4853
5176
|
custom_spelling_config: zod5.object({
|
|
@@ -4856,7 +5179,7 @@ var preRecordedControllerGetPreRecordedJobsV2Response = zod5.object({
|
|
|
4856
5179
|
"**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
|
|
4857
5180
|
),
|
|
4858
5181
|
sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
4859
|
-
audio_to_llm: zod5.boolean().optional().describe("
|
|
5182
|
+
audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
4860
5183
|
audio_to_llm_config: zod5.object({
|
|
4861
5184
|
prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
4862
5185
|
model: zod5.string().default(
|
|
@@ -4864,7 +5187,7 @@ var preRecordedControllerGetPreRecordedJobsV2Response = zod5.object({
|
|
|
4864
5187
|
).describe(
|
|
4865
5188
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
4866
5189
|
)
|
|
4867
|
-
}).optional().describe("
|
|
5190
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
4868
5191
|
pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
4869
5192
|
pii_redaction_config: zod5.object({
|
|
4870
5193
|
entity_types: zod5.enum([
|
|
@@ -5989,7 +6312,7 @@ var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsTranslationConf
|
|
|
5989
6312
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsTranslationConfigLipsyncDefault = true;
|
|
5990
6313
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsTranslationConfigContextAdaptationDefault = true;
|
|
5991
6314
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSummarizationConfigTypeDefault = "general";
|
|
5992
|
-
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
6315
|
+
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
5993
6316
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsLanguageConfigLanguagesDefault = [];
|
|
5994
6317
|
var preRecordedControllerGetPreRecordedJobV2ResponseResultTranscriptionUtterancesItemChannelMin = 0;
|
|
5995
6318
|
var preRecordedControllerGetPreRecordedJobV2ResponseResultTranscriptionUtterancesItemSpeakerMin = 0;
|
|
@@ -6326,19 +6649,19 @@ var preRecordedControllerGetPreRecordedJobV2Response = zod5.object({
|
|
|
6326
6649
|
"Forces the translation to use informal language forms when available in the target language."
|
|
6327
6650
|
)
|
|
6328
6651
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
6329
|
-
summarization: zod5.boolean().optional().describe("
|
|
6652
|
+
summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
|
|
6330
6653
|
summarization_config: zod5.object({
|
|
6331
6654
|
type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
6332
6655
|
preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSummarizationConfigTypeDefault
|
|
6333
6656
|
).describe("The type of summarization to apply")
|
|
6334
|
-
}).optional().describe("
|
|
6657
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
6335
6658
|
named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
6336
6659
|
custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
6337
6660
|
custom_spelling_config: zod5.object({
|
|
6338
6661
|
spelling_dictionary: zod5.record(zod5.string(), zod5.array(zod5.string())).describe("The list of spelling applied on the audio transcription")
|
|
6339
6662
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
6340
6663
|
sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
6341
|
-
audio_to_llm: zod5.boolean().optional().describe("
|
|
6664
|
+
audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
6342
6665
|
audio_to_llm_config: zod5.object({
|
|
6343
6666
|
prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
6344
6667
|
model: zod5.string().default(
|
|
@@ -6346,7 +6669,7 @@ var preRecordedControllerGetPreRecordedJobV2Response = zod5.object({
|
|
|
6346
6669
|
).describe(
|
|
6347
6670
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
6348
6671
|
)
|
|
6349
|
-
}).optional().describe("
|
|
6672
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
6350
6673
|
pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
6351
6674
|
pii_redaction_config: zod5.object({
|
|
6352
6675
|
entity_types: zod5.enum([
|
|
@@ -7448,7 +7771,7 @@ var transcriptionControllerInitPreRecordedJobV2BodyTranslationConfigMatchOrigina
|
|
|
7448
7771
|
var transcriptionControllerInitPreRecordedJobV2BodyTranslationConfigLipsyncDefault = true;
|
|
7449
7772
|
var transcriptionControllerInitPreRecordedJobV2BodyTranslationConfigContextAdaptationDefault = true;
|
|
7450
7773
|
var transcriptionControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault = "general";
|
|
7451
|
-
var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-
|
|
7774
|
+
var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
7452
7775
|
var transcriptionControllerInitPreRecordedJobV2BodyLanguageConfigLanguagesDefault = [];
|
|
7453
7776
|
var transcriptionControllerInitPreRecordedJobV2Body = zod5.object({
|
|
7454
7777
|
custom_vocabulary: zod5.boolean().optional().describe(
|
|
@@ -7737,23 +8060,23 @@ var transcriptionControllerInitPreRecordedJobV2Body = zod5.object({
|
|
|
7737
8060
|
"Forces the translation to use informal language forms when available in the target language."
|
|
7738
8061
|
)
|
|
7739
8062
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
7740
|
-
summarization: zod5.boolean().optional().describe("
|
|
8063
|
+
summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
|
|
7741
8064
|
summarization_config: zod5.object({
|
|
7742
8065
|
type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(transcriptionControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault).describe("The type of summarization to apply")
|
|
7743
|
-
}).optional().describe("
|
|
8066
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
7744
8067
|
named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
7745
8068
|
custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
7746
8069
|
custom_spelling_config: zod5.object({
|
|
7747
8070
|
spelling_dictionary: zod5.record(zod5.string(), zod5.array(zod5.string())).describe("The list of spelling applied on the audio transcription")
|
|
7748
8071
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
7749
8072
|
sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
7750
|
-
audio_to_llm: zod5.boolean().optional().describe("
|
|
8073
|
+
audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
7751
8074
|
audio_to_llm_config: zod5.object({
|
|
7752
8075
|
prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
7753
8076
|
model: zod5.string().default(transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault).describe(
|
|
7754
8077
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
7755
8078
|
)
|
|
7756
|
-
}).optional().describe("
|
|
8079
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
7757
8080
|
pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
7758
8081
|
pii_redaction_config: zod5.object({
|
|
7759
8082
|
entity_types: zod5.enum([
|
|
@@ -7998,7 +8321,7 @@ var transcriptionControllerListV2ResponseItemsItemRequestParamsTranslationConfig
|
|
|
7998
8321
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsTranslationConfigLipsyncDefault = true;
|
|
7999
8322
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsTranslationConfigContextAdaptationDefault = true;
|
|
8000
8323
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault = "general";
|
|
8001
|
-
var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
8324
|
+
var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
8002
8325
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsLanguageConfigLanguagesDefault = [];
|
|
8003
8326
|
var transcriptionControllerListV2ResponseItemsItemResultTranscriptionUtterancesItemChannelMin = 0;
|
|
8004
8327
|
var transcriptionControllerListV2ResponseItemsItemResultTranscriptionUtterancesItemSpeakerMin = 0;
|
|
@@ -8387,12 +8710,12 @@ var transcriptionControllerListV2Response = zod5.object({
|
|
|
8387
8710
|
"Forces the translation to use informal language forms when available in the target language."
|
|
8388
8711
|
)
|
|
8389
8712
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
8390
|
-
summarization: zod5.boolean().optional().describe("
|
|
8713
|
+
summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
|
|
8391
8714
|
summarization_config: zod5.object({
|
|
8392
8715
|
type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
8393
8716
|
transcriptionControllerListV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
|
|
8394
8717
|
).describe("The type of summarization to apply")
|
|
8395
|
-
}).optional().describe("
|
|
8718
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
8396
8719
|
named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
8397
8720
|
custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
8398
8721
|
custom_spelling_config: zod5.object({
|
|
@@ -8401,7 +8724,7 @@ var transcriptionControllerListV2Response = zod5.object({
|
|
|
8401
8724
|
"**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
|
|
8402
8725
|
),
|
|
8403
8726
|
sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
8404
|
-
audio_to_llm: zod5.boolean().optional().describe("
|
|
8727
|
+
audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
8405
8728
|
audio_to_llm_config: zod5.object({
|
|
8406
8729
|
prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
8407
8730
|
model: zod5.string().default(
|
|
@@ -8409,7 +8732,7 @@ var transcriptionControllerListV2Response = zod5.object({
|
|
|
8409
8732
|
).describe(
|
|
8410
8733
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
8411
8734
|
)
|
|
8412
|
-
}).optional().describe("
|
|
8735
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
8413
8736
|
pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
8414
8737
|
pii_redaction_config: zod5.object({
|
|
8415
8738
|
entity_types: zod5.enum([
|
|
@@ -10715,7 +11038,7 @@ var transcriptionControllerGetTranscriptV2ResponseRequestParamsTranslationConfig
|
|
|
10715
11038
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsTranslationConfigLipsyncDefault = true;
|
|
10716
11039
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsTranslationConfigContextAdaptationDefault = true;
|
|
10717
11040
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsSummarizationConfigTypeDefault = "general";
|
|
10718
|
-
var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
11041
|
+
var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
10719
11042
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsLanguageConfigLanguagesDefault = [];
|
|
10720
11043
|
var transcriptionControllerGetTranscriptV2ResponseResultTranscriptionUtterancesItemChannelMin = 0;
|
|
10721
11044
|
var transcriptionControllerGetTranscriptV2ResponseResultTranscriptionUtterancesItemSpeakerMin = 0;
|
|
@@ -11098,19 +11421,19 @@ var transcriptionControllerGetTranscriptV2Response = zod5.discriminatedUnion("ki
|
|
|
11098
11421
|
"Forces the translation to use informal language forms when available in the target language."
|
|
11099
11422
|
)
|
|
11100
11423
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
11101
|
-
summarization: zod5.boolean().optional().describe("
|
|
11424
|
+
summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
|
|
11102
11425
|
summarization_config: zod5.object({
|
|
11103
11426
|
type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
11104
11427
|
transcriptionControllerGetTranscriptV2ResponseRequestParamsSummarizationConfigTypeDefault
|
|
11105
11428
|
).describe("The type of summarization to apply")
|
|
11106
|
-
}).optional().describe("
|
|
11429
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
11107
11430
|
named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
11108
11431
|
custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
11109
11432
|
custom_spelling_config: zod5.object({
|
|
11110
11433
|
spelling_dictionary: zod5.record(zod5.string(), zod5.array(zod5.string())).describe("The list of spelling applied on the audio transcription")
|
|
11111
11434
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
11112
11435
|
sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
11113
|
-
audio_to_llm: zod5.boolean().optional().describe("
|
|
11436
|
+
audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
11114
11437
|
audio_to_llm_config: zod5.object({
|
|
11115
11438
|
prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
11116
11439
|
model: zod5.string().default(
|
|
@@ -11118,7 +11441,7 @@ var transcriptionControllerGetTranscriptV2Response = zod5.discriminatedUnion("ki
|
|
|
11118
11441
|
).describe(
|
|
11119
11442
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
11120
11443
|
)
|
|
11121
|
-
}).optional().describe("
|
|
11444
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
11122
11445
|
pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
11123
11446
|
pii_redaction_config: zod5.object({
|
|
11124
11447
|
entity_types: zod5.enum([
|
|
@@ -13809,7 +14132,7 @@ var historyControllerGetListV1ResponseItemsItemRequestParamsTranslationConfigMat
|
|
|
13809
14132
|
var historyControllerGetListV1ResponseItemsItemRequestParamsTranslationConfigLipsyncDefault = true;
|
|
13810
14133
|
var historyControllerGetListV1ResponseItemsItemRequestParamsTranslationConfigContextAdaptationDefault = true;
|
|
13811
14134
|
var historyControllerGetListV1ResponseItemsItemRequestParamsSummarizationConfigTypeDefault = "general";
|
|
13812
|
-
var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
14135
|
+
var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
13813
14136
|
var historyControllerGetListV1ResponseItemsItemRequestParamsLanguageConfigLanguagesDefault = [];
|
|
13814
14137
|
var historyControllerGetListV1ResponseItemsItemResultTranscriptionUtterancesItemChannelMin = 0;
|
|
13815
14138
|
var historyControllerGetListV1ResponseItemsItemResultTranscriptionUtterancesItemSpeakerMin = 0;
|
|
@@ -14198,12 +14521,12 @@ var historyControllerGetListV1Response = zod5.object({
|
|
|
14198
14521
|
"Forces the translation to use informal language forms when available in the target language."
|
|
14199
14522
|
)
|
|
14200
14523
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
14201
|
-
summarization: zod5.boolean().optional().describe("
|
|
14524
|
+
summarization: zod5.boolean().optional().describe("Enable summarization for this audio"),
|
|
14202
14525
|
summarization_config: zod5.object({
|
|
14203
14526
|
type: zod5.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
14204
14527
|
historyControllerGetListV1ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
|
|
14205
14528
|
).describe("The type of summarization to apply")
|
|
14206
|
-
}).optional().describe("
|
|
14529
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
14207
14530
|
named_entity_recognition: zod5.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
14208
14531
|
custom_spelling: zod5.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
14209
14532
|
custom_spelling_config: zod5.object({
|
|
@@ -14212,7 +14535,7 @@ var historyControllerGetListV1Response = zod5.object({
|
|
|
14212
14535
|
"**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
|
|
14213
14536
|
),
|
|
14214
14537
|
sentiment_analysis: zod5.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
14215
|
-
audio_to_llm: zod5.boolean().optional().describe("
|
|
14538
|
+
audio_to_llm: zod5.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
14216
14539
|
audio_to_llm_config: zod5.object({
|
|
14217
14540
|
prompts: zod5.array(zod5.array(zod5.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
14218
14541
|
model: zod5.string().default(
|
|
@@ -14220,7 +14543,7 @@ var historyControllerGetListV1Response = zod5.object({
|
|
|
14220
14543
|
).describe(
|
|
14221
14544
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
14222
14545
|
)
|
|
14223
|
-
}).optional().describe("
|
|
14546
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
14224
14547
|
pii_redaction: zod5.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
14225
14548
|
pii_redaction_config: zod5.object({
|
|
14226
14549
|
entity_types: zod5.enum([
|
|
@@ -19673,6 +19996,7 @@ var createRealtimeClientSecretBodySessionAudioOutputSpeedDefault = 1;
|
|
|
19673
19996
|
var createRealtimeClientSecretBodySessionAudioOutputSpeedMin = 0.25;
|
|
19674
19997
|
var createRealtimeClientSecretBodySessionAudioOutputSpeedMax = 1.5;
|
|
19675
19998
|
var createRealtimeClientSecretBodySessionToolChoiceDefault = "auto";
|
|
19999
|
+
var createRealtimeClientSecretBodySessionReasoningEffortDefault = "low";
|
|
19676
20000
|
var createRealtimeClientSecretBodySessionTruncationRetentionRatioMin = 0;
|
|
19677
20001
|
var createRealtimeClientSecretBodySessionTruncationRetentionRatioMax = 1;
|
|
19678
20002
|
var createRealtimeClientSecretBodySessionTruncationTokenLimitsPostInstructionsMin = 0;
|
|
@@ -19703,6 +20027,7 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
19703
20027
|
zod6.enum([
|
|
19704
20028
|
"gpt-realtime",
|
|
19705
20029
|
"gpt-realtime-1.5",
|
|
20030
|
+
"gpt-realtime-2",
|
|
19706
20031
|
"gpt-realtime-2025-08-28",
|
|
19707
20032
|
"gpt-4o-realtime-preview",
|
|
19708
20033
|
"gpt-4o-realtime-preview-2024-10-01",
|
|
@@ -19743,16 +20068,20 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
19743
20068
|
"gpt-4o-mini-transcribe",
|
|
19744
20069
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
19745
20070
|
"gpt-4o-transcribe",
|
|
19746
|
-
"gpt-4o-transcribe-diarize"
|
|
20071
|
+
"gpt-4o-transcribe-diarize",
|
|
20072
|
+
"gpt-realtime-whisper"
|
|
19747
20073
|
])
|
|
19748
20074
|
).optional().describe(
|
|
19749
|
-
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`,
|
|
20075
|
+
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
|
|
19750
20076
|
),
|
|
19751
20077
|
language: zod6.string().optional().describe(
|
|
19752
20078
|
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
19753
20079
|
),
|
|
19754
20080
|
prompt: zod6.string().optional().describe(
|
|
19755
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
20081
|
+
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
|
|
20082
|
+
),
|
|
20083
|
+
delay: zod6.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
|
|
20084
|
+
"Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
|
|
19756
20085
|
)
|
|
19757
20086
|
}).optional(),
|
|
19758
20087
|
noise_reduction: zod6.object({
|
|
@@ -19819,7 +20148,7 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
19819
20148
|
"Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
|
|
19820
20149
|
)
|
|
19821
20150
|
]).describe(
|
|
19822
|
-
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
|
|
20151
|
+
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
|
|
19823
20152
|
).or(zod6.null()).optional()
|
|
19824
20153
|
}).optional(),
|
|
19825
20154
|
output: zod6.object({
|
|
@@ -19892,7 +20221,7 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
19892
20221
|
server_label: zod6.string().describe(
|
|
19893
20222
|
"A label for this MCP server, used to identify it in tool calls.\n"
|
|
19894
20223
|
),
|
|
19895
|
-
server_url: zod6.string().optional().describe(
|
|
20224
|
+
server_url: zod6.string().url().optional().describe(
|
|
19896
20225
|
"The URL for the MCP server. One of `server_url` or `connector_id` must be\nprovided.\n"
|
|
19897
20226
|
),
|
|
19898
20227
|
connector_id: zod6.enum([
|
|
@@ -19970,6 +20299,16 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
19970
20299
|
).default(createRealtimeClientSecretBodySessionToolChoiceDefault).describe(
|
|
19971
20300
|
"How the model chooses tools. Provide one of the string modes or force a specific\nfunction/MCP tool.\n"
|
|
19972
20301
|
),
|
|
20302
|
+
parallel_tool_calls: zod6.boolean().optional().describe(
|
|
20303
|
+
"Whether the model may call multiple tools in parallel. Only supported by\nreasoning Realtime models such as `gpt-realtime-2`.\n"
|
|
20304
|
+
),
|
|
20305
|
+
reasoning: zod6.object({
|
|
20306
|
+
effort: zod6.enum(["minimal", "low", "medium", "high", "xhigh"]).default(createRealtimeClientSecretBodySessionReasoningEffortDefault).describe(
|
|
20307
|
+
"Constrains effort on reasoning for reasoning-capable Realtime models such as\n`gpt-realtime-2`.\n"
|
|
20308
|
+
)
|
|
20309
|
+
}).optional().describe(
|
|
20310
|
+
"Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.\n"
|
|
20311
|
+
),
|
|
19973
20312
|
max_output_tokens: zod6.number().or(zod6.enum(["inf"])).optional().describe(
|
|
19974
20313
|
"Maximum number of output tokens for a single assistant response,\ninclusive of tool calls. Provide an integer between 1 and 4096 to\nlimit output tokens, or `inf` for the maximum available tokens for a\ngiven model. Defaults to `inf`.\n"
|
|
19975
20314
|
),
|
|
@@ -20009,7 +20348,7 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
20009
20348
|
).or(
|
|
20010
20349
|
zod6.object({
|
|
20011
20350
|
type: zod6.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
|
|
20012
|
-
image_url: zod6.string().describe(
|
|
20351
|
+
image_url: zod6.string().url().describe(
|
|
20013
20352
|
"The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
|
|
20014
20353
|
).or(zod6.null()).optional(),
|
|
20015
20354
|
file_id: zod6.string().describe("The ID of the file to be sent to the model.").or(zod6.null()).optional(),
|
|
@@ -20023,7 +20362,7 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
20023
20362
|
file_id: zod6.string().describe("The ID of the file to be sent to the model.").or(zod6.null()).optional(),
|
|
20024
20363
|
filename: zod6.string().optional().describe("The name of the file to be sent to the model."),
|
|
20025
20364
|
file_data: zod6.string().optional().describe("The content of the file to be sent to the model.\n"),
|
|
20026
|
-
file_url: zod6.string().optional().describe("The URL of the file to be sent to the model."),
|
|
20365
|
+
file_url: zod6.string().url().optional().describe("The URL of the file to be sent to the model."),
|
|
20027
20366
|
detail: zod6.enum(["low", "high"]).optional()
|
|
20028
20367
|
}).describe("A file input to the model.")
|
|
20029
20368
|
)
|
|
@@ -20059,16 +20398,20 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
20059
20398
|
"gpt-4o-mini-transcribe",
|
|
20060
20399
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
20061
20400
|
"gpt-4o-transcribe",
|
|
20062
|
-
"gpt-4o-transcribe-diarize"
|
|
20401
|
+
"gpt-4o-transcribe-diarize",
|
|
20402
|
+
"gpt-realtime-whisper"
|
|
20063
20403
|
])
|
|
20064
20404
|
).optional().describe(
|
|
20065
|
-
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`,
|
|
20405
|
+
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
|
|
20066
20406
|
),
|
|
20067
20407
|
language: zod6.string().optional().describe(
|
|
20068
20408
|
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
20069
20409
|
),
|
|
20070
20410
|
prompt: zod6.string().optional().describe(
|
|
20071
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
20411
|
+
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
|
|
20412
|
+
),
|
|
20413
|
+
delay: zod6.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
|
|
20414
|
+
"Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
|
|
20072
20415
|
)
|
|
20073
20416
|
}).optional(),
|
|
20074
20417
|
noise_reduction: zod6.object({
|
|
@@ -20135,7 +20478,7 @@ var createRealtimeClientSecretBody = zod6.object({
|
|
|
20135
20478
|
"Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
|
|
20136
20479
|
)
|
|
20137
20480
|
]).describe(
|
|
20138
|
-
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
|
|
20481
|
+
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
|
|
20139
20482
|
).or(zod6.null()).optional()
|
|
20140
20483
|
}).optional()
|
|
20141
20484
|
}).optional().describe("Configuration for input and output audio.\n"),
|
|
@@ -20161,23 +20504,21 @@ var createRealtimeClientSecretResponseSessionAudioOutputSpeedDefault = 1;
|
|
|
20161
20504
|
var createRealtimeClientSecretResponseSessionAudioOutputSpeedMin = 0.25;
|
|
20162
20505
|
var createRealtimeClientSecretResponseSessionAudioOutputSpeedMax = 1.5;
|
|
20163
20506
|
var createRealtimeClientSecretResponseSessionToolChoiceDefault = "auto";
|
|
20507
|
+
var createRealtimeClientSecretResponseSessionReasoningEffortDefault = "low";
|
|
20164
20508
|
var createRealtimeClientSecretResponseSessionTruncationRetentionRatioMin = 0;
|
|
20165
20509
|
var createRealtimeClientSecretResponseSessionTruncationRetentionRatioMax = 1;
|
|
20166
20510
|
var createRealtimeClientSecretResponseSessionTruncationTokenLimitsPostInstructionsMin = 0;
|
|
20167
20511
|
var createRealtimeClientSecretResponse = zod6.object({
|
|
20168
20512
|
value: zod6.string().describe("The generated client secret value."),
|
|
20169
20513
|
expires_at: zod6.number().describe("Expiration timestamp for the client secret, in seconds since epoch."),
|
|
20170
|
-
session: zod6.
|
|
20514
|
+
session: zod6.union([
|
|
20171
20515
|
zod6.object({
|
|
20172
|
-
client_secret: zod6.object({
|
|
20173
|
-
value: zod6.string().describe(
|
|
20174
|
-
"Ephemeral key usable in client environments to authenticate connections to the Realtime API. Use this in client-side environments rather than a standard API token, which should only be used server-side.\n"
|
|
20175
|
-
),
|
|
20176
|
-
expires_at: zod6.number().describe(
|
|
20177
|
-
"Timestamp for when the token expires. Currently, all tokens expire\nafter one minute.\n"
|
|
20178
|
-
)
|
|
20179
|
-
}).describe("Ephemeral key returned by the API."),
|
|
20180
20516
|
type: zod6.enum(["realtime"]).describe("The type of session to create. Always `realtime` for the Realtime API.\n"),
|
|
20517
|
+
id: zod6.string().describe(
|
|
20518
|
+
"Unique identifier for the session that looks like `sess_1234567890abcdef`.\n"
|
|
20519
|
+
),
|
|
20520
|
+
object: zod6.enum(["realtime.session"]).describe("The object type. Always `realtime.session`."),
|
|
20521
|
+
expires_at: zod6.number().optional().describe("Expiration timestamp for the session, in seconds since epoch."),
|
|
20181
20522
|
output_modalities: zod6.array(zod6.enum(["text", "audio"])).default(createRealtimeClientSecretResponseSessionOutputModalitiesDefault).describe(
|
|
20182
20523
|
'The set of modalities the model can respond with. It defaults to `["audio"]`, indicating\nthat the model will respond with audio plus a transcript. `["text"]` can be used to make\nthe model respond with text only. It is not possible to request both `text` and `audio` at the same time.\n'
|
|
20183
20524
|
),
|
|
@@ -20185,6 +20526,7 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
20185
20526
|
zod6.enum([
|
|
20186
20527
|
"gpt-realtime",
|
|
20187
20528
|
"gpt-realtime-1.5",
|
|
20529
|
+
"gpt-realtime-2",
|
|
20188
20530
|
"gpt-realtime-2025-08-28",
|
|
20189
20531
|
"gpt-4o-realtime-preview",
|
|
20190
20532
|
"gpt-4o-realtime-preview-2024-10-01",
|
|
@@ -20207,15 +20549,15 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
20207
20549
|
audio: zod6.object({
|
|
20208
20550
|
input: zod6.object({
|
|
20209
20551
|
format: zod6.object({
|
|
20210
|
-
type: zod6.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
|
|
20211
|
-
rate: zod6.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
|
|
20552
|
+
type: zod6.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
|
|
20553
|
+
rate: zod6.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
|
|
20212
20554
|
}).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
|
|
20213
20555
|
zod6.object({
|
|
20214
|
-
type: zod6.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
|
|
20556
|
+
type: zod6.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
|
|
20215
20557
|
}).describe("The G.711 \u03BC-law format.")
|
|
20216
20558
|
).or(
|
|
20217
20559
|
zod6.object({
|
|
20218
|
-
type: zod6.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
|
|
20560
|
+
type: zod6.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
|
|
20219
20561
|
}).describe("The G.711 A-law format.")
|
|
20220
20562
|
).optional(),
|
|
20221
20563
|
transcription: zod6.object({
|
|
@@ -20225,20 +20567,19 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
20225
20567
|
"gpt-4o-mini-transcribe",
|
|
20226
20568
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
20227
20569
|
"gpt-4o-transcribe",
|
|
20228
|
-
"gpt-4o-transcribe-diarize"
|
|
20570
|
+
"gpt-4o-transcribe-diarize",
|
|
20571
|
+
"gpt-realtime-whisper"
|
|
20229
20572
|
])
|
|
20230
20573
|
).optional().describe(
|
|
20231
|
-
"The model
|
|
20232
|
-
),
|
|
20233
|
-
language: zod6.string().optional().describe(
|
|
20234
|
-
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
20574
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
20235
20575
|
),
|
|
20576
|
+
language: zod6.string().optional().describe("The language of the input audio.\n"),
|
|
20236
20577
|
prompt: zod6.string().optional().describe(
|
|
20237
|
-
|
|
20578
|
+
"The prompt configured for input audio transcription, when present.\n"
|
|
20238
20579
|
)
|
|
20239
20580
|
}).optional(),
|
|
20240
20581
|
noise_reduction: zod6.object({
|
|
20241
|
-
type: zod6.enum(["near_field", "far_field"]).describe(
|
|
20582
|
+
type: zod6.enum(["near_field", "far_field"]).optional().describe(
|
|
20242
20583
|
"Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones such as laptop or conference room microphones.\n"
|
|
20243
20584
|
)
|
|
20244
20585
|
}).optional().describe(
|
|
@@ -20301,20 +20642,20 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
20301
20642
|
"Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
|
|
20302
20643
|
)
|
|
20303
20644
|
]).describe(
|
|
20304
|
-
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
|
|
20645
|
+
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
|
|
20305
20646
|
).or(zod6.null()).optional()
|
|
20306
20647
|
}).optional(),
|
|
20307
20648
|
output: zod6.object({
|
|
20308
20649
|
format: zod6.object({
|
|
20309
|
-
type: zod6.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
|
|
20310
|
-
rate: zod6.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
|
|
20650
|
+
type: zod6.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
|
|
20651
|
+
rate: zod6.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
|
|
20311
20652
|
}).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
|
|
20312
20653
|
zod6.object({
|
|
20313
|
-
type: zod6.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
|
|
20654
|
+
type: zod6.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
|
|
20314
20655
|
}).describe("The G.711 \u03BC-law format.")
|
|
20315
20656
|
).or(
|
|
20316
20657
|
zod6.object({
|
|
20317
|
-
type: zod6.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
|
|
20658
|
+
type: zod6.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
|
|
20318
20659
|
}).describe("The G.711 A-law format.")
|
|
20319
20660
|
).optional(),
|
|
20320
20661
|
voice: zod6.string().or(
|
|
@@ -20358,7 +20699,7 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
20358
20699
|
).or(zod6.null()).optional(),
|
|
20359
20700
|
tools: zod6.array(
|
|
20360
20701
|
zod6.object({
|
|
20361
|
-
type: zod6.enum(["function"]).describe("The type of the tool, i.e. `function`."),
|
|
20702
|
+
type: zod6.enum(["function"]).optional().describe("The type of the tool, i.e. `function`."),
|
|
20362
20703
|
name: zod6.string().optional().describe("The name of the function."),
|
|
20363
20704
|
description: zod6.string().optional().describe(
|
|
20364
20705
|
"The description of the function, including guidance on when and how\nto call it, and guidance about what to tell the user when calling\n(if anything).\n"
|
|
@@ -20370,7 +20711,7 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
20370
20711
|
server_label: zod6.string().describe(
|
|
20371
20712
|
"A label for this MCP server, used to identify it in tool calls.\n"
|
|
20372
20713
|
),
|
|
20373
|
-
server_url: zod6.string().optional().describe(
|
|
20714
|
+
server_url: zod6.string().url().optional().describe(
|
|
20374
20715
|
"The URL for the MCP server. One of `server_url` or `connector_id` must be\nprovided.\n"
|
|
20375
20716
|
),
|
|
20376
20717
|
connector_id: zod6.enum([
|
|
@@ -20382,7 +20723,7 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
20382
20723
|
"connector_outlookcalendar",
|
|
20383
20724
|
"connector_outlookemail",
|
|
20384
20725
|
"connector_sharepoint"
|
|
20385
|
-
]).describe(
|
|
20726
|
+
]).optional().describe(
|
|
20386
20727
|
"Identifier for service connectors, like those available in ChatGPT. One of\n`server_url` or `connector_id` must be provided. Learn more about service\nconnectors [here](/docs/guides/tools-remote-mcp#connectors).\n\nCurrently supported `connector_id` values are:\n\n- Dropbox: `connector_dropbox`\n- Gmail: `connector_gmail`\n- Google Calendar: `connector_googlecalendar`\n- Google Drive: `connector_googledrive`\n- Microsoft Teams: `connector_microsoftteams`\n- Outlook Calendar: `connector_outlookcalendar`\n- Outlook Email: `connector_outlookemail`\n- SharePoint: `connector_sharepoint`\n"
|
|
20387
20728
|
),
|
|
20388
20729
|
authorization: zod6.string().optional().describe(
|
|
@@ -20448,6 +20789,13 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
20448
20789
|
).default(createRealtimeClientSecretResponseSessionToolChoiceDefault).describe(
|
|
20449
20790
|
"How the model chooses tools. Provide one of the string modes or force a specific\nfunction/MCP tool.\n"
|
|
20450
20791
|
),
|
|
20792
|
+
reasoning: zod6.object({
|
|
20793
|
+
effort: zod6.enum(["minimal", "low", "medium", "high", "xhigh"]).default(createRealtimeClientSecretResponseSessionReasoningEffortDefault).describe(
|
|
20794
|
+
"Constrains effort on reasoning for reasoning-capable Realtime models such as\n`gpt-realtime-2`.\n"
|
|
20795
|
+
)
|
|
20796
|
+
}).optional().describe(
|
|
20797
|
+
"Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.\n"
|
|
20798
|
+
),
|
|
20451
20799
|
max_output_tokens: zod6.number().or(zod6.enum(["inf"])).optional().describe(
|
|
20452
20800
|
"Maximum number of output tokens for a single assistant response,\ninclusive of tool calls. Provide an integer between 1 and 4096 to\nlimit output tokens, or `inf` for the maximum available tokens for a\ngiven model. Defaults to `inf`.\n"
|
|
20453
20801
|
),
|
|
@@ -20487,7 +20835,7 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
20487
20835
|
).or(
|
|
20488
20836
|
zod6.object({
|
|
20489
20837
|
type: zod6.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
|
|
20490
|
-
image_url: zod6.string().describe(
|
|
20838
|
+
image_url: zod6.string().url().describe(
|
|
20491
20839
|
"The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
|
|
20492
20840
|
).or(zod6.null()).optional(),
|
|
20493
20841
|
file_id: zod6.string().describe("The ID of the file to be sent to the model.").or(zod6.null()).optional(),
|
|
@@ -20501,8 +20849,8 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
20501
20849
|
file_id: zod6.string().describe("The ID of the file to be sent to the model.").or(zod6.null()).optional(),
|
|
20502
20850
|
filename: zod6.string().optional().describe("The name of the file to be sent to the model."),
|
|
20503
20851
|
file_data: zod6.string().optional().describe("The content of the file to be sent to the model.\n"),
|
|
20504
|
-
file_url: zod6.string().optional().describe("The URL of the file to be sent to the model."),
|
|
20505
|
-
detail: zod6.enum(["low", "high"])
|
|
20852
|
+
file_url: zod6.string().url().optional().describe("The URL of the file to be sent to the model."),
|
|
20853
|
+
detail: zod6.enum(["low", "high"]).optional()
|
|
20506
20854
|
}).describe("A file input to the model.")
|
|
20507
20855
|
)
|
|
20508
20856
|
).describe(
|
|
@@ -20511,9 +20859,7 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
20511
20859
|
}).describe(
|
|
20512
20860
|
"Reference to a prompt template and its variables.\n[Learn more](/docs/guides/text?api-mode=responses#reusable-prompts).\n"
|
|
20513
20861
|
).or(zod6.null()).optional()
|
|
20514
|
-
}).describe(
|
|
20515
|
-
"A new Realtime session configuration, with an ephemeral key. Default TTL\nfor keys is one minute.\n"
|
|
20516
|
-
),
|
|
20862
|
+
}).describe("A Realtime session configuration object.\n"),
|
|
20517
20863
|
zod6.object({
|
|
20518
20864
|
type: zod6.enum(["transcription"]).describe(
|
|
20519
20865
|
"The type of session. Always `transcription` for transcription sessions.\n"
|
|
@@ -20529,15 +20875,15 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
20529
20875
|
audio: zod6.object({
|
|
20530
20876
|
input: zod6.object({
|
|
20531
20877
|
format: zod6.object({
|
|
20532
|
-
type: zod6.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
|
|
20533
|
-
rate: zod6.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
|
|
20878
|
+
type: zod6.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
|
|
20879
|
+
rate: zod6.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
|
|
20534
20880
|
}).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
|
|
20535
20881
|
zod6.object({
|
|
20536
|
-
type: zod6.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
|
|
20882
|
+
type: zod6.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
|
|
20537
20883
|
}).describe("The G.711 \u03BC-law format.")
|
|
20538
20884
|
).or(
|
|
20539
20885
|
zod6.object({
|
|
20540
|
-
type: zod6.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
|
|
20886
|
+
type: zod6.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
|
|
20541
20887
|
}).describe("The G.711 A-law format.")
|
|
20542
20888
|
).optional(),
|
|
20543
20889
|
transcription: zod6.object({
|
|
@@ -20547,20 +20893,19 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
20547
20893
|
"gpt-4o-mini-transcribe",
|
|
20548
20894
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
20549
20895
|
"gpt-4o-transcribe",
|
|
20550
|
-
"gpt-4o-transcribe-diarize"
|
|
20896
|
+
"gpt-4o-transcribe-diarize",
|
|
20897
|
+
"gpt-realtime-whisper"
|
|
20551
20898
|
])
|
|
20552
20899
|
).optional().describe(
|
|
20553
|
-
"The model
|
|
20554
|
-
),
|
|
20555
|
-
language: zod6.string().optional().describe(
|
|
20556
|
-
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
20900
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
20557
20901
|
),
|
|
20902
|
+
language: zod6.string().optional().describe("The language of the input audio.\n"),
|
|
20558
20903
|
prompt: zod6.string().optional().describe(
|
|
20559
|
-
|
|
20904
|
+
"The prompt configured for input audio transcription, when present.\n"
|
|
20560
20905
|
)
|
|
20561
20906
|
}).optional(),
|
|
20562
20907
|
noise_reduction: zod6.object({
|
|
20563
|
-
type: zod6.enum(["near_field", "far_field"]).describe(
|
|
20908
|
+
type: zod6.enum(["near_field", "far_field"]).optional().describe(
|
|
20564
20909
|
"Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones such as laptop or conference room microphones.\n"
|
|
20565
20910
|
)
|
|
20566
20911
|
}).optional().describe("Configuration for input audio noise reduction.\n"),
|
|
@@ -20577,8 +20922,10 @@ var createRealtimeClientSecretResponse = zod6.object({
|
|
|
20577
20922
|
silence_duration_ms: zod6.number().optional().describe(
|
|
20578
20923
|
"Duration of silence to detect speech stop (in milliseconds). Defaults\nto 500ms. With shorter values the model will respond more quickly,\nbut may jump in on short pauses from the user.\n"
|
|
20579
20924
|
)
|
|
20580
|
-
}).
|
|
20581
|
-
"Configuration for turn detection. Can be set to `null` to turn off. Server\nVAD means that the model will detect the start and end of speech based on\naudio volume and respond at the end of user speech.\n"
|
|
20925
|
+
}).describe(
|
|
20926
|
+
"Configuration for turn detection. Can be set to `null` to turn off. Server\nVAD means that the model will detect the start and end of speech based on\naudio volume and respond at the end of user speech. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported.\n"
|
|
20927
|
+
).or(zod6.null()).optional().describe(
|
|
20928
|
+
"Configuration for turn detection. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported.\n"
|
|
20582
20929
|
)
|
|
20583
20930
|
}).optional()
|
|
20584
20931
|
}).optional().describe("Configuration for input audio for the session.\n")
|
|
@@ -20714,7 +21061,7 @@ var createRealtimeSessionBody = zod6.object({
|
|
|
20714
21061
|
).or(
|
|
20715
21062
|
zod6.object({
|
|
20716
21063
|
type: zod6.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
|
|
20717
|
-
image_url: zod6.string().describe(
|
|
21064
|
+
image_url: zod6.string().url().describe(
|
|
20718
21065
|
"The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
|
|
20719
21066
|
).or(zod6.null()).optional(),
|
|
20720
21067
|
file_id: zod6.string().describe("The ID of the file to be sent to the model.").or(zod6.null()).optional(),
|
|
@@ -20728,7 +21075,7 @@ var createRealtimeSessionBody = zod6.object({
|
|
|
20728
21075
|
file_id: zod6.string().describe("The ID of the file to be sent to the model.").or(zod6.null()).optional(),
|
|
20729
21076
|
filename: zod6.string().optional().describe("The name of the file to be sent to the model."),
|
|
20730
21077
|
file_data: zod6.string().optional().describe("The content of the file to be sent to the model.\n"),
|
|
20731
|
-
file_url: zod6.string().optional().describe("The URL of the file to be sent to the model."),
|
|
21078
|
+
file_url: zod6.string().url().optional().describe("The URL of the file to be sent to the model."),
|
|
20732
21079
|
detail: zod6.enum(["low", "high"]).optional()
|
|
20733
21080
|
}).describe("A file input to the model.")
|
|
20734
21081
|
)
|
|
@@ -20776,17 +21123,14 @@ var createRealtimeSessionResponse = zod6.object({
|
|
|
20776
21123
|
"gpt-4o-mini-transcribe",
|
|
20777
21124
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
20778
21125
|
"gpt-4o-transcribe",
|
|
20779
|
-
"gpt-4o-transcribe-diarize"
|
|
21126
|
+
"gpt-4o-transcribe-diarize",
|
|
21127
|
+
"gpt-realtime-whisper"
|
|
20780
21128
|
])
|
|
20781
21129
|
).optional().describe(
|
|
20782
|
-
"The model
|
|
21130
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
20783
21131
|
),
|
|
20784
|
-
language: zod6.string().optional().describe(
|
|
20785
|
-
|
|
20786
|
-
),
|
|
20787
|
-
prompt: zod6.string().optional().describe(
|
|
20788
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
20789
|
-
)
|
|
21132
|
+
language: zod6.string().optional().describe("The language of the input audio.\n"),
|
|
21133
|
+
prompt: zod6.string().optional().describe("The prompt configured for input audio transcription, when present.\n")
|
|
20790
21134
|
}).optional(),
|
|
20791
21135
|
noise_reduction: zod6.object({
|
|
20792
21136
|
type: zod6.enum(["near_field", "far_field"]).optional().describe(
|
|
@@ -20911,16 +21255,20 @@ var createRealtimeTranscriptionSessionBody = zod6.object({
|
|
|
20911
21255
|
"gpt-4o-mini-transcribe",
|
|
20912
21256
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
20913
21257
|
"gpt-4o-transcribe",
|
|
20914
|
-
"gpt-4o-transcribe-diarize"
|
|
21258
|
+
"gpt-4o-transcribe-diarize",
|
|
21259
|
+
"gpt-realtime-whisper"
|
|
20915
21260
|
])
|
|
20916
21261
|
).optional().describe(
|
|
20917
|
-
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`,
|
|
21262
|
+
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
|
|
20918
21263
|
),
|
|
20919
21264
|
language: zod6.string().optional().describe(
|
|
20920
21265
|
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
20921
21266
|
),
|
|
20922
21267
|
prompt: zod6.string().optional().describe(
|
|
20923
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
21268
|
+
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
|
|
21269
|
+
),
|
|
21270
|
+
delay: zod6.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
|
|
21271
|
+
"Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
|
|
20924
21272
|
)
|
|
20925
21273
|
}).optional(),
|
|
20926
21274
|
include: zod6.array(zod6.enum(["item.input_audio_transcription.logprobs"])).optional().describe(
|
|
@@ -20949,17 +21297,14 @@ var createRealtimeTranscriptionSessionResponse = zod6.object({
|
|
|
20949
21297
|
"gpt-4o-mini-transcribe",
|
|
20950
21298
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
20951
21299
|
"gpt-4o-transcribe",
|
|
20952
|
-
"gpt-4o-transcribe-diarize"
|
|
21300
|
+
"gpt-4o-transcribe-diarize",
|
|
21301
|
+
"gpt-realtime-whisper"
|
|
20953
21302
|
])
|
|
20954
21303
|
).optional().describe(
|
|
20955
|
-
"The model
|
|
20956
|
-
),
|
|
20957
|
-
language: zod6.string().optional().describe(
|
|
20958
|
-
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
21304
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
20959
21305
|
),
|
|
20960
|
-
|
|
20961
|
-
|
|
20962
|
-
)
|
|
21306
|
+
language: zod6.string().optional().describe("The language of the input audio.\n"),
|
|
21307
|
+
prompt: zod6.string().optional().describe("The prompt configured for input audio transcription, when present.\n")
|
|
20963
21308
|
}).optional(),
|
|
20964
21309
|
turn_detection: zod6.object({
|
|
20965
21310
|
type: zod6.string().optional().describe("Type of turn detection, only `server_vad` is currently supported.\n"),
|
|
@@ -24572,11 +24917,73 @@ var getModelsResponse = zod10.object({
|
|
|
24572
24917
|
})
|
|
24573
24918
|
).describe("List of available models and their attributes.")
|
|
24574
24919
|
});
|
|
24920
|
+
var getTtsModelsResponse = zod10.object({
|
|
24921
|
+
models: zod10.array(
|
|
24922
|
+
zod10.object({
|
|
24923
|
+
id: zod10.string().describe("Unique identifier of the model."),
|
|
24924
|
+
aliased_model_id: zod10.string().or(zod10.null()).describe("If this is an alias, the id of the aliased model."),
|
|
24925
|
+
name: zod10.string().describe("Name of the model."),
|
|
24926
|
+
voices: zod10.array(
|
|
24927
|
+
zod10.object({
|
|
24928
|
+
id: zod10.string().describe("Unique identifier of the voice."),
|
|
24929
|
+
description: zod10.string().describe("Description of the TTS voice."),
|
|
24930
|
+
gender: zod10.enum(["male", "female", "neutral"])
|
|
24931
|
+
})
|
|
24932
|
+
).describe("List of available voices for this model."),
|
|
24933
|
+
languages: zod10.array(
|
|
24934
|
+
zod10.object({
|
|
24935
|
+
code: zod10.string().describe("2-letter language code."),
|
|
24936
|
+
name: zod10.string().describe("Language name.")
|
|
24937
|
+
})
|
|
24938
|
+
).describe("List of languages supported by the model.")
|
|
24939
|
+
})
|
|
24940
|
+
).describe("List of available TTS models and their attributes.")
|
|
24941
|
+
});
|
|
24942
|
+
var getUsageLogsQueryLimitDefault = 1e3;
|
|
24943
|
+
var getUsageLogsQueryLimitMax = 1e3;
|
|
24944
|
+
var getUsageLogsQuerySortDefault = "end_time_asc";
|
|
24945
|
+
var getUsageLogsQueryParams = zod10.object({
|
|
24946
|
+
start_time: zod10.string().describe("Start of the time window (inclusive). Filters by request end time."),
|
|
24947
|
+
end_time: zod10.string().describe("End of the time window (exclusive). Filters by request end time."),
|
|
24948
|
+
limit: zod10.number().min(1).max(getUsageLogsQueryLimitMax).default(getUsageLogsQueryLimitDefault).describe("Maximum number of usage log entries to return."),
|
|
24949
|
+
sort: zod10.enum(["end_time_asc", "end_time_desc"]).default(getUsageLogsQuerySortDefault).describe(
|
|
24950
|
+
"Sort order by end_time.Use `end_time_desc` to get the most recent entries first. When paginating, pass the same `sort` value alongside the cursor."
|
|
24951
|
+
),
|
|
24952
|
+
cursor: zod10.string().or(zod10.null()).optional().describe("Pagination cursor for the next page of results.")
|
|
24953
|
+
});
|
|
24954
|
+
var getUsageLogsResponse = zod10.object({
|
|
24955
|
+
usage_logs: zod10.array(
|
|
24956
|
+
zod10.object({
|
|
24957
|
+
uuid: zod10.string().uuid().describe("Unique identifier of the request."),
|
|
24958
|
+
request_scope: zod10.string().describe("Scope of the request (api / playground)."),
|
|
24959
|
+
client_reference_id: zod10.string().describe("Client reference ID supplied on the original request. Empty string if none."),
|
|
24960
|
+
model: zod10.string().describe("Model identifier."),
|
|
24961
|
+
start_time: zod10.string().datetime({}).describe("When the request started."),
|
|
24962
|
+
end_time: zod10.string().datetime({}).describe("When the request ended."),
|
|
24963
|
+
input_text_tokens: zod10.number(),
|
|
24964
|
+
input_audio_tokens: zod10.number(),
|
|
24965
|
+
input_audio_duration_ms: zod10.number(),
|
|
24966
|
+
output_text_tokens: zod10.number(),
|
|
24967
|
+
output_audio_tokens: zod10.number(),
|
|
24968
|
+
output_audio_duration_ms: zod10.number(),
|
|
24969
|
+
cost_usd: zod10.string(),
|
|
24970
|
+
input_cost_usd: zod10.string(),
|
|
24971
|
+
input_text_cost_usd: zod10.string(),
|
|
24972
|
+
input_audio_cost_usd: zod10.string(),
|
|
24973
|
+
output_cost_usd: zod10.string(),
|
|
24974
|
+
output_text_cost_usd: zod10.string(),
|
|
24975
|
+
output_audio_cost_usd: zod10.string()
|
|
24976
|
+
})
|
|
24977
|
+
).describe("Per-request usage log entries ordered by end_time, uuid (per `sort`)."),
|
|
24978
|
+
next_page_cursor: zod10.string().or(zod10.null()).optional().describe(
|
|
24979
|
+
"A pagination token that references the next page of results. When more data is available, this field contains a value to pass in the cursor parameter of a subsequent request. When null, no additional results are available."
|
|
24980
|
+
)
|
|
24981
|
+
});
|
|
24575
24982
|
var createTemporaryApiKeyBodyExpiresInSecondsMax = 3600;
|
|
24576
24983
|
var createTemporaryApiKeyBodyClientReferenceIdMaxOne = 256;
|
|
24577
24984
|
var createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne = 18e3;
|
|
24578
24985
|
var createTemporaryApiKeyBody = zod10.object({
|
|
24579
|
-
usage_type: zod10.enum(["transcribe_websocket"]),
|
|
24986
|
+
usage_type: zod10.enum(["transcribe_websocket", "tts_rt"]),
|
|
24580
24987
|
expires_in_seconds: zod10.number().min(1).max(createTemporaryApiKeyBodyExpiresInSecondsMax).describe("Duration in seconds until the temporary API key expires."),
|
|
24581
24988
|
client_reference_id: zod10.string().max(createTemporaryApiKeyBodyClientReferenceIdMaxOne).or(zod10.null()).optional().describe("Optional tracking identifier string. Does not need to be unique."),
|
|
24582
24989
|
single_use: zod10.boolean().or(zod10.null()).optional().describe("If true, the temporary API key can be used only once."),
|
|
@@ -24584,6 +24991,28 @@ var createTemporaryApiKeyBody = zod10.object({
|
|
|
24584
24991
|
"Maximum WebSocket connection duration in seconds. If exceeded, the connection will be dropped. If not set, no limit is applied."
|
|
24585
24992
|
)
|
|
24586
24993
|
});
|
|
24994
|
+
var getConcurrencyLimitsResponse = zod10.object({
|
|
24995
|
+
project: zod10.object({
|
|
24996
|
+
current: zod10.object({
|
|
24997
|
+
transcribe_concurrent: zod10.number(),
|
|
24998
|
+
tts_concurrent: zod10.number()
|
|
24999
|
+
}).describe("Live counts read from Redis"),
|
|
25000
|
+
limits: zod10.object({
|
|
25001
|
+
transcribe_concurrent: zod10.number().or(zod10.null()),
|
|
25002
|
+
tts_concurrent: zod10.number().or(zod10.null())
|
|
25003
|
+
}).describe("Configured limits")
|
|
25004
|
+
}),
|
|
25005
|
+
organization: zod10.object({
|
|
25006
|
+
current: zod10.object({
|
|
25007
|
+
transcribe_concurrent: zod10.number(),
|
|
25008
|
+
tts_concurrent: zod10.number()
|
|
25009
|
+
}).describe("Live counts read from Redis"),
|
|
25010
|
+
limits: zod10.object({
|
|
25011
|
+
transcribe_concurrent: zod10.number().or(zod10.null()),
|
|
25012
|
+
tts_concurrent: zod10.number().or(zod10.null())
|
|
25013
|
+
}).describe("Configured limits")
|
|
25014
|
+
})
|
|
25015
|
+
});
|
|
24587
25016
|
|
|
24588
25017
|
// src/generated/soniox/streaming-types.zod.ts
|
|
24589
25018
|
import { z as zod11 } from "zod";
|
|
@@ -24649,10 +25078,10 @@ var sonioxStructuredContextSchema = zod11.object({
|
|
|
24649
25078
|
var sonioxContextSchema = zod11.union([sonioxStructuredContextSchema, zod11.string()]);
|
|
24650
25079
|
var sonioxRealtimeModelSchema = zod11.enum([
|
|
24651
25080
|
"stt-rt-v4",
|
|
24652
|
-
"stt-rt-v3",
|
|
24653
25081
|
"stt-rt-preview",
|
|
24654
25082
|
"stt-rt-v3-preview",
|
|
24655
|
-
"stt-rt-preview-v2"
|
|
25083
|
+
"stt-rt-preview-v2",
|
|
25084
|
+
"stt-rt-v3"
|
|
24656
25085
|
]);
|
|
24657
25086
|
var streamingTranscriberParams3 = zod11.object({
|
|
24658
25087
|
model: sonioxRealtimeModelSchema,
|
|
@@ -24660,12 +25089,16 @@ var streamingTranscriberParams3 = zod11.object({
|
|
|
24660
25089
|
sampleRate: zod11.number().optional(),
|
|
24661
25090
|
numChannels: zod11.number().optional(),
|
|
24662
25091
|
languageHints: zod11.array(zod11.string()).optional(),
|
|
25092
|
+
languageHintsStrict: zod11.boolean().optional(),
|
|
24663
25093
|
context: sonioxContextSchema.optional(),
|
|
24664
25094
|
enableSpeakerDiarization: zod11.boolean().optional(),
|
|
24665
25095
|
enableLanguageIdentification: zod11.boolean().optional(),
|
|
24666
25096
|
enableEndpointDetection: zod11.boolean().optional(),
|
|
25097
|
+
maxEndpointDelayMs: zod11.number().optional(),
|
|
24667
25098
|
translation: sonioxTranslationConfigSchema.optional(),
|
|
24668
|
-
clientReferenceId: zod11.string().optional()
|
|
25099
|
+
clientReferenceId: zod11.string().optional(),
|
|
25100
|
+
keepaliveIntervalMs: zod11.number().optional(),
|
|
25101
|
+
connectTimeoutMs: zod11.number().optional()
|
|
24669
25102
|
});
|
|
24670
25103
|
var sonioxTranslationStatusSchema = zod11.enum(["original", "translation", "none"]);
|
|
24671
25104
|
var sonioxTokenSchema = zod11.object({
|