voice-router-dev 0.9.3 → 0.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/constants.d.mts +11 -92
- package/dist/constants.d.ts +11 -92
- package/dist/constants.js +11 -88
- package/dist/constants.mjs +11 -88
- package/dist/{field-configs-FbtCPxzs.d.mts → field-configs-BVOZQiG3.d.mts} +4855 -3773
- package/dist/{field-configs-FbtCPxzs.d.ts → field-configs-BVOZQiG3.d.ts} +4855 -3773
- package/dist/field-configs.d.mts +1 -1
- package/dist/field-configs.d.ts +1 -1
- package/dist/field-configs.js +583 -150
- package/dist/field-configs.mjs +583 -150
- package/dist/index.d.mts +1211 -162
- package/dist/index.d.ts +1211 -162
- package/dist/index.js +924 -275
- package/dist/index.mjs +927 -275
- package/dist/{provider-metadata-D1d-9cng.d.ts → provider-metadata-CiSA4fWP.d.ts} +2 -2
- package/dist/{provider-metadata-BJ29OPW1.d.mts → provider-metadata-oxzd1q6t.d.mts} +2 -2
- package/dist/provider-metadata.d.mts +1 -1
- package/dist/provider-metadata.d.ts +1 -1
- package/dist/provider-metadata.js +3 -66
- package/dist/provider-metadata.mjs +3 -66
- package/dist/{speechToTextChunkResponseModel-BY2lGyZ3.d.ts → speechToTextChunkResponseModel-Dns0Ma9x.d.ts} +364 -39
- package/dist/{speechToTextChunkResponseModel-KayxDiZ7.d.mts → speechToTextChunkResponseModel-_ZvHTD4e.d.mts} +364 -39
- package/dist/webhooks.d.mts +3 -2
- package/dist/webhooks.d.ts +3 -2
- package/package.json +8 -3
package/dist/field-configs.js
CHANGED
|
@@ -298,6 +298,9 @@ var listenTranscribeQueryParams = import_zod.z.object({
|
|
|
298
298
|
diarize: import_zod.z.boolean().optional().describe(
|
|
299
299
|
"Recognize speaker changes. Each word in the transcript will be assigned a speaker number starting at 0"
|
|
300
300
|
),
|
|
301
|
+
diarize_model: import_zod.z.enum(["latest", "v1", "v2"]).optional().describe(
|
|
302
|
+
"Select and enable a specific batch diarization model version. If specifying this parameter, you should not set the deprecated `diarize=true` parameter. Not accepted on streaming requests."
|
|
303
|
+
),
|
|
301
304
|
dictation: import_zod.z.boolean().optional().describe("Dictation mode for controlling formatting with dictated speech"),
|
|
302
305
|
encoding: import_zod.z.enum(["linear16", "flac", "mulaw", "amr-nb", "amr-wb", "opus", "speex", "g729"]).optional().describe("Specify the expected encoding of your submitted audio"),
|
|
303
306
|
filler_words: import_zod.z.boolean().optional().describe('Filler Words can help transcribe interruptions in your audio, like "uh" and "um"'),
|
|
@@ -562,6 +565,7 @@ var listenTranscribeResponse = import_zod.z.object({
|
|
|
562
565
|
);
|
|
563
566
|
var speakGenerateQueryCallbackMethodDefault = "POST";
|
|
564
567
|
var speakGenerateQueryModelDefault = "aura-asteria-en";
|
|
568
|
+
var speakGenerateQuerySpeedDefault = 1;
|
|
565
569
|
var speakGenerateQueryParams = import_zod.z.object({
|
|
566
570
|
callback: import_zod.z.string().optional().describe("URL to which we'll make the callback request"),
|
|
567
571
|
callback_method: import_zod.z.enum(["POST", "PUT"]).default(speakGenerateQueryCallbackMethodDefault).describe("HTTP method by which the callback request will be made"),
|
|
@@ -673,6 +677,9 @@ var speakGenerateQueryParams = import_zod.z.object({
|
|
|
673
677
|
import_zod.z.enum(["22050"]).describe("Encoding - mp3. Sample rate is fixed and not configurable (22050 Hz).")
|
|
674
678
|
).or(import_zod.z.enum(["48000"]).describe("Encoding - opus. Sample rate is fixed at 48000 Hz.")).optional().describe(
|
|
675
679
|
"Sample Rate specifies the sample rate for the output audio. Based on the encoding, different sample rates are supported. For some encodings, the sample rate is not configurable"
|
|
680
|
+
),
|
|
681
|
+
speed: import_zod.z.number().default(speakGenerateQuerySpeedDefault).describe(
|
|
682
|
+
"Speaking rate multiplier that adjusts the pace of generated speech while preserving natural prosody and voice quality. Not yet supported in all languages."
|
|
676
683
|
)
|
|
677
684
|
});
|
|
678
685
|
var speakGenerateHeader = import_zod.z.object({
|
|
@@ -1012,7 +1019,7 @@ var createTranscriptBody = import_zod3.z.object({
|
|
|
1012
1019
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
1013
1020
|
),
|
|
1014
1021
|
disfluencies: import_zod3.z.boolean().optional().describe(
|
|
1015
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
1022
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
1016
1023
|
),
|
|
1017
1024
|
domain: import_zod3.z.string().nullish().describe(
|
|
1018
1025
|
'Enable domain-specific transcription models to improve accuracy for specialized terminology. Set to `"medical-v1"` to enable [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) for improved accuracy of medical terms such as medications, procedures, conditions, and dosages.\n\nSupported languages: English (`en`), Spanish (`es`), German (`de`), French (`fr`). If used with an unsupported language, the parameter is ignored and a warning is returned.\n'
|
|
@@ -1319,12 +1326,20 @@ var createTranscriptBody = import_zod3.z.object({
|
|
|
1319
1326
|
"email_address",
|
|
1320
1327
|
"event",
|
|
1321
1328
|
"filename",
|
|
1329
|
+
"gender",
|
|
1322
1330
|
"gender_sexuality",
|
|
1323
1331
|
"healthcare_number",
|
|
1324
1332
|
"injury",
|
|
1325
1333
|
"ip_address",
|
|
1326
1334
|
"language",
|
|
1327
1335
|
"location",
|
|
1336
|
+
"location_address",
|
|
1337
|
+
"location_address_street",
|
|
1338
|
+
"location_city",
|
|
1339
|
+
"location_coordinate",
|
|
1340
|
+
"location_country",
|
|
1341
|
+
"location_state",
|
|
1342
|
+
"location_zip",
|
|
1328
1343
|
"marital_status",
|
|
1329
1344
|
"medical_condition",
|
|
1330
1345
|
"medical_process",
|
|
@@ -1333,6 +1348,7 @@ var createTranscriptBody = import_zod3.z.object({
|
|
|
1333
1348
|
"number_sequence",
|
|
1334
1349
|
"occupation",
|
|
1335
1350
|
"organization",
|
|
1351
|
+
"organization_medical_facility",
|
|
1336
1352
|
"passport_number",
|
|
1337
1353
|
"password",
|
|
1338
1354
|
"person_age",
|
|
@@ -1341,6 +1357,7 @@ var createTranscriptBody = import_zod3.z.object({
|
|
|
1341
1357
|
"physical_attribute",
|
|
1342
1358
|
"political_affiliation",
|
|
1343
1359
|
"religion",
|
|
1360
|
+
"sexuality",
|
|
1344
1361
|
"statistics",
|
|
1345
1362
|
"time",
|
|
1346
1363
|
"url",
|
|
@@ -1348,15 +1365,20 @@ var createTranscriptBody = import_zod3.z.object({
|
|
|
1348
1365
|
"username",
|
|
1349
1366
|
"vehicle_id",
|
|
1350
1367
|
"zodiac_sign"
|
|
1351
|
-
]).describe(
|
|
1368
|
+
]).describe(
|
|
1369
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
1370
|
+
)
|
|
1352
1371
|
).optional().describe(
|
|
1353
1372
|
"The list of PII Redaction policies to enable. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
1354
1373
|
),
|
|
1355
1374
|
redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).describe(
|
|
1356
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
1375
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
1357
1376
|
).or(import_zod3.z.null()).optional().describe(
|
|
1358
1377
|
"The replacement logic for detected PII, can be `entity_type` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
1359
1378
|
),
|
|
1379
|
+
redact_pii_return_unredacted: import_zod3.z.boolean().optional().describe(
|
|
1380
|
+
"When set to `true`, returns the original unredacted transcript alongside the redacted one in the same response. Requires `redact_pii` to be `true`, otherwise a 400 error is returned.\n\nWhen enabled, the response includes the additional fields `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. The existing `text`, `words`, and `utterances` fields remain fully redacted. When disabled (default), the response is unchanged and contains only the redacted transcript. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details.\n"
|
|
1381
|
+
),
|
|
1360
1382
|
sentiment_analysis: import_zod3.z.boolean().optional().describe(
|
|
1361
1383
|
"Enable [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech), can be true or false"
|
|
1362
1384
|
),
|
|
@@ -1454,10 +1476,10 @@ var createTranscriptBody = import_zod3.z.object({
|
|
|
1454
1476
|
),
|
|
1455
1477
|
summary_model: import_zod3.z.enum(["informative", "conversational", "catchy"]).optional().describe("The model to summarize the transcript"),
|
|
1456
1478
|
summary_type: import_zod3.z.enum(["bullets", "bullets_verbose", "gist", "headline", "paragraph"]).optional().describe("The type of summary"),
|
|
1457
|
-
remove_audio_tags: import_zod3.z.enum(["all"]).describe(
|
|
1458
|
-
'
|
|
1479
|
+
remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
|
|
1480
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
1459
1481
|
).or(import_zod3.z.null()).optional().describe(
|
|
1460
|
-
'
|
|
1482
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
1461
1483
|
),
|
|
1462
1484
|
temperature: import_zod3.z.number().optional().describe(
|
|
1463
1485
|
"Control the amount of randomness injected into the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
@@ -1588,7 +1610,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
1588
1610
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
1589
1611
|
),
|
|
1590
1612
|
disfluencies: import_zod3.z.boolean().nullish().describe(
|
|
1591
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
1613
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
1592
1614
|
),
|
|
1593
1615
|
domain: import_zod3.z.string().nullish().describe(
|
|
1594
1616
|
'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
|
|
@@ -1611,12 +1633,20 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
1611
1633
|
"email_address",
|
|
1612
1634
|
"event",
|
|
1613
1635
|
"filename",
|
|
1636
|
+
"gender",
|
|
1614
1637
|
"gender_sexuality",
|
|
1615
1638
|
"healthcare_number",
|
|
1616
1639
|
"injury",
|
|
1617
1640
|
"ip_address",
|
|
1618
1641
|
"language",
|
|
1619
1642
|
"location",
|
|
1643
|
+
"location_address",
|
|
1644
|
+
"location_address_street",
|
|
1645
|
+
"location_city",
|
|
1646
|
+
"location_coordinate",
|
|
1647
|
+
"location_country",
|
|
1648
|
+
"location_state",
|
|
1649
|
+
"location_zip",
|
|
1620
1650
|
"marital_status",
|
|
1621
1651
|
"medical_condition",
|
|
1622
1652
|
"medical_process",
|
|
@@ -1625,6 +1655,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
1625
1655
|
"number_sequence",
|
|
1626
1656
|
"occupation",
|
|
1627
1657
|
"organization",
|
|
1658
|
+
"organization_medical_facility",
|
|
1628
1659
|
"passport_number",
|
|
1629
1660
|
"password",
|
|
1630
1661
|
"person_age",
|
|
@@ -1633,6 +1664,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
1633
1664
|
"physical_attribute",
|
|
1634
1665
|
"political_affiliation",
|
|
1635
1666
|
"religion",
|
|
1667
|
+
"sexuality",
|
|
1636
1668
|
"statistics",
|
|
1637
1669
|
"time",
|
|
1638
1670
|
"url",
|
|
@@ -1937,6 +1969,24 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
1937
1969
|
}).optional().describe(
|
|
1938
1970
|
"Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
|
|
1939
1971
|
),
|
|
1972
|
+
metadata: import_zod3.z.object({
|
|
1973
|
+
domain_used: import_zod3.z.string().nullish().describe(
|
|
1974
|
+
'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
|
|
1975
|
+
),
|
|
1976
|
+
warnings: import_zod3.z.array(
|
|
1977
|
+
import_zod3.z.object({
|
|
1978
|
+
message: import_zod3.z.string().describe("A human-readable description of the warning.")
|
|
1979
|
+
}).describe(
|
|
1980
|
+
"A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
|
|
1981
|
+
)
|
|
1982
|
+
).optional().describe(
|
|
1983
|
+
"Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
|
|
1984
|
+
)
|
|
1985
|
+
}).describe(
|
|
1986
|
+
"Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
|
|
1987
|
+
).or(import_zod3.z.null()).optional().describe(
|
|
1988
|
+
"Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
|
|
1989
|
+
),
|
|
1940
1990
|
multichannel: import_zod3.z.boolean().nullish().describe(
|
|
1941
1991
|
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
|
|
1942
1992
|
),
|
|
@@ -1984,12 +2034,20 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
1984
2034
|
"email_address",
|
|
1985
2035
|
"event",
|
|
1986
2036
|
"filename",
|
|
2037
|
+
"gender",
|
|
1987
2038
|
"gender_sexuality",
|
|
1988
2039
|
"healthcare_number",
|
|
1989
2040
|
"injury",
|
|
1990
2041
|
"ip_address",
|
|
1991
2042
|
"language",
|
|
1992
2043
|
"location",
|
|
2044
|
+
"location_address",
|
|
2045
|
+
"location_address_street",
|
|
2046
|
+
"location_city",
|
|
2047
|
+
"location_coordinate",
|
|
2048
|
+
"location_country",
|
|
2049
|
+
"location_state",
|
|
2050
|
+
"location_zip",
|
|
1993
2051
|
"marital_status",
|
|
1994
2052
|
"medical_condition",
|
|
1995
2053
|
"medical_process",
|
|
@@ -1998,6 +2056,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
1998
2056
|
"number_sequence",
|
|
1999
2057
|
"occupation",
|
|
2000
2058
|
"organization",
|
|
2059
|
+
"organization_medical_facility",
|
|
2001
2060
|
"passport_number",
|
|
2002
2061
|
"password",
|
|
2003
2062
|
"person_age",
|
|
@@ -2006,6 +2065,7 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
2006
2065
|
"physical_attribute",
|
|
2007
2066
|
"political_affiliation",
|
|
2008
2067
|
"religion",
|
|
2068
|
+
"sexuality",
|
|
2009
2069
|
"statistics",
|
|
2010
2070
|
"time",
|
|
2011
2071
|
"url",
|
|
@@ -2013,12 +2073,17 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
2013
2073
|
"username",
|
|
2014
2074
|
"vehicle_id",
|
|
2015
2075
|
"zodiac_sign"
|
|
2016
|
-
]).describe(
|
|
2076
|
+
]).describe(
|
|
2077
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
2078
|
+
)
|
|
2017
2079
|
).nullish().describe(
|
|
2018
2080
|
"The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
2019
2081
|
),
|
|
2020
2082
|
redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
|
|
2021
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
2083
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
2084
|
+
),
|
|
2085
|
+
redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
|
|
2086
|
+
"Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
2022
2087
|
),
|
|
2023
2088
|
sentiment_analysis: import_zod3.z.boolean().nullish().describe(
|
|
2024
2089
|
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
|
|
@@ -2155,20 +2220,23 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
2155
2220
|
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
2156
2221
|
),
|
|
2157
2222
|
summary_model: import_zod3.z.string().nullish().describe(
|
|
2158
|
-
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
2223
|
+
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
|
|
2159
2224
|
),
|
|
2160
2225
|
summary_type: import_zod3.z.string().nullish().describe(
|
|
2161
|
-
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
2226
|
+
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
2162
2227
|
),
|
|
2163
|
-
remove_audio_tags: import_zod3.z.enum(["all"]).describe(
|
|
2164
|
-
|
|
2228
|
+
remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
|
|
2229
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
2165
2230
|
).or(import_zod3.z.null()).optional().describe(
|
|
2166
|
-
|
|
2231
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
2167
2232
|
),
|
|
2168
2233
|
temperature: import_zod3.z.number().nullish().describe(
|
|
2169
2234
|
"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
2170
2235
|
),
|
|
2171
2236
|
text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
|
|
2237
|
+
unredacted_text: import_zod3.z.string().nullish().describe(
|
|
2238
|
+
"The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
2239
|
+
),
|
|
2172
2240
|
throttled: import_zod3.z.boolean().nullish().describe(
|
|
2173
2241
|
"True while a request is throttled and false when a request is no longer throttled"
|
|
2174
2242
|
),
|
|
@@ -2205,6 +2273,39 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
2205
2273
|
).nullish().describe(
|
|
2206
2274
|
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
|
|
2207
2275
|
),
|
|
2276
|
+
unredacted_utterances: import_zod3.z.array(
|
|
2277
|
+
import_zod3.z.object({
|
|
2278
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
|
|
2279
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
|
|
2280
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
|
|
2281
|
+
text: import_zod3.z.string().describe("The text for this utterance"),
|
|
2282
|
+
words: import_zod3.z.array(
|
|
2283
|
+
import_zod3.z.object({
|
|
2284
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
|
|
2285
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
|
|
2286
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
|
|
2287
|
+
text: import_zod3.z.string().describe("The text of the word"),
|
|
2288
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
2289
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
2290
|
+
),
|
|
2291
|
+
speaker: import_zod3.z.string().nullable().describe(
|
|
2292
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
2293
|
+
)
|
|
2294
|
+
})
|
|
2295
|
+
).describe("The words in the utterance."),
|
|
2296
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
2297
|
+
"The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
2298
|
+
),
|
|
2299
|
+
speaker: import_zod3.z.string().describe(
|
|
2300
|
+
'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
|
|
2301
|
+
),
|
|
2302
|
+
translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
|
|
2303
|
+
'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
|
|
2304
|
+
)
|
|
2305
|
+
})
|
|
2306
|
+
).nullish().describe(
|
|
2307
|
+
"The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
2308
|
+
),
|
|
2208
2309
|
webhook_auth: import_zod3.z.boolean().describe(
|
|
2209
2310
|
"Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
|
|
2210
2311
|
),
|
|
@@ -2233,6 +2334,22 @@ var createTranscriptResponse = import_zod3.z.object({
|
|
|
2233
2334
|
).nullish().describe(
|
|
2234
2335
|
"An array of temporally-sequential word objects, one for each word in the transcript.\n"
|
|
2235
2336
|
),
|
|
2337
|
+
unredacted_words: import_zod3.z.array(
|
|
2338
|
+
import_zod3.z.object({
|
|
2339
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
|
|
2340
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
|
|
2341
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
|
|
2342
|
+
text: import_zod3.z.string().describe("The text of the word"),
|
|
2343
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
2344
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
2345
|
+
),
|
|
2346
|
+
speaker: import_zod3.z.string().nullable().describe(
|
|
2347
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
2348
|
+
)
|
|
2349
|
+
})
|
|
2350
|
+
).nullish().describe(
|
|
2351
|
+
"The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
2352
|
+
),
|
|
2236
2353
|
acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
2237
2354
|
custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
|
|
2238
2355
|
language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
@@ -2404,7 +2521,7 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
2404
2521
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
2405
2522
|
),
|
|
2406
2523
|
disfluencies: import_zod3.z.boolean().nullish().describe(
|
|
2407
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
2524
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
2408
2525
|
),
|
|
2409
2526
|
domain: import_zod3.z.string().nullish().describe(
|
|
2410
2527
|
'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
|
|
@@ -2427,12 +2544,20 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
2427
2544
|
"email_address",
|
|
2428
2545
|
"event",
|
|
2429
2546
|
"filename",
|
|
2547
|
+
"gender",
|
|
2430
2548
|
"gender_sexuality",
|
|
2431
2549
|
"healthcare_number",
|
|
2432
2550
|
"injury",
|
|
2433
2551
|
"ip_address",
|
|
2434
2552
|
"language",
|
|
2435
2553
|
"location",
|
|
2554
|
+
"location_address",
|
|
2555
|
+
"location_address_street",
|
|
2556
|
+
"location_city",
|
|
2557
|
+
"location_coordinate",
|
|
2558
|
+
"location_country",
|
|
2559
|
+
"location_state",
|
|
2560
|
+
"location_zip",
|
|
2436
2561
|
"marital_status",
|
|
2437
2562
|
"medical_condition",
|
|
2438
2563
|
"medical_process",
|
|
@@ -2441,6 +2566,7 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
2441
2566
|
"number_sequence",
|
|
2442
2567
|
"occupation",
|
|
2443
2568
|
"organization",
|
|
2569
|
+
"organization_medical_facility",
|
|
2444
2570
|
"passport_number",
|
|
2445
2571
|
"password",
|
|
2446
2572
|
"person_age",
|
|
@@ -2449,6 +2575,7 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
2449
2575
|
"physical_attribute",
|
|
2450
2576
|
"political_affiliation",
|
|
2451
2577
|
"religion",
|
|
2578
|
+
"sexuality",
|
|
2452
2579
|
"statistics",
|
|
2453
2580
|
"time",
|
|
2454
2581
|
"url",
|
|
@@ -2753,6 +2880,24 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
2753
2880
|
}).optional().describe(
|
|
2754
2881
|
"Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
|
|
2755
2882
|
),
|
|
2883
|
+
metadata: import_zod3.z.object({
|
|
2884
|
+
domain_used: import_zod3.z.string().nullish().describe(
|
|
2885
|
+
'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
|
|
2886
|
+
),
|
|
2887
|
+
warnings: import_zod3.z.array(
|
|
2888
|
+
import_zod3.z.object({
|
|
2889
|
+
message: import_zod3.z.string().describe("A human-readable description of the warning.")
|
|
2890
|
+
}).describe(
|
|
2891
|
+
"A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
|
|
2892
|
+
)
|
|
2893
|
+
).optional().describe(
|
|
2894
|
+
"Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
|
|
2895
|
+
)
|
|
2896
|
+
}).describe(
|
|
2897
|
+
"Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
|
|
2898
|
+
).or(import_zod3.z.null()).optional().describe(
|
|
2899
|
+
"Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
|
|
2900
|
+
),
|
|
2756
2901
|
multichannel: import_zod3.z.boolean().nullish().describe(
|
|
2757
2902
|
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
|
|
2758
2903
|
),
|
|
@@ -2800,12 +2945,20 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
2800
2945
|
"email_address",
|
|
2801
2946
|
"event",
|
|
2802
2947
|
"filename",
|
|
2948
|
+
"gender",
|
|
2803
2949
|
"gender_sexuality",
|
|
2804
2950
|
"healthcare_number",
|
|
2805
2951
|
"injury",
|
|
2806
2952
|
"ip_address",
|
|
2807
2953
|
"language",
|
|
2808
2954
|
"location",
|
|
2955
|
+
"location_address",
|
|
2956
|
+
"location_address_street",
|
|
2957
|
+
"location_city",
|
|
2958
|
+
"location_coordinate",
|
|
2959
|
+
"location_country",
|
|
2960
|
+
"location_state",
|
|
2961
|
+
"location_zip",
|
|
2809
2962
|
"marital_status",
|
|
2810
2963
|
"medical_condition",
|
|
2811
2964
|
"medical_process",
|
|
@@ -2814,6 +2967,7 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
2814
2967
|
"number_sequence",
|
|
2815
2968
|
"occupation",
|
|
2816
2969
|
"organization",
|
|
2970
|
+
"organization_medical_facility",
|
|
2817
2971
|
"passport_number",
|
|
2818
2972
|
"password",
|
|
2819
2973
|
"person_age",
|
|
@@ -2822,6 +2976,7 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
2822
2976
|
"physical_attribute",
|
|
2823
2977
|
"political_affiliation",
|
|
2824
2978
|
"religion",
|
|
2979
|
+
"sexuality",
|
|
2825
2980
|
"statistics",
|
|
2826
2981
|
"time",
|
|
2827
2982
|
"url",
|
|
@@ -2829,12 +2984,17 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
2829
2984
|
"username",
|
|
2830
2985
|
"vehicle_id",
|
|
2831
2986
|
"zodiac_sign"
|
|
2832
|
-
]).describe(
|
|
2987
|
+
]).describe(
|
|
2988
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
2989
|
+
)
|
|
2833
2990
|
).nullish().describe(
|
|
2834
2991
|
"The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
2835
2992
|
),
|
|
2836
2993
|
redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
|
|
2837
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
2994
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
2995
|
+
),
|
|
2996
|
+
redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
|
|
2997
|
+
"Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
2838
2998
|
),
|
|
2839
2999
|
sentiment_analysis: import_zod3.z.boolean().nullish().describe(
|
|
2840
3000
|
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
|
|
@@ -2971,20 +3131,23 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
2971
3131
|
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
2972
3132
|
),
|
|
2973
3133
|
summary_model: import_zod3.z.string().nullish().describe(
|
|
2974
|
-
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
3134
|
+
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
|
|
2975
3135
|
),
|
|
2976
3136
|
summary_type: import_zod3.z.string().nullish().describe(
|
|
2977
|
-
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
3137
|
+
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
2978
3138
|
),
|
|
2979
|
-
remove_audio_tags: import_zod3.z.enum(["all"]).describe(
|
|
2980
|
-
|
|
3139
|
+
remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
|
|
3140
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
2981
3141
|
).or(import_zod3.z.null()).optional().describe(
|
|
2982
|
-
|
|
3142
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
2983
3143
|
),
|
|
2984
3144
|
temperature: import_zod3.z.number().nullish().describe(
|
|
2985
3145
|
"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
2986
3146
|
),
|
|
2987
3147
|
text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
|
|
3148
|
+
unredacted_text: import_zod3.z.string().nullish().describe(
|
|
3149
|
+
"The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
3150
|
+
),
|
|
2988
3151
|
throttled: import_zod3.z.boolean().nullish().describe(
|
|
2989
3152
|
"True while a request is throttled and false when a request is no longer throttled"
|
|
2990
3153
|
),
|
|
@@ -3021,6 +3184,39 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
3021
3184
|
).nullish().describe(
|
|
3022
3185
|
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
|
|
3023
3186
|
),
|
|
3187
|
+
unredacted_utterances: import_zod3.z.array(
|
|
3188
|
+
import_zod3.z.object({
|
|
3189
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
|
|
3190
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
|
|
3191
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
|
|
3192
|
+
text: import_zod3.z.string().describe("The text for this utterance"),
|
|
3193
|
+
words: import_zod3.z.array(
|
|
3194
|
+
import_zod3.z.object({
|
|
3195
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
|
|
3196
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
|
|
3197
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
|
|
3198
|
+
text: import_zod3.z.string().describe("The text of the word"),
|
|
3199
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
3200
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
3201
|
+
),
|
|
3202
|
+
speaker: import_zod3.z.string().nullable().describe(
|
|
3203
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
3204
|
+
)
|
|
3205
|
+
})
|
|
3206
|
+
).describe("The words in the utterance."),
|
|
3207
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
3208
|
+
"The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
3209
|
+
),
|
|
3210
|
+
speaker: import_zod3.z.string().describe(
|
|
3211
|
+
'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
|
|
3212
|
+
),
|
|
3213
|
+
translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
|
|
3214
|
+
'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
|
|
3215
|
+
)
|
|
3216
|
+
})
|
|
3217
|
+
).nullish().describe(
|
|
3218
|
+
"The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
3219
|
+
),
|
|
3024
3220
|
webhook_auth: import_zod3.z.boolean().describe(
|
|
3025
3221
|
"Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
|
|
3026
3222
|
),
|
|
@@ -3049,6 +3245,22 @@ var getTranscriptResponse = import_zod3.z.object({
|
|
|
3049
3245
|
).nullish().describe(
|
|
3050
3246
|
"An array of temporally-sequential word objects, one for each word in the transcript.\n"
|
|
3051
3247
|
),
|
|
3248
|
+
unredacted_words: import_zod3.z.array(
|
|
3249
|
+
import_zod3.z.object({
|
|
3250
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
|
|
3251
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
|
|
3252
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
|
|
3253
|
+
text: import_zod3.z.string().describe("The text of the word"),
|
|
3254
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
3255
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
3256
|
+
),
|
|
3257
|
+
speaker: import_zod3.z.string().nullable().describe(
|
|
3258
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
3259
|
+
)
|
|
3260
|
+
})
|
|
3261
|
+
).nullish().describe(
|
|
3262
|
+
"The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
3263
|
+
),
|
|
3052
3264
|
acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
3053
3265
|
custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
|
|
3054
3266
|
language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
@@ -3181,7 +3393,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
3181
3393
|
"Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/correct-spelling-of-terms) for more details."
|
|
3182
3394
|
),
|
|
3183
3395
|
disfluencies: import_zod3.z.boolean().nullish().describe(
|
|
3184
|
-
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false'
|
|
3396
|
+
'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/include-filler-words), like "umm", in your media file; can be true or false. Supported on Universal-3 Pro and Universal-2.'
|
|
3185
3397
|
),
|
|
3186
3398
|
domain: import_zod3.z.string().nullish().describe(
|
|
3187
3399
|
'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
|
|
@@ -3204,12 +3416,20 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
3204
3416
|
"email_address",
|
|
3205
3417
|
"event",
|
|
3206
3418
|
"filename",
|
|
3419
|
+
"gender",
|
|
3207
3420
|
"gender_sexuality",
|
|
3208
3421
|
"healthcare_number",
|
|
3209
3422
|
"injury",
|
|
3210
3423
|
"ip_address",
|
|
3211
3424
|
"language",
|
|
3212
3425
|
"location",
|
|
3426
|
+
"location_address",
|
|
3427
|
+
"location_address_street",
|
|
3428
|
+
"location_city",
|
|
3429
|
+
"location_coordinate",
|
|
3430
|
+
"location_country",
|
|
3431
|
+
"location_state",
|
|
3432
|
+
"location_zip",
|
|
3213
3433
|
"marital_status",
|
|
3214
3434
|
"medical_condition",
|
|
3215
3435
|
"medical_process",
|
|
@@ -3218,6 +3438,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
3218
3438
|
"number_sequence",
|
|
3219
3439
|
"occupation",
|
|
3220
3440
|
"organization",
|
|
3441
|
+
"organization_medical_facility",
|
|
3221
3442
|
"passport_number",
|
|
3222
3443
|
"password",
|
|
3223
3444
|
"person_age",
|
|
@@ -3226,6 +3447,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
3226
3447
|
"physical_attribute",
|
|
3227
3448
|
"political_affiliation",
|
|
3228
3449
|
"religion",
|
|
3450
|
+
"sexuality",
|
|
3229
3451
|
"statistics",
|
|
3230
3452
|
"time",
|
|
3231
3453
|
"url",
|
|
@@ -3530,6 +3752,24 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
3530
3752
|
}).optional().describe(
|
|
3531
3753
|
"Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
|
|
3532
3754
|
),
|
|
3755
|
+
metadata: import_zod3.z.object({
|
|
3756
|
+
domain_used: import_zod3.z.string().nullish().describe(
|
|
3757
|
+
'The domain-specific model that was applied to the transcription (for example, `"medical-v1"` when [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was applied), or `null` if no domain-specific model was used. Always present when `metadata` is present.\n'
|
|
3758
|
+
),
|
|
3759
|
+
warnings: import_zod3.z.array(
|
|
3760
|
+
import_zod3.z.object({
|
|
3761
|
+
message: import_zod3.z.string().describe("A human-readable description of the warning.")
|
|
3762
|
+
}).describe(
|
|
3763
|
+
"A warning message emitted while processing a transcription request. Warnings are surfaced on the transcript response under `metadata.warnings`.\n"
|
|
3764
|
+
)
|
|
3765
|
+
).optional().describe(
|
|
3766
|
+
"Warning messages emitted while processing the request. Each warning is an object with a human-readable `message`. When there are no warnings to report, this field is omitted from the `metadata` object entirely.\n"
|
|
3767
|
+
)
|
|
3768
|
+
}).describe(
|
|
3769
|
+
"Additional metadata about the transcription returned on the `Transcript` object under `metadata`. Only present when there is information to report \u2014 when all of its fields would be empty, the `metadata` object is omitted from the response entirely.\n"
|
|
3770
|
+
).or(import_zod3.z.null()).optional().describe(
|
|
3771
|
+
"Additional metadata about the transcription, including any warnings emitted while processing the request. Only present when there is information to report; if no fields would be populated, `metadata` is omitted from the response entirely.\n"
|
|
3772
|
+
),
|
|
3533
3773
|
multichannel: import_zod3.z.boolean().nullish().describe(
|
|
3534
3774
|
"Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) was enabled in the transcription request, either true or false"
|
|
3535
3775
|
),
|
|
@@ -3577,12 +3817,20 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
3577
3817
|
"email_address",
|
|
3578
3818
|
"event",
|
|
3579
3819
|
"filename",
|
|
3820
|
+
"gender",
|
|
3580
3821
|
"gender_sexuality",
|
|
3581
3822
|
"healthcare_number",
|
|
3582
3823
|
"injury",
|
|
3583
3824
|
"ip_address",
|
|
3584
3825
|
"language",
|
|
3585
3826
|
"location",
|
|
3827
|
+
"location_address",
|
|
3828
|
+
"location_address_street",
|
|
3829
|
+
"location_city",
|
|
3830
|
+
"location_coordinate",
|
|
3831
|
+
"location_country",
|
|
3832
|
+
"location_state",
|
|
3833
|
+
"location_zip",
|
|
3586
3834
|
"marital_status",
|
|
3587
3835
|
"medical_condition",
|
|
3588
3836
|
"medical_process",
|
|
@@ -3591,6 +3839,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
3591
3839
|
"number_sequence",
|
|
3592
3840
|
"occupation",
|
|
3593
3841
|
"organization",
|
|
3842
|
+
"organization_medical_facility",
|
|
3594
3843
|
"passport_number",
|
|
3595
3844
|
"password",
|
|
3596
3845
|
"person_age",
|
|
@@ -3599,6 +3848,7 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
3599
3848
|
"physical_attribute",
|
|
3600
3849
|
"political_affiliation",
|
|
3601
3850
|
"religion",
|
|
3851
|
+
"sexuality",
|
|
3602
3852
|
"statistics",
|
|
3603
3853
|
"time",
|
|
3604
3854
|
"url",
|
|
@@ -3606,12 +3856,17 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
3606
3856
|
"username",
|
|
3607
3857
|
"vehicle_id",
|
|
3608
3858
|
"zodiac_sign"
|
|
3609
|
-
]).describe(
|
|
3859
|
+
]).describe(
|
|
3860
|
+
"The type of PII to redact. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for the full list of policies and their descriptions."
|
|
3861
|
+
)
|
|
3610
3862
|
).nullish().describe(
|
|
3611
3863
|
"The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
3612
3864
|
),
|
|
3613
3865
|
redact_pii_sub: import_zod3.z.enum(["entity_name", "hash"]).optional().describe(
|
|
3614
|
-
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
|
|
3866
|
+
"The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/streaming/pii-redaction) for more details."
|
|
3867
|
+
),
|
|
3868
|
+
redact_pii_return_unredacted: import_zod3.z.boolean().nullish().describe(
|
|
3869
|
+
"Whether the original unredacted transcript was also returned alongside the redacted one. When `true`, the response includes `unredacted_text`, `unredacted_words`, and `unredacted_utterances`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
3615
3870
|
),
|
|
3616
3871
|
sentiment_analysis: import_zod3.z.boolean().nullish().describe(
|
|
3617
3872
|
"Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/analyze-sentiment-of-speech) is enabled, can be true or false"
|
|
@@ -3748,20 +4003,23 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
3748
4003
|
"The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
3749
4004
|
),
|
|
3750
4005
|
summary_model: import_zod3.z.string().nullish().describe(
|
|
3751
|
-
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
4006
|
+
"The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details.\n"
|
|
3752
4007
|
),
|
|
3753
4008
|
summary_type: import_zod3.z.string().nullish().describe(
|
|
3754
|
-
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts
|
|
4009
|
+
"The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarize-transcripts) for details."
|
|
3755
4010
|
),
|
|
3756
|
-
remove_audio_tags: import_zod3.z.enum(["all"]).describe(
|
|
3757
|
-
|
|
4011
|
+
remove_audio_tags: import_zod3.z.enum(["all", "speaker"]).describe(
|
|
4012
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
3758
4013
|
).or(import_zod3.z.null()).optional().describe(
|
|
3759
|
-
|
|
4014
|
+
'Universal-3 Pro generates rich transcripts that can include inline annotations such as audio event markers and speaker cues. Set to `"all"` to remove all inline annotations, or `"speaker"` to remove only speaker cues while keeping other annotations.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
|
|
3760
4015
|
),
|
|
3761
4016
|
temperature: import_zod3.z.number().nullish().describe(
|
|
3762
4017
|
"The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
|
|
3763
4018
|
),
|
|
3764
4019
|
text: import_zod3.z.string().nullish().describe("The textual transcript of your media file"),
|
|
4020
|
+
unredacted_text: import_zod3.z.string().nullish().describe(
|
|
4021
|
+
"The original textual transcript of your media file before PII redaction was applied. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `text` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
4022
|
+
),
|
|
3765
4023
|
throttled: import_zod3.z.boolean().nullish().describe(
|
|
3766
4024
|
"True while a request is throttled and false when a request is no longer throttled"
|
|
3767
4025
|
),
|
|
@@ -3798,6 +4056,39 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
3798
4056
|
).nullish().describe(
|
|
3799
4057
|
"When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/transcribe-multiple-audio-channels) for more information.\n"
|
|
3800
4058
|
),
|
|
4059
|
+
unredacted_utterances: import_zod3.z.array(
|
|
4060
|
+
import_zod3.z.object({
|
|
4061
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this utterance"),
|
|
4062
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
|
|
4063
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
|
|
4064
|
+
text: import_zod3.z.string().describe("The text for this utterance"),
|
|
4065
|
+
words: import_zod3.z.array(
|
|
4066
|
+
import_zod3.z.object({
|
|
4067
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
|
|
4068
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
|
|
4069
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
|
|
4070
|
+
text: import_zod3.z.string().describe("The text of the word"),
|
|
4071
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
4072
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
4073
|
+
),
|
|
4074
|
+
speaker: import_zod3.z.string().nullable().describe(
|
|
4075
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
4076
|
+
)
|
|
4077
|
+
})
|
|
4078
|
+
).describe("The words in the utterance."),
|
|
4079
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
4080
|
+
"The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
4081
|
+
),
|
|
4082
|
+
speaker: import_zod3.z.string().describe(
|
|
4083
|
+
'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
|
|
4084
|
+
),
|
|
4085
|
+
translated_texts: import_zod3.z.record(import_zod3.z.string(), import_zod3.z.string()).optional().describe(
|
|
4086
|
+
'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
|
|
4087
|
+
)
|
|
4088
|
+
})
|
|
4089
|
+
).nullish().describe(
|
|
4090
|
+
"The original turn-by-turn utterance objects before PII redaction was applied. Same shape as `utterances`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `utterances` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
4091
|
+
),
|
|
3801
4092
|
webhook_auth: import_zod3.z.boolean().describe(
|
|
3802
4093
|
"Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
|
|
3803
4094
|
),
|
|
@@ -3826,6 +4117,22 @@ var deleteTranscriptResponse = import_zod3.z.object({
|
|
|
3826
4117
|
).nullish().describe(
|
|
3827
4118
|
"An array of temporally-sequential word objects, one for each word in the transcript.\n"
|
|
3828
4119
|
),
|
|
4120
|
+
unredacted_words: import_zod3.z.array(
|
|
4121
|
+
import_zod3.z.object({
|
|
4122
|
+
confidence: import_zod3.z.number().describe("The confidence score for the transcript of this word"),
|
|
4123
|
+
start: import_zod3.z.number().describe("The starting time, in milliseconds, for the word"),
|
|
4124
|
+
end: import_zod3.z.number().describe("The ending time, in milliseconds, for the word"),
|
|
4125
|
+
text: import_zod3.z.string().describe("The text of the word"),
|
|
4126
|
+
channel: import_zod3.z.string().nullish().describe(
|
|
4127
|
+
"The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
|
|
4128
|
+
),
|
|
4129
|
+
speaker: import_zod3.z.string().nullable().describe(
|
|
4130
|
+
"The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/label-speakers) is enabled, else null"
|
|
4131
|
+
)
|
|
4132
|
+
})
|
|
4133
|
+
).nullish().describe(
|
|
4134
|
+
"The original temporally-sequential word objects before PII redaction was applied. Same shape as `words`. Only returned when `redact_pii_return_unredacted` was set to `true` on the transcription request, otherwise this field is omitted and the `words` field remains fully redacted. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
|
|
4135
|
+
),
|
|
3829
4136
|
acoustic_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
3830
4137
|
custom_topics: import_zod3.z.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
|
|
3831
4138
|
language_model: import_zod3.z.string().describe("This parameter does not currently have any functionality attached to it."),
|
|
@@ -3981,7 +4288,21 @@ var streamingTranscriberParams = import_zod4.z.object({
|
|
|
3981
4288
|
inactivityTimeout: import_zod4.z.number().optional().describe("From SDK v3"),
|
|
3982
4289
|
speakerLabels: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
3983
4290
|
maxSpeakers: import_zod4.z.number().optional().describe("From SDK v3"),
|
|
3984
|
-
|
|
4291
|
+
voiceFocus: import_zod4.z.unknown().optional().describe("From SDK v3"),
|
|
4292
|
+
voiceFocusThreshold: import_zod4.z.number().optional().describe("From SDK v3"),
|
|
4293
|
+
continuousPartials: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
4294
|
+
interruptionDelay: import_zod4.z.number().optional().describe("From SDK v3"),
|
|
4295
|
+
turnLeftPadMs: import_zod4.z.number().optional().describe("From SDK v3"),
|
|
4296
|
+
customerSupportAudioCapture: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
4297
|
+
includePartialTurns: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
4298
|
+
redactPii: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
4299
|
+
redactPiiPolicies: import_zod4.z.unknown().optional().describe("From SDK v3"),
|
|
4300
|
+
redactPiiSub: import_zod4.z.unknown().optional().describe("From SDK v3"),
|
|
4301
|
+
llmGateway: import_zod4.z.unknown().optional().describe("From SDK v3"),
|
|
4302
|
+
webhookUrl: import_zod4.z.string().optional().describe("From SDK v3"),
|
|
4303
|
+
webhookAuthHeaderName: import_zod4.z.string().optional().describe("From SDK v3"),
|
|
4304
|
+
webhookAuthHeaderValue: import_zod4.z.string().optional().describe("From SDK v3"),
|
|
4305
|
+
mode: import_zod4.z.unknown().describe("From SDK v3")
|
|
3985
4306
|
});
|
|
3986
4307
|
var streamingUpdateConfigParams = import_zod4.z.object({
|
|
3987
4308
|
end_utterance_silence_threshold: import_zod4.z.number().min(0).max(2e4).optional().describe("The duration threshold in milliseconds"),
|
|
@@ -3993,7 +4314,9 @@ var streamingUpdateConfigParams = import_zod4.z.object({
|
|
|
3993
4314
|
format_turns: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
3994
4315
|
keyterms_prompt: import_zod4.z.array(import_zod4.z.string()).optional().describe("From SDK v3"),
|
|
3995
4316
|
prompt: import_zod4.z.string().optional().describe("From SDK v3"),
|
|
3996
|
-
filter_profanity: import_zod4.z.boolean().optional().describe("From SDK v3")
|
|
4317
|
+
filter_profanity: import_zod4.z.boolean().optional().describe("From SDK v3"),
|
|
4318
|
+
interruption_delay: import_zod4.z.number().optional().describe("From SDK v3"),
|
|
4319
|
+
turn_left_pad_ms: import_zod4.z.number().optional().describe("From SDK v3")
|
|
3997
4320
|
});
|
|
3998
4321
|
|
|
3999
4322
|
// src/generated/gladia/api/gladiaControlAPI.zod.ts
|
|
@@ -4030,7 +4353,7 @@ var preRecordedControllerInitPreRecordedJobV2BodyTranslationConfigMatchOriginalU
|
|
|
4030
4353
|
var preRecordedControllerInitPreRecordedJobV2BodyTranslationConfigLipsyncDefault = true;
|
|
4031
4354
|
var preRecordedControllerInitPreRecordedJobV2BodyTranslationConfigContextAdaptationDefault = true;
|
|
4032
4355
|
var preRecordedControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault = "general";
|
|
4033
|
-
var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-
|
|
4356
|
+
var preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
4034
4357
|
var preRecordedControllerInitPreRecordedJobV2BodyLanguageConfigLanguagesDefault = [];
|
|
4035
4358
|
var preRecordedControllerInitPreRecordedJobV2Body = import_zod5.z.object({
|
|
4036
4359
|
custom_vocabulary: import_zod5.z.boolean().optional().describe(
|
|
@@ -4315,23 +4638,23 @@ var preRecordedControllerInitPreRecordedJobV2Body = import_zod5.z.object({
|
|
|
4315
4638
|
"Forces the translation to use informal language forms when available in the target language."
|
|
4316
4639
|
)
|
|
4317
4640
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
4318
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
4641
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
4319
4642
|
summarization_config: import_zod5.z.object({
|
|
4320
4643
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(preRecordedControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault).describe("The type of summarization to apply")
|
|
4321
|
-
}).optional().describe("
|
|
4644
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
4322
4645
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
4323
4646
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
4324
4647
|
custom_spelling_config: import_zod5.z.object({
|
|
4325
4648
|
spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
|
|
4326
4649
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
4327
4650
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
4328
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
4651
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
4329
4652
|
audio_to_llm_config: import_zod5.z.object({
|
|
4330
4653
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
4331
4654
|
model: import_zod5.z.string().default(preRecordedControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault).describe(
|
|
4332
4655
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
4333
4656
|
)
|
|
4334
|
-
}).optional().describe("
|
|
4657
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
4335
4658
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
4336
4659
|
pii_redaction_config: import_zod5.z.object({
|
|
4337
4660
|
entity_types: import_zod5.z.enum([
|
|
@@ -4573,7 +4896,7 @@ var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsTrans
|
|
|
4573
4896
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsTranslationConfigLipsyncDefault = true;
|
|
4574
4897
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsTranslationConfigContextAdaptationDefault = true;
|
|
4575
4898
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault = "general";
|
|
4576
|
-
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
4899
|
+
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
4577
4900
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsLanguageConfigLanguagesDefault = [];
|
|
4578
4901
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemResultTranscriptionUtterancesItemChannelMin = 0;
|
|
4579
4902
|
var preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemResultTranscriptionUtterancesItemSpeakerMin = 0;
|
|
@@ -4917,12 +5240,12 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
|
|
|
4917
5240
|
"Forces the translation to use informal language forms when available in the target language."
|
|
4918
5241
|
)
|
|
4919
5242
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
4920
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
5243
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
4921
5244
|
summarization_config: import_zod5.z.object({
|
|
4922
5245
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
4923
5246
|
preRecordedControllerGetPreRecordedJobsV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
|
|
4924
5247
|
).describe("The type of summarization to apply")
|
|
4925
|
-
}).optional().describe("
|
|
5248
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
4926
5249
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
4927
5250
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
4928
5251
|
custom_spelling_config: import_zod5.z.object({
|
|
@@ -4931,7 +5254,7 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
|
|
|
4931
5254
|
"**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
|
|
4932
5255
|
),
|
|
4933
5256
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
4934
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
5257
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
4935
5258
|
audio_to_llm_config: import_zod5.z.object({
|
|
4936
5259
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
4937
5260
|
model: import_zod5.z.string().default(
|
|
@@ -4939,7 +5262,7 @@ var preRecordedControllerGetPreRecordedJobsV2Response = import_zod5.z.object({
|
|
|
4939
5262
|
).describe(
|
|
4940
5263
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
4941
5264
|
)
|
|
4942
|
-
}).optional().describe("
|
|
5265
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
4943
5266
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
4944
5267
|
pii_redaction_config: import_zod5.z.object({
|
|
4945
5268
|
entity_types: import_zod5.z.enum([
|
|
@@ -6064,7 +6387,7 @@ var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsTranslationConf
|
|
|
6064
6387
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsTranslationConfigLipsyncDefault = true;
|
|
6065
6388
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsTranslationConfigContextAdaptationDefault = true;
|
|
6066
6389
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSummarizationConfigTypeDefault = "general";
|
|
6067
|
-
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
6390
|
+
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
6068
6391
|
var preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsLanguageConfigLanguagesDefault = [];
|
|
6069
6392
|
var preRecordedControllerGetPreRecordedJobV2ResponseResultTranscriptionUtterancesItemChannelMin = 0;
|
|
6070
6393
|
var preRecordedControllerGetPreRecordedJobV2ResponseResultTranscriptionUtterancesItemSpeakerMin = 0;
|
|
@@ -6401,19 +6724,19 @@ var preRecordedControllerGetPreRecordedJobV2Response = import_zod5.z.object({
|
|
|
6401
6724
|
"Forces the translation to use informal language forms when available in the target language."
|
|
6402
6725
|
)
|
|
6403
6726
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
6404
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
6727
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
6405
6728
|
summarization_config: import_zod5.z.object({
|
|
6406
6729
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
6407
6730
|
preRecordedControllerGetPreRecordedJobV2ResponseRequestParamsSummarizationConfigTypeDefault
|
|
6408
6731
|
).describe("The type of summarization to apply")
|
|
6409
|
-
}).optional().describe("
|
|
6732
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
6410
6733
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
6411
6734
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
6412
6735
|
custom_spelling_config: import_zod5.z.object({
|
|
6413
6736
|
spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
|
|
6414
6737
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
6415
6738
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
6416
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
6739
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
6417
6740
|
audio_to_llm_config: import_zod5.z.object({
|
|
6418
6741
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
6419
6742
|
model: import_zod5.z.string().default(
|
|
@@ -6421,7 +6744,7 @@ var preRecordedControllerGetPreRecordedJobV2Response = import_zod5.z.object({
|
|
|
6421
6744
|
).describe(
|
|
6422
6745
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
6423
6746
|
)
|
|
6424
|
-
}).optional().describe("
|
|
6747
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
6425
6748
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
6426
6749
|
pii_redaction_config: import_zod5.z.object({
|
|
6427
6750
|
entity_types: import_zod5.z.enum([
|
|
@@ -7523,7 +7846,7 @@ var transcriptionControllerInitPreRecordedJobV2BodyTranslationConfigMatchOrigina
|
|
|
7523
7846
|
var transcriptionControllerInitPreRecordedJobV2BodyTranslationConfigLipsyncDefault = true;
|
|
7524
7847
|
var transcriptionControllerInitPreRecordedJobV2BodyTranslationConfigContextAdaptationDefault = true;
|
|
7525
7848
|
var transcriptionControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault = "general";
|
|
7526
|
-
var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-
|
|
7849
|
+
var transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
7527
7850
|
var transcriptionControllerInitPreRecordedJobV2BodyLanguageConfigLanguagesDefault = [];
|
|
7528
7851
|
var transcriptionControllerInitPreRecordedJobV2Body = import_zod5.z.object({
|
|
7529
7852
|
custom_vocabulary: import_zod5.z.boolean().optional().describe(
|
|
@@ -7812,23 +8135,23 @@ var transcriptionControllerInitPreRecordedJobV2Body = import_zod5.z.object({
|
|
|
7812
8135
|
"Forces the translation to use informal language forms when available in the target language."
|
|
7813
8136
|
)
|
|
7814
8137
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
7815
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
8138
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
7816
8139
|
summarization_config: import_zod5.z.object({
|
|
7817
8140
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(transcriptionControllerInitPreRecordedJobV2BodySummarizationConfigTypeDefault).describe("The type of summarization to apply")
|
|
7818
|
-
}).optional().describe("
|
|
8141
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
7819
8142
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
7820
8143
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
7821
8144
|
custom_spelling_config: import_zod5.z.object({
|
|
7822
8145
|
spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
|
|
7823
8146
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
7824
8147
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
7825
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
8148
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
7826
8149
|
audio_to_llm_config: import_zod5.z.object({
|
|
7827
8150
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
7828
8151
|
model: import_zod5.z.string().default(transcriptionControllerInitPreRecordedJobV2BodyAudioToLlmConfigModelDefault).describe(
|
|
7829
8152
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
7830
8153
|
)
|
|
7831
|
-
}).optional().describe("
|
|
8154
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
7832
8155
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
7833
8156
|
pii_redaction_config: import_zod5.z.object({
|
|
7834
8157
|
entity_types: import_zod5.z.enum([
|
|
@@ -8073,7 +8396,7 @@ var transcriptionControllerListV2ResponseItemsItemRequestParamsTranslationConfig
|
|
|
8073
8396
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsTranslationConfigLipsyncDefault = true;
|
|
8074
8397
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsTranslationConfigContextAdaptationDefault = true;
|
|
8075
8398
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault = "general";
|
|
8076
|
-
var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
8399
|
+
var transcriptionControllerListV2ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
8077
8400
|
var transcriptionControllerListV2ResponseItemsItemRequestParamsLanguageConfigLanguagesDefault = [];
|
|
8078
8401
|
var transcriptionControllerListV2ResponseItemsItemResultTranscriptionUtterancesItemChannelMin = 0;
|
|
8079
8402
|
var transcriptionControllerListV2ResponseItemsItemResultTranscriptionUtterancesItemSpeakerMin = 0;
|
|
@@ -8462,12 +8785,12 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
|
|
|
8462
8785
|
"Forces the translation to use informal language forms when available in the target language."
|
|
8463
8786
|
)
|
|
8464
8787
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
8465
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
8788
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
8466
8789
|
summarization_config: import_zod5.z.object({
|
|
8467
8790
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
8468
8791
|
transcriptionControllerListV2ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
|
|
8469
8792
|
).describe("The type of summarization to apply")
|
|
8470
|
-
}).optional().describe("
|
|
8793
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
8471
8794
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
8472
8795
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
8473
8796
|
custom_spelling_config: import_zod5.z.object({
|
|
@@ -8476,7 +8799,7 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
|
|
|
8476
8799
|
"**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
|
|
8477
8800
|
),
|
|
8478
8801
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
8479
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
8802
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
8480
8803
|
audio_to_llm_config: import_zod5.z.object({
|
|
8481
8804
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
8482
8805
|
model: import_zod5.z.string().default(
|
|
@@ -8484,7 +8807,7 @@ var transcriptionControllerListV2Response = import_zod5.z.object({
|
|
|
8484
8807
|
).describe(
|
|
8485
8808
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
8486
8809
|
)
|
|
8487
|
-
}).optional().describe("
|
|
8810
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
8488
8811
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
8489
8812
|
pii_redaction_config: import_zod5.z.object({
|
|
8490
8813
|
entity_types: import_zod5.z.enum([
|
|
@@ -10790,7 +11113,7 @@ var transcriptionControllerGetTranscriptV2ResponseRequestParamsTranslationConfig
|
|
|
10790
11113
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsTranslationConfigLipsyncDefault = true;
|
|
10791
11114
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsTranslationConfigContextAdaptationDefault = true;
|
|
10792
11115
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsSummarizationConfigTypeDefault = "general";
|
|
10793
|
-
var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
11116
|
+
var transcriptionControllerGetTranscriptV2ResponseRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
10794
11117
|
var transcriptionControllerGetTranscriptV2ResponseRequestParamsLanguageConfigLanguagesDefault = [];
|
|
10795
11118
|
var transcriptionControllerGetTranscriptV2ResponseResultTranscriptionUtterancesItemChannelMin = 0;
|
|
10796
11119
|
var transcriptionControllerGetTranscriptV2ResponseResultTranscriptionUtterancesItemSpeakerMin = 0;
|
|
@@ -11173,19 +11496,19 @@ var transcriptionControllerGetTranscriptV2Response = import_zod5.z.discriminated
|
|
|
11173
11496
|
"Forces the translation to use informal language forms when available in the target language."
|
|
11174
11497
|
)
|
|
11175
11498
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
11176
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
11499
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
11177
11500
|
summarization_config: import_zod5.z.object({
|
|
11178
11501
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
11179
11502
|
transcriptionControllerGetTranscriptV2ResponseRequestParamsSummarizationConfigTypeDefault
|
|
11180
11503
|
).describe("The type of summarization to apply")
|
|
11181
|
-
}).optional().describe("
|
|
11504
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
11182
11505
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
11183
11506
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
11184
11507
|
custom_spelling_config: import_zod5.z.object({
|
|
11185
11508
|
spelling_dictionary: import_zod5.z.record(import_zod5.z.string(), import_zod5.z.array(import_zod5.z.string())).describe("The list of spelling applied on the audio transcription")
|
|
11186
11509
|
}).optional().describe("**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"),
|
|
11187
11510
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
11188
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
11511
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
11189
11512
|
audio_to_llm_config: import_zod5.z.object({
|
|
11190
11513
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
11191
11514
|
model: import_zod5.z.string().default(
|
|
@@ -11193,7 +11516,7 @@ var transcriptionControllerGetTranscriptV2Response = import_zod5.z.discriminated
|
|
|
11193
11516
|
).describe(
|
|
11194
11517
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
11195
11518
|
)
|
|
11196
|
-
}).optional().describe("
|
|
11519
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
11197
11520
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
11198
11521
|
pii_redaction_config: import_zod5.z.object({
|
|
11199
11522
|
entity_types: import_zod5.z.enum([
|
|
@@ -13884,7 +14207,7 @@ var historyControllerGetListV1ResponseItemsItemRequestParamsTranslationConfigMat
|
|
|
13884
14207
|
var historyControllerGetListV1ResponseItemsItemRequestParamsTranslationConfigLipsyncDefault = true;
|
|
13885
14208
|
var historyControllerGetListV1ResponseItemsItemRequestParamsTranslationConfigContextAdaptationDefault = true;
|
|
13886
14209
|
var historyControllerGetListV1ResponseItemsItemRequestParamsSummarizationConfigTypeDefault = "general";
|
|
13887
|
-
var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-
|
|
14210
|
+
var historyControllerGetListV1ResponseItemsItemRequestParamsAudioToLlmConfigModelDefault = "openai/gpt-5.4-nano";
|
|
13888
14211
|
var historyControllerGetListV1ResponseItemsItemRequestParamsLanguageConfigLanguagesDefault = [];
|
|
13889
14212
|
var historyControllerGetListV1ResponseItemsItemResultTranscriptionUtterancesItemChannelMin = 0;
|
|
13890
14213
|
var historyControllerGetListV1ResponseItemsItemResultTranscriptionUtterancesItemSpeakerMin = 0;
|
|
@@ -14273,12 +14596,12 @@ var historyControllerGetListV1Response = import_zod5.z.object({
|
|
|
14273
14596
|
"Forces the translation to use informal language forms when available in the target language."
|
|
14274
14597
|
)
|
|
14275
14598
|
}).optional().describe("**[Beta]** Translation configuration, if `translation` is enabled"),
|
|
14276
|
-
summarization: import_zod5.z.boolean().optional().describe("
|
|
14599
|
+
summarization: import_zod5.z.boolean().optional().describe("Enable summarization for this audio"),
|
|
14277
14600
|
summarization_config: import_zod5.z.object({
|
|
14278
14601
|
type: import_zod5.z.enum(["general", "bullet_points", "concise"]).describe("The type of summarization to apply").default(
|
|
14279
14602
|
historyControllerGetListV1ResponseItemsItemRequestParamsSummarizationConfigTypeDefault
|
|
14280
14603
|
).describe("The type of summarization to apply")
|
|
14281
|
-
}).optional().describe("
|
|
14604
|
+
}).optional().describe("Summarization configuration, if `summarization` is enabled"),
|
|
14282
14605
|
named_entity_recognition: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable named entity recognition for this audio"),
|
|
14283
14606
|
custom_spelling: import_zod5.z.boolean().optional().describe("**[Alpha]** Enable custom spelling for this audio"),
|
|
14284
14607
|
custom_spelling_config: import_zod5.z.object({
|
|
@@ -14287,7 +14610,7 @@ var historyControllerGetListV1Response = import_zod5.z.object({
|
|
|
14287
14610
|
"**[Alpha]** Custom spelling configuration, if `custom_spelling` is enabled"
|
|
14288
14611
|
),
|
|
14289
14612
|
sentiment_analysis: import_zod5.z.boolean().optional().describe("Enable sentiment analysis for this audio"),
|
|
14290
|
-
audio_to_llm: import_zod5.z.boolean().optional().describe("
|
|
14613
|
+
audio_to_llm: import_zod5.z.boolean().optional().describe("Enable audio to LLM processing for this audio"),
|
|
14291
14614
|
audio_to_llm_config: import_zod5.z.object({
|
|
14292
14615
|
prompts: import_zod5.z.array(import_zod5.z.array(import_zod5.z.unknown())).min(1).describe("The list of prompts applied on the audio transcription"),
|
|
14293
14616
|
model: import_zod5.z.string().default(
|
|
@@ -14295,7 +14618,7 @@ var historyControllerGetListV1Response = import_zod5.z.object({
|
|
|
14295
14618
|
).describe(
|
|
14296
14619
|
"The model to use for the prompt execution. You can find the list of supported models [here](https://openrouter.ai/models)."
|
|
14297
14620
|
)
|
|
14298
|
-
}).optional().describe("
|
|
14621
|
+
}).optional().describe("Audio to LLM configuration, if `audio_to_llm` is enabled"),
|
|
14299
14622
|
pii_redaction: import_zod5.z.boolean().optional().describe("Enable PII redaction for this audio"),
|
|
14300
14623
|
pii_redaction_config: import_zod5.z.object({
|
|
14301
14624
|
entity_types: import_zod5.z.enum([
|
|
@@ -19748,6 +20071,7 @@ var createRealtimeClientSecretBodySessionAudioOutputSpeedDefault = 1;
|
|
|
19748
20071
|
var createRealtimeClientSecretBodySessionAudioOutputSpeedMin = 0.25;
|
|
19749
20072
|
var createRealtimeClientSecretBodySessionAudioOutputSpeedMax = 1.5;
|
|
19750
20073
|
var createRealtimeClientSecretBodySessionToolChoiceDefault = "auto";
|
|
20074
|
+
var createRealtimeClientSecretBodySessionReasoningEffortDefault = "low";
|
|
19751
20075
|
var createRealtimeClientSecretBodySessionTruncationRetentionRatioMin = 0;
|
|
19752
20076
|
var createRealtimeClientSecretBodySessionTruncationRetentionRatioMax = 1;
|
|
19753
20077
|
var createRealtimeClientSecretBodySessionTruncationTokenLimitsPostInstructionsMin = 0;
|
|
@@ -19778,6 +20102,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
19778
20102
|
import_zod6.z.enum([
|
|
19779
20103
|
"gpt-realtime",
|
|
19780
20104
|
"gpt-realtime-1.5",
|
|
20105
|
+
"gpt-realtime-2",
|
|
19781
20106
|
"gpt-realtime-2025-08-28",
|
|
19782
20107
|
"gpt-4o-realtime-preview",
|
|
19783
20108
|
"gpt-4o-realtime-preview-2024-10-01",
|
|
@@ -19818,16 +20143,20 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
19818
20143
|
"gpt-4o-mini-transcribe",
|
|
19819
20144
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
19820
20145
|
"gpt-4o-transcribe",
|
|
19821
|
-
"gpt-4o-transcribe-diarize"
|
|
20146
|
+
"gpt-4o-transcribe-diarize",
|
|
20147
|
+
"gpt-realtime-whisper"
|
|
19822
20148
|
])
|
|
19823
20149
|
).optional().describe(
|
|
19824
|
-
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`,
|
|
20150
|
+
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
|
|
19825
20151
|
),
|
|
19826
20152
|
language: import_zod6.z.string().optional().describe(
|
|
19827
20153
|
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
19828
20154
|
),
|
|
19829
20155
|
prompt: import_zod6.z.string().optional().describe(
|
|
19830
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
20156
|
+
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
|
|
20157
|
+
),
|
|
20158
|
+
delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
|
|
20159
|
+
"Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
|
|
19831
20160
|
)
|
|
19832
20161
|
}).optional(),
|
|
19833
20162
|
noise_reduction: import_zod6.z.object({
|
|
@@ -19894,7 +20223,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
19894
20223
|
"Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
|
|
19895
20224
|
)
|
|
19896
20225
|
]).describe(
|
|
19897
|
-
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
|
|
20226
|
+
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
|
|
19898
20227
|
).or(import_zod6.z.null()).optional()
|
|
19899
20228
|
}).optional(),
|
|
19900
20229
|
output: import_zod6.z.object({
|
|
@@ -19967,7 +20296,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
19967
20296
|
server_label: import_zod6.z.string().describe(
|
|
19968
20297
|
"A label for this MCP server, used to identify it in tool calls.\n"
|
|
19969
20298
|
),
|
|
19970
|
-
server_url: import_zod6.z.string().optional().describe(
|
|
20299
|
+
server_url: import_zod6.z.string().url().optional().describe(
|
|
19971
20300
|
"The URL for the MCP server. One of `server_url` or `connector_id` must be\nprovided.\n"
|
|
19972
20301
|
),
|
|
19973
20302
|
connector_id: import_zod6.z.enum([
|
|
@@ -20045,6 +20374,16 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
20045
20374
|
).default(createRealtimeClientSecretBodySessionToolChoiceDefault).describe(
|
|
20046
20375
|
"How the model chooses tools. Provide one of the string modes or force a specific\nfunction/MCP tool.\n"
|
|
20047
20376
|
),
|
|
20377
|
+
parallel_tool_calls: import_zod6.z.boolean().optional().describe(
|
|
20378
|
+
"Whether the model may call multiple tools in parallel. Only supported by\nreasoning Realtime models such as `gpt-realtime-2`.\n"
|
|
20379
|
+
),
|
|
20380
|
+
reasoning: import_zod6.z.object({
|
|
20381
|
+
effort: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).default(createRealtimeClientSecretBodySessionReasoningEffortDefault).describe(
|
|
20382
|
+
"Constrains effort on reasoning for reasoning-capable Realtime models such as\n`gpt-realtime-2`.\n"
|
|
20383
|
+
)
|
|
20384
|
+
}).optional().describe(
|
|
20385
|
+
"Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.\n"
|
|
20386
|
+
),
|
|
20048
20387
|
max_output_tokens: import_zod6.z.number().or(import_zod6.z.enum(["inf"])).optional().describe(
|
|
20049
20388
|
"Maximum number of output tokens for a single assistant response,\ninclusive of tool calls. Provide an integer between 1 and 4096 to\nlimit output tokens, or `inf` for the maximum available tokens for a\ngiven model. Defaults to `inf`.\n"
|
|
20050
20389
|
),
|
|
@@ -20084,7 +20423,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
20084
20423
|
).or(
|
|
20085
20424
|
import_zod6.z.object({
|
|
20086
20425
|
type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
|
|
20087
|
-
image_url: import_zod6.z.string().describe(
|
|
20426
|
+
image_url: import_zod6.z.string().url().describe(
|
|
20088
20427
|
"The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
|
|
20089
20428
|
).or(import_zod6.z.null()).optional(),
|
|
20090
20429
|
file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
|
|
@@ -20098,7 +20437,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
20098
20437
|
file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
|
|
20099
20438
|
filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
|
|
20100
20439
|
file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
|
|
20101
|
-
file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
|
|
20440
|
+
file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
|
|
20102
20441
|
detail: import_zod6.z.enum(["low", "high"]).optional()
|
|
20103
20442
|
}).describe("A file input to the model.")
|
|
20104
20443
|
)
|
|
@@ -20134,16 +20473,20 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
20134
20473
|
"gpt-4o-mini-transcribe",
|
|
20135
20474
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
20136
20475
|
"gpt-4o-transcribe",
|
|
20137
|
-
"gpt-4o-transcribe-diarize"
|
|
20476
|
+
"gpt-4o-transcribe-diarize",
|
|
20477
|
+
"gpt-realtime-whisper"
|
|
20138
20478
|
])
|
|
20139
20479
|
).optional().describe(
|
|
20140
|
-
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`,
|
|
20480
|
+
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
|
|
20141
20481
|
),
|
|
20142
20482
|
language: import_zod6.z.string().optional().describe(
|
|
20143
20483
|
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
20144
20484
|
),
|
|
20145
20485
|
prompt: import_zod6.z.string().optional().describe(
|
|
20146
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
20486
|
+
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
|
|
20487
|
+
),
|
|
20488
|
+
delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
|
|
20489
|
+
"Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
|
|
20147
20490
|
)
|
|
20148
20491
|
}).optional(),
|
|
20149
20492
|
noise_reduction: import_zod6.z.object({
|
|
@@ -20210,7 +20553,7 @@ var createRealtimeClientSecretBody = import_zod6.z.object({
|
|
|
20210
20553
|
"Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
|
|
20211
20554
|
)
|
|
20212
20555
|
]).describe(
|
|
20213
|
-
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
|
|
20556
|
+
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
|
|
20214
20557
|
).or(import_zod6.z.null()).optional()
|
|
20215
20558
|
}).optional()
|
|
20216
20559
|
}).optional().describe("Configuration for input and output audio.\n"),
|
|
@@ -20236,23 +20579,21 @@ var createRealtimeClientSecretResponseSessionAudioOutputSpeedDefault = 1;
|
|
|
20236
20579
|
var createRealtimeClientSecretResponseSessionAudioOutputSpeedMin = 0.25;
|
|
20237
20580
|
var createRealtimeClientSecretResponseSessionAudioOutputSpeedMax = 1.5;
|
|
20238
20581
|
var createRealtimeClientSecretResponseSessionToolChoiceDefault = "auto";
|
|
20582
|
+
var createRealtimeClientSecretResponseSessionReasoningEffortDefault = "low";
|
|
20239
20583
|
var createRealtimeClientSecretResponseSessionTruncationRetentionRatioMin = 0;
|
|
20240
20584
|
var createRealtimeClientSecretResponseSessionTruncationRetentionRatioMax = 1;
|
|
20241
20585
|
var createRealtimeClientSecretResponseSessionTruncationTokenLimitsPostInstructionsMin = 0;
|
|
20242
20586
|
var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
20243
20587
|
value: import_zod6.z.string().describe("The generated client secret value."),
|
|
20244
20588
|
expires_at: import_zod6.z.number().describe("Expiration timestamp for the client secret, in seconds since epoch."),
|
|
20245
|
-
session: import_zod6.z.
|
|
20589
|
+
session: import_zod6.z.union([
|
|
20246
20590
|
import_zod6.z.object({
|
|
20247
|
-
client_secret: import_zod6.z.object({
|
|
20248
|
-
value: import_zod6.z.string().describe(
|
|
20249
|
-
"Ephemeral key usable in client environments to authenticate connections to the Realtime API. Use this in client-side environments rather than a standard API token, which should only be used server-side.\n"
|
|
20250
|
-
),
|
|
20251
|
-
expires_at: import_zod6.z.number().describe(
|
|
20252
|
-
"Timestamp for when the token expires. Currently, all tokens expire\nafter one minute.\n"
|
|
20253
|
-
)
|
|
20254
|
-
}).describe("Ephemeral key returned by the API."),
|
|
20255
20591
|
type: import_zod6.z.enum(["realtime"]).describe("The type of session to create. Always `realtime` for the Realtime API.\n"),
|
|
20592
|
+
id: import_zod6.z.string().describe(
|
|
20593
|
+
"Unique identifier for the session that looks like `sess_1234567890abcdef`.\n"
|
|
20594
|
+
),
|
|
20595
|
+
object: import_zod6.z.enum(["realtime.session"]).describe("The object type. Always `realtime.session`."),
|
|
20596
|
+
expires_at: import_zod6.z.number().optional().describe("Expiration timestamp for the session, in seconds since epoch."),
|
|
20256
20597
|
output_modalities: import_zod6.z.array(import_zod6.z.enum(["text", "audio"])).default(createRealtimeClientSecretResponseSessionOutputModalitiesDefault).describe(
|
|
20257
20598
|
'The set of modalities the model can respond with. It defaults to `["audio"]`, indicating\nthat the model will respond with audio plus a transcript. `["text"]` can be used to make\nthe model respond with text only. It is not possible to request both `text` and `audio` at the same time.\n'
|
|
20258
20599
|
),
|
|
@@ -20260,6 +20601,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
20260
20601
|
import_zod6.z.enum([
|
|
20261
20602
|
"gpt-realtime",
|
|
20262
20603
|
"gpt-realtime-1.5",
|
|
20604
|
+
"gpt-realtime-2",
|
|
20263
20605
|
"gpt-realtime-2025-08-28",
|
|
20264
20606
|
"gpt-4o-realtime-preview",
|
|
20265
20607
|
"gpt-4o-realtime-preview-2024-10-01",
|
|
@@ -20282,15 +20624,15 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
20282
20624
|
audio: import_zod6.z.object({
|
|
20283
20625
|
input: import_zod6.z.object({
|
|
20284
20626
|
format: import_zod6.z.object({
|
|
20285
|
-
type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
|
|
20286
|
-
rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
|
|
20627
|
+
type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
|
|
20628
|
+
rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
|
|
20287
20629
|
}).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
|
|
20288
20630
|
import_zod6.z.object({
|
|
20289
|
-
type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
|
|
20631
|
+
type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
|
|
20290
20632
|
}).describe("The G.711 \u03BC-law format.")
|
|
20291
20633
|
).or(
|
|
20292
20634
|
import_zod6.z.object({
|
|
20293
|
-
type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
|
|
20635
|
+
type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
|
|
20294
20636
|
}).describe("The G.711 A-law format.")
|
|
20295
20637
|
).optional(),
|
|
20296
20638
|
transcription: import_zod6.z.object({
|
|
@@ -20300,20 +20642,19 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
20300
20642
|
"gpt-4o-mini-transcribe",
|
|
20301
20643
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
20302
20644
|
"gpt-4o-transcribe",
|
|
20303
|
-
"gpt-4o-transcribe-diarize"
|
|
20645
|
+
"gpt-4o-transcribe-diarize",
|
|
20646
|
+
"gpt-realtime-whisper"
|
|
20304
20647
|
])
|
|
20305
20648
|
).optional().describe(
|
|
20306
|
-
"The model
|
|
20307
|
-
),
|
|
20308
|
-
language: import_zod6.z.string().optional().describe(
|
|
20309
|
-
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
20649
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
20310
20650
|
),
|
|
20651
|
+
language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
|
|
20311
20652
|
prompt: import_zod6.z.string().optional().describe(
|
|
20312
|
-
|
|
20653
|
+
"The prompt configured for input audio transcription, when present.\n"
|
|
20313
20654
|
)
|
|
20314
20655
|
}).optional(),
|
|
20315
20656
|
noise_reduction: import_zod6.z.object({
|
|
20316
|
-
type: import_zod6.z.enum(["near_field", "far_field"]).describe(
|
|
20657
|
+
type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
|
|
20317
20658
|
"Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones such as laptop or conference room microphones.\n"
|
|
20318
20659
|
)
|
|
20319
20660
|
}).optional().describe(
|
|
@@ -20376,20 +20717,20 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
20376
20717
|
"Server-side semantic turn detection which uses a model to determine when the user has finished speaking."
|
|
20377
20718
|
)
|
|
20378
20719
|
]).describe(
|
|
20379
|
-
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n'
|
|
20720
|
+
'Configuration for turn detection, ether Server VAD or Semantic VAD. This can be set to `null` to turn off, in which case the client must manually trigger model response.\n\nServer VAD means that the model will detect the start and end of speech based on audio volume and respond at the end of user speech.\n\nSemantic VAD is more advanced and uses a turn detection model (in conjunction with VAD) to semantically estimate whether the user has finished speaking, then dynamically sets a timeout based on this probability. For example, if user audio trails off with "uhhm", the model will score a low probability of turn end and wait longer for the user to continue speaking. This can be useful for more natural conversations, but may have a higher latency.\n\nFor `gpt-realtime-whisper` transcription sessions, turn detection must be\nset to `null`; VAD is not supported.\n'
|
|
20380
20721
|
).or(import_zod6.z.null()).optional()
|
|
20381
20722
|
}).optional(),
|
|
20382
20723
|
output: import_zod6.z.object({
|
|
20383
20724
|
format: import_zod6.z.object({
|
|
20384
|
-
type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
|
|
20385
|
-
rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
|
|
20725
|
+
type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
|
|
20726
|
+
rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
|
|
20386
20727
|
}).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
|
|
20387
20728
|
import_zod6.z.object({
|
|
20388
|
-
type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
|
|
20729
|
+
type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
|
|
20389
20730
|
}).describe("The G.711 \u03BC-law format.")
|
|
20390
20731
|
).or(
|
|
20391
20732
|
import_zod6.z.object({
|
|
20392
|
-
type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
|
|
20733
|
+
type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
|
|
20393
20734
|
}).describe("The G.711 A-law format.")
|
|
20394
20735
|
).optional(),
|
|
20395
20736
|
voice: import_zod6.z.string().or(
|
|
@@ -20433,7 +20774,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
20433
20774
|
).or(import_zod6.z.null()).optional(),
|
|
20434
20775
|
tools: import_zod6.z.array(
|
|
20435
20776
|
import_zod6.z.object({
|
|
20436
|
-
type: import_zod6.z.enum(["function"]).describe("The type of the tool, i.e. `function`."),
|
|
20777
|
+
type: import_zod6.z.enum(["function"]).optional().describe("The type of the tool, i.e. `function`."),
|
|
20437
20778
|
name: import_zod6.z.string().optional().describe("The name of the function."),
|
|
20438
20779
|
description: import_zod6.z.string().optional().describe(
|
|
20439
20780
|
"The description of the function, including guidance on when and how\nto call it, and guidance about what to tell the user when calling\n(if anything).\n"
|
|
@@ -20445,7 +20786,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
20445
20786
|
server_label: import_zod6.z.string().describe(
|
|
20446
20787
|
"A label for this MCP server, used to identify it in tool calls.\n"
|
|
20447
20788
|
),
|
|
20448
|
-
server_url: import_zod6.z.string().optional().describe(
|
|
20789
|
+
server_url: import_zod6.z.string().url().optional().describe(
|
|
20449
20790
|
"The URL for the MCP server. One of `server_url` or `connector_id` must be\nprovided.\n"
|
|
20450
20791
|
),
|
|
20451
20792
|
connector_id: import_zod6.z.enum([
|
|
@@ -20457,7 +20798,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
20457
20798
|
"connector_outlookcalendar",
|
|
20458
20799
|
"connector_outlookemail",
|
|
20459
20800
|
"connector_sharepoint"
|
|
20460
|
-
]).describe(
|
|
20801
|
+
]).optional().describe(
|
|
20461
20802
|
"Identifier for service connectors, like those available in ChatGPT. One of\n`server_url` or `connector_id` must be provided. Learn more about service\nconnectors [here](/docs/guides/tools-remote-mcp#connectors).\n\nCurrently supported `connector_id` values are:\n\n- Dropbox: `connector_dropbox`\n- Gmail: `connector_gmail`\n- Google Calendar: `connector_googlecalendar`\n- Google Drive: `connector_googledrive`\n- Microsoft Teams: `connector_microsoftteams`\n- Outlook Calendar: `connector_outlookcalendar`\n- Outlook Email: `connector_outlookemail`\n- SharePoint: `connector_sharepoint`\n"
|
|
20462
20803
|
),
|
|
20463
20804
|
authorization: import_zod6.z.string().optional().describe(
|
|
@@ -20523,6 +20864,13 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
20523
20864
|
).default(createRealtimeClientSecretResponseSessionToolChoiceDefault).describe(
|
|
20524
20865
|
"How the model chooses tools. Provide one of the string modes or force a specific\nfunction/MCP tool.\n"
|
|
20525
20866
|
),
|
|
20867
|
+
reasoning: import_zod6.z.object({
|
|
20868
|
+
effort: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).default(createRealtimeClientSecretResponseSessionReasoningEffortDefault).describe(
|
|
20869
|
+
"Constrains effort on reasoning for reasoning-capable Realtime models such as\n`gpt-realtime-2`.\n"
|
|
20870
|
+
)
|
|
20871
|
+
}).optional().describe(
|
|
20872
|
+
"Configuration for reasoning-capable Realtime models such as `gpt-realtime-2`.\n"
|
|
20873
|
+
),
|
|
20526
20874
|
max_output_tokens: import_zod6.z.number().or(import_zod6.z.enum(["inf"])).optional().describe(
|
|
20527
20875
|
"Maximum number of output tokens for a single assistant response,\ninclusive of tool calls. Provide an integer between 1 and 4096 to\nlimit output tokens, or `inf` for the maximum available tokens for a\ngiven model. Defaults to `inf`.\n"
|
|
20528
20876
|
),
|
|
@@ -20562,7 +20910,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
20562
20910
|
).or(
|
|
20563
20911
|
import_zod6.z.object({
|
|
20564
20912
|
type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
|
|
20565
|
-
image_url: import_zod6.z.string().describe(
|
|
20913
|
+
image_url: import_zod6.z.string().url().describe(
|
|
20566
20914
|
"The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
|
|
20567
20915
|
).or(import_zod6.z.null()).optional(),
|
|
20568
20916
|
file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
|
|
@@ -20576,8 +20924,8 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
20576
20924
|
file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
|
|
20577
20925
|
filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
|
|
20578
20926
|
file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
|
|
20579
|
-
file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
|
|
20580
|
-
detail: import_zod6.z.enum(["low", "high"])
|
|
20927
|
+
file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
|
|
20928
|
+
detail: import_zod6.z.enum(["low", "high"]).optional()
|
|
20581
20929
|
}).describe("A file input to the model.")
|
|
20582
20930
|
)
|
|
20583
20931
|
).describe(
|
|
@@ -20586,9 +20934,7 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
20586
20934
|
}).describe(
|
|
20587
20935
|
"Reference to a prompt template and its variables.\n[Learn more](/docs/guides/text?api-mode=responses#reusable-prompts).\n"
|
|
20588
20936
|
).or(import_zod6.z.null()).optional()
|
|
20589
|
-
}).describe(
|
|
20590
|
-
"A new Realtime session configuration, with an ephemeral key. Default TTL\nfor keys is one minute.\n"
|
|
20591
|
-
),
|
|
20937
|
+
}).describe("A Realtime session configuration object.\n"),
|
|
20592
20938
|
import_zod6.z.object({
|
|
20593
20939
|
type: import_zod6.z.enum(["transcription"]).describe(
|
|
20594
20940
|
"The type of session. Always `transcription` for transcription sessions.\n"
|
|
@@ -20604,15 +20950,15 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
20604
20950
|
audio: import_zod6.z.object({
|
|
20605
20951
|
input: import_zod6.z.object({
|
|
20606
20952
|
format: import_zod6.z.object({
|
|
20607
|
-
type: import_zod6.z.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
|
|
20608
|
-
rate: import_zod6.z.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
|
|
20953
|
+
type: import_zod6.z.enum(["audio/pcm"]).optional().describe("The audio format. Always `audio/pcm`."),
|
|
20954
|
+
rate: import_zod6.z.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
|
|
20609
20955
|
}).describe("The PCM audio format. Only a 24kHz sample rate is supported.").or(
|
|
20610
20956
|
import_zod6.z.object({
|
|
20611
|
-
type: import_zod6.z.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
|
|
20957
|
+
type: import_zod6.z.enum(["audio/pcmu"]).optional().describe("The audio format. Always `audio/pcmu`.")
|
|
20612
20958
|
}).describe("The G.711 \u03BC-law format.")
|
|
20613
20959
|
).or(
|
|
20614
20960
|
import_zod6.z.object({
|
|
20615
|
-
type: import_zod6.z.enum(["audio/pcma"]).describe("The audio format. Always `audio/pcma`.")
|
|
20961
|
+
type: import_zod6.z.enum(["audio/pcma"]).optional().describe("The audio format. Always `audio/pcma`.")
|
|
20616
20962
|
}).describe("The G.711 A-law format.")
|
|
20617
20963
|
).optional(),
|
|
20618
20964
|
transcription: import_zod6.z.object({
|
|
@@ -20622,20 +20968,19 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
20622
20968
|
"gpt-4o-mini-transcribe",
|
|
20623
20969
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
20624
20970
|
"gpt-4o-transcribe",
|
|
20625
|
-
"gpt-4o-transcribe-diarize"
|
|
20971
|
+
"gpt-4o-transcribe-diarize",
|
|
20972
|
+
"gpt-realtime-whisper"
|
|
20626
20973
|
])
|
|
20627
20974
|
).optional().describe(
|
|
20628
|
-
"The model
|
|
20629
|
-
),
|
|
20630
|
-
language: import_zod6.z.string().optional().describe(
|
|
20631
|
-
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
20975
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
20632
20976
|
),
|
|
20977
|
+
language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
|
|
20633
20978
|
prompt: import_zod6.z.string().optional().describe(
|
|
20634
|
-
|
|
20979
|
+
"The prompt configured for input audio transcription, when present.\n"
|
|
20635
20980
|
)
|
|
20636
20981
|
}).optional(),
|
|
20637
20982
|
noise_reduction: import_zod6.z.object({
|
|
20638
|
-
type: import_zod6.z.enum(["near_field", "far_field"]).describe(
|
|
20983
|
+
type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
|
|
20639
20984
|
"Type of noise reduction. `near_field` is for close-talking microphones such as headphones, `far_field` is for far-field microphones such as laptop or conference room microphones.\n"
|
|
20640
20985
|
)
|
|
20641
20986
|
}).optional().describe("Configuration for input audio noise reduction.\n"),
|
|
@@ -20652,8 +20997,10 @@ var createRealtimeClientSecretResponse = import_zod6.z.object({
|
|
|
20652
20997
|
silence_duration_ms: import_zod6.z.number().optional().describe(
|
|
20653
20998
|
"Duration of silence to detect speech stop (in milliseconds). Defaults\nto 500ms. With shorter values the model will respond more quickly,\nbut may jump in on short pauses from the user.\n"
|
|
20654
20999
|
)
|
|
20655
|
-
}).
|
|
20656
|
-
"Configuration for turn detection. Can be set to `null` to turn off. Server\nVAD means that the model will detect the start and end of speech based on\naudio volume and respond at the end of user speech.\n"
|
|
21000
|
+
}).describe(
|
|
21001
|
+
"Configuration for turn detection. Can be set to `null` to turn off. Server\nVAD means that the model will detect the start and end of speech based on\naudio volume and respond at the end of user speech. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported.\n"
|
|
21002
|
+
).or(import_zod6.z.null()).optional().describe(
|
|
21003
|
+
"Configuration for turn detection. For `gpt-realtime-whisper`, this must be `null`; VAD is not supported.\n"
|
|
20657
21004
|
)
|
|
20658
21005
|
}).optional()
|
|
20659
21006
|
}).optional().describe("Configuration for input audio for the session.\n")
|
|
@@ -20789,7 +21136,7 @@ var createRealtimeSessionBody = import_zod6.z.object({
|
|
|
20789
21136
|
).or(
|
|
20790
21137
|
import_zod6.z.object({
|
|
20791
21138
|
type: import_zod6.z.enum(["input_image"]).describe("The type of the input item. Always `input_image`."),
|
|
20792
|
-
image_url: import_zod6.z.string().describe(
|
|
21139
|
+
image_url: import_zod6.z.string().url().describe(
|
|
20793
21140
|
"The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL."
|
|
20794
21141
|
).or(import_zod6.z.null()).optional(),
|
|
20795
21142
|
file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
|
|
@@ -20803,7 +21150,7 @@ var createRealtimeSessionBody = import_zod6.z.object({
|
|
|
20803
21150
|
file_id: import_zod6.z.string().describe("The ID of the file to be sent to the model.").or(import_zod6.z.null()).optional(),
|
|
20804
21151
|
filename: import_zod6.z.string().optional().describe("The name of the file to be sent to the model."),
|
|
20805
21152
|
file_data: import_zod6.z.string().optional().describe("The content of the file to be sent to the model.\n"),
|
|
20806
|
-
file_url: import_zod6.z.string().optional().describe("The URL of the file to be sent to the model."),
|
|
21153
|
+
file_url: import_zod6.z.string().url().optional().describe("The URL of the file to be sent to the model."),
|
|
20807
21154
|
detail: import_zod6.z.enum(["low", "high"]).optional()
|
|
20808
21155
|
}).describe("A file input to the model.")
|
|
20809
21156
|
)
|
|
@@ -20851,17 +21198,14 @@ var createRealtimeSessionResponse = import_zod6.z.object({
|
|
|
20851
21198
|
"gpt-4o-mini-transcribe",
|
|
20852
21199
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
20853
21200
|
"gpt-4o-transcribe",
|
|
20854
|
-
"gpt-4o-transcribe-diarize"
|
|
21201
|
+
"gpt-4o-transcribe-diarize",
|
|
21202
|
+
"gpt-realtime-whisper"
|
|
20855
21203
|
])
|
|
20856
21204
|
).optional().describe(
|
|
20857
|
-
"The model
|
|
21205
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
20858
21206
|
),
|
|
20859
|
-
language: import_zod6.z.string().optional().describe(
|
|
20860
|
-
|
|
20861
|
-
),
|
|
20862
|
-
prompt: import_zod6.z.string().optional().describe(
|
|
20863
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
20864
|
-
)
|
|
21207
|
+
language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
|
|
21208
|
+
prompt: import_zod6.z.string().optional().describe("The prompt configured for input audio transcription, when present.\n")
|
|
20865
21209
|
}).optional(),
|
|
20866
21210
|
noise_reduction: import_zod6.z.object({
|
|
20867
21211
|
type: import_zod6.z.enum(["near_field", "far_field"]).optional().describe(
|
|
@@ -20986,16 +21330,20 @@ var createRealtimeTranscriptionSessionBody = import_zod6.z.object({
|
|
|
20986
21330
|
"gpt-4o-mini-transcribe",
|
|
20987
21331
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
20988
21332
|
"gpt-4o-transcribe",
|
|
20989
|
-
"gpt-4o-transcribe-diarize"
|
|
21333
|
+
"gpt-4o-transcribe-diarize",
|
|
21334
|
+
"gpt-realtime-whisper"
|
|
20990
21335
|
])
|
|
20991
21336
|
).optional().describe(
|
|
20992
|
-
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`,
|
|
21337
|
+
"The model to use for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`. Use `gpt-4o-transcribe-diarize` when you need diarization with speaker labels.\n"
|
|
20993
21338
|
),
|
|
20994
21339
|
language: import_zod6.z.string().optional().describe(
|
|
20995
21340
|
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
20996
21341
|
),
|
|
20997
21342
|
prompt: import_zod6.z.string().optional().describe(
|
|
20998
|
-
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\n'
|
|
21343
|
+
'An optional text to guide the model\'s style or continue a previous audio\nsegment.\nFor `whisper-1`, the [prompt is a list of keywords](/docs/guides/speech-to-text#prompting).\nFor `gpt-4o-transcribe` models (excluding `gpt-4o-transcribe-diarize`), the prompt is a free text string, for example "expect words related to technology".\nPrompt is not supported with `gpt-realtime-whisper` in GA Realtime sessions.\n'
|
|
21344
|
+
),
|
|
21345
|
+
delay: import_zod6.z.enum(["minimal", "low", "medium", "high", "xhigh"]).optional().describe(
|
|
21346
|
+
"Controls how long the model waits before emitting transcription text.\nHigher values can improve transcription accuracy at the cost of latency.\nOnly supported with `gpt-realtime-whisper` in GA Realtime sessions.\n"
|
|
20999
21347
|
)
|
|
21000
21348
|
}).optional(),
|
|
21001
21349
|
include: import_zod6.z.array(import_zod6.z.enum(["item.input_audio_transcription.logprobs"])).optional().describe(
|
|
@@ -21024,17 +21372,14 @@ var createRealtimeTranscriptionSessionResponse = import_zod6.z.object({
|
|
|
21024
21372
|
"gpt-4o-mini-transcribe",
|
|
21025
21373
|
"gpt-4o-mini-transcribe-2025-12-15",
|
|
21026
21374
|
"gpt-4o-transcribe",
|
|
21027
|
-
"gpt-4o-transcribe-diarize"
|
|
21375
|
+
"gpt-4o-transcribe-diarize",
|
|
21376
|
+
"gpt-realtime-whisper"
|
|
21028
21377
|
])
|
|
21029
21378
|
).optional().describe(
|
|
21030
|
-
"The model
|
|
21031
|
-
),
|
|
21032
|
-
language: import_zod6.z.string().optional().describe(
|
|
21033
|
-
"The language of the input audio. Supplying the input language in\n[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) format\nwill improve accuracy and latency.\n"
|
|
21379
|
+
"The model used for transcription. Current options are `whisper-1`, `gpt-4o-mini-transcribe`, `gpt-4o-mini-transcribe-2025-12-15`, `gpt-4o-transcribe`, `gpt-4o-transcribe-diarize`, and `gpt-realtime-whisper`.\n"
|
|
21034
21380
|
),
|
|
21035
|
-
|
|
21036
|
-
|
|
21037
|
-
)
|
|
21381
|
+
language: import_zod6.z.string().optional().describe("The language of the input audio.\n"),
|
|
21382
|
+
prompt: import_zod6.z.string().optional().describe("The prompt configured for input audio transcription, when present.\n")
|
|
21038
21383
|
}).optional(),
|
|
21039
21384
|
turn_detection: import_zod6.z.object({
|
|
21040
21385
|
type: import_zod6.z.string().optional().describe("Type of turn detection, only `server_vad` is currently supported.\n"),
|
|
@@ -24647,11 +24992,73 @@ var getModelsResponse = import_zod10.z.object({
|
|
|
24647
24992
|
})
|
|
24648
24993
|
).describe("List of available models and their attributes.")
|
|
24649
24994
|
});
|
|
24995
|
+
var getTtsModelsResponse = import_zod10.z.object({
|
|
24996
|
+
models: import_zod10.z.array(
|
|
24997
|
+
import_zod10.z.object({
|
|
24998
|
+
id: import_zod10.z.string().describe("Unique identifier of the model."),
|
|
24999
|
+
aliased_model_id: import_zod10.z.string().or(import_zod10.z.null()).describe("If this is an alias, the id of the aliased model."),
|
|
25000
|
+
name: import_zod10.z.string().describe("Name of the model."),
|
|
25001
|
+
voices: import_zod10.z.array(
|
|
25002
|
+
import_zod10.z.object({
|
|
25003
|
+
id: import_zod10.z.string().describe("Unique identifier of the voice."),
|
|
25004
|
+
description: import_zod10.z.string().describe("Description of the TTS voice."),
|
|
25005
|
+
gender: import_zod10.z.enum(["male", "female", "neutral"])
|
|
25006
|
+
})
|
|
25007
|
+
).describe("List of available voices for this model."),
|
|
25008
|
+
languages: import_zod10.z.array(
|
|
25009
|
+
import_zod10.z.object({
|
|
25010
|
+
code: import_zod10.z.string().describe("2-letter language code."),
|
|
25011
|
+
name: import_zod10.z.string().describe("Language name.")
|
|
25012
|
+
})
|
|
25013
|
+
).describe("List of languages supported by the model.")
|
|
25014
|
+
})
|
|
25015
|
+
).describe("List of available TTS models and their attributes.")
|
|
25016
|
+
});
|
|
25017
|
+
var getUsageLogsQueryLimitDefault = 1e3;
|
|
25018
|
+
var getUsageLogsQueryLimitMax = 1e3;
|
|
25019
|
+
var getUsageLogsQuerySortDefault = "end_time_asc";
|
|
25020
|
+
var getUsageLogsQueryParams = import_zod10.z.object({
|
|
25021
|
+
start_time: import_zod10.z.string().describe("Start of the time window (inclusive). Filters by request end time."),
|
|
25022
|
+
end_time: import_zod10.z.string().describe("End of the time window (exclusive). Filters by request end time."),
|
|
25023
|
+
limit: import_zod10.z.number().min(1).max(getUsageLogsQueryLimitMax).default(getUsageLogsQueryLimitDefault).describe("Maximum number of usage log entries to return."),
|
|
25024
|
+
sort: import_zod10.z.enum(["end_time_asc", "end_time_desc"]).default(getUsageLogsQuerySortDefault).describe(
|
|
25025
|
+
"Sort order by end_time.Use `end_time_desc` to get the most recent entries first. When paginating, pass the same `sort` value alongside the cursor."
|
|
25026
|
+
),
|
|
25027
|
+
cursor: import_zod10.z.string().or(import_zod10.z.null()).optional().describe("Pagination cursor for the next page of results.")
|
|
25028
|
+
});
|
|
25029
|
+
var getUsageLogsResponse = import_zod10.z.object({
|
|
25030
|
+
usage_logs: import_zod10.z.array(
|
|
25031
|
+
import_zod10.z.object({
|
|
25032
|
+
uuid: import_zod10.z.string().uuid().describe("Unique identifier of the request."),
|
|
25033
|
+
request_scope: import_zod10.z.string().describe("Scope of the request (api / playground)."),
|
|
25034
|
+
client_reference_id: import_zod10.z.string().describe("Client reference ID supplied on the original request. Empty string if none."),
|
|
25035
|
+
model: import_zod10.z.string().describe("Model identifier."),
|
|
25036
|
+
start_time: import_zod10.z.string().datetime({}).describe("When the request started."),
|
|
25037
|
+
end_time: import_zod10.z.string().datetime({}).describe("When the request ended."),
|
|
25038
|
+
input_text_tokens: import_zod10.z.number(),
|
|
25039
|
+
input_audio_tokens: import_zod10.z.number(),
|
|
25040
|
+
input_audio_duration_ms: import_zod10.z.number(),
|
|
25041
|
+
output_text_tokens: import_zod10.z.number(),
|
|
25042
|
+
output_audio_tokens: import_zod10.z.number(),
|
|
25043
|
+
output_audio_duration_ms: import_zod10.z.number(),
|
|
25044
|
+
cost_usd: import_zod10.z.string(),
|
|
25045
|
+
input_cost_usd: import_zod10.z.string(),
|
|
25046
|
+
input_text_cost_usd: import_zod10.z.string(),
|
|
25047
|
+
input_audio_cost_usd: import_zod10.z.string(),
|
|
25048
|
+
output_cost_usd: import_zod10.z.string(),
|
|
25049
|
+
output_text_cost_usd: import_zod10.z.string(),
|
|
25050
|
+
output_audio_cost_usd: import_zod10.z.string()
|
|
25051
|
+
})
|
|
25052
|
+
).describe("Per-request usage log entries ordered by end_time, uuid (per `sort`)."),
|
|
25053
|
+
next_page_cursor: import_zod10.z.string().or(import_zod10.z.null()).optional().describe(
|
|
25054
|
+
"A pagination token that references the next page of results. When more data is available, this field contains a value to pass in the cursor parameter of a subsequent request. When null, no additional results are available."
|
|
25055
|
+
)
|
|
25056
|
+
});
|
|
24650
25057
|
var createTemporaryApiKeyBodyExpiresInSecondsMax = 3600;
|
|
24651
25058
|
var createTemporaryApiKeyBodyClientReferenceIdMaxOne = 256;
|
|
24652
25059
|
var createTemporaryApiKeyBodyMaxSessionDurationSecondsMaxOne = 18e3;
|
|
24653
25060
|
var createTemporaryApiKeyBody = import_zod10.z.object({
|
|
24654
|
-
usage_type: import_zod10.z.enum(["transcribe_websocket"]),
|
|
25061
|
+
usage_type: import_zod10.z.enum(["transcribe_websocket", "tts_rt"]),
|
|
24655
25062
|
expires_in_seconds: import_zod10.z.number().min(1).max(createTemporaryApiKeyBodyExpiresInSecondsMax).describe("Duration in seconds until the temporary API key expires."),
|
|
24656
25063
|
client_reference_id: import_zod10.z.string().max(createTemporaryApiKeyBodyClientReferenceIdMaxOne).or(import_zod10.z.null()).optional().describe("Optional tracking identifier string. Does not need to be unique."),
|
|
24657
25064
|
single_use: import_zod10.z.boolean().or(import_zod10.z.null()).optional().describe("If true, the temporary API key can be used only once."),
|
|
@@ -24659,6 +25066,28 @@ var createTemporaryApiKeyBody = import_zod10.z.object({
|
|
|
24659
25066
|
"Maximum WebSocket connection duration in seconds. If exceeded, the connection will be dropped. If not set, no limit is applied."
|
|
24660
25067
|
)
|
|
24661
25068
|
});
|
|
25069
|
+
var getConcurrencyLimitsResponse = import_zod10.z.object({
|
|
25070
|
+
project: import_zod10.z.object({
|
|
25071
|
+
current: import_zod10.z.object({
|
|
25072
|
+
transcribe_concurrent: import_zod10.z.number(),
|
|
25073
|
+
tts_concurrent: import_zod10.z.number()
|
|
25074
|
+
}).describe("Live counts read from Redis"),
|
|
25075
|
+
limits: import_zod10.z.object({
|
|
25076
|
+
transcribe_concurrent: import_zod10.z.number().or(import_zod10.z.null()),
|
|
25077
|
+
tts_concurrent: import_zod10.z.number().or(import_zod10.z.null())
|
|
25078
|
+
}).describe("Configured limits")
|
|
25079
|
+
}),
|
|
25080
|
+
organization: import_zod10.z.object({
|
|
25081
|
+
current: import_zod10.z.object({
|
|
25082
|
+
transcribe_concurrent: import_zod10.z.number(),
|
|
25083
|
+
tts_concurrent: import_zod10.z.number()
|
|
25084
|
+
}).describe("Live counts read from Redis"),
|
|
25085
|
+
limits: import_zod10.z.object({
|
|
25086
|
+
transcribe_concurrent: import_zod10.z.number().or(import_zod10.z.null()),
|
|
25087
|
+
tts_concurrent: import_zod10.z.number().or(import_zod10.z.null())
|
|
25088
|
+
}).describe("Configured limits")
|
|
25089
|
+
})
|
|
25090
|
+
});
|
|
24662
25091
|
|
|
24663
25092
|
// src/generated/soniox/streaming-types.zod.ts
|
|
24664
25093
|
var import_zod11 = require("zod");
|
|
@@ -24724,10 +25153,10 @@ var sonioxStructuredContextSchema = import_zod11.z.object({
|
|
|
24724
25153
|
var sonioxContextSchema = import_zod11.z.union([sonioxStructuredContextSchema, import_zod11.z.string()]);
|
|
24725
25154
|
var sonioxRealtimeModelSchema = import_zod11.z.enum([
|
|
24726
25155
|
"stt-rt-v4",
|
|
24727
|
-
"stt-rt-v3",
|
|
24728
25156
|
"stt-rt-preview",
|
|
24729
25157
|
"stt-rt-v3-preview",
|
|
24730
|
-
"stt-rt-preview-v2"
|
|
25158
|
+
"stt-rt-preview-v2",
|
|
25159
|
+
"stt-rt-v3"
|
|
24731
25160
|
]);
|
|
24732
25161
|
var streamingTranscriberParams3 = import_zod11.z.object({
|
|
24733
25162
|
model: sonioxRealtimeModelSchema,
|
|
@@ -24735,12 +25164,16 @@ var streamingTranscriberParams3 = import_zod11.z.object({
|
|
|
24735
25164
|
sampleRate: import_zod11.z.number().optional(),
|
|
24736
25165
|
numChannels: import_zod11.z.number().optional(),
|
|
24737
25166
|
languageHints: import_zod11.z.array(import_zod11.z.string()).optional(),
|
|
25167
|
+
languageHintsStrict: import_zod11.z.boolean().optional(),
|
|
24738
25168
|
context: sonioxContextSchema.optional(),
|
|
24739
25169
|
enableSpeakerDiarization: import_zod11.z.boolean().optional(),
|
|
24740
25170
|
enableLanguageIdentification: import_zod11.z.boolean().optional(),
|
|
24741
25171
|
enableEndpointDetection: import_zod11.z.boolean().optional(),
|
|
25172
|
+
maxEndpointDelayMs: import_zod11.z.number().optional(),
|
|
24742
25173
|
translation: sonioxTranslationConfigSchema.optional(),
|
|
24743
|
-
clientReferenceId: import_zod11.z.string().optional()
|
|
25174
|
+
clientReferenceId: import_zod11.z.string().optional(),
|
|
25175
|
+
keepaliveIntervalMs: import_zod11.z.number().optional(),
|
|
25176
|
+
connectTimeoutMs: import_zod11.z.number().optional()
|
|
24744
25177
|
});
|
|
24745
25178
|
var sonioxTranslationStatusSchema = import_zod11.z.enum(["original", "translation", "none"]);
|
|
24746
25179
|
var sonioxTokenSchema = import_zod11.z.object({
|