voice-router-dev 0.8.2 → 0.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1396,23 +1396,173 @@ import { z as zod3 } from "zod";
1396
1396
  var uploadFileResponse = zod3.object({
1397
1397
  upload_url: zod3.string().describe("A URL that points to your audio file, accessible only by AssemblyAI's servers\n")
1398
1398
  });
1399
- var createTranscriptBodyLanguageCodeDefault = "en_us";
1400
- var createTranscriptBodyLanguageConfidenceThresholdMin = 0;
1401
- var createTranscriptBodyLanguageConfidenceThresholdMax = 1;
1402
- var createTranscriptBodySpeechModelDefault = "best";
1403
- var createTranscriptBodyPunctuateDefault = true;
1404
- var createTranscriptBodyFormatTextDefault = true;
1405
- var createTranscriptBodyRedactPiiSubDefault = "hash";
1406
1399
  var createTranscriptBodyContentSafetyConfidenceDefault = 50;
1407
- var createTranscriptBodyContentSafetyConfidenceMin = 25;
1408
- var createTranscriptBodyContentSafetyConfidenceMax = 100;
1409
- var createTranscriptBodySpeechThresholdMin = 0;
1410
- var createTranscriptBodySpeechThresholdMax = 1;
1400
+ var createTranscriptBodyFormatTextDefault = true;
1401
+ var createTranscriptBodyLanguageDetectionOptionsFallbackLanguageDefault = "auto";
1402
+ var createTranscriptBodyLanguageDetectionOptionsCodeSwitchingConfidenceThresholdDefault = 0.3;
1403
+ var createTranscriptBodyPunctuateDefault = true;
1404
+ var createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault = 1;
1405
+ var createTranscriptBodySpeechUnderstandingRequestTranslationFormalDefault = true;
1411
1406
  var createTranscriptBody = zod3.object({
1412
- audio_url: zod3.string().describe("The URL of the audio or video file to transcribe.")
1413
- }).and(
1414
- zod3.object({
1415
- language_code: zod3.enum([
1407
+ audio_end_at: zod3.number().optional().describe(
1408
+ "The point in time, in milliseconds, to stop transcribing in your media file. See [Set the start and end of the transcript](https://www.assemblyai.com/docs/pre-recorded-audio/set-the-start-and-end-of-the-transcript) for more details."
1409
+ ),
1410
+ audio_start_from: zod3.number().optional().describe(
1411
+ "The point in time, in milliseconds, to begin transcribing in your media file. See [Set the start and end of the transcript](https://www.assemblyai.com/docs/pre-recorded-audio/set-the-start-and-end-of-the-transcript) for more details."
1412
+ ),
1413
+ auto_chapters: zod3.boolean().optional().describe(
1414
+ "Enable [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/auto-chapters), can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
1415
+ ),
1416
+ auto_highlights: zod3.boolean().optional().describe(
1417
+ "Enable [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases), either true or false"
1418
+ ),
1419
+ content_safety: zod3.boolean().optional().describe(
1420
+ "Enable [Content Moderation](https://www.assemblyai.com/docs/content-moderation), can be true or false"
1421
+ ),
1422
+ content_safety_confidence: zod3.number().default(createTranscriptBodyContentSafetyConfidenceDefault).describe(
1423
+ "The confidence threshold for the [Content Moderation](https://www.assemblyai.com/docs/content-moderation) model. Values must be between 25 and 100."
1424
+ ),
1425
+ custom_spelling: zod3.array(
1426
+ zod3.object({
1427
+ from: zod3.array(zod3.string()).describe("Words or phrases to replace"),
1428
+ to: zod3.string().describe("Word to replace with")
1429
+ }).describe(
1430
+ "Object containing words or phrases to replace, and the word or phrase to replace with"
1431
+ )
1432
+ ).optional().describe(
1433
+ "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/custom-spelling) for more details."
1434
+ ),
1435
+ disfluencies: zod3.boolean().optional().describe(
1436
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/filler-words), like "umm", in your media file; can be true or false'
1437
+ ),
1438
+ domain: zod3.string().nullish().describe(
1439
+ 'Enable domain-specific transcription models to improve accuracy for specialized terminology. Set to `"medical-v1"` to enable [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) for improved accuracy of medical terms such as medications, procedures, conditions, and dosages.\n\nSupported languages: English (`en`), Spanish (`es`), German (`de`), French (`fr`). If used with an unsupported language, the parameter is ignored and a warning is returned.\n'
1440
+ ),
1441
+ entity_detection: zod3.boolean().optional().describe(
1442
+ "Enable [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/entity-detection), can be true or false"
1443
+ ),
1444
+ filter_profanity: zod3.boolean().optional().describe(
1445
+ "Filter profanity from the transcribed text, can be true or false. See [Profanity Filtering](https://www.assemblyai.com/docs/profanity-filtering) for more details."
1446
+ ),
1447
+ format_text: zod3.boolean().default(createTranscriptBodyFormatTextDefault).describe(
1448
+ "Enable [Text Formatting](https://www.assemblyai.com/docs/pre-recorded-audio), can be true or false"
1449
+ ),
1450
+ iab_categories: zod3.boolean().optional().describe(
1451
+ "Enable [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection), can be true or false"
1452
+ ),
1453
+ keyterms_prompt: zod3.array(zod3.string()).optional().describe(
1454
+ "Improve accuracy with up to 200 (for Universal-2) or 1000 (for Universal-3 Pro) domain-specific words or phrases (maximum 6 words per phrase). See [Keyterms Prompting](https://www.assemblyai.com/docs/pre-recorded-audio/keyterms-prompting) for more details.\n"
1455
+ ),
1456
+ language_code: zod3.enum([
1457
+ "en",
1458
+ "en_au",
1459
+ "en_uk",
1460
+ "en_us",
1461
+ "es",
1462
+ "fr",
1463
+ "de",
1464
+ "it",
1465
+ "pt",
1466
+ "nl",
1467
+ "af",
1468
+ "sq",
1469
+ "am",
1470
+ "ar",
1471
+ "hy",
1472
+ "as",
1473
+ "az",
1474
+ "ba",
1475
+ "eu",
1476
+ "be",
1477
+ "bn",
1478
+ "bs",
1479
+ "br",
1480
+ "bg",
1481
+ "my",
1482
+ "ca",
1483
+ "zh",
1484
+ "hr",
1485
+ "cs",
1486
+ "da",
1487
+ "et",
1488
+ "fo",
1489
+ "fi",
1490
+ "gl",
1491
+ "ka",
1492
+ "el",
1493
+ "gu",
1494
+ "ht",
1495
+ "ha",
1496
+ "haw",
1497
+ "he",
1498
+ "hi",
1499
+ "hu",
1500
+ "is",
1501
+ "id",
1502
+ "ja",
1503
+ "jw",
1504
+ "kn",
1505
+ "kk",
1506
+ "km",
1507
+ "ko",
1508
+ "lo",
1509
+ "la",
1510
+ "lv",
1511
+ "ln",
1512
+ "lt",
1513
+ "lb",
1514
+ "mk",
1515
+ "mg",
1516
+ "ms",
1517
+ "ml",
1518
+ "mt",
1519
+ "mi",
1520
+ "mr",
1521
+ "mn",
1522
+ "ne",
1523
+ "no",
1524
+ "nn",
1525
+ "oc",
1526
+ "pa",
1527
+ "ps",
1528
+ "fa",
1529
+ "pl",
1530
+ "ro",
1531
+ "ru",
1532
+ "sa",
1533
+ "sr",
1534
+ "sn",
1535
+ "sd",
1536
+ "si",
1537
+ "sk",
1538
+ "sl",
1539
+ "so",
1540
+ "su",
1541
+ "sw",
1542
+ "sv",
1543
+ "tl",
1544
+ "tg",
1545
+ "ta",
1546
+ "tt",
1547
+ "te",
1548
+ "th",
1549
+ "bo",
1550
+ "tr",
1551
+ "tk",
1552
+ "uk",
1553
+ "ur",
1554
+ "uz",
1555
+ "vi",
1556
+ "cy",
1557
+ "yi",
1558
+ "yo"
1559
+ ]).describe(
1560
+ "The language of your audio file. Possible values are found in [Supported Languages](https://www.assemblyai.com/docs/pre-recorded-audio/supported-languages).\nThe default value is 'en_us'.\n"
1561
+ ).or(zod3.null()).optional().describe(
1562
+ "The language of your audio file. Possible values are found in [Supported Languages](https://www.assemblyai.com/docs/pre-recorded-audio/supported-languages).\nThe default value is 'en_us'.\n"
1563
+ ),
1564
+ language_codes: zod3.array(
1565
+ zod3.enum([
1416
1566
  "en",
1417
1567
  "en_au",
1418
1568
  "en_uk",
@@ -1516,429 +1666,62 @@ var createTranscriptBody = zod3.object({
1516
1666
  "yi",
1517
1667
  "yo"
1518
1668
  ]).describe(
1519
- "The language of your audio file. Possible values are found in [Supported Languages](https://www.assemblyai.com/docs/concepts/supported-languages).\nThe default value is 'en_us'.\n"
1520
- ).or(zod3.string()).or(zod3.null()).default(createTranscriptBodyLanguageCodeDefault).describe(
1521
- "The language of your audio file. Possible values are found in [Supported Languages](https://www.assemblyai.com/docs/concepts/supported-languages).\nThe default value is 'en_us'.\n"
1522
- ),
1523
- language_detection: zod3.boolean().optional().describe(
1524
- "Enable [Automatic language detection](https://www.assemblyai.com/docs/models/speech-recognition#automatic-language-detection), either true or false."
1525
- ),
1526
- language_confidence_threshold: zod3.number().min(createTranscriptBodyLanguageConfidenceThresholdMin).max(createTranscriptBodyLanguageConfidenceThresholdMax).optional().describe(
1527
- "The confidence threshold for the automatically detected language.\nAn error will be returned if the language confidence is below this threshold.\nDefaults to 0.\n"
1528
- ),
1529
- speech_model: zod3.enum(["best", "slam-1", "universal"]).describe("The speech model to use for the transcription.").or(zod3.null()).default(createTranscriptBodySpeechModelDefault).describe(
1530
- 'The speech model to use for the transcription. When `null`, the "best" model is used.'
1531
- ),
1532
- punctuate: zod3.boolean().default(createTranscriptBodyPunctuateDefault).describe("Enable Automatic Punctuation, can be true or false"),
1533
- format_text: zod3.boolean().default(createTranscriptBodyFormatTextDefault).describe("Enable Text Formatting, can be true or false"),
1534
- disfluencies: zod3.boolean().optional().describe(
1535
- 'Transcribe Filler Words, like "umm", in your media file; can be true or false'
1536
- ),
1537
- multichannel: zod3.boolean().optional().describe(
1538
- "Enable [Multichannel](https://www.assemblyai.com/docs/models/speech-recognition#multichannel-transcription) transcription, can be true or false."
1539
- ),
1540
- webhook_url: zod3.string().optional().describe(
1541
- "The URL to which we send webhook requests.\nWe sends two different types of webhook requests.\nOne request when a transcript is completed or failed, and one request when the redacted audio is ready if redact_pii_audio is enabled.\n"
1542
- ),
1543
- webhook_auth_header_name: zod3.string().nullish().describe(
1544
- "The header name to be sent with the transcript completed or failed webhook requests"
1545
- ),
1546
- webhook_auth_header_value: zod3.string().nullish().describe(
1547
- "The header value to send back with the transcript completed or failed webhook requests for added security"
1548
- ),
1549
- auto_highlights: zod3.boolean().optional().describe("Enable Key Phrases, either true or false"),
1550
- audio_start_from: zod3.number().optional().describe("The point in time, in milliseconds, to begin transcribing in your media file"),
1551
- audio_end_at: zod3.number().optional().describe("The point in time, in milliseconds, to stop transcribing in your media file"),
1552
- word_boost: zod3.array(zod3.string()).optional().describe("The list of custom vocabulary to boost transcription probability for"),
1553
- boost_param: zod3.enum(["low", "default", "high"]).optional().describe("How much to boost specified words"),
1554
- filter_profanity: zod3.boolean().optional().describe("Filter profanity from the transcribed text, can be true or false"),
1555
- redact_pii: zod3.boolean().optional().describe(
1556
- "Redact PII from the transcribed text using the Redact PII model, can be true or false"
1557
- ),
1558
- redact_pii_audio: zod3.boolean().optional().describe(
1559
- 'Generate a copy of the original media file with spoken PII "beeped" out, can be true or false. See [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more details.'
1560
- ),
1561
- redact_pii_audio_quality: zod3.enum(["mp3", "wav"]).optional().describe(
1562
- "Controls the filetype of the audio created by redact_pii_audio. Currently supports mp3 (default) and wav. See [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more details."
1563
- ),
1564
- redact_pii_policies: zod3.array(
1565
- zod3.enum([
1566
- "account_number",
1567
- "banking_information",
1568
- "blood_type",
1569
- "credit_card_cvv",
1570
- "credit_card_expiration",
1571
- "credit_card_number",
1572
- "date",
1573
- "date_interval",
1574
- "date_of_birth",
1575
- "drivers_license",
1576
- "drug",
1577
- "duration",
1578
- "email_address",
1579
- "event",
1580
- "filename",
1581
- "gender_sexuality",
1582
- "healthcare_number",
1583
- "injury",
1584
- "ip_address",
1585
- "language",
1586
- "location",
1587
- "marital_status",
1588
- "medical_condition",
1589
- "medical_process",
1590
- "money_amount",
1591
- "nationality",
1592
- "number_sequence",
1593
- "occupation",
1594
- "organization",
1595
- "passport_number",
1596
- "password",
1597
- "person_age",
1598
- "person_name",
1599
- "phone_number",
1600
- "physical_attribute",
1601
- "political_affiliation",
1602
- "religion",
1603
- "statistics",
1604
- "time",
1605
- "url",
1606
- "us_social_security_number",
1607
- "username",
1608
- "vehicle_id",
1609
- "zodiac_sign"
1610
- ]).describe("The type of PII to redact")
1611
- ).optional().describe(
1612
- "The list of PII Redaction policies to enable. See [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more details."
1613
- ),
1614
- redact_pii_sub: zod3.enum(["entity_name", "hash"]).describe(
1615
- 'The replacement logic for detected PII, can be "entity_name" or "hash". See [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more details.'
1616
- ).or(zod3.null()).default(createTranscriptBodyRedactPiiSubDefault).describe(
1617
- 'The replacement logic for detected PII, can be "entity_type" or "hash". See [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more details.'
1618
- ),
1619
- speaker_labels: zod3.boolean().optional().describe(
1620
- "Enable [Speaker diarization](https://www.assemblyai.com/docs/models/speaker-diarization), can be true or false"
1621
- ),
1622
- speakers_expected: zod3.number().nullish().describe(
1623
- "Tells the speaker label model how many speakers it should attempt to identify. See [Speaker diarization](https://www.assemblyai.com/docs/models/speaker-diarization) for more details."
1624
- ),
1625
- content_safety: zod3.boolean().optional().describe(
1626
- "Enable [Content Moderation](https://www.assemblyai.com/docs/models/content-moderation), can be true or false"
1627
- ),
1628
- content_safety_confidence: zod3.number().min(createTranscriptBodyContentSafetyConfidenceMin).max(createTranscriptBodyContentSafetyConfidenceMax).default(createTranscriptBodyContentSafetyConfidenceDefault).describe(
1629
- "The confidence threshold for the Content Moderation model. Values must be between 25 and 100."
1630
- ),
1631
- iab_categories: zod3.boolean().optional().describe(
1632
- "Enable [Topic Detection](https://www.assemblyai.com/docs/models/topic-detection), can be true or false"
1633
- ),
1634
- custom_spelling: zod3.array(
1635
- zod3.object({
1636
- from: zod3.array(zod3.string().describe("Word or phrase to replace")).describe("Words or phrases to replace"),
1637
- to: zod3.string().describe("Word to replace with")
1638
- }).describe(
1639
- "Object containing words or phrases to replace, and the word or phrase to replace with"
1640
- )
1641
- ).optional().describe("Customize how words are spelled and formatted using to and from values"),
1642
- keyterms_prompt: zod3.array(zod3.string()).optional().describe(
1643
- "<Warning>`keyterms_prompt` is only supported when the `speech_model` is specified as `slam-1`</Warning>\nImprove accuracy with up to 1000 domain-specific words or phrases (maximum 6 words per phrase).\n"
1644
- ),
1645
- prompt: zod3.string().optional().describe("This parameter does not currently have any functionality attached to it."),
1646
- sentiment_analysis: zod3.boolean().optional().describe(
1647
- "Enable [Sentiment Analysis](https://www.assemblyai.com/docs/models/sentiment-analysis), can be true or false"
1648
- ),
1649
- auto_chapters: zod3.boolean().optional().describe(
1650
- "Enable [Auto Chapters](https://www.assemblyai.com/docs/models/auto-chapters), can be true or false"
1669
+ "The language of your audio file. Possible values are found in [Supported Languages](https://www.assemblyai.com/docs/pre-recorded-audio/supported-languages).\nThe default value is 'en_us'.\n"
1670
+ )
1671
+ ).nullish().describe(
1672
+ "The language codes of your audio file. Used for [Code switching](/docs/speech-to-text/pre-recorded-audio/code-switching)\nOne of the values specified must be `en`.\n"
1673
+ ),
1674
+ language_confidence_threshold: zod3.number().optional().describe(
1675
+ "The confidence threshold for the automatically detected language.\nAn error will be returned if the language confidence is below this threshold.\nDefaults to 0. See [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection) for more details.\n"
1676
+ ),
1677
+ language_detection: zod3.boolean().optional().describe(
1678
+ "Enable [Automatic language detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection), either true or false."
1679
+ ),
1680
+ language_detection_options: zod3.object({
1681
+ expected_languages: zod3.array(zod3.string()).optional().describe(
1682
+ 'List of languages expected in the audio file. Defaults to `["all"]` when unspecified. See [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection) for more details.'
1651
1683
  ),
1652
- entity_detection: zod3.boolean().optional().describe(
1653
- "Enable [Entity Detection](https://www.assemblyai.com/docs/models/entity-detection), can be true or false"
1684
+ fallback_language: zod3.string().default(createTranscriptBodyLanguageDetectionOptionsFallbackLanguageDefault).describe(
1685
+ 'If the detected language of the audio file is not in the list of expected languages, the `fallback_language` is used. Specify `["auto"]` to let our model choose the fallback language from `expected_languages` with the highest confidence score. See [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection) for more details.\n'
1654
1686
  ),
1655
- speech_threshold: zod3.number().min(createTranscriptBodySpeechThresholdMin).max(createTranscriptBodySpeechThresholdMax).nullish().describe(
1656
- "Reject audio files that contain less than this fraction of speech.\nValid values are in the range [0, 1] inclusive.\n"
1687
+ code_switching: zod3.boolean().optional().describe(
1688
+ "Whether [code switching](/docs/speech-to-text/pre-recorded-audio/code-switching) should be detected.\n"
1657
1689
  ),
1658
- summarization: zod3.boolean().optional().describe(
1659
- "Enable [Summarization](https://www.assemblyai.com/docs/models/summarization), can be true or false"
1690
+ code_switching_confidence_threshold: zod3.number().default(
1691
+ createTranscriptBodyLanguageDetectionOptionsCodeSwitchingConfidenceThresholdDefault
1692
+ ).describe(
1693
+ "The confidence threshold for [code switching](/docs/speech-to-text/pre-recorded-audio/code-switching) detection. If the code switching confidence is below this threshold, the transcript will be processed in the language with the highest `language_detection_confidence` score.\n"
1694
+ )
1695
+ }).optional().describe(
1696
+ "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
1697
+ ),
1698
+ multichannel: zod3.boolean().optional().describe(
1699
+ "Enable [Multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) transcription, can be true or false."
1700
+ ),
1701
+ prompt: zod3.string().optional().describe(
1702
+ "Provide natural language prompting of up to 1,500 words of contextual information to the model. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for best practices.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
1703
+ ),
1704
+ punctuate: zod3.boolean().default(createTranscriptBodyPunctuateDefault).describe(
1705
+ "Enable [Automatic Punctuation](https://www.assemblyai.com/docs/pre-recorded-audio), can be true or false"
1706
+ ),
1707
+ redact_pii: zod3.boolean().optional().describe(
1708
+ "Redact PII from the transcribed text using the Redact PII model, can be true or false. See [PII Redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
1709
+ ),
1710
+ redact_pii_audio: zod3.boolean().optional().describe(
1711
+ 'Generate a copy of the original media file with spoken PII "beeped" out, can be true or false. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction#request-for-redacted-audio) for more details.'
1712
+ ),
1713
+ redact_pii_audio_options: zod3.object({
1714
+ return_redacted_no_speech_audio: zod3.boolean().optional().describe(
1715
+ "By default, audio redaction provides redacted audio URLs only when speech is detected. However, if your use-case specifically requires redacted audio files even for silent audio files without any dialogue, you can opt to receive these URLs by setting this parameter to `true`."
1660
1716
  ),
1661
- summary_model: zod3.enum(["informative", "conversational", "catchy"]).optional().describe("The model to summarize the transcript"),
1662
- summary_type: zod3.enum(["bullets", "bullets_verbose", "gist", "headline", "paragraph"]).optional().describe("The type of summary"),
1663
- custom_topics: zod3.boolean().optional().describe("Enable custom topics, either true or false"),
1664
- topics: zod3.array(zod3.string()).optional().describe("The list of custom topics")
1665
- }).describe("The parameters for creating a transcript")
1666
- ).describe("The parameters for creating a transcript");
1667
- var createTranscriptResponseLanguageConfidenceThresholdMin = 0;
1668
- var createTranscriptResponseLanguageConfidenceThresholdMax = 1;
1669
- var createTranscriptResponseLanguageConfidenceMin = 0;
1670
- var createTranscriptResponseLanguageConfidenceMax = 1;
1671
- var createTranscriptResponseWordsItemConfidenceMin = 0;
1672
- var createTranscriptResponseWordsItemConfidenceMax = 1;
1673
- var createTranscriptResponseUtterancesItemConfidenceMin = 0;
1674
- var createTranscriptResponseUtterancesItemConfidenceMax = 1;
1675
- var createTranscriptResponseUtterancesItemWordsItemConfidenceMin = 0;
1676
- var createTranscriptResponseUtterancesItemWordsItemConfidenceMax = 1;
1677
- var createTranscriptResponseConfidenceMin = 0;
1678
- var createTranscriptResponseConfidenceMax = 1;
1679
- var createTranscriptResponseAutoHighlightsResultResultsItemRankMin = 0;
1680
- var createTranscriptResponseAutoHighlightsResultResultsItemRankMax = 1;
1681
- var createTranscriptResponseContentSafetyLabelsResultsItemLabelsItemConfidenceMin = 0;
1682
- var createTranscriptResponseContentSafetyLabelsResultsItemLabelsItemConfidenceMax = 1;
1683
- var createTranscriptResponseContentSafetyLabelsResultsItemLabelsItemSeverityMin = 0;
1684
- var createTranscriptResponseContentSafetyLabelsResultsItemLabelsItemSeverityMax = 1;
1685
- var createTranscriptResponseContentSafetyLabelsSummaryMinOne = 0;
1686
- var createTranscriptResponseContentSafetyLabelsSummaryMaxOne = 1;
1687
- var createTranscriptResponseContentSafetyLabelsSeverityScoreSummaryLowMin = 0;
1688
- var createTranscriptResponseContentSafetyLabelsSeverityScoreSummaryLowMax = 1;
1689
- var createTranscriptResponseContentSafetyLabelsSeverityScoreSummaryMediumMin = 0;
1690
- var createTranscriptResponseContentSafetyLabelsSeverityScoreSummaryMediumMax = 1;
1691
- var createTranscriptResponseContentSafetyLabelsSeverityScoreSummaryHighMin = 0;
1692
- var createTranscriptResponseContentSafetyLabelsSeverityScoreSummaryHighMax = 1;
1693
- var createTranscriptResponseIabCategoriesResultResultsItemLabelsItemRelevanceMin = 0;
1694
- var createTranscriptResponseIabCategoriesResultResultsItemLabelsItemRelevanceMax = 1;
1695
- var createTranscriptResponseIabCategoriesResultSummaryMinOne = 0;
1696
- var createTranscriptResponseIabCategoriesResultSummaryMaxOne = 1;
1697
- var createTranscriptResponseSentimentAnalysisResultsItemConfidenceMin = 0;
1698
- var createTranscriptResponseSentimentAnalysisResultsItemConfidenceMax = 1;
1699
- var createTranscriptResponseSpeechThresholdMin = 0;
1700
- var createTranscriptResponseSpeechThresholdMax = 1;
1701
- var createTranscriptResponse = zod3.object({
1702
- id: zod3.string().uuid().describe("The unique identifier of your transcript"),
1703
- audio_url: zod3.string().describe("The URL of the media that was transcribed"),
1704
- status: zod3.enum(["queued", "processing", "completed", "error"]).describe(
1705
- "The status of your transcript. Possible values are queued, processing, completed, or error."
1717
+ override_audio_redaction_method: zod3.enum(["silence"]).optional().describe(
1718
+ "Specify the method used to redact audio. By default, redacted audio uses a beep sound. Set to `silence` to replace PII with silence instead of a beep."
1719
+ )
1720
+ }).optional().describe(
1721
+ "Specify options for [PII redacted audio](https://www.assemblyai.com/docs/pii-redaction#request-for-redacted-audio) files."
1706
1722
  ),
1707
- language_code: zod3.enum([
1708
- "en",
1709
- "en_au",
1710
- "en_uk",
1711
- "en_us",
1712
- "es",
1713
- "fr",
1714
- "de",
1715
- "it",
1716
- "pt",
1717
- "nl",
1718
- "af",
1719
- "sq",
1720
- "am",
1721
- "ar",
1722
- "hy",
1723
- "as",
1724
- "az",
1725
- "ba",
1726
- "eu",
1727
- "be",
1728
- "bn",
1729
- "bs",
1730
- "br",
1731
- "bg",
1732
- "my",
1733
- "ca",
1734
- "zh",
1735
- "hr",
1736
- "cs",
1737
- "da",
1738
- "et",
1739
- "fo",
1740
- "fi",
1741
- "gl",
1742
- "ka",
1743
- "el",
1744
- "gu",
1745
- "ht",
1746
- "ha",
1747
- "haw",
1748
- "he",
1749
- "hi",
1750
- "hu",
1751
- "is",
1752
- "id",
1753
- "ja",
1754
- "jw",
1755
- "kn",
1756
- "kk",
1757
- "km",
1758
- "ko",
1759
- "lo",
1760
- "la",
1761
- "lv",
1762
- "ln",
1763
- "lt",
1764
- "lb",
1765
- "mk",
1766
- "mg",
1767
- "ms",
1768
- "ml",
1769
- "mt",
1770
- "mi",
1771
- "mr",
1772
- "mn",
1773
- "ne",
1774
- "no",
1775
- "nn",
1776
- "oc",
1777
- "pa",
1778
- "ps",
1779
- "fa",
1780
- "pl",
1781
- "ro",
1782
- "ru",
1783
- "sa",
1784
- "sr",
1785
- "sn",
1786
- "sd",
1787
- "si",
1788
- "sk",
1789
- "sl",
1790
- "so",
1791
- "su",
1792
- "sw",
1793
- "sv",
1794
- "tl",
1795
- "tg",
1796
- "ta",
1797
- "tt",
1798
- "te",
1799
- "th",
1800
- "bo",
1801
- "tr",
1802
- "tk",
1803
- "uk",
1804
- "ur",
1805
- "uz",
1806
- "vi",
1807
- "cy",
1808
- "yi",
1809
- "yo"
1810
- ]).describe(
1811
- "The language of your audio file. Possible values are found in [Supported Languages](https://www.assemblyai.com/docs/concepts/supported-languages).\nThe default value is 'en_us'.\n"
1812
- ).or(zod3.string()).optional().describe(
1813
- "The language of your audio file.\nPossible values are found in [Supported Languages](https://www.assemblyai.com/docs/concepts/supported-languages).\nThe default value is 'en_us'.\n"
1814
- ),
1815
- language_detection: zod3.boolean().nullish().describe(
1816
- "Whether [Automatic language detection](https://www.assemblyai.com/docs/models/speech-recognition#automatic-language-detection) is enabled, either true or false"
1817
- ),
1818
- language_confidence_threshold: zod3.number().min(createTranscriptResponseLanguageConfidenceThresholdMin).max(createTranscriptResponseLanguageConfidenceThresholdMax).nullable().describe(
1819
- "The confidence threshold for the automatically detected language.\nAn error will be returned if the language confidence is below this threshold.\n"
1820
- ),
1821
- language_confidence: zod3.number().min(createTranscriptResponseLanguageConfidenceMin).max(createTranscriptResponseLanguageConfidenceMax).nullable().describe(
1822
- "The confidence score for the detected language, between 0.0 (low confidence) and 1.0 (high confidence)"
1823
- ),
1824
- speech_model: zod3.enum(["best", "slam-1", "universal"]).describe("The speech model to use for the transcription.").or(zod3.null()).describe(
1825
- "The speech model used for the transcription. When `null`, the default model is used."
1826
- ),
1827
- text: zod3.string().nullish().describe("The textual transcript of your media file"),
1828
- words: zod3.array(
1829
- zod3.object({
1830
- confidence: zod3.number().min(createTranscriptResponseWordsItemConfidenceMin).max(createTranscriptResponseWordsItemConfidenceMax).describe("The confidence score for the transcript of this word"),
1831
- start: zod3.number().describe("The starting time, in milliseconds, for the word"),
1832
- end: zod3.number().describe("The ending time, in milliseconds, for the word"),
1833
- text: zod3.string().describe("The text of the word"),
1834
- channel: zod3.string().nullish().describe(
1835
- "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
1836
- ),
1837
- speaker: zod3.string().nullable().describe(
1838
- "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/models/speaker-diarization) is enabled, else null"
1839
- )
1840
- })
1841
- ).nullish().describe(
1842
- "An array of temporally-sequential word objects, one for each word in the transcript.\nSee [Speech recognition](https://www.assemblyai.com/docs/models/speech-recognition) for more information.\n"
1843
- ),
1844
- utterances: zod3.array(
1845
- zod3.object({
1846
- confidence: zod3.number().min(createTranscriptResponseUtterancesItemConfidenceMin).max(createTranscriptResponseUtterancesItemConfidenceMax).describe("The confidence score for the transcript of this utterance"),
1847
- start: zod3.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
1848
- end: zod3.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
1849
- text: zod3.string().describe("The text for this utterance"),
1850
- words: zod3.array(
1851
- zod3.object({
1852
- confidence: zod3.number().min(createTranscriptResponseUtterancesItemWordsItemConfidenceMin).max(createTranscriptResponseUtterancesItemWordsItemConfidenceMax).describe("The confidence score for the transcript of this word"),
1853
- start: zod3.number().describe("The starting time, in milliseconds, for the word"),
1854
- end: zod3.number().describe("The ending time, in milliseconds, for the word"),
1855
- text: zod3.string().describe("The text of the word"),
1856
- channel: zod3.string().nullish().describe(
1857
- "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
1858
- ),
1859
- speaker: zod3.string().nullable().describe(
1860
- "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/models/speaker-diarization) is enabled, else null"
1861
- )
1862
- })
1863
- ).describe("The words in the utterance."),
1864
- channel: zod3.string().nullish().describe(
1865
- "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
1866
- ),
1867
- speaker: zod3.string().describe(
1868
- 'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
1869
- )
1870
- })
1871
- ).nullish().describe(
1872
- "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/speech-to-text/speaker-diarization) and [Multichannel transcription](https://www.assemblyai.com/docs/speech-to-text/speech-recognition#multichannel-transcription) for more information.\n"
1873
- ),
1874
- confidence: zod3.number().min(createTranscriptResponseConfidenceMin).max(createTranscriptResponseConfidenceMax).nullish().describe(
1875
- "The confidence score for the transcript, between 0.0 (low confidence) and 1.0 (high confidence)"
1876
- ),
1877
- audio_duration: zod3.number().nullish().describe("The duration of this transcript object's media file, in seconds"),
1878
- punctuate: zod3.boolean().nullish().describe("Whether Automatic Punctuation is enabled, either true or false"),
1879
- format_text: zod3.boolean().nullish().describe("Whether Text Formatting is enabled, either true or false"),
1880
- disfluencies: zod3.boolean().nullish().describe('Transcribe Filler Words, like "umm", in your media file; can be true or false'),
1881
- multichannel: zod3.boolean().nullish().describe(
1882
- "Whether [Multichannel transcription](https://www.assemblyai.com/docs/models/speech-recognition#multichannel-transcription) was enabled in the transcription request, either true or false"
1883
- ),
1884
- audio_channels: zod3.number().optional().describe(
1885
- "The number of audio channels in the audio file. This is only present when multichannel is enabled."
1886
- ),
1887
- webhook_url: zod3.string().nullish().describe(
1888
- "The URL to which we send webhook requests.\nWe sends two different types of webhook requests.\nOne request when a transcript is completed or failed, and one request when the redacted audio is ready if redact_pii_audio is enabled.\n"
1889
- ),
1890
- webhook_status_code: zod3.number().nullish().describe(
1891
- "The status code we received from your server when delivering the transcript completed or failed webhook request, if a webhook URL was provided"
1892
- ),
1893
- webhook_auth: zod3.boolean().describe("Whether webhook authentication details were provided"),
1894
- webhook_auth_header_name: zod3.string().nullish().describe(
1895
- "The header name to be sent with the transcript completed or failed webhook requests"
1896
- ),
1897
- speed_boost: zod3.boolean().nullish().describe("Whether speed boost is enabled"),
1898
- auto_highlights: zod3.boolean().describe("Whether Key Phrases is enabled, either true or false"),
1899
- auto_highlights_result: zod3.object({
1900
- status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
1901
- results: zod3.array(
1902
- zod3.object({
1903
- count: zod3.number().describe("The total number of times the key phrase appears in the audio file"),
1904
- rank: zod3.number().min(createTranscriptResponseAutoHighlightsResultResultsItemRankMin).max(createTranscriptResponseAutoHighlightsResultResultsItemRankMax).describe(
1905
- "The total relevancy to the overall audio file of this key phrase - a greater number means more relevant"
1906
- ),
1907
- text: zod3.string().describe("The text itself of the key phrase"),
1908
- timestamps: zod3.array(
1909
- zod3.object({
1910
- start: zod3.number().describe("The start time in milliseconds"),
1911
- end: zod3.number().describe("The end time in milliseconds")
1912
- }).describe("Timestamp containing a start and end property in milliseconds")
1913
- ).describe("The timestamp of the of the key phrase")
1914
- })
1915
- ).describe("A temporally-sequential array of Key Phrases")
1916
- }).describe(
1917
- "An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/models/key-phrases) for more information.\n"
1918
- ).or(zod3.null()).optional().describe(
1919
- "An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/models/key-phrases) for more information.\n"
1920
- ),
1921
- audio_start_from: zod3.number().nullish().describe(
1922
- "The point in time, in milliseconds, in the file at which the transcription was started"
1923
- ),
1924
- audio_end_at: zod3.number().nullish().describe(
1925
- "The point in time, in milliseconds, in the file at which the transcription was terminated"
1926
- ),
1927
- word_boost: zod3.array(zod3.string()).optional().describe("The list of custom vocabulary to boost transcription probability for"),
1928
- boost_param: zod3.string().nullish().describe("The word boost parameter value"),
1929
- filter_profanity: zod3.boolean().nullish().describe(
1930
- "Whether [Profanity Filtering](https://www.assemblyai.com/docs/models/speech-recognition#profanity-filtering) is enabled, either true or false"
1931
- ),
1932
- redact_pii: zod3.boolean().describe(
1933
- "Whether [PII Redaction](https://www.assemblyai.com/docs/models/pii-redaction) is enabled, either true or false"
1934
- ),
1935
- redact_pii_audio: zod3.boolean().nullish().describe(
1936
- "Whether a redacted version of the audio file was generated,\neither true or false. See [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more information.\n"
1937
- ),
1938
- redact_pii_audio_quality: zod3.enum(["mp3", "wav"]).describe(
1939
- "Controls the filetype of the audio created by redact_pii_audio. Currently supports mp3 (default) and wav. See [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more details."
1940
- ).or(zod3.null()).optional().describe(
1941
- "The audio quality of the PII-redacted audio file, if redact_pii_audio is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more information.\n"
1723
+ redact_pii_audio_quality: zod3.enum(["mp3", "wav"]).optional().describe(
1724
+ "Controls the filetype of the audio created by redact_pii_audio. Currently supports mp3 (default) and wav. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction#request-for-redacted-audio) for more details."
1942
1725
  ),
1943
1726
  redact_pii_policies: zod3.array(
1944
1727
  zod3.enum([
@@ -1987,123 +1770,179 @@ var createTranscriptResponse = zod3.object({
1987
1770
  "vehicle_id",
1988
1771
  "zodiac_sign"
1989
1772
  ]).describe("The type of PII to redact")
1990
- ).nullish().describe(
1991
- "The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more information.\n"
1773
+ ).optional().describe(
1774
+ "The list of PII Redaction policies to enable. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
1992
1775
  ),
1993
- redact_pii_sub: zod3.enum(["entity_name", "hash"]).optional().describe(
1994
- 'The replacement logic for detected PII, can be "entity_name" or "hash". See [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more details.'
1776
+ redact_pii_sub: zod3.enum(["entity_name", "hash"]).describe(
1777
+ "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
1778
+ ).or(zod3.null()).optional().describe(
1779
+ "The replacement logic for detected PII, can be `entity_type` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
1995
1780
  ),
1996
- speaker_labels: zod3.boolean().nullish().describe(
1997
- "Whether [Speaker diarization](https://www.assemblyai.com/docs/models/speaker-diarization) is enabled, can be true or false"
1781
+ sentiment_analysis: zod3.boolean().optional().describe(
1782
+ "Enable [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-analysis), can be true or false"
1783
+ ),
1784
+ speaker_labels: zod3.boolean().optional().describe(
1785
+ "Enable [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization), can be true or false"
1786
+ ),
1787
+ speaker_options: zod3.object({
1788
+ min_speakers_expected: zod3.number().default(createTranscriptBodySpeakerOptionsMinSpeakersExpectedDefault).describe(
1789
+ "The minimum number of speakers expected in the audio file. See [Set a range of possible speakers](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization#set-a-range-of-possible-speakers) for more details."
1790
+ ),
1791
+ max_speakers_expected: zod3.number().optional().describe(
1792
+ "<Warning>Setting this parameter too high may hurt model accuracy</Warning>\nThe maximum number of speakers expected in the audio file. The default depends on audio duration: no limit for 0-2 minutes, 10 for 2-10 minutes, and 30 for 10+ minutes. See [Set a range of possible speakers](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization#set-a-range-of-possible-speakers) for more details.\n"
1793
+ )
1794
+ }).optional().describe(
1795
+ "Specify options for [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization#set-a-range-of-possible-speakers). Use this to set a range of possible speakers."
1998
1796
  ),
1999
1797
  speakers_expected: zod3.number().nullish().describe(
2000
- "Tell the speaker label model how many speakers it should attempt to identify. See [Speaker diarization](https://www.assemblyai.com/docs/models/speaker-diarization) for more details."
1798
+ "Tells the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization#set-number-of-speakers-expected) for more details."
2001
1799
  ),
2002
- content_safety: zod3.boolean().nullish().describe(
2003
- "Whether [Content Moderation](https://www.assemblyai.com/docs/models/content-moderation) is enabled, can be true or false"
1800
+ speech_models: zod3.array(
1801
+ zod3.string().describe(
1802
+ "The speech model to use for the transcription. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models."
1803
+ )
1804
+ ).describe(
1805
+ "List multiple speech models in priority order, allowing our system to automatically route your audio to the best available option. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models and routing behavior.\n"
2004
1806
  ),
2005
- content_safety_labels: zod3.object({
2006
- status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
2007
- results: zod3.array(
2008
- zod3.object({
2009
- text: zod3.string().describe("The transcript of the section flagged by the Content Moderation model"),
2010
- labels: zod3.array(
2011
- zod3.object({
2012
- label: zod3.string().describe("The label of the sensitive topic"),
2013
- confidence: zod3.number().min(
2014
- createTranscriptResponseContentSafetyLabelsResultsItemLabelsItemConfidenceMin
2015
- ).max(
2016
- createTranscriptResponseContentSafetyLabelsResultsItemLabelsItemConfidenceMax
2017
- ).describe("The confidence score for the topic being discussed, from 0 to 1"),
2018
- severity: zod3.number().min(
2019
- createTranscriptResponseContentSafetyLabelsResultsItemLabelsItemSeverityMin
2020
- ).max(
2021
- createTranscriptResponseContentSafetyLabelsResultsItemLabelsItemSeverityMax
2022
- ).describe("How severely the topic is discussed in the section, from 0 to 1")
2023
- })
2024
- ).describe(
2025
- "An array of safety labels, one per sensitive topic that was detected in the section"
1807
+ speech_threshold: zod3.number().nullish().describe(
1808
+ "Reject audio files that contain less than this fraction of speech.\nValid values are in the range [0, 1] inclusive. See [Speech Threshold](https://www.assemblyai.com/docs/speech-threshold) for more details.\n"
1809
+ ),
1810
+ speech_understanding: zod3.object({
1811
+ request: zod3.object({
1812
+ translation: zod3.object({
1813
+ target_languages: zod3.array(zod3.string()).describe(
1814
+ 'List of target language codes (e.g., `["es", "de"]`). See [Translation](https://www.assemblyai.com/docs/speech-understanding/translation) for supported languages.'
2026
1815
  ),
2027
- sentences_idx_start: zod3.number().describe("The sentence index at which the section begins"),
2028
- sentences_idx_end: zod3.number().describe("The sentence index at which the section ends"),
2029
- timestamp: zod3.object({
2030
- start: zod3.number().describe("The start time in milliseconds"),
2031
- end: zod3.number().describe("The end time in milliseconds")
2032
- }).describe("Timestamp containing a start and end property in milliseconds")
1816
+ formal: zod3.boolean().default(createTranscriptBodySpeechUnderstandingRequestTranslationFormalDefault).describe(
1817
+ "Use formal language style. See [Translation](https://www.assemblyai.com/docs/speech-understanding/translation) for more details."
1818
+ ),
1819
+ match_original_utterance: zod3.boolean().optional().describe(
1820
+ "When enabled with Speaker Labels, returns translated text in the utterances array. Each utterance will include a `translated_texts` key containing translations for each target language."
1821
+ )
2033
1822
  })
2034
- ).describe("An array of results for the Content Moderation model"),
2035
- summary: zod3.record(
2036
- zod3.string(),
2037
- zod3.number().min(createTranscriptResponseContentSafetyLabelsSummaryMinOne).max(createTranscriptResponseContentSafetyLabelsSummaryMaxOne).describe(
2038
- 'A confidence score for the presence of the sensitive topic "topic" across the entire audio file'
1823
+ }).describe(
1824
+ "Request body for [Translation](https://www.assemblyai.com/docs/speech-understanding/translation)."
1825
+ ).or(
1826
+ zod3.object({
1827
+ speaker_identification: zod3.object({
1828
+ speaker_type: zod3.enum(["role", "name"]).describe(
1829
+ "Type of speaker identification. See [Speaker Identification](https://www.assemblyai.com/docs/speech-understanding/speaker-identification) for details on each type."
1830
+ ),
1831
+ known_values: zod3.array(zod3.string()).optional().describe(
1832
+ 'Required if speaker_type is "role". Each value must be 35 characters or less.'
1833
+ ),
1834
+ speakers: zod3.array(
1835
+ zod3.object({
1836
+ role: zod3.string().optional().describe(
1837
+ 'The role of the speaker. Required when `speaker_type` is "role".'
1838
+ ),
1839
+ name: zod3.string().optional().describe(
1840
+ 'The name of the speaker. Required when `speaker_type` is "name".'
1841
+ ),
1842
+ description: zod3.string().optional().describe(
1843
+ "A description of the speaker to help the model identify them based on conversational context."
1844
+ )
1845
+ })
1846
+ ).optional().describe(
1847
+ "An array of speaker objects with metadata to improve identification accuracy. Each object should include a `role` or `name` (depending on `speaker_type`) and an optional `description` to help the model identify the speaker. You can also include any additional custom properties (e.g., `company`, `title`) to provide more context. Use this as an alternative to `known_values` when you want to provide additional context about each speaker."
1848
+ )
1849
+ })
1850
+ }).describe(
1851
+ "Request body for [Speaker Identification](https://www.assemblyai.com/docs/speech-understanding/speaker-identification)."
2039
1852
  )
2040
- ).describe(
2041
- "A summary of the Content Moderation confidence results for the entire audio file"
2042
- ),
2043
- severity_score_summary: zod3.record(
2044
- zod3.string(),
1853
+ ).or(
2045
1854
  zod3.object({
2046
- low: zod3.number().min(createTranscriptResponseContentSafetyLabelsSeverityScoreSummaryLowMin).max(createTranscriptResponseContentSafetyLabelsSeverityScoreSummaryLowMax),
2047
- medium: zod3.number().min(createTranscriptResponseContentSafetyLabelsSeverityScoreSummaryMediumMin).max(createTranscriptResponseContentSafetyLabelsSeverityScoreSummaryMediumMax),
2048
- high: zod3.number().min(createTranscriptResponseContentSafetyLabelsSeverityScoreSummaryHighMin).max(createTranscriptResponseContentSafetyLabelsSeverityScoreSummaryHighMax)
2049
- })
2050
- ).describe(
2051
- "A summary of the Content Moderation severity results for the entire audio file"
1855
+ custom_formatting: zod3.object({
1856
+ date: zod3.string().optional().describe(
1857
+ 'Date format pattern (e.g., `"mm/dd/yyyy"`). See [Custom Formatting](https://www.assemblyai.com/docs/speech-understanding/custom-formatting) for more details.'
1858
+ ),
1859
+ phone_number: zod3.string().optional().describe(
1860
+ 'Phone number format pattern (e.g., `"(xxx)xxx-xxxx"`). See [Custom Formatting](https://www.assemblyai.com/docs/speech-understanding/custom-formatting) for more details.'
1861
+ ),
1862
+ email: zod3.string().optional().describe(
1863
+ 'Email format pattern (e.g., `"username@domain.com"`). See [Custom Formatting](https://www.assemblyai.com/docs/speech-understanding/custom-formatting) for more details.'
1864
+ )
1865
+ })
1866
+ }).describe(
1867
+ "Request body for [Custom Formatting](https://www.assemblyai.com/docs/speech-understanding/custom-formatting)."
1868
+ )
2052
1869
  )
2053
- }).describe(
2054
- "An array of results for the Content Moderation model, if it is enabled.\nSee [Content moderation](https://www.assemblyai.com/docs/models/content-moderation) for more information.\n"
1870
+ }).optional().describe(
1871
+ "Enable speech understanding tasks like [Translation](https://www.assemblyai.com/docs/speech-understanding/translation), [Speaker Identification](https://www.assemblyai.com/docs/speech-understanding/speaker-identification), and [Custom Formatting](https://www.assemblyai.com/docs/speech-understanding/custom-formatting). See the task-specific docs for available options and configuration.\n"
1872
+ ),
1873
+ summarization: zod3.boolean().optional().describe(
1874
+ "Enable [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization), can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
1875
+ ),
1876
+ summary_model: zod3.enum(["informative", "conversational", "catchy"]).optional().describe("The model to summarize the transcript"),
1877
+ summary_type: zod3.enum(["bullets", "bullets_verbose", "gist", "headline", "paragraph"]).optional().describe("The type of summary"),
1878
+ remove_audio_tags: zod3.enum(["all"]).describe(
1879
+ 'Remove [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) from the transcript text. Set to `"all"` to remove all audio tags.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
2055
1880
  ).or(zod3.null()).optional().describe(
2056
- "An array of results for the Content Moderation model, if it is enabled.\nSee [Content moderation](https://www.assemblyai.com/docs/models/content-moderation) for more information.\n"
1881
+ 'Remove [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) from the transcript text. Set to `"all"` to remove all audio tags.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n'
2057
1882
  ),
2058
- iab_categories: zod3.boolean().nullish().describe(
2059
- "Whether [Topic Detection](https://www.assemblyai.com/docs/models/topic-detection) is enabled, can be true or false"
1883
+ temperature: zod3.number().optional().describe(
1884
+ "Control the amount of randomness injected into the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
2060
1885
  ),
2061
- iab_categories_result: zod3.object({
1886
+ webhook_auth_header_name: zod3.string().nullish().describe(
1887
+ "The header name to be sent with the transcript completed or failed [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) requests"
1888
+ ),
1889
+ webhook_auth_header_value: zod3.string().nullish().describe(
1890
+ "The header value to send back with the transcript completed or failed [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) requests for added security"
1891
+ ),
1892
+ webhook_url: zod3.string().optional().describe(
1893
+ "The URL to which we send [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) requests.\n"
1894
+ ),
1895
+ custom_topics: zod3.boolean().optional().describe("This parameter does not currently have any functionality attached to it."),
1896
+ speech_model: zod3.string().describe(
1897
+ "The speech model to use for the transcription. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models."
1898
+ ).or(zod3.null()).optional().describe(
1899
+ "This parameter has been replaced with the `speech_models` parameter, learn more about the `speech_models` parameter [here](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model).\n"
1900
+ ),
1901
+ topics: zod3.array(zod3.string()).optional().describe("This parameter does not currently have any functionality attached to it."),
1902
+ audio_url: zod3.string().describe("The URL of the audio or video file to transcribe.")
1903
+ }).describe("The parameters for creating a transcript");
1904
+ var createTranscriptResponseLanguageDetectionOptionsFallbackLanguageDefault = "auto";
1905
+ var createTranscriptResponseLanguageDetectionOptionsCodeSwitchingConfidenceThresholdDefault = 0.3;
1906
+ var createTranscriptResponseSpeechUnderstandingRequestTranslationFormalDefault = true;
1907
+ var createTranscriptResponse = zod3.object({
1908
+ audio_channels: zod3.number().optional().describe(
1909
+ "The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) is enabled."
1910
+ ),
1911
+ audio_duration: zod3.number().nullish().describe("The duration of this transcript object's media file, in seconds"),
1912
+ audio_end_at: zod3.number().nullish().describe(
1913
+ "The point in time, in milliseconds, in the file at which the transcription was terminated. See [Set the start and end of the transcript](https://www.assemblyai.com/docs/pre-recorded-audio/set-the-start-and-end-of-the-transcript) for more details."
1914
+ ),
1915
+ audio_start_from: zod3.number().nullish().describe(
1916
+ "The point in time, in milliseconds, in the file at which the transcription was started. See [Set the start and end of the transcript](https://www.assemblyai.com/docs/pre-recorded-audio/set-the-start-and-end-of-the-transcript) for more details."
1917
+ ),
1918
+ audio_url: zod3.string().describe("The URL of the media that was transcribed"),
1919
+ auto_chapters: zod3.boolean().nullish().describe(
1920
+ "Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) is enabled, can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
1921
+ ),
1922
+ auto_highlights: zod3.boolean().describe(
1923
+ "Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) is enabled, either true or false"
1924
+ ),
1925
+ auto_highlights_result: zod3.object({
2062
1926
  status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
2063
1927
  results: zod3.array(
2064
1928
  zod3.object({
2065
- text: zod3.string().describe("The text in the transcript in which a detected topic occurs"),
2066
- labels: zod3.array(
2067
- zod3.object({
2068
- relevance: zod3.number().min(
2069
- createTranscriptResponseIabCategoriesResultResultsItemLabelsItemRelevanceMin
2070
- ).max(
2071
- createTranscriptResponseIabCategoriesResultResultsItemLabelsItemRelevanceMax
2072
- ).describe("How relevant the detected topic is of a detected topic"),
2073
- label: zod3.string().describe(
2074
- "The IAB taxonomical label for the label of the detected topic, where > denotes supertopic/subtopic relationship"
2075
- )
2076
- })
2077
- ).optional().describe("An array of detected topics in the text"),
2078
- timestamp: zod3.object({
2079
- start: zod3.number().describe("The start time in milliseconds"),
2080
- end: zod3.number().describe("The end time in milliseconds")
2081
- }).optional().describe("Timestamp containing a start and end property in milliseconds")
2082
- }).describe("The result of the topic detection model")
2083
- ).describe("An array of results for the Topic Detection model"),
2084
- summary: zod3.record(
2085
- zod3.string(),
2086
- zod3.number().min(createTranscriptResponseIabCategoriesResultSummaryMinOne).max(createTranscriptResponseIabCategoriesResultSummaryMaxOne)
2087
- ).describe("The overall relevance of topic to the entire audio file")
1929
+ count: zod3.number().describe("The total number of times the key phrase appears in the audio file"),
1930
+ rank: zod3.number().describe(
1931
+ "The total relevancy to the overall audio file of this key phrase - a greater number means more relevant"
1932
+ ),
1933
+ text: zod3.string().describe("The text itself of the key phrase"),
1934
+ timestamps: zod3.array(
1935
+ zod3.object({
1936
+ start: zod3.number().describe("The start time in milliseconds"),
1937
+ end: zod3.number().describe("The end time in milliseconds")
1938
+ }).describe("Timestamp containing a start and end property in milliseconds")
1939
+ ).describe("The timestamp of the of the key phrase")
1940
+ })
1941
+ ).describe("A temporally-sequential array of Key Phrases")
2088
1942
  }).describe(
2089
- "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/models/topic-detection) for more information.\n"
1943
+ "An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) for more information.\n"
2090
1944
  ).or(zod3.null()).optional().describe(
2091
- "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/models/topic-detection) for more information.\n"
2092
- ),
2093
- custom_spelling: zod3.array(
2094
- zod3.object({
2095
- from: zod3.array(zod3.string().describe("Word or phrase to replace")).describe("Words or phrases to replace"),
2096
- to: zod3.string().describe("Word to replace with")
2097
- }).describe(
2098
- "Object containing words or phrases to replace, and the word or phrase to replace with"
2099
- )
2100
- ).nullish().describe("Customize how words are spelled and formatted using to and from values"),
2101
- keyterms_prompt: zod3.array(zod3.string()).optional().describe(
2102
- "Improve accuracy with up to 1000 domain-specific words or phrases (maximum 6 words per phrase).\n"
2103
- ),
2104
- prompt: zod3.string().optional().describe("This parameter does not currently have any functionality attached to it."),
2105
- auto_chapters: zod3.boolean().nullish().describe(
2106
- "Whether [Auto Chapters](https://www.assemblyai.com/docs/models/auto-chapters) is enabled, can be true or false"
1945
+ "An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) for more information.\n"
2107
1946
  ),
2108
1947
  chapters: zod3.array(
2109
1948
  zod3.object({
@@ -2115,45 +1954,70 @@ var createTranscriptResponse = zod3.object({
2115
1954
  start: zod3.number().describe("The starting time, in milliseconds, for the chapter"),
2116
1955
  end: zod3.number().describe("The starting time, in milliseconds, for the chapter")
2117
1956
  }).describe("Chapter of the audio file")
2118
- ).nullish().describe("An array of temporally sequential chapters for the audio file"),
2119
- summarization: zod3.boolean().describe(
2120
- "Whether [Summarization](https://www.assemblyai.com/docs/models/summarization) is enabled, either true or false"
2121
- ),
2122
- summary_type: zod3.string().nullish().describe(
2123
- "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/models/summarization) is enabled"
1957
+ ).nullish().describe(
1958
+ "An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) for more information."
2124
1959
  ),
2125
- summary_model: zod3.string().nullish().describe(
2126
- "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/models/summarization) is enabled\n"
1960
+ confidence: zod3.number().nullish().describe(
1961
+ "The confidence score for the transcript, between 0.0 (low confidence) and 1.0 (high confidence)"
2127
1962
  ),
2128
- summary: zod3.string().nullish().describe(
2129
- "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/models/summarization) is enabled"
1963
+ content_safety: zod3.boolean().nullish().describe(
1964
+ "Whether [Content Moderation](https://www.assemblyai.com/docs/content-moderation) is enabled, can be true or false"
2130
1965
  ),
2131
- custom_topics: zod3.boolean().nullish().describe("Whether custom topics is enabled, either true or false"),
2132
- topics: zod3.array(zod3.string()).optional().describe("The list of custom topics provided if custom topics is enabled"),
2133
- sentiment_analysis: zod3.boolean().nullish().describe(
2134
- "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/models/sentiment-analysis) is enabled, can be true or false"
1966
+ content_safety_labels: zod3.object({
1967
+ status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
1968
+ results: zod3.array(
1969
+ zod3.object({
1970
+ text: zod3.string().describe("The transcript of the section flagged by the Content Moderation model"),
1971
+ labels: zod3.array(
1972
+ zod3.object({
1973
+ label: zod3.string().describe("The label of the sensitive topic"),
1974
+ confidence: zod3.number().describe("The confidence score for the topic being discussed, from 0 to 1"),
1975
+ severity: zod3.number().describe("How severely the topic is discussed in the section, from 0 to 1")
1976
+ })
1977
+ ).describe(
1978
+ "An array of safety labels, one per sensitive topic that was detected in the section"
1979
+ ),
1980
+ sentences_idx_start: zod3.number().describe("The sentence index at which the section begins"),
1981
+ sentences_idx_end: zod3.number().describe("The sentence index at which the section ends"),
1982
+ timestamp: zod3.object({
1983
+ start: zod3.number().describe("The start time in milliseconds"),
1984
+ end: zod3.number().describe("The end time in milliseconds")
1985
+ }).describe("Timestamp containing a start and end property in milliseconds")
1986
+ })
1987
+ ).describe("An array of results for the Content Moderation model"),
1988
+ summary: zod3.record(zod3.string(), zod3.number()).describe(
1989
+ "A summary of the Content Moderation confidence results for the entire audio file"
1990
+ ),
1991
+ severity_score_summary: zod3.record(
1992
+ zod3.string(),
1993
+ zod3.object({
1994
+ low: zod3.number(),
1995
+ medium: zod3.number(),
1996
+ high: zod3.number()
1997
+ })
1998
+ ).describe(
1999
+ "A summary of the Content Moderation severity results for the entire audio file"
2000
+ )
2001
+ }).describe(
2002
+ "An array of results for the Content Moderation model, if it is enabled.\nSee [Content moderation](https://www.assemblyai.com/docs/content-moderation) for more information.\n"
2003
+ ).or(zod3.null()).optional().describe(
2004
+ "An array of results for the Content Moderation model, if it is enabled.\nSee [Content moderation](https://www.assemblyai.com/docs/content-moderation) for more information.\n"
2135
2005
  ),
2136
- sentiment_analysis_results: zod3.array(
2006
+ custom_spelling: zod3.array(
2137
2007
  zod3.object({
2138
- text: zod3.string().describe("The transcript of the sentence"),
2139
- start: zod3.number().describe("The starting time, in milliseconds, of the sentence"),
2140
- end: zod3.number().describe("The ending time, in milliseconds, of the sentence"),
2141
- sentiment: zod3.enum(["POSITIVE", "NEUTRAL", "NEGATIVE"]),
2142
- confidence: zod3.number().min(createTranscriptResponseSentimentAnalysisResultsItemConfidenceMin).max(createTranscriptResponseSentimentAnalysisResultsItemConfidenceMax).describe(
2143
- "The confidence score for the detected sentiment of the sentence, from 0 to 1"
2144
- ),
2145
- channel: zod3.string().nullish().describe(
2146
- "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
2147
- ),
2148
- speaker: zod3.string().nullable().describe(
2149
- "The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/models/speaker-diarization) is enabled, else null"
2150
- )
2151
- }).describe("The result of the Sentiment Analysis model")
2008
+ from: zod3.array(zod3.string()).describe("Words or phrases to replace"),
2009
+ to: zod3.string().describe("Word to replace with")
2010
+ }).describe(
2011
+ "Object containing words or phrases to replace, and the word or phrase to replace with"
2012
+ )
2152
2013
  ).nullish().describe(
2153
- "An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/models/sentiment-analysis) for more information.\n"
2014
+ "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/custom-spelling) for more details."
2154
2015
  ),
2155
- entity_detection: zod3.boolean().nullish().describe(
2156
- "Whether [Entity Detection](https://www.assemblyai.com/docs/models/entity-detection) is enabled, can be true or false"
2016
+ disfluencies: zod3.boolean().nullish().describe(
2017
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/filler-words), like "umm", in your media file; can be true or false'
2018
+ ),
2019
+ domain: zod3.string().nullish().describe(
2020
+ 'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
2157
2021
  ),
2158
2022
  entities: zod3.array(
2159
2023
  zod3.object({
@@ -2212,106 +2076,49 @@ var createTranscriptResponse = zod3.object({
2212
2076
  )
2213
2077
  }).describe("A detected entity")
2214
2078
  ).nullish().describe(
2215
- "An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/models/entity-detection) for more information.\n"
2216
- ),
2217
- speech_threshold: zod3.number().min(createTranscriptResponseSpeechThresholdMin).max(createTranscriptResponseSpeechThresholdMax).nullish().describe(
2218
- "Defaults to null. Reject audio files that contain less than this fraction of speech.\nValid values are in the range [0, 1] inclusive.\n"
2079
+ "An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/entity-detection) for more information.\n"
2219
2080
  ),
2220
- throttled: zod3.boolean().nullish().describe(
2221
- "True while a request is throttled and false when a request is no longer throttled"
2081
+ entity_detection: zod3.boolean().nullish().describe(
2082
+ "Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/entity-detection) is enabled, can be true or false"
2222
2083
  ),
2223
2084
  error: zod3.string().optional().describe("Error message of why the transcript failed"),
2224
- language_model: zod3.string().describe("The language model that was used for the transcript"),
2225
- acoustic_model: zod3.string().describe("The acoustic model that was used for the transcript")
2226
- }).describe("A transcript object");
2227
- var listTranscriptsQueryLimitDefault = 10;
2228
- var listTranscriptsQueryLimitMax = 200;
2229
- var listTranscriptsQueryParams = zod3.object({
2230
- limit: zod3.number().min(1).max(listTranscriptsQueryLimitMax).default(listTranscriptsQueryLimitDefault).describe("Maximum amount of transcripts to retrieve"),
2231
- status: zod3.enum(["queued", "processing", "completed", "error"]).optional().describe("Filter by transcript status"),
2232
- created_on: zod3.string().date().optional().describe("Only get transcripts created on this date"),
2233
- before_id: zod3.string().uuid().optional().describe("Get transcripts that were created before this transcript ID"),
2234
- after_id: zod3.string().uuid().optional().describe("Get transcripts that were created after this transcript ID"),
2235
- throttled_only: zod3.boolean().optional().describe("Only get throttled transcripts, overrides the status filter")
2236
- });
2237
- var listTranscriptsResponseTranscriptsItemCreatedRegExp = new RegExp(
2238
- "^(?:(\\d{4}-\\d{2}-\\d{2})T(\\d{2}:\\d{2}:\\d{2}(?:\\.\\d+)?))$"
2239
- );
2240
- var listTranscriptsResponseTranscriptsItemCompletedRegExp = new RegExp(
2241
- "^(?:(\\d{4}-\\d{2}-\\d{2})T(\\d{2}:\\d{2}:\\d{2}(?:\\.\\d+)?))$"
2242
- );
2243
- var listTranscriptsResponse = zod3.object({
2244
- page_details: zod3.object({
2245
- limit: zod3.number().describe("The number of results this page is limited to"),
2246
- result_count: zod3.number().describe("The actual number of results in the page"),
2247
- current_url: zod3.string().describe("The URL used to retrieve the current page of transcripts"),
2248
- prev_url: zod3.string().nullable().describe(
2249
- "The URL to the next page of transcripts. The previous URL always points to a page with older transcripts."
2250
- ),
2251
- next_url: zod3.string().nullable().describe(
2252
- "The URL to the next page of transcripts. The next URL always points to a page with newer transcripts."
2253
- )
2085
+ filter_profanity: zod3.boolean().nullish().describe(
2086
+ "Whether [Profanity Filtering](https://www.assemblyai.com/docs/profanity-filtering) is enabled, either true or false"
2087
+ ),
2088
+ format_text: zod3.boolean().nullish().describe(
2089
+ "Whether [Text Formatting](https://www.assemblyai.com/docs/pre-recorded-audio) is enabled, either true or false"
2090
+ ),
2091
+ iab_categories: zod3.boolean().nullish().describe(
2092
+ "Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) is enabled, can be true or false"
2093
+ ),
2094
+ iab_categories_result: zod3.object({
2095
+ status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
2096
+ results: zod3.array(
2097
+ zod3.object({
2098
+ text: zod3.string().describe("The text in the transcript in which a detected topic occurs"),
2099
+ labels: zod3.array(
2100
+ zod3.object({
2101
+ relevance: zod3.number().describe("How relevant the detected topic is of a detected topic"),
2102
+ label: zod3.string().describe(
2103
+ "The IAB taxonomical label for the label of the detected topic, where > denotes supertopic/subtopic relationship"
2104
+ )
2105
+ })
2106
+ ).optional().describe("An array of detected topics in the text"),
2107
+ timestamp: zod3.object({
2108
+ start: zod3.number().describe("The start time in milliseconds"),
2109
+ end: zod3.number().describe("The end time in milliseconds")
2110
+ }).optional().describe("Timestamp containing a start and end property in milliseconds")
2111
+ }).describe("The result of the topic detection model")
2112
+ ).describe("An array of results for the Topic Detection model"),
2113
+ summary: zod3.record(zod3.string(), zod3.number()).describe("The overall relevance of topic to the entire audio file")
2254
2114
  }).describe(
2255
- "Details of the transcript page. Transcripts are sorted from newest to oldest. The previous URL always points to a page with older transcripts."
2115
+ "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) for more information.\n"
2116
+ ).or(zod3.null()).optional().describe(
2117
+ "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) for more information.\n"
2256
2118
  ),
2257
- transcripts: zod3.array(
2258
- zod3.object({
2259
- id: zod3.string().uuid().describe("The unique identifier for the transcript"),
2260
- resource_url: zod3.string().describe("The URL to retrieve the transcript"),
2261
- status: zod3.enum(["queued", "processing", "completed", "error"]).describe(
2262
- "The status of your transcript. Possible values are queued, processing, completed, or error."
2263
- ),
2264
- created: zod3.string().regex(listTranscriptsResponseTranscriptsItemCreatedRegExp).describe("The date and time the transcript was created"),
2265
- completed: zod3.string().regex(listTranscriptsResponseTranscriptsItemCompletedRegExp).nullable().describe("The date and time the transcript was completed"),
2266
- audio_url: zod3.string().describe("The URL to the audio file"),
2267
- error: zod3.string().nullable().describe("Error message of why the transcript failed")
2268
- })
2269
- ).describe("An array of transcripts")
2270
- }).describe(
2271
- "A list of transcripts. Transcripts are sorted from newest to oldest. The previous URL always points to a page with older transcripts."
2272
- );
2273
- var getTranscriptParams = zod3.object({
2274
- transcript_id: zod3.string().describe("ID of the transcript")
2275
- });
2276
- var getTranscriptResponseLanguageConfidenceThresholdMin = 0;
2277
- var getTranscriptResponseLanguageConfidenceThresholdMax = 1;
2278
- var getTranscriptResponseLanguageConfidenceMin = 0;
2279
- var getTranscriptResponseLanguageConfidenceMax = 1;
2280
- var getTranscriptResponseWordsItemConfidenceMin = 0;
2281
- var getTranscriptResponseWordsItemConfidenceMax = 1;
2282
- var getTranscriptResponseUtterancesItemConfidenceMin = 0;
2283
- var getTranscriptResponseUtterancesItemConfidenceMax = 1;
2284
- var getTranscriptResponseUtterancesItemWordsItemConfidenceMin = 0;
2285
- var getTranscriptResponseUtterancesItemWordsItemConfidenceMax = 1;
2286
- var getTranscriptResponseConfidenceMin = 0;
2287
- var getTranscriptResponseConfidenceMax = 1;
2288
- var getTranscriptResponseAutoHighlightsResultResultsItemRankMin = 0;
2289
- var getTranscriptResponseAutoHighlightsResultResultsItemRankMax = 1;
2290
- var getTranscriptResponseContentSafetyLabelsResultsItemLabelsItemConfidenceMin = 0;
2291
- var getTranscriptResponseContentSafetyLabelsResultsItemLabelsItemConfidenceMax = 1;
2292
- var getTranscriptResponseContentSafetyLabelsResultsItemLabelsItemSeverityMin = 0;
2293
- var getTranscriptResponseContentSafetyLabelsResultsItemLabelsItemSeverityMax = 1;
2294
- var getTranscriptResponseContentSafetyLabelsSummaryMinOne = 0;
2295
- var getTranscriptResponseContentSafetyLabelsSummaryMaxOne = 1;
2296
- var getTranscriptResponseContentSafetyLabelsSeverityScoreSummaryLowMin = 0;
2297
- var getTranscriptResponseContentSafetyLabelsSeverityScoreSummaryLowMax = 1;
2298
- var getTranscriptResponseContentSafetyLabelsSeverityScoreSummaryMediumMin = 0;
2299
- var getTranscriptResponseContentSafetyLabelsSeverityScoreSummaryMediumMax = 1;
2300
- var getTranscriptResponseContentSafetyLabelsSeverityScoreSummaryHighMin = 0;
2301
- var getTranscriptResponseContentSafetyLabelsSeverityScoreSummaryHighMax = 1;
2302
- var getTranscriptResponseIabCategoriesResultResultsItemLabelsItemRelevanceMin = 0;
2303
- var getTranscriptResponseIabCategoriesResultResultsItemLabelsItemRelevanceMax = 1;
2304
- var getTranscriptResponseIabCategoriesResultSummaryMinOne = 0;
2305
- var getTranscriptResponseIabCategoriesResultSummaryMaxOne = 1;
2306
- var getTranscriptResponseSentimentAnalysisResultsItemConfidenceMin = 0;
2307
- var getTranscriptResponseSentimentAnalysisResultsItemConfidenceMax = 1;
2308
- var getTranscriptResponseSpeechThresholdMin = 0;
2309
- var getTranscriptResponseSpeechThresholdMax = 1;
2310
- var getTranscriptResponse = zod3.object({
2311
2119
  id: zod3.string().uuid().describe("The unique identifier of your transcript"),
2312
- audio_url: zod3.string().describe("The URL of the media that was transcribed"),
2313
- status: zod3.enum(["queued", "processing", "completed", "error"]).describe(
2314
- "The status of your transcript. Possible values are queued, processing, completed, or error."
2120
+ keyterms_prompt: zod3.array(zod3.string()).optional().describe(
2121
+ "Improve accuracy with up to 200 (for Universal-2) or 1000 (for Universal-3 Pro) domain-specific words or phrases (maximum 6 words per phrase). See [Keyterms Prompting](https://www.assemblyai.com/docs/pre-recorded-audio/keyterms-prompting) for more details.\n"
2315
2122
  ),
2316
2123
  language_code: zod3.enum([
2317
2124
  "en",
@@ -2416,138 +2223,175 @@ var getTranscriptResponse = zod3.object({
2416
2223
  "cy",
2417
2224
  "yi",
2418
2225
  "yo"
2419
- ]).describe(
2420
- "The language of your audio file. Possible values are found in [Supported Languages](https://www.assemblyai.com/docs/concepts/supported-languages).\nThe default value is 'en_us'.\n"
2421
- ).or(zod3.string()).optional().describe(
2422
- "The language of your audio file.\nPossible values are found in [Supported Languages](https://www.assemblyai.com/docs/concepts/supported-languages).\nThe default value is 'en_us'.\n"
2423
- ),
2424
- language_detection: zod3.boolean().nullish().describe(
2425
- "Whether [Automatic language detection](https://www.assemblyai.com/docs/models/speech-recognition#automatic-language-detection) is enabled, either true or false"
2426
- ),
2427
- language_confidence_threshold: zod3.number().min(getTranscriptResponseLanguageConfidenceThresholdMin).max(getTranscriptResponseLanguageConfidenceThresholdMax).nullable().describe(
2428
- "The confidence threshold for the automatically detected language.\nAn error will be returned if the language confidence is below this threshold.\n"
2429
- ),
2430
- language_confidence: zod3.number().min(getTranscriptResponseLanguageConfidenceMin).max(getTranscriptResponseLanguageConfidenceMax).nullable().describe(
2431
- "The confidence score for the detected language, between 0.0 (low confidence) and 1.0 (high confidence)"
2432
- ),
2433
- speech_model: zod3.enum(["best", "slam-1", "universal"]).describe("The speech model to use for the transcription.").or(zod3.null()).describe(
2434
- "The speech model used for the transcription. When `null`, the default model is used."
2435
- ),
2436
- text: zod3.string().nullish().describe("The textual transcript of your media file"),
2437
- words: zod3.array(
2438
- zod3.object({
2439
- confidence: zod3.number().min(getTranscriptResponseWordsItemConfidenceMin).max(getTranscriptResponseWordsItemConfidenceMax).describe("The confidence score for the transcript of this word"),
2440
- start: zod3.number().describe("The starting time, in milliseconds, for the word"),
2441
- end: zod3.number().describe("The ending time, in milliseconds, for the word"),
2442
- text: zod3.string().describe("The text of the word"),
2443
- channel: zod3.string().nullish().describe(
2444
- "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
2445
- ),
2446
- speaker: zod3.string().nullable().describe(
2447
- "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/models/speaker-diarization) is enabled, else null"
2448
- )
2449
- })
2450
- ).nullish().describe(
2451
- "An array of temporally-sequential word objects, one for each word in the transcript.\nSee [Speech recognition](https://www.assemblyai.com/docs/models/speech-recognition) for more information.\n"
2452
- ),
2453
- utterances: zod3.array(
2454
- zod3.object({
2455
- confidence: zod3.number().min(getTranscriptResponseUtterancesItemConfidenceMin).max(getTranscriptResponseUtterancesItemConfidenceMax).describe("The confidence score for the transcript of this utterance"),
2456
- start: zod3.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
2457
- end: zod3.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
2458
- text: zod3.string().describe("The text for this utterance"),
2459
- words: zod3.array(
2460
- zod3.object({
2461
- confidence: zod3.number().min(getTranscriptResponseUtterancesItemWordsItemConfidenceMin).max(getTranscriptResponseUtterancesItemWordsItemConfidenceMax).describe("The confidence score for the transcript of this word"),
2462
- start: zod3.number().describe("The starting time, in milliseconds, for the word"),
2463
- end: zod3.number().describe("The ending time, in milliseconds, for the word"),
2464
- text: zod3.string().describe("The text of the word"),
2465
- channel: zod3.string().nullish().describe(
2466
- "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
2467
- ),
2468
- speaker: zod3.string().nullable().describe(
2469
- "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/models/speaker-diarization) is enabled, else null"
2470
- )
2471
- })
2472
- ).describe("The words in the utterance."),
2473
- channel: zod3.string().nullish().describe(
2474
- "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
2475
- ),
2476
- speaker: zod3.string().describe(
2477
- 'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
2478
- )
2479
- })
2480
- ).nullish().describe(
2481
- "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/speech-to-text/speaker-diarization) and [Multichannel transcription](https://www.assemblyai.com/docs/speech-to-text/speech-recognition#multichannel-transcription) for more information.\n"
2482
- ),
2483
- confidence: zod3.number().min(getTranscriptResponseConfidenceMin).max(getTranscriptResponseConfidenceMax).nullish().describe(
2484
- "The confidence score for the transcript, between 0.0 (low confidence) and 1.0 (high confidence)"
2485
- ),
2486
- audio_duration: zod3.number().nullish().describe("The duration of this transcript object's media file, in seconds"),
2487
- punctuate: zod3.boolean().nullish().describe("Whether Automatic Punctuation is enabled, either true or false"),
2488
- format_text: zod3.boolean().nullish().describe("Whether Text Formatting is enabled, either true or false"),
2489
- disfluencies: zod3.boolean().nullish().describe('Transcribe Filler Words, like "umm", in your media file; can be true or false'),
2490
- multichannel: zod3.boolean().nullish().describe(
2491
- "Whether [Multichannel transcription](https://www.assemblyai.com/docs/models/speech-recognition#multichannel-transcription) was enabled in the transcription request, either true or false"
2492
- ),
2493
- audio_channels: zod3.number().optional().describe(
2494
- "The number of audio channels in the audio file. This is only present when multichannel is enabled."
2495
- ),
2496
- webhook_url: zod3.string().nullish().describe(
2497
- "The URL to which we send webhook requests.\nWe sends two different types of webhook requests.\nOne request when a transcript is completed or failed, and one request when the redacted audio is ready if redact_pii_audio is enabled.\n"
2498
- ),
2499
- webhook_status_code: zod3.number().nullish().describe(
2500
- "The status code we received from your server when delivering the transcript completed or failed webhook request, if a webhook URL was provided"
2501
- ),
2502
- webhook_auth: zod3.boolean().describe("Whether webhook authentication details were provided"),
2503
- webhook_auth_header_name: zod3.string().nullish().describe(
2504
- "The header name to be sent with the transcript completed or failed webhook requests"
2505
- ),
2506
- speed_boost: zod3.boolean().nullish().describe("Whether speed boost is enabled"),
2507
- auto_highlights: zod3.boolean().describe("Whether Key Phrases is enabled, either true or false"),
2508
- auto_highlights_result: zod3.object({
2509
- status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
2510
- results: zod3.array(
2511
- zod3.object({
2512
- count: zod3.number().describe("The total number of times the key phrase appears in the audio file"),
2513
- rank: zod3.number().min(getTranscriptResponseAutoHighlightsResultResultsItemRankMin).max(getTranscriptResponseAutoHighlightsResultResultsItemRankMax).describe(
2514
- "The total relevancy to the overall audio file of this key phrase - a greater number means more relevant"
2515
- ),
2516
- text: zod3.string().describe("The text itself of the key phrase"),
2517
- timestamps: zod3.array(
2518
- zod3.object({
2519
- start: zod3.number().describe("The start time in milliseconds"),
2520
- end: zod3.number().describe("The end time in milliseconds")
2521
- }).describe("Timestamp containing a start and end property in milliseconds")
2522
- ).describe("The timestamp of the of the key phrase")
2523
- })
2524
- ).describe("A temporally-sequential array of Key Phrases")
2525
- }).describe(
2526
- "An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/models/key-phrases) for more information.\n"
2527
- ).or(zod3.null()).optional().describe(
2528
- "An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/models/key-phrases) for more information.\n"
2529
- ),
2530
- audio_start_from: zod3.number().nullish().describe(
2531
- "The point in time, in milliseconds, in the file at which the transcription was started"
2532
- ),
2533
- audio_end_at: zod3.number().nullish().describe(
2534
- "The point in time, in milliseconds, in the file at which the transcription was terminated"
2535
- ),
2536
- word_boost: zod3.array(zod3.string()).optional().describe("The list of custom vocabulary to boost transcription probability for"),
2537
- boost_param: zod3.string().nullish().describe("The word boost parameter value"),
2538
- filter_profanity: zod3.boolean().nullish().describe(
2539
- "Whether [Profanity Filtering](https://www.assemblyai.com/docs/models/speech-recognition#profanity-filtering) is enabled, either true or false"
2540
- ),
2541
- redact_pii: zod3.boolean().describe(
2542
- "Whether [PII Redaction](https://www.assemblyai.com/docs/models/pii-redaction) is enabled, either true or false"
2226
+ ]).optional().describe(
2227
+ "The language of your audio file. Possible values are found in [Supported Languages](https://www.assemblyai.com/docs/pre-recorded-audio/supported-languages).\nThe default value is 'en_us'.\n"
2543
2228
  ),
2544
- redact_pii_audio: zod3.boolean().nullish().describe(
2545
- "Whether a redacted version of the audio file was generated,\neither true or false. See [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more information.\n"
2229
+ language_codes: zod3.array(
2230
+ zod3.enum([
2231
+ "en",
2232
+ "en_au",
2233
+ "en_uk",
2234
+ "en_us",
2235
+ "es",
2236
+ "fr",
2237
+ "de",
2238
+ "it",
2239
+ "pt",
2240
+ "nl",
2241
+ "af",
2242
+ "sq",
2243
+ "am",
2244
+ "ar",
2245
+ "hy",
2246
+ "as",
2247
+ "az",
2248
+ "ba",
2249
+ "eu",
2250
+ "be",
2251
+ "bn",
2252
+ "bs",
2253
+ "br",
2254
+ "bg",
2255
+ "my",
2256
+ "ca",
2257
+ "zh",
2258
+ "hr",
2259
+ "cs",
2260
+ "da",
2261
+ "et",
2262
+ "fo",
2263
+ "fi",
2264
+ "gl",
2265
+ "ka",
2266
+ "el",
2267
+ "gu",
2268
+ "ht",
2269
+ "ha",
2270
+ "haw",
2271
+ "he",
2272
+ "hi",
2273
+ "hu",
2274
+ "is",
2275
+ "id",
2276
+ "ja",
2277
+ "jw",
2278
+ "kn",
2279
+ "kk",
2280
+ "km",
2281
+ "ko",
2282
+ "lo",
2283
+ "la",
2284
+ "lv",
2285
+ "ln",
2286
+ "lt",
2287
+ "lb",
2288
+ "mk",
2289
+ "mg",
2290
+ "ms",
2291
+ "ml",
2292
+ "mt",
2293
+ "mi",
2294
+ "mr",
2295
+ "mn",
2296
+ "ne",
2297
+ "no",
2298
+ "nn",
2299
+ "oc",
2300
+ "pa",
2301
+ "ps",
2302
+ "fa",
2303
+ "pl",
2304
+ "ro",
2305
+ "ru",
2306
+ "sa",
2307
+ "sr",
2308
+ "sn",
2309
+ "sd",
2310
+ "si",
2311
+ "sk",
2312
+ "sl",
2313
+ "so",
2314
+ "su",
2315
+ "sw",
2316
+ "sv",
2317
+ "tl",
2318
+ "tg",
2319
+ "ta",
2320
+ "tt",
2321
+ "te",
2322
+ "th",
2323
+ "bo",
2324
+ "tr",
2325
+ "tk",
2326
+ "uk",
2327
+ "ur",
2328
+ "uz",
2329
+ "vi",
2330
+ "cy",
2331
+ "yi",
2332
+ "yo"
2333
+ ]).describe(
2334
+ "The language of your audio file. Possible values are found in [Supported Languages](https://www.assemblyai.com/docs/pre-recorded-audio/supported-languages).\nThe default value is 'en_us'.\n"
2335
+ )
2336
+ ).nullish().describe(
2337
+ "The language codes of your audio file. Used for [Code switching](/docs/speech-to-text/pre-recorded-audio/code-switching)\nOne of the values specified must be `en`.\n"
2338
+ ),
2339
+ language_confidence: zod3.number().nullable().describe(
2340
+ "The confidence score for the detected language, between 0.0 (low confidence) and 1.0 (high confidence). See [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection) for more details."
2341
+ ),
2342
+ language_confidence_threshold: zod3.number().nullable().describe(
2343
+ "The confidence threshold for the automatically detected language.\nAn error will be returned if the language confidence is below this threshold.\nSee [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection) for more details.\n"
2344
+ ),
2345
+ language_detection: zod3.boolean().nullish().describe(
2346
+ "Whether [Automatic language detection](/docs/pre-recorded-audio/automatic-language-detection) is enabled, either true or false"
2347
+ ),
2348
+ language_detection_options: zod3.object({
2349
+ expected_languages: zod3.array(zod3.string()).optional().describe(
2350
+ 'List of languages expected in the audio file. Defaults to `["all"]` when unspecified. See [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection) for more details.'
2351
+ ),
2352
+ fallback_language: zod3.string().default(createTranscriptResponseLanguageDetectionOptionsFallbackLanguageDefault).describe(
2353
+ 'If the detected language of the audio file is not in the list of expected languages, the `fallback_language` is used. Specify `["auto"]` to let our model choose the fallback language from `expected_languages` with the highest confidence score. See [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection) for more details.\n'
2354
+ ),
2355
+ code_switching: zod3.boolean().optional().describe(
2356
+ "Whether [code switching](/docs/speech-to-text/pre-recorded-audio/code-switching) should be detected.\n"
2357
+ ),
2358
+ code_switching_confidence_threshold: zod3.number().default(
2359
+ createTranscriptResponseLanguageDetectionOptionsCodeSwitchingConfidenceThresholdDefault
2360
+ ).describe(
2361
+ "The confidence threshold for [code switching](/docs/speech-to-text/pre-recorded-audio/code-switching) detection. If the code switching confidence is below this threshold, the transcript will be processed in the language with the highest `language_detection_confidence` score.\n"
2362
+ )
2363
+ }).optional().describe(
2364
+ "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
2365
+ ),
2366
+ multichannel: zod3.boolean().nullish().describe(
2367
+ "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) was enabled in the transcription request, either true or false"
2368
+ ),
2369
+ prompt: zod3.string().optional().describe(
2370
+ "Provide natural language prompting of up to 1,500 words of contextual information to the model. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for best practices.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
2371
+ ),
2372
+ punctuate: zod3.boolean().nullish().describe(
2373
+ "Whether [Automatic Punctuation](https://www.assemblyai.com/docs/pre-recorded-audio) is enabled, either true or false"
2374
+ ),
2375
+ redact_pii: zod3.boolean().describe(
2376
+ "Whether [PII Redaction](https://www.assemblyai.com/docs/pii-redaction) is enabled, either true or false"
2377
+ ),
2378
+ redact_pii_audio: zod3.boolean().nullish().describe(
2379
+ "Whether a redacted version of the audio file was generated,\neither true or false. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction#request-for-redacted-audio) for more information.\n"
2380
+ ),
2381
+ redact_pii_audio_options: zod3.object({
2382
+ return_redacted_no_speech_audio: zod3.boolean().optional().describe(
2383
+ "By default, audio redaction provides redacted audio URLs only when speech is detected. However, if your use-case specifically requires redacted audio files even for silent audio files without any dialogue, you can opt to receive these URLs by setting this parameter to `true`."
2384
+ ),
2385
+ override_audio_redaction_method: zod3.enum(["silence"]).optional().describe(
2386
+ "Specify the method used to redact audio. By default, redacted audio uses a beep sound. Set to `silence` to replace PII with silence instead of a beep."
2387
+ )
2388
+ }).optional().describe(
2389
+ "The options for PII-redacted audio, if redact_pii_audio is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction#request-for-redacted-audio) for more information.\n"
2546
2390
  ),
2547
2391
  redact_pii_audio_quality: zod3.enum(["mp3", "wav"]).describe(
2548
- "Controls the filetype of the audio created by redact_pii_audio. Currently supports mp3 (default) and wav. See [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more details."
2392
+ "Controls the filetype of the audio created by redact_pii_audio. Currently supports mp3 (default) and wav. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction#request-for-redacted-audio) for more details."
2549
2393
  ).or(zod3.null()).optional().describe(
2550
- "The audio quality of the PII-redacted audio file, if redact_pii_audio is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more information.\n"
2394
+ "The audio quality of the PII-redacted audio file, if redact_pii_audio is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction#request-for-redacted-audio) for more information.\n"
2551
2395
  ),
2552
2396
  redact_pii_policies: zod3.array(
2553
2397
  zod3.enum([
@@ -2597,146 +2441,13 @@ var getTranscriptResponse = zod3.object({
2597
2441
  "zodiac_sign"
2598
2442
  ]).describe("The type of PII to redact")
2599
2443
  ).nullish().describe(
2600
- "The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more information.\n"
2444
+ "The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
2601
2445
  ),
2602
2446
  redact_pii_sub: zod3.enum(["entity_name", "hash"]).optional().describe(
2603
- 'The replacement logic for detected PII, can be "entity_name" or "hash". See [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more details.'
2604
- ),
2605
- speaker_labels: zod3.boolean().nullish().describe(
2606
- "Whether [Speaker diarization](https://www.assemblyai.com/docs/models/speaker-diarization) is enabled, can be true or false"
2607
- ),
2608
- speakers_expected: zod3.number().nullish().describe(
2609
- "Tell the speaker label model how many speakers it should attempt to identify. See [Speaker diarization](https://www.assemblyai.com/docs/models/speaker-diarization) for more details."
2610
- ),
2611
- content_safety: zod3.boolean().nullish().describe(
2612
- "Whether [Content Moderation](https://www.assemblyai.com/docs/models/content-moderation) is enabled, can be true or false"
2613
- ),
2614
- content_safety_labels: zod3.object({
2615
- status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
2616
- results: zod3.array(
2617
- zod3.object({
2618
- text: zod3.string().describe("The transcript of the section flagged by the Content Moderation model"),
2619
- labels: zod3.array(
2620
- zod3.object({
2621
- label: zod3.string().describe("The label of the sensitive topic"),
2622
- confidence: zod3.number().min(
2623
- getTranscriptResponseContentSafetyLabelsResultsItemLabelsItemConfidenceMin
2624
- ).max(
2625
- getTranscriptResponseContentSafetyLabelsResultsItemLabelsItemConfidenceMax
2626
- ).describe("The confidence score for the topic being discussed, from 0 to 1"),
2627
- severity: zod3.number().min(getTranscriptResponseContentSafetyLabelsResultsItemLabelsItemSeverityMin).max(getTranscriptResponseContentSafetyLabelsResultsItemLabelsItemSeverityMax).describe("How severely the topic is discussed in the section, from 0 to 1")
2628
- })
2629
- ).describe(
2630
- "An array of safety labels, one per sensitive topic that was detected in the section"
2631
- ),
2632
- sentences_idx_start: zod3.number().describe("The sentence index at which the section begins"),
2633
- sentences_idx_end: zod3.number().describe("The sentence index at which the section ends"),
2634
- timestamp: zod3.object({
2635
- start: zod3.number().describe("The start time in milliseconds"),
2636
- end: zod3.number().describe("The end time in milliseconds")
2637
- }).describe("Timestamp containing a start and end property in milliseconds")
2638
- })
2639
- ).describe("An array of results for the Content Moderation model"),
2640
- summary: zod3.record(
2641
- zod3.string(),
2642
- zod3.number().min(getTranscriptResponseContentSafetyLabelsSummaryMinOne).max(getTranscriptResponseContentSafetyLabelsSummaryMaxOne).describe(
2643
- 'A confidence score for the presence of the sensitive topic "topic" across the entire audio file'
2644
- )
2645
- ).describe(
2646
- "A summary of the Content Moderation confidence results for the entire audio file"
2647
- ),
2648
- severity_score_summary: zod3.record(
2649
- zod3.string(),
2650
- zod3.object({
2651
- low: zod3.number().min(getTranscriptResponseContentSafetyLabelsSeverityScoreSummaryLowMin).max(getTranscriptResponseContentSafetyLabelsSeverityScoreSummaryLowMax),
2652
- medium: zod3.number().min(getTranscriptResponseContentSafetyLabelsSeverityScoreSummaryMediumMin).max(getTranscriptResponseContentSafetyLabelsSeverityScoreSummaryMediumMax),
2653
- high: zod3.number().min(getTranscriptResponseContentSafetyLabelsSeverityScoreSummaryHighMin).max(getTranscriptResponseContentSafetyLabelsSeverityScoreSummaryHighMax)
2654
- })
2655
- ).describe(
2656
- "A summary of the Content Moderation severity results for the entire audio file"
2657
- )
2658
- }).describe(
2659
- "An array of results for the Content Moderation model, if it is enabled.\nSee [Content moderation](https://www.assemblyai.com/docs/models/content-moderation) for more information.\n"
2660
- ).or(zod3.null()).optional().describe(
2661
- "An array of results for the Content Moderation model, if it is enabled.\nSee [Content moderation](https://www.assemblyai.com/docs/models/content-moderation) for more information.\n"
2662
- ),
2663
- iab_categories: zod3.boolean().nullish().describe(
2664
- "Whether [Topic Detection](https://www.assemblyai.com/docs/models/topic-detection) is enabled, can be true or false"
2665
- ),
2666
- iab_categories_result: zod3.object({
2667
- status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
2668
- results: zod3.array(
2669
- zod3.object({
2670
- text: zod3.string().describe("The text in the transcript in which a detected topic occurs"),
2671
- labels: zod3.array(
2672
- zod3.object({
2673
- relevance: zod3.number().min(
2674
- getTranscriptResponseIabCategoriesResultResultsItemLabelsItemRelevanceMin
2675
- ).max(
2676
- getTranscriptResponseIabCategoriesResultResultsItemLabelsItemRelevanceMax
2677
- ).describe("How relevant the detected topic is of a detected topic"),
2678
- label: zod3.string().describe(
2679
- "The IAB taxonomical label for the label of the detected topic, where > denotes supertopic/subtopic relationship"
2680
- )
2681
- })
2682
- ).optional().describe("An array of detected topics in the text"),
2683
- timestamp: zod3.object({
2684
- start: zod3.number().describe("The start time in milliseconds"),
2685
- end: zod3.number().describe("The end time in milliseconds")
2686
- }).optional().describe("Timestamp containing a start and end property in milliseconds")
2687
- }).describe("The result of the topic detection model")
2688
- ).describe("An array of results for the Topic Detection model"),
2689
- summary: zod3.record(
2690
- zod3.string(),
2691
- zod3.number().min(getTranscriptResponseIabCategoriesResultSummaryMinOne).max(getTranscriptResponseIabCategoriesResultSummaryMaxOne)
2692
- ).describe("The overall relevance of topic to the entire audio file")
2693
- }).describe(
2694
- "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/models/topic-detection) for more information.\n"
2695
- ).or(zod3.null()).optional().describe(
2696
- "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/models/topic-detection) for more information.\n"
2697
- ),
2698
- custom_spelling: zod3.array(
2699
- zod3.object({
2700
- from: zod3.array(zod3.string().describe("Word or phrase to replace")).describe("Words or phrases to replace"),
2701
- to: zod3.string().describe("Word to replace with")
2702
- }).describe(
2703
- "Object containing words or phrases to replace, and the word or phrase to replace with"
2704
- )
2705
- ).nullish().describe("Customize how words are spelled and formatted using to and from values"),
2706
- keyterms_prompt: zod3.array(zod3.string()).optional().describe(
2707
- "Improve accuracy with up to 1000 domain-specific words or phrases (maximum 6 words per phrase).\n"
2708
- ),
2709
- prompt: zod3.string().optional().describe("This parameter does not currently have any functionality attached to it."),
2710
- auto_chapters: zod3.boolean().nullish().describe(
2711
- "Whether [Auto Chapters](https://www.assemblyai.com/docs/models/auto-chapters) is enabled, can be true or false"
2712
- ),
2713
- chapters: zod3.array(
2714
- zod3.object({
2715
- gist: zod3.string().describe(
2716
- "An ultra-short summary (just a few words) of the content spoken in the chapter"
2717
- ),
2718
- headline: zod3.string().describe("A single sentence summary of the content spoken during the chapter"),
2719
- summary: zod3.string().describe("A one paragraph summary of the content spoken during the chapter"),
2720
- start: zod3.number().describe("The starting time, in milliseconds, for the chapter"),
2721
- end: zod3.number().describe("The starting time, in milliseconds, for the chapter")
2722
- }).describe("Chapter of the audio file")
2723
- ).nullish().describe("An array of temporally sequential chapters for the audio file"),
2724
- summarization: zod3.boolean().describe(
2725
- "Whether [Summarization](https://www.assemblyai.com/docs/models/summarization) is enabled, either true or false"
2726
- ),
2727
- summary_type: zod3.string().nullish().describe(
2728
- "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/models/summarization) is enabled"
2729
- ),
2730
- summary_model: zod3.string().nullish().describe(
2731
- "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/models/summarization) is enabled\n"
2732
- ),
2733
- summary: zod3.string().nullish().describe(
2734
- "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/models/summarization) is enabled"
2447
+ "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
2735
2448
  ),
2736
- custom_topics: zod3.boolean().nullish().describe("Whether custom topics is enabled, either true or false"),
2737
- topics: zod3.array(zod3.string()).optional().describe("The list of custom topics provided if custom topics is enabled"),
2738
2449
  sentiment_analysis: zod3.boolean().nullish().describe(
2739
- "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/models/sentiment-analysis) is enabled, can be true or false"
2450
+ "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-analysis) is enabled, can be true or false"
2740
2451
  ),
2741
2452
  sentiment_analysis_results: zod3.array(
2742
2453
  zod3.object({
@@ -2744,280 +2455,158 @@ var getTranscriptResponse = zod3.object({
2744
2455
  start: zod3.number().describe("The starting time, in milliseconds, of the sentence"),
2745
2456
  end: zod3.number().describe("The ending time, in milliseconds, of the sentence"),
2746
2457
  sentiment: zod3.enum(["POSITIVE", "NEUTRAL", "NEGATIVE"]),
2747
- confidence: zod3.number().min(getTranscriptResponseSentimentAnalysisResultsItemConfidenceMin).max(getTranscriptResponseSentimentAnalysisResultsItemConfidenceMax).describe(
2458
+ confidence: zod3.number().describe(
2748
2459
  "The confidence score for the detected sentiment of the sentence, from 0 to 1"
2749
2460
  ),
2750
2461
  channel: zod3.string().nullish().describe(
2751
2462
  "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
2752
2463
  ),
2753
2464
  speaker: zod3.string().nullable().describe(
2754
- "The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/models/speaker-diarization) is enabled, else null"
2465
+ "The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
2755
2466
  )
2756
2467
  }).describe("The result of the Sentiment Analysis model")
2757
2468
  ).nullish().describe(
2758
- "An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/models/sentiment-analysis) for more information.\n"
2469
+ "An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-analysis) for more information.\n"
2759
2470
  ),
2760
- entity_detection: zod3.boolean().nullish().describe(
2761
- "Whether [Entity Detection](https://www.assemblyai.com/docs/models/entity-detection) is enabled, can be true or false"
2471
+ speaker_labels: zod3.boolean().nullish().describe(
2472
+ "Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, can be true or false"
2762
2473
  ),
2763
- entities: zod3.array(
2764
- zod3.object({
2765
- entity_type: zod3.enum([
2766
- "account_number",
2767
- "banking_information",
2768
- "blood_type",
2769
- "credit_card_cvv",
2770
- "credit_card_expiration",
2771
- "credit_card_number",
2772
- "date",
2773
- "date_interval",
2774
- "date_of_birth",
2775
- "drivers_license",
2776
- "drug",
2777
- "duration",
2778
- "email_address",
2779
- "event",
2780
- "filename",
2781
- "gender_sexuality",
2782
- "healthcare_number",
2783
- "injury",
2784
- "ip_address",
2785
- "language",
2786
- "location",
2787
- "marital_status",
2788
- "medical_condition",
2789
- "medical_process",
2790
- "money_amount",
2791
- "nationality",
2792
- "number_sequence",
2793
- "occupation",
2794
- "organization",
2795
- "passport_number",
2796
- "password",
2797
- "person_age",
2798
- "person_name",
2799
- "phone_number",
2800
- "physical_attribute",
2801
- "political_affiliation",
2802
- "religion",
2803
- "statistics",
2804
- "time",
2805
- "url",
2806
- "us_social_security_number",
2807
- "username",
2808
- "vehicle_id",
2809
- "zodiac_sign"
2810
- ]).describe("The type of entity for the detected entity"),
2811
- text: zod3.string().describe("The text for the detected entity"),
2812
- start: zod3.number().describe(
2813
- "The starting time, in milliseconds, at which the detected entity appears in the audio file"
2814
- ),
2815
- end: zod3.number().describe(
2816
- "The ending time, in milliseconds, for the detected entity in the audio file"
2817
- )
2818
- }).describe("A detected entity")
2474
+ speakers_expected: zod3.number().nullish().describe(
2475
+ "Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization#set-number-of-speakers-expected) for more details."
2476
+ ),
2477
+ speech_model_used: zod3.string().optional().describe(
2478
+ "The speech model to use for the transcription. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models."
2479
+ ),
2480
+ speech_models: zod3.array(
2481
+ zod3.string().describe(
2482
+ "The speech model to use for the transcription. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models."
2483
+ )
2819
2484
  ).nullish().describe(
2820
- "An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/models/entity-detection) for more information.\n"
2485
+ "List multiple speech models in priority order, allowing our system to automatically route your audio to the best available option. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models and routing behavior.\n"
2821
2486
  ),
2822
- speech_threshold: zod3.number().min(getTranscriptResponseSpeechThresholdMin).max(getTranscriptResponseSpeechThresholdMax).nullish().describe(
2823
- "Defaults to null. Reject audio files that contain less than this fraction of speech.\nValid values are in the range [0, 1] inclusive.\n"
2487
+ speech_threshold: zod3.number().nullish().describe(
2488
+ "Defaults to null. Reject audio files that contain less than this fraction of speech.\nValid values are in the range [0, 1] inclusive. See [Speech Threshold](https://www.assemblyai.com/docs/speech-threshold) for more details.\n"
2824
2489
  ),
2825
- throttled: zod3.boolean().nullish().describe(
2826
- "True while a request is throttled and false when a request is no longer throttled"
2490
+ speech_understanding: zod3.object({
2491
+ request: zod3.object({
2492
+ translation: zod3.object({
2493
+ target_languages: zod3.array(zod3.string()).describe(
2494
+ 'List of target language codes (e.g., `["es", "de"]`). See [Translation](https://www.assemblyai.com/docs/speech-understanding/translation) for supported languages.'
2495
+ ),
2496
+ formal: zod3.boolean().default(createTranscriptResponseSpeechUnderstandingRequestTranslationFormalDefault).describe(
2497
+ "Use formal language style. See [Translation](https://www.assemblyai.com/docs/speech-understanding/translation) for more details."
2498
+ ),
2499
+ match_original_utterance: zod3.boolean().optional().describe(
2500
+ "When enabled with Speaker Labels, returns translated text in the utterances array. Each utterance will include a `translated_texts` key containing translations for each target language."
2501
+ )
2502
+ })
2503
+ }).describe(
2504
+ "Request body for [Translation](https://www.assemblyai.com/docs/speech-understanding/translation)."
2505
+ ).or(
2506
+ zod3.object({
2507
+ speaker_identification: zod3.object({
2508
+ speaker_type: zod3.enum(["role", "name"]).describe(
2509
+ "Type of speaker identification. See [Speaker Identification](https://www.assemblyai.com/docs/speech-understanding/speaker-identification) for details on each type."
2510
+ ),
2511
+ known_values: zod3.array(zod3.string()).optional().describe(
2512
+ 'Required if speaker_type is "role". Each value must be 35 characters or less.'
2513
+ ),
2514
+ speakers: zod3.array(
2515
+ zod3.object({
2516
+ role: zod3.string().optional().describe(
2517
+ 'The role of the speaker. Required when `speaker_type` is "role".'
2518
+ ),
2519
+ name: zod3.string().optional().describe(
2520
+ 'The name of the speaker. Required when `speaker_type` is "name".'
2521
+ ),
2522
+ description: zod3.string().optional().describe(
2523
+ "A description of the speaker to help the model identify them based on conversational context."
2524
+ )
2525
+ })
2526
+ ).optional().describe(
2527
+ "An array of speaker objects with metadata to improve identification accuracy. Each object should include a `role` or `name` (depending on `speaker_type`) and an optional `description` to help the model identify the speaker. You can also include any additional custom properties (e.g., `company`, `title`) to provide more context. Use this as an alternative to `known_values` when you want to provide additional context about each speaker."
2528
+ )
2529
+ })
2530
+ }).describe(
2531
+ "Request body for [Speaker Identification](https://www.assemblyai.com/docs/speech-understanding/speaker-identification)."
2532
+ )
2533
+ ).or(
2534
+ zod3.object({
2535
+ custom_formatting: zod3.object({
2536
+ date: zod3.string().optional().describe(
2537
+ 'Date format pattern (e.g., `"mm/dd/yyyy"`). See [Custom Formatting](https://www.assemblyai.com/docs/speech-understanding/custom-formatting) for more details.'
2538
+ ),
2539
+ phone_number: zod3.string().optional().describe(
2540
+ 'Phone number format pattern (e.g., `"(xxx)xxx-xxxx"`). See [Custom Formatting](https://www.assemblyai.com/docs/speech-understanding/custom-formatting) for more details.'
2541
+ ),
2542
+ email: zod3.string().optional().describe(
2543
+ 'Email format pattern (e.g., `"username@domain.com"`). See [Custom Formatting](https://www.assemblyai.com/docs/speech-understanding/custom-formatting) for more details.'
2544
+ )
2545
+ })
2546
+ }).describe(
2547
+ "Request body for [Custom Formatting](https://www.assemblyai.com/docs/speech-understanding/custom-formatting)."
2548
+ )
2549
+ ).optional(),
2550
+ response: zod3.object({
2551
+ translation: zod3.object({
2552
+ status: zod3.string().optional()
2553
+ }).optional()
2554
+ }).or(
2555
+ zod3.object({
2556
+ speaker_identification: zod3.object({
2557
+ mapping: zod3.record(zod3.string(), zod3.string()).optional().describe(
2558
+ 'A mapping of the original generic speaker labels (e.g., "A", "B") to the identified speaker names or roles.'
2559
+ ),
2560
+ status: zod3.string().optional()
2561
+ }).optional()
2562
+ })
2563
+ ).or(
2564
+ zod3.object({
2565
+ custom_formatting: zod3.object({
2566
+ mapping: zod3.record(zod3.string(), zod3.string()).optional(),
2567
+ formatted_text: zod3.string().optional()
2568
+ }).optional()
2569
+ })
2570
+ ).optional()
2571
+ }).optional().describe(
2572
+ "Speech understanding tasks like [Translation](https://www.assemblyai.com/docs/speech-understanding/translation), [Speaker Identification](https://www.assemblyai.com/docs/speech-understanding/speaker-identification), and [Custom Formatting](https://www.assemblyai.com/docs/speech-understanding/custom-formatting). See the task-specific docs for available options and configuration.\n"
2827
2573
  ),
2828
- error: zod3.string().optional().describe("Error message of why the transcript failed"),
2829
- language_model: zod3.string().describe("The language model that was used for the transcript"),
2830
- acoustic_model: zod3.string().describe("The acoustic model that was used for the transcript")
2831
- }).describe("A transcript object");
2832
- var deleteTranscriptParams = zod3.object({
2833
- transcript_id: zod3.string().describe("ID of the transcript")
2834
- });
2835
- var deleteTranscriptResponseLanguageConfidenceThresholdMin = 0;
2836
- var deleteTranscriptResponseLanguageConfidenceThresholdMax = 1;
2837
- var deleteTranscriptResponseLanguageConfidenceMin = 0;
2838
- var deleteTranscriptResponseLanguageConfidenceMax = 1;
2839
- var deleteTranscriptResponseWordsItemConfidenceMin = 0;
2840
- var deleteTranscriptResponseWordsItemConfidenceMax = 1;
2841
- var deleteTranscriptResponseUtterancesItemConfidenceMin = 0;
2842
- var deleteTranscriptResponseUtterancesItemConfidenceMax = 1;
2843
- var deleteTranscriptResponseUtterancesItemWordsItemConfidenceMin = 0;
2844
- var deleteTranscriptResponseUtterancesItemWordsItemConfidenceMax = 1;
2845
- var deleteTranscriptResponseConfidenceMin = 0;
2846
- var deleteTranscriptResponseConfidenceMax = 1;
2847
- var deleteTranscriptResponseAutoHighlightsResultResultsItemRankMin = 0;
2848
- var deleteTranscriptResponseAutoHighlightsResultResultsItemRankMax = 1;
2849
- var deleteTranscriptResponseContentSafetyLabelsResultsItemLabelsItemConfidenceMin = 0;
2850
- var deleteTranscriptResponseContentSafetyLabelsResultsItemLabelsItemConfidenceMax = 1;
2851
- var deleteTranscriptResponseContentSafetyLabelsResultsItemLabelsItemSeverityMin = 0;
2852
- var deleteTranscriptResponseContentSafetyLabelsResultsItemLabelsItemSeverityMax = 1;
2853
- var deleteTranscriptResponseContentSafetyLabelsSummaryMinOne = 0;
2854
- var deleteTranscriptResponseContentSafetyLabelsSummaryMaxOne = 1;
2855
- var deleteTranscriptResponseContentSafetyLabelsSeverityScoreSummaryLowMin = 0;
2856
- var deleteTranscriptResponseContentSafetyLabelsSeverityScoreSummaryLowMax = 1;
2857
- var deleteTranscriptResponseContentSafetyLabelsSeverityScoreSummaryMediumMin = 0;
2858
- var deleteTranscriptResponseContentSafetyLabelsSeverityScoreSummaryMediumMax = 1;
2859
- var deleteTranscriptResponseContentSafetyLabelsSeverityScoreSummaryHighMin = 0;
2860
- var deleteTranscriptResponseContentSafetyLabelsSeverityScoreSummaryHighMax = 1;
2861
- var deleteTranscriptResponseIabCategoriesResultResultsItemLabelsItemRelevanceMin = 0;
2862
- var deleteTranscriptResponseIabCategoriesResultResultsItemLabelsItemRelevanceMax = 1;
2863
- var deleteTranscriptResponseIabCategoriesResultSummaryMinOne = 0;
2864
- var deleteTranscriptResponseIabCategoriesResultSummaryMaxOne = 1;
2865
- var deleteTranscriptResponseSentimentAnalysisResultsItemConfidenceMin = 0;
2866
- var deleteTranscriptResponseSentimentAnalysisResultsItemConfidenceMax = 1;
2867
- var deleteTranscriptResponseSpeechThresholdMin = 0;
2868
- var deleteTranscriptResponseSpeechThresholdMax = 1;
2869
- var deleteTranscriptResponse = zod3.object({
2870
- id: zod3.string().uuid().describe("The unique identifier of your transcript"),
2871
- audio_url: zod3.string().describe("The URL of the media that was transcribed"),
2872
2574
  status: zod3.enum(["queued", "processing", "completed", "error"]).describe(
2873
2575
  "The status of your transcript. Possible values are queued, processing, completed, or error."
2874
2576
  ),
2875
- language_code: zod3.enum([
2876
- "en",
2877
- "en_au",
2878
- "en_uk",
2879
- "en_us",
2880
- "es",
2881
- "fr",
2882
- "de",
2883
- "it",
2884
- "pt",
2885
- "nl",
2886
- "af",
2887
- "sq",
2888
- "am",
2889
- "ar",
2890
- "hy",
2891
- "as",
2892
- "az",
2893
- "ba",
2894
- "eu",
2895
- "be",
2896
- "bn",
2897
- "bs",
2898
- "br",
2899
- "bg",
2900
- "my",
2901
- "ca",
2902
- "zh",
2903
- "hr",
2904
- "cs",
2905
- "da",
2906
- "et",
2907
- "fo",
2908
- "fi",
2909
- "gl",
2910
- "ka",
2911
- "el",
2912
- "gu",
2913
- "ht",
2914
- "ha",
2915
- "haw",
2916
- "he",
2917
- "hi",
2918
- "hu",
2919
- "is",
2920
- "id",
2921
- "ja",
2922
- "jw",
2923
- "kn",
2924
- "kk",
2925
- "km",
2926
- "ko",
2927
- "lo",
2928
- "la",
2929
- "lv",
2930
- "ln",
2931
- "lt",
2932
- "lb",
2933
- "mk",
2934
- "mg",
2935
- "ms",
2936
- "ml",
2937
- "mt",
2938
- "mi",
2939
- "mr",
2940
- "mn",
2941
- "ne",
2942
- "no",
2943
- "nn",
2944
- "oc",
2945
- "pa",
2946
- "ps",
2947
- "fa",
2948
- "pl",
2949
- "ro",
2950
- "ru",
2951
- "sa",
2952
- "sr",
2953
- "sn",
2954
- "sd",
2955
- "si",
2956
- "sk",
2957
- "sl",
2958
- "so",
2959
- "su",
2960
- "sw",
2961
- "sv",
2962
- "tl",
2963
- "tg",
2964
- "ta",
2965
- "tt",
2966
- "te",
2967
- "th",
2968
- "bo",
2969
- "tr",
2970
- "tk",
2971
- "uk",
2972
- "ur",
2973
- "uz",
2974
- "vi",
2975
- "cy",
2976
- "yi",
2977
- "yo"
2978
- ]).describe(
2979
- "The language of your audio file. Possible values are found in [Supported Languages](https://www.assemblyai.com/docs/concepts/supported-languages).\nThe default value is 'en_us'.\n"
2980
- ).or(zod3.string()).optional().describe(
2981
- "The language of your audio file.\nPossible values are found in [Supported Languages](https://www.assemblyai.com/docs/concepts/supported-languages).\nThe default value is 'en_us'.\n"
2577
+ summarization: zod3.boolean().describe(
2578
+ "Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization) is enabled, either true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
2982
2579
  ),
2983
- language_detection: zod3.boolean().nullish().describe(
2984
- "Whether [Automatic language detection](https://www.assemblyai.com/docs/models/speech-recognition#automatic-language-detection) is enabled, either true or false"
2580
+ summary: zod3.string().nullish().describe(
2581
+ "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details."
2582
+ ),
2583
+ summary_model: zod3.string().nullish().describe(
2584
+ "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details.\n"
2985
2585
  ),
2986
- language_confidence_threshold: zod3.number().min(deleteTranscriptResponseLanguageConfidenceThresholdMin).max(deleteTranscriptResponseLanguageConfidenceThresholdMax).nullable().describe(
2987
- "The confidence threshold for the automatically detected language.\nAn error will be returned if the language confidence is below this threshold.\n"
2586
+ summary_type: zod3.string().nullish().describe(
2587
+ "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details."
2988
2588
  ),
2989
- language_confidence: zod3.number().min(deleteTranscriptResponseLanguageConfidenceMin).max(deleteTranscriptResponseLanguageConfidenceMax).nullable().describe(
2990
- "The confidence score for the detected language, between 0.0 (low confidence) and 1.0 (high confidence)"
2589
+ remove_audio_tags: zod3.enum(["all"]).describe(
2590
+ "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
2591
+ ).or(zod3.null()).optional().describe(
2592
+ "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
2991
2593
  ),
2992
- speech_model: zod3.enum(["best", "slam-1", "universal"]).describe("The speech model to use for the transcription.").or(zod3.null()).describe(
2993
- "The speech model used for the transcription. When `null`, the default model is used."
2594
+ temperature: zod3.number().nullish().describe(
2595
+ "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
2994
2596
  ),
2995
2597
  text: zod3.string().nullish().describe("The textual transcript of your media file"),
2996
- words: zod3.array(
2997
- zod3.object({
2998
- confidence: zod3.number().min(deleteTranscriptResponseWordsItemConfidenceMin).max(deleteTranscriptResponseWordsItemConfidenceMax).describe("The confidence score for the transcript of this word"),
2999
- start: zod3.number().describe("The starting time, in milliseconds, for the word"),
3000
- end: zod3.number().describe("The ending time, in milliseconds, for the word"),
3001
- text: zod3.string().describe("The text of the word"),
3002
- channel: zod3.string().nullish().describe(
3003
- "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
3004
- ),
3005
- speaker: zod3.string().nullable().describe(
3006
- "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/models/speaker-diarization) is enabled, else null"
3007
- )
3008
- })
3009
- ).nullish().describe(
3010
- "An array of temporally-sequential word objects, one for each word in the transcript.\nSee [Speech recognition](https://www.assemblyai.com/docs/models/speech-recognition) for more information.\n"
2598
+ throttled: zod3.boolean().nullish().describe(
2599
+ "True while a request is throttled and false when a request is no longer throttled"
3011
2600
  ),
3012
2601
  utterances: zod3.array(
3013
2602
  zod3.object({
3014
- confidence: zod3.number().min(deleteTranscriptResponseUtterancesItemConfidenceMin).max(deleteTranscriptResponseUtterancesItemConfidenceMax).describe("The confidence score for the transcript of this utterance"),
2603
+ confidence: zod3.number().describe("The confidence score for the transcript of this utterance"),
3015
2604
  start: zod3.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
3016
2605
  end: zod3.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
3017
2606
  text: zod3.string().describe("The text for this utterance"),
3018
2607
  words: zod3.array(
3019
2608
  zod3.object({
3020
- confidence: zod3.number().min(deleteTranscriptResponseUtterancesItemWordsItemConfidenceMin).max(deleteTranscriptResponseUtterancesItemWordsItemConfidenceMax).describe("The confidence score for the transcript of this word"),
2609
+ confidence: zod3.number().describe("The confidence score for the transcript of this word"),
3021
2610
  start: zod3.number().describe("The starting time, in milliseconds, for the word"),
3022
2611
  end: zod3.number().describe("The ending time, in milliseconds, for the word"),
3023
2612
  text: zod3.string().describe("The text of the word"),
@@ -3025,7 +2614,7 @@ var deleteTranscriptResponse = zod3.object({
3025
2614
  "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
3026
2615
  ),
3027
2616
  speaker: zod3.string().nullable().describe(
3028
- "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/models/speaker-diarization) is enabled, else null"
2617
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
3029
2618
  )
3030
2619
  })
3031
2620
  ).describe("The words in the utterance."),
@@ -3034,44 +2623,129 @@ var deleteTranscriptResponse = zod3.object({
3034
2623
  ),
3035
2624
  speaker: zod3.string().describe(
3036
2625
  'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
2626
+ ),
2627
+ translated_texts: zod3.record(zod3.string(), zod3.string()).optional().describe(
2628
+ 'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
3037
2629
  )
3038
2630
  })
3039
2631
  ).nullish().describe(
3040
- "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/speech-to-text/speaker-diarization) and [Multichannel transcription](https://www.assemblyai.com/docs/speech-to-text/speech-recognition#multichannel-transcription) for more information.\n"
2632
+ "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) for more information.\n"
3041
2633
  ),
3042
- confidence: zod3.number().min(deleteTranscriptResponseConfidenceMin).max(deleteTranscriptResponseConfidenceMax).nullish().describe(
3043
- "The confidence score for the transcript, between 0.0 (low confidence) and 1.0 (high confidence)"
2634
+ webhook_auth: zod3.boolean().describe(
2635
+ "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
3044
2636
  ),
3045
- audio_duration: zod3.number().nullish().describe("The duration of this transcript object's media file, in seconds"),
3046
- punctuate: zod3.boolean().nullish().describe("Whether Automatic Punctuation is enabled, either true or false"),
3047
- format_text: zod3.boolean().nullish().describe("Whether Text Formatting is enabled, either true or false"),
3048
- disfluencies: zod3.boolean().nullish().describe('Transcribe Filler Words, like "umm", in your media file; can be true or false'),
3049
- multichannel: zod3.boolean().nullish().describe(
3050
- "Whether [Multichannel transcription](https://www.assemblyai.com/docs/models/speech-recognition#multichannel-transcription) was enabled in the transcription request, either true or false"
2637
+ webhook_auth_header_name: zod3.string().nullish().describe(
2638
+ "The header name to be sent with the transcript completed or failed [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) requests"
3051
2639
  ),
3052
- audio_channels: zod3.number().optional().describe(
3053
- "The number of audio channels in the audio file. This is only present when multichannel is enabled."
2640
+ webhook_status_code: zod3.number().nullish().describe(
2641
+ "The status code we received from your server when delivering the transcript completed or failed [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) request, if a webhook URL was provided"
3054
2642
  ),
3055
2643
  webhook_url: zod3.string().nullish().describe(
3056
- "The URL to which we send webhook requests.\nWe sends two different types of webhook requests.\nOne request when a transcript is completed or failed, and one request when the redacted audio is ready if redact_pii_audio is enabled.\n"
3057
- ),
3058
- webhook_status_code: zod3.number().nullish().describe(
3059
- "The status code we received from your server when delivering the transcript completed or failed webhook request, if a webhook URL was provided"
2644
+ "The URL to which we send [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) requests.\n"
3060
2645
  ),
3061
- webhook_auth: zod3.boolean().describe("Whether webhook authentication details were provided"),
3062
- webhook_auth_header_name: zod3.string().nullish().describe(
3063
- "The header name to be sent with the transcript completed or failed webhook requests"
2646
+ words: zod3.array(
2647
+ zod3.object({
2648
+ confidence: zod3.number().describe("The confidence score for the transcript of this word"),
2649
+ start: zod3.number().describe("The starting time, in milliseconds, for the word"),
2650
+ end: zod3.number().describe("The ending time, in milliseconds, for the word"),
2651
+ text: zod3.string().describe("The text of the word"),
2652
+ channel: zod3.string().nullish().describe(
2653
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
2654
+ ),
2655
+ speaker: zod3.string().nullable().describe(
2656
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
2657
+ )
2658
+ })
2659
+ ).nullish().describe(
2660
+ "An array of temporally-sequential word objects, one for each word in the transcript.\n"
2661
+ ),
2662
+ acoustic_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
2663
+ custom_topics: zod3.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
2664
+ language_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
2665
+ speech_model: zod3.string().describe(
2666
+ "The speech model to use for the transcription. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models."
2667
+ ).or(zod3.null()).describe(
2668
+ "This parameter has been replaced with the `speech_models` parameter, learn more about the `speech_models` parameter [here](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model).\n"
2669
+ ),
2670
+ speed_boost: zod3.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
2671
+ topics: zod3.array(zod3.string()).optional().describe("This parameter does not currently have any functionality attached to it."),
2672
+ translated_texts: zod3.object({
2673
+ language_code: zod3.string().optional().describe("Translated text for this language code")
2674
+ }).optional().describe(
2675
+ "Translated text keyed by language code. See [Translation](https://www.assemblyai.com/docs/speech-understanding/translation) for more details."
2676
+ )
2677
+ }).describe("A transcript object");
2678
+ var listTranscriptsQueryLimitDefault = 10;
2679
+ var listTranscriptsQueryParams = zod3.object({
2680
+ limit: zod3.number().default(listTranscriptsQueryLimitDefault).describe("Maximum amount of transcripts to retrieve"),
2681
+ status: zod3.enum(["queued", "processing", "completed", "error"]).optional().describe("Filter by transcript status"),
2682
+ created_on: zod3.string().date().optional().describe("Only get transcripts created on this date"),
2683
+ before_id: zod3.string().uuid().optional().describe("Get transcripts that were created before this transcript ID"),
2684
+ after_id: zod3.string().uuid().optional().describe("Get transcripts that were created after this transcript ID"),
2685
+ throttled_only: zod3.boolean().optional().describe("Only get throttled transcripts, overrides the status filter")
2686
+ });
2687
+ var listTranscriptsResponse = zod3.object({
2688
+ page_details: zod3.object({
2689
+ limit: zod3.number().describe("The number of results this page is limited to"),
2690
+ result_count: zod3.number().describe("The actual number of results in the page"),
2691
+ current_url: zod3.string().describe("The URL used to retrieve the current page of transcripts"),
2692
+ prev_url: zod3.string().nullable().describe(
2693
+ "The URL to the previous page of transcripts. The previous URL always points to a page with older transcripts."
2694
+ ),
2695
+ next_url: zod3.string().nullable().describe(
2696
+ "The URL to the next page of transcripts. The next URL always points to a page with newer transcripts."
2697
+ )
2698
+ }).describe(
2699
+ "Details of the transcript page. Transcripts are sorted from newest to oldest. The previous URL always points to a page with older transcripts."
3064
2700
  ),
3065
- speed_boost: zod3.boolean().nullish().describe("Whether speed boost is enabled"),
3066
- auto_highlights: zod3.boolean().describe("Whether Key Phrases is enabled, either true or false"),
3067
- auto_highlights_result: zod3.object({
3068
- status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
3069
- results: zod3.array(
3070
- zod3.object({
3071
- count: zod3.number().describe("The total number of times the key phrase appears in the audio file"),
3072
- rank: zod3.number().min(deleteTranscriptResponseAutoHighlightsResultResultsItemRankMin).max(deleteTranscriptResponseAutoHighlightsResultResultsItemRankMax).describe(
3073
- "The total relevancy to the overall audio file of this key phrase - a greater number means more relevant"
3074
- ),
2701
+ transcripts: zod3.array(
2702
+ zod3.object({
2703
+ id: zod3.string().uuid().describe("The unique identifier for the transcript"),
2704
+ resource_url: zod3.string().describe("The URL to retrieve the transcript"),
2705
+ status: zod3.enum(["queued", "processing", "completed", "error"]).describe(
2706
+ "The status of your transcript. Possible values are queued, processing, completed, or error."
2707
+ ),
2708
+ created: zod3.string().datetime({}).describe("The date and time the transcript was created"),
2709
+ completed: zod3.string().datetime({}).optional().describe("The date and time the transcript was completed"),
2710
+ audio_url: zod3.string().describe("The URL to the audio file"),
2711
+ error: zod3.string().nullable().describe("Error message of why the transcript failed")
2712
+ })
2713
+ ).describe("An array of transcripts")
2714
+ }).describe(
2715
+ "A list of transcripts. Transcripts are sorted from newest to oldest. The previous URL always points to a page with older transcripts."
2716
+ );
2717
+ var getTranscriptParams = zod3.object({
2718
+ transcript_id: zod3.string().describe("ID of the transcript")
2719
+ });
2720
+ var getTranscriptResponseLanguageDetectionOptionsFallbackLanguageDefault = "auto";
2721
+ var getTranscriptResponseLanguageDetectionOptionsCodeSwitchingConfidenceThresholdDefault = 0.3;
2722
+ var getTranscriptResponseSpeechUnderstandingRequestTranslationFormalDefault = true;
2723
+ var getTranscriptResponse = zod3.object({
2724
+ audio_channels: zod3.number().optional().describe(
2725
+ "The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) is enabled."
2726
+ ),
2727
+ audio_duration: zod3.number().nullish().describe("The duration of this transcript object's media file, in seconds"),
2728
+ audio_end_at: zod3.number().nullish().describe(
2729
+ "The point in time, in milliseconds, in the file at which the transcription was terminated. See [Set the start and end of the transcript](https://www.assemblyai.com/docs/pre-recorded-audio/set-the-start-and-end-of-the-transcript) for more details."
2730
+ ),
2731
+ audio_start_from: zod3.number().nullish().describe(
2732
+ "The point in time, in milliseconds, in the file at which the transcription was started. See [Set the start and end of the transcript](https://www.assemblyai.com/docs/pre-recorded-audio/set-the-start-and-end-of-the-transcript) for more details."
2733
+ ),
2734
+ audio_url: zod3.string().describe("The URL of the media that was transcribed"),
2735
+ auto_chapters: zod3.boolean().nullish().describe(
2736
+ "Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) is enabled, can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
2737
+ ),
2738
+ auto_highlights: zod3.boolean().describe(
2739
+ "Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) is enabled, either true or false"
2740
+ ),
2741
+ auto_highlights_result: zod3.object({
2742
+ status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
2743
+ results: zod3.array(
2744
+ zod3.object({
2745
+ count: zod3.number().describe("The total number of times the key phrase appears in the audio file"),
2746
+ rank: zod3.number().describe(
2747
+ "The total relevancy to the overall audio file of this key phrase - a greater number means more relevant"
2748
+ ),
3075
2749
  text: zod3.string().describe("The text itself of the key phrase"),
3076
2750
  timestamps: zod3.array(
3077
2751
  zod3.object({
@@ -3082,93 +2756,28 @@ var deleteTranscriptResponse = zod3.object({
3082
2756
  })
3083
2757
  ).describe("A temporally-sequential array of Key Phrases")
3084
2758
  }).describe(
3085
- "An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/models/key-phrases) for more information.\n"
3086
- ).or(zod3.null()).optional().describe(
3087
- "An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/models/key-phrases) for more information.\n"
3088
- ),
3089
- audio_start_from: zod3.number().nullish().describe(
3090
- "The point in time, in milliseconds, in the file at which the transcription was started"
3091
- ),
3092
- audio_end_at: zod3.number().nullish().describe(
3093
- "The point in time, in milliseconds, in the file at which the transcription was terminated"
3094
- ),
3095
- word_boost: zod3.array(zod3.string()).optional().describe("The list of custom vocabulary to boost transcription probability for"),
3096
- boost_param: zod3.string().nullish().describe("The word boost parameter value"),
3097
- filter_profanity: zod3.boolean().nullish().describe(
3098
- "Whether [Profanity Filtering](https://www.assemblyai.com/docs/models/speech-recognition#profanity-filtering) is enabled, either true or false"
3099
- ),
3100
- redact_pii: zod3.boolean().describe(
3101
- "Whether [PII Redaction](https://www.assemblyai.com/docs/models/pii-redaction) is enabled, either true or false"
3102
- ),
3103
- redact_pii_audio: zod3.boolean().nullish().describe(
3104
- "Whether a redacted version of the audio file was generated,\neither true or false. See [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more information.\n"
3105
- ),
3106
- redact_pii_audio_quality: zod3.enum(["mp3", "wav"]).describe(
3107
- "Controls the filetype of the audio created by redact_pii_audio. Currently supports mp3 (default) and wav. See [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more details."
2759
+ "An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) for more information.\n"
3108
2760
  ).or(zod3.null()).optional().describe(
3109
- "The audio quality of the PII-redacted audio file, if redact_pii_audio is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more information.\n"
2761
+ "An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) for more information.\n"
3110
2762
  ),
3111
- redact_pii_policies: zod3.array(
3112
- zod3.enum([
3113
- "account_number",
3114
- "banking_information",
3115
- "blood_type",
3116
- "credit_card_cvv",
3117
- "credit_card_expiration",
3118
- "credit_card_number",
3119
- "date",
3120
- "date_interval",
3121
- "date_of_birth",
3122
- "drivers_license",
3123
- "drug",
3124
- "duration",
3125
- "email_address",
3126
- "event",
3127
- "filename",
3128
- "gender_sexuality",
3129
- "healthcare_number",
3130
- "injury",
3131
- "ip_address",
3132
- "language",
3133
- "location",
3134
- "marital_status",
3135
- "medical_condition",
3136
- "medical_process",
3137
- "money_amount",
3138
- "nationality",
3139
- "number_sequence",
3140
- "occupation",
3141
- "organization",
3142
- "passport_number",
3143
- "password",
3144
- "person_age",
3145
- "person_name",
3146
- "phone_number",
3147
- "physical_attribute",
3148
- "political_affiliation",
3149
- "religion",
3150
- "statistics",
3151
- "time",
3152
- "url",
3153
- "us_social_security_number",
3154
- "username",
3155
- "vehicle_id",
3156
- "zodiac_sign"
3157
- ]).describe("The type of PII to redact")
2763
+ chapters: zod3.array(
2764
+ zod3.object({
2765
+ gist: zod3.string().describe(
2766
+ "An ultra-short summary (just a few words) of the content spoken in the chapter"
2767
+ ),
2768
+ headline: zod3.string().describe("A single sentence summary of the content spoken during the chapter"),
2769
+ summary: zod3.string().describe("A one paragraph summary of the content spoken during the chapter"),
2770
+ start: zod3.number().describe("The starting time, in milliseconds, for the chapter"),
2771
+ end: zod3.number().describe("The ending time, in milliseconds, for the chapter")
2772
+ }).describe("Chapter of the audio file")
3158
2773
  ).nullish().describe(
3159
- "The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more information.\n"
3160
- ),
3161
- redact_pii_sub: zod3.enum(["entity_name", "hash"]).optional().describe(
3162
- 'The replacement logic for detected PII, can be "entity_name" or "hash". See [PII redaction](https://www.assemblyai.com/docs/models/pii-redaction) for more details.'
2774
+ "An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) for more information."
3163
2775
  ),
3164
- speaker_labels: zod3.boolean().nullish().describe(
3165
- "Whether [Speaker diarization](https://www.assemblyai.com/docs/models/speaker-diarization) is enabled, can be true or false"
3166
- ),
3167
- speakers_expected: zod3.number().nullish().describe(
3168
- "Tell the speaker label model how many speakers it should attempt to identify. See [Speaker diarization](https://www.assemblyai.com/docs/models/speaker-diarization) for more details."
2776
+ confidence: zod3.number().nullish().describe(
2777
+ "The confidence score for the transcript, between 0.0 (low confidence) and 1.0 (high confidence)"
3169
2778
  ),
3170
2779
  content_safety: zod3.boolean().nullish().describe(
3171
- "Whether [Content Moderation](https://www.assemblyai.com/docs/models/content-moderation) is enabled, can be true or false"
2780
+ "Whether [Content Moderation](https://www.assemblyai.com/docs/content-moderation) is enabled, can be true or false"
3172
2781
  ),
3173
2782
  content_safety_labels: zod3.object({
3174
2783
  status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
@@ -3178,16 +2787,8 @@ var deleteTranscriptResponse = zod3.object({
3178
2787
  labels: zod3.array(
3179
2788
  zod3.object({
3180
2789
  label: zod3.string().describe("The label of the sensitive topic"),
3181
- confidence: zod3.number().min(
3182
- deleteTranscriptResponseContentSafetyLabelsResultsItemLabelsItemConfidenceMin
3183
- ).max(
3184
- deleteTranscriptResponseContentSafetyLabelsResultsItemLabelsItemConfidenceMax
3185
- ).describe("The confidence score for the topic being discussed, from 0 to 1"),
3186
- severity: zod3.number().min(
3187
- deleteTranscriptResponseContentSafetyLabelsResultsItemLabelsItemSeverityMin
3188
- ).max(
3189
- deleteTranscriptResponseContentSafetyLabelsResultsItemLabelsItemSeverityMax
3190
- ).describe("How severely the topic is discussed in the section, from 0 to 1")
2790
+ confidence: zod3.number().describe("The confidence score for the topic being discussed, from 0 to 1"),
2791
+ severity: zod3.number().describe("How severely the topic is discussed in the section, from 0 to 1")
3191
2792
  })
3192
2793
  ).describe(
3193
2794
  "An array of safety labels, one per sensitive topic that was detected in the section"
@@ -3200,31 +2801,111 @@ var deleteTranscriptResponse = zod3.object({
3200
2801
  }).describe("Timestamp containing a start and end property in milliseconds")
3201
2802
  })
3202
2803
  ).describe("An array of results for the Content Moderation model"),
3203
- summary: zod3.record(
3204
- zod3.string(),
3205
- zod3.number().min(deleteTranscriptResponseContentSafetyLabelsSummaryMinOne).max(deleteTranscriptResponseContentSafetyLabelsSummaryMaxOne).describe(
3206
- 'A confidence score for the presence of the sensitive topic "topic" across the entire audio file'
3207
- )
3208
- ).describe(
2804
+ summary: zod3.record(zod3.string(), zod3.number()).describe(
3209
2805
  "A summary of the Content Moderation confidence results for the entire audio file"
3210
2806
  ),
3211
2807
  severity_score_summary: zod3.record(
3212
2808
  zod3.string(),
3213
2809
  zod3.object({
3214
- low: zod3.number().min(deleteTranscriptResponseContentSafetyLabelsSeverityScoreSummaryLowMin).max(deleteTranscriptResponseContentSafetyLabelsSeverityScoreSummaryLowMax),
3215
- medium: zod3.number().min(deleteTranscriptResponseContentSafetyLabelsSeverityScoreSummaryMediumMin).max(deleteTranscriptResponseContentSafetyLabelsSeverityScoreSummaryMediumMax),
3216
- high: zod3.number().min(deleteTranscriptResponseContentSafetyLabelsSeverityScoreSummaryHighMin).max(deleteTranscriptResponseContentSafetyLabelsSeverityScoreSummaryHighMax)
2810
+ low: zod3.number(),
2811
+ medium: zod3.number(),
2812
+ high: zod3.number()
3217
2813
  })
3218
2814
  ).describe(
3219
2815
  "A summary of the Content Moderation severity results for the entire audio file"
3220
2816
  )
3221
2817
  }).describe(
3222
- "An array of results for the Content Moderation model, if it is enabled.\nSee [Content moderation](https://www.assemblyai.com/docs/models/content-moderation) for more information.\n"
2818
+ "An array of results for the Content Moderation model, if it is enabled.\nSee [Content moderation](https://www.assemblyai.com/docs/content-moderation) for more information.\n"
3223
2819
  ).or(zod3.null()).optional().describe(
3224
- "An array of results for the Content Moderation model, if it is enabled.\nSee [Content moderation](https://www.assemblyai.com/docs/models/content-moderation) for more information.\n"
2820
+ "An array of results for the Content Moderation model, if it is enabled.\nSee [Content moderation](https://www.assemblyai.com/docs/content-moderation) for more information.\n"
2821
+ ),
2822
+ custom_spelling: zod3.array(
2823
+ zod3.object({
2824
+ from: zod3.array(zod3.string()).describe("Words or phrases to replace"),
2825
+ to: zod3.string().describe("Word to replace with")
2826
+ }).describe(
2827
+ "Object containing words or phrases to replace, and the word or phrase to replace with"
2828
+ )
2829
+ ).nullish().describe(
2830
+ "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/custom-spelling) for more details."
2831
+ ),
2832
+ disfluencies: zod3.boolean().nullish().describe(
2833
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/filler-words), like "umm", in your media file; can be true or false'
2834
+ ),
2835
+ domain: zod3.string().nullish().describe(
2836
+ 'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
2837
+ ),
2838
+ entities: zod3.array(
2839
+ zod3.object({
2840
+ entity_type: zod3.enum([
2841
+ "account_number",
2842
+ "banking_information",
2843
+ "blood_type",
2844
+ "credit_card_cvv",
2845
+ "credit_card_expiration",
2846
+ "credit_card_number",
2847
+ "date",
2848
+ "date_interval",
2849
+ "date_of_birth",
2850
+ "drivers_license",
2851
+ "drug",
2852
+ "duration",
2853
+ "email_address",
2854
+ "event",
2855
+ "filename",
2856
+ "gender_sexuality",
2857
+ "healthcare_number",
2858
+ "injury",
2859
+ "ip_address",
2860
+ "language",
2861
+ "location",
2862
+ "marital_status",
2863
+ "medical_condition",
2864
+ "medical_process",
2865
+ "money_amount",
2866
+ "nationality",
2867
+ "number_sequence",
2868
+ "occupation",
2869
+ "organization",
2870
+ "passport_number",
2871
+ "password",
2872
+ "person_age",
2873
+ "person_name",
2874
+ "phone_number",
2875
+ "physical_attribute",
2876
+ "political_affiliation",
2877
+ "religion",
2878
+ "statistics",
2879
+ "time",
2880
+ "url",
2881
+ "us_social_security_number",
2882
+ "username",
2883
+ "vehicle_id",
2884
+ "zodiac_sign"
2885
+ ]).describe("The type of entity for the detected entity"),
2886
+ text: zod3.string().describe("The text for the detected entity"),
2887
+ start: zod3.number().describe(
2888
+ "The starting time, in milliseconds, at which the detected entity appears in the audio file"
2889
+ ),
2890
+ end: zod3.number().describe(
2891
+ "The ending time, in milliseconds, for the detected entity in the audio file"
2892
+ )
2893
+ }).describe("A detected entity")
2894
+ ).nullish().describe(
2895
+ "An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/entity-detection) for more information.\n"
2896
+ ),
2897
+ entity_detection: zod3.boolean().nullish().describe(
2898
+ "Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/entity-detection) is enabled, can be true or false"
2899
+ ),
2900
+ error: zod3.string().optional().describe("Error message of why the transcript failed"),
2901
+ filter_profanity: zod3.boolean().nullish().describe(
2902
+ "Whether [Profanity Filtering](https://www.assemblyai.com/docs/profanity-filtering) is enabled, either true or false"
2903
+ ),
2904
+ format_text: zod3.boolean().nullish().describe(
2905
+ "Whether [Text Formatting](https://www.assemblyai.com/docs/pre-recorded-audio) is enabled, either true or false"
3225
2906
  ),
3226
2907
  iab_categories: zod3.boolean().nullish().describe(
3227
- "Whether [Topic Detection](https://www.assemblyai.com/docs/models/topic-detection) is enabled, can be true or false"
2908
+ "Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) is enabled, can be true or false"
3228
2909
  ),
3229
2910
  iab_categories_result: zod3.object({
3230
2911
  status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
@@ -3233,11 +2914,7 @@ var deleteTranscriptResponse = zod3.object({
3233
2914
  text: zod3.string().describe("The text in the transcript in which a detected topic occurs"),
3234
2915
  labels: zod3.array(
3235
2916
  zod3.object({
3236
- relevance: zod3.number().min(
3237
- deleteTranscriptResponseIabCategoriesResultResultsItemLabelsItemRelevanceMin
3238
- ).max(
3239
- deleteTranscriptResponseIabCategoriesResultResultsItemLabelsItemRelevanceMax
3240
- ).describe("How relevant the detected topic is of a detected topic"),
2917
+ relevance: zod3.number().describe("How relevant the detected topic is"),
3241
2918
  label: zod3.string().describe(
3242
2919
  "The IAB taxonomical label for the label of the detected topic, where > denotes supertopic/subtopic relationship"
3243
2920
  )
@@ -3249,57 +2926,1121 @@ var deleteTranscriptResponse = zod3.object({
3249
2926
  }).optional().describe("Timestamp containing a start and end property in milliseconds")
3250
2927
  }).describe("The result of the topic detection model")
3251
2928
  ).describe("An array of results for the Topic Detection model"),
3252
- summary: zod3.record(
3253
- zod3.string(),
3254
- zod3.number().min(deleteTranscriptResponseIabCategoriesResultSummaryMinOne).max(deleteTranscriptResponseIabCategoriesResultSummaryMaxOne)
3255
- ).describe("The overall relevance of topic to the entire audio file")
2929
+ summary: zod3.record(zod3.string(), zod3.number()).describe("The overall relevance of topic to the entire audio file")
3256
2930
  }).describe(
3257
- "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/models/topic-detection) for more information.\n"
2931
+ "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) for more information.\n"
3258
2932
  ).or(zod3.null()).optional().describe(
3259
- "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/models/topic-detection) for more information.\n"
2933
+ "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) for more information.\n"
3260
2934
  ),
3261
- custom_spelling: zod3.array(
3262
- zod3.object({
3263
- from: zod3.array(zod3.string().describe("Word or phrase to replace")).describe("Words or phrases to replace"),
3264
- to: zod3.string().describe("Word to replace with")
3265
- }).describe(
3266
- "Object containing words or phrases to replace, and the word or phrase to replace with"
3267
- )
3268
- ).nullish().describe("Customize how words are spelled and formatted using to and from values"),
2935
+ id: zod3.string().uuid().describe("The unique identifier of your transcript"),
3269
2936
  keyterms_prompt: zod3.array(zod3.string()).optional().describe(
3270
- "Improve accuracy with up to 1000 domain-specific words or phrases (maximum 6 words per phrase).\n"
3271
- ),
3272
- prompt: zod3.string().optional().describe("This parameter does not currently have any functionality attached to it."),
3273
- auto_chapters: zod3.boolean().nullish().describe(
3274
- "Whether [Auto Chapters](https://www.assemblyai.com/docs/models/auto-chapters) is enabled, can be true or false"
3275
- ),
3276
- chapters: zod3.array(
3277
- zod3.object({
3278
- gist: zod3.string().describe(
3279
- "An ultra-short summary (just a few words) of the content spoken in the chapter"
3280
- ),
3281
- headline: zod3.string().describe("A single sentence summary of the content spoken during the chapter"),
3282
- summary: zod3.string().describe("A one paragraph summary of the content spoken during the chapter"),
3283
- start: zod3.number().describe("The starting time, in milliseconds, for the chapter"),
3284
- end: zod3.number().describe("The starting time, in milliseconds, for the chapter")
3285
- }).describe("Chapter of the audio file")
3286
- ).nullish().describe("An array of temporally sequential chapters for the audio file"),
3287
- summarization: zod3.boolean().describe(
3288
- "Whether [Summarization](https://www.assemblyai.com/docs/models/summarization) is enabled, either true or false"
3289
- ),
3290
- summary_type: zod3.string().nullish().describe(
3291
- "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/models/summarization) is enabled"
2937
+ "Improve accuracy with up to 200 (for Universal-2) or 1000 (for Universal-3 Pro) domain-specific words or phrases (maximum 6 words per phrase). See [Keyterms Prompting](https://www.assemblyai.com/docs/pre-recorded-audio/keyterms-prompting) for more details.\n"
3292
2938
  ),
3293
- summary_model: zod3.string().nullish().describe(
3294
- "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/models/summarization) is enabled\n"
2939
+ language_code: zod3.enum([
2940
+ "en",
2941
+ "en_au",
2942
+ "en_uk",
2943
+ "en_us",
2944
+ "es",
2945
+ "fr",
2946
+ "de",
2947
+ "it",
2948
+ "pt",
2949
+ "nl",
2950
+ "af",
2951
+ "sq",
2952
+ "am",
2953
+ "ar",
2954
+ "hy",
2955
+ "as",
2956
+ "az",
2957
+ "ba",
2958
+ "eu",
2959
+ "be",
2960
+ "bn",
2961
+ "bs",
2962
+ "br",
2963
+ "bg",
2964
+ "my",
2965
+ "ca",
2966
+ "zh",
2967
+ "hr",
2968
+ "cs",
2969
+ "da",
2970
+ "et",
2971
+ "fo",
2972
+ "fi",
2973
+ "gl",
2974
+ "ka",
2975
+ "el",
2976
+ "gu",
2977
+ "ht",
2978
+ "ha",
2979
+ "haw",
2980
+ "he",
2981
+ "hi",
2982
+ "hu",
2983
+ "is",
2984
+ "id",
2985
+ "ja",
2986
+ "jw",
2987
+ "kn",
2988
+ "kk",
2989
+ "km",
2990
+ "ko",
2991
+ "lo",
2992
+ "la",
2993
+ "lv",
2994
+ "ln",
2995
+ "lt",
2996
+ "lb",
2997
+ "mk",
2998
+ "mg",
2999
+ "ms",
3000
+ "ml",
3001
+ "mt",
3002
+ "mi",
3003
+ "mr",
3004
+ "mn",
3005
+ "ne",
3006
+ "no",
3007
+ "nn",
3008
+ "oc",
3009
+ "pa",
3010
+ "ps",
3011
+ "fa",
3012
+ "pl",
3013
+ "ro",
3014
+ "ru",
3015
+ "sa",
3016
+ "sr",
3017
+ "sn",
3018
+ "sd",
3019
+ "si",
3020
+ "sk",
3021
+ "sl",
3022
+ "so",
3023
+ "su",
3024
+ "sw",
3025
+ "sv",
3026
+ "tl",
3027
+ "tg",
3028
+ "ta",
3029
+ "tt",
3030
+ "te",
3031
+ "th",
3032
+ "bo",
3033
+ "tr",
3034
+ "tk",
3035
+ "uk",
3036
+ "ur",
3037
+ "uz",
3038
+ "vi",
3039
+ "cy",
3040
+ "yi",
3041
+ "yo"
3042
+ ]).optional().describe(
3043
+ "The language of your audio file. Possible values are found in [Supported Languages](https://www.assemblyai.com/docs/pre-recorded-audio/supported-languages).\nThe default value is 'en_us'.\n"
3044
+ ),
3045
+ language_codes: zod3.array(
3046
+ zod3.enum([
3047
+ "en",
3048
+ "en_au",
3049
+ "en_uk",
3050
+ "en_us",
3051
+ "es",
3052
+ "fr",
3053
+ "de",
3054
+ "it",
3055
+ "pt",
3056
+ "nl",
3057
+ "af",
3058
+ "sq",
3059
+ "am",
3060
+ "ar",
3061
+ "hy",
3062
+ "as",
3063
+ "az",
3064
+ "ba",
3065
+ "eu",
3066
+ "be",
3067
+ "bn",
3068
+ "bs",
3069
+ "br",
3070
+ "bg",
3071
+ "my",
3072
+ "ca",
3073
+ "zh",
3074
+ "hr",
3075
+ "cs",
3076
+ "da",
3077
+ "et",
3078
+ "fo",
3079
+ "fi",
3080
+ "gl",
3081
+ "ka",
3082
+ "el",
3083
+ "gu",
3084
+ "ht",
3085
+ "ha",
3086
+ "haw",
3087
+ "he",
3088
+ "hi",
3089
+ "hu",
3090
+ "is",
3091
+ "id",
3092
+ "ja",
3093
+ "jw",
3094
+ "kn",
3095
+ "kk",
3096
+ "km",
3097
+ "ko",
3098
+ "lo",
3099
+ "la",
3100
+ "lv",
3101
+ "ln",
3102
+ "lt",
3103
+ "lb",
3104
+ "mk",
3105
+ "mg",
3106
+ "ms",
3107
+ "ml",
3108
+ "mt",
3109
+ "mi",
3110
+ "mr",
3111
+ "mn",
3112
+ "ne",
3113
+ "no",
3114
+ "nn",
3115
+ "oc",
3116
+ "pa",
3117
+ "ps",
3118
+ "fa",
3119
+ "pl",
3120
+ "ro",
3121
+ "ru",
3122
+ "sa",
3123
+ "sr",
3124
+ "sn",
3125
+ "sd",
3126
+ "si",
3127
+ "sk",
3128
+ "sl",
3129
+ "so",
3130
+ "su",
3131
+ "sw",
3132
+ "sv",
3133
+ "tl",
3134
+ "tg",
3135
+ "ta",
3136
+ "tt",
3137
+ "te",
3138
+ "th",
3139
+ "bo",
3140
+ "tr",
3141
+ "tk",
3142
+ "uk",
3143
+ "ur",
3144
+ "uz",
3145
+ "vi",
3146
+ "cy",
3147
+ "yi",
3148
+ "yo"
3149
+ ]).describe(
3150
+ "The language of your audio file. Possible values are found in [Supported Languages](https://www.assemblyai.com/docs/pre-recorded-audio/supported-languages).\nThe default value is 'en_us'.\n"
3151
+ )
3152
+ ).nullish().describe(
3153
+ "The language codes of your audio file. Used for [Code switching](/docs/speech-to-text/pre-recorded-audio/code-switching)\nOne of the values specified must be `en`.\n"
3154
+ ),
3155
+ language_confidence: zod3.number().nullable().describe(
3156
+ "The confidence score for the detected language, between 0.0 (low confidence) and 1.0 (high confidence). See [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection) for more details."
3157
+ ),
3158
+ language_confidence_threshold: zod3.number().nullable().describe(
3159
+ "The confidence threshold for the automatically detected language.\nAn error will be returned if the language confidence is below this threshold.\nSee [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection) for more details.\n"
3160
+ ),
3161
+ language_detection: zod3.boolean().nullish().describe(
3162
+ "Whether [Automatic language detection](/docs/pre-recorded-audio/automatic-language-detection) is enabled, either true or false"
3163
+ ),
3164
+ language_detection_options: zod3.object({
3165
+ expected_languages: zod3.array(zod3.string()).optional().describe(
3166
+ 'List of languages expected in the audio file. Defaults to `["all"]` when unspecified. See [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection) for more details.'
3167
+ ),
3168
+ fallback_language: zod3.string().default(getTranscriptResponseLanguageDetectionOptionsFallbackLanguageDefault).describe(
3169
+ 'If the detected language of the audio file is not in the list of expected languages, the `fallback_language` is used. Specify `["auto"]` to let our model choose the fallback language from `expected_languages` with the highest confidence score. See [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection) for more details.\n'
3170
+ ),
3171
+ code_switching: zod3.boolean().optional().describe(
3172
+ "Whether [code switching](/docs/speech-to-text/pre-recorded-audio/code-switching) should be detected.\n"
3173
+ ),
3174
+ code_switching_confidence_threshold: zod3.number().default(
3175
+ getTranscriptResponseLanguageDetectionOptionsCodeSwitchingConfidenceThresholdDefault
3176
+ ).describe(
3177
+ "The confidence threshold for [code switching](/docs/speech-to-text/pre-recorded-audio/code-switching) detection. If the code switching confidence is below this threshold, the transcript will be processed in the language with the highest `language_detection_confidence` score.\n"
3178
+ )
3179
+ }).optional().describe(
3180
+ "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
3181
+ ),
3182
+ multichannel: zod3.boolean().nullish().describe(
3183
+ "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) was enabled in the transcription request, either true or false"
3184
+ ),
3185
+ prompt: zod3.string().optional().describe(
3186
+ "Provide natural language prompting of up to 1,500 words of contextual information to the model. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for best practices.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
3187
+ ),
3188
+ punctuate: zod3.boolean().nullish().describe(
3189
+ "Whether [Automatic Punctuation](https://www.assemblyai.com/docs/pre-recorded-audio) is enabled, either true or false"
3190
+ ),
3191
+ redact_pii: zod3.boolean().describe(
3192
+ "Whether [PII Redaction](https://www.assemblyai.com/docs/pii-redaction) is enabled, either true or false"
3193
+ ),
3194
+ redact_pii_audio: zod3.boolean().nullish().describe(
3195
+ "Whether a redacted version of the audio file was generated,\neither true or false. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction#request-for-redacted-audio) for more information.\n"
3196
+ ),
3197
+ redact_pii_audio_options: zod3.object({
3198
+ return_redacted_no_speech_audio: zod3.boolean().optional().describe(
3199
+ "By default, audio redaction provides redacted audio URLs only when speech is detected. However, if your use-case specifically requires redacted audio files even for silent audio files without any dialogue, you can opt to receive these URLs by setting this parameter to `true`."
3200
+ ),
3201
+ override_audio_redaction_method: zod3.enum(["silence"]).optional().describe(
3202
+ "Specify the method used to redact audio. By default, redacted audio uses a beep sound. Set to `silence` to replace PII with silence instead of a beep."
3203
+ )
3204
+ }).optional().describe(
3205
+ "The options for PII-redacted audio, if redact_pii_audio is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction#request-for-redacted-audio) for more information.\n"
3206
+ ),
3207
+ redact_pii_audio_quality: zod3.enum(["mp3", "wav"]).describe(
3208
+ "Controls the filetype of the audio created by redact_pii_audio. Currently supports mp3 (default) and wav. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction#request-for-redacted-audio) for more details."
3209
+ ).or(zod3.null()).optional().describe(
3210
+ "The audio quality of the PII-redacted audio file, if redact_pii_audio is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction#request-for-redacted-audio) for more information.\n"
3211
+ ),
3212
+ redact_pii_policies: zod3.array(
3213
+ zod3.enum([
3214
+ "account_number",
3215
+ "banking_information",
3216
+ "blood_type",
3217
+ "credit_card_cvv",
3218
+ "credit_card_expiration",
3219
+ "credit_card_number",
3220
+ "date",
3221
+ "date_interval",
3222
+ "date_of_birth",
3223
+ "drivers_license",
3224
+ "drug",
3225
+ "duration",
3226
+ "email_address",
3227
+ "event",
3228
+ "filename",
3229
+ "gender_sexuality",
3230
+ "healthcare_number",
3231
+ "injury",
3232
+ "ip_address",
3233
+ "language",
3234
+ "location",
3235
+ "marital_status",
3236
+ "medical_condition",
3237
+ "medical_process",
3238
+ "money_amount",
3239
+ "nationality",
3240
+ "number_sequence",
3241
+ "occupation",
3242
+ "organization",
3243
+ "passport_number",
3244
+ "password",
3245
+ "person_age",
3246
+ "person_name",
3247
+ "phone_number",
3248
+ "physical_attribute",
3249
+ "political_affiliation",
3250
+ "religion",
3251
+ "statistics",
3252
+ "time",
3253
+ "url",
3254
+ "us_social_security_number",
3255
+ "username",
3256
+ "vehicle_id",
3257
+ "zodiac_sign"
3258
+ ]).describe("The type of PII to redact")
3259
+ ).nullish().describe(
3260
+ "The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
3261
+ ),
3262
+ redact_pii_sub: zod3.enum(["entity_name", "hash"]).optional().describe(
3263
+ "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
3264
+ ),
3265
+ sentiment_analysis: zod3.boolean().nullish().describe(
3266
+ "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-analysis) is enabled, can be true or false"
3267
+ ),
3268
+ sentiment_analysis_results: zod3.array(
3269
+ zod3.object({
3270
+ text: zod3.string().describe("The transcript of the sentence"),
3271
+ start: zod3.number().describe("The starting time, in milliseconds, of the sentence"),
3272
+ end: zod3.number().describe("The ending time, in milliseconds, of the sentence"),
3273
+ sentiment: zod3.enum(["POSITIVE", "NEUTRAL", "NEGATIVE"]),
3274
+ confidence: zod3.number().describe(
3275
+ "The confidence score for the detected sentiment of the sentence, from 0 to 1"
3276
+ ),
3277
+ channel: zod3.string().nullish().describe(
3278
+ "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
3279
+ ),
3280
+ speaker: zod3.string().nullable().describe(
3281
+ "The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
3282
+ )
3283
+ }).describe("The result of the Sentiment Analysis model")
3284
+ ).nullish().describe(
3285
+ "An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-analysis) for more information.\n"
3286
+ ),
3287
+ speaker_labels: zod3.boolean().nullish().describe(
3288
+ "Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, can be true or false"
3289
+ ),
3290
+ speakers_expected: zod3.number().nullish().describe(
3291
+ "Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization#set-number-of-speakers-expected) for more details."
3292
+ ),
3293
+ speech_model_used: zod3.string().optional().describe(
3294
+ "The speech model to use for the transcription. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models."
3295
+ ),
3296
+ speech_models: zod3.array(
3297
+ zod3.string().describe(
3298
+ "The speech model to use for the transcription. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models."
3299
+ )
3300
+ ).nullish().describe(
3301
+ "List multiple speech models in priority order, allowing our system to automatically route your audio to the best available option. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models and routing behavior.\n"
3302
+ ),
3303
+ speech_threshold: zod3.number().nullish().describe(
3304
+ "Defaults to null. Reject audio files that contain less than this fraction of speech.\nValid values are in the range [0, 1] inclusive. See [Speech Threshold](https://www.assemblyai.com/docs/speech-threshold) for more details.\n"
3305
+ ),
3306
+ speech_understanding: zod3.object({
3307
+ request: zod3.object({
3308
+ translation: zod3.object({
3309
+ target_languages: zod3.array(zod3.string()).describe(
3310
+ 'List of target language codes (e.g., `["es", "de"]`). See [Translation](https://www.assemblyai.com/docs/speech-understanding/translation) for supported languages.'
3311
+ ),
3312
+ formal: zod3.boolean().default(getTranscriptResponseSpeechUnderstandingRequestTranslationFormalDefault).describe(
3313
+ "Use formal language style. See [Translation](https://www.assemblyai.com/docs/speech-understanding/translation) for more details."
3314
+ ),
3315
+ match_original_utterance: zod3.boolean().optional().describe(
3316
+ "When enabled with Speaker Labels, returns translated text in the utterances array. Each utterance will include a `translated_texts` key containing translations for each target language."
3317
+ )
3318
+ })
3319
+ }).describe(
3320
+ "Request body for [Translation](https://www.assemblyai.com/docs/speech-understanding/translation)."
3321
+ ).or(
3322
+ zod3.object({
3323
+ speaker_identification: zod3.object({
3324
+ speaker_type: zod3.enum(["role", "name"]).describe(
3325
+ "Type of speaker identification. See [Speaker Identification](https://www.assemblyai.com/docs/speech-understanding/speaker-identification) for details on each type."
3326
+ ),
3327
+ known_values: zod3.array(zod3.string()).optional().describe(
3328
+ 'Required if speaker_type is "role". Each value must be 35 characters or less.'
3329
+ ),
3330
+ speakers: zod3.array(
3331
+ zod3.object({
3332
+ role: zod3.string().optional().describe(
3333
+ 'The role of the speaker. Required when `speaker_type` is "role".'
3334
+ ),
3335
+ name: zod3.string().optional().describe(
3336
+ 'The name of the speaker. Required when `speaker_type` is "name".'
3337
+ ),
3338
+ description: zod3.string().optional().describe(
3339
+ "A description of the speaker to help the model identify them based on conversational context."
3340
+ )
3341
+ })
3342
+ ).optional().describe(
3343
+ "An array of speaker objects with metadata to improve identification accuracy. Each object should include a `role` or `name` (depending on `speaker_type`) and an optional `description` to help the model identify the speaker. You can also include any additional custom properties (e.g., `company`, `title`) to provide more context. Use this as an alternative to `known_values` when you want to provide additional context about each speaker."
3344
+ )
3345
+ })
3346
+ }).describe(
3347
+ "Request body for [Speaker Identification](https://www.assemblyai.com/docs/speech-understanding/speaker-identification)."
3348
+ )
3349
+ ).or(
3350
+ zod3.object({
3351
+ custom_formatting: zod3.object({
3352
+ date: zod3.string().optional().describe(
3353
+ 'Date format pattern (e.g., `"mm/dd/yyyy"`). See [Custom Formatting](https://www.assemblyai.com/docs/speech-understanding/custom-formatting) for more details.'
3354
+ ),
3355
+ phone_number: zod3.string().optional().describe(
3356
+ 'Phone number format pattern (e.g., `"(xxx)xxx-xxxx"`). See [Custom Formatting](https://www.assemblyai.com/docs/speech-understanding/custom-formatting) for more details.'
3357
+ ),
3358
+ email: zod3.string().optional().describe(
3359
+ 'Email format pattern (e.g., `"username@domain.com"`). See [Custom Formatting](https://www.assemblyai.com/docs/speech-understanding/custom-formatting) for more details.'
3360
+ )
3361
+ })
3362
+ }).describe(
3363
+ "Request body for [Custom Formatting](https://www.assemblyai.com/docs/speech-understanding/custom-formatting)."
3364
+ )
3365
+ ).optional(),
3366
+ response: zod3.object({
3367
+ translation: zod3.object({
3368
+ status: zod3.string().optional()
3369
+ }).optional()
3370
+ }).or(
3371
+ zod3.object({
3372
+ speaker_identification: zod3.object({
3373
+ mapping: zod3.record(zod3.string(), zod3.string()).optional().describe(
3374
+ 'A mapping of the original generic speaker labels (e.g., "A", "B") to the identified speaker names or roles.'
3375
+ ),
3376
+ status: zod3.string().optional()
3377
+ }).optional()
3378
+ })
3379
+ ).or(
3380
+ zod3.object({
3381
+ custom_formatting: zod3.object({
3382
+ mapping: zod3.record(zod3.string(), zod3.string()).optional(),
3383
+ formatted_text: zod3.string().optional()
3384
+ }).optional()
3385
+ })
3386
+ ).optional()
3387
+ }).optional().describe(
3388
+ "Speech understanding tasks like [Translation](https://www.assemblyai.com/docs/speech-understanding/translation), [Speaker Identification](https://www.assemblyai.com/docs/speech-understanding/speaker-identification), and [Custom Formatting](https://www.assemblyai.com/docs/speech-understanding/custom-formatting). See the task-specific docs for available options and configuration.\n"
3389
+ ),
3390
+ status: zod3.enum(["queued", "processing", "completed", "error"]).describe(
3391
+ "The status of your transcript. Possible values are queued, processing, completed, or error."
3392
+ ),
3393
+ summarization: zod3.boolean().describe(
3394
+ "Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization) is enabled, either true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
3395
+ ),
3396
+ summary: zod3.string().nullish().describe(
3397
+ "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details."
3398
+ ),
3399
+ summary_model: zod3.string().nullish().describe(
3400
+ "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details.\n"
3401
+ ),
3402
+ summary_type: zod3.string().nullish().describe(
3403
+ "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details."
3404
+ ),
3405
+ remove_audio_tags: zod3.enum(["all"]).describe(
3406
+ "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
3407
+ ).or(zod3.null()).optional().describe(
3408
+ "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
3409
+ ),
3410
+ temperature: zod3.number().nullish().describe(
3411
+ "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
3412
+ ),
3413
+ text: zod3.string().nullish().describe("The textual transcript of your media file"),
3414
+ throttled: zod3.boolean().nullish().describe(
3415
+ "True while a request is throttled and false when a request is no longer throttled"
3416
+ ),
3417
+ utterances: zod3.array(
3418
+ zod3.object({
3419
+ confidence: zod3.number().describe("The confidence score for the transcript of this utterance"),
3420
+ start: zod3.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
3421
+ end: zod3.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
3422
+ text: zod3.string().describe("The text for this utterance"),
3423
+ words: zod3.array(
3424
+ zod3.object({
3425
+ confidence: zod3.number().describe("The confidence score for the transcript of this word"),
3426
+ start: zod3.number().describe("The starting time, in milliseconds, for the word"),
3427
+ end: zod3.number().describe("The ending time, in milliseconds, for the word"),
3428
+ text: zod3.string().describe("The text of the word"),
3429
+ channel: zod3.string().nullish().describe(
3430
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
3431
+ ),
3432
+ speaker: zod3.string().nullable().describe(
3433
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
3434
+ )
3435
+ })
3436
+ ).describe("The words in the utterance."),
3437
+ channel: zod3.string().nullish().describe(
3438
+ "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
3439
+ ),
3440
+ speaker: zod3.string().describe(
3441
+ 'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
3442
+ ),
3443
+ translated_texts: zod3.record(zod3.string(), zod3.string()).optional().describe(
3444
+ 'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
3445
+ )
3446
+ })
3447
+ ).nullish().describe(
3448
+ "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) for more information.\n"
3449
+ ),
3450
+ webhook_auth: zod3.boolean().describe(
3451
+ "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
3452
+ ),
3453
+ webhook_auth_header_name: zod3.string().nullish().describe(
3454
+ "The header name to be sent with the transcript completed or failed [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) requests"
3455
+ ),
3456
+ webhook_status_code: zod3.number().nullish().describe(
3457
+ "The status code we received from your server when delivering the transcript completed or failed [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) request, if a webhook URL was provided"
3458
+ ),
3459
+ webhook_url: zod3.string().nullish().describe(
3460
+ "The URL to which we send [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) requests.\n"
3461
+ ),
3462
+ words: zod3.array(
3463
+ zod3.object({
3464
+ confidence: zod3.number().describe("The confidence score for the transcript of this word"),
3465
+ start: zod3.number().describe("The starting time, in milliseconds, for the word"),
3466
+ end: zod3.number().describe("The ending time, in milliseconds, for the word"),
3467
+ text: zod3.string().describe("The text of the word"),
3468
+ channel: zod3.string().nullish().describe(
3469
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
3470
+ ),
3471
+ speaker: zod3.string().nullable().describe(
3472
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
3473
+ )
3474
+ })
3475
+ ).nullish().describe(
3476
+ "An array of temporally-sequential word objects, one for each word in the transcript.\n"
3477
+ ),
3478
+ acoustic_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
3479
+ custom_topics: zod3.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
3480
+ language_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
3481
+ speech_model: zod3.string().describe(
3482
+ "The speech model to use for the transcription. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models."
3483
+ ).or(zod3.null()).describe(
3484
+ "This parameter has been replaced with the `speech_models` parameter, learn more about the `speech_models` parameter [here](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model).\n"
3485
+ ),
3486
+ speed_boost: zod3.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
3487
+ topics: zod3.array(zod3.string()).optional().describe("This parameter does not currently have any functionality attached to it."),
3488
+ translated_texts: zod3.object({
3489
+ language_code: zod3.string().optional().describe("Translated text for this language code")
3490
+ }).optional().describe(
3491
+ "Translated text keyed by language code. See [Translation](https://www.assemblyai.com/docs/speech-understanding/translation) for more details."
3492
+ )
3493
+ }).describe("A transcript object");
3494
+ var deleteTranscriptParams = zod3.object({
3495
+ transcript_id: zod3.string().describe("ID of the transcript")
3496
+ });
3497
+ var deleteTranscriptResponseLanguageDetectionOptionsFallbackLanguageDefault = "auto";
3498
+ var deleteTranscriptResponseLanguageDetectionOptionsCodeSwitchingConfidenceThresholdDefault = 0.3;
3499
+ var deleteTranscriptResponseSpeechUnderstandingRequestTranslationFormalDefault = true;
3500
+ var deleteTranscriptResponse = zod3.object({
3501
+ audio_channels: zod3.number().optional().describe(
3502
+ "The number of audio channels in the audio file. This is only present when [multichannel](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) is enabled."
3503
+ ),
3504
+ audio_duration: zod3.number().nullish().describe("The duration of this transcript object's media file, in seconds"),
3505
+ audio_end_at: zod3.number().nullish().describe(
3506
+ "The point in time, in milliseconds, in the file at which the transcription was terminated. See [Set the start and end of the transcript](https://www.assemblyai.com/docs/pre-recorded-audio/set-the-start-and-end-of-the-transcript) for more details."
3507
+ ),
3508
+ audio_start_from: zod3.number().nullish().describe(
3509
+ "The point in time, in milliseconds, in the file at which the transcription was started. See [Set the start and end of the transcript](https://www.assemblyai.com/docs/pre-recorded-audio/set-the-start-and-end-of-the-transcript) for more details."
3510
+ ),
3511
+ audio_url: zod3.string().describe("The URL of the media that was transcribed"),
3512
+ auto_chapters: zod3.boolean().nullish().describe(
3513
+ "Whether [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) is enabled, can be true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible chapter summaries. See the [updated Auto Chapters page](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
3514
+ ),
3515
+ auto_highlights: zod3.boolean().describe(
3516
+ "Whether [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) is enabled, either true or false"
3517
+ ),
3518
+ auto_highlights_result: zod3.object({
3519
+ status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
3520
+ results: zod3.array(
3521
+ zod3.object({
3522
+ count: zod3.number().describe("The total number of times the key phrase appears in the audio file"),
3523
+ rank: zod3.number().describe(
3524
+ "The total relevancy to the overall audio file of this key phrase - a greater number means more relevant"
3525
+ ),
3526
+ text: zod3.string().describe("The text itself of the key phrase"),
3527
+ timestamps: zod3.array(
3528
+ zod3.object({
3529
+ start: zod3.number().describe("The start time in milliseconds"),
3530
+ end: zod3.number().describe("The end time in milliseconds")
3531
+ }).describe("Timestamp containing a start and end property in milliseconds")
3532
+ ).describe("The timestamp of the of the key phrase")
3533
+ })
3534
+ ).describe("A temporally-sequential array of Key Phrases")
3535
+ }).describe(
3536
+ "An array of results for the Key Phrases model, if it is enabled.\nSee [Key phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) for more information.\n"
3537
+ ).or(zod3.null()).optional().describe(
3538
+ "An array of results for the Key Phrases model, if it is enabled.\nSee [Key Phrases](https://www.assemblyai.com/docs/speech-understanding/key-phrases) for more information.\n"
3539
+ ),
3540
+ chapters: zod3.array(
3541
+ zod3.object({
3542
+ gist: zod3.string().describe(
3543
+ "An ultra-short summary (just a few words) of the content spoken in the chapter"
3544
+ ),
3545
+ headline: zod3.string().describe("A single sentence summary of the content spoken during the chapter"),
3546
+ summary: zod3.string().describe("A one paragraph summary of the content spoken during the chapter"),
3547
+ start: zod3.number().describe("The starting time, in milliseconds, for the chapter"),
3548
+ end: zod3.number().describe("The starting time, in milliseconds, for the chapter")
3549
+ }).describe("Chapter of the audio file")
3550
+ ).nullish().describe(
3551
+ "An array of temporally sequential chapters for the audio file. See [Auto Chapters](https://www.assemblyai.com/docs/speech-understanding/auto-chapters) for more information."
3552
+ ),
3553
+ confidence: zod3.number().nullish().describe(
3554
+ "The confidence score for the transcript, between 0.0 (low confidence) and 1.0 (high confidence)"
3555
+ ),
3556
+ content_safety: zod3.boolean().nullish().describe(
3557
+ "Whether [Content Moderation](https://www.assemblyai.com/docs/content-moderation) is enabled, can be true or false"
3558
+ ),
3559
+ content_safety_labels: zod3.object({
3560
+ status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
3561
+ results: zod3.array(
3562
+ zod3.object({
3563
+ text: zod3.string().describe("The transcript of the section flagged by the Content Moderation model"),
3564
+ labels: zod3.array(
3565
+ zod3.object({
3566
+ label: zod3.string().describe("The label of the sensitive topic"),
3567
+ confidence: zod3.number().describe("The confidence score for the topic being discussed, from 0 to 1"),
3568
+ severity: zod3.number().describe("How severely the topic is discussed in the section, from 0 to 1")
3569
+ })
3570
+ ).describe(
3571
+ "An array of safety labels, one per sensitive topic that was detected in the section"
3572
+ ),
3573
+ sentences_idx_start: zod3.number().describe("The sentence index at which the section begins"),
3574
+ sentences_idx_end: zod3.number().describe("The sentence index at which the section ends"),
3575
+ timestamp: zod3.object({
3576
+ start: zod3.number().describe("The start time in milliseconds"),
3577
+ end: zod3.number().describe("The end time in milliseconds")
3578
+ }).describe("Timestamp containing a start and end property in milliseconds")
3579
+ })
3580
+ ).describe("An array of results for the Content Moderation model"),
3581
+ summary: zod3.record(zod3.string(), zod3.number()).describe(
3582
+ "A summary of the Content Moderation confidence results for the entire audio file"
3583
+ ),
3584
+ severity_score_summary: zod3.record(
3585
+ zod3.string(),
3586
+ zod3.object({
3587
+ low: zod3.number(),
3588
+ medium: zod3.number(),
3589
+ high: zod3.number()
3590
+ })
3591
+ ).describe(
3592
+ "A summary of the Content Moderation severity results for the entire audio file"
3593
+ )
3594
+ }).describe(
3595
+ "An array of results for the Content Moderation model, if it is enabled.\nSee [Content moderation](https://www.assemblyai.com/docs/content-moderation) for more information.\n"
3596
+ ).or(zod3.null()).optional().describe(
3597
+ "An array of results for the Content Moderation model, if it is enabled.\nSee [Content moderation](https://www.assemblyai.com/docs/content-moderation) for more information.\n"
3598
+ ),
3599
+ custom_spelling: zod3.array(
3600
+ zod3.object({
3601
+ from: zod3.array(zod3.string()).describe("Words or phrases to replace"),
3602
+ to: zod3.string().describe("Word to replace with")
3603
+ }).describe(
3604
+ "Object containing words or phrases to replace, and the word or phrase to replace with"
3605
+ )
3606
+ ).nullish().describe(
3607
+ "Customize how words are spelled and formatted using to and from values. See [Custom Spelling](https://www.assemblyai.com/docs/pre-recorded-audio/custom-spelling) for more details."
3608
+ ),
3609
+ disfluencies: zod3.boolean().nullish().describe(
3610
+ 'Transcribe [Filler Words](https://www.assemblyai.com/docs/pre-recorded-audio/filler-words), like "umm", in your media file; can be true or false'
3611
+ ),
3612
+ domain: zod3.string().nullish().describe(
3613
+ 'The domain-specific model applied to the transcript. When set to `"medical-v1"`, [Medical Mode](https://www.assemblyai.com/docs/pre-recorded-audio/medical-mode) was used to improve accuracy for medical terminology.\n'
3614
+ ),
3615
+ entities: zod3.array(
3616
+ zod3.object({
3617
+ entity_type: zod3.enum([
3618
+ "account_number",
3619
+ "banking_information",
3620
+ "blood_type",
3621
+ "credit_card_cvv",
3622
+ "credit_card_expiration",
3623
+ "credit_card_number",
3624
+ "date",
3625
+ "date_interval",
3626
+ "date_of_birth",
3627
+ "drivers_license",
3628
+ "drug",
3629
+ "duration",
3630
+ "email_address",
3631
+ "event",
3632
+ "filename",
3633
+ "gender_sexuality",
3634
+ "healthcare_number",
3635
+ "injury",
3636
+ "ip_address",
3637
+ "language",
3638
+ "location",
3639
+ "marital_status",
3640
+ "medical_condition",
3641
+ "medical_process",
3642
+ "money_amount",
3643
+ "nationality",
3644
+ "number_sequence",
3645
+ "occupation",
3646
+ "organization",
3647
+ "passport_number",
3648
+ "password",
3649
+ "person_age",
3650
+ "person_name",
3651
+ "phone_number",
3652
+ "physical_attribute",
3653
+ "political_affiliation",
3654
+ "religion",
3655
+ "statistics",
3656
+ "time",
3657
+ "url",
3658
+ "us_social_security_number",
3659
+ "username",
3660
+ "vehicle_id",
3661
+ "zodiac_sign"
3662
+ ]).describe("The type of entity for the detected entity"),
3663
+ text: zod3.string().describe("The text for the detected entity"),
3664
+ start: zod3.number().describe(
3665
+ "The starting time, in milliseconds, at which the detected entity appears in the audio file"
3666
+ ),
3667
+ end: zod3.number().describe(
3668
+ "The ending time, in milliseconds, for the detected entity in the audio file"
3669
+ )
3670
+ }).describe("A detected entity")
3671
+ ).nullish().describe(
3672
+ "An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/speech-understanding/entity-detection) for more information.\n"
3673
+ ),
3674
+ entity_detection: zod3.boolean().nullish().describe(
3675
+ "Whether [Entity Detection](https://www.assemblyai.com/docs/speech-understanding/entity-detection) is enabled, can be true or false"
3676
+ ),
3677
+ error: zod3.string().optional().describe("Error message of why the transcript failed"),
3678
+ filter_profanity: zod3.boolean().nullish().describe(
3679
+ "Whether [Profanity Filtering](https://www.assemblyai.com/docs/profanity-filtering) is enabled, either true or false"
3680
+ ),
3681
+ format_text: zod3.boolean().nullish().describe(
3682
+ "Whether [Text Formatting](https://www.assemblyai.com/docs/pre-recorded-audio) is enabled, either true or false"
3683
+ ),
3684
+ iab_categories: zod3.boolean().nullish().describe(
3685
+ "Whether [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) is enabled, can be true or false"
3686
+ ),
3687
+ iab_categories_result: zod3.object({
3688
+ status: zod3.enum(["success", "unavailable"]).describe("Either success, or unavailable in the rare case that the model failed"),
3689
+ results: zod3.array(
3690
+ zod3.object({
3691
+ text: zod3.string().describe("The text in the transcript in which a detected topic occurs"),
3692
+ labels: zod3.array(
3693
+ zod3.object({
3694
+ relevance: zod3.number().describe("How relevant the detected topic is of a detected topic"),
3695
+ label: zod3.string().describe(
3696
+ "The IAB taxonomical label for the label of the detected topic, where > denotes supertopic/subtopic relationship"
3697
+ )
3698
+ })
3699
+ ).optional().describe("An array of detected topics in the text"),
3700
+ timestamp: zod3.object({
3701
+ start: zod3.number().describe("The start time in milliseconds"),
3702
+ end: zod3.number().describe("The end time in milliseconds")
3703
+ }).optional().describe("Timestamp containing a start and end property in milliseconds")
3704
+ }).describe("The result of the topic detection model")
3705
+ ).describe("An array of results for the Topic Detection model"),
3706
+ summary: zod3.record(zod3.string(), zod3.number()).describe("The overall relevance of topic to the entire audio file")
3707
+ }).describe(
3708
+ "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) for more information.\n"
3709
+ ).or(zod3.null()).optional().describe(
3710
+ "The result of the Topic Detection model, if it is enabled.\nSee [Topic Detection](https://www.assemblyai.com/docs/speech-understanding/topic-detection) for more information.\n"
3711
+ ),
3712
+ id: zod3.string().uuid().describe("The unique identifier of your transcript"),
3713
+ keyterms_prompt: zod3.array(zod3.string()).optional().describe(
3714
+ "Improve accuracy with up to 200 (for Universal-2) or 1000 (for Universal-3 Pro) domain-specific words or phrases (maximum 6 words per phrase). See [Keyterms Prompting](https://www.assemblyai.com/docs/pre-recorded-audio/keyterms-prompting) for more details.\n"
3715
+ ),
3716
+ language_code: zod3.enum([
3717
+ "en",
3718
+ "en_au",
3719
+ "en_uk",
3720
+ "en_us",
3721
+ "es",
3722
+ "fr",
3723
+ "de",
3724
+ "it",
3725
+ "pt",
3726
+ "nl",
3727
+ "af",
3728
+ "sq",
3729
+ "am",
3730
+ "ar",
3731
+ "hy",
3732
+ "as",
3733
+ "az",
3734
+ "ba",
3735
+ "eu",
3736
+ "be",
3737
+ "bn",
3738
+ "bs",
3739
+ "br",
3740
+ "bg",
3741
+ "my",
3742
+ "ca",
3743
+ "zh",
3744
+ "hr",
3745
+ "cs",
3746
+ "da",
3747
+ "et",
3748
+ "fo",
3749
+ "fi",
3750
+ "gl",
3751
+ "ka",
3752
+ "el",
3753
+ "gu",
3754
+ "ht",
3755
+ "ha",
3756
+ "haw",
3757
+ "he",
3758
+ "hi",
3759
+ "hu",
3760
+ "is",
3761
+ "id",
3762
+ "ja",
3763
+ "jw",
3764
+ "kn",
3765
+ "kk",
3766
+ "km",
3767
+ "ko",
3768
+ "lo",
3769
+ "la",
3770
+ "lv",
3771
+ "ln",
3772
+ "lt",
3773
+ "lb",
3774
+ "mk",
3775
+ "mg",
3776
+ "ms",
3777
+ "ml",
3778
+ "mt",
3779
+ "mi",
3780
+ "mr",
3781
+ "mn",
3782
+ "ne",
3783
+ "no",
3784
+ "nn",
3785
+ "oc",
3786
+ "pa",
3787
+ "ps",
3788
+ "fa",
3789
+ "pl",
3790
+ "ro",
3791
+ "ru",
3792
+ "sa",
3793
+ "sr",
3794
+ "sn",
3795
+ "sd",
3796
+ "si",
3797
+ "sk",
3798
+ "sl",
3799
+ "so",
3800
+ "su",
3801
+ "sw",
3802
+ "sv",
3803
+ "tl",
3804
+ "tg",
3805
+ "ta",
3806
+ "tt",
3807
+ "te",
3808
+ "th",
3809
+ "bo",
3810
+ "tr",
3811
+ "tk",
3812
+ "uk",
3813
+ "ur",
3814
+ "uz",
3815
+ "vi",
3816
+ "cy",
3817
+ "yi",
3818
+ "yo"
3819
+ ]).optional().describe(
3820
+ "The language of your audio file. Possible values are found in [Supported Languages](https://www.assemblyai.com/docs/pre-recorded-audio/supported-languages).\nThe default value is 'en_us'.\n"
3821
+ ),
3822
+ language_codes: zod3.array(
3823
+ zod3.enum([
3824
+ "en",
3825
+ "en_au",
3826
+ "en_uk",
3827
+ "en_us",
3828
+ "es",
3829
+ "fr",
3830
+ "de",
3831
+ "it",
3832
+ "pt",
3833
+ "nl",
3834
+ "af",
3835
+ "sq",
3836
+ "am",
3837
+ "ar",
3838
+ "hy",
3839
+ "as",
3840
+ "az",
3841
+ "ba",
3842
+ "eu",
3843
+ "be",
3844
+ "bn",
3845
+ "bs",
3846
+ "br",
3847
+ "bg",
3848
+ "my",
3849
+ "ca",
3850
+ "zh",
3851
+ "hr",
3852
+ "cs",
3853
+ "da",
3854
+ "et",
3855
+ "fo",
3856
+ "fi",
3857
+ "gl",
3858
+ "ka",
3859
+ "el",
3860
+ "gu",
3861
+ "ht",
3862
+ "ha",
3863
+ "haw",
3864
+ "he",
3865
+ "hi",
3866
+ "hu",
3867
+ "is",
3868
+ "id",
3869
+ "ja",
3870
+ "jw",
3871
+ "kn",
3872
+ "kk",
3873
+ "km",
3874
+ "ko",
3875
+ "lo",
3876
+ "la",
3877
+ "lv",
3878
+ "ln",
3879
+ "lt",
3880
+ "lb",
3881
+ "mk",
3882
+ "mg",
3883
+ "ms",
3884
+ "ml",
3885
+ "mt",
3886
+ "mi",
3887
+ "mr",
3888
+ "mn",
3889
+ "ne",
3890
+ "no",
3891
+ "nn",
3892
+ "oc",
3893
+ "pa",
3894
+ "ps",
3895
+ "fa",
3896
+ "pl",
3897
+ "ro",
3898
+ "ru",
3899
+ "sa",
3900
+ "sr",
3901
+ "sn",
3902
+ "sd",
3903
+ "si",
3904
+ "sk",
3905
+ "sl",
3906
+ "so",
3907
+ "su",
3908
+ "sw",
3909
+ "sv",
3910
+ "tl",
3911
+ "tg",
3912
+ "ta",
3913
+ "tt",
3914
+ "te",
3915
+ "th",
3916
+ "bo",
3917
+ "tr",
3918
+ "tk",
3919
+ "uk",
3920
+ "ur",
3921
+ "uz",
3922
+ "vi",
3923
+ "cy",
3924
+ "yi",
3925
+ "yo"
3926
+ ]).describe(
3927
+ "The language of your audio file. Possible values are found in [Supported Languages](https://www.assemblyai.com/docs/pre-recorded-audio/supported-languages).\nThe default value is 'en_us'.\n"
3928
+ )
3929
+ ).nullish().describe(
3930
+ "The language codes of your audio file. Used for [Code switching](/docs/speech-to-text/pre-recorded-audio/code-switching)\nOne of the values specified must be `en`.\n"
3931
+ ),
3932
+ language_confidence: zod3.number().nullable().describe(
3933
+ "The confidence score for the detected language, between 0.0 (low confidence) and 1.0 (high confidence). See [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection) for more details."
3934
+ ),
3935
+ language_confidence_threshold: zod3.number().nullable().describe(
3936
+ "The confidence threshold for the automatically detected language.\nAn error will be returned if the language confidence is below this threshold.\nSee [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection) for more details.\n"
3937
+ ),
3938
+ language_detection: zod3.boolean().nullish().describe(
3939
+ "Whether [Automatic language detection](/docs/pre-recorded-audio/automatic-language-detection) is enabled, either true or false"
3940
+ ),
3941
+ language_detection_options: zod3.object({
3942
+ expected_languages: zod3.array(zod3.string()).optional().describe(
3943
+ 'List of languages expected in the audio file. Defaults to `["all"]` when unspecified. See [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection) for more details.'
3944
+ ),
3945
+ fallback_language: zod3.string().default(deleteTranscriptResponseLanguageDetectionOptionsFallbackLanguageDefault).describe(
3946
+ 'If the detected language of the audio file is not in the list of expected languages, the `fallback_language` is used. Specify `["auto"]` to let our model choose the fallback language from `expected_languages` with the highest confidence score. See [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection) for more details.\n'
3947
+ ),
3948
+ code_switching: zod3.boolean().optional().describe(
3949
+ "Whether [code switching](/docs/speech-to-text/pre-recorded-audio/code-switching) should be detected.\n"
3950
+ ),
3951
+ code_switching_confidence_threshold: zod3.number().default(
3952
+ deleteTranscriptResponseLanguageDetectionOptionsCodeSwitchingConfidenceThresholdDefault
3953
+ ).describe(
3954
+ "The confidence threshold for [code switching](/docs/speech-to-text/pre-recorded-audio/code-switching) detection. If the code switching confidence is below this threshold, the transcript will be processed in the language with the highest `language_detection_confidence` score.\n"
3955
+ )
3956
+ }).optional().describe(
3957
+ "Specify options for [Automatic Language Detection](https://www.assemblyai.com/docs/pre-recorded-audio/language-detection)."
3958
+ ),
3959
+ multichannel: zod3.boolean().nullish().describe(
3960
+ "Whether [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) was enabled in the transcription request, either true or false"
3961
+ ),
3962
+ prompt: zod3.string().optional().describe(
3963
+ "Provide natural language prompting of up to 1,500 words of contextual information to the model. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for best practices.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
3964
+ ),
3965
+ punctuate: zod3.boolean().nullish().describe(
3966
+ "Whether [Automatic Punctuation](https://www.assemblyai.com/docs/pre-recorded-audio) is enabled, either true or false"
3967
+ ),
3968
+ redact_pii: zod3.boolean().describe(
3969
+ "Whether [PII Redaction](https://www.assemblyai.com/docs/pii-redaction) is enabled, either true or false"
3970
+ ),
3971
+ redact_pii_audio: zod3.boolean().nullish().describe(
3972
+ "Whether a redacted version of the audio file was generated,\neither true or false. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction#request-for-redacted-audio) for more information.\n"
3973
+ ),
3974
+ redact_pii_audio_options: zod3.object({
3975
+ return_redacted_no_speech_audio: zod3.boolean().optional().describe(
3976
+ "By default, audio redaction provides redacted audio URLs only when speech is detected. However, if your use-case specifically requires redacted audio files even for silent audio files without any dialogue, you can opt to receive these URLs by setting this parameter to `true`."
3977
+ ),
3978
+ override_audio_redaction_method: zod3.enum(["silence"]).optional().describe(
3979
+ "Specify the method used to redact audio. By default, redacted audio uses a beep sound. Set to `silence` to replace PII with silence instead of a beep."
3980
+ )
3981
+ }).optional().describe(
3982
+ "The options for PII-redacted audio, if redact_pii_audio is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction#request-for-redacted-audio) for more information.\n"
3983
+ ),
3984
+ redact_pii_audio_quality: zod3.enum(["mp3", "wav"]).describe(
3985
+ "Controls the filetype of the audio created by redact_pii_audio. Currently supports mp3 (default) and wav. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction#request-for-redacted-audio) for more details."
3986
+ ).or(zod3.null()).optional().describe(
3987
+ "The audio quality of the PII-redacted audio file, if redact_pii_audio is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction#request-for-redacted-audio) for more information.\n"
3988
+ ),
3989
+ redact_pii_policies: zod3.array(
3990
+ zod3.enum([
3991
+ "account_number",
3992
+ "banking_information",
3993
+ "blood_type",
3994
+ "credit_card_cvv",
3995
+ "credit_card_expiration",
3996
+ "credit_card_number",
3997
+ "date",
3998
+ "date_interval",
3999
+ "date_of_birth",
4000
+ "drivers_license",
4001
+ "drug",
4002
+ "duration",
4003
+ "email_address",
4004
+ "event",
4005
+ "filename",
4006
+ "gender_sexuality",
4007
+ "healthcare_number",
4008
+ "injury",
4009
+ "ip_address",
4010
+ "language",
4011
+ "location",
4012
+ "marital_status",
4013
+ "medical_condition",
4014
+ "medical_process",
4015
+ "money_amount",
4016
+ "nationality",
4017
+ "number_sequence",
4018
+ "occupation",
4019
+ "organization",
4020
+ "passport_number",
4021
+ "password",
4022
+ "person_age",
4023
+ "person_name",
4024
+ "phone_number",
4025
+ "physical_attribute",
4026
+ "political_affiliation",
4027
+ "religion",
4028
+ "statistics",
4029
+ "time",
4030
+ "url",
4031
+ "us_social_security_number",
4032
+ "username",
4033
+ "vehicle_id",
4034
+ "zodiac_sign"
4035
+ ]).describe("The type of PII to redact")
4036
+ ).nullish().describe(
4037
+ "The list of PII Redaction policies that were enabled, if PII Redaction is enabled.\nSee [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more information.\n"
3295
4038
  ),
3296
- summary: zod3.string().nullish().describe(
3297
- "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/models/summarization) is enabled"
4039
+ redact_pii_sub: zod3.enum(["entity_name", "hash"]).optional().describe(
4040
+ "The replacement logic for detected PII, can be `entity_name` or `hash`. See [PII redaction](https://www.assemblyai.com/docs/pii-redaction) for more details."
3298
4041
  ),
3299
- custom_topics: zod3.boolean().nullish().describe("Whether custom topics is enabled, either true or false"),
3300
- topics: zod3.array(zod3.string()).optional().describe("The list of custom topics provided if custom topics is enabled"),
3301
4042
  sentiment_analysis: zod3.boolean().nullish().describe(
3302
- "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/models/sentiment-analysis) is enabled, can be true or false"
4043
+ "Whether [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-analysis) is enabled, can be true or false"
3303
4044
  ),
3304
4045
  sentiment_analysis_results: zod3.array(
3305
4046
  zod3.object({
@@ -3307,120 +4048,242 @@ var deleteTranscriptResponse = zod3.object({
3307
4048
  start: zod3.number().describe("The starting time, in milliseconds, of the sentence"),
3308
4049
  end: zod3.number().describe("The ending time, in milliseconds, of the sentence"),
3309
4050
  sentiment: zod3.enum(["POSITIVE", "NEUTRAL", "NEGATIVE"]),
3310
- confidence: zod3.number().min(deleteTranscriptResponseSentimentAnalysisResultsItemConfidenceMin).max(deleteTranscriptResponseSentimentAnalysisResultsItemConfidenceMax).describe(
4051
+ confidence: zod3.number().describe(
3311
4052
  "The confidence score for the detected sentiment of the sentence, from 0 to 1"
3312
4053
  ),
3313
4054
  channel: zod3.string().nullish().describe(
3314
4055
  "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
3315
4056
  ),
3316
4057
  speaker: zod3.string().nullable().describe(
3317
- "The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/models/speaker-diarization) is enabled, else null"
4058
+ "The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
3318
4059
  )
3319
4060
  }).describe("The result of the Sentiment Analysis model")
3320
4061
  ).nullish().describe(
3321
- "An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/models/sentiment-analysis) for more information.\n"
4062
+ "An array of results for the Sentiment Analysis model, if it is enabled.\nSee [Sentiment Analysis](https://www.assemblyai.com/docs/speech-understanding/sentiment-analysis) for more information.\n"
3322
4063
  ),
3323
- entity_detection: zod3.boolean().nullish().describe(
3324
- "Whether [Entity Detection](https://www.assemblyai.com/docs/models/entity-detection) is enabled, can be true or false"
4064
+ speaker_labels: zod3.boolean().nullish().describe(
4065
+ "Whether [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, can be true or false"
3325
4066
  ),
3326
- entities: zod3.array(
4067
+ speakers_expected: zod3.number().nullish().describe(
4068
+ "Tell the speaker label model how many speakers it should attempt to identify. See [Set number of speakers expected](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization#set-number-of-speakers-expected) for more details."
4069
+ ),
4070
+ speech_model_used: zod3.string().optional().describe(
4071
+ "The speech model to use for the transcription. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models."
4072
+ ),
4073
+ speech_models: zod3.array(
4074
+ zod3.string().describe(
4075
+ "The speech model to use for the transcription. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models."
4076
+ )
4077
+ ).nullish().describe(
4078
+ "List multiple speech models in priority order, allowing our system to automatically route your audio to the best available option. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models and routing behavior.\n"
4079
+ ),
4080
+ speech_threshold: zod3.number().nullish().describe(
4081
+ "Defaults to null. Reject audio files that contain less than this fraction of speech.\nValid values are in the range [0, 1] inclusive. See [Speech Threshold](https://www.assemblyai.com/docs/speech-threshold) for more details.\n"
4082
+ ),
4083
+ speech_understanding: zod3.object({
4084
+ request: zod3.object({
4085
+ translation: zod3.object({
4086
+ target_languages: zod3.array(zod3.string()).describe(
4087
+ 'List of target language codes (e.g., `["es", "de"]`). See [Translation](https://www.assemblyai.com/docs/speech-understanding/translation) for supported languages.'
4088
+ ),
4089
+ formal: zod3.boolean().default(deleteTranscriptResponseSpeechUnderstandingRequestTranslationFormalDefault).describe(
4090
+ "Use formal language style. See [Translation](https://www.assemblyai.com/docs/speech-understanding/translation) for more details."
4091
+ ),
4092
+ match_original_utterance: zod3.boolean().optional().describe(
4093
+ "When enabled with Speaker Labels, returns translated text in the utterances array. Each utterance will include a `translated_texts` key containing translations for each target language."
4094
+ )
4095
+ })
4096
+ }).describe(
4097
+ "Request body for [Translation](https://www.assemblyai.com/docs/speech-understanding/translation)."
4098
+ ).or(
4099
+ zod3.object({
4100
+ speaker_identification: zod3.object({
4101
+ speaker_type: zod3.enum(["role", "name"]).describe(
4102
+ "Type of speaker identification. See [Speaker Identification](https://www.assemblyai.com/docs/speech-understanding/speaker-identification) for details on each type."
4103
+ ),
4104
+ known_values: zod3.array(zod3.string()).optional().describe(
4105
+ 'Required if speaker_type is "role". Each value must be 35 characters or less.'
4106
+ ),
4107
+ speakers: zod3.array(
4108
+ zod3.object({
4109
+ role: zod3.string().optional().describe(
4110
+ 'The role of the speaker. Required when `speaker_type` is "role".'
4111
+ ),
4112
+ name: zod3.string().optional().describe(
4113
+ 'The name of the speaker. Required when `speaker_type` is "name".'
4114
+ ),
4115
+ description: zod3.string().optional().describe(
4116
+ "A description of the speaker to help the model identify them based on conversational context."
4117
+ )
4118
+ })
4119
+ ).optional().describe(
4120
+ "An array of speaker objects with metadata to improve identification accuracy. Each object should include a `role` or `name` (depending on `speaker_type`) and an optional `description` to help the model identify the speaker. You can also include any additional custom properties (e.g., `company`, `title`) to provide more context. Use this as an alternative to `known_values` when you want to provide additional context about each speaker."
4121
+ )
4122
+ })
4123
+ }).describe(
4124
+ "Request body for [Speaker Identification](https://www.assemblyai.com/docs/speech-understanding/speaker-identification)."
4125
+ )
4126
+ ).or(
4127
+ zod3.object({
4128
+ custom_formatting: zod3.object({
4129
+ date: zod3.string().optional().describe(
4130
+ 'Date format pattern (e.g., `"mm/dd/yyyy"`). See [Custom Formatting](https://www.assemblyai.com/docs/speech-understanding/custom-formatting) for more details.'
4131
+ ),
4132
+ phone_number: zod3.string().optional().describe(
4133
+ 'Phone number format pattern (e.g., `"(xxx)xxx-xxxx"`). See [Custom Formatting](https://www.assemblyai.com/docs/speech-understanding/custom-formatting) for more details.'
4134
+ ),
4135
+ email: zod3.string().optional().describe(
4136
+ 'Email format pattern (e.g., `"username@domain.com"`). See [Custom Formatting](https://www.assemblyai.com/docs/speech-understanding/custom-formatting) for more details.'
4137
+ )
4138
+ })
4139
+ }).describe(
4140
+ "Request body for [Custom Formatting](https://www.assemblyai.com/docs/speech-understanding/custom-formatting)."
4141
+ )
4142
+ ).optional(),
4143
+ response: zod3.object({
4144
+ translation: zod3.object({
4145
+ status: zod3.string().optional()
4146
+ }).optional()
4147
+ }).or(
4148
+ zod3.object({
4149
+ speaker_identification: zod3.object({
4150
+ mapping: zod3.record(zod3.string(), zod3.string()).optional().describe(
4151
+ 'A mapping of the original generic speaker labels (e.g., "A", "B") to the identified speaker names or roles.'
4152
+ ),
4153
+ status: zod3.string().optional()
4154
+ }).optional()
4155
+ })
4156
+ ).or(
4157
+ zod3.object({
4158
+ custom_formatting: zod3.object({
4159
+ mapping: zod3.record(zod3.string(), zod3.string()).optional(),
4160
+ formatted_text: zod3.string().optional()
4161
+ }).optional()
4162
+ })
4163
+ ).optional()
4164
+ }).optional().describe(
4165
+ "Speech understanding tasks like [Translation](https://www.assemblyai.com/docs/speech-understanding/translation), [Speaker Identification](https://www.assemblyai.com/docs/speech-understanding/speaker-identification), and [Custom Formatting](https://www.assemblyai.com/docs/speech-understanding/custom-formatting). See the task-specific docs for available options and configuration.\n"
4166
+ ),
4167
+ status: zod3.enum(["queued", "processing", "completed", "error"]).describe(
4168
+ "The status of your transcript. Possible values are queued, processing, completed, or error."
4169
+ ),
4170
+ summarization: zod3.boolean().describe(
4171
+ "Whether [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization) is enabled, either true or false. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details.\n\nNote: This parameter is only supported for the Universal-2 model.\n"
4172
+ ),
4173
+ summary: zod3.string().nullish().describe(
4174
+ "The generated summary of the media file, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details."
4175
+ ),
4176
+ summary_model: zod3.string().nullish().describe(
4177
+ "The Summarization model used to generate the summary,\nif [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization#summary-models) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details.\n"
4178
+ ),
4179
+ summary_type: zod3.string().nullish().describe(
4180
+ "The type of summary generated, if [Summarization](https://www.assemblyai.com/docs/speech-understanding/summarization#summary-types) is enabled. Deprecated - use [LLM Gateway](https://www.assemblyai.com/docs/llm-gateway/overview) instead for more flexible summaries. See the [updated Summarization page](https://www.assemblyai.com/docs/speech-understanding/summarization) for details."
4181
+ ),
4182
+ remove_audio_tags: zod3.enum(["all"]).describe(
4183
+ "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
4184
+ ).or(zod3.null()).optional().describe(
4185
+ "Whether [audio event tags](https://www.assemblyai.com/docs/pre-recorded-audio/universal-3-pro#audio-event-tags) were removed from the transcript text.\n\nNote: This parameter is only supported for the Universal-3 Pro model.\n"
4186
+ ),
4187
+ temperature: zod3.number().nullish().describe(
4188
+ "The temperature that was used for the model's response. See the [Prompting Guide](https://www.assemblyai.com/docs/pre-recorded-audio/prompting) for more details.\n\nNote: This parameter can only be used with the Universal-3 Pro model.\n"
4189
+ ),
4190
+ text: zod3.string().nullish().describe("The textual transcript of your media file"),
4191
+ throttled: zod3.boolean().nullish().describe(
4192
+ "True while a request is throttled and false when a request is no longer throttled"
4193
+ ),
4194
+ utterances: zod3.array(
3327
4195
  zod3.object({
3328
- entity_type: zod3.enum([
3329
- "account_number",
3330
- "banking_information",
3331
- "blood_type",
3332
- "credit_card_cvv",
3333
- "credit_card_expiration",
3334
- "credit_card_number",
3335
- "date",
3336
- "date_interval",
3337
- "date_of_birth",
3338
- "drivers_license",
3339
- "drug",
3340
- "duration",
3341
- "email_address",
3342
- "event",
3343
- "filename",
3344
- "gender_sexuality",
3345
- "healthcare_number",
3346
- "injury",
3347
- "ip_address",
3348
- "language",
3349
- "location",
3350
- "marital_status",
3351
- "medical_condition",
3352
- "medical_process",
3353
- "money_amount",
3354
- "nationality",
3355
- "number_sequence",
3356
- "occupation",
3357
- "organization",
3358
- "passport_number",
3359
- "password",
3360
- "person_age",
3361
- "person_name",
3362
- "phone_number",
3363
- "physical_attribute",
3364
- "political_affiliation",
3365
- "religion",
3366
- "statistics",
3367
- "time",
3368
- "url",
3369
- "us_social_security_number",
3370
- "username",
3371
- "vehicle_id",
3372
- "zodiac_sign"
3373
- ]).describe("The type of entity for the detected entity"),
3374
- text: zod3.string().describe("The text for the detected entity"),
3375
- start: zod3.number().describe(
3376
- "The starting time, in milliseconds, at which the detected entity appears in the audio file"
4196
+ confidence: zod3.number().describe("The confidence score for the transcript of this utterance"),
4197
+ start: zod3.number().describe("The starting time, in milliseconds, of the utterance in the audio file"),
4198
+ end: zod3.number().describe("The ending time, in milliseconds, of the utterance in the audio file"),
4199
+ text: zod3.string().describe("The text for this utterance"),
4200
+ words: zod3.array(
4201
+ zod3.object({
4202
+ confidence: zod3.number().describe("The confidence score for the transcript of this word"),
4203
+ start: zod3.number().describe("The starting time, in milliseconds, for the word"),
4204
+ end: zod3.number().describe("The ending time, in milliseconds, for the word"),
4205
+ text: zod3.string().describe("The text of the word"),
4206
+ channel: zod3.string().nullish().describe(
4207
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
4208
+ ),
4209
+ speaker: zod3.string().nullable().describe(
4210
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
4211
+ )
4212
+ })
4213
+ ).describe("The words in the utterance."),
4214
+ channel: zod3.string().nullish().describe(
4215
+ "The channel of this utterance. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
3377
4216
  ),
3378
- end: zod3.number().describe(
3379
- "The ending time, in milliseconds, for the detected entity in the audio file"
4217
+ speaker: zod3.string().describe(
4218
+ 'The speaker of this utterance, where each speaker is assigned a sequential capital letter - e.g. "A" for Speaker A, "B" for Speaker B, etc.'
4219
+ ),
4220
+ translated_texts: zod3.record(zod3.string(), zod3.string()).optional().describe(
4221
+ 'Translations keyed by language code (e.g., `{"es": "Texto traducido", "de": "\xDCbersetzter Text"}`). Only present when `match_original_utterance` is enabled with translation.'
3380
4222
  )
3381
- }).describe("A detected entity")
4223
+ })
3382
4224
  ).nullish().describe(
3383
- "An array of results for the Entity Detection model, if it is enabled.\nSee [Entity detection](https://www.assemblyai.com/docs/models/entity-detection) for more information.\n"
4225
+ "When multichannel or speaker_labels is enabled, a list of turn-by-turn utterance objects.\nSee [Speaker diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) and [Multichannel transcription](https://www.assemblyai.com/docs/pre-recorded-audio/multichannel) for more information.\n"
3384
4226
  ),
3385
- speech_threshold: zod3.number().min(deleteTranscriptResponseSpeechThresholdMin).max(deleteTranscriptResponseSpeechThresholdMax).nullish().describe(
3386
- "Defaults to null. Reject audio files that contain less than this fraction of speech.\nValid values are in the range [0, 1] inclusive.\n"
4227
+ webhook_auth: zod3.boolean().describe(
4228
+ "Whether [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) authentication details were provided"
3387
4229
  ),
3388
- throttled: zod3.boolean().nullish().describe(
3389
- "True while a request is throttled and false when a request is no longer throttled"
4230
+ webhook_auth_header_name: zod3.string().nullish().describe(
4231
+ "The header name to be sent with the transcript completed or failed [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) requests"
3390
4232
  ),
3391
- error: zod3.string().optional().describe("Error message of why the transcript failed"),
3392
- language_model: zod3.string().describe("The language model that was used for the transcript"),
3393
- acoustic_model: zod3.string().describe("The acoustic model that was used for the transcript")
4233
+ webhook_status_code: zod3.number().nullish().describe(
4234
+ "The status code we received from your server when delivering the transcript completed or failed [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) request, if a webhook URL was provided"
4235
+ ),
4236
+ webhook_url: zod3.string().nullish().describe(
4237
+ "The URL to which we send [webhook](https://www.assemblyai.com/docs/deployment/webhooks-for-pre-recorded-audio) requests.\n"
4238
+ ),
4239
+ words: zod3.array(
4240
+ zod3.object({
4241
+ confidence: zod3.number().describe("The confidence score for the transcript of this word"),
4242
+ start: zod3.number().describe("The starting time, in milliseconds, for the word"),
4243
+ end: zod3.number().describe("The ending time, in milliseconds, for the word"),
4244
+ text: zod3.string().describe("The text of the word"),
4245
+ channel: zod3.string().nullish().describe(
4246
+ "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
4247
+ ),
4248
+ speaker: zod3.string().nullable().describe(
4249
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
4250
+ )
4251
+ })
4252
+ ).nullish().describe(
4253
+ "An array of temporally-sequential word objects, one for each word in the transcript.\n"
4254
+ ),
4255
+ acoustic_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
4256
+ custom_topics: zod3.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
4257
+ language_model: zod3.string().describe("This parameter does not currently have any functionality attached to it."),
4258
+ speech_model: zod3.string().describe(
4259
+ "The speech model to use for the transcription. See [Model Selection](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model) for available models."
4260
+ ).or(zod3.null()).describe(
4261
+ "This parameter has been replaced with the `speech_models` parameter, learn more about the `speech_models` parameter [here](https://www.assemblyai.com/docs/pre-recorded-audio/select-the-speech-model).\n"
4262
+ ),
4263
+ speed_boost: zod3.boolean().nullish().describe("This parameter does not currently have any functionality attached to it."),
4264
+ topics: zod3.array(zod3.string()).optional().describe("This parameter does not currently have any functionality attached to it."),
4265
+ translated_texts: zod3.object({
4266
+ language_code: zod3.string().optional().describe("Translated text for this language code")
4267
+ }).optional().describe(
4268
+ "Translated text keyed by language code. See [Translation](https://www.assemblyai.com/docs/speech-understanding/translation) for more details."
4269
+ )
3394
4270
  }).describe("A transcript object");
3395
- var getSubtitlesParams = zod3.object({
3396
- transcript_id: zod3.string().describe("ID of the transcript"),
3397
- subtitle_format: zod3.enum(["srt", "vtt"]).describe("The format of the captions")
3398
- });
3399
- var getSubtitlesQueryParams = zod3.object({
3400
- chars_per_caption: zod3.number().optional().describe("The maximum number of characters per caption")
3401
- });
3402
4271
  var getTranscriptSentencesParams = zod3.object({
3403
4272
  transcript_id: zod3.string().describe("ID of the transcript")
3404
4273
  });
3405
- var getTranscriptSentencesResponseConfidenceMin = 0;
3406
- var getTranscriptSentencesResponseConfidenceMax = 1;
3407
- var getTranscriptSentencesResponseSentencesItemConfidenceMin = 0;
3408
- var getTranscriptSentencesResponseSentencesItemConfidenceMax = 1;
3409
- var getTranscriptSentencesResponseSentencesItemWordsItemConfidenceMin = 0;
3410
- var getTranscriptSentencesResponseSentencesItemWordsItemConfidenceMax = 1;
3411
4274
  var getTranscriptSentencesResponse = zod3.object({
3412
4275
  id: zod3.string().uuid().describe("The unique identifier for the transcript"),
3413
- confidence: zod3.number().min(getTranscriptSentencesResponseConfidenceMin).max(getTranscriptSentencesResponseConfidenceMax).describe("The confidence score for the transcript"),
4276
+ confidence: zod3.number().describe("The confidence score for the transcript"),
3414
4277
  audio_duration: zod3.number().describe("The duration of the audio file in seconds"),
3415
4278
  sentences: zod3.array(
3416
4279
  zod3.object({
3417
4280
  text: zod3.string().describe("The transcript of the sentence"),
3418
4281
  start: zod3.number().describe("The starting time, in milliseconds, for the sentence"),
3419
4282
  end: zod3.number().describe("The ending time, in milliseconds, for the sentence"),
3420
- confidence: zod3.number().min(getTranscriptSentencesResponseSentencesItemConfidenceMin).max(getTranscriptSentencesResponseSentencesItemConfidenceMax).describe("The confidence score for the transcript of this sentence"),
4283
+ confidence: zod3.number().describe("The confidence score for the transcript of this sentence"),
3421
4284
  words: zod3.array(
3422
4285
  zod3.object({
3423
- confidence: zod3.number().min(getTranscriptSentencesResponseSentencesItemWordsItemConfidenceMin).max(getTranscriptSentencesResponseSentencesItemWordsItemConfidenceMax).describe("The confidence score for the transcript of this word"),
4286
+ confidence: zod3.number().describe("The confidence score for the transcript of this word"),
3424
4287
  start: zod3.number().describe("The starting time, in milliseconds, for the word"),
3425
4288
  end: zod3.number().describe("The ending time, in milliseconds, for the word"),
3426
4289
  text: zod3.string().describe("The text of the word"),
@@ -3428,7 +4291,7 @@ var getTranscriptSentencesResponse = zod3.object({
3428
4291
  "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
3429
4292
  ),
3430
4293
  speaker: zod3.string().nullable().describe(
3431
- "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/models/speaker-diarization) is enabled, else null"
4294
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
3432
4295
  )
3433
4296
  })
3434
4297
  ).describe("An array of words in the sentence"),
@@ -3436,7 +4299,7 @@ var getTranscriptSentencesResponse = zod3.object({
3436
4299
  "The channel of the sentence. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
3437
4300
  ),
3438
4301
  speaker: zod3.string().nullable().describe(
3439
- "The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/models/speaker-diarization) is enabled, else null"
4302
+ "The speaker of the sentence if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
3440
4303
  )
3441
4304
  })
3442
4305
  ).describe("An array of sentences in the transcript")
@@ -3444,25 +4307,19 @@ var getTranscriptSentencesResponse = zod3.object({
3444
4307
  var getTranscriptParagraphsParams = zod3.object({
3445
4308
  transcript_id: zod3.string().describe("ID of the transcript")
3446
4309
  });
3447
- var getTranscriptParagraphsResponseConfidenceMin = 0;
3448
- var getTranscriptParagraphsResponseConfidenceMax = 1;
3449
- var getTranscriptParagraphsResponseParagraphsItemConfidenceMin = 0;
3450
- var getTranscriptParagraphsResponseParagraphsItemConfidenceMax = 1;
3451
- var getTranscriptParagraphsResponseParagraphsItemWordsItemConfidenceMin = 0;
3452
- var getTranscriptParagraphsResponseParagraphsItemWordsItemConfidenceMax = 1;
3453
4310
  var getTranscriptParagraphsResponse = zod3.object({
3454
4311
  id: zod3.string().uuid().describe("The unique identifier of your transcript"),
3455
- confidence: zod3.number().min(getTranscriptParagraphsResponseConfidenceMin).max(getTranscriptParagraphsResponseConfidenceMax).describe("The confidence score for the transcript"),
4312
+ confidence: zod3.number().describe("The confidence score for the transcript"),
3456
4313
  audio_duration: zod3.number().describe("The duration of the audio file in seconds"),
3457
4314
  paragraphs: zod3.array(
3458
4315
  zod3.object({
3459
4316
  text: zod3.string().describe("The transcript of the paragraph"),
3460
4317
  start: zod3.number().describe("The starting time, in milliseconds, of the paragraph"),
3461
4318
  end: zod3.number().describe("The ending time, in milliseconds, of the paragraph"),
3462
- confidence: zod3.number().min(getTranscriptParagraphsResponseParagraphsItemConfidenceMin).max(getTranscriptParagraphsResponseParagraphsItemConfidenceMax).describe("The confidence score for the transcript of this paragraph"),
4319
+ confidence: zod3.number().describe("The confidence score for the transcript of this paragraph"),
3463
4320
  words: zod3.array(
3464
4321
  zod3.object({
3465
- confidence: zod3.number().min(getTranscriptParagraphsResponseParagraphsItemWordsItemConfidenceMin).max(getTranscriptParagraphsResponseParagraphsItemWordsItemConfidenceMax).describe("The confidence score for the transcript of this word"),
4322
+ confidence: zod3.number().describe("The confidence score for the transcript of this word"),
3466
4323
  start: zod3.number().describe("The starting time, in milliseconds, for the word"),
3467
4324
  end: zod3.number().describe("The ending time, in milliseconds, for the word"),
3468
4325
  text: zod3.string().describe("The text of the word"),
@@ -3470,13 +4327,28 @@ var getTranscriptParagraphsResponse = zod3.object({
3470
4327
  "The channel of the word. The left and right channels are channels 1 and 2. Additional channels increment the channel number sequentially."
3471
4328
  ),
3472
4329
  speaker: zod3.string().nullable().describe(
3473
- "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/models/speaker-diarization) is enabled, else null"
4330
+ "The speaker of the word if [Speaker Diarization](https://www.assemblyai.com/docs/pre-recorded-audio/speaker-diarization) is enabled, else null"
3474
4331
  )
3475
4332
  })
3476
4333
  ).describe("An array of words in the paragraph")
3477
4334
  })
3478
4335
  ).describe("An array of paragraphs in the transcript")
3479
4336
  });
4337
+ var getSubtitlesParams = zod3.object({
4338
+ transcript_id: zod3.string().describe("ID of the transcript"),
4339
+ subtitle_format: zod3.enum(["srt", "vtt"]).describe("The format of the captions")
4340
+ });
4341
+ var getSubtitlesQueryParams = zod3.object({
4342
+ chars_per_caption: zod3.number().optional().describe("The maximum number of characters per caption")
4343
+ });
4344
+ var getSubtitlesResponse = zod3.object({});
4345
+ var getRedactedAudioParams = zod3.object({
4346
+ transcript_id: zod3.string().describe("ID of the transcript")
4347
+ });
4348
+ var getRedactedAudioResponse = zod3.object({
4349
+ status: zod3.enum(["redacted_audio_ready"]).describe("The status of the redacted audio"),
4350
+ redacted_audio_url: zod3.string().describe("The URL of the redacted audio file")
4351
+ });
3480
4352
  var wordSearchParams = zod3.object({
3481
4353
  transcript_id: zod3.string().describe("ID of the transcript")
3482
4354
  });
@@ -3493,7 +4365,7 @@ var wordSearchResponse = zod3.object({
3493
4365
  text: zod3.string().describe("The matched word"),
3494
4366
  count: zod3.number().describe("The total amount of times the word is in the transcript"),
3495
4367
  timestamps: zod3.array(
3496
- zod3.array(zod3.number().describe("Timestamp in milliseconds")).describe(
4368
+ zod3.array(zod3.number()).describe(
3497
4369
  "An array of timestamps structured as [`start_time`, `end_time`] in milliseconds"
3498
4370
  )
3499
4371
  ).describe("An array of timestamps"),
@@ -3503,199 +4375,6 @@ var wordSearchResponse = zod3.object({
3503
4375
  })
3504
4376
  ).describe("The matches of the search")
3505
4377
  });
3506
- var getRedactedAudioParams = zod3.object({
3507
- transcript_id: zod3.string().describe("ID of the transcript")
3508
- });
3509
- var getRedactedAudioResponse = zod3.object({
3510
- status: zod3.enum(["redacted_audio_ready"]).describe("The status of the redacted audio"),
3511
- redacted_audio_url: zod3.string().describe("The URL of the redacted audio file")
3512
- });
3513
- var createTemporaryTokenBodyExpiresInMin = 60;
3514
- var createTemporaryTokenBody = zod3.object({
3515
- expires_in: zod3.number().min(createTemporaryTokenBodyExpiresInMin).describe("The amount of time until the token expires in seconds")
3516
- });
3517
- var createTemporaryTokenResponse = zod3.object({
3518
- token: zod3.string().describe("The temporary authentication token for Streaming Speech-to-Text")
3519
- });
3520
- var lemurTaskBodyMaxOutputSizeDefault = 2e3;
3521
- var lemurTaskBodyTemperatureMin = 0;
3522
- var lemurTaskBodyTemperatureMax = 1;
3523
- var lemurTaskBody = zod3.object({
3524
- prompt: zod3.string().describe(
3525
- "Your text to prompt the model to produce a desired output, including any context you want to pass into the model."
3526
- )
3527
- }).and(
3528
- zod3.object({
3529
- transcript_ids: zod3.array(zod3.string().uuid()).optional().describe(
3530
- "A list of completed transcripts with text. Up to a maximum of 100 hours of audio.\nUse either transcript_ids or input_text as input into LeMUR.\n"
3531
- ),
3532
- input_text: zod3.string().optional().describe(
3533
- "Custom formatted transcript data. Maximum size is the context limit of the selected model.\nUse either transcript_ids or input_text as input into LeMUR.\n"
3534
- ),
3535
- context: zod3.string().or(zod3.record(zod3.string(), zod3.any())).optional().describe("Context to provide the model. This can be a string or a free-form JSON value."),
3536
- final_model: zod3.enum([
3537
- "anthropic/claude-3-5-sonnet",
3538
- "anthropic/claude-3-opus",
3539
- "anthropic/claude-3-haiku"
3540
- ]).describe("The model that is used for the final prompt after compression is performed.\n").or(zod3.string()).describe("The model that is used for the final prompt after compression is performed.\n"),
3541
- max_output_size: zod3.number().default(lemurTaskBodyMaxOutputSizeDefault).describe("Max output size in tokens."),
3542
- temperature: zod3.number().min(lemurTaskBodyTemperatureMin).max(lemurTaskBodyTemperatureMax).optional().describe(
3543
- "The temperature to use for the model.\nHigher values result in answers that are more creative, lower values are more conservative.\nCan be any value between 0.0 and 1.0 inclusive.\n"
3544
- )
3545
- })
3546
- );
3547
- var lemurTaskResponseUsageInputTokensMin = 0;
3548
- var lemurTaskResponseUsageOutputTokensMin = 0;
3549
- var lemurTaskResponse = zod3.object({
3550
- response: zod3.string().describe("The response generated by LeMUR.")
3551
- }).and(
3552
- zod3.object({
3553
- request_id: zod3.string().uuid().describe("The ID of the LeMUR request"),
3554
- usage: zod3.object({
3555
- input_tokens: zod3.number().min(lemurTaskResponseUsageInputTokensMin).describe("The number of input tokens used by the model"),
3556
- output_tokens: zod3.number().min(lemurTaskResponseUsageOutputTokensMin).describe("The number of output tokens generated by the model")
3557
- }).describe("The usage numbers for the LeMUR request")
3558
- })
3559
- );
3560
- var lemurSummaryBodyMaxOutputSizeDefault = 2e3;
3561
- var lemurSummaryBodyTemperatureMin = 0;
3562
- var lemurSummaryBodyTemperatureMax = 1;
3563
- var lemurSummaryBody = zod3.object({
3564
- transcript_ids: zod3.array(zod3.string().uuid()).optional().describe(
3565
- "A list of completed transcripts with text. Up to a maximum of 100 hours of audio.\nUse either transcript_ids or input_text as input into LeMUR.\n"
3566
- ),
3567
- input_text: zod3.string().optional().describe(
3568
- "Custom formatted transcript data. Maximum size is the context limit of the selected model.\nUse either transcript_ids or input_text as input into LeMUR.\n"
3569
- ),
3570
- context: zod3.string().or(zod3.record(zod3.string(), zod3.any())).optional().describe("Context to provide the model. This can be a string or a free-form JSON value."),
3571
- final_model: zod3.enum(["anthropic/claude-3-5-sonnet", "anthropic/claude-3-opus", "anthropic/claude-3-haiku"]).describe("The model that is used for the final prompt after compression is performed.\n").or(zod3.string()).describe("The model that is used for the final prompt after compression is performed.\n"),
3572
- max_output_size: zod3.number().default(lemurSummaryBodyMaxOutputSizeDefault).describe("Max output size in tokens."),
3573
- temperature: zod3.number().min(lemurSummaryBodyTemperatureMin).max(lemurSummaryBodyTemperatureMax).optional().describe(
3574
- "The temperature to use for the model.\nHigher values result in answers that are more creative, lower values are more conservative.\nCan be any value between 0.0 and 1.0 inclusive.\n"
3575
- )
3576
- }).and(
3577
- zod3.object({
3578
- answer_format: zod3.string().optional().describe(
3579
- 'How you want the summary to be returned. This can be any text. Examples: "TLDR", "bullet points"\n'
3580
- )
3581
- })
3582
- );
3583
- var lemurSummaryResponseUsageInputTokensMin = 0;
3584
- var lemurSummaryResponseUsageOutputTokensMin = 0;
3585
- var lemurSummaryResponse = zod3.object({
3586
- response: zod3.string().describe("The response generated by LeMUR.")
3587
- }).and(
3588
- zod3.object({
3589
- request_id: zod3.string().uuid().describe("The ID of the LeMUR request"),
3590
- usage: zod3.object({
3591
- input_tokens: zod3.number().min(lemurSummaryResponseUsageInputTokensMin).describe("The number of input tokens used by the model"),
3592
- output_tokens: zod3.number().min(lemurSummaryResponseUsageOutputTokensMin).describe("The number of output tokens generated by the model")
3593
- }).describe("The usage numbers for the LeMUR request")
3594
- })
3595
- );
3596
- var lemurQuestionAnswerBodyMaxOutputSizeDefault = 2e3;
3597
- var lemurQuestionAnswerBodyTemperatureMin = 0;
3598
- var lemurQuestionAnswerBodyTemperatureMax = 1;
3599
- var lemurQuestionAnswerBody = zod3.object({
3600
- transcript_ids: zod3.array(zod3.string().uuid()).optional().describe(
3601
- "A list of completed transcripts with text. Up to a maximum of 100 hours of audio.\nUse either transcript_ids or input_text as input into LeMUR.\n"
3602
- ),
3603
- input_text: zod3.string().optional().describe(
3604
- "Custom formatted transcript data. Maximum size is the context limit of the selected model.\nUse either transcript_ids or input_text as input into LeMUR.\n"
3605
- ),
3606
- context: zod3.string().or(zod3.record(zod3.string(), zod3.any())).optional().describe("Context to provide the model. This can be a string or a free-form JSON value."),
3607
- final_model: zod3.enum(["anthropic/claude-3-5-sonnet", "anthropic/claude-3-opus", "anthropic/claude-3-haiku"]).describe("The model that is used for the final prompt after compression is performed.\n").or(zod3.string()).describe("The model that is used for the final prompt after compression is performed.\n"),
3608
- max_output_size: zod3.number().default(lemurQuestionAnswerBodyMaxOutputSizeDefault).describe("Max output size in tokens."),
3609
- temperature: zod3.number().min(lemurQuestionAnswerBodyTemperatureMin).max(lemurQuestionAnswerBodyTemperatureMax).optional().describe(
3610
- "The temperature to use for the model.\nHigher values result in answers that are more creative, lower values are more conservative.\nCan be any value between 0.0 and 1.0 inclusive.\n"
3611
- )
3612
- }).and(
3613
- zod3.object({
3614
- questions: zod3.array(
3615
- zod3.object({
3616
- question: zod3.string().describe(
3617
- "The question you wish to ask. For more complex questions use default model."
3618
- ),
3619
- context: zod3.string().or(zod3.record(zod3.string(), zod3.any())).optional().describe(
3620
- "Any context about the transcripts you wish to provide. This can be a string or any object."
3621
- ),
3622
- answer_format: zod3.string().optional().describe(
3623
- `How you want the answer to be returned. This can be any text. Can't be used with answer_options. Examples: "short sentence", "bullet points"
3624
- `
3625
- ),
3626
- answer_options: zod3.array(zod3.string()).optional().describe(
3627
- `What discrete options to return. Useful for precise responses. Can't be used with answer_format. Example: ["Yes", "No"]
3628
- `
3629
- )
3630
- })
3631
- ).describe("A list of questions to ask")
3632
- })
3633
- );
3634
- var lemurQuestionAnswerResponseUsageInputTokensMin = 0;
3635
- var lemurQuestionAnswerResponseUsageOutputTokensMin = 0;
3636
- var lemurQuestionAnswerResponse = zod3.object({
3637
- request_id: zod3.string().uuid().describe("The ID of the LeMUR request"),
3638
- usage: zod3.object({
3639
- input_tokens: zod3.number().min(lemurQuestionAnswerResponseUsageInputTokensMin).describe("The number of input tokens used by the model"),
3640
- output_tokens: zod3.number().min(lemurQuestionAnswerResponseUsageOutputTokensMin).describe("The number of output tokens generated by the model")
3641
- }).describe("The usage numbers for the LeMUR request")
3642
- }).and(
3643
- zod3.object({
3644
- response: zod3.array(
3645
- zod3.object({
3646
- question: zod3.string().describe("The question for LeMUR to answer"),
3647
- answer: zod3.string().describe("The answer generated by LeMUR")
3648
- }).describe("An answer generated by LeMUR and its question")
3649
- ).describe("The answers generated by LeMUR and their questions")
3650
- })
3651
- );
3652
- var getLemurResponseParams = zod3.object({
3653
- request_id: zod3.string().describe(
3654
- "The ID of the LeMUR request you previously made.\nThis would be found in the response of the original request.\n"
3655
- )
3656
- });
3657
- var getLemurResponseResponseUsageInputTokensMin = 0;
3658
- var getLemurResponseResponseUsageOutputTokensMin = 0;
3659
- var getLemurResponseResponseUsageInputTokensMinOne = 0;
3660
- var getLemurResponseResponseUsageOutputTokensMinOne = 0;
3661
- var getLemurResponseResponse = zod3.object({
3662
- response: zod3.string().describe("The response generated by LeMUR.")
3663
- }).and(
3664
- zod3.object({
3665
- request_id: zod3.string().uuid().describe("The ID of the LeMUR request"),
3666
- usage: zod3.object({
3667
- input_tokens: zod3.number().min(getLemurResponseResponseUsageInputTokensMin).describe("The number of input tokens used by the model"),
3668
- output_tokens: zod3.number().min(getLemurResponseResponseUsageOutputTokensMin).describe("The number of output tokens generated by the model")
3669
- }).describe("The usage numbers for the LeMUR request")
3670
- })
3671
- ).or(
3672
- zod3.object({
3673
- request_id: zod3.string().uuid().describe("The ID of the LeMUR request"),
3674
- usage: zod3.object({
3675
- input_tokens: zod3.number().min(getLemurResponseResponseUsageInputTokensMinOne).describe("The number of input tokens used by the model"),
3676
- output_tokens: zod3.number().min(getLemurResponseResponseUsageOutputTokensMinOne).describe("The number of output tokens generated by the model")
3677
- }).describe("The usage numbers for the LeMUR request")
3678
- }).and(
3679
- zod3.object({
3680
- response: zod3.array(
3681
- zod3.object({
3682
- question: zod3.string().describe("The question for LeMUR to answer"),
3683
- answer: zod3.string().describe("The answer generated by LeMUR")
3684
- }).describe("An answer generated by LeMUR and its question")
3685
- ).describe("The answers generated by LeMUR and their questions")
3686
- })
3687
- )
3688
- );
3689
- var purgeLemurRequestDataParams = zod3.object({
3690
- request_id: zod3.string().describe(
3691
- "The ID of the LeMUR request whose data you want to delete. This would be found in the response of the original request."
3692
- )
3693
- });
3694
- var purgeLemurRequestDataResponse = zod3.object({
3695
- request_id: zod3.string().uuid().describe("The ID of the deletion request of the LeMUR request"),
3696
- request_id_to_purge: zod3.string().uuid().describe("The ID of the LeMUR request to purge the data for"),
3697
- deleted: zod3.boolean().describe("Whether the request data was deleted")
3698
- });
3699
4378
 
3700
4379
  // src/generated/assemblyai/streaming-types.zod.ts
3701
4380
  import { z as zod4 } from "zod";
@@ -3710,25 +4389,37 @@ var streamingTranscriberParams = zod4.object({
3710
4389
  enableExtraSessionInformation: zod4.boolean().optional().describe(
3711
4390
  "Set to true to receive the SessionInformation message before the session ends. Defaults to false."
3712
4391
  ),
4392
+ domain: zod4.string().optional().describe(
4393
+ 'Enable domain-specific transcription models to improve accuracy for specialized terminology. Set to `"medical-v1"` to enable [Medical Mode](https://www.assemblyai.com/docs/streaming/medical-mode) for improved accuracy of medical terms such as medications, procedures, conditions, and dosages. Supported languages: English (`en`), Spanish (`es`), German (`de`), French (`fr`). If used with an unsupported language, the parameter is ignored and a warning is returned.'
4394
+ ),
3713
4395
  endOfTurnConfidenceThreshold: zod4.number().optional().describe("From SDK v3"),
3714
4396
  minEndOfTurnSilenceWhenConfident: zod4.number().optional().describe("From SDK v3"),
4397
+ minTurnSilence: zod4.number().optional().describe("From SDK v3"),
3715
4398
  maxTurnSilence: zod4.number().optional().describe("From SDK v3"),
3716
4399
  vadThreshold: zod4.number().optional().describe("From SDK v3"),
3717
4400
  formatTurns: zod4.boolean().optional().describe("From SDK v3"),
3718
4401
  filterProfanity: zod4.boolean().optional().describe("From SDK v3"),
3719
4402
  keyterms: zod4.array(zod4.string()).optional().describe("From SDK v3"),
3720
4403
  keytermsPrompt: zod4.array(zod4.string()).optional().describe("From SDK v3"),
3721
- speechModel: zod4.enum(["universal-streaming-english", "universal-streaming-multilingual"]).optional().describe("From SDK v3"),
4404
+ prompt: zod4.string().optional().describe("From SDK v3"),
4405
+ speechModel: zod4.enum(["universal-streaming-english", "universal-streaming-multilingual"]).describe("From SDK v3"),
3722
4406
  languageDetection: zod4.boolean().optional().describe("From SDK v3"),
3723
- inactivityTimeout: zod4.number().optional().describe("From SDK v3")
4407
+ inactivityTimeout: zod4.number().optional().describe("From SDK v3"),
4408
+ speakerLabels: zod4.boolean().optional().describe("From SDK v3"),
4409
+ maxSpeakers: zod4.number().optional().describe("From SDK v3"),
4410
+ llmGateway: zod4.unknown().optional().describe("From SDK v3")
3724
4411
  });
3725
4412
  var streamingUpdateConfigParams = zod4.object({
3726
4413
  end_utterance_silence_threshold: zod4.number().min(0).max(2e4).optional().describe("The duration threshold in milliseconds"),
3727
4414
  end_of_turn_confidence_threshold: zod4.number().optional().describe("From SDK v3"),
3728
4415
  min_end_of_turn_silence_when_confident: zod4.number().optional().describe("From SDK v3"),
4416
+ min_turn_silence: zod4.number().optional().describe("From SDK v3"),
3729
4417
  max_turn_silence: zod4.number().optional().describe("From SDK v3"),
3730
4418
  vad_threshold: zod4.number().optional().describe("From SDK v3"),
3731
- format_turns: zod4.boolean().optional().describe("From SDK v3")
4419
+ format_turns: zod4.boolean().optional().describe("From SDK v3"),
4420
+ keyterms_prompt: zod4.array(zod4.string()).optional().describe("From SDK v3"),
4421
+ prompt: zod4.string().optional().describe("From SDK v3"),
4422
+ filter_profanity: zod4.boolean().optional().describe("From SDK v3")
3732
4423
  });
3733
4424
 
3734
4425
  // src/generated/gladia/api/gladiaControlAPI.zod.ts
@@ -20585,7 +21276,7 @@ var createRealtimeClientSecretBody = zod6.object({
20585
21276
  format: zod6.discriminatedUnion("type", [
20586
21277
  zod6.object({
20587
21278
  type: zod6.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
20588
- rate: zod6.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
21279
+ rate: zod6.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
20589
21280
  }).describe("The PCM audio format. Only a 24kHz sample rate is supported."),
20590
21281
  zod6.object({
20591
21282
  type: zod6.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
@@ -20684,7 +21375,7 @@ var createRealtimeClientSecretBody = zod6.object({
20684
21375
  format: zod6.discriminatedUnion("type", [
20685
21376
  zod6.object({
20686
21377
  type: zod6.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
20687
- rate: zod6.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
21378
+ rate: zod6.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
20688
21379
  }).describe("The PCM audio format. Only a 24kHz sample rate is supported."),
20689
21380
  zod6.object({
20690
21381
  type: zod6.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
@@ -20894,7 +21585,7 @@ var createRealtimeClientSecretBody = zod6.object({
20894
21585
  format: zod6.discriminatedUnion("type", [
20895
21586
  zod6.object({
20896
21587
  type: zod6.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
20897
- rate: zod6.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
21588
+ rate: zod6.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
20898
21589
  }).describe("The PCM audio format. Only a 24kHz sample rate is supported."),
20899
21590
  zod6.object({
20900
21591
  type: zod6.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
@@ -21058,7 +21749,7 @@ var createRealtimeClientSecretResponse = zod6.object({
21058
21749
  format: zod6.discriminatedUnion("type", [
21059
21750
  zod6.object({
21060
21751
  type: zod6.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
21061
- rate: zod6.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
21752
+ rate: zod6.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
21062
21753
  }).describe("The PCM audio format. Only a 24kHz sample rate is supported."),
21063
21754
  zod6.object({
21064
21755
  type: zod6.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
@@ -21157,7 +21848,7 @@ var createRealtimeClientSecretResponse = zod6.object({
21157
21848
  format: zod6.discriminatedUnion("type", [
21158
21849
  zod6.object({
21159
21850
  type: zod6.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
21160
- rate: zod6.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
21851
+ rate: zod6.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
21161
21852
  }).describe("The PCM audio format. Only a 24kHz sample rate is supported."),
21162
21853
  zod6.object({
21163
21854
  type: zod6.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
@@ -21376,7 +22067,7 @@ var createRealtimeClientSecretResponse = zod6.object({
21376
22067
  format: zod6.discriminatedUnion("type", [
21377
22068
  zod6.object({
21378
22069
  type: zod6.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
21379
- rate: zod6.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
22070
+ rate: zod6.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
21380
22071
  }).describe("The PCM audio format. Only a 24kHz sample rate is supported."),
21381
22072
  zod6.object({
21382
22073
  type: zod6.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
@@ -21600,7 +22291,7 @@ var createRealtimeSessionResponse = zod6.object({
21600
22291
  format: zod6.discriminatedUnion("type", [
21601
22292
  zod6.object({
21602
22293
  type: zod6.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
21603
- rate: zod6.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
22294
+ rate: zod6.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
21604
22295
  }).describe("The PCM audio format. Only a 24kHz sample rate is supported."),
21605
22296
  zod6.object({
21606
22297
  type: zod6.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")
@@ -21644,7 +22335,7 @@ var createRealtimeSessionResponse = zod6.object({
21644
22335
  format: zod6.discriminatedUnion("type", [
21645
22336
  zod6.object({
21646
22337
  type: zod6.enum(["audio/pcm"]).describe("The audio format. Always `audio/pcm`."),
21647
- rate: zod6.literal(24e3).optional().describe("The sample rate of the audio. Always `24000`.")
22338
+ rate: zod6.literal(24e3).describe("The sample rate of the audio. Always `24000`.")
21648
22339
  }).describe("The PCM audio format. Only a 24kHz sample rate is supported."),
21649
22340
  zod6.object({
21650
22341
  type: zod6.enum(["audio/pcmu"]).describe("The audio format. Always `audio/pcmu`.")