npm - @cartesia/cartesia-js - Versions diffs - 3.0.0-b3 → 3.0.0-b4 - Mend

@cartesia/cartesia-js 3.0.0-b3 → 3.0.0-b4

This diff shows the changes between publicly available versions of the package as they were released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (97)

package/CHANGELOG.md +9 -0
package/README.md +21 -21
package/client.d.mts +2 -2
package/client.d.mts.map +1 -1
package/client.d.ts +2 -2
package/client.d.ts.map +1 -1
package/client.js.map +1 -1
package/client.mjs.map +1 -1
package/package.json +1 -1
package/resources/access-token.d.mts +5 -0
package/resources/access-token.d.mts.map +1 -1
package/resources/access-token.d.ts +5 -0
package/resources/access-token.d.ts.map +1 -1
package/resources/agents/agents.js +1 -1
package/resources/agents/agents.js.map +1 -1
package/resources/agents/agents.mjs +1 -1
package/resources/agents/agents.mjs.map +1 -1
package/resources/agents/metrics/index.d.mts +1 -1
package/resources/agents/metrics/index.d.mts.map +1 -1
package/resources/agents/metrics/index.d.ts +1 -1
package/resources/agents/metrics/index.d.ts.map +1 -1
package/resources/agents/metrics/index.js.map +1 -1
package/resources/agents/metrics/index.mjs.map +1 -1
package/resources/agents/metrics/metrics.d.mts +2 -2
package/resources/agents/metrics/metrics.d.mts.map +1 -1
package/resources/agents/metrics/metrics.d.ts +2 -2
package/resources/agents/metrics/metrics.d.ts.map +1 -1
package/resources/agents/metrics/metrics.js.map +1 -1
package/resources/agents/metrics/metrics.mjs.map +1 -1
package/resources/agents/metrics/results.d.mts +23 -22
package/resources/agents/metrics/results.d.mts.map +1 -1
package/resources/agents/metrics/results.d.ts +23 -22
package/resources/agents/metrics/results.d.ts.map +1 -1
package/resources/agents/metrics/results.js +4 -5
package/resources/agents/metrics/results.js.map +1 -1
package/resources/agents/metrics/results.mjs +4 -5
package/resources/agents/metrics/results.mjs.map +1 -1
package/resources/index.d.mts +1 -1
package/resources/index.d.mts.map +1 -1
package/resources/index.d.ts +1 -1
package/resources/index.d.ts.map +1 -1
package/resources/index.js.map +1 -1
package/resources/index.mjs.map +1 -1
package/resources/infill.d.mts +3 -4
package/resources/infill.d.mts.map +1 -1
package/resources/infill.d.ts +3 -4
package/resources/infill.d.ts.map +1 -1
package/resources/infill.js +0 -2
package/resources/infill.js.map +1 -1
package/resources/infill.mjs +0 -2
package/resources/infill.mjs.map +1 -1
package/resources/pronunciation-dicts.d.mts +0 -8
package/resources/pronunciation-dicts.d.mts.map +1 -1
package/resources/pronunciation-dicts.d.ts +0 -8
package/resources/pronunciation-dicts.d.ts.map +1 -1
package/resources/pronunciation-dicts.js +0 -18
package/resources/pronunciation-dicts.js.map +1 -1
package/resources/pronunciation-dicts.mjs +0 -18
package/resources/pronunciation-dicts.mjs.map +1 -1
package/resources/stt.d.mts +1 -104
package/resources/stt.d.mts.map +1 -1
package/resources/stt.d.ts +1 -104
package/resources/stt.d.ts.map +1 -1
package/resources/tts/tts.d.mts +79 -115
package/resources/tts/tts.d.mts.map +1 -1
package/resources/tts/tts.d.ts +79 -115
package/resources/tts/tts.d.ts.map +1 -1
package/resources/voice-changer.d.mts +2 -2
package/resources/voice-changer.d.mts.map +1 -1
package/resources/voice-changer.d.ts +2 -2
package/resources/voice-changer.d.ts.map +1 -1
package/resources/voices.d.mts +70 -34
package/resources/voices.d.mts.map +1 -1
package/resources/voices.d.ts +70 -34
package/resources/voices.d.ts.map +1 -1
package/resources/voices.js +45 -3
package/resources/voices.js.map +1 -1
package/resources/voices.mjs +45 -3
package/resources/voices.mjs.map +1 -1
package/src/client.ts +2 -0
package/src/resources/access-token.ts +6 -0
package/src/resources/agents/agents.ts +1 -1
package/src/resources/agents/metrics/index.ts +1 -0
package/src/resources/agents/metrics/metrics.ts +2 -0
package/src/resources/agents/metrics/results.ts +27 -23
package/src/resources/index.ts +1 -0
package/src/resources/infill.ts +3 -4
package/src/resources/pronunciation-dicts.ts +0 -20
package/src/resources/stt.ts +102 -104
package/src/resources/tts/tts.ts +146 -128
package/src/resources/voice-changer.ts +2 -2
package/src/resources/voices.ts +105 -38
package/src/version.ts +1 -1
package/version.d.mts +1 -1
package/version.d.ts +1 -1
package/version.js +1 -1
package/version.mjs +1 -1

package/src/resources/stt.ts CHANGED Viewed

@@ -107,111 +107,109 @@ export interface SttTranscribeParams {
   /**
    * Body param: The language of the input audio in ISO-639-1 format. Defaults to
    * `en`.
-   *
-   * <Accordion title="Supported languages">
-   *   - `en` (English)
-   *   - `zh` (Chinese)
-   *   - `de` (German)
-   *   - `es` (Spanish)
-   *   - `ru` (Russian)
-   *   - `ko` (Korean)
-   *   - `fr` (French)
-   *   - `ja` (Japanese)
-   *   - `pt` (Portuguese)
-   *   - `tr` (Turkish)
-   *   - `pl` (Polish)
-   *   - `ca` (Catalan)
-   *   - `nl` (Dutch)
-   *   - `ar` (Arabic)
-   *   - `sv` (Swedish)
-   *   - `it` (Italian)
-   *   - `id` (Indonesian)
-   *   - `hi` (Hindi)
-   *   - `fi` (Finnish)
-   *   - `vi` (Vietnamese)
-   *   - `he` (Hebrew)
-   *   - `uk` (Ukrainian)
-   *   - `el` (Greek)
-   *   - `ms` (Malay)
-   *   - `cs` (Czech)
-   *   - `ro` (Romanian)
-   *   - `da` (Danish)
-   *   - `hu` (Hungarian)
-   *   - `ta` (Tamil)
-   *   - `no` (Norwegian)
-   *   - `th` (Thai)
-   *   - `ur` (Urdu)
-   *   - `hr` (Croatian)
-   *   - `bg` (Bulgarian)
-   *   - `lt` (Lithuanian)
-   *   - `la` (Latin)
-   *   - `mi` (Maori)
-   *   - `ml` (Malayalam)
-   *   - `cy` (Welsh)
-   *   - `sk` (Slovak)
-   *   - `te` (Telugu)
-   *   - `fa` (Persian)
-   *   - `lv` (Latvian)
-   *   - `bn` (Bengali)
-   *   - `sr` (Serbian)
-   *   - `az` (Azerbaijani)
-   *   - `sl` (Slovenian)
-   *   - `kn` (Kannada)
-   *   - `et` (Estonian)
-   *   - `mk` (Macedonian)
-   *   - `br` (Breton)
-   *   - `eu` (Basque)
-   *   - `is` (Icelandic)
-   *   - `hy` (Armenian)
-   *   - `ne` (Nepali)
-   *   - `mn` (Mongolian)
-   *   - `bs` (Bosnian)
-   *   - `kk` (Kazakh)
-   *   - `sq` (Albanian)
-   *   - `sw` (Swahili)
-   *   - `gl` (Galician)
-   *   - `mr` (Marathi)
-   *   - `pa` (Punjabi)
-   *   - `si` (Sinhala)
-   *   - `km` (Khmer)
-   *   - `sn` (Shona)
-   *   - `yo` (Yoruba)
-   *   - `so` (Somali)
-   *   - `af` (Afrikaans)
-   *   - `oc` (Occitan)
-   *   - `ka` (Georgian)
-   *   - `be` (Belarusian)
-   *   - `tg` (Tajik)
-   *   - `sd` (Sindhi)
-   *   - `gu` (Gujarati)
-   *   - `am` (Amharic)
-   *   - `yi` (Yiddish)
-   *   - `lo` (Lao)
-   *   - `uz` (Uzbek)
-   *   - `fo` (Faroese)
-   *   - `ht` (Haitian Creole)
-   *   - `ps` (Pashto)
-   *   - `tk` (Turkmen)
-   *   - `nn` (Nynorsk)
-   *   - `mt` (Maltese)
-   *   - `sa` (Sanskrit)
-   *   - `lb` (Luxembourgish)
-   *   - `my` (Myanmar)
-   *   - `bo` (Tibetan)
-   *   - `tl` (Tagalog)
-   *   - `mg` (Malagasy)
-   *   - `as` (Assamese)
-   *   - `tt` (Tatar)
-   *   - `haw` (Hawaiian)
-   *   - `ln` (Lingala)
-   *   - `ha` (Hausa)
-   *   - `ba` (Bashkir)
-   *   - `jw` (Javanese)
-   *   - `su` (Sundanese)
-   *   - `yue` (Cantonese)
-   * </Accordion>
    */
-  language?: string | null;
+  language?:
+    | 'en'
+    | 'zh'
+    | 'de'
+    | 'es'
+    | 'ru'
+    | 'ko'
+    | 'fr'
+    | 'ja'
+    | 'pt'
+    | 'tr'
+    | 'pl'
+    | 'ca'
+    | 'nl'
+    | 'ar'
+    | 'sv'
+    | 'it'
+    | 'id'
+    | 'hi'
+    | 'fi'
+    | 'vi'
+    | 'he'
+    | 'uk'
+    | 'el'
+    | 'ms'
+    | 'cs'
+    | 'ro'
+    | 'da'
+    | 'hu'
+    | 'ta'
+    | 'no'
+    | 'th'
+    | 'ur'
+    | 'hr'
+    | 'bg'
+    | 'lt'
+    | 'la'
+    | 'mi'
+    | 'ml'
+    | 'cy'
+    | 'sk'
+    | 'te'
+    | 'fa'
+    | 'lv'
+    | 'bn'
+    | 'sr'
+    | 'az'
+    | 'sl'
+    | 'kn'
+    | 'et'
+    | 'mk'
+    | 'br'
+    | 'eu'
+    | 'is'
+    | 'hy'
+    | 'ne'
+    | 'mn'
+    | 'bs'
+    | 'kk'
+    | 'sq'
+    | 'sw'
+    | 'gl'
+    | 'mr'
+    | 'pa'
+    | 'si'
+    | 'km'
+    | 'sn'
+    | 'yo'
+    | 'so'
+    | 'af'
+    | 'oc'
+    | 'ka'
+    | 'be'
+    | 'tg'
+    | 'sd'
+    | 'gu'
+    | 'am'
+    | 'yi'
+    | 'lo'
+    | 'uz'
+    | 'fo'
+    | 'ht'
+    | 'ps'
+    | 'tk'
+    | 'nn'
+    | 'mt'
+    | 'sa'
+    | 'lb'
+    | 'my'
+    | 'bo'
+    | 'tl'
+    | 'mg'
+    | 'as'
+    | 'tt'
+    | 'haw'
+    | 'ln'
+    | 'ha'
+    | 'ba'
+    | 'jw'
+    | 'su'
+    | 'yue'
+    | null;
   /**
    * Body param: ID of the model to use for transcription. Use `ink-whisper` for the

package/src/resources/tts/tts.ts CHANGED Viewed

@@ -34,42 +34,88 @@ export class TTS extends APIResource {
 }
 /**
- * Configure the various attributes of the generated speech. These controls are
- * only available for `sonic-3-preview` and will have no effect on earlier models.
+ * Configure the various attributes of the generated speech. These are only for
+ * `sonic-3` and have no effect on earlier models.
+ *
+ * See
+ * [Volume, Speed, and Emotion in Sonic-3](/build-with-cartesia/sonic-3/volume-speed-emotion)
+ * for a guide on this option.
  */
 export interface GenerationConfig {
   /**
-   * These controls are **experimental** and subject to breaking changes.
-   */
-  experimental?: GenerationConfig.Experimental | null;
-  /**
-   * Adjust the speed of the generated speech between -1.0 (slower) and 1.0 (faster).
-   * 0.0 is the default speed.
-   */
-  speed?: number | null;
-  /**
-   * Adjust the volume of the generated speech between -1.0 (softer) and 1.0
-   * (louder). 0.0 is the default volume.
-   */
-  volume?: number | null;
-}
-export namespace GenerationConfig {
-  /**
-   * These controls are **experimental** and subject to breaking changes.
-   */
-  export interface Experimental {
-    /**
-     * Toggle accent localization: 0 (disabled, default) or 1 (enabled). When enabled,
-     * the voice adapts to match the transcript language's accent while preserving
-     * vocal characteristics. When disabled, maintains the original voice accent. For
-     * more information, see
-     * [Localize Voices](/build-with-sonic/capabilities/localize-voices).
-     */
-    accent_localization?: number | null;
-  }
+   * Guide the emotion of the generated speech.
+   */
+  emotion?:
+    | 'neutral'
+    | 'happy'
+    | 'excited'
+    | 'enthusiastic'
+    | 'elated'
+    | 'euphoric'
+    | 'triumphant'
+    | 'amazed'
+    | 'surprised'
+    | 'flirtatious'
+    | 'curious'
+    | 'content'
+    | 'peaceful'
+    | 'serene'
+    | 'calm'
+    | 'grateful'
+    | 'affectionate'
+    | 'trust'
+    | 'sympathetic'
+    | 'anticipation'
+    | 'mysterious'
+    | 'angry'
+    | 'mad'
+    | 'outraged'
+    | 'frustrated'
+    | 'agitated'
+    | 'threatened'
+    | 'disgusted'
+    | 'contempt'
+    | 'envious'
+    | 'sarcastic'
+    | 'ironic'
+    | 'sad'
+    | 'dejected'
+    | 'melancholic'
+    | 'disappointed'
+    | 'hurt'
+    | 'guilty'
+    | 'bored'
+    | 'tired'
+    | 'rejected'
+    | 'nostalgic'
+    | 'wistful'
+    | 'apologetic'
+    | 'hesitant'
+    | 'insecure'
+    | 'confused'
+    | 'resigned'
+    | 'anxious'
+    | 'panicked'
+    | 'alarmed'
+    | 'scared'
+    | 'proud'
+    | 'confident'
+    | 'distant'
+    | 'skeptical'
+    | 'contemplative'
+    | 'determined';
+  /**
+   * Adjust the speed of the generated speech between 0.6x and 1.5x the original
+   * speed (default is 1.0x). Valid values are between [0.6, 1.5] inclusive.
+   */
+  speed?: number;
+  /**
+   * Adjust the volume of the generated speech between 0.5x and 2.0x the original
+   * volume (default is 1.0x). Valid values are between [0.5, 2.0] inclusive.
+   */
+  volume?: number;
 }
 export interface GenerationRequest {
@@ -117,30 +163,30 @@ export interface GenerationRequest {
    */
   continue?: boolean | null;
-  /**
-   * The maximum duration of the audio in seconds. You do not usually need to specify
-   * this. If the duration is not appropriate for the length of the transcript, the
-   * output audio may be truncated.
-   */
-  duration?: number | null;
   /**
    * Whether to flush the context.
    */
   flush?: boolean | null;
   /**
-   * The language that the given voice should speak the transcript in.
+   * Configure the various attributes of the generated speech. These are only for
+   * `sonic-3` and have no effect on earlier models.
    *
-   * Options: English (en), French (fr), German (de), Spanish (es), Portuguese (pt),
-   * Chinese (zh), Japanese (ja), Hindi (hi), Italian (it), Korean (ko), Dutch (nl),
-   * Polish (pl), Russian (ru), Swedish (sv), Turkish (tr).
+   * See
+   * [Volume, Speed, and Emotion in Sonic-3](/build-with-cartesia/sonic-3/volume-speed-emotion)
+   * for a guide on this option.
    */
-  language?: VoicesAPI.SupportedLanguage | null;
+  generation_config?: GenerationConfig;
+  /**
+   * The language that the given voice should speak the transcript in. For valid
+   * options, see [Models](/build-with-cartesia/tts-models).
+   */
+  language?: VoicesAPI.SupportedLanguage;
   /**
    * The maximum time in milliseconds to buffer text before starting generation.
-   * Values between [0, 1000]ms are supported. Defaults to 0 (no buffering).
+   * Values between [0, 5000]ms are supported. Defaults to 3000ms.
    *
    * When set, the model will buffer incoming text chunks until it's confident it has
    * enough context to generate high-quality speech, or the buffer delay elapses,
@@ -153,22 +199,18 @@ export interface GenerationRequest {
   max_buffer_delay_ms?: number | null;
   /**
-   * A list of pronunciation dict IDs to use for the generation. This will be applied
-   * in addition to the pinned pronunciation dict, which will be treated as the first
-   * element of the list. If there are conflicts with dict items, the latest dict
-   * will take precedence.
+   * The ID of a pronunciation dictionary to use for the generation. Pronunciation
+   * dictionaries are supported by `sonic-3` models and newer.
    */
-  pronunciation_dict_ids?: Array<string> | null;
+  pronunciation_dict_id?: string | null;
   /**
-   * > This feature is experimental and may not work for all voices.
-   *
-   * Speed setting for the model. Defaults to `normal`.
-   *
-   * Influences the speed of the generated speech. Faster speeds may reduce
-   * hallucination rate.
+   * @deprecated Use `generation_config.speed` for sonic-3. Speed setting for the
+   * model. Defaults to `normal`. This feature is experimental and may not work for
+   * all voices. Influences the speed of the generated speech. Faster speeds may
+   * reduce hallucination rate.
    */
-  speed?: ModelSpeed | null;
+  speed?: ModelSpeed;
   /**
    * Whether to use normalized timestamps (True) or original timestamps (False).
@@ -182,24 +224,22 @@ export namespace GenerationRequest {
     encoding: InfillAPI.RawEncoding;
-    sample_rate: number;
+    sample_rate: 8000 | 16000 | 22050 | 24000 | 44100 | 48000;
   }
 }
 /**
- * > This feature is experimental and may not work for all voices.
- *
- * Speed setting for the model. Defaults to `normal`.
- *
- * Influences the speed of the generated speech. Faster speeds may reduce
- * hallucination rate.
+ * @deprecated Use `generation_config.speed` for sonic-3. Speed setting for the
+ * model. Defaults to `normal`. This feature is experimental and may not work for
+ * all voices. Influences the speed of the generated speech. Faster speeds may
+ * reduce hallucination rate.
  */
 export type ModelSpeed = 'slow' | 'normal' | 'fast';
 export interface RawOutputFormat {
   encoding: InfillAPI.RawEncoding;
-  sample_rate: number;
+  sample_rate: 8000 | 16000 | 22050 | 24000 | 44100 | 48000;
 }
 export interface VoiceSpecifier {
@@ -245,16 +285,10 @@ export type WebsocketResponse =
 export namespace WebsocketResponse {
   export interface Chunk {
-    data: string;
     done: boolean;
     status_code: number;
-    step_time: number;
-    type: 'chunk';
     /**
      * A unique identifier for the context. You can use any unique identifier, like a
      * UUID or human ID.
@@ -263,6 +297,8 @@ export namespace WebsocketResponse {
      * conversation IDs) as context IDs.
      */
     context_id?: string | null;
+    type?: 'chunk';
   }
   export interface FlushDone {
@@ -368,34 +404,26 @@ export interface TTSGenerateParams {
   voice: VoiceSpecifier;
   /**
-   * The maximum duration of the audio in seconds. You do not usually need to specify
-   * this. If the duration is not appropriate for the length of the transcript, the
-   * output audio may be truncated.
-   */
-  duration?: number | null;
-  /**
-   * Configure the various attributes of the generated speech. These controls are
-   * only available for `sonic-3-preview` and will have no effect on earlier models.
+   * Configure the various attributes of the generated speech. These are only for
+   * `sonic-3` and have no effect on earlier models.
+   *
+   * See
+   * [Volume, Speed, and Emotion in Sonic-3](/build-with-cartesia/sonic-3/volume-speed-emotion)
+   * for a guide on this option.
    */
-  generation_config?: GenerationConfig | null;
+  generation_config?: GenerationConfig;
   /**
-   * The language that the given voice should speak the transcript in.
-   *
-   * Options: English (en), French (fr), German (de), Spanish (es), Portuguese (pt),
-   * Chinese (zh), Japanese (ja), Hindi (hi), Italian (it), Korean (ko), Dutch (nl),
-   * Polish (pl), Russian (ru), Swedish (sv), Turkish (tr).
+   * The language that the given voice should speak the transcript in. For valid
+   * options, see [Models](/build-with-cartesia/tts-models).
    */
   language?: VoicesAPI.SupportedLanguage | null;
   /**
-   * A list of pronunciation dict IDs to use for the generation. This will be applied
-   * in addition to the pinned pronunciation dict, which will be treated as the first
-   * element of the list. If there are conflicts with dict items, the latest dict
-   * will take precedence.
+   * The ID of a pronunciation dictionary to use for the generation. Pronunciation
+   * dictionaries are supported by `sonic-3` models and newer.
    */
-  pronunciation_dict_ids?: Array<string> | null;
+  pronunciation_dict_id?: string | null;
   /**
    * Whether to save the generated audio file. When true, the response will include a
@@ -404,14 +432,12 @@ export interface TTSGenerateParams {
   save?: boolean | null;
   /**
-   * > This feature is experimental and may not work for all voices.
-   *
-   * Speed setting for the model. Defaults to `normal`.
-   *
-   * Influences the speed of the generated speech. Faster speeds may reduce
-   * hallucination rate.
+   * @deprecated Use `generation_config.speed` for sonic-3. Speed setting for the
+   * model. Defaults to `normal`. This feature is experimental and may not work for
+   * all voices. Influences the speed of the generated speech. Faster speeds may
+   * reduce hallucination rate.
    */
-  speed?: ModelSpeed | null;
+  speed?: ModelSpeed;
 }
 export namespace TTSGenerateParams {
@@ -424,13 +450,9 @@ export namespace TTSGenerateParams {
   }
   export interface MP3OutputFormat {
-    /**
-     * The bit rate of the audio in bits per second. Supported bit rates are 32000,
-     * 64000, 96000, 128000, 192000.
-     */
-    bit_rate: number;
+    bit_rate: 32000 | 64000 | 96000 | 128000 | 192000;
-    sample_rate: number;
+    sample_rate: 8000 | 16000 | 22050 | 24000 | 44100 | 48000;
     container?: 'mp3';
   }
@@ -469,38 +491,34 @@ export interface TTSGenerateSseParams {
   context_id?: string | null;
   /**
-   * The maximum duration of the audio in seconds. You do not usually need to specify
-   * this. If the duration is not appropriate for the length of the transcript, the
-   * output audio may be truncated.
+   * Configure the various attributes of the generated speech. These are only for
+   * `sonic-3` and have no effect on earlier models.
+   *
+   * See
+   * [Volume, Speed, and Emotion in Sonic-3](/build-with-cartesia/sonic-3/volume-speed-emotion)
+   * for a guide on this option.
    */
-  duration?: number | null;
+  generation_config?: GenerationConfig;
   /**
-   * The language that the given voice should speak the transcript in.
-   *
-   * Options: English (en), French (fr), German (de), Spanish (es), Portuguese (pt),
-   * Chinese (zh), Japanese (ja), Hindi (hi), Italian (it), Korean (ko), Dutch (nl),
-   * Polish (pl), Russian (ru), Swedish (sv), Turkish (tr).
+   * The language that the given voice should speak the transcript in. For valid
+   * options, see [Models](/build-with-cartesia/tts-models).
    */
-  language?: VoicesAPI.SupportedLanguage | null;
+  language?: VoicesAPI.SupportedLanguage;
   /**
-   * A list of pronunciation dict IDs to use for the generation. This will be applied
-   * in addition to the pinned pronunciation dict, which will be treated as the first
-   * element of the list. If there are conflicts with dict items, the latest dict
-   * will take precedence.
+   * The ID of a pronunciation dictionary to use for the generation. Pronunciation
+   * dictionaries are supported by `sonic-3` models and newer.
    */
-  pronunciation_dict_ids?: Array<string> | null;
+  pronunciation_dict_id?: string | null;
   /**
-   * > This feature is experimental and may not work for all voices.
-   *
-   * Speed setting for the model. Defaults to `normal`.
-   *
-   * Influences the speed of the generated speech. Faster speeds may reduce
-   * hallucination rate.
+   * @deprecated Use `generation_config.speed` for sonic-3. Speed setting for the
+   * model. Defaults to `normal`. This feature is experimental and may not work for
+   * all voices. Influences the speed of the generated speech. Faster speeds may
+   * reduce hallucination rate.
    */
-  speed?: ModelSpeed | null;
+  speed?: ModelSpeed;
   /**
    * Whether to use normalized timestamps (True) or original timestamps (False).
@@ -514,7 +532,7 @@ export namespace TTSGenerateSseParams {
     encoding: InfillAPI.RawEncoding;
-    sample_rate: number;
+    sample_rate: 8000 | 16000 | 22050 | 24000 | 44100 | 48000;
   }
 }

package/src/resources/voice-changer.ts CHANGED Viewed

@@ -54,7 +54,7 @@ export interface VoiceChangerChangeVoiceBytesParams {
    */
   'output_format[encoding]'?: InfillAPI.RawEncoding | null;
-  'output_format[sample_rate]'?: number;
+  'output_format[sample_rate]'?: 8000 | 16000 | 22050 | 24000 | 44100 | 48000;
   'voice[id]'?: string;
 }
@@ -74,7 +74,7 @@ export interface VoiceChangerChangeVoiceSseParams {
    */
   'output_format[encoding]'?: InfillAPI.RawEncoding | null;
-  'output_format[sample_rate]'?: number;
+  'output_format[sample_rate]'?: 8000 | 16000 | 22050 | 24000 | 44100 | 48000;
   'voice[id]'?: string;
 }