fal-endpoint-types 1.3.36 → 1.3.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "fal-endpoint-types",
3
- "version": "1.3.36",
3
+ "version": "1.3.37",
4
4
  "description": "TypeScript types for Fal AI endpoints generated from the OpenAPI schemas.",
5
5
  "homepage": "https://github.com/rawpixel-vincent/fal-endpoint-types#readme",
6
6
  "bugs": {
@@ -2292,6 +2292,53 @@ export interface SpeechTimestamp {
2292
2292
  start: number;
2293
2293
  }
2294
2294
 
2295
+ export interface SpeakerConfig {
2296
+ /**
2297
+ * Speaker Id
2298
+ * @description Alias used to identify this speaker in the prompt. Use this alias as a prefix in the prompt field, e.g. 'Alice: Hello! Bob: Hi there!'. Must be alphanumeric with no whitespace.
2299
+ * @example Speaker1
2300
+ * @example Alice
2301
+ * @example Narrator
2302
+ */
2303
+ speaker_id: string;
2304
+ /**
2305
+ * Voice
2306
+ * @description Voice preset for this speaker.
2307
+ * @enum {string}
2308
+ */
2309
+ voice:
2310
+ | 'Achernar'
2311
+ | 'Achird'
2312
+ | 'Algenib'
2313
+ | 'Algieba'
2314
+ | 'Alnilam'
2315
+ | 'Aoede'
2316
+ | 'Autonoe'
2317
+ | 'Callirrhoe'
2318
+ | 'Charon'
2319
+ | 'Despina'
2320
+ | 'Enceladus'
2321
+ | 'Erinome'
2322
+ | 'Fenrir'
2323
+ | 'Gacrux'
2324
+ | 'Iapetus'
2325
+ | 'Kore'
2326
+ | 'Laomedeia'
2327
+ | 'Leda'
2328
+ | 'Orus'
2329
+ | 'Pulcherrima'
2330
+ | 'Puck'
2331
+ | 'Rasalgethi'
2332
+ | 'Sadachbia'
2333
+ | 'Sadaltager'
2334
+ | 'Schedar'
2335
+ | 'Sulafat'
2336
+ | 'Umbriel'
2337
+ | 'Vindemiatrix'
2338
+ | 'Zephyr'
2339
+ | 'Zubenelgenubi';
2340
+ }
2341
+
2295
2342
  export interface Speaker {
2296
2343
  audio_url: string;
2297
2344
  prompt: string;
@@ -3596,7 +3643,22 @@ export interface KlingV3MultiPromptElement {
3596
3643
  * @default 5
3597
3644
  * @enum {string}
3598
3645
  */
3599
- duration?: '3' | '4' | '5' | '6' | '7' | '8' | '9' | '10' | '11' | '12' | '13' | '14' | '15';
3646
+ duration?:
3647
+ | '1'
3648
+ | '2'
3649
+ | '3'
3650
+ | '4'
3651
+ | '5'
3652
+ | '6'
3653
+ | '7'
3654
+ | '8'
3655
+ | '9'
3656
+ | '10'
3657
+ | '11'
3658
+ | '12'
3659
+ | '13'
3660
+ | '14'
3661
+ | '15';
3600
3662
  /**
3601
3663
  * Prompt
3602
3664
  * @description The prompt for this shot.
@@ -3633,6 +3695,11 @@ export interface KlingV3ComboElementInput {
3633
3695
  * @description The video URL of the element. A request can only have one element with a video.
3634
3696
  */
3635
3697
  video_url?: string;
3698
+ /**
3699
+ * Voice Id
3700
+ * @description The voice ID for this element. The voice will be bound to the element, and references to this element will use the bound voice. Voice binding is only supported for video elements and cannot be used with image elements. Get voice IDs from the following endpoint: https://fal.ai/models/fal-ai/kling-video/create-voice
3701
+ */
3702
+ voice_id?: string;
3636
3703
  }
3637
3704
 
3638
3705
  export interface KeyframeTransition {
@@ -14,11 +14,21 @@ declare global {
14
14
  output: falEndpoints.XaiGrokImagineVideoTextToVideoOutput;
15
15
  };
16
16
 
17
+ 'xai/grok-imagine-video/reference-to-video': {
18
+ input: falEndpoints.XaiGrokImagineVideoReferenceToVideoInput;
19
+ output: falEndpoints.XaiGrokImagineVideoReferenceToVideoOutput;
20
+ };
21
+
17
22
  'xai/grok-imagine-video/image-to-video': {
18
23
  input: falEndpoints.XaiGrokImagineVideoImageToVideoInput;
19
24
  output: falEndpoints.XaiGrokImagineVideoImageToVideoOutput;
20
25
  };
21
26
 
27
+ 'xai/grok-imagine-video/extend-video': {
28
+ input: falEndpoints.XaiGrokImagineVideoExtendVideoInput;
29
+ output: falEndpoints.XaiGrokImagineVideoExtendVideoOutput;
30
+ };
31
+
22
32
  'xai/grok-imagine-video/edit-video': {
23
33
  input: falEndpoints.XaiGrokImagineVideoEditVideoInput;
24
34
  output: falEndpoints.XaiGrokImagineVideoEditVideoOutput;
@@ -1244,6 +1254,11 @@ declare global {
1244
1254
  output: falEndpoints.SeedvrUpscaleVideoOutput;
1245
1255
  };
1246
1256
 
1257
+ 'fal-ai/seedvr/upscale/image/seamless': {
1258
+ input: falEndpoints.SeedvrUpscaleImageSeamlessInput;
1259
+ output: falEndpoints.SeedvrUpscaleImageSeamlessOutput;
1260
+ };
1261
+
1247
1262
  'fal-ai/seedvr/upscale/image': {
1248
1263
  input: falEndpoints.SeedvrUpscaleImageInput;
1249
1264
  output: falEndpoints.SeedvrUpscaleImageOutput;
@@ -3559,6 +3574,11 @@ declare global {
3559
3574
  output: falEndpoints.IpAdapterFaceIdOutput;
3560
3575
  };
3561
3576
 
3577
+ 'fal-ai/inworld-tts': {
3578
+ input: falEndpoints.InworldTtsInput;
3579
+ output: falEndpoints.InworldTtsOutput;
3580
+ };
3581
+
3562
3582
  'fal-ai/invisible-watermark': {
3563
3583
  input: falEndpoints.InvisibleWatermarkInput;
3564
3584
  output: falEndpoints.InvisibleWatermarkOutput;
@@ -4349,6 +4369,11 @@ declare global {
4349
4369
  output: falEndpoints.GenfocusOutput;
4350
4370
  };
4351
4371
 
4372
+ 'fal-ai/gemini-tts': {
4373
+ input: falEndpoints.GeminiTtsInput;
4374
+ output: falEndpoints.GeminiTtsOutput;
4375
+ };
4376
+
4352
4377
  'fal-ai/gemini-flash-edit/multi': {
4353
4378
  input: falEndpoints.GeminiFlashEditMultiInput;
4354
4379
  output: falEndpoints.GeminiFlashEditMultiOutput;
@@ -94,8 +94,62 @@ export interface XaiGrokImagineVideoTextToVideoOutput {
94
94
  * "fps": 24,
95
95
  * "width": 1280,
96
96
  * "file_name": "RUAbFYlssdqnbjNLmE8qP_IX7BNYGP.mp4",
97
- * "num_frames": 145,
98
- * "content_type": "video/mp4"
97
+ * "content_type": "video/mp4",
98
+ * "num_frames": 145
99
+ * }
100
+ */
101
+ video: Components.VideoFile;
102
+ }
103
+
104
+ export interface XaiGrokImagineVideoReferenceToVideoInput {
105
+ /**
106
+ * Aspect Ratio
107
+ * @description Aspect ratio of the generated video.
108
+ * @default 16:9
109
+ * @enum {string}
110
+ */
111
+ aspect_ratio?: '16:9' | '4:3' | '3:2' | '1:1' | '2:3' | '3:4' | '9:16';
112
+ /**
113
+ * Duration
114
+ * @description Video duration in seconds.
115
+ * @default 8
116
+ */
117
+ duration?: number;
118
+ /**
119
+ * Prompt
120
+ * @description Text prompt describing the video to generate. Use @Image1, @Image2, etc. to reference specific images from reference_image_urls in order.
121
+ * @example A @Image1 running through a sunlit meadow, cinematic slow motion
122
+ */
123
+ prompt: string;
124
+ /**
125
+ * Reference Image URLs
126
+ * @description One or more reference image URLs to guide the video generation as style and content references. Reference in prompt as @Image1, @Image2, etc. Maximum 7 images.
127
+ * @example [
128
+ * "https://v3b.fal.media/files/b/0a8b90e0/BFLE9VDlZqsryU-UA3BoD_image_004.png"
129
+ * ]
130
+ */
131
+ reference_image_urls: string[];
132
+ /**
133
+ * Resolution
134
+ * @description Resolution of the output video.
135
+ * @default 480p
136
+ * @enum {string}
137
+ */
138
+ resolution?: '480p' | '720p';
139
+ }
140
+
141
+ export interface XaiGrokImagineVideoReferenceToVideoOutput {
142
+ /**
143
+ * @description The generated video.
144
+ * @example {
145
+ * "height": 720,
146
+ * "duration": 8,
147
+ * "url": "https://v3b.fal.media/files/b/0a8b90e4/r2v_output.mp4",
148
+ * "fps": 24,
149
+ * "width": 1280,
150
+ * "file_name": "r2v_output.mp4",
151
+ * "content_type": "video/mp4",
152
+ * "num_frames": 192
99
153
  * }
100
154
  */
101
155
  video: Components.VideoFile;
@@ -145,8 +199,46 @@ export interface XaiGrokImagineVideoImageToVideoOutput {
145
199
  * "fps": 24,
146
200
  * "width": 1280,
147
201
  * "file_name": "0Ci1dviuSnEyUZzBUq-_5_nu7MrAAa.mp4",
148
- * "num_frames": 145,
149
- * "content_type": "video/mp4"
202
+ * "content_type": "video/mp4",
203
+ * "num_frames": 145
204
+ * }
205
+ */
206
+ video: Components.VideoFile;
207
+ }
208
+
209
+ export interface XaiGrokImagineVideoExtendVideoInput {
210
+ /**
211
+ * Duration
212
+ * @description Length of the extension in seconds.
213
+ * @default 6
214
+ */
215
+ duration?: number;
216
+ /**
217
+ * Prompt
218
+ * @description Text description of what should happen next in the video.
219
+ * @example The camera slowly zooms out to reveal the city skyline at sunset
220
+ */
221
+ prompt: string;
222
+ /**
223
+ * Video URL
224
+ * @description URL of the source video to extend. Must be MP4 format (H.264, H.265, or AV1 codec), 2-15 seconds long.
225
+ * @example https://v3b.fal.media/files/b/0a8b9112/V5Z_NIPE3ppMDWivNo6_q_video_019.mp4
226
+ */
227
+ video_url: string;
228
+ }
229
+
230
+ export interface XaiGrokImagineVideoExtendVideoOutput {
231
+ /**
232
+ * @description The extended video (original + extension stitched together).
233
+ * @example {
234
+ * "height": 720,
235
+ * "duration": 16,
236
+ * "url": "https://v3b.fal.media/files/b/0a8b9113/extended_video.mp4",
237
+ * "fps": 24,
238
+ * "width": 1280,
239
+ * "file_name": "extended_video.mp4",
240
+ * "content_type": "video/mp4",
241
+ * "num_frames": 384
150
242
  * }
151
243
  */
152
244
  video: Components.VideoFile;
@@ -184,8 +276,8 @@ export interface XaiGrokImagineVideoEditVideoOutput {
184
276
  * "fps": 24,
185
277
  * "width": 1280,
186
278
  * "file_name": "EuDrZuQTW9m1phBXOsauz_EpJH3s8X.mp4",
187
- * "num_frames": 121,
188
- * "content_type": "video/mp4"
279
+ * "content_type": "video/mp4",
280
+ * "num_frames": 121
189
281
  * }
190
282
  */
191
283
  video: Components.VideoFile;
@@ -6232,6 +6324,22 @@ export interface SharedType_92a {
6232
6324
  video: Components.File;
6233
6325
  }
6234
6326
 
6327
+ export interface SharedType_920 {
6328
+ /**
6329
+ * @description Upscaled image file after processing
6330
+ * @example {
6331
+ * "content_type": "image/png",
6332
+ * "url": "https://storage.googleapis.com/falserverless/example_outputs/seedvr2/image_out.png"
6333
+ * }
6334
+ */
6335
+ image: Components.ImageFile;
6336
+ /**
6337
+ * Seed
6338
+ * @description The random seed used for the generation process.
6339
+ */
6340
+ seed: number;
6341
+ }
6342
+
6235
6343
  export interface SharedType_91c {
6236
6344
  /**
6237
6345
  * Image Url
@@ -6849,53 +6957,6 @@ export interface SharedType_8a3 {
6849
6957
  video: Components.File;
6850
6958
  }
6851
6959
 
6852
- export interface SharedType_89f {
6853
- /**
6854
- * Aspect Ratio
6855
- * @description Aspect ratio of the generated video.
6856
- * @default 16:9
6857
- * @enum {string}
6858
- */
6859
- aspect_ratio?: '16:9' | '9:16' | '1:1';
6860
- /**
6861
- * Duration
6862
- * @description Video duration in seconds (3-15s).
6863
- * @default 5
6864
- * @enum {string}
6865
- */
6866
- duration?: '3' | '4' | '5' | '6' | '7' | '8' | '9' | '10' | '11' | '12' | '13' | '14' | '15';
6867
- /**
6868
- * Generate Audio
6869
- * @description Whether to generate native audio for the video.
6870
- * @default false
6871
- */
6872
- generate_audio?: boolean;
6873
- /**
6874
- * Multi Prompt
6875
- * @description List of prompts for multi-shot video generation.
6876
- * @example null
6877
- */
6878
- multi_prompt?: Components.KlingV3MultiPromptElement[];
6879
- /**
6880
- * Prompt
6881
- * @description Text prompt for video generation. Required unless multi_prompt is provided.
6882
- * @example A mecha lands on the ground to save the city, and says "I'm here", in anime style
6883
- */
6884
- prompt?: string;
6885
- /**
6886
- * Shot Type
6887
- * @description The type of multi-shot video generation.
6888
- * @default customize
6889
- * @constant
6890
- */
6891
- shot_type?: 'customize';
6892
- /**
6893
- * Voice Ids
6894
- * @description Optional Voice IDs for video generation. Reference voices in your prompt with <<<voice_1>>> and <<<voice_2>>> (maximum 2 voices per task). Get voice IDs from the kling video create-voice endpoint: https://fal.ai/models/fal-ai/kling-video/create-voice
6895
- */
6896
- voice_ids?: string[];
6897
- }
6898
-
6899
6960
  export interface SharedType_896 {
6900
6961
  /**
6901
6962
  * Aspect Ratio
@@ -12233,6 +12294,48 @@ export interface SharedType_266 {
12233
12294
  video: Components.File;
12234
12295
  }
12235
12296
 
12297
+ export interface SharedType_25d {
12298
+ /**
12299
+ * Aspect Ratio
12300
+ * @description Aspect ratio of the generated video.
12301
+ * @default 16:9
12302
+ * @enum {string}
12303
+ */
12304
+ aspect_ratio?: '16:9' | '9:16' | '1:1';
12305
+ /**
12306
+ * Duration
12307
+ * @description Video duration in seconds (3-15s).
12308
+ * @default 5
12309
+ * @enum {string}
12310
+ */
12311
+ duration?: '3' | '4' | '5' | '6' | '7' | '8' | '9' | '10' | '11' | '12' | '13' | '14' | '15';
12312
+ /**
12313
+ * Generate Audio
12314
+ * @description Whether to generate native audio for the video.
12315
+ * @default false
12316
+ */
12317
+ generate_audio?: boolean;
12318
+ /**
12319
+ * Multi Prompt
12320
+ * @description List of prompts for multi-shot video generation.
12321
+ * @example null
12322
+ */
12323
+ multi_prompt?: Components.KlingV3MultiPromptElement[];
12324
+ /**
12325
+ * Prompt
12326
+ * @description Text prompt for video generation. Required unless multi_prompt is provided.
12327
+ * @example A mecha lands on the ground to save the city, and says "I'm here", in anime style
12328
+ */
12329
+ prompt?: string;
12330
+ /**
12331
+ * Shot Type
12332
+ * @description The type of multi-shot video generation.
12333
+ * @default customize
12334
+ * @constant
12335
+ */
12336
+ shot_type?: 'customize';
12337
+ }
12338
+
12236
12339
  export interface SharedType_23c {
12237
12340
  /**
12238
12341
  * @description The generated video.
@@ -27052,7 +27155,13 @@ export interface SeedvrUpscaleVideoOutput {
27052
27155
  video: Components.File;
27053
27156
  }
27054
27157
 
27055
- export interface SeedvrUpscaleImageInput {
27158
+ export interface SeedvrUpscaleImageSeamlessInput {
27159
+ /**
27160
+ * Enable Safety Checker
27161
+ * @description If set to true, the safety checker will be enabled.
27162
+ * @default true
27163
+ */
27164
+ enable_safety_checker?: boolean;
27056
27165
  /**
27057
27166
  * Image Url
27058
27167
  * @description The input image to be processed
@@ -27068,10 +27177,10 @@ export interface SeedvrUpscaleImageInput {
27068
27177
  /**
27069
27178
  * Output Format
27070
27179
  * @description The format of the output image.
27071
- * @default jpg
27180
+ * @default png
27072
27181
  * @enum {string}
27073
27182
  */
27074
- output_format?: 'png' | 'jpg' | 'webp';
27183
+ output_format?: 'png' | 'jpeg' | 'webp';
27075
27184
  /**
27076
27185
  * Seed
27077
27186
  * @description The random seed used for the generation process.
@@ -27105,22 +27214,63 @@ export interface SeedvrUpscaleImageInput {
27105
27214
  upscale_mode?: 'target' | 'factor';
27106
27215
  }
27107
27216
 
27108
- export interface SeedvrUpscaleImageOutput {
27217
+ export interface SeedvrUpscaleImageSeamlessOutput extends SharedType_920 {}
27218
+
27219
+ export interface SeedvrUpscaleImageInput {
27109
27220
  /**
27110
- * @description Upscaled image file after processing
27111
- * @example {
27112
- * "content_type": "image/png",
27113
- * "url": "https://storage.googleapis.com/falserverless/example_outputs/seedvr2/image_out.png"
27114
- * }
27221
+ * Image Url
27222
+ * @description The input image to be processed
27223
+ * @example https://storage.googleapis.com/falserverless/example_inputs/seedvr2/image_in.png
27115
27224
  */
27116
- image: Components.ImageFile;
27225
+ image_url: string;
27226
+ /**
27227
+ * Noise Scale
27228
+ * @description The noise scale to use for the generation process.
27229
+ * @default 0.1
27230
+ */
27231
+ noise_scale?: number;
27232
+ /**
27233
+ * Output Format
27234
+ * @description The format of the output image.
27235
+ * @default jpg
27236
+ * @enum {string}
27237
+ */
27238
+ output_format?: 'png' | 'jpg' | 'webp';
27117
27239
  /**
27118
27240
  * Seed
27119
27241
  * @description The random seed used for the generation process.
27120
27242
  */
27121
- seed: number;
27243
+ seed?: number;
27244
+ /**
27245
+ * Sync Mode
27246
+ * @description If `true`, the media will be returned as a data URI and the output data won't be available in the request history.
27247
+ * @default false
27248
+ */
27249
+ sync_mode?: boolean;
27250
+ /**
27251
+ * Target Resolution
27252
+ * @description The target resolution to upscale to when `upscale_mode` is `target`.
27253
+ * @default 1080p
27254
+ * @enum {string}
27255
+ */
27256
+ target_resolution?: '720p' | '1080p' | '1440p' | '2160p';
27257
+ /**
27258
+ * Upscale Factor
27259
+ * @description Upscaling factor to be used. Will multiply the dimensions with this factor when `upscale_mode` is `factor`.
27260
+ * @default 2
27261
+ */
27262
+ upscale_factor?: number;
27263
+ /**
27264
+ * Upscale Mode
27265
+ * @description The mode to use for the upscale. If 'target', the upscale factor will be calculated based on the target resolution. If 'factor', the upscale factor will be used directly.
27266
+ * @default factor
27267
+ * @enum {string}
27268
+ */
27269
+ upscale_mode?: 'target' | 'factor';
27122
27270
  }
27123
27271
 
27272
+ export interface SeedvrUpscaleImageOutput extends SharedType_920 {}
27273
+
27124
27274
  export interface SdxlControlnetUnionInpaintingInput {
27125
27275
  /**
27126
27276
  * Canny Image Url
@@ -50618,11 +50768,6 @@ export interface KlingVideoV3StandardTextToVideoInput {
50618
50768
  * @enum {string}
50619
50769
  */
50620
50770
  shot_type?: 'customize' | 'intelligent';
50621
- /**
50622
- * Voice Ids
50623
- * @description Optional Voice IDs for video generation. Reference voices in your prompt with <<<voice_1>>> and <<<voice_2>>> (maximum 2 voices per task). Get voice IDs from the kling video create-voice endpoint: https://fal.ai/models/fal-ai/kling-video/create-voice
50624
- */
50625
- voice_ids?: string[];
50626
50771
  }
50627
50772
 
50628
50773
  export interface KlingVideoV3StandardTextToVideoOutput {
@@ -50760,11 +50905,6 @@ export interface KlingVideoV3StandardImageToVideoInput {
50760
50905
  * @example https://storage.googleapis.com/falserverless/example_inputs/kling-v3/standard-i2v/start_image.png
50761
50906
  */
50762
50907
  start_image_url: string;
50763
- /**
50764
- * Voice Ids
50765
- * @description Optional Voice IDs for video generation. Reference voices in your prompt with <<<voice_1>>> and <<<voice_2>>> (maximum 2 voices per task). Get voice IDs from the kling video create-voice endpoint: https://fal.ai/models/fal-ai/kling-video/create-voice
50766
- */
50767
- voice_ids?: string[];
50768
50908
  }
50769
50909
 
50770
50910
  export interface KlingVideoV3StandardImageToVideoOutput {
@@ -50832,11 +50972,6 @@ export interface KlingVideoV3ProTextToVideoInput {
50832
50972
  * @enum {string}
50833
50973
  */
50834
50974
  shot_type?: 'customize' | 'intelligent';
50835
- /**
50836
- * Voice Ids
50837
- * @description Optional Voice IDs for video generation. Reference voices in your prompt with <<<voice_1>>> and <<<voice_2>>> (maximum 2 voices per task). Get voice IDs from the kling video create-voice endpoint: https://fal.ai/models/fal-ai/kling-video/create-voice
50838
- */
50839
- voice_ids?: string[];
50840
50975
  }
50841
50976
 
50842
50977
  export interface KlingVideoV3ProTextToVideoOutput {
@@ -50974,11 +51109,6 @@ export interface KlingVideoV3ProImageToVideoInput {
50974
51109
  * @example https://storage.googleapis.com/falserverless/example_inputs/kling-v3/pro-i2v/start_image.png
50975
51110
  */
50976
51111
  start_image_url: string;
50977
- /**
50978
- * Voice Ids
50979
- * @description Optional Voice IDs for video generation. Reference voices in your prompt with <<<voice_1>>> and <<<voice_2>>> (maximum 2 voices per task). Get voice IDs from the kling video create-voice endpoint: https://fal.ai/models/fal-ai/kling-video/create-voice
50980
- */
50981
- voice_ids?: string[];
50982
51112
  }
50983
51113
 
50984
51114
  export interface KlingVideoV3ProImageToVideoOutput {
@@ -52004,7 +52134,7 @@ export interface KlingVideoO3StandardVideoToVideoEditOutput {
52004
52134
  video: Components.File;
52005
52135
  }
52006
52136
 
52007
- export interface KlingVideoO3StandardTextToVideoInput extends SharedType_89f {}
52137
+ export interface KlingVideoO3StandardTextToVideoInput extends SharedType_25d {}
52008
52138
 
52009
52139
  export interface KlingVideoO3StandardTextToVideoOutput extends SharedType_723 {}
52010
52140
 
@@ -52246,7 +52376,7 @@ export interface KlingVideoO3ProVideoToVideoEditOutput {
52246
52376
  video: Components.File;
52247
52377
  }
52248
52378
 
52249
- export interface KlingVideoO3ProTextToVideoInput extends SharedType_89f {}
52379
+ export interface KlingVideoO3ProTextToVideoInput extends SharedType_25d {}
52250
52380
 
52251
52381
  export interface KlingVideoO3ProTextToVideoOutput extends SharedType_723 {}
52252
52382
 
@@ -53244,6 +53374,152 @@ export interface IpAdapterFaceIdInput {
53244
53374
 
53245
53375
  export interface IpAdapterFaceIdOutput extends SharedType_678 {}
53246
53376
 
53377
+ export interface InworldTtsInput {
53378
+ /**
53379
+ * Sample Rate Hertz
53380
+ * @description The sample rate in Hz for the output audio.
53381
+ * @default 48000
53382
+ * @enum {integer}
53383
+ */
53384
+ sample_rate_hertz?: 8000 | 16000 | 24000 | 32000 | 40000 | 48000;
53385
+ /**
53386
+ * Text
53387
+ * @description The text to synthesize into speech.
53388
+ * @example Hello! This is a demo of Inworld's TTS.
53389
+ */
53390
+ text: string;
53391
+ /**
53392
+ * Voice
53393
+ * @description The voice to use for synthesis.
53394
+ * @default Craig (en)
53395
+ * @enum {string}
53396
+ */
53397
+ voice?:
53398
+ | 'Loretta (en)'
53399
+ | 'Darlene (en)'
53400
+ | 'Marlene (en)'
53401
+ | 'Hank (en)'
53402
+ | 'Evelyn (en)'
53403
+ | 'Celeste (en)'
53404
+ | 'Pippa (en)'
53405
+ | 'Tessa (en)'
53406
+ | 'Liam (en)'
53407
+ | 'Callum (en)'
53408
+ | 'Hamish (en)'
53409
+ | 'Abby (en)'
53410
+ | 'Graham (en)'
53411
+ | 'Rupert (en)'
53412
+ | 'Mortimer (en)'
53413
+ | 'Snik (en)'
53414
+ | 'Anjali (en)'
53415
+ | 'Saanvi (en)'
53416
+ | 'Arjun (en)'
53417
+ | 'Claire (en)'
53418
+ | 'Oliver (en)'
53419
+ | 'Simon (en)'
53420
+ | 'Elliot (en)'
53421
+ | 'James (en)'
53422
+ | 'Serena (en)'
53423
+ | 'Gareth (en)'
53424
+ | 'Vinny (en)'
53425
+ | 'Lauren (en)'
53426
+ | 'Jessica (en)'
53427
+ | 'Ethan (en)'
53428
+ | 'Tyler (en)'
53429
+ | 'Jason (en)'
53430
+ | 'Chloe (en)'
53431
+ | 'Veronica (en)'
53432
+ | 'Victoria (en)'
53433
+ | 'Miranda (en)'
53434
+ | 'Sebastian (en)'
53435
+ | 'Victor (en)'
53436
+ | 'Malcolm (en)'
53437
+ | 'Kayla (en)'
53438
+ | 'Nate (en)'
53439
+ | 'Jake (en)'
53440
+ | 'Brian (en)'
53441
+ | 'Amina (en)'
53442
+ | 'Kelsey (en)'
53443
+ | 'Derek (en)'
53444
+ | 'Grant (en)'
53445
+ | 'Evan (en)'
53446
+ | 'Alex (en)'
53447
+ | 'Ashley (en)'
53448
+ | 'Craig (en)'
53449
+ | 'Deborah (en)'
53450
+ | 'Dennis (en)'
53451
+ | 'Edward (en)'
53452
+ | 'Elizabeth (en)'
53453
+ | 'Hades (en)'
53454
+ | 'Julia (en)'
53455
+ | 'Pixie (en)'
53456
+ | 'Mark (en)'
53457
+ | 'Olivia (en)'
53458
+ | 'Priya (en)'
53459
+ | 'Ronald (en)'
53460
+ | 'Sarah (en)'
53461
+ | 'Shaun (en)'
53462
+ | 'Theodore (en)'
53463
+ | 'Timothy (en)'
53464
+ | 'Wendy (en)'
53465
+ | 'Dominus (en)'
53466
+ | 'Hana (en)'
53467
+ | 'Clive (en)'
53468
+ | 'Carter (en)'
53469
+ | 'Blake (en)'
53470
+ | 'Luna (en)'
53471
+ | 'Yichen (zh)'
53472
+ | 'Xiaoyin (zh)'
53473
+ | 'Xinyi (zh)'
53474
+ | 'Jing (zh)'
53475
+ | 'Erik (nl)'
53476
+ | 'Katrien (nl)'
53477
+ | 'Lennart (nl)'
53478
+ | 'Lore (nl)'
53479
+ | 'Alain (fr)'
53480
+ | 'Hélène (fr)'
53481
+ | 'Mathieu (fr)'
53482
+ | 'Étienne (fr)'
53483
+ | 'Johanna (de)'
53484
+ | 'Josef (de)'
53485
+ | 'Gianni (it)'
53486
+ | 'Orietta (it)'
53487
+ | 'Asuka (ja)'
53488
+ | 'Satoshi (ja)'
53489
+ | 'Hyunwoo (ko)'
53490
+ | 'Minji (ko)'
53491
+ | 'Seojun (ko)'
53492
+ | 'Yoona (ko)'
53493
+ | 'Szymon (pl)'
53494
+ | 'Wojciech (pl)'
53495
+ | 'Heitor (pt)'
53496
+ | 'Maitê (pt)'
53497
+ | 'Diego (es)'
53498
+ | 'Lupita (es)'
53499
+ | 'Miguel (es)'
53500
+ | 'Rafael (es)'
53501
+ | 'Svetlana (ru)'
53502
+ | 'Elena (ru)'
53503
+ | 'Dmitry (ru)'
53504
+ | 'Nikolai (ru)'
53505
+ | 'Riya (hi)'
53506
+ | 'Manoj (hi)'
53507
+ | 'Yael (he)'
53508
+ | 'Oren (he)'
53509
+ | 'Nour (ar)'
53510
+ | 'Omar (ar)';
53511
+ }
53512
+
53513
+ export interface InworldTtsOutput {
53514
+ /**
53515
+ * @description Generated audio file.
53516
+ * @example {
53517
+ * "url": "https://v3b.fal.media/files/b/0a920730/38aud4s6sF7bOWFoQHaJk_tmpvv2htrpc.wav"
53518
+ * }
53519
+ */
53520
+ audio: Components.File;
53521
+ }
53522
+
53247
53523
  export interface InvisibleWatermarkInput {
53248
53524
  /**
53249
53525
  * Decode
@@ -61851,6 +62127,204 @@ export interface GenfocusInput {
61851
62127
 
61852
62128
  export interface GenfocusOutput extends SharedType_951 {}
61853
62129
 
62130
+ export interface GeminiTtsInput {
62131
+ /**
62132
+ * Language Code
62133
+ * @description Language for multilingual synthesis. When set, steers the model to speak in the specified language. Supports 24 GA languages and 60+ Preview languages. If not set, the model auto-detects the language from the text.
62134
+ * @example English (US)
62135
+ * @example French (France)
62136
+ * @example Japanese (Japan)
62137
+ */
62138
+ language_code?:
62139
+ | 'Arabic (Egypt)'
62140
+ | 'Bangla (Bangladesh)'
62141
+ | 'Dutch (Netherlands)'
62142
+ | 'English (India)'
62143
+ | 'English (US)'
62144
+ | 'French (France)'
62145
+ | 'German (Germany)'
62146
+ | 'Hindi (India)'
62147
+ | 'Indonesian (Indonesia)'
62148
+ | 'Italian (Italy)'
62149
+ | 'Japanese (Japan)'
62150
+ | 'Korean (South Korea)'
62151
+ | 'Marathi (India)'
62152
+ | 'Polish (Poland)'
62153
+ | 'Portuguese (Brazil)'
62154
+ | 'Romanian (Romania)'
62155
+ | 'Russian (Russia)'
62156
+ | 'Spanish (Spain)'
62157
+ | 'Tamil (India)'
62158
+ | 'Telugu (India)'
62159
+ | 'Thai (Thailand)'
62160
+ | 'Turkish (Turkey)'
62161
+ | 'Ukrainian (Ukraine)'
62162
+ | 'Vietnamese (Vietnam)'
62163
+ | 'Afrikaans (South Africa)'
62164
+ | 'Albanian (Albania)'
62165
+ | 'Amharic (Ethiopia)'
62166
+ | 'Arabic (World)'
62167
+ | 'Armenian (Armenia)'
62168
+ | 'Azerbaijani (Azerbaijan)'
62169
+ | 'Basque (Spain)'
62170
+ | 'Belarusian (Belarus)'
62171
+ | 'Bulgarian (Bulgaria)'
62172
+ | 'Burmese (Myanmar)'
62173
+ | 'Catalan (Spain)'
62174
+ | 'Cebuano (Philippines)'
62175
+ | 'Chinese Mandarin (China)'
62176
+ | 'Chinese Mandarin (Taiwan)'
62177
+ | 'Croatian (Croatia)'
62178
+ | 'Czech (Czech Republic)'
62179
+ | 'Danish (Denmark)'
62180
+ | 'English (Australia)'
62181
+ | 'English (UK)'
62182
+ | 'Estonian (Estonia)'
62183
+ | 'Filipino (Philippines)'
62184
+ | 'Finnish (Finland)'
62185
+ | 'French (Canada)'
62186
+ | 'Galician (Spain)'
62187
+ | 'Georgian (Georgia)'
62188
+ | 'Greek (Greece)'
62189
+ | 'Gujarati (India)'
62190
+ | 'Haitian Creole (Haiti)'
62191
+ | 'Hebrew (Israel)'
62192
+ | 'Hungarian (Hungary)'
62193
+ | 'Icelandic (Iceland)'
62194
+ | 'Javanese (Java)'
62195
+ | 'Kannada (India)'
62196
+ | 'Konkani (India)'
62197
+ | 'Lao (Laos)'
62198
+ | 'Latin (Vatican City)'
62199
+ | 'Latvian (Latvia)'
62200
+ | 'Lithuanian (Lithuania)'
62201
+ | 'Luxembourgish (Luxembourg)'
62202
+ | 'Macedonian (North Macedonia)'
62203
+ | 'Maithili (India)'
62204
+ | 'Malagasy (Madagascar)'
62205
+ | 'Malay (Malaysia)'
62206
+ | 'Malayalam (India)'
62207
+ | 'Mongolian (Mongolia)'
62208
+ | 'Nepali (Nepal)'
62209
+ | 'Norwegian Bokmal (Norway)'
62210
+ | 'Norwegian Nynorsk (Norway)'
62211
+ | 'Odia (India)'
62212
+ | 'Pashto (Afghanistan)'
62213
+ | 'Persian (Iran)'
62214
+ | 'Portuguese (Portugal)'
62215
+ | 'Punjabi (India)'
62216
+ | 'Serbian (Serbia)'
62217
+ | 'Sindhi (India)'
62218
+ | 'Sinhala (Sri Lanka)'
62219
+ | 'Slovak (Slovakia)'
62220
+ | 'Slovenian (Slovenia)'
62221
+ | 'Spanish (Latin America)'
62222
+ | 'Spanish (Mexico)'
62223
+ | 'Swahili (Kenya)'
62224
+ | 'Swedish (Sweden)'
62225
+ | 'Urdu (Pakistan)';
62226
+ /**
62227
+ * Model
62228
+ * @description Which Gemini TTS model to use. gemini-2.5-flash-tts: low latency, cost-efficient for everyday applications (recommended). gemini-2.5-pro-tts: highest quality, best for structured workflows like podcasts, audiobooks, and customer support.
62229
+ * @default gemini-2.5-flash-tts
62230
+ * @enum {string}
62231
+ */
62232
+ model?: 'gemini-2.5-flash-tts' | 'gemini-2.5-pro-tts';
62233
+ /**
62234
+ * Output Format
62235
+ * @description Audio output format. mp3: compressed, small file size (recommended). wav: uncompressed PCM wrapped in WAV (24 kHz, 16-bit mono). ogg_opus: Ogg container with Opus codec, good quality-to-size ratio.
62236
+ * @default mp3
62237
+ * @enum {string}
62238
+ */
62239
+ output_format?: 'wav' | 'mp3' | 'ogg_opus';
62240
+ /**
62241
+ * Prompt
62242
+ * @description The text to convert to speech. Gemini TTS supports natural-language prompting for style, pace, accent, and emotional expression — include delivery instructions inline with the text (e.g. 'Say cheerfully: Have a wonderful day!'). For multi-speaker synthesis, prefix lines with speaker aliases defined in the speakers field (e.g. 'Alice: Hello!\nBob: Hi!'). Supports inline pace/style markers like [slowly], [whispering], [excited], [extremely fast].
62243
+ * @example Host: Welcome back to AI Frontiers, the podcast where we explore the latest breakthroughs in artificial intelligence. Today we have a very special guest. Doctor Chen, thank you for joining us!
62244
+ * DrChen: Thanks for having me! I'm excited to be here.
62245
+ * Host: So, let's dive right in. Your recent paper on neural architecture search has been making waves. Can you tell our listeners what inspired this research?
62246
+ * DrChen: Absolutely. It all started when we noticed that most existing approaches were optimizing for the wrong metrics. We asked ourselves, what if we could let the model design itself?
62247
+ */
62248
+ prompt: string;
62249
+ /**
62250
+ * Speakers
62251
+ * @description Multi-speaker voice configuration. When set, enables multi-speaker synthesis where different parts of the text are spoken by different voices. Each speaker needs a voice and a speaker_id (alias) that matches prefixes in the prompt. Requires gemini-2.5-pro-tts or gemini-2.5-flash-tts model. Not supported with gemini-2.5-flash-lite-preview-tts.
62252
+ * @example [
62253
+ * {
62254
+ * "voice": "Charon",
62255
+ * "speaker_id": "Host"
62256
+ * },
62257
+ * {
62258
+ * "voice": "Kore",
62259
+ * "speaker_id": "DrChen"
62260
+ * }
62261
+ * ]
62262
+ */
62263
+ speakers?: Components.SpeakerConfig[];
62264
+ /**
62265
+ * Style Instructions
62266
+ * @description Optional style and delivery instructions prepended to the prompt. Controls expressiveness, accent, pace, tone, and emotional expression using natural language. Use this to separate style control from the text content. Examples: 'Speak warmly and slowly', 'Read this as a dramatic newscast', 'Use a British accent with a cheerful tone', 'Whisper mysteriously'.
62267
+ * @example Say the following in a warm, conversational tone
62268
+ * @example Read this as a dramatic newscast with gravitas
62269
+ * @example Speak with a British accent, cheerfully and energetically
62270
+ * @example This is a podcast conversation. The host is enthusiastic and curious, the guest is knowledgeable and articulate
62271
+ */
62272
+ style_instructions?: string;
62273
+ /**
62274
+ * Temperature
62275
+ * @description Controls the randomness of the speech output. Higher values produce more creative and varied delivery, while lower values make the output more predictable and focused.
62276
+ * @default 1
62277
+ */
62278
+ temperature?: number;
62279
+ /**
62280
+ * Voice
62281
+ * @description Voice preset for single-speaker synthesis. 30 distinct voices are available. Ignored when speakers is set. Popular choices: Kore (strong, firm female), Puck (upbeat, lively male), Charon (calm, professional male), Zephyr (bright, clear female), Aoede (warm, melodic female).
62282
+ * @default Kore
62283
+ * @enum {string}
62284
+ */
62285
+ voice?:
62286
+ | 'Achernar'
62287
+ | 'Achird'
62288
+ | 'Algenib'
62289
+ | 'Algieba'
62290
+ | 'Alnilam'
62291
+ | 'Aoede'
62292
+ | 'Autonoe'
62293
+ | 'Callirrhoe'
62294
+ | 'Charon'
62295
+ | 'Despina'
62296
+ | 'Enceladus'
62297
+ | 'Erinome'
62298
+ | 'Fenrir'
62299
+ | 'Gacrux'
62300
+ | 'Iapetus'
62301
+ | 'Kore'
62302
+ | 'Laomedeia'
62303
+ | 'Leda'
62304
+ | 'Orus'
62305
+ | 'Pulcherrima'
62306
+ | 'Puck'
62307
+ | 'Rasalgethi'
62308
+ | 'Sadachbia'
62309
+ | 'Sadaltager'
62310
+ | 'Schedar'
62311
+ | 'Sulafat'
62312
+ | 'Umbriel'
62313
+ | 'Vindemiatrix'
62314
+ | 'Zephyr'
62315
+ | 'Zubenelgenubi';
62316
+ }
62317
+
62318
+ export interface GeminiTtsOutput {
62319
+ /**
62320
+ * @description The generated audio file.
62321
+ * @example {
62322
+ * "url": "https://v3b.fal.media/files/b/0a935d4f/Ez4NpcnFTuGsu2FHDaJTR_gemini_tts_output.mp3"
62323
+ * }
62324
+ */
62325
+ audio: Components.File;
62326
+ }
62327
+
61854
62328
  export interface GeminiFlashEditMultiInput {
61855
62329
  /**
61856
62330
  * Input Image Urls
@@ -75552,220 +76026,220 @@ export interface ElevenlabsSpeechToTextScribeV2Output {
75552
76026
  * Words
75553
76027
  * @description Word-level transcription details
75554
76028
  * @example {
75555
- * "end": 0.539,
76029
+ * "text": "Hey,",
75556
76030
  * "start": 0.079,
75557
76031
  * "type": "word",
75558
- * "text": "Hey,",
76032
+ * "end": 0.539,
75559
76033
  * "speaker_id": "speaker_0"
75560
76034
  * }
75561
76035
  * @example {
75562
- * "end": 0.599,
76036
+ * "text": " ",
75563
76037
  * "start": 0.539,
75564
76038
  * "type": "spacing",
75565
- * "text": " ",
76039
+ * "end": 0.599,
75566
76040
  * "speaker_id": "speaker_0"
75567
76041
  * }
75568
76042
  * @example {
75569
- * "end": 0.679,
76043
+ * "text": "this",
75570
76044
  * "start": 0.599,
75571
76045
  * "type": "word",
75572
- * "text": "this",
76046
+ * "end": 0.679,
75573
76047
  * "speaker_id": "speaker_0"
75574
76048
  * }
75575
76049
  * @example {
75576
- * "end": 0.739,
76050
+ * "text": " ",
75577
76051
  * "start": 0.679,
75578
76052
  * "type": "spacing",
75579
- * "text": " ",
76053
+ * "end": 0.739,
75580
76054
  * "speaker_id": "speaker_0"
75581
76055
  * }
75582
76056
  * @example {
75583
- * "end": 0.799,
76057
+ * "text": "is",
75584
76058
  * "start": 0.739,
75585
76059
  * "type": "word",
75586
- * "text": "is",
76060
+ * "end": 0.799,
75587
76061
  * "speaker_id": "speaker_0"
75588
76062
  * }
75589
76063
  * @example {
75590
- * "end": 0.939,
76064
+ * "text": " ",
75591
76065
  * "start": 0.799,
75592
76066
  * "type": "spacing",
75593
- * "text": " ",
76067
+ * "end": 0.939,
75594
76068
  * "speaker_id": "speaker_0"
75595
76069
  * }
75596
76070
  * @example {
75597
- * "end": 0.939,
76071
+ * "text": "a",
75598
76072
  * "start": 0.939,
75599
76073
  * "type": "word",
75600
- * "text": "a",
76074
+ * "end": 0.939,
75601
76075
  * "speaker_id": "speaker_0"
75602
76076
  * }
75603
76077
  * @example {
75604
- * "end": 0.959,
76078
+ * "text": " ",
75605
76079
  * "start": 0.939,
75606
76080
  * "type": "spacing",
75607
- * "text": " ",
76081
+ * "end": 0.959,
75608
76082
  * "speaker_id": "speaker_0"
75609
76083
  * }
75610
76084
  * @example {
75611
- * "end": 1.179,
76085
+ * "text": "test",
75612
76086
  * "start": 0.959,
75613
76087
  * "type": "word",
75614
- * "text": "test",
76088
+ * "end": 1.179,
75615
76089
  * "speaker_id": "speaker_0"
75616
76090
  * }
75617
76091
  * @example {
75618
- * "end": 1.219,
76092
+ * "text": " ",
75619
76093
  * "start": 1.179,
75620
76094
  * "type": "spacing",
75621
- * "text": " ",
76095
+ * "end": 1.219,
75622
76096
  * "speaker_id": "speaker_0"
75623
76097
  * }
75624
76098
  * @example {
75625
- * "end": 1.719,
76099
+ * "text": "recording",
75626
76100
  * "start": 1.22,
75627
76101
  * "type": "word",
75628
- * "text": "recording",
76102
+ * "end": 1.719,
75629
76103
  * "speaker_id": "speaker_0"
75630
76104
  * }
75631
76105
  * @example {
75632
- * "end": 1.719,
76106
+ * "text": " ",
75633
76107
  * "start": 1.719,
75634
76108
  * "type": "spacing",
75635
- * "text": " ",
76109
+ * "end": 1.719,
75636
76110
  * "speaker_id": "speaker_0"
75637
76111
  * }
75638
76112
  * @example {
75639
- * "end": 1.86,
76113
+ * "text": "for",
75640
76114
  * "start": 1.719,
75641
76115
  * "type": "word",
75642
- * "text": "for",
76116
+ * "end": 1.86,
75643
76117
  * "speaker_id": "speaker_0"
75644
76118
  * }
75645
76119
  * @example {
75646
- * "end": 1.879,
76120
+ * "text": " ",
75647
76121
  * "start": 1.86,
75648
76122
  * "type": "spacing",
75649
- * "text": " ",
76123
+ * "end": 1.879,
75650
76124
  * "speaker_id": "speaker_0"
75651
76125
  * }
75652
76126
  * @example {
75653
- * "end": 2.24,
76127
+ * "text": "Scribe",
75654
76128
  * "start": 1.879,
75655
76129
  * "type": "word",
75656
- * "text": "Scribe",
76130
+ * "end": 2.24,
75657
76131
  * "speaker_id": "speaker_0"
75658
76132
  * }
75659
76133
  * @example {
75660
- * "end": 2.319,
76134
+ * "text": " ",
75661
76135
  * "start": 2.24,
75662
76136
  * "type": "spacing",
75663
- * "text": " ",
76137
+ * "end": 2.319,
75664
76138
  * "speaker_id": "speaker_0"
75665
76139
  * }
75666
76140
  * @example {
75667
- * "end": 2.759,
76141
+ * "text": "version",
75668
76142
  * "start": 2.319,
75669
76143
  * "type": "word",
75670
- * "text": "version",
76144
+ * "end": 2.759,
75671
76145
  * "speaker_id": "speaker_0"
75672
76146
  * }
75673
76147
  * @example {
75674
- * "end": 2.779,
76148
+ * "text": " ",
75675
76149
  * "start": 2.759,
75676
76150
  * "type": "spacing",
75677
- * "text": " ",
76151
+ * "end": 2.779,
75678
76152
  * "speaker_id": "speaker_0"
75679
76153
  * }
75680
76154
  * @example {
75681
- * "end": 3.379,
76155
+ * "text": "two,",
75682
76156
  * "start": 2.779,
75683
76157
  * "type": "word",
75684
- * "text": "two,",
76158
+ * "end": 3.379,
75685
76159
  * "speaker_id": "speaker_0"
75686
76160
  * }
75687
76161
  * @example {
75688
- * "end": 3.399,
76162
+ * "text": " ",
75689
76163
  * "start": 3.379,
75690
76164
  * "type": "spacing",
75691
- * "text": " ",
76165
+ * "end": 3.399,
75692
76166
  * "speaker_id": "speaker_0"
75693
76167
  * }
75694
76168
  * @example {
75695
- * "end": 3.519,
76169
+ * "text": "which",
75696
76170
  * "start": 3.399,
75697
76171
  * "type": "word",
75698
- * "text": "which",
76172
+ * "end": 3.519,
75699
76173
  * "speaker_id": "speaker_0"
75700
76174
  * }
75701
76175
  * @example {
75702
- * "end": 3.539,
76176
+ * "text": " ",
75703
76177
  * "start": 3.519,
75704
76178
  * "type": "spacing",
75705
- * "text": " ",
76179
+ * "end": 3.539,
75706
76180
  * "speaker_id": "speaker_0"
75707
76181
  * }
75708
76182
  * @example {
75709
- * "end": 3.659,
76183
+ * "text": "is",
75710
76184
  * "start": 3.539,
75711
76185
  * "type": "word",
75712
- * "text": "is",
76186
+ * "end": 3.659,
75713
76187
  * "speaker_id": "speaker_0"
75714
76188
  * }
75715
76189
  * @example {
75716
- * "end": 3.699,
76190
+ * "text": " ",
75717
76191
  * "start": 3.659,
75718
76192
  * "type": "spacing",
75719
- * "text": " ",
76193
+ * "end": 3.699,
75720
76194
  * "speaker_id": "speaker_0"
75721
76195
  * }
75722
76196
  * @example {
75723
- * "end": 3.839,
76197
+ * "text": "now",
75724
76198
  * "start": 3.699,
75725
76199
  * "type": "word",
75726
- * "text": "now",
76200
+ * "end": 3.839,
75727
76201
  * "speaker_id": "speaker_0"
75728
76202
  * }
75729
76203
  * @example {
75730
- * "end": 3.839,
76204
+ * "text": " ",
75731
76205
  * "start": 3.839,
75732
76206
  * "type": "spacing",
75733
- * "text": " ",
76207
+ * "end": 3.839,
75734
76208
  * "speaker_id": "speaker_0"
75735
76209
  * }
75736
76210
  * @example {
75737
- * "end": 4.319,
76211
+ * "text": "available",
75738
76212
  * "start": 3.839,
75739
76213
  * "type": "word",
75740
- * "text": "available",
76214
+ * "end": 4.319,
75741
76215
  * "speaker_id": "speaker_0"
75742
76216
  * }
75743
76217
  * @example {
75744
- * "end": 4.339,
76218
+ * "text": " ",
75745
76219
  * "start": 4.319,
75746
76220
  * "type": "spacing",
75747
- * "text": " ",
76221
+ * "end": 4.339,
75748
76222
  * "speaker_id": "speaker_0"
75749
76223
  * }
75750
76224
  * @example {
75751
- * "end": 4.579,
76225
+ * "text": "on",
75752
76226
  * "start": 4.339,
75753
76227
  * "type": "word",
75754
- * "text": "on",
76228
+ * "end": 4.579,
75755
76229
  * "speaker_id": "speaker_0"
75756
76230
  * }
75757
76231
  * @example {
75758
- * "end": 4.599,
76232
+ * "text": " ",
75759
76233
  * "start": 4.579,
75760
76234
  * "type": "spacing",
75761
- * "text": " ",
76235
+ * "end": 4.599,
75762
76236
  * "speaker_id": "speaker_0"
75763
76237
  * }
75764
76238
  * @example {
75765
- * "end": 5.699,
76239
+ * "text": "fal.ai.",
75766
76240
  * "start": 4.599,
75767
76241
  * "type": "word",
75768
- * "text": "fal.ai.",
76242
+ * "end": 5.699,
75769
76243
  * "speaker_id": "speaker_0"
75770
76244
  * }
75771
76245
  */
@@ -80409,6 +80883,32 @@ export interface BytedanceDreamactorV2Output {
80409
80883
  }
80410
80884
 
80411
80885
  export interface BytedanceUpscalerUpscaleVideoInput {
80886
+ /**
80887
+ * Enhancement Preset
80888
+ * @description The enhancement preset optimized for specific video scenarios. 'general' is a general-purpose template, 'ugc' targets user-generated short videos, 'short_series' is for short dramas, 'aigc' is for AI-generated content, and 'old_film' is for classic film restoration.
80889
+ * @default general
80890
+ * @enum {string}
80891
+ */
80892
+ enhancement_preset?: 'general' | 'ugc' | 'short_series' | 'aigc' | 'old_film';
80893
+ /**
80894
+ * Enhancement Tier
80895
+ * @description The enhancement quality tier. 'fast' provides essential upscaling with good speed, 'standard' uses adaptive algorithms for better visual texture, and 'pro' uses large-model restoration for cinematic quality (longer processing time) and costs 10 times as much as `standard` and `fast`.
80896
+ * @default standard
80897
+ * @enum {string}
80898
+ */
80899
+ enhancement_tier?: 'fast' | 'standard' | 'pro';
80900
+ /**
80901
+ * Fidelity
80902
+ * @description The enhancement intensity. 'high' applies mild enhancement while keeping visual texture close to the source video. 'medium' provides a balanced image quality enhancement.
80903
+ * @default high
80904
+ * @enum {string}
80905
+ */
80906
+ fidelity?: 'high' | 'medium';
80907
+ /**
80908
+ * Scale Ratio
80909
+ * @description The scaling ratio for the output video resolution. When set, overrides target_resolution and scales the input resolution by this factor (e.g., 2.0 doubles the resolution). Range: 1.1 to 10.0. Please note that this is valid only up to 4k resolution, and trying to scale beyond 4k will result in an error. (4k is defined as having a total pixel count of 3840x2160).
80910
+ */
80911
+ scale_ratio?: number;
80412
80912
  /**
80413
80913
  * Target Fps
80414
80914
  * @description The target FPS of the video to upscale.
@@ -81342,6 +81842,13 @@ export interface BenV2VideoInput {
81342
81842
  * @description Optional RGB values (0-255) for the background color. If not provided, the background will be transparent. For ex: [0, 0, 0]
81343
81843
  */
81344
81844
  background_color?: [number, number, number];
81845
+ /**
81846
+ * Output Format
81847
+ * @description Output video format. Use "webm" for true transparency support (VP9 codec with alpha channel). MP4 format does not support transparency and will render transparent areas as black.
81848
+ * @default mp4
81849
+ * @enum {string}
81850
+ */
81851
+ output_format?: 'mp4' | 'webm';
81345
81852
  /**
81346
81853
  * Seed
81347
81854
  * @description Random seed for reproducible generation.
@@ -81390,8 +81897,8 @@ export interface BenV2ImageOutput {
81390
81897
  /**
81391
81898
  * @description The output image after background removal.
81392
81899
  * @example {
81393
- * "height": 512,
81394
81900
  * "file_size": 423052,
81901
+ * "height": 512,
81395
81902
  * "file_name": "zrZNETpI_ul2jonraqpxN_a57c3f3825d9418f8b3d39cde87c3310.png",
81396
81903
  * "content_type": "image/png",
81397
81904
  * "url": "https://storage.googleapis.com/falserverless/gallery/Ben2/zrZNETpI_ul2jonraqpxN_a57c3f3825d9418f8b3d39cde87c3310.png",
@@ -84319,18 +84826,18 @@ export interface BriaEmbedProductInput {
84319
84826
  * {
84320
84827
  * "coordinates": {
84321
84828
  * "y": 317,
84829
+ * "width": 100,
84322
84830
  * "height": 300,
84323
- * "x": 300,
84324
- * "width": 100
84831
+ * "x": 300
84325
84832
  * },
84326
84833
  * "image_source": "https://bria-datasets.s3.us-east-1.amazonaws.com/embed-product/a_standing_lamp_over_white_background_0.png"
84327
84834
  * },
84328
84835
  * {
84329
84836
  * "coordinates": {
84330
84837
  * "y": 287,
84838
+ * "width": 120,
84331
84839
  * "height": 156,
84332
- * "x": 646,
84333
- * "width": 120
84840
+ * "x": 646
84334
84841
  * },
84335
84842
  * "image_source": "https://bria-datasets.s3.us-east-1.amazonaws.com/embed-product/a_wall_picture_on_white_background_0.png"
84336
84843
  * }