fal-endpoint-types 1.3.35 → 1.3.37

@@ -94,8 +94,62 @@ export interface XaiGrokImagineVideoTextToVideoOutput {
  * "fps": 24,
  * "width": 1280,
  * "file_name": "RUAbFYlssdqnbjNLmE8qP_IX7BNYGP.mp4",
- * "num_frames": 145,
- * "content_type": "video/mp4"
+ * "content_type": "video/mp4",
+ * "num_frames": 145
+ * }
+ */
+ video: Components.VideoFile;
+ }
+
+ export interface XaiGrokImagineVideoReferenceToVideoInput {
+ /**
+ * Aspect Ratio
+ * @description Aspect ratio of the generated video.
+ * @default 16:9
+ * @enum {string}
+ */
+ aspect_ratio?: '16:9' | '4:3' | '3:2' | '1:1' | '2:3' | '3:4' | '9:16';
+ /**
+ * Duration
+ * @description Video duration in seconds.
+ * @default 8
+ */
+ duration?: number;
+ /**
+ * Prompt
+ * @description Text prompt describing the video to generate. Use @Image1, @Image2, etc. to reference specific images from reference_image_urls in order.
+ * @example A @Image1 running through a sunlit meadow, cinematic slow motion
+ */
+ prompt: string;
+ /**
+ * Reference Image URLs
+ * @description One or more reference image URLs to guide the video generation as style and content references. Reference in prompt as @Image1, @Image2, etc. Maximum 7 images.
+ * @example [
+ * "https://v3b.fal.media/files/b/0a8b90e0/BFLE9VDlZqsryU-UA3BoD_image_004.png"
+ * ]
+ */
+ reference_image_urls: string[];
+ /**
+ * Resolution
+ * @description Resolution of the output video.
+ * @default 480p
+ * @enum {string}
+ */
+ resolution?: '480p' | '720p';
+ }
+
+ export interface XaiGrokImagineVideoReferenceToVideoOutput {
+ /**
+ * @description The generated video.
+ * @example {
+ * "height": 720,
+ * "duration": 8,
+ * "url": "https://v3b.fal.media/files/b/0a8b90e4/r2v_output.mp4",
+ * "fps": 24,
+ * "width": 1280,
+ * "file_name": "r2v_output.mp4",
+ * "content_type": "video/mp4",
+ * "num_frames": 192
  * }
  */
  video: Components.VideoFile;
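Note: the new reference-to-video input takes up to 7 reference images that the prompt addresses positionally. A minimal sketch of a conforming value, assuming the interface can be imported from this package (import path and namespace are assumptions, not part of the diff):

```ts
import type { XaiGrokImagineVideoReferenceToVideoInput } from 'fal-endpoint-types'; // path assumed

const input: XaiGrokImagineVideoReferenceToVideoInput = {
  // @Image1 refers to reference_image_urls[0], @Image2 to [1], and so on.
  prompt: 'A @Image1 running through a sunlit meadow, cinematic slow motion',
  reference_image_urls: [
    'https://v3b.fal.media/files/b/0a8b90e0/BFLE9VDlZqsryU-UA3BoD_image_004.png',
  ],
  aspect_ratio: '16:9', // optional; defaults to '16:9'
  resolution: '720p',   // optional; defaults to '480p'
  duration: 8,          // optional; seconds, defaults to 8
};
```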
@@ -145,8 +199,46 @@ export interface XaiGrokImagineVideoImageToVideoOutput {
  * "fps": 24,
  * "width": 1280,
  * "file_name": "0Ci1dviuSnEyUZzBUq-_5_nu7MrAAa.mp4",
- * "num_frames": 145,
- * "content_type": "video/mp4"
+ * "content_type": "video/mp4",
+ * "num_frames": 145
+ * }
+ */
+ video: Components.VideoFile;
+ }
+
+ export interface XaiGrokImagineVideoExtendVideoInput {
+ /**
+ * Duration
+ * @description Length of the extension in seconds.
+ * @default 6
+ */
+ duration?: number;
+ /**
+ * Prompt
+ * @description Text description of what should happen next in the video.
+ * @example The camera slowly zooms out to reveal the city skyline at sunset
+ */
+ prompt: string;
+ /**
+ * Video URL
+ * @description URL of the source video to extend. Must be MP4 format (H.264, H.265, or AV1 codec), 2-15 seconds long.
+ * @example https://v3b.fal.media/files/b/0a8b9112/V5Z_NIPE3ppMDWivNo6_q_video_019.mp4
+ */
+ video_url: string;
+ }
+
+ export interface XaiGrokImagineVideoExtendVideoOutput {
+ /**
+ * @description The extended video (original + extension stitched together).
+ * @example {
+ * "height": 720,
+ * "duration": 16,
+ * "url": "https://v3b.fal.media/files/b/0a8b9113/extended_video.mp4",
+ * "fps": 24,
+ * "width": 1280,
+ * "file_name": "extended_video.mp4",
+ * "content_type": "video/mp4",
+ * "num_frames": 384
  * }
  */
  video: Components.VideoFile;
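Note: extend-video stitches newly generated footage onto an existing clip. A hedged usage sketch, under the same import assumption as above:

```ts
const input: XaiGrokImagineVideoExtendVideoInput = {
  prompt: 'The camera slowly zooms out to reveal the city skyline at sunset',
  // Source must be MP4 (H.264, H.265, or AV1) and 2-15 seconds long.
  video_url: 'https://v3b.fal.media/files/b/0a8b9112/V5Z_NIPE3ppMDWivNo6_q_video_019.mp4',
  duration: 6, // optional; seconds of new footage, defaults to 6
};
```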
@@ -184,8 +276,8 @@ export interface XaiGrokImagineVideoEditVideoOutput {
  * "fps": 24,
  * "width": 1280,
  * "file_name": "EuDrZuQTW9m1phBXOsauz_EpJH3s8X.mp4",
- * "num_frames": 121,
- * "content_type": "video/mp4"
+ * "content_type": "video/mp4",
+ * "num_frames": 121
  * }
  */
  video: Components.VideoFile;
@@ -1582,6 +1674,38 @@ export interface SharedType_fe5 {
  };
  }

+ export interface SharedType_fda {
+ /**
+ * Masks
+ * @description Dictionary of label: mask image
+ * @example [
+ * {
+ * "height": 1200,
+ * "file_size": 15724,
+ * "file_name": "019c3c1e3c50446e9996f709d36debb4.png",
+ * "content_type": "image/png",
+ * "url": "https://v3.fal.media/files/monkey/6ITmhHQJ-69s-UxajrY5T_019c3c1e3c50446e9996f709d36debb4.png",
+ * "width": 1800
+ * },
+ * {
+ * "height": 1200,
+ * "file_size": 14905,
+ * "file_name": "0a1522ca410942c7ad6c73efa15b3549.png",
+ * "content_type": "image/png",
+ * "url": "https://v3.fal.media/files/monkey/IljtMxahoo9-7SUpx0fth_0a1522ca410942c7ad6c73efa15b3549.png",
+ * "width": 1800
+ * }
+ * ]
+ */
+ masks: Components.Image[];
+ /**
+ * Output
+ * @description Generated output
+ * @example <p> A white pickup truck </p> [SEG] is parked on the side of <p> the red building </p> [SEG] , creating a unique and eye-catching contrast.<|im_end|>
+ */
+ output: string;
+ }
+
  export interface SharedType_fd1 {
  /**
  * @description The generated video
@@ -1854,13 +1978,6 @@ export interface SharedType_faf {
  sync_mode?: boolean;
  }

- export interface SharedType_f7a {
- config_file: Components.File;
- debug_dataset?: Components.File;
- lora_file: Components.File;
- video?: Components.File;
- }
-
  export interface SharedType_f67 {
  /**
  * Auto Trim
@@ -2616,14 +2733,6 @@ export interface SharedType_e4b {
  steps?: number;
  }

- export interface SharedType_e43 {
- /**
- * Image
- * @description The generated image file info.
- */
- image: Components.Image_2;
- }
-
  export interface SharedType_e33 {
  /**
  * Has Nsfw Concepts
@@ -2659,6 +2768,29 @@ export interface SharedType_e33 {
  };
  }

+ export interface SharedType_e19 {
+ /**
+ * Images
+ * @description The generated images
+ * @example [
+ * {
+ * "height": 1536,
+ * "file_size": 3731290,
+ * "file_name": "257cf8e7bd3a47c2959396343d5b38cf.png",
+ * "content_type": "image/png",
+ * "url": "https://v3.fal.media/files/tiger/48e63e0K6C9XQYBuomoU-_257cf8e7bd3a47c2959396343d5b38cf.png",
+ * "width": 1536
+ * }
+ * ]
+ */
+ images: Components.Image[];
+ /**
+ * Seed
+ * @description Seed value used for generation.
+ */
+ seed: number;
+ }
+
  export interface SharedType_e18 {
  /**
  * Default Caption
@@ -3880,33 +4012,24 @@ export interface SharedType_cbd {
  sync_mode?: boolean;
  }

- export interface SharedType_cb6 {
- /**
- * Prompt
- * @description The prompt used for the generation.
- * @example A woman stands still amid a busy neon-lit street at night. The camera slowly dollies in toward her face as people blur past, their motion emphasizing her calm presence. City lights flicker and reflections shift across her denim jacket.
- */
- prompt: string;
+ export interface SharedType_cbb {
  /**
- * Seed
- * @description The seed used for the random number generator.
- * @example 2078003885
+ * Output
+ * @description Generated output from video processing
+ * @example that's the way I look at it and I don't know what you would say. Sooner or later the child gets run over.
+ * They seem to be too local, too provincial.
  */
- seed: number;
+ output: string;
  /**
- * @description The generated video.
+ * @description Token usage information
  * @example {
- * "height": 704,
- * "duration": 6.44,
- * "url": "https://v3b.fal.media/files/b/0a894013/N9lnMTq7W3uMC0lOQg845_BknRPV8I.mp4",
- * "fps": 25,
- * "width": 1248,
- * "file_name": "CJcQGDrxOSRg2YFl5GNDt_glXPMoji.mp4",
- * "num_frames": 161,
- * "content_type": "video/mp4"
+ * "completion_tokens": 100,
+ * "total_tokens": 1100,
+ * "prompt_tokens": 1000,
+ * "cost": 0.0005
  * }
  */
- video: Components.VideoFile;
+ usage: Components.UsageInfo;
  }

  export interface SharedType_cb4 {
@@ -5230,29 +5353,6 @@ export interface SharedType_a9b {
  seed?: number;
  }

- export interface SharedType_a97 {
- /**
- * Images
- * @description The generated images
- * @example [
- * {
- * "file_size": 3731290,
- * "height": 1536,
- * "file_name": "257cf8e7bd3a47c2959396343d5b38cf.png",
- * "content_type": "image/png",
- * "url": "https://v3.fal.media/files/tiger/48e63e0K6C9XQYBuomoU-_257cf8e7bd3a47c2959396343d5b38cf.png",
- * "width": 1536
- * }
- * ]
- */
- images: Components.Image[];
- /**
- * Seed
- * @description Seed value used for generation.
- */
- seed: number;
- }
-
  export interface SharedType_a8f {
  /**
  * Image Url
@@ -6224,6 +6324,22 @@ export interface SharedType_92a {
  video: Components.File;
  }

+ export interface SharedType_920 {
+ /**
+ * @description Upscaled image file after processing
+ * @example {
+ * "content_type": "image/png",
+ * "url": "https://storage.googleapis.com/falserverless/example_outputs/seedvr2/image_out.png"
+ * }
+ */
+ image: Components.ImageFile;
+ /**
+ * Seed
+ * @description The random seed used for the generation process.
+ */
+ seed: number;
+ }
+
  export interface SharedType_91c {
  /**
  * Image Url
@@ -6808,6 +6924,26 @@ export interface SharedType_8b9 {
  video: Components.VideoFile;
  }

+ export interface SharedType_8b7 {
+ /**
+ * Image Url
+ * @description The URL of the image to be processed.
+ * @example https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg
+ * @example http://ecx.images-amazon.com/images/I/51UUzBDAMsL.jpg
+ */
+ image_url: string;
+ /**
+ * @description The user input coordinates
+ * @example {
+ * "y1": 100,
+ * "x2": 200,
+ * "y2": 200,
+ * "x1": 100
+ * }
+ */
+ region: Components.Region;
+ }
+
  export interface SharedType_8a3 {
  /**
  * @description The generated video
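Note: SharedType_8b7 is the re-keyed replacement for SharedType_2ff (removed further down in this diff); only the ordering of the example coordinates changed. A sketch of a conforming value, assuming Components.Region is exactly the four pixel coordinates shown in the @example:

```ts
const input: SharedType_8b7 = {
  image_url: 'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg',
  region: { x1: 100, y1: 100, x2: 200, y2: 200 }, // shape assumed from the @example above
};
```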
@@ -6821,53 +6957,6 @@ export interface SharedType_8a3 {
  video: Components.File;
  }

- export interface SharedType_89f {
- /**
- * Aspect Ratio
- * @description Aspect ratio of the generated video.
- * @default 16:9
- * @enum {string}
- */
- aspect_ratio?: '16:9' | '9:16' | '1:1';
- /**
- * Duration
- * @description Video duration in seconds (3-15s).
- * @default 5
- * @enum {string}
- */
- duration?: '3' | '4' | '5' | '6' | '7' | '8' | '9' | '10' | '11' | '12' | '13' | '14' | '15';
- /**
- * Generate Audio
- * @description Whether to generate native audio for the video.
- * @default false
- */
- generate_audio?: boolean;
- /**
- * Multi Prompt
- * @description List of prompts for multi-shot video generation.
- * @example null
- */
- multi_prompt?: Components.KlingV3MultiPromptElement[];
- /**
- * Prompt
- * @description Text prompt for video generation. Required unless multi_prompt is provided.
- * @example A mecha lands on the ground to save the city, and says "I'm here", in anime style
- */
- prompt?: string;
- /**
- * Shot Type
- * @description The type of multi-shot video generation.
- * @default customize
- * @constant
- */
- shot_type?: 'customize';
- /**
- * Voice Ids
- * @description Optional Voice IDs for video generation. Reference voices in your prompt with <<<voice_1>>> and <<<voice_2>>> (maximum 2 voices per task). Get voice IDs from the kling video create-voice endpoint: https://fal.ai/models/fal-ai/kling-video/create-voice
- */
- voice_ids?: string[];
- }
-
  export interface SharedType_896 {
  /**
  * Aspect Ratio
@@ -7023,35 +7112,6 @@ export interface SharedType_85d {
  sync_mode?: boolean;
  }

- export interface SharedType_857 {
- /**
- * Prompt
- * @description The prompt used for the generation.
- * @example A cowboy walking through a dusty town at high noon, camera following from behind, cinematic depth, realistic lighting, western mood, 4K film grain.
- */
- prompt: string;
- /**
- * Seed
- * @description The seed used for the random number generator.
- * @example 149063119
- */
- seed: number;
- /**
- * @description The generated video.
- * @example {
- * "height": 704,
- * "duration": 6.44,
- * "url": "https://v3b.fal.media/files/b/0a8824b1/sdm0KfmenrlywesfzY1Y1_if6euPp1.mp4",
- * "fps": 25,
- * "width": 1248,
- * "file_name": "sdm0KfmenrlywesfzY1Y1_if6euPp1.mp4",
- * "num_frames": 161,
- * "content_type": "video/mp4"
- * }
- */
- video: Components.VideoFile;
- }
-
  export interface SharedType_844 {
  /**
  * Prompt
@@ -8355,6 +8415,35 @@ export interface SharedType_709 {
  video_url: string;
  }

+ export interface SharedType_6ef {
+ /**
+ * Prompt
+ * @description The prompt used for the generation.
+ * @example A cowboy walking through a dusty town at high noon, camera following from behind, cinematic depth, realistic lighting, western mood, 4K film grain.
+ */
+ prompt: string;
+ /**
+ * Seed
+ * @description The seed used for the random number generator.
+ * @example 149063119
+ */
+ seed: number;
+ /**
+ * @description The generated video.
+ * @example {
+ * "height": 704,
+ * "duration": 6.44,
+ * "url": "https://v3b.fal.media/files/b/0a8824b1/sdm0KfmenrlywesfzY1Y1_if6euPp1.mp4",
+ * "width": 1248,
+ * "fps": 25,
+ * "file_name": "sdm0KfmenrlywesfzY1Y1_if6euPp1.mp4",
+ * "content_type": "video/mp4",
+ * "num_frames": 161
+ * }
+ */
+ video: Components.VideoFile;
+ }
+
  export interface SharedType_6cd {
  /**
  * Aspect Ratio
@@ -8654,26 +8743,6 @@ export interface SharedType_678 {
  seed: number;
  }

- export interface SharedType_676 {
- /**
- * Output
- * @description Generated output from video processing
- * @example that's the way I look at it and I don't know what you would say. Sooner or later the child gets run over.
- * They seem to be too local, too provincial.
- */
- output: string;
- /**
- * @description Token usage information
- * @example {
- * "prompt_tokens": 1000,
- * "total_tokens": 1100,
- * "completion_tokens": 100,
- * "cost": 0.0005
- * }
- */
- usage: Components.UsageInfo;
- }
-
  export interface SharedType_6711 {
  /**
  * Prompt
@@ -9116,6 +9185,35 @@ export interface SharedType_5f1 {
  };
  }

+ export interface SharedType_5db {
+ /**
+ * Prompt
+ * @description The prompt used for the generation.
+ * @example A woman stands still amid a busy neon-lit street at night. The camera slowly dollies in toward her face as people blur past, their motion emphasizing her calm presence. City lights flicker and reflections shift across her denim jacket.
+ */
+ prompt: string;
+ /**
+ * Seed
+ * @description The seed used for the random number generator.
+ * @example 2078003885
+ */
+ seed: number;
+ /**
+ * @description The generated video.
+ * @example {
+ * "height": 704,
+ * "duration": 6.44,
+ * "url": "https://v3b.fal.media/files/b/0a894013/N9lnMTq7W3uMC0lOQg845_BknRPV8I.mp4",
+ * "width": 1248,
+ * "fps": 25,
+ * "file_name": "CJcQGDrxOSRg2YFl5GNDt_glXPMoji.mp4",
+ * "content_type": "video/mp4",
+ * "num_frames": 161
+ * }
+ */
+ video: Components.VideoFile;
+ }
+
  export interface SharedType_5ac {
  /**
  * Acoustic Cfg Scale
@@ -11052,6 +11150,35 @@ export interface SharedType_43e {
  image: Components.Image_2;
  }

+ export interface SharedType_437 {
+ /**
+ * Prompt
+ * @description The prompt used for the generation.
+ * @example Continue the scene naturally, maintaining the same style and motion.
+ */
+ prompt: string;
+ /**
+ * Seed
+ * @description The seed used for the random number generator.
+ * @example 866232447
+ */
+ seed: number;
+ /**
+ * @description The generated video.
+ * @example {
+ * "height": 704,
+ * "duration": 10.28,
+ * "url": "https://v3b.fal.media/files/b/0a88289e/CJcQGDrxOSRg2YFl5GNDt_glXPMoji.mp4",
+ * "width": 1248,
+ * "fps": 25,
+ * "file_name": "CJcQGDrxOSRg2YFl5GNDt_glXPMoji.mp4",
+ * "content_type": "video/mp4",
+ * "num_frames": 257
+ * }
+ */
+ video: Components.VideoFile;
+ }
+
  export interface SharedType_411 {
  /**
  * Aspect Ratio
@@ -11150,35 +11277,6 @@ export interface SharedType_3be {
  video: Components.File;
  }

- export interface SharedType_3b0 {
- /**
- * Prompt
- * @description The prompt used for the generation.
- * @example black-and-white video, a cowboy walks through a dusty town, film grain
- */
- prompt: string;
- /**
- * Seed
- * @description The seed used for the random number generator.
- * @example 1490631192028410600
- */
- seed: number;
- /**
- * @description The generated video.
- * @example {
- * "height": 704,
- * "duration": 6.44,
- * "url": "https://v3b.fal.media/files/b/0a895ed5/SaTGe87IpMUMiSq33w5Qb_RoCJFZhc.mp4",
- * "fps": 25,
- * "width": 1248,
- * "file_name": "SaTGe87IpMUMiSq33w5Qb_RoCJFZhc.mp4",
- * "num_frames": 161,
- * "content_type": "video/mp4"
- * }
- */
- video: Components.VideoFile;
- }
-
  export interface SharedType_397 {
  /**
  * Acceleration
@@ -11499,35 +11597,6 @@ export interface SharedType_371 {
  prompt_optimizer?: boolean;
  }

- export interface SharedType_369 {
- /**
- * Prompt
- * @description The prompt used for the generation.
- * @example Continue the scene naturally, maintaining the same style and motion.
- */
- prompt: string;
- /**
- * Seed
- * @description The seed used for the random number generator.
- * @example 866232447
- */
- seed: number;
- /**
- * @description The generated video.
- * @example {
- * "height": 704,
- * "duration": 10.28,
- * "url": "https://v3b.fal.media/files/b/0a88289e/CJcQGDrxOSRg2YFl5GNDt_glXPMoji.mp4",
- * "fps": 25,
- * "width": 1248,
- * "file_name": "CJcQGDrxOSRg2YFl5GNDt_glXPMoji.mp4",
- * "num_frames": 257,
- * "content_type": "video/mp4"
- * }
- */
- video: Components.VideoFile;
- }
-
  export interface SharedType_367 {
  /**
  * Aspect Ratio
@@ -11737,26 +11806,6 @@ export interface SharedType_304 {
  voice: Components.TextVoice;
  }

- export interface SharedType_2ff {
- /**
- * Image Url
- * @description The URL of the image to be processed.
- * @example https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg
- * @example http://ecx.images-amazon.com/images/I/51UUzBDAMsL.jpg
- */
- image_url: string;
- /**
- * @description The user input coordinates
- * @example {
- * "y2": 200,
- * "x2": 200,
- * "x1": 100,
- * "y1": 100
- * }
- */
- region: Components.Region;
- }
-
  export interface SharedType_2e6 {
  /**
  * Aspect Ratio
@@ -12245,6 +12294,48 @@ export interface SharedType_266 {
  video: Components.File;
  }

+ export interface SharedType_25d {
+ /**
+ * Aspect Ratio
+ * @description Aspect ratio of the generated video.
+ * @default 16:9
+ * @enum {string}
+ */
+ aspect_ratio?: '16:9' | '9:16' | '1:1';
+ /**
+ * Duration
+ * @description Video duration in seconds (3-15s).
+ * @default 5
+ * @enum {string}
+ */
+ duration?: '3' | '4' | '5' | '6' | '7' | '8' | '9' | '10' | '11' | '12' | '13' | '14' | '15';
+ /**
+ * Generate Audio
+ * @description Whether to generate native audio for the video.
+ * @default false
+ */
+ generate_audio?: boolean;
+ /**
+ * Multi Prompt
+ * @description List of prompts for multi-shot video generation.
+ * @example null
+ */
+ multi_prompt?: Components.KlingV3MultiPromptElement[];
+ /**
+ * Prompt
+ * @description Text prompt for video generation. Required unless multi_prompt is provided.
+ * @example A mecha lands on the ground to save the city, and says "I'm here", in anime style
+ */
+ prompt?: string;
+ /**
+ * Shot Type
+ * @description The type of multi-shot video generation.
+ * @default customize
+ * @constant
+ */
+ shot_type?: 'customize';
+ }
+
  export interface SharedType_23c {
  /**
  * @description The generated video.
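Note: SharedType_25d replaces the removed SharedType_89f (see the hunk at -6821 earlier in this diff) and drops its voice_ids field. A sketch of a single-shot input; note that duration is a string enum, not a number:

```ts
const input: SharedType_25d = {
  prompt: 'A mecha lands on the ground to save the city, and says "I\'m here", in anime style',
  duration: '8',        // string enum '3'..'15'; defaults to '5'
  generate_audio: true, // defaults to false
  // For multi-shot generation, supply multi_prompt (with shot_type: 'customize') instead of prompt.
};
```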
@@ -12536,6 +12627,35 @@ export interface SharedType_20d {
  seed: number;
  }

+ export interface SharedType_1f5 {
+ /**
+ * Prompt
+ * @description The prompt used for the generation.
+ * @example black-and-white video, a cowboy walks through a dusty town, film grain
+ */
+ prompt: string;
+ /**
+ * Seed
+ * @description The seed used for the random number generator.
+ * @example 1490631192028410600
+ */
+ seed: number;
+ /**
+ * @description The generated video.
+ * @example {
+ * "height": 704,
+ * "duration": 6.44,
+ * "url": "https://v3b.fal.media/files/b/0a895ed5/SaTGe87IpMUMiSq33w5Qb_RoCJFZhc.mp4",
+ * "width": 1248,
+ * "fps": 25,
+ * "file_name": "SaTGe87IpMUMiSq33w5Qb_RoCJFZhc.mp4",
+ * "content_type": "video/mp4",
+ * "num_frames": 161
+ * }
+ */
+ video: Components.VideoFile;
+ }
+
  export interface SharedType_1eb {
  /**
  * @description The generated video file.
@@ -13006,6 +13126,148 @@ export interface SharedType_151 {
  seed: number;
  }

+ export interface SharedType_133 {
+ /**
+ * Aspect Ratio
+ * @description The aspect ratio of the video.
+ * @default auto
+ * @example auto
+ * @enum {string}
+ */
+ aspect_ratio?: '9:16' | '1:1' | '16:9' | 'auto';
+ /**
+ * Constant Rate Factor
+ * @description The constant rate factor (CRF) to compress input media with. Compressed input media more closely matches the model's training data, which can improve motion quality.
+ * @default 29
+ * @example 29
+ */
+ constant_rate_factor?: number;
+ /**
+ * Enable Detail Pass
+ * @description Whether to use a detail pass. If True, the model will perform a second pass to refine the video and enhance details. This incurs a 2.0x cost multiplier on the base price.
+ * @default false
+ * @example false
+ */
+ enable_detail_pass?: boolean;
+ /**
+ * Enable Safety Checker
+ * @description Whether to enable the safety checker.
+ * @default true
+ * @example true
+ */
+ enable_safety_checker?: boolean;
+ /**
+ * Expand Prompt
+ * @description Whether to expand the prompt using a language model.
+ * @default false
+ * @example false
+ */
+ expand_prompt?: boolean;
+ /**
+ * Number of Inference Steps
+ * @description Number of inference steps during the first pass.
+ * @default 8
+ * @example 8
+ */
+ first_pass_num_inference_steps?: number;
+ /**
+ * Frame Rate
+ * @description The frame rate of the video.
+ * @default 24
+ * @example 24
+ */
+ frame_rate?: number;
+ /**
+ * Loras
+ * @description LoRA weights to use for generation
+ * @default []
+ */
+ loras?: Components.LoRAWeight[];
+ /**
+ * Negative Prompt
+ * @description Negative prompt for generation
+ * @default worst quality, inconsistent motion, blurry, jittery, distorted
+ */
+ negative_prompt?: string;
+ /**
+ * Number of Frames
+ * @description The number of frames in the video.
+ * @default 121
+ * @example 121
+ */
+ num_frames?: number;
+ /**
+ * Prompt
+ * @description Text prompt to guide generation
+ * @example Woman walking on a street in Tokyo
+ */
+ prompt: string;
+ /**
+ * Resolution
+ * @description Resolution of the generated video.
+ * @default 720p
+ * @example 720p
+ * @enum {string}
+ */
+ resolution?: '480p' | '720p';
+ /**
+ * Reverse Video
+ * @description Whether to reverse the video.
+ * @default false
+ * @example false
+ */
+ reverse_video?: boolean;
+ /**
+ * Second Pass Number of Inference Steps
+ * @description Number of inference steps during the second pass.
+ * @default 8
+ * @example 8
+ */
+ second_pass_num_inference_steps?: number;
+ /**
+ * Second Pass Skip Initial Steps
+ * @description The number of inference steps to skip in the initial steps of the second pass. By skipping some steps at the beginning, the second pass can focus on smaller details instead of larger changes.
+ * @default 5
+ * @example 5
+ */
+ second_pass_skip_initial_steps?: number;
+ /**
+ * Seed
+ * @description Random seed for generation
+ */
+ seed?: number;
+ /**
+ * Temporal AdaIN Factor
+ * @description The factor for adaptive instance normalization (AdaIN) applied to generated video chunks after the first. This can help deal with a gradual increase in saturation/contrast in the generated video by normalizing the color distribution across the video. A high value will ensure the color distribution is more consistent across the video, while a low value will allow for more variation in color distribution.
+ * @default 0.5
+ * @example 0.5
+ */
+ temporal_adain_factor?: number;
+ /**
+ * Tone Map Compression Ratio
+ * @description The compression ratio for tone mapping. This is used to compress the dynamic range of the video to improve visual quality. A value of 0.0 means no compression, while a value of 1.0 means maximum compression.
+ * @default 0
+ * @example 0
+ */
+ tone_map_compression_ratio?: number;
+ /**
+ * @description Video to be extended.
+ * @example {
+ * "video_url": "https://storage.googleapis.com/falserverless/web-examples/wan/t2v.mp4",
+ * "start_frame_num": 0,
+ * "reverse_video": false,
+ * "limit_num_frames": false,
+ * "resample_fps": false,
+ * "strength": 1,
+ * "target_fps": 24,
+ * "max_num_frames": 1441,
+ * "conditioning_type": "rgb",
+ * "preprocess": false
+ * }
+ */
+ video: Components.ExtendVideoConditioningInput;
+ }
+
  export interface SharedType_129 {
  /**
  * Results
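Note: SharedType_133 hoists the LTX extend-video input into a shared type; Ltxv13b098DistilledExtendInput becomes a plain `extends` later in this diff. A sketch of a minimal value, where the field names inside Components.ExtendVideoConditioningInput are assumptions taken from the @example above:

```ts
const input: SharedType_133 = {
  prompt: 'Woman walking on a street in Tokyo',
  video: {
    video_url: 'https://storage.googleapis.com/falserverless/web-examples/wan/t2v.mp4',
    start_frame_num: 0, // field assumed from the @example; remaining fields presumed optional
  },
  enable_detail_pass: true, // second refinement pass; 2.0x cost multiplier per the docs above
  num_frames: 121,          // the documented default
};
```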
@@ -13014,7 +13276,7 @@ export interface SharedType_129 {
  results: string;
  }

- export interface SharedType_1001 {
+ export interface SharedType_100 {
  /**
  * Prompt
  * @description The prompt used for generating the video.
@@ -13032,38 +13294,6 @@ export interface SharedType_1001 {
  video: Components.File;
  }

- export interface SharedType_100 {
- /**
- * Masks
- * @description Dictionary of label: mask image
- * @example [
- * {
- * "file_size": 15724,
- * "height": 1200,
- * "file_name": "019c3c1e3c50446e9996f709d36debb4.png",
- * "content_type": "image/png",
- * "url": "https://v3.fal.media/files/monkey/6ITmhHQJ-69s-UxajrY5T_019c3c1e3c50446e9996f709d36debb4.png",
- * "width": 1800
- * },
- * {
- * "file_size": 14905,
- * "height": 1200,
- * "file_name": "0a1522ca410942c7ad6c73efa15b3549.png",
- * "content_type": "image/png",
- * "url": "https://v3.fal.media/files/monkey/IljtMxahoo9-7SUpx0fth_0a1522ca410942c7ad6c73efa15b3549.png",
- * "width": 1800
- * }
- * ]
- */
- masks: Components.Image[];
- /**
- * Output
- * @description Generated output
- * @example <p> A white pickup truck </p> [SEG] is parked on the side of <p> the red building </p> [SEG] , creating a unique and eye-catching contrast.<|im_end|>
- */
- output: string;
- }
-
  export interface SharedType_0ff {
  /**
  * @description The generated speaker embedding file in safetensors format.
@@ -14537,9 +14767,9 @@ export interface OpenrouterRouterVisionOutput {
  /**
  * @description Token usage information
  * @example {
- * "prompt_tokens": 1340,
- * "total_tokens": 1403,
  * "completion_tokens": 63,
+ * "total_tokens": 1403,
+ * "prompt_tokens": 1340,
  * "cost": 0.0005595
  * }
  */
@@ -14603,7 +14833,7 @@ export interface OpenrouterRouterVideoEnterpriseInput {
  video_urls?: string[];
  }

- export interface OpenrouterRouterVideoEnterpriseOutput extends SharedType_676 {}
+ export interface OpenrouterRouterVideoEnterpriseOutput extends SharedType_cbb {}

  export interface OpenrouterRouterVideoInput {
  /**
@@ -14655,7 +14885,7 @@ export interface OpenrouterRouterVideoInput {
  video_urls?: string[];
  }

- export interface OpenrouterRouterVideoOutput extends SharedType_676 {}
+ export interface OpenrouterRouterVideoOutput extends SharedType_cbb {}

  export interface OpenrouterRouterAudioInput {
  /**
@@ -14714,9 +14944,9 @@ export interface OpenrouterRouterAudioOutput {
  /**
  * @description Token usage information
  * @example {
- * "prompt_tokens": 500,
- * "total_tokens": 550,
  * "completion_tokens": 50,
+ * "total_tokens": 550,
+ * "prompt_tokens": 500,
  * "cost": 0.0003
  * }
  */
@@ -14798,9 +15028,9 @@ export interface OpenrouterRouterOutput {
  /**
  * @description Token usage information
  * @example {
- * "prompt_tokens": 40,
- * "total_tokens": 267,
  * "completion_tokens": 227,
+ * "total_tokens": 267,
+ * "prompt_tokens": 40,
  * "cost": 0.0005795
  * }
  */
@@ -17594,13 +17824,6 @@ export interface WhisperInput {
  * @enum {string}
  */
  task?: 'transcribe' | 'translate';
- /**
- * Version
- * @description Version of the model to use. All of the models are the Whisper large variant.
- * @default 3
- * @constant
- */
- version?: '3';
  }

  export interface WhisperOutput {
@@ -17722,6 +17945,7 @@ export interface WhisperOutput {
  /**
  * Text
  * @description Transcription of the audio file
+ * @example María, ¿qué cenamos hoy? No sé, ¿qué cenamos? ¿Cenamos pollo frito o pollo asado o algo? Mejor a la plancha, quiero una salada. A la plancha, vale. Y hacemos una ensalada con tomate y esas cosas. Vale. Pues eso lo hacemos, ¿vale? Venga, vale.
  */
  text: string;
  }
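Note: WhisperInput loses its constant version field (all hosted variants were Whisper large v3 anyway, per the removed doc comment), so call sites that pinned it must simply drop the property; nothing else in the input shape changes in this diff. A minimal before/after sketch over just the affected fields:

```ts
// 1.3.35: { task: 'transcribe', version: '3', ... } type-checked.
// 1.3.37: `version` is no longer a known property and fails type checking.
const opts: Pick<WhisperInput, 'task'> = { task: 'transcribe' };
```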
@@ -25022,7 +25246,7 @@ export interface Swin2srInput {
  task?: 'classical_sr' | 'compressed_sr' | 'real_sr';
  }

- export interface Swin2srOutput extends SharedType_e43 {}
+ export interface Swin2srOutput extends SharedType_357 {}

  export interface StepxEdit2Input {
  /**
@@ -26931,7 +27155,13 @@ export interface SeedvrUpscaleVideoOutput {
  video: Components.File;
  }

- export interface SeedvrUpscaleImageInput {
+ export interface SeedvrUpscaleImageSeamlessInput {
+ /**
+ * Enable Safety Checker
+ * @description If set to true, the safety checker will be enabled.
+ * @default true
+ */
+ enable_safety_checker?: boolean;
  /**
  * Image Url
  * @description The input image to be processed
@@ -26947,10 +27177,10 @@ export interface SeedvrUpscaleImageInput {
  /**
  * Output Format
  * @description The format of the output image.
- * @default jpg
+ * @default png
  * @enum {string}
  */
- output_format?: 'png' | 'jpg' | 'webp';
+ output_format?: 'png' | 'jpeg' | 'webp';
  /**
  * Seed
  * @description The random seed used for the generation process.
@@ -26984,22 +27214,63 @@ export interface SeedvrUpscaleImageInput {
  upscale_mode?: 'target' | 'factor';
  }

- export interface SeedvrUpscaleImageOutput {
+ export interface SeedvrUpscaleImageSeamlessOutput extends SharedType_920 {}
+
+ export interface SeedvrUpscaleImageInput {
  /**
- * @description Upscaled image file after processing
- * @example {
- * "content_type": "image/png",
- * "url": "https://storage.googleapis.com/falserverless/example_outputs/seedvr2/image_out.png"
- * }
+ * Image Url
+ * @description The input image to be processed
+ * @example https://storage.googleapis.com/falserverless/example_inputs/seedvr2/image_in.png
  */
- image: Components.ImageFile;
+ image_url: string;
+ /**
+ * Noise Scale
+ * @description The noise scale to use for the generation process.
+ * @default 0.1
+ */
+ noise_scale?: number;
+ /**
+ * Output Format
+ * @description The format of the output image.
+ * @default jpg
+ * @enum {string}
+ */
+ output_format?: 'png' | 'jpg' | 'webp';
  /**
  * Seed
  * @description The random seed used for the generation process.
  */
- seed: number;
+ seed?: number;
+ /**
+ * Sync Mode
+ * @description If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
+ * @default false
+ */
+ sync_mode?: boolean;
+ /**
+ * Target Resolution
+ * @description The target resolution to upscale to when `upscale_mode` is `target`.
+ * @default 1080p
+ * @enum {string}
+ */
+ target_resolution?: '720p' | '1080p' | '1440p' | '2160p';
+ /**
+ * Upscale Factor
+ * @description Upscaling factor to be used. Will multiply the dimensions with this factor when `upscale_mode` is `factor`.
+ * @default 2
+ */
+ upscale_factor?: number;
+ /**
+ * Upscale Mode
+ * @description The mode to use for the upscale. If 'target', the upscale factor will be calculated based on the target resolution. If 'factor', the upscale factor will be used directly.
+ * @default factor
+ * @enum {string}
+ */
+ upscale_mode?: 'target' | 'factor';
  }

+ export interface SeedvrUpscaleImageOutput extends SharedType_920 {}
+
  export interface SdxlControlnetUnionInpaintingInput {
  /**
  * Canny Image Url
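Note: the SeedVR image upscaler is now split into a seamless variant (the old input shape plus enable_safety_checker, with png/jpeg output formats) and a reworked SeedvrUpscaleImageInput whose seed became optional and which gains sync_mode. A sketch of the two upscale modes on the reworked input:

```ts
const byFactor: SeedvrUpscaleImageInput = {
  image_url: 'https://storage.googleapis.com/falserverless/example_inputs/seedvr2/image_in.png',
  upscale_mode: 'factor', // the default
  upscale_factor: 2,      // output dimensions = input dimensions x 2
};

const byTarget: SeedvrUpscaleImageInput = {
  image_url: 'https://storage.googleapis.com/falserverless/example_inputs/seedvr2/image_in.png',
  upscale_mode: 'target',
  target_resolution: '1080p', // the factor is derived from this target
};
```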
@@ -27882,54 +28153,67 @@ export interface SanaSprintOutput extends SharedType_7c6 {}

  export interface SanaVideoInput {
  /**
- * Fps
- * @description Frames per second for the output video
+ * Aspect Ratio
+ * @description The aspect ratio of the output video. Only used when resolution is '720p'.
+ * @default 16:9
+ * @enum {string}
+ */
+ aspect_ratio?: '16:9' | '9:16' | '4:3' | '3:4' | '1:1';
+ /**
+ * Enable Safety Checker
+ * @description Enable safety checking of the generated video.
+ * @default true
+ */
+ enable_safety_checker?: boolean;
+ /**
+ * Frames Per Second
+ * @description Frames per second for the output video.
  * @default 16
  */
- fps?: number;
+ frames_per_second?: number;
  /**
  * Guidance Scale
- * @description Guidance scale for generation (higher = more prompt adherence)
+ * @description Guidance scale for generation (higher = more prompt adherence).
  * @default 6
  */
  guidance_scale?: number;
  /**
  * Motion Score
- * @description Motion intensity score (higher = more motion)
+ * @description Motion intensity score (higher = more motion).
  * @default 30
  */
  motion_score?: number;
  /**
  * Negative Prompt
- * @description The negative prompt describing what to avoid in the generation
+ * @description The negative prompt describing what to avoid in the generation.
  * @default A chaotic sequence with misshapen, deformed limbs in heavy motion blur, sudden disappearance, jump cuts, jerky movements, rapid shot changes, frames out of sync, inconsistent character shapes, temporal artifacts, jitter, and ghosting effects, creating a disorienting visual experience.
  */
  negative_prompt?: string;
  /**
  * Num Frames
- * @description Number of frames to generate
+ * @description Number of frames to generate.
  * @default 81
  */
  num_frames?: number;
  /**
  * Num Inference Steps
- * @description Number of denoising steps
+ * @description Number of denoising steps.
  * @default 28
  */
  num_inference_steps?: number;
  /**
  * Prompt
- * @description The text prompt describing the video to generate
+ * @description The text prompt describing the video to generate.
  * @example Evening, backlight, side lighting, soft light, high contrast, mid-shot, centered composition, clean solo shot, warm color. A young Caucasian man stands in a forest, golden light glimmers on his hair as sunlight filters through the leaves.
  */
  prompt: string;
  /**
  * Resolution
- * @description The resolution of the output video
+ * @description The resolution of the output video.
  * @default 480p
  * @enum {string}
  */
- resolution?: '480p';
+ resolution?: '480p' | '720p';
  /**
  * Seed
  * @description Random seed for reproducible generation. If not provided, a random seed will be used.
@@ -27940,18 +28224,24 @@ export interface SanaVideoInput {
  export interface SanaVideoOutput {
  /**
  * Seed
- * @description The random seed used for the generation process
+ * @description The random seed used for the generation process.
  */
  seed: number;
  /**
- * Video
- * @description Generated video file
+ * Timings
+ * @description Performance timing breakdown.
+ */
+ timings: {
+ [key: string]: number;
+ };
+ /**
+ * @description Generated video file.
  * @example {
  * "content_type": "video/mp4",
  * "url": "https://v3b.fal.media/files/b/zebra/TipA9XXsXRYlB6vK6PQ0l_output.mp4"
  * }
  */
- video: Components.File_1;
+ video: Components.File;
  }

  export interface SanaInput extends SharedType_5f3 {}
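Note: SanaVideoInput now supports 720p output with a selectable aspect ratio, and fps was renamed to frames_per_second; the output gains a timings map and switches video to Components.File. A migration sketch over the affected fields (the prompt here is illustrative):

```ts
// 1.3.35: { fps: 16, resolution: '480p', ... }
const input: SanaVideoInput = {
  prompt: 'A young man stands in a sunlit forest', // illustrative
  resolution: '720p',    // newly allowed alongside '480p'
  aspect_ratio: '9:16',  // only honored when resolution is '720p'
  frames_per_second: 16, // renamed from `fps`
};
```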
@@ -27998,15 +28288,15 @@ export interface Sam2VideoInput {
  * @example [
  * {
  * "y": 350,
- * "x": 210,
+ * "label": 1,
  * "frame_index": 0,
- * "label": 1
+ * "x": 210
  * },
  * {
  * "y": 220,
- * "x": 250,
+ * "label": 1,
  * "frame_index": 0,
- * "label": 1
+ * "x": 250
  * }
  * ]
  */
@@ -28065,8 +28355,8 @@ export interface Sam2ImageInput {
  * @example [
  * {
  * "y": 375,
- * "x": 500,
- * "label": 1
+ * "label": 1,
+ * "x": 500
  * }
  * ]
  */
@@ -28870,7 +29160,7 @@ export interface Sa2va8bVideoOutput extends SharedType_800 {}

  export interface Sa2va8bImageInput extends SharedType_f51 {}

- export interface Sa2va8bImageOutput extends SharedType_100 {}
+ export interface Sa2va8bImageOutput extends SharedType_fda {}

  export interface Sa2va4bVideoInput extends SharedType_5f5 {}

@@ -28878,7 +29168,7 @@ export interface Sa2va4bVideoOutput extends SharedType_800 {}

  export interface Sa2va4bImageInput extends SharedType_f51 {}

- export interface Sa2va4bImageOutput extends SharedType_100 {}
+ export interface Sa2va4bImageOutput extends SharedType_fda {}

  export interface RifeVideoInput {
  /**
@@ -37275,13 +37565,13 @@ export interface NovaSrOutput {
  /**
  * @description The enhanced audio file.
  * @example {
- * "bitrate": "192k",
+ * "channels": 1,
  * "duration": 12.283291666666667,
  * "url": "https://v3b.fal.media/files/b/0a8a3f1a/lTKExJu-R6ZJdnFlpzEeq_TxmNTNhl.mp3",
  * "file_name": "lTKExJu-R6ZJdnFlpzEeq_TxmNTNhl.mp3",
  * "sample_rate": 48000,
  * "content_type": "audio/mpeg",
- * "channels": 1
+ * "bitrate": "192k"
  * }
  */
  audio: Components.AudioFile;
@@ -38147,7 +38437,6 @@ export interface MmaudioV2TextToAudioInput {

  export interface MmaudioV2TextToAudioOutput {
  /**
- * Audio
  * @description The generated audio.
  * @example {
  * "file_size": 1001342,
@@ -38156,7 +38445,7 @@ export interface MmaudioV2TextToAudioOutput {
  * "url": "https://storage.googleapis.com/falserverless/model_tests/video_models/mmaudio_output.flac"
  * }
  */
- audio: Components.File_1;
+ audio: Components.File;
  }

  export interface MmaudioV2Input {
@@ -38211,7 +38500,6 @@ export interface MmaudioV2Input {

  export interface MmaudioV2Output {
  /**
- * Video
  * @description The generated video with the lip sync.
  * @example {
  * "file_size": 1001342,
@@ -38220,7 +38508,7 @@ export interface MmaudioV2Output {
  * "url": "https://storage.googleapis.com/falserverless/model_tests/video_models/mmaudio_output.mp4"
  * }
  */
- video: Components.File_1;
+ video: Components.File;
  }

  export interface MixDehazeNetInput {
@@ -38243,7 +38531,13 @@ export interface MixDehazeNetInput {
  seed?: number;
  }

- export interface MixDehazeNetOutput extends SharedType_e43 {}
+ export interface MixDehazeNetOutput {
+ /**
+ * Image
+ * @description The generated image file info.
+ */
+ image: Components.Image_2;
+ }

  export interface MinimaxVoiceDesignInput {
  /**
@@ -40742,147 +41036,7 @@ export interface Ltxv13b098DistilledImageToVideoInput extends SharedType_2e6 {}

  export interface Ltxv13b098DistilledImageToVideoOutput extends SharedType_6711 {}

- export interface Ltxv13b098DistilledExtendInput {
- /**
- * Aspect Ratio
- * @description The aspect ratio of the video.
- * @default auto
- * @example auto
- * @enum {string}
- */
- aspect_ratio?: '9:16' | '1:1' | '16:9' | 'auto';
- /**
- * Constant Rate Factor
- * @description The constant rate factor (CRF) to compress input media with. Compressed input media more closely matches the model's training data, which can improve motion quality.
- * @default 29
- * @example 29
- */
- constant_rate_factor?: number;
- /**
- * Enable Detail Pass
- * @description Whether to use a detail pass. If True, the model will perform a second pass to refine the video and enhance details. This incurs a 2.0x cost multiplier on the base price.
- * @default false
- * @example false
- */
- enable_detail_pass?: boolean;
- /**
- * Enable Safety Checker
- * @description Whether to enable the safety checker.
- * @default true
- * @example true
- */
- enable_safety_checker?: boolean;
- /**
- * Expand Prompt
- * @description Whether to expand the prompt using a language model.
- * @default false
- * @example false
- */
- expand_prompt?: boolean;
- /**
- * Number of Inference Steps
- * @description Number of inference steps during the first pass.
- * @default 8
- * @example 8
- */
- first_pass_num_inference_steps?: number;
- /**
- * Frame Rate
- * @description The frame rate of the video.
- * @default 24
- * @example 24
- */
- frame_rate?: number;
- /**
- * Loras
- * @description LoRA weights to use for generation
- * @default []
- */
- loras?: Components.LoRAWeight[];
- /**
- * Negative Prompt
- * @description Negative prompt for generation
- * @default worst quality, inconsistent motion, blurry, jittery, distorted
- */
- negative_prompt?: string;
- /**
- * Number of Frames
- * @description The number of frames in the video.
- * @default 121
- * @example 121
- */
- num_frames?: number;
- /**
- * Prompt
- * @description Text prompt to guide generation
- * @example Woman walking on a street in Tokyo
- */
- prompt: string;
- /**
- * Resolution
- * @description Resolution of the generated video.
- * @default 720p
- * @example 720p
- * @enum {string}
- */
- resolution?: '480p' | '720p';
- /**
- * Reverse Video
- * @description Whether to reverse the video.
- * @default false
- * @example false
- */
- reverse_video?: boolean;
- /**
- * Second Pass Number of Inference Steps
- * @description Number of inference steps during the second pass.
- * @default 8
- * @example 8
- */
- second_pass_num_inference_steps?: number;
- /**
- * Second Pass Skip Initial Steps
- * @description The number of inference steps to skip in the initial steps of the second pass. By skipping some steps at the beginning, the second pass can focus on smaller details instead of larger changes.
- * @default 5
- * @example 5
- */
- second_pass_skip_initial_steps?: number;
- /**
- * Seed
- * @description Random seed for generation
- */
- seed?: number;
- /**
- * Temporal AdaIN Factor
- * @description The factor for adaptive instance normalization (AdaIN) applied to generated video chunks after the first. This can help deal with a gradual increase in saturation/contrast in the generated video by normalizing the color distribution across the video. A high value will ensure the color distribution is more consistent across the video, while a low value will allow for more variation in color distribution.
- * @default 0.5
- * @example 0.5
- */
- temporal_adain_factor?: number;
- /**
- * Tone Map Compression Ratio
- * @description The compression ratio for tone mapping. This is used to compress the dynamic range of the video to improve visual quality. A value of 0.0 means no compression, while a value of 1.0 means maximum compression.
- * @default 0
- * @example 0
- */
- tone_map_compression_ratio?: number;
- /**
- * @description Video to be extended.
- * @example {
- * "video_url": "https://storage.googleapis.com/falserverless/web-examples/wan/t2v.mp4",
- * "start_frame_num": 0,
- * "reverse_video": false,
- * "limit_num_frames": false,
- * "resample_fps": false,
- * "strength": 1,
- * "target_fps": 24,
- * "max_num_frames": 1441,
- * "conditioning_type": "rgb",
- * "preprocess": false
- * }
- */
- video: Components.ExtendVideoConditioningInput;
- }
+ export interface Ltxv13b098DistilledExtendInput extends SharedType_133 {}

  export interface Ltxv13b098DistilledExtendOutput extends SharedType_844 {}

@@ -41068,165 +41222,13 @@ export interface Ltx2VideoTrainerInput {
41068
41222
  with_audio?: boolean;
41069
41223
  }
41070
41224
 
41071
- export interface Ltx2VideoTrainerOutput extends SharedType_f7a {}
41072
-
41073
- export interface Ltx2V2vTrainerInput {
41074
- /**
41075
- * Aspect Ratio
41076
- * @description Aspect ratio to use for training.
41077
- * @default 1:1
41078
- * @example 1:1
41079
- * @enum {string}
41080
- */
41081
- aspect_ratio?: '16:9' | '1:1' | '9:16';
41082
- /**
41083
- * Auto Scale Input
41084
- * @description If true, videos will be automatically scaled to the target frame count and fps. This option has no effect on image datasets.
41085
- * @default false
41086
- * @example false
41087
- */
41088
- auto_scale_input?: boolean;
41089
- /**
41090
- * Debug Dataset
41091
- * @description When enabled, the trainer returns a downloadable archive of your preprocessed training data for manual inspection. Use this to verify that your videos, images, and captions were processed correctly before committing to a full training run.
41092
- * @default false
41093
- */
41094
- debug_dataset?: boolean;
41095
- /**
41096
- * First Frame Conditioning P
41097
- * @description Probability of conditioning on the first frame during training. Lower values work better for video-to-video transformation.
41098
- * @default 0.1
41099
- */
41100
- first_frame_conditioning_p?: number;
41101
- /**
41102
- * Frame Rate
41103
- * @description Target frames per second for the video.
41104
- * @default 25
41105
- * @example 25
41106
- */
41107
- frame_rate?: number;
41108
- /**
41109
- * Learning Rate
41110
- * @description Learning rate for optimization. Higher values can lead to faster training but may cause overfitting.
41111
- * @default 0.0002
41112
- * @example 0.0002
41113
- */
41114
- learning_rate?: number;
41115
- /**
41116
- * Number Of Frames
41117
- * @description Number of frames per training sample. Must satisfy frames % 8 == 1 (e.g., 1, 9, 17, 25, 33, 41, 49, 57, 65, 73, 81, 89, 97).
41118
- * @default 89
41119
- * @example 89
41120
- */
41121
- number_of_frames?: number;
41122
- /**
41123
- * Number Of Steps
41124
- * @description The number of training steps.
41125
- * @default 2000
41126
- * @example 2000
41127
- */
41128
- number_of_steps?: number;
41129
- /**
41130
- * Rank
41131
- * @description The rank of the LoRA adaptation. Higher values increase capacity but use more memory.
41132
- * @default 32
41133
- * @example 32
41134
- * @enum {integer}
41135
- */
41136
- rank?: 8 | 16 | 32 | 64 | 128;
41137
- /**
41138
- * Resolution
41139
- * @description Resolution to use for training. Higher resolutions require more memory.
41140
- * @default medium
41141
- * @example medium
41142
- * @enum {string}
41143
- */
41144
- resolution?: 'low' | 'medium' | 'high';
41145
- /**
41146
- * Split Input Duration Threshold
41147
- * @description The duration threshold in seconds. If a video is longer than this, it will be split into scenes.
41148
- * @default 30
41149
- * @example 30
41150
- */
41151
- split_input_duration_threshold?: number;
41152
- /**
41153
- * Split Input Into Scenes
41154
- * @description If true, videos above a certain duration threshold will be split into scenes.
41155
- * @default true
41156
- * @example true
41157
- */
41158
- split_input_into_scenes?: boolean;
41159
- /**
41160
- * Stg Scale
41161
- * @description STG (Spatio-Temporal Guidance) scale. 0.0 disables STG. Recommended value is 1.0.
41162
- * @default 1
41163
- */
41164
- stg_scale?: number;
41165
- /**
41166
- * Training Data Url
41167
- * @description URL to zip archive with videos or images. Try to use at least 10 files, although more is better.
41168
- *
41169
- * **Supported video formats:** .mp4, .mov, .avi, .mkv
41170
- * **Supported image formats:** .png, .jpg, .jpeg
41171
- *
41172
- * Note: The dataset must contain ONLY videos OR ONLY images - mixed datasets are not supported.
41173
- *
41174
- * The archive can also contain text files with captions. Each text file should have the same name as the media file it corresponds to.
41175
- */
41176
- training_data_url: string;
41177
- /**
41178
- * Trigger Phrase
41179
- * @description A phrase that will trigger the LoRA style. Will be prepended to captions during training.
41180
- * @default
41181
- * @example
41182
- */
41183
- trigger_phrase?: string;
41184
- /**
41185
- * Validation
41186
- * @description A list of validation inputs with prompts and reference videos.
41187
- * @default []
41188
- */
41189
- validation?: Components.V2VValidation[];
41190
- /**
41191
- * Validation Aspect Ratio
41192
- * @description The aspect ratio to use for validation.
41193
- * @default 1:1
41194
- * @example 1:1
41195
- * @enum {string}
41196
- */
41197
- validation_aspect_ratio?: '16:9' | '1:1' | '9:16';
41198
- /**
41199
- * Validation Frame Rate
41200
- * @description Target frames per second for validation videos.
41201
- * @default 25
41202
- * @example 25
41203
- */
41204
- validation_frame_rate?: number;
41205
- /**
41206
- * Validation Negative Prompt
41207
- * @description A negative prompt to use for validation.
41208
- * @default worst quality, inconsistent motion, blurry, jittery, distorted
41209
- */
41210
- validation_negative_prompt?: string;
41211
- /**
41212
- * Validation Number Of Frames
41213
- * @description The number of frames in validation videos.
41214
- * @default 89
41215
- * @example 89
41216
- */
41217
- validation_number_of_frames?: number;
41218
- /**
41219
- * Validation Resolution
41220
- * @description The resolution to use for validation.
41221
- * @default high
41222
- * @example high
41223
- * @enum {string}
41224
- */
41225
- validation_resolution?: 'low' | 'medium' | 'high';
41225
+ export interface Ltx2VideoTrainerOutput {
41226
+ config_file: Components.File;
41227
+ debug_dataset?: Components.File;
41228
+ lora_file: Components.File;
41229
+ video?: Components.File;
41226
41230
  }
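
A minimal consumption sketch for the new trainer output, assuming these interfaces are exported from the package root (the import path is illustrative, not confirmed by this diff):

```ts
import type { Ltx2VideoTrainerOutput } from "fal-endpoint-types"; // assumed export path

function summarizeTraining(out: Ltx2VideoTrainerOutput): string {
  // lora_file and config_file are always present; debug_dataset and video are optional.
  const lines = [`LoRA: ${out.lora_file.url}`, `Config: ${out.config_file.url}`];
  if (out.debug_dataset) lines.push(`Debug dataset: ${out.debug_dataset.url}`);
  if (out.video) lines.push(`Sample video: ${out.video.url}`);
  return lines.join("\n");
}
```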
41227
41231
 
41228
- export interface Ltx2V2vTrainerOutput extends SharedType_f7a {}
41229
-
41230
41232
  export interface LtxVideoImageToVideoInput {
41231
41233
  /**
41232
41234
  * Guidance Scale
@@ -41633,6 +41635,13 @@ export interface LtxVideoLoraMulticonditioningInput {
41633
41635
  * @enum {string}
41634
41636
  */
41635
41637
  aspect_ratio?: '16:9' | '1:1' | '9:16' | 'auto';
41638
+ /**
41639
+ * Enable Prompt Expansion
41640
+ * @description Whether to expand the prompt using the LLM.
41641
+ * @default false
41642
+ * @example false
41643
+ */
41644
+ enable_prompt_expansion?: boolean;
41636
41645
  /**
41637
41646
  * Enable Safety Checker
41638
41647
  * @description Whether to enable the safety checker.
@@ -41641,19 +41650,12 @@ export interface LtxVideoLoraMulticonditioningInput {
41641
41650
  */
41642
41651
  enable_safety_checker?: boolean;
41643
41652
  /**
41644
- * Expand Prompt
41645
- * @description Whether to expand the prompt using the LLM.
41646
- * @default false
41647
- * @example false
41648
- */
41649
- expand_prompt?: boolean;
41650
- /**
41651
- * Frame Rate
41653
+ * Frames Per Second
41652
41654
  * @description The frame rate of the video.
41653
41655
  * @default 25
41654
41656
  * @example 25
41655
41657
  */
41656
- frame_rate?: number;
41658
+ frames_per_second?: number;
41657
41659
  /**
41658
41660
  * Images
41659
41661
  * @description The image conditions to use for generation.
@@ -41684,6 +41686,13 @@ export interface LtxVideoLoraMulticonditioningInput {
41684
41686
  * @default blurry, low quality, low resolution, inconsistent motion, jittery, distorted
41685
41687
  */
41686
41688
  negative_prompt?: string;
41689
+ /**
41690
+ * Num Inference Steps
41691
+ * @description The number of inference steps to use.
41692
+ * @default 30
41693
+ * @example 30
41694
+ */
41695
+ num_inference_steps?: number;
41687
41696
  /**
41688
41697
  * Number Of Frames
41689
41698
  * @description The number of frames in the video.
@@ -41691,13 +41700,6 @@ export interface LtxVideoLoraMulticonditioningInput {
41691
41700
  * @example 89
41692
41701
  */
41693
41702
  number_of_frames?: number;
41694
- /**
41695
- * Number Of Steps
41696
- * @description The number of inference steps to use.
41697
- * @default 30
41698
- * @example 30
41699
- */
41700
- number_of_steps?: number;
41701
41703
  /**
41702
41704
  * Prompt
41703
41705
  * @description The prompt to generate the video from.
@@ -41762,6 +41764,13 @@ export interface LtxVideoLoraImageToVideoInput {
41762
41764
  * @enum {string}
41763
41765
  */
41764
41766
  aspect_ratio?: '16:9' | '1:1' | '9:16' | 'auto';
41767
+ /**
41768
+ * Enable Prompt Expansion
41769
+ * @description Whether to expand the prompt using the LLM.
41770
+ * @default false
41771
+ * @example false
41772
+ */
41773
+ enable_prompt_expansion?: boolean;
41765
41774
  /**
41766
41775
  * Enable Safety Checker
41767
41776
  * @description Whether to enable the safety checker.
@@ -41770,19 +41779,12 @@ export interface LtxVideoLoraImageToVideoInput {
41770
41779
  */
41771
41780
  enable_safety_checker?: boolean;
41772
41781
  /**
41773
- * Expand Prompt
41774
- * @description Whether to expand the prompt using the LLM.
41775
- * @default false
41776
- * @example false
41777
- */
41778
- expand_prompt?: boolean;
41779
- /**
41780
- * Frame Rate
41782
+ * Frames Per Second
41781
41783
  * @description The frame rate of the video.
41782
41784
  * @default 25
41783
41785
  * @example 25
41784
41786
  */
41785
- frame_rate?: number;
41787
+ frames_per_second?: number;
41786
41788
  /**
41787
41789
  * Image Url
41788
41790
  * @description The URL of the image to use as input.
@@ -41801,6 +41803,13 @@ export interface LtxVideoLoraImageToVideoInput {
41801
41803
  * @default blurry, low quality, low resolution, inconsistent motion, jittery, distorted
41802
41804
  */
41803
41805
  negative_prompt?: string;
41806
+ /**
41807
+ * Num Inference Steps
41808
+ * @description The number of inference steps to use.
41809
+ * @default 30
41810
+ * @example 30
41811
+ */
41812
+ num_inference_steps?: number;
41804
41813
  /**
41805
41814
  * Number Of Frames
41806
41815
  * @description The number of frames in the video.
@@ -41808,13 +41817,6 @@ export interface LtxVideoLoraImageToVideoInput {
41808
41817
  * @example 89
41809
41818
  */
41810
41819
  number_of_frames?: number;
41811
- /**
41812
- * Number Of Steps
41813
- * @description The number of inference steps to use.
41814
- * @default 30
41815
- * @example 30
41816
- */
41817
- number_of_steps?: number;
41818
41820
  /**
41819
41821
  * Prompt
41820
41822
  * @description The prompt to generate the video from.
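
Note: this hunk and the LtxVideoLoraMulticonditioningInput hunk above apply the same three renames (expand_prompt → enable_prompt_expansion, frame_rate → frames_per_second, number_of_steps → num_inference_steps). A hedged migration sketch for callers (partial object for brevity; import path assumed):

```ts
import type { LtxVideoLoraImageToVideoInput } from "fal-endpoint-types"; // assumed export path

const input: Partial<LtxVideoLoraImageToVideoInput> = {
  prompt: "A calm ocean at dusk",             // illustrative prompt
  image_url: "https://example.com/frame.png", // illustrative URL
  enable_prompt_expansion: false,             // was: expand_prompt
  frames_per_second: 25,                      // was: frame_rate
  num_inference_steps: 30,                    // was: number_of_steps
};
```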
@@ -41872,147 +41874,7 @@ export interface LtxVideo13bDistilledImageToVideoInput extends SharedType_2e6 {}
41872
41874
 
41873
41875
  export interface LtxVideo13bDistilledImageToVideoOutput extends SharedType_6711 {}
41874
41876
 
41875
- export interface LtxVideo13bDistilledExtendInput {
41876
- /**
41877
- * Aspect Ratio
41878
- * @description The aspect ratio of the video.
41879
- * @default auto
41880
- * @example auto
41881
- * @enum {string}
41882
- */
41883
- aspect_ratio?: '9:16' | '1:1' | '16:9' | 'auto';
41884
- /**
41885
- * Constant Rate Factor
41886
- * @description The constant rate factor (CRF) to compress input media with. Compressed input media more closely matches the model's training data, which can improve motion quality.
41887
- * @default 29
41888
- * @example 29
41889
- */
41890
- constant_rate_factor?: number;
41891
- /**
41892
- * Enable Detail Pass
41893
- * @description Whether to use a detail pass. If True, the model will perform a second pass to refine the video and enhance details. This incurs a 2.0x cost multiplier on the base price.
41894
- * @default false
41895
- * @example false
41896
- */
41897
- enable_detail_pass?: boolean;
41898
- /**
41899
- * Enable Safety Checker
41900
- * @description Whether to enable the safety checker.
41901
- * @default true
41902
- * @example true
41903
- */
41904
- enable_safety_checker?: boolean;
41905
- /**
41906
- * Expand Prompt
41907
- * @description Whether to expand the prompt using a language model.
41908
- * @default false
41909
- * @example false
41910
- */
41911
- expand_prompt?: boolean;
41912
- /**
41913
- * Number of Inference Steps
41914
- * @description Number of inference steps during the first pass.
41915
- * @default 8
41916
- * @example 8
41917
- */
41918
- first_pass_num_inference_steps?: number;
41919
- /**
41920
- * Frame Rate
41921
- * @description The frame rate of the video.
41922
- * @default 24
41923
- * @example 24
41924
- */
41925
- frame_rate?: number;
41926
- /**
41927
- * Loras
41928
- * @description LoRA weights to use for generation
41929
- * @default []
41930
- */
41931
- loras?: Components.LoRAWeight[];
41932
- /**
41933
- * Negative Prompt
41934
- * @description Negative prompt for generation
41935
- * @default worst quality, inconsistent motion, blurry, jittery, distorted
41936
- */
41937
- negative_prompt?: string;
41938
- /**
41939
- * Number of Frames
41940
- * @description The number of frames in the video.
41941
- * @default 121
41942
- * @example 121
41943
- */
41944
- num_frames?: number;
41945
- /**
41946
- * Prompt
41947
- * @description Text prompt to guide generation
41948
- * @example Woman walking on a street in Tokyo
41949
- */
41950
- prompt: string;
41951
- /**
41952
- * Resolution
41953
- * @description Resolution of the generated video.
41954
- * @default 720p
41955
- * @example 720p
41956
- * @enum {string}
41957
- */
41958
- resolution?: '480p' | '720p';
41959
- /**
41960
- * Reverse Video
41961
- * @description Whether to reverse the video.
41962
- * @default false
41963
- * @example false
41964
- */
41965
- reverse_video?: boolean;
41966
- /**
41967
- * Second Pass Number of Inference Steps
41968
- * @description Number of inference steps during the second pass.
41969
- * @default 8
41970
- * @example 8
41971
- */
41972
- second_pass_num_inference_steps?: number;
41973
- /**
41974
- * Second Pass Skip Initial Steps
41975
- * @description The number of inference steps to skip in the initial steps of the second pass. By skipping some steps at the beginning, the second pass can focus on smaller details instead of larger changes.
41976
- * @default 5
41977
- * @example 5
41978
- */
41979
- second_pass_skip_initial_steps?: number;
41980
- /**
41981
- * Seed
41982
- * @description Random seed for generation
41983
- */
41984
- seed?: number;
41985
- /**
41986
- * Temporal AdaIN Factor
41987
- * @description The factor for adaptive instance normalization (AdaIN) applied to generated video chunks after the first. This can help deal with a gradual increase in saturation/contrast in the generated video by normalizing the color distribution across the video. A high value will ensure the color distribution is more consistent across the video, while a low value will allow for more variation in color distribution.
41988
- * @default 0.5
41989
- * @example 0.5
41990
- */
41991
- temporal_adain_factor?: number;
41992
- /**
41993
- * Tone Map Compression Ratio
41994
- * @description The compression ratio for tone mapping. This is used to compress the dynamic range of the video to improve visual quality. A value of 0.0 means no compression, while a value of 1.0 means maximum compression.
41995
- * @default 0
41996
- * @example 0
41997
- */
41998
- tone_map_compression_ratio?: number;
41999
- /**
42000
- * @description Video to be extended.
42001
- * @example {
42002
- * "video_url": "https://storage.googleapis.com/falserverless/web-examples/wan/t2v.mp4",
42003
- * "reverse_video": false,
42004
- * "start_frame_num": 0,
42005
- * "limit_num_frames": false,
42006
- * "resample_fps": false,
42007
- * "strength": 1,
42008
- * "target_fps": 24,
42009
- * "max_num_frames": 1441,
42010
- * "conditioning_type": "rgb",
42011
- * "preprocess": false
42012
- * }
42013
- */
42014
- video: Components.ExtendVideoConditioningInput;
42015
- }
41877
+ export interface LtxVideo13bDistilledExtendInput extends SharedType_133 {}
42016
41878
 
42017
41879
  export interface LtxVideo13bDistilledExtendOutput extends SharedType_844 {}
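
The verbose extend input above collapses into a shared type. A sketch only, assuming SharedType_133 preserves the field shape documented in the removed block:

```ts
// Assumption: SharedType_133 still accepts the prompt/resolution/video fields shown above.
const extendInput = {
  prompt: "Woman walking on a street in Tokyo",
  resolution: "720p",
  video: {
    video_url: "https://storage.googleapis.com/falserverless/web-examples/wan/t2v.mp4",
  },
};
```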
42018
41880
 
@@ -43490,7 +43352,7 @@ export interface Ltx219bVideoToVideoLoraInput {
43490
43352
  video_write_mode?: 'fast' | 'balanced' | 'small';
43491
43353
  }
43492
43354
 
43493
- export interface Ltx219bVideoToVideoLoraOutput extends SharedType_3b0 {}
43355
+ export interface Ltx219bVideoToVideoLoraOutput extends SharedType_1f5 {}
43494
43356
 
43495
43357
  export interface Ltx219bVideoToVideoInput {
43496
43358
  /**
@@ -43712,7 +43574,7 @@ export interface Ltx219bVideoToVideoInput {
43712
43574
  video_write_mode?: 'fast' | 'balanced' | 'small';
43713
43575
  }
43714
43576
 
43715
- export interface Ltx219bVideoToVideoOutput extends SharedType_3b0 {}
43577
+ export interface Ltx219bVideoToVideoOutput extends SharedType_1f5 {}
43716
43578
 
43717
43579
  export interface Ltx219bTextToVideoLoraInput {
43718
43580
  /**
@@ -43859,7 +43721,7 @@ export interface Ltx219bTextToVideoLoraInput {
43859
43721
  video_write_mode?: 'fast' | 'balanced' | 'small';
43860
43722
  }
43861
43723
 
43862
- export interface Ltx219bTextToVideoLoraOutput extends SharedType_857 {}
43724
+ export interface Ltx219bTextToVideoLoraOutput extends SharedType_6ef {}
43863
43725
 
43864
43726
  export interface Ltx219bTextToVideoInput {
43865
43727
  /**
@@ -44001,7 +43863,7 @@ export interface Ltx219bTextToVideoInput {
44001
43863
  video_write_mode?: 'fast' | 'balanced' | 'small';
44002
43864
  }
44003
43865
 
44004
- export interface Ltx219bTextToVideoOutput extends SharedType_857 {}
43866
+ export interface Ltx219bTextToVideoOutput extends SharedType_6ef {}
44005
43867
 
44006
43868
  export interface Ltx219bImageToVideoLoraInput {
44007
43869
  /**
@@ -44179,7 +44041,7 @@ export interface Ltx219bImageToVideoLoraInput {
44179
44041
  video_write_mode?: 'fast' | 'balanced' | 'small';
44180
44042
  }
44181
44043
 
44182
- export interface Ltx219bImageToVideoLoraOutput extends SharedType_369 {}
44044
+ export interface Ltx219bImageToVideoLoraOutput extends SharedType_437 {}
44183
44045
 
44184
44046
  export interface Ltx219bImageToVideoInput {
44185
44047
  /**
@@ -44352,7 +44214,7 @@ export interface Ltx219bImageToVideoInput {
44352
44214
  video_write_mode?: 'fast' | 'balanced' | 'small';
44353
44215
  }
44354
44216
 
44355
- export interface Ltx219bImageToVideoOutput extends SharedType_369 {}
44217
+ export interface Ltx219bImageToVideoOutput extends SharedType_437 {}
44356
44218
 
44357
44219
  export interface Ltx219bExtendVideoLoraInput {
44358
44220
  /**
@@ -44548,7 +44410,7 @@ export interface Ltx219bExtendVideoLoraInput {
44548
44410
  video_write_mode?: 'fast' | 'balanced' | 'small';
44549
44411
  }
44550
44412
 
44551
- export interface Ltx219bExtendVideoLoraOutput extends SharedType_cb6 {}
44413
+ export interface Ltx219bExtendVideoLoraOutput extends SharedType_5db {}
44552
44414
 
44553
44415
  export interface Ltx219bExtendVideoInput {
44554
44416
  /**
@@ -44739,7 +44601,7 @@ export interface Ltx219bExtendVideoInput {
44739
44601
  video_write_mode?: 'fast' | 'balanced' | 'small';
44740
44602
  }
44741
44603
 
44742
- export interface Ltx219bExtendVideoOutput extends SharedType_cb6 {}
44604
+ export interface Ltx219bExtendVideoOutput extends SharedType_5db {}
44743
44605
 
44744
44606
  export interface Ltx219bDistilledVideoToVideoLoraInput {
44745
44607
  /**
@@ -44954,7 +44816,7 @@ export interface Ltx219bDistilledVideoToVideoLoraInput {
44954
44816
  video_write_mode?: 'fast' | 'balanced' | 'small';
44955
44817
  }
44956
44818
 
44957
- export interface Ltx219bDistilledVideoToVideoLoraOutput extends SharedType_3b0 {}
44819
+ export interface Ltx219bDistilledVideoToVideoLoraOutput extends SharedType_1f5 {}
44958
44820
 
44959
44821
  export interface Ltx219bDistilledVideoToVideoInput {
44960
44822
  /**
@@ -45164,7 +45026,7 @@ export interface Ltx219bDistilledVideoToVideoInput {
45164
45026
  video_write_mode?: 'fast' | 'balanced' | 'small';
45165
45027
  }
45166
45028
 
45167
- export interface Ltx219bDistilledVideoToVideoOutput extends SharedType_3b0 {}
45029
+ export interface Ltx219bDistilledVideoToVideoOutput extends SharedType_1f5 {}
45168
45030
 
45169
45031
  export interface Ltx219bDistilledTextToVideoLoraInput {
45170
45032
  /**
@@ -45299,7 +45161,7 @@ export interface Ltx219bDistilledTextToVideoLoraInput {
45299
45161
  video_write_mode?: 'fast' | 'balanced' | 'small';
45300
45162
  }
45301
45163
 
45302
- export interface Ltx219bDistilledTextToVideoLoraOutput extends SharedType_857 {}
45164
+ export interface Ltx219bDistilledTextToVideoLoraOutput extends SharedType_6ef {}
45303
45165
 
45304
45166
  export interface Ltx219bDistilledTextToVideoInput {
45305
45167
  /**
@@ -45429,7 +45291,7 @@ export interface Ltx219bDistilledTextToVideoInput {
45429
45291
  video_write_mode?: 'fast' | 'balanced' | 'small';
45430
45292
  }
45431
45293
 
45432
- export interface Ltx219bDistilledTextToVideoOutput extends SharedType_857 {}
45294
+ export interface Ltx219bDistilledTextToVideoOutput extends SharedType_6ef {}
45433
45295
 
45434
45296
  export interface Ltx219bDistilledImageToVideoLoraInput {
45435
45297
  /**
@@ -45595,7 +45457,7 @@ export interface Ltx219bDistilledImageToVideoLoraInput {
45595
45457
  video_write_mode?: 'fast' | 'balanced' | 'small';
45596
45458
  }
45597
45459
 
45598
- export interface Ltx219bDistilledImageToVideoLoraOutput extends SharedType_369 {}
45460
+ export interface Ltx219bDistilledImageToVideoLoraOutput extends SharedType_437 {}
45599
45461
 
45600
45462
  export interface Ltx219bDistilledImageToVideoInput {
45601
45463
  /**
@@ -45756,7 +45618,7 @@ export interface Ltx219bDistilledImageToVideoInput {
45756
45618
  video_write_mode?: 'fast' | 'balanced' | 'small';
45757
45619
  }
45758
45620
 
45759
- export interface Ltx219bDistilledImageToVideoOutput extends SharedType_369 {}
45621
+ export interface Ltx219bDistilledImageToVideoOutput extends SharedType_437 {}
45760
45622
 
45761
45623
  export interface Ltx219bDistilledExtendVideoLoraInput {
45762
45624
  /**
@@ -45940,7 +45802,7 @@ export interface Ltx219bDistilledExtendVideoLoraInput {
45940
45802
  video_write_mode?: 'fast' | 'balanced' | 'small';
45941
45803
  }
45942
45804
 
45943
- export interface Ltx219bDistilledExtendVideoLoraOutput extends SharedType_cb6 {}
45805
+ export interface Ltx219bDistilledExtendVideoLoraOutput extends SharedType_5db {}
45944
45806
 
45945
45807
  export interface Ltx219bDistilledExtendVideoInput {
45946
45808
  /**
@@ -46119,7 +45981,7 @@ export interface Ltx219bDistilledExtendVideoInput {
46119
45981
  video_write_mode?: 'fast' | 'balanced' | 'small';
46120
45982
  }
46121
45983
 
46122
- export interface Ltx219bDistilledExtendVideoOutput extends SharedType_cb6 {}
45984
+ export interface Ltx219bDistilledExtendVideoOutput extends SharedType_5db {}
46123
45985
 
46124
45986
  export interface Ltx219bDistilledAudioToVideoLoraInput {
46125
45987
  /**
@@ -49324,9 +49186,9 @@ export interface LightxRelightInput {
49324
49186
  /**
49325
49187
  * @description Relighting parameters (required for relight_condition_type='ic'). Not used for 'bg' (which expects a background image URL instead).
49326
49188
  * @example {
49327
- * "use_sky_mask": false,
49328
- * "bg_source": "Right",
49329
49189
  * "relight_prompt": "Sunlight",
49190
+ * "bg_source": "Right",
49191
+ * "use_sky_mask": false,
49330
49192
  * "cfg": 2
49331
49193
  * }
49332
49194
  */
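
The two relight modes take different inputs, per the description above. A sketch; relight_params and background_image_url are hypothetical property names chosen for illustration:

```ts
// 'ic' mode: relighting parameters are required (keys from the @example above).
const icInput = {
  relight_condition_type: "ic",
  relight_params: { relight_prompt: "Sunlight", bg_source: "Right", use_sky_mask: false, cfg: 2 },
};

// 'bg' mode: the params object is not used; a background image URL is expected instead.
const bgInput = {
  relight_condition_type: "bg",
  background_image_url: "https://example.com/background.png", // hypothetical field name
};
```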
@@ -50906,11 +50768,6 @@ export interface KlingVideoV3StandardTextToVideoInput {
50906
50768
  * @enum {string}
50907
50769
  */
50908
50770
  shot_type?: 'customize' | 'intelligent';
50909
- /**
50910
- * Voice Ids
50911
- * @description Optional Voice IDs for video generation. Reference voices in your prompt with <<<voice_1>>> and <<<voice_2>>> (maximum 2 voices per task). Get voice IDs from the kling video create-voice endpoint: https://fal.ai/models/fal-ai/kling-video/create-voice
50912
- */
50913
- voice_ids?: string[];
50914
50771
  }
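
The voice_ids field is dropped here and, in the hunks below, from the standard image-to-video and both pro inputs as well. A minimal migration sketch covering all four (partial object; import path assumed):

```ts
import type { KlingVideoV3StandardTextToVideoInput } from "fal-endpoint-types"; // assumed export path

const input: Partial<KlingVideoV3StandardTextToVideoInput> = {
  prompt: "A fox trotting through fresh snow", // illustrative prompt
  // voice_ids: ["..."]  // removed in 1.3.37; passing it now fails type-checking
};
```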
50915
50772
 
50916
50773
  export interface KlingVideoV3StandardTextToVideoOutput {
@@ -51048,11 +50905,6 @@ export interface KlingVideoV3StandardImageToVideoInput {
51048
50905
  * @example https://storage.googleapis.com/falserverless/example_inputs/kling-v3/standard-i2v/start_image.png
51049
50906
  */
51050
50907
  start_image_url: string;
51051
- /**
51052
- * Voice Ids
51053
- * @description Optional Voice IDs for video generation. Reference voices in your prompt with <<<voice_1>>> and <<<voice_2>>> (maximum 2 voices per task). Get voice IDs from the kling video create-voice endpoint: https://fal.ai/models/fal-ai/kling-video/create-voice
51054
- */
51055
- voice_ids?: string[];
51056
50908
  }
51057
50909
 
51058
50910
  export interface KlingVideoV3StandardImageToVideoOutput {
@@ -51120,11 +50972,6 @@ export interface KlingVideoV3ProTextToVideoInput {
51120
50972
  * @enum {string}
51121
50973
  */
51122
50974
  shot_type?: 'customize' | 'intelligent';
51123
- /**
51124
- * Voice Ids
51125
- * @description Optional Voice IDs for video generation. Reference voices in your prompt with <<<voice_1>>> and <<<voice_2>>> (maximum 2 voices per task). Get voice IDs from the kling video create-voice endpoint: https://fal.ai/models/fal-ai/kling-video/create-voice
51126
- */
51127
- voice_ids?: string[];
51128
50975
  }
51129
50976
 
51130
50977
  export interface KlingVideoV3ProTextToVideoOutput {
@@ -51262,11 +51109,6 @@ export interface KlingVideoV3ProImageToVideoInput {
51262
51109
  * @example https://storage.googleapis.com/falserverless/example_inputs/kling-v3/pro-i2v/start_image.png
51263
51110
  */
51264
51111
  start_image_url: string;
51265
- /**
51266
- * Voice Ids
51267
- * @description Optional Voice IDs for video generation. Reference voices in your prompt with <<<voice_1>>> and <<<voice_2>>> (maximum 2 voices per task). Get voice IDs from the kling video create-voice endpoint: https://fal.ai/models/fal-ai/kling-video/create-voice
51268
- */
51269
- voice_ids?: string[];
51270
51112
  }
51271
51113
 
51272
51114
  export interface KlingVideoV3ProImageToVideoOutput {
@@ -52292,7 +52134,7 @@ export interface KlingVideoO3StandardVideoToVideoEditOutput {
52292
52134
  video: Components.File;
52293
52135
  }
52294
52136
 
52295
- export interface KlingVideoO3StandardTextToVideoInput extends SharedType_89f {}
52137
+ export interface KlingVideoO3StandardTextToVideoInput extends SharedType_25d {}
52296
52138
 
52297
52139
  export interface KlingVideoO3StandardTextToVideoOutput extends SharedType_723 {}
52298
52140
 
@@ -52534,7 +52376,7 @@ export interface KlingVideoO3ProVideoToVideoEditOutput {
52534
52376
  video: Components.File;
52535
52377
  }
52536
52378
 
52537
- export interface KlingVideoO3ProTextToVideoInput extends SharedType_89f {}
52379
+ export interface KlingVideoO3ProTextToVideoInput extends SharedType_25d {}
52538
52380
 
52539
52381
  export interface KlingVideoO3ProTextToVideoOutput extends SharedType_723 {}
52540
52382
 
@@ -53532,6 +53374,152 @@ export interface IpAdapterFaceIdInput {
53532
53374
 
53533
53375
  export interface IpAdapterFaceIdOutput extends SharedType_678 {}
53534
53376
 
53377
+ export interface InworldTtsInput {
53378
+ /**
53379
+ * Sample Rate Hertz
53380
+ * @description The sample rate in Hz for the output audio.
53381
+ * @default 48000
53382
+ * @enum {integer}
53383
+ */
53384
+ sample_rate_hertz?: 8000 | 16000 | 24000 | 32000 | 40000 | 48000;
53385
+ /**
53386
+ * Text
53387
+ * @description The text to synthesize into speech.
53388
+ * @example Hello! This is a demo of Inworld's TTS.
53389
+ */
53390
+ text: string;
53391
+ /**
53392
+ * Voice
53393
+ * @description The voice to use for synthesis.
53394
+ * @default Craig (en)
53395
+ * @enum {string}
53396
+ */
53397
+ voice?:
53398
+ | 'Loretta (en)'
53399
+ | 'Darlene (en)'
53400
+ | 'Marlene (en)'
53401
+ | 'Hank (en)'
53402
+ | 'Evelyn (en)'
53403
+ | 'Celeste (en)'
53404
+ | 'Pippa (en)'
53405
+ | 'Tessa (en)'
53406
+ | 'Liam (en)'
53407
+ | 'Callum (en)'
53408
+ | 'Hamish (en)'
53409
+ | 'Abby (en)'
53410
+ | 'Graham (en)'
53411
+ | 'Rupert (en)'
53412
+ | 'Mortimer (en)'
53413
+ | 'Snik (en)'
53414
+ | 'Anjali (en)'
53415
+ | 'Saanvi (en)'
53416
+ | 'Arjun (en)'
53417
+ | 'Claire (en)'
53418
+ | 'Oliver (en)'
53419
+ | 'Simon (en)'
53420
+ | 'Elliot (en)'
53421
+ | 'James (en)'
53422
+ | 'Serena (en)'
53423
+ | 'Gareth (en)'
53424
+ | 'Vinny (en)'
53425
+ | 'Lauren (en)'
53426
+ | 'Jessica (en)'
53427
+ | 'Ethan (en)'
53428
+ | 'Tyler (en)'
53429
+ | 'Jason (en)'
53430
+ | 'Chloe (en)'
53431
+ | 'Veronica (en)'
53432
+ | 'Victoria (en)'
53433
+ | 'Miranda (en)'
53434
+ | 'Sebastian (en)'
53435
+ | 'Victor (en)'
53436
+ | 'Malcolm (en)'
53437
+ | 'Kayla (en)'
53438
+ | 'Nate (en)'
53439
+ | 'Jake (en)'
53440
+ | 'Brian (en)'
53441
+ | 'Amina (en)'
53442
+ | 'Kelsey (en)'
53443
+ | 'Derek (en)'
53444
+ | 'Grant (en)'
53445
+ | 'Evan (en)'
53446
+ | 'Alex (en)'
53447
+ | 'Ashley (en)'
53448
+ | 'Craig (en)'
53449
+ | 'Deborah (en)'
53450
+ | 'Dennis (en)'
53451
+ | 'Edward (en)'
53452
+ | 'Elizabeth (en)'
53453
+ | 'Hades (en)'
53454
+ | 'Julia (en)'
53455
+ | 'Pixie (en)'
53456
+ | 'Mark (en)'
53457
+ | 'Olivia (en)'
53458
+ | 'Priya (en)'
53459
+ | 'Ronald (en)'
53460
+ | 'Sarah (en)'
53461
+ | 'Shaun (en)'
53462
+ | 'Theodore (en)'
53463
+ | 'Timothy (en)'
53464
+ | 'Wendy (en)'
53465
+ | 'Dominus (en)'
53466
+ | 'Hana (en)'
53467
+ | 'Clive (en)'
53468
+ | 'Carter (en)'
53469
+ | 'Blake (en)'
53470
+ | 'Luna (en)'
53471
+ | 'Yichen (zh)'
53472
+ | 'Xiaoyin (zh)'
53473
+ | 'Xinyi (zh)'
53474
+ | 'Jing (zh)'
53475
+ | 'Erik (nl)'
53476
+ | 'Katrien (nl)'
53477
+ | 'Lennart (nl)'
53478
+ | 'Lore (nl)'
53479
+ | 'Alain (fr)'
53480
+ | 'Hélène (fr)'
53481
+ | 'Mathieu (fr)'
53482
+ | 'Étienne (fr)'
53483
+ | 'Johanna (de)'
53484
+ | 'Josef (de)'
53485
+ | 'Gianni (it)'
53486
+ | 'Orietta (it)'
53487
+ | 'Asuka (ja)'
53488
+ | 'Satoshi (ja)'
53489
+ | 'Hyunwoo (ko)'
53490
+ | 'Minji (ko)'
53491
+ | 'Seojun (ko)'
53492
+ | 'Yoona (ko)'
53493
+ | 'Szymon (pl)'
53494
+ | 'Wojciech (pl)'
53495
+ | 'Heitor (pt)'
53496
+ | 'Maitê (pt)'
53497
+ | 'Diego (es)'
53498
+ | 'Lupita (es)'
53499
+ | 'Miguel (es)'
53500
+ | 'Rafael (es)'
53501
+ | 'Svetlana (ru)'
53502
+ | 'Elena (ru)'
53503
+ | 'Dmitry (ru)'
53504
+ | 'Nikolai (ru)'
53505
+ | 'Riya (hi)'
53506
+ | 'Manoj (hi)'
53507
+ | 'Yael (he)'
53508
+ | 'Oren (he)'
53509
+ | 'Nour (ar)'
53510
+ | 'Omar (ar)';
53511
+ }
53512
+
53513
+ export interface InworldTtsOutput {
53514
+ /**
53515
+ * @description Generated audio file.
53516
+ * @example {
53517
+ * "url": "https://v3b.fal.media/files/b/0a920730/38aud4s6sF7bOWFoQHaJk_tmpvv2htrpc.wav"
53518
+ * }
53519
+ */
53520
+ audio: Components.File;
53521
+ }
53522
+
53535
53523
  export interface InvisibleWatermarkInput {
53536
53524
  /**
53537
53525
  * Decode
@@ -58548,17 +58536,17 @@ export interface Hunyuan3dV3TextTo3dOutput {
58548
58536
  /**
58549
58537
  * @description URLs for different 3D model formats
58550
58538
  * @example {
58551
- * "obj": {
58552
- * "file_size": 44084728,
58553
- * "file_name": "model.obj",
58554
- * "content_type": "text/plain",
58555
- * "url": "https://v3b.fal.media/files/b/0a8686a8/AVgdsVFrGAKGAFr4e2g56_model.obj"
58556
- * },
58557
58539
  * "glb": {
58558
58540
  * "file_size": 64724836,
58559
58541
  * "file_name": "model.glb",
58560
58542
  * "content_type": "model/gltf-binary",
58561
58543
  * "url": "https://v3b.fal.media/files/b/0a8686a8/1hPquv3AqqkfnqSM9fpmB_model.glb"
58544
+ * },
58545
+ * "obj": {
58546
+ * "file_size": 44084728,
58547
+ * "file_name": "model.obj",
58548
+ * "content_type": "text/plain",
58549
+ * "url": "https://v3b.fal.media/files/b/0a8686a8/AVgdsVFrGAKGAFr4e2g56_model.obj"
58562
58550
  * }
58563
58551
  * }
58564
58552
  */
@@ -58717,17 +58705,17 @@ export interface Hunyuan3dV3ImageTo3dOutput {
58717
58705
  /**
58718
58706
  * @description URLs for different 3D model formats
58719
58707
  * @example {
58720
- * "obj": {
58721
- * "file_size": 42886419,
58722
- * "file_name": "model.obj",
58723
- * "content_type": "text/plain",
58724
- * "url": "https://v3b.fal.media/files/b/0a8686ad/ifdJskhUfQysq-NN20iQR_model.obj"
58725
- * },
58726
58708
  * "glb": {
58727
58709
  * "file_size": 64122888,
58728
58710
  * "file_name": "model.glb",
58729
58711
  * "content_type": "model/gltf-binary",
58730
58712
  * "url": "https://v3b.fal.media/files/b/0a8686ae/MQN_KtP32PbqtPr_VLcyp_model.glb"
58713
+ * },
58714
+ * "obj": {
58715
+ * "file_size": 42886419,
58716
+ * "file_name": "model.obj",
58717
+ * "content_type": "text/plain",
58718
+ * "url": "https://v3b.fal.media/files/b/0a8686ad/ifdJskhUfQysq-NN20iQR_model.obj"
58731
58719
  * }
58732
58720
  * }
58733
58721
  */
@@ -60483,8 +60471,8 @@ export interface Hunyuan_worldOutput {
60483
60471
  /**
60484
60472
  * @description The generated panorama image.
60485
60473
  * @example {
60486
- * "file_size": 2738127,
60487
60474
  * "height": 960,
60475
+ * "file_size": 2738127,
60488
60476
  * "file_name": "5db7925423b44f2a98098cd8f7cad7ec.png",
60489
60477
  * "content_type": "image/png",
60490
60478
  * "url": "https://v3.fal.media/files/kangaroo/P2AmXuLlyDIsivqjV_rAr_5db7925423b44f2a98098cd8f7cad7ec.png",
@@ -62139,6 +62127,204 @@ export interface GenfocusInput {
62139
62127
 
62140
62128
  export interface GenfocusOutput extends SharedType_951 {}
62141
62129
 
62130
+ export interface GeminiTtsInput {
62131
+ /**
62132
+ * Language Code
62133
+ * @description Language for multilingual synthesis. When set, steers the model to speak in the specified language. Supports 24 GA languages and 60+ Preview languages. If not set, the model auto-detects the language from the text.
62134
+ * @example English (US)
62135
+ * @example French (France)
62136
+ * @example Japanese (Japan)
62137
+ */
62138
+ language_code?:
62139
+ | 'Arabic (Egypt)'
62140
+ | 'Bangla (Bangladesh)'
62141
+ | 'Dutch (Netherlands)'
62142
+ | 'English (India)'
62143
+ | 'English (US)'
62144
+ | 'French (France)'
62145
+ | 'German (Germany)'
62146
+ | 'Hindi (India)'
62147
+ | 'Indonesian (Indonesia)'
62148
+ | 'Italian (Italy)'
62149
+ | 'Japanese (Japan)'
62150
+ | 'Korean (South Korea)'
62151
+ | 'Marathi (India)'
62152
+ | 'Polish (Poland)'
62153
+ | 'Portuguese (Brazil)'
62154
+ | 'Romanian (Romania)'
62155
+ | 'Russian (Russia)'
62156
+ | 'Spanish (Spain)'
62157
+ | 'Tamil (India)'
62158
+ | 'Telugu (India)'
62159
+ | 'Thai (Thailand)'
62160
+ | 'Turkish (Turkey)'
62161
+ | 'Ukrainian (Ukraine)'
62162
+ | 'Vietnamese (Vietnam)'
62163
+ | 'Afrikaans (South Africa)'
62164
+ | 'Albanian (Albania)'
62165
+ | 'Amharic (Ethiopia)'
62166
+ | 'Arabic (World)'
62167
+ | 'Armenian (Armenia)'
62168
+ | 'Azerbaijani (Azerbaijan)'
62169
+ | 'Basque (Spain)'
62170
+ | 'Belarusian (Belarus)'
62171
+ | 'Bulgarian (Bulgaria)'
62172
+ | 'Burmese (Myanmar)'
62173
+ | 'Catalan (Spain)'
62174
+ | 'Cebuano (Philippines)'
62175
+ | 'Chinese Mandarin (China)'
62176
+ | 'Chinese Mandarin (Taiwan)'
62177
+ | 'Croatian (Croatia)'
62178
+ | 'Czech (Czech Republic)'
62179
+ | 'Danish (Denmark)'
62180
+ | 'English (Australia)'
62181
+ | 'English (UK)'
62182
+ | 'Estonian (Estonia)'
62183
+ | 'Filipino (Philippines)'
62184
+ | 'Finnish (Finland)'
62185
+ | 'French (Canada)'
62186
+ | 'Galician (Spain)'
62187
+ | 'Georgian (Georgia)'
62188
+ | 'Greek (Greece)'
62189
+ | 'Gujarati (India)'
62190
+ | 'Haitian Creole (Haiti)'
62191
+ | 'Hebrew (Israel)'
62192
+ | 'Hungarian (Hungary)'
62193
+ | 'Icelandic (Iceland)'
62194
+ | 'Javanese (Java)'
62195
+ | 'Kannada (India)'
62196
+ | 'Konkani (India)'
62197
+ | 'Lao (Laos)'
62198
+ | 'Latin (Vatican City)'
62199
+ | 'Latvian (Latvia)'
62200
+ | 'Lithuanian (Lithuania)'
62201
+ | 'Luxembourgish (Luxembourg)'
62202
+ | 'Macedonian (North Macedonia)'
62203
+ | 'Maithili (India)'
62204
+ | 'Malagasy (Madagascar)'
62205
+ | 'Malay (Malaysia)'
62206
+ | 'Malayalam (India)'
62207
+ | 'Mongolian (Mongolia)'
62208
+ | 'Nepali (Nepal)'
62209
+ | 'Norwegian Bokmal (Norway)'
62210
+ | 'Norwegian Nynorsk (Norway)'
62211
+ | 'Odia (India)'
62212
+ | 'Pashto (Afghanistan)'
62213
+ | 'Persian (Iran)'
62214
+ | 'Portuguese (Portugal)'
62215
+ | 'Punjabi (India)'
62216
+ | 'Serbian (Serbia)'
62217
+ | 'Sindhi (India)'
62218
+ | 'Sinhala (Sri Lanka)'
62219
+ | 'Slovak (Slovakia)'
62220
+ | 'Slovenian (Slovenia)'
62221
+ | 'Spanish (Latin America)'
62222
+ | 'Spanish (Mexico)'
62223
+ | 'Swahili (Kenya)'
62224
+ | 'Swedish (Sweden)'
62225
+ | 'Urdu (Pakistan)';
62226
+ /**
62227
+ * Model
62228
+ * @description Which Gemini TTS model to use. gemini-2.5-flash-tts: low latency, cost-efficient for everyday applications (recommended). gemini-2.5-pro-tts: highest quality, best for structured workflows like podcasts, audiobooks, and customer support.
62229
+ * @default gemini-2.5-flash-tts
62230
+ * @enum {string}
62231
+ */
62232
+ model?: 'gemini-2.5-flash-tts' | 'gemini-2.5-pro-tts';
62233
+ /**
62234
+ * Output Format
62235
+ * @description Audio output format. mp3: compressed, small file size (recommended). wav: uncompressed PCM wrapped in WAV (24 kHz, 16-bit mono). ogg_opus: Ogg container with Opus codec, good quality-to-size ratio.
62236
+ * @default mp3
62237
+ * @enum {string}
62238
+ */
62239
+ output_format?: 'wav' | 'mp3' | 'ogg_opus';
62240
+ /**
62241
+ * Prompt
62242
+ * @description The text to convert to speech. Gemini TTS supports natural-language prompting for style, pace, accent, and emotional expression — include delivery instructions inline with the text (e.g. 'Say cheerfully: Have a wonderful day!'). For multi-speaker synthesis, prefix lines with speaker aliases defined in the speakers field (e.g. 'Alice: Hello!\nBob: Hi!'). Supports inline pace/style markers like [slowly], [whispering], [excited], [extremely fast].
62243
+ * @example Host: Welcome back to AI Frontiers, the podcast where we explore the latest breakthroughs in artificial intelligence. Today we have a very special guest. Doctor Chen, thank you for joining us!
62244
+ * DrChen: Thanks for having me! I'm excited to be here.
62245
+ * Host: So, let's dive right in. Your recent paper on neural architecture search has been making waves. Can you tell our listeners what inspired this research?
62246
+ * DrChen: Absolutely. It all started when we noticed that most existing approaches were optimizing for the wrong metrics. We asked ourselves, what if we could let the model design itself?
62247
+ */
62248
+ prompt: string;
62249
+ /**
62250
+ * Speakers
62251
+ * @description Multi-speaker voice configuration. When set, enables multi-speaker synthesis where different parts of the text are spoken by different voices. Each speaker needs a voice and a speaker_id (alias) that matches prefixes in the prompt. Requires gemini-2.5-pro-tts or gemini-2.5-flash-tts model. Not supported with gemini-2.5-flash-lite-preview-tts.
62252
+ * @example [
62253
+ * {
62254
+ * "voice": "Charon",
62255
+ * "speaker_id": "Host"
62256
+ * },
62257
+ * {
62258
+ * "voice": "Kore",
62259
+ * "speaker_id": "DrChen"
62260
+ * }
62261
+ * ]
62262
+ */
62263
+ speakers?: Components.SpeakerConfig[];
62264
+ /**
62265
+ * Style Instructions
62266
+ * @description Optional style and delivery instructions prepended to the prompt. Controls expressiveness, accent, pace, tone, and emotional expression using natural language. Use this to separate style control from the text content. Examples: 'Speak warmly and slowly', 'Read this as a dramatic newscast', 'Use a British accent with a cheerful tone', 'Whisper mysteriously'.
62267
+ * @example Say the following in a warm, conversational tone
62268
+ * @example Read this as a dramatic newscast with gravitas
62269
+ * @example Speak with a British accent, cheerfully and energetically
62270
+ * @example This is a podcast conversation. The host is enthusiastic and curious, the guest is knowledgeable and articulate
62271
+ */
62272
+ style_instructions?: string;
62273
+ /**
62274
+ * Temperature
62275
+ * @description Controls the randomness of the speech output. Higher values produce more creative and varied delivery, while lower values make the output more predictable and focused.
62276
+ * @default 1
62277
+ */
62278
+ temperature?: number;
62279
+ /**
62280
+ * Voice
62281
+ * @description Voice preset for single-speaker synthesis. 30 distinct voices are available. Ignored when speakers is set. Popular choices: Kore (strong, firm female), Puck (upbeat, lively male), Charon (calm, professional male), Zephyr (bright, clear female), Aoede (warm, melodic female).
62282
+ * @default Kore
62283
+ * @enum {string}
62284
+ */
62285
+ voice?:
62286
+ | 'Achernar'
62287
+ | 'Achird'
62288
+ | 'Algenib'
62289
+ | 'Algieba'
62290
+ | 'Alnilam'
62291
+ | 'Aoede'
62292
+ | 'Autonoe'
62293
+ | 'Callirrhoe'
62294
+ | 'Charon'
62295
+ | 'Despina'
62296
+ | 'Enceladus'
62297
+ | 'Erinome'
62298
+ | 'Fenrir'
62299
+ | 'Gacrux'
62300
+ | 'Iapetus'
62301
+ | 'Kore'
62302
+ | 'Laomedeia'
62303
+ | 'Leda'
62304
+ | 'Orus'
62305
+ | 'Pulcherrima'
62306
+ | 'Puck'
62307
+ | 'Rasalgethi'
62308
+ | 'Sadachbia'
62309
+ | 'Sadaltager'
62310
+ | 'Schedar'
62311
+ | 'Sulafat'
62312
+ | 'Umbriel'
62313
+ | 'Vindemiatrix'
62314
+ | 'Zephyr'
62315
+ | 'Zubenelgenubi';
62316
+ }
62317
+
62318
+ export interface GeminiTtsOutput {
62319
+ /**
62320
+ * @description The generated audio file.
62321
+ * @example {
62322
+ * "url": "https://v3b.fal.media/files/b/0a935d4f/Ez4NpcnFTuGsu2FHDaJTR_gemini_tts_output.mp3"
62323
+ * }
62324
+ */
62325
+ audio: Components.File;
62326
+ }
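
A hedged multi-speaker sketch tying the prompt aliases to the speakers field (endpoint id illustrative; field names and values come from the definitions above):

```ts
import { fal } from "@fal-ai/client";

// Speaker aliases in the prompt must match the speaker_id entries below.
const { data } = await fal.subscribe("fal-ai/gemini-tts", { // endpoint id assumed
  input: {
    model: "gemini-2.5-flash-tts",
    prompt: "Host: Welcome back to AI Frontiers!\nDrChen: Thanks for having me.",
    speakers: [
      { voice: "Charon", speaker_id: "Host" },
      { voice: "Kore", speaker_id: "DrChen" },
    ],
    style_instructions: "This is a podcast conversation; the host is enthusiastic, the guest articulate.",
    output_format: "mp3",
  },
});
console.log(data.audio.url);
```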
62327
+
62142
62328
  export interface GeminiFlashEditMultiInput {
62143
62329
  /**
62144
62330
  * Input Image Urls
@@ -62410,9 +62596,9 @@ export interface FooocusUpscaleOrVaryInput {
62410
62596
  * Styles
62411
62597
  * @description The style to use.
62412
62598
  * @default [
62413
- * "Fooocus Enhance",
62414
62599
  * "Fooocus V2",
62415
- * "Fooocus Sharp"
62600
+ * "Fooocus Sharp",
62601
+ * "Fooocus Enhance"
62416
62602
  * ]
62417
62603
  */
62418
62604
  styles?: (
@@ -62919,9 +63105,9 @@ export interface FooocusInpaintInput {
62919
63105
  * Styles
62920
63106
  * @description The style to use.
62921
63107
  * @default [
62922
- * "Fooocus Enhance",
62923
63108
  * "Fooocus V2",
62924
- * "Fooocus Sharp"
63109
+ * "Fooocus Sharp",
63110
+ * "Fooocus Enhance"
62925
63111
  * ]
62926
63112
  */
62927
63113
  styles?: (
@@ -63236,10 +63422,10 @@ export interface FooocusImagePromptInput {
63236
63422
  guidance_scale?: number;
63237
63423
  /**
63238
63424
  * @example {
63239
- * "image_url": "https://storage.googleapis.com/falserverless/model_tests/fooocus/Pikachu.webp",
63425
+ * "weight": 1,
63240
63426
  * "stop_at": 1,
63241
63427
  * "type": "PyraCanny",
63242
- * "weight": 1
63428
+ * "image_url": "https://storage.googleapis.com/falserverless/model_tests/fooocus/Pikachu.webp"
63243
63429
  * }
63244
63430
  */
63245
63431
  image_prompt_1: Components.ImagePrompt;
@@ -63370,9 +63556,9 @@ export interface FooocusImagePromptInput {
63370
63556
  * Styles
63371
63557
  * @description The style to use.
63372
63558
  * @default [
63373
- * "Fooocus Enhance",
63374
63559
  * "Fooocus V2",
63375
- * "Fooocus Sharp"
63560
+ * "Fooocus Sharp",
63561
+ * "Fooocus Enhance"
63376
63562
  * ]
63377
63563
  */
63378
63564
  styles?: (
@@ -63828,9 +64014,9 @@ export interface FooocusInput {
63828
64014
  * Styles
63829
64015
  * @description The style to use.
63830
64016
  * @default [
63831
- * "Fooocus Enhance",
63832
64017
  * "Fooocus V2",
63833
- * "Fooocus Sharp"
64018
+ * "Fooocus Sharp",
64019
+ * "Fooocus Enhance"
63834
64020
  * ]
63835
64021
  */
63836
64022
  styles?: (
@@ -64790,8 +64976,8 @@ export interface FluxVisionUpscalerOutput {
64790
64976
  /**
64791
64977
  * @description The URL of the generated image.
64792
64978
  * @example {
64793
- * "file_size": 8842156,
64794
64979
  * "height": 2048,
64980
+ * "file_size": 8842156,
64795
64981
  * "file_name": "20TZeUQtQ8oKgsCKXSL81_StableSR_00002_.png",
64796
64982
  * "content_type": "image/png",
64797
64983
  * "url": "https://v3b.fal.media/files/b/panda/20TZeUQtQ8oKgsCKXSL81_StableSR_00002_.png",
@@ -71795,15 +71981,15 @@ export interface FloweditOutput {
71795
71981
  seed: number;
71796
71982
  }
71797
71983
 
71798
- export interface Florence2LargeRegionToSegmentationInput extends SharedType_2ff {}
71984
+ export interface Florence2LargeRegionToSegmentationInput extends SharedType_8b7 {}
71799
71985
 
71800
71986
  export interface Florence2LargeRegionToSegmentationOutput extends SharedType_4aa {}
71801
71987
 
71802
- export interface Florence2LargeRegionToDescriptionInput extends SharedType_2ff {}
71988
+ export interface Florence2LargeRegionToDescriptionInput extends SharedType_8b7 {}
71803
71989
 
71804
71990
  export interface Florence2LargeRegionToDescriptionOutput extends SharedType_129 {}
71805
71991
 
71806
- export interface Florence2LargeRegionToCategoryInput extends SharedType_2ff {}
71992
+ export interface Florence2LargeRegionToCategoryInput extends SharedType_8b7 {}
71807
71993
 
71808
71994
  export interface Florence2LargeRegionToCategoryOutput extends SharedType_129 {}
71809
71995
 
@@ -75840,220 +76026,220 @@ export interface ElevenlabsSpeechToTextScribeV2Output {
75840
76026
  * Words
75841
76027
  * @description Word-level transcription details
75842
76028
  * @example {
75843
- * "end": 0.539,
76029
+ * "text": "Hey,",
75844
76030
  * "start": 0.079,
75845
76031
  * "type": "word",
75846
- * "text": "Hey,",
76032
+ * "end": 0.539,
75847
76033
  * "speaker_id": "speaker_0"
75848
76034
  * }
75849
76035
  * @example {
75850
- * "end": 0.599,
76036
+ * "text": " ",
75851
76037
  * "start": 0.539,
75852
76038
  * "type": "spacing",
75853
- * "text": " ",
76039
+ * "end": 0.599,
75854
76040
  * "speaker_id": "speaker_0"
75855
76041
  * }
75856
76042
  * @example {
75857
- * "end": 0.679,
76043
+ * "text": "this",
75858
76044
  * "start": 0.599,
75859
76045
  * "type": "word",
75860
- * "text": "this",
76046
+ * "end": 0.679,
75861
76047
  * "speaker_id": "speaker_0"
75862
76048
  * }
75863
76049
  * @example {
75864
- * "end": 0.739,
76050
+ * "text": " ",
75865
76051
  * "start": 0.679,
75866
76052
  * "type": "spacing",
75867
- * "text": " ",
76053
+ * "end": 0.739,
75868
76054
  * "speaker_id": "speaker_0"
75869
76055
  * }
75870
76056
  * @example {
75871
- * "end": 0.799,
76057
+ * "text": "is",
75872
76058
  * "start": 0.739,
75873
76059
  * "type": "word",
75874
- * "text": "is",
76060
+ * "end": 0.799,
75875
76061
  * "speaker_id": "speaker_0"
75876
76062
  * }
75877
76063
  * @example {
75878
- * "end": 0.939,
76064
+ * "text": " ",
75879
76065
  * "start": 0.799,
75880
76066
  * "type": "spacing",
75881
- * "text": " ",
76067
+ * "end": 0.939,
75882
76068
  * "speaker_id": "speaker_0"
75883
76069
  * }
75884
76070
  * @example {
75885
- * "end": 0.939,
76071
+ * "text": "a",
75886
76072
  * "start": 0.939,
75887
76073
  * "type": "word",
75888
- * "text": "a",
76074
+ * "end": 0.939,
75889
76075
  * "speaker_id": "speaker_0"
75890
76076
  * }
75891
76077
  * @example {
75892
- * "end": 0.959,
76078
+ * "text": " ",
75893
76079
  * "start": 0.939,
75894
76080
  * "type": "spacing",
75895
- * "text": " ",
76081
+ * "end": 0.959,
75896
76082
  * "speaker_id": "speaker_0"
75897
76083
  * }
75898
76084
  * @example {
75899
- * "end": 1.179,
76085
+ * "text": "test",
75900
76086
  * "start": 0.959,
75901
76087
  * "type": "word",
75902
- * "text": "test",
76088
+ * "end": 1.179,
75903
76089
  * "speaker_id": "speaker_0"
75904
76090
  * }
75905
76091
  * @example {
75906
- * "end": 1.219,
76092
+ * "text": " ",
75907
76093
  * "start": 1.179,
75908
76094
  * "type": "spacing",
75909
- * "text": " ",
76095
+ * "end": 1.219,
75910
76096
  * "speaker_id": "speaker_0"
75911
76097
  * }
75912
76098
  * @example {
75913
- * "end": 1.719,
76099
+ * "text": "recording",
75914
76100
  * "start": 1.22,
75915
76101
  * "type": "word",
75916
- * "text": "recording",
76102
+ * "end": 1.719,
75917
76103
  * "speaker_id": "speaker_0"
75918
76104
  * }
75919
76105
  * @example {
75920
- * "end": 1.719,
76106
+ * "text": " ",
75921
76107
  * "start": 1.719,
75922
76108
  * "type": "spacing",
75923
- * "text": " ",
76109
+ * "end": 1.719,
75924
76110
  * "speaker_id": "speaker_0"
75925
76111
  * }
75926
76112
  * @example {
75927
- * "end": 1.86,
76113
+ * "text": "for",
75928
76114
  * "start": 1.719,
75929
76115
  * "type": "word",
75930
- * "text": "for",
76116
+ * "end": 1.86,
75931
76117
  * "speaker_id": "speaker_0"
75932
76118
  * }
75933
76119
  * @example {
75934
- * "end": 1.879,
76120
+ * "text": " ",
75935
76121
  * "start": 1.86,
75936
76122
  * "type": "spacing",
75937
- * "text": " ",
76123
+ * "end": 1.879,
75938
76124
  * "speaker_id": "speaker_0"
75939
76125
  * }
75940
76126
  * @example {
75941
- * "end": 2.24,
76127
+ * "text": "Scribe",
75942
76128
  * "start": 1.879,
75943
76129
  * "type": "word",
75944
- * "text": "Scribe",
76130
+ * "end": 2.24,
75945
76131
  * "speaker_id": "speaker_0"
75946
76132
  * }
75947
76133
  * @example {
75948
- * "end": 2.319,
76134
+ * "text": " ",
75949
76135
  * "start": 2.24,
75950
76136
  * "type": "spacing",
75951
- * "text": " ",
76137
+ * "end": 2.319,
75952
76138
  * "speaker_id": "speaker_0"
75953
76139
  * }
75954
76140
  * @example {
75955
- * "end": 2.759,
76141
+ * "text": "version",
75956
76142
  * "start": 2.319,
75957
76143
  * "type": "word",
75958
- * "text": "version",
76144
+ * "end": 2.759,
75959
76145
  * "speaker_id": "speaker_0"
75960
76146
  * }
75961
76147
  * @example {
75962
- * "end": 2.779,
76148
+ * "text": " ",
75963
76149
  * "start": 2.759,
75964
76150
  * "type": "spacing",
75965
- * "text": " ",
76151
+ * "end": 2.779,
75966
76152
  * "speaker_id": "speaker_0"
75967
76153
  * }
75968
76154
  * @example {
75969
- * "end": 3.379,
76155
+ * "text": "two,",
75970
76156
  * "start": 2.779,
75971
76157
  * "type": "word",
75972
- * "text": "two,",
76158
+ * "end": 3.379,
75973
76159
  * "speaker_id": "speaker_0"
75974
76160
  * }
75975
76161
  * @example {
75976
- * "end": 3.399,
76162
+ * "text": " ",
75977
76163
  * "start": 3.379,
75978
76164
  * "type": "spacing",
75979
- * "text": " ",
76165
+ * "end": 3.399,
75980
76166
  * "speaker_id": "speaker_0"
75981
76167
  * }
75982
76168
  * @example {
75983
- * "end": 3.519,
76169
+ * "text": "which",
75984
76170
  * "start": 3.399,
75985
76171
  * "type": "word",
75986
- * "text": "which",
76172
+ * "end": 3.519,
75987
76173
  * "speaker_id": "speaker_0"
75988
76174
  * }
75989
76175
  * @example {
75990
- * "end": 3.539,
76176
+ * "text": " ",
75991
76177
  * "start": 3.519,
75992
76178
  * "type": "spacing",
75993
- * "text": " ",
76179
+ * "end": 3.539,
75994
76180
  * "speaker_id": "speaker_0"
75995
76181
  * }
75996
76182
  * @example {
75997
- * "end": 3.659,
76183
+ * "text": "is",
75998
76184
  * "start": 3.539,
75999
76185
  * "type": "word",
76000
- * "text": "is",
76186
+ * "end": 3.659,
76001
76187
  * "speaker_id": "speaker_0"
76002
76188
  * }
76003
76189
  * @example {
76004
- * "end": 3.699,
76190
+ * "text": " ",
76005
76191
  * "start": 3.659,
76006
76192
  * "type": "spacing",
76007
- * "text": " ",
76193
+ * "end": 3.699,
76008
76194
  * "speaker_id": "speaker_0"
76009
76195
  * }
76010
76196
  * @example {
76011
- * "end": 3.839,
76197
+ * "text": "now",
76012
76198
  * "start": 3.699,
76013
76199
  * "type": "word",
76014
- * "text": "now",
76200
+ * "end": 3.839,
76015
76201
  * "speaker_id": "speaker_0"
76016
76202
  * }
76017
76203
  * @example {
76018
- * "end": 3.839,
76204
+ * "text": " ",
76019
76205
  * "start": 3.839,
76020
76206
  * "type": "spacing",
76021
- * "text": " ",
76207
+ * "end": 3.839,
76022
76208
  * "speaker_id": "speaker_0"
76023
76209
  * }
76024
76210
  * @example {
76025
- * "end": 4.319,
76211
+ * "text": "available",
76026
76212
  * "start": 3.839,
76027
76213
  * "type": "word",
76028
- * "text": "available",
76214
+ * "end": 4.319,
76029
76215
  * "speaker_id": "speaker_0"
76030
76216
  * }
76031
76217
  * @example {
76032
- * "end": 4.339,
76218
+ * "text": " ",
76033
76219
  * "start": 4.319,
76034
76220
  * "type": "spacing",
76035
- * "text": " ",
76221
+ * "end": 4.339,
76036
76222
  * "speaker_id": "speaker_0"
76037
76223
  * }
76038
76224
  * @example {
76039
- * "end": 4.579,
76225
+ * "text": "on",
76040
76226
  * "start": 4.339,
76041
76227
  * "type": "word",
76042
- * "text": "on",
76228
+ * "end": 4.579,
76043
76229
  * "speaker_id": "speaker_0"
76044
76230
  * }
76045
76231
  * @example {
76046
- * "end": 4.599,
76232
+ * "text": " ",
76047
76233
  * "start": 4.579,
76048
76234
  * "type": "spacing",
76049
- * "text": " ",
76235
+ * "end": 4.599,
76050
76236
  * "speaker_id": "speaker_0"
76051
76237
  * }
76052
76238
  * @example {
76053
- * "end": 5.699,
76239
+ * "text": "fal.ai.",
76054
76240
  * "start": 4.599,
76055
76241
  * "type": "word",
76056
- * "text": "fal.ai.",
76242
+ * "end": 5.699,
76057
76243
  * "speaker_id": "speaker_0"
76058
76244
  * }
76059
76245
  */
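
The word-level entries interleave "word" and "spacing" items, so the plain transcript can be rebuilt with a straight join. A sketch, with the entry shape inferred from the examples above:

```ts
// Shape inferred from the @example entries; the real type may carry more variants.
type ScribeWord = {
  text: string;
  start: number;
  end: number;
  type: "word" | "spacing";
  speaker_id: string;
};

function toTranscript(words: ScribeWord[]): string {
  // "spacing" entries already carry the whitespace, so no separator is needed.
  return words.map((w) => w.text).join("");
}
```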
@@ -77347,51 +77533,6 @@ export interface Deepfilternet3Output {
77347
77533
  timings: Components.DeepFilterNetTimings;
77348
77534
  }
77349
77535
 
77350
- export interface DecartLucy5bImageToVideoInput {
77351
- /**
77352
- * Aspect Ratio
77353
- * @description Aspect ratio of the generated video.
77354
- * @default 16:9
77355
- * @enum {string}
77356
- */
77357
- aspect_ratio?: '9:16' | '16:9';
77358
- /**
77359
- * Image Url
77360
- * @description URL of the image to use as the first frame
77361
- * @example https://v3.fal.media/files/monkey/OlpQEYh7oNeJ3qKsdiaym_ia5ECOgFbfcniMDu01_18_da73e078e0924472b51d92f3e3fba98c.png
77362
- */
77363
- image_url: string;
77364
- /**
77365
- * Prompt
77366
- * @description Text description of the desired video content
77367
- * @example A cat is walking slowly in the garden
77368
- */
77369
- prompt: string;
77370
- /**
77371
- * Resolution
77372
- * @description Resolution of the generated video
77373
- * @default 720p
77374
- * @constant
77375
- */
77376
- resolution?: '720p';
77377
- /**
77378
- * Sync Mode
77379
- * @description If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
77380
- * @default true
77381
- */
77382
- sync_mode?: boolean;
77383
- }
77384
-
77385
- export interface DecartLucy5bImageToVideoOutput {
77386
- /**
77387
- * @description The generated MP4 video with H.264 encoding
77388
- * @example {
77389
- * "url": "https://v3.fal.media/files/kangaroo/rIFaCsyWvBxYBKw3cPbOU_indir.mp4"
77390
- * }
77391
- */
77392
- video: Components.File;
77393
- }
77394
-
77395
77536
  export interface DdcolorInput {
77396
77537
  /**
77397
77538
  * Image Url
@@ -78191,7 +78332,7 @@ export interface Cogvideox5bVideoToVideoInput {
78191
78332
  video_url: string;
78192
78333
  }
78193
78334
 
78194
- export interface Cogvideox5bVideoToVideoOutput extends SharedType_1001 {}
78335
+ export interface Cogvideox5bVideoToVideoOutput extends SharedType_100 {}
78195
78336
 
78196
78337
  export interface Cogvideox5bImageToVideoInput {
78197
78338
  /**
@@ -78270,7 +78411,7 @@ export interface Cogvideox5bImageToVideoInput {
78270
78411
  );
78271
78412
  }
78272
78413
 
78273
- export interface Cogvideox5bImageToVideoOutput extends SharedType_1001 {}
78414
+ export interface Cogvideox5bImageToVideoOutput extends SharedType_100 {}
78274
78415
 
78275
78416
  export interface Cogvideox5bInput {
78276
78417
  /**
@@ -78343,7 +78484,7 @@ export interface Cogvideox5bInput {
78343
78484
  );
78344
78485
  }
78345
78486
 
78346
- export interface Cogvideox5bOutput extends SharedType_1001 {}
78487
+ export interface Cogvideox5bOutput extends SharedType_100 {}
78347
78488
 
78348
78489
  export interface CodeformerInput {
78349
78490
  /**
@@ -78393,8 +78534,8 @@ export interface CodeformerOutput {
78393
78534
  /**
78394
78535
  * @description The generated image file info.
78395
78536
  * @example {
78396
- * "height": 512,
78397
78537
  * "file_size": 423052,
78538
+ * "height": 512,
78398
78539
  * "file_name": "36d3ca4791a647678b2ff01a35c87f5a.png",
78399
78540
  * "content_type": "image/png",
78400
78541
  * "url": "https://storage.googleapis.com/falserverless/model_tests/codeformer/codeformer_restored_1.jpeg",
@@ -79250,95 +79391,6 @@ export interface CartoonifyInput {
79250
79391
 
79251
79392
  export interface CartoonifyOutput extends SharedType_7c6 {}
79252
79393
 
79253
- export interface CalligrapherInput {
79254
- /**
79255
- * Auto Mask Generation
79256
- * @description Whether to automatically generate mask from detected text
79257
- * @default false
79258
- */
79259
- auto_mask_generation?: boolean;
79260
- /**
79261
- * Cfg Scale
79262
- * @description Guidance or strength scale for the model
79263
- * @default 1
79264
- */
79265
- cfg_scale?: number;
79266
- /**
79267
- * Image Size
79268
- * @description Target image size for generation
79269
- * @default {
79270
- * "height": 1024,
79271
- * "width": 1024
79272
- * }
79273
- */
79274
- image_size?:
79275
- | Components.ImageSize
79276
- | (
79277
- | 'square_hd'
79278
- | 'square'
79279
- | 'portrait_4_3'
79280
- | 'portrait_16_9'
79281
- | 'landscape_4_3'
79282
- | 'landscape_16_9'
79283
- );
79284
- /**
79285
- * Mask Image Url
79286
- * @description Base64-encoded mask image (optional if using auto_mask_generation)
79287
- * @example https://storage.googleapis.com/falserverless/calligrapher/test17_mask.png
79288
- */
79289
- mask_image_url?: string;
79290
- /**
79291
- * Num Images
79292
- * @description How many images to generate
79293
- * @default 1
79294
- */
79295
- num_images?: number;
79296
- /**
79297
- * Num Inference Steps
79298
- * @description Number of inference steps (1-100)
79299
- * @default 50
79300
- */
79301
- num_inference_steps?: number;
79302
- /**
79303
- * Prompt
79304
- * @description Text prompt to inpaint or customize
79305
- * @example The text is 'Rise'
79306
- */
79307
- prompt: string;
79308
- /**
79309
- * Reference Image Url
79310
- * @description Optional base64 reference image for style
79311
- */
79312
- reference_image_url?: string;
79313
- /**
79314
- * Seed
79315
- * @description Random seed for reproducibility
79316
- */
79317
- seed?: number;
79318
- /**
79319
- * Source Image Url
79320
- * @description Base64-encoded source image with drawn mask layers
79321
- * @example https://storage.googleapis.com/falserverless/calligrapher/test17_source.png
79322
- */
79323
- source_image_url: string;
79324
- /**
79325
- * Source Text
79326
- * @description Source text to replace (if empty, masks all detected text)
79327
- * @default
79328
- */
79329
- source_text?: string;
79330
- /**
79331
- * Use Context
79332
- * @description Whether to prepend context reference to the input
79333
- * @default true
79334
- */
79335
- use_context?: boolean;
79336
- }
79337
-
79338
- export interface CalligrapherOutput {
79339
- images: Components.Image[];
79340
- }
79341
-
79342
79394
  export interface BytedanceVideoStylizeInput {
79343
79395
  /**
79344
79396
  * Image Url
@@ -80831,6 +80883,32 @@ export interface BytedanceDreamactorV2Output {
80831
80883
  }
80832
80884
 
80833
80885
  export interface BytedanceUpscalerUpscaleVideoInput {
80886
+ /**
80887
+ * Enhancement Preset
80888
+ * @description The enhancement preset optimized for specific video scenarios. 'general' is a general-purpose template, 'ugc' targets user-generated short videos, 'short_series' is for short dramas, 'aigc' is for AI-generated content, and 'old_film' is for classic film restoration.
80889
+ * @default general
80890
+ * @enum {string}
80891
+ */
80892
+ enhancement_preset?: 'general' | 'ugc' | 'short_series' | 'aigc' | 'old_film';
80893
+ /**
80894
+ * Enhancement Tier
80895
+ * @description The enhancement quality tier. 'fast' provides essential upscaling with good speed, 'standard' uses adaptive algorithms for better visual texture, and 'pro' uses large-model restoration for cinematic quality (longer processing time, at 10 times the cost of `standard` and `fast`).
80896
+ * @default standard
80897
+ * @enum {string}
80898
+ */
80899
+ enhancement_tier?: 'fast' | 'standard' | 'pro';
80900
+ /**
80901
+ * Fidelity
80902
+ * @description The enhancement intensity. 'high' applies mild enhancement while keeping visual texture close to the source video. 'medium' provides a balanced image quality enhancement.
80903
+ * @default high
80904
+ * @enum {string}
80905
+ */
80906
+ fidelity?: 'high' | 'medium';
80907
+ /**
80908
+ * Scale Ratio
80909
+ * @description The scaling ratio for the output video resolution. When set, overrides target_resolution and scales the input resolution by this factor (e.g., 2.0 doubles the resolution). Range: 1.1 to 10.0. Note that this is valid only up to 4K; attempting to scale beyond 4K will result in an error (4K is defined as a total resolution of 3840x2160).
80910
+ */
80911
+ scale_ratio?: number;
80834
80912
  /**
80835
80913
  * Target Fps
80836
80914
  * @description The target FPS of the video to upscale.
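The four new upscaler fields above compose as in the following minimal sketch. It is illustrative only: the import path, the Pick alias, and the chosen values are assumptions, not part of the package's documented usage.

    // Assumed import path; the interface may be exported differently.
    import type { BytedanceUpscalerUpscaleVideoInput } from 'fal-endpoint-types';

    // Only the fields added in this release. With scale_ratio 2.0, a
    // 1920x1080 source yields 3840x2160 — exactly the 4K cap; any larger
    // source at this ratio would exceed 4K and be rejected.
    const upscaleTuning: Pick<
      BytedanceUpscalerUpscaleVideoInput,
      'enhancement_preset' | 'enhancement_tier' | 'fidelity' | 'scale_ratio'
    > = {
      enhancement_preset: 'old_film', // classic film restoration template
      enhancement_tier: 'standard',   // 'pro' costs 10x 'standard'/'fast'
      fidelity: 'high',               // mild enhancement, close to the source
      scale_ratio: 2.0,               // overrides target_resolution; 1.1-10.0
    };

Since all four fields are optional with defaults, the object can be spread into a full request input alongside the endpoint's required fields.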
@@ -80870,7 +80948,7 @@ export interface BytedanceUpscalerUpscaleVideoOutput {
80870
80948
 
80871
80949
  export interface BriaTextToImageHdInput extends SharedType_411 {}
80872
80950
 
80873
- export interface BriaTextToImageHdOutput extends SharedType_a97 {}
80951
+ export interface BriaTextToImageHdOutput extends SharedType_e19 {}
80874
80952
 
80875
80953
  export interface BriaTextToImageFastInput {
80876
80954
  /**
@@ -80943,11 +81021,11 @@ export interface BriaTextToImageFastInput {
80943
81021
  sync_mode?: boolean;
80944
81022
  }
80945
81023
 
80946
- export interface BriaTextToImageFastOutput extends SharedType_a97 {}
81024
+ export interface BriaTextToImageFastOutput extends SharedType_e19 {}
80947
81025
 
80948
81026
  export interface BriaTextToImageBaseInput extends SharedType_411 {}
80949
81027
 
80950
- export interface BriaTextToImageBaseOutput extends SharedType_a97 {}
81028
+ export interface BriaTextToImageBaseOutput extends SharedType_e19 {}
80951
81029
 
80952
81030
  export interface BriaReimagineInput {
80953
81031
  /**
@@ -81176,8 +81254,8 @@ export interface BriaGenfillOutput {
81176
81254
  * @description Generated Images
81177
81255
  * @example [
81178
81256
  * {
81179
- * "file_size": 1064550,
81180
81257
  * "height": 768,
81258
+ * "file_size": 1064550,
81181
81259
  * "file_name": "a0d138e6820c4ad58f1fd3c758f16047.png",
81182
81260
  * "content_type": "image/png",
81183
81261
  * "url": "https://storage.googleapis.com/falserverless/bria/bria_genfill_res.png",
@@ -81261,8 +81339,8 @@ export interface BriaExpandOutput {
81261
81339
  /**
81262
81340
  * @description The generated image
81263
81341
  * @example {
81264
- * "file_size": 1471342,
81265
81342
  * "height": 674,
81343
+ * "file_size": 1471342,
81266
81344
  * "file_name": "afa402a35ea742cdb5c3e219b2b19bfb.png",
81267
81345
  * "content_type": "image/png",
81268
81346
  * "url": "https://v3.fal.media/files/koala/8np-spgxxG-I1r3cjthRV_afa402a35ea742cdb5c3e219b2b19bfb.png",
@@ -81418,8 +81496,8 @@ export interface BriaBackgroundRemoveOutput {
81418
81496
  /**
81419
81497
  * @description The generated image
81420
81498
  * @example {
81421
- * "file_size": 1076276,
81422
81499
  * "height": 1024,
81500
+ * "file_size": 1076276,
81423
81501
  * "file_name": "070c731993e949d993c10ef6283d335d.png",
81424
81502
  * "content_type": "image/png",
81425
81503
  * "url": "https://v3.fal.media/files/tiger/GQEMNjRyxSoza7N8LPPqb_070c731993e949d993c10ef6283d335d.png",
@@ -81618,8 +81696,8 @@ export interface BirefnetV2VideoOutput {
81618
81696
  * "height": 1080,
81619
81697
  * "duration": 8,
81620
81698
  * "url": "https://storage.googleapis.com/falserverless/example_outputs/birefnet-video-output.webm",
81621
- * "width": 1920,
81622
81699
  * "fps": 24,
81700
+ * "width": 1920,
81623
81701
  * "file_name": "birefnet-video-output.webm",
81624
81702
  * "num_frames": 192,
81625
81703
  * "content_type": "video/webm"
@@ -81764,6 +81842,13 @@ export interface BenV2VideoInput {
81764
81842
  * @description Optional RGB values (0-255) for the background color. If not provided, the background will be transparent. For example: [0, 0, 0]
81765
81843
  */
81766
81844
  background_color?: [number, number, number];
81845
+ /**
81846
+ * Output Format
81847
+ * @description Output video format. Use "webm" for true transparency support (VP9 codec with alpha channel). MP4 format does not support transparency and will render transparent areas as black.
81848
+ * @default mp4
81849
+ * @enum {string}
81850
+ */
81851
+ output_format?: 'mp4' | 'webm';
81767
81852
  /**
81768
81853
  * Seed
81769
81854
  * @description Random seed for reproducible generation.
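Because MP4 cannot carry an alpha channel, the new output_format field pairs naturally with background_color. A minimal sketch follows; the import path and the helper itself are hypothetical, while the field names and enum values come from the diff above.

    // Assumed import path; the interface may be exported differently.
    import type { BenV2VideoInput } from 'fal-endpoint-types';

    // Pick 'webm' when transparency is wanted, since MP4 renders
    // transparent areas as black; otherwise fall back to MP4 with an
    // explicit solid background color.
    function backgroundOptions(transparent: boolean): Partial<BenV2VideoInput> {
      return transparent
        ? { output_format: 'webm' }                               // VP9 + alpha
        : { output_format: 'mp4', background_color: [0, 0, 0] };  // solid black
    }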
@@ -81812,8 +81897,8 @@ export interface BenV2ImageOutput {
81812
81897
  /**
81813
81898
  * @description The output image after background removal.
81814
81899
  * @example {
81815
- * "height": 512,
81816
81900
  * "file_size": 423052,
81901
+ * "height": 512,
81817
81902
  * "file_name": "zrZNETpI_ul2jonraqpxN_a57c3f3825d9418f8b3d39cde87c3310.png",
81818
81903
  * "content_type": "image/png",
81819
81904
  * "url": "https://storage.googleapis.com/falserverless/gallery/Ben2/zrZNETpI_ul2jonraqpxN_a57c3f3825d9418f8b3d39cde87c3310.png",
@@ -81915,8 +82000,8 @@ export interface BagelEditOutput {
81915
82000
  * @description The edited images.
81916
82001
  * @example [
81917
82002
  * {
81918
- * "height": 1024,
81919
82003
  * "file_size": 423052,
82004
+ * "height": 1024,
81920
82005
  * "file_name": "hQnndOMvGSt2UsYAiV3vs.jpeg",
81921
82006
  * "content_type": "image/jpeg",
81922
82007
  * "url": "https://storage.googleapis.com/falserverless/bagel/hQnndOMvGSt2UsYAiV3vs.jpeg",
@@ -81978,8 +82063,8 @@ export interface BagelOutput {
81978
82063
  * @description The generated images.
81979
82064
  * @example [
81980
82065
  * {
81981
- * "height": 1024,
81982
82066
  * "file_size": 423052,
82067
+ * "height": 1024,
81983
82068
  * "file_name": "wRhCPSyiKTiLnnWvUpGIl.jpeg",
81984
82069
  * "content_type": "image/jpeg",
81985
82070
  * "url": "https://storage.googleapis.com/falserverless/bagel/wRhCPSyiKTiLnnWvUpGIl.jpeg",
@@ -83582,11 +83667,11 @@ export interface ClarityaiCrystalVideoUpscalerOutput {
83582
83667
  * "height": 2160,
83583
83668
  * "duration": 13.056527,
83584
83669
  * "url": "https://storage.googleapis.com/falserverless/example_outputs/crystal_upscaler/video_upscaling/video_out.mp4",
83585
- * "fps": 23.130193905817176,
83586
83670
  * "width": 4096,
83671
+ * "fps": 23.130193905817176,
83587
83672
  * "file_name": "w0VQQvPdwvV2GSCtRTMzh_hDH8SPrB.mp4",
83588
- * "content_type": "video/mp4",
83589
- * "num_frames": 302
83673
+ * "num_frames": 302,
83674
+ * "content_type": "video/mp4"
83590
83675
  * }
83591
83676
  */
83592
83677
  video: Components.VideoFile;
@@ -84741,18 +84826,18 @@ export interface BriaEmbedProductInput {
84741
84826
  * {
84742
84827
  * "coordinates": {
84743
84828
  * "y": 317,
84829
+ * "width": 100,
84744
84830
  * "height": 300,
84745
- * "x": 300,
84746
- * "width": 100
84831
+ * "x": 300
84747
84832
  * },
84748
84833
  * "image_source": "https://bria-datasets.s3.us-east-1.amazonaws.com/embed-product/a_standing_lamp_over_white_background_0.png"
84749
84834
  * },
84750
84835
  * {
84751
84836
  * "coordinates": {
84752
84837
  * "y": 287,
84838
+ * "width": 120,
84753
84839
  * "height": 156,
84754
- * "x": 646,
84755
- * "width": 120
84840
+ * "x": 646
84756
84841
  * },
84757
84842
  * "image_source": "https://bria-datasets.s3.us-east-1.amazonaws.com/embed-product/a_wall_picture_on_white_background_0.png"
84758
84843
  * }