fal-endpoint-types 1.3.36 → 1.3.38

This diff shows the changes between publicly released versions of this package as they appear in their public registry, and is provided for informational purposes only.
@@ -94,8 +94,62 @@ export interface XaiGrokImagineVideoTextToVideoOutput {
94
94
  * "fps": 24,
95
95
  * "width": 1280,
96
96
  * "file_name": "RUAbFYlssdqnbjNLmE8qP_IX7BNYGP.mp4",
97
- * "num_frames": 145,
98
- * "content_type": "video/mp4"
97
+ * "content_type": "video/mp4",
98
+ * "num_frames": 145
99
+ * }
100
+ */
101
+ video: Components.VideoFile;
102
+ }
103
+
104
+ export interface XaiGrokImagineVideoReferenceToVideoInput {
105
+ /**
106
+ * Aspect Ratio
107
+ * @description Aspect ratio of the generated video.
108
+ * @default 16:9
109
+ * @enum {string}
110
+ */
111
+ aspect_ratio?: '16:9' | '4:3' | '3:2' | '1:1' | '2:3' | '3:4' | '9:16';
112
+ /**
113
+ * Duration
114
+ * @description Video duration in seconds.
115
+ * @default 8
116
+ */
117
+ duration?: number;
118
+ /**
119
+ * Prompt
120
+ * @description Text prompt describing the video to generate. Use @Image1, @Image2, etc. to reference specific images from reference_image_urls in order.
121
+ * @example A @Image1 running through a sunlit meadow, cinematic slow motion
122
+ */
123
+ prompt: string;
124
+ /**
125
+ * Reference Image URLs
126
+ * @description One or more reference image URLs to guide the video generation as style and content references. Reference in prompt as @Image1, @Image2, etc. Maximum 7 images.
127
+ * @example [
128
+ * "https://v3b.fal.media/files/b/0a8b90e0/BFLE9VDlZqsryU-UA3BoD_image_004.png"
129
+ * ]
130
+ */
131
+ reference_image_urls: string[];
132
+ /**
133
+ * Resolution
134
+ * @description Resolution of the output video.
135
+ * @default 480p
136
+ * @enum {string}
137
+ */
138
+ resolution?: '480p' | '720p';
139
+ }
140
+
141
+ export interface XaiGrokImagineVideoReferenceToVideoOutput {
142
+ /**
143
+ * @description The generated video.
144
+ * @example {
145
+ * "height": 720,
146
+ * "duration": 8,
147
+ * "url": "https://v3b.fal.media/files/b/0a8b90e4/r2v_output.mp4",
148
+ * "fps": 24,
149
+ * "width": 1280,
150
+ * "file_name": "r2v_output.mp4",
151
+ * "content_type": "video/mp4",
152
+ * "num_frames": 192
99
153
  * }
100
154
  */
101
155
  video: Components.VideoFile;
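
The new reference-to-video input above accepts a text prompt plus up to 7 reference images that the prompt can address as @Image1, @Image2, and so on. The following TypeScript sketch shows one way a request payload might be assembled; it assumes these interfaces are importable from the package root, and the image URL is a placeholder rather than a real asset.

import type { XaiGrokImagineVideoReferenceToVideoInput } from 'fal-endpoint-types';

// Sketch only: field values are illustrative, not taken from the package.
const referenceToVideoRequest: XaiGrokImagineVideoReferenceToVideoInput = {
  prompt: 'A @Image1 running through a sunlit meadow, cinematic slow motion',
  reference_image_urls: [
    'https://example.com/reference-1.png', // up to 7 reference images, in @ImageN order
  ],
  aspect_ratio: '16:9', // optional, default '16:9'
  duration: 8,          // optional, seconds, default 8
  resolution: '720p',   // optional, default '480p'
};
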
@@ -145,8 +199,46 @@ export interface XaiGrokImagineVideoImageToVideoOutput {
145
199
  * "fps": 24,
146
200
  * "width": 1280,
147
201
  * "file_name": "0Ci1dviuSnEyUZzBUq-_5_nu7MrAAa.mp4",
148
- * "num_frames": 145,
149
- * "content_type": "video/mp4"
202
+ * "content_type": "video/mp4",
203
+ * "num_frames": 145
204
+ * }
205
+ */
206
+ video: Components.VideoFile;
207
+ }
208
+
209
+ export interface XaiGrokImagineVideoExtendVideoInput {
210
+ /**
211
+ * Duration
212
+ * @description Length of the extension in seconds.
213
+ * @default 6
214
+ */
215
+ duration?: number;
216
+ /**
217
+ * Prompt
218
+ * @description Text description of what should happen next in the video.
219
+ * @example The camera slowly zooms out to reveal the city skyline at sunset
220
+ */
221
+ prompt: string;
222
+ /**
223
+ * Video URL
224
+ * @description URL of the source video to extend. Must be MP4 format (H.264, H.265, or AV1 codec), 2-15 seconds long.
225
+ * @example https://v3b.fal.media/files/b/0a8b9112/V5Z_NIPE3ppMDWivNo6_q_video_019.mp4
226
+ */
227
+ video_url: string;
228
+ }
229
+
230
+ export interface XaiGrokImagineVideoExtendVideoOutput {
231
+ /**
232
+ * @description The extended video (original + extension stitched together).
233
+ * @example {
234
+ * "height": 720,
235
+ * "duration": 16,
236
+ * "url": "https://v3b.fal.media/files/b/0a8b9113/extended_video.mp4",
237
+ * "fps": 24,
238
+ * "width": 1280,
239
+ * "file_name": "extended_video.mp4",
240
+ * "content_type": "video/mp4",
241
+ * "num_frames": 384
150
242
  * }
151
243
  */
152
244
  video: Components.VideoFile;
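
The extend-video pair above takes an existing MP4 (H.264, H.265, or AV1 codec, 2-15 seconds long) plus a prompt describing what should happen next, and returns the original clip with the extension stitched on. A hedged sketch of the round trip, assuming the interfaces are exported from the package root and that the returned Components.VideoFile exposes a url field as in the example metadata:

import type {
  XaiGrokImagineVideoExtendVideoInput,
  XaiGrokImagineVideoExtendVideoOutput,
} from 'fal-endpoint-types';

// Illustrative payload; the source video URL is a placeholder.
const extendRequest: XaiGrokImagineVideoExtendVideoInput = {
  video_url: 'https://example.com/source-clip.mp4', // MP4, 2-15 s, H.264/H.265/AV1
  prompt: 'The camera slowly zooms out to reveal the city skyline at sunset',
  duration: 6, // extension length in seconds (default 6)
};

// The output carries the stitched result as a single video file.
function describeResult(result: XaiGrokImagineVideoExtendVideoOutput): string {
  return `Extended video available at ${result.video.url}`;
}
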
@@ -184,8 +276,8 @@ export interface XaiGrokImagineVideoEditVideoOutput {
184
276
  * "fps": 24,
185
277
  * "width": 1280,
186
278
  * "file_name": "EuDrZuQTW9m1phBXOsauz_EpJH3s8X.mp4",
187
- * "num_frames": 121,
188
- * "content_type": "video/mp4"
279
+ * "content_type": "video/mp4",
280
+ * "num_frames": 121
189
281
  * }
190
282
  */
191
283
  video: Components.VideoFile;
@@ -2676,29 +2768,6 @@ export interface SharedType_e33 {
2676
2768
  };
2677
2769
  }
2678
2770
 
2679
- export interface SharedType_e19 {
2680
- /**
2681
- * Images
2682
- * @description The generated images
2683
- * @example [
2684
- * {
2685
- * "height": 1536,
2686
- * "file_size": 3731290,
2687
- * "file_name": "257cf8e7bd3a47c2959396343d5b38cf.png",
2688
- * "content_type": "image/png",
2689
- * "url": "https://v3.fal.media/files/tiger/48e63e0K6C9XQYBuomoU-_257cf8e7bd3a47c2959396343d5b38cf.png",
2690
- * "width": 1536
2691
- * }
2692
- * ]
2693
- */
2694
- images: Components.Image[];
2695
- /**
2696
- * Seed
2697
- * @description Seed value used for generation.
2698
- */
2699
- seed: number;
2700
- }
2701
-
2702
2771
  export interface SharedType_e18 {
2703
2772
  /**
2704
2773
  * Default Caption
@@ -3634,24 +3703,6 @@ export interface SharedType_cfd {
3634
3703
  sync_mode?: boolean;
3635
3704
  }
3636
3705
 
3637
- export interface SharedType_cf1 {
3638
- /**
3639
- * Images
3640
- * @description The generated images with objects removed.
3641
- * @example [
3642
- * {
3643
- * "file_size": 730703,
3644
- * "height": 768,
3645
- * "file_name": "85a2309b2c954c85a75120e664adbe17.png",
3646
- * "content_type": "image/png",
3647
- * "url": "https://v3.fal.media/files/lion/arYSoJeqWjhbcA8o4budv_85a2309b2c954c85a75120e664adbe17.png",
3648
- * "width": 1024
3649
- * }
3650
- * ]
3651
- */
3652
- images: Components.Image_2[];
3653
- }
3654
-
3655
3706
  export interface SharedType_cde {
3656
3707
  /**
3657
3708
  * Actual Prompt
@@ -5261,6 +5312,29 @@ export interface SharedType_a9b {
5261
5312
  seed?: number;
5262
5313
  }
5263
5314
 
5315
+ export interface SharedType_a97 {
5316
+ /**
5317
+ * Images
5318
+ * @description The generated images
5319
+ * @example [
5320
+ * {
5321
+ * "file_size": 3731290,
5322
+ * "height": 1536,
5323
+ * "file_name": "257cf8e7bd3a47c2959396343d5b38cf.png",
5324
+ * "content_type": "image/png",
5325
+ * "url": "https://v3.fal.media/files/tiger/48e63e0K6C9XQYBuomoU-_257cf8e7bd3a47c2959396343d5b38cf.png",
5326
+ * "width": 1536
5327
+ * }
5328
+ * ]
5329
+ */
5330
+ images: Components.Image[];
5331
+ /**
5332
+ * Seed
5333
+ * @description Seed value used for generation.
5334
+ */
5335
+ seed: number;
5336
+ }
5337
+
5264
5338
  export interface SharedType_a8f {
5265
5339
  /**
5266
5340
  * Image Url
@@ -6232,6 +6306,22 @@ export interface SharedType_92a {
6232
6306
  video: Components.File;
6233
6307
  }
6234
6308
 
6309
+ export interface SharedType_920 {
6310
+ /**
6311
+ * @description Upscaled image file after processing
6312
+ * @example {
6313
+ * "content_type": "image/png",
6314
+ * "url": "https://storage.googleapis.com/falserverless/example_outputs/seedvr2/image_out.png"
6315
+ * }
6316
+ */
6317
+ image: Components.ImageFile;
6318
+ /**
6319
+ * Seed
6320
+ * @description The random seed used for the generation process.
6321
+ */
6322
+ seed: number;
6323
+ }
6324
+
6235
6325
  export interface SharedType_91c {
6236
6326
  /**
6237
6327
  * Image Url
@@ -6849,53 +6939,6 @@ export interface SharedType_8a3 {
6849
6939
  video: Components.File;
6850
6940
  }
6851
6941
 
6852
- export interface SharedType_89f {
6853
- /**
6854
- * Aspect Ratio
6855
- * @description Aspect ratio of the generated video.
6856
- * @default 16:9
6857
- * @enum {string}
6858
- */
6859
- aspect_ratio?: '16:9' | '9:16' | '1:1';
6860
- /**
6861
- * Duration
6862
- * @description Video duration in seconds (3-15s).
6863
- * @default 5
6864
- * @enum {string}
6865
- */
6866
- duration?: '3' | '4' | '5' | '6' | '7' | '8' | '9' | '10' | '11' | '12' | '13' | '14' | '15';
6867
- /**
6868
- * Generate Audio
6869
- * @description Whether to generate native audio for the video.
6870
- * @default false
6871
- */
6872
- generate_audio?: boolean;
6873
- /**
6874
- * Multi Prompt
6875
- * @description List of prompts for multi-shot video generation.
6876
- * @example null
6877
- */
6878
- multi_prompt?: Components.KlingV3MultiPromptElement[];
6879
- /**
6880
- * Prompt
6881
- * @description Text prompt for video generation. Required unless multi_prompt is provided.
6882
- * @example A mecha lands on the ground to save the city, and says "I'm here", in anime style
6883
- */
6884
- prompt?: string;
6885
- /**
6886
- * Shot Type
6887
- * @description The type of multi-shot video generation.
6888
- * @default customize
6889
- * @constant
6890
- */
6891
- shot_type?: 'customize';
6892
- /**
6893
- * Voice Ids
6894
- * @description Optional Voice IDs for video generation. Reference voices in your prompt with <<<voice_1>>> and <<<voice_2>>> (maximum 2 voices per task). Get voice IDs from the kling video create-voice endpoint: https://fal.ai/models/fal-ai/kling-video/create-voice
6895
- */
6896
- voice_ids?: string[];
6897
- }
6898
-
6899
6942
  export interface SharedType_896 {
6900
6943
  /**
6901
6944
  * Aspect Ratio
@@ -11475,6 +11518,24 @@ export interface SharedType_38d {
11475
11518
  image_url: string;
11476
11519
  }
11477
11520
 
11521
+ export interface SharedType_386 {
11522
+ /**
11523
+ * Images
11524
+ * @description The generated images with objects removed.
11525
+ * @example [
11526
+ * {
11527
+ * "file_size": 730703,
11528
+ * "height": 768,
11529
+ * "file_name": "85a2309b2c954c85a75120e664adbe17.png",
11530
+ * "content_type": "image/png",
11531
+ * "url": "https://v3.fal.media/files/lion/arYSoJeqWjhbcA8o4budv_85a2309b2c954c85a75120e664adbe17.png",
11532
+ * "width": 1024
11533
+ * }
11534
+ * ]
11535
+ */
11536
+ images: Components.Image[];
11537
+ }
11538
+
11478
11539
  export interface SharedType_384 {
11479
11540
  /**
11480
11541
  * Images
@@ -12233,6 +12294,48 @@ export interface SharedType_266 {
12233
12294
  video: Components.File;
12234
12295
  }
12235
12296
 
12297
+ export interface SharedType_25d {
12298
+ /**
12299
+ * Aspect Ratio
12300
+ * @description Aspect ratio of the generated video.
12301
+ * @default 16:9
12302
+ * @enum {string}
12303
+ */
12304
+ aspect_ratio?: '16:9' | '9:16' | '1:1';
12305
+ /**
12306
+ * Duration
12307
+ * @description Video duration in seconds (3-15s).
12308
+ * @default 5
12309
+ * @enum {string}
12310
+ */
12311
+ duration?: '3' | '4' | '5' | '6' | '7' | '8' | '9' | '10' | '11' | '12' | '13' | '14' | '15';
12312
+ /**
12313
+ * Generate Audio
12314
+ * @description Whether to generate native audio for the video.
12315
+ * @default false
12316
+ */
12317
+ generate_audio?: boolean;
12318
+ /**
12319
+ * Multi Prompt
12320
+ * @description List of prompts for multi-shot video generation.
12321
+ * @example null
12322
+ */
12323
+ multi_prompt?: Components.KlingV3MultiPromptElement[];
12324
+ /**
12325
+ * Prompt
12326
+ * @description Text prompt for video generation. Required unless multi_prompt is provided.
12327
+ * @example A mecha lands on the ground to save the city, and says "I'm here", in anime style
12328
+ */
12329
+ prompt?: string;
12330
+ /**
12331
+ * Shot Type
12332
+ * @description The type of multi-shot video generation.
12333
+ * @default customize
12334
+ * @constant
12335
+ */
12336
+ shot_type?: 'customize';
12337
+ }
12338
+
12236
12339
  export interface SharedType_23c {
12237
12340
  /**
12238
12341
  * @description The generated video.
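
SharedType_25d is the new shared text-to-video input for the Kling O3 endpoints; it mirrors the removed SharedType_89f but drops the voice_ids field, and later in this diff KlingVideoO3StandardTextToVideoInput and KlingVideoO3ProTextToVideoInput are re-pointed at it. A hedged sketch of a request under the new shape, assuming the endpoint alias is exported from the package root (all fields here are optional, but either prompt or multi_prompt must be supplied per the description):

import type { KlingVideoO3StandardTextToVideoInput } from 'fal-endpoint-types';

// Illustrative Kling O3 text-to-video payload; note there is no voice_ids field anymore.
const klingO3Request: KlingVideoO3StandardTextToVideoInput = {
  prompt: 'A mecha lands on the ground to save the city, in anime style',
  aspect_ratio: '16:9',   // '16:9' | '9:16' | '1:1'
  duration: '8',          // string enum covering 3-15 seconds
  generate_audio: true,   // native audio generation (default false)
  shot_type: 'customize', // only 'customize' is accepted
};
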
@@ -14693,6 +14796,7 @@ export interface OpenrouterRouterVideoEnterpriseInput {
14693
14796
  * @example google/gemini-2.0-flash-lite-001
14694
14797
  * @example google/gemini-3-flash-preview
14695
14798
  * @example google/gemini-3-pro-preview
14799
+ * @example google/gemini-3.1-pro-preview
14696
14800
  */
14697
14801
  model: string;
14698
14802
  /**
@@ -14745,6 +14849,7 @@ export interface OpenrouterRouterVideoInput {
14745
14849
  * @example google/gemini-2.5-pro
14746
14850
  * @example google/gemini-3-flash-preview
14747
14851
  * @example google/gemini-3-pro-preview
14852
+ * @example google/gemini-3.1-pro-preview
14748
14853
  */
14749
14854
  model: string;
14750
14855
  /**
@@ -14802,6 +14907,7 @@ export interface OpenrouterRouterAudioInput {
14802
14907
  * @example google/gemini-3-flash-preview
14803
14908
  * @example google/gemini-2.5-flash
14804
14909
  * @example google/gemini-3-pro-preview
14910
+ * @example google/gemini-3.1-pro-preview
14805
14911
  */
14806
14912
  model: string;
14807
14913
  /**
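
The three OpenRouter hunks above only add a new documented example model slug, google/gemini-3.1-pro-preview, to the video, enterprise video, and audio router inputs; the model field itself remains a plain string. A minimal sketch, using Pick to avoid guessing at the routers' other required fields and assuming the interface is exported from the package root:

import type { OpenrouterRouterVideoInput } from 'fal-endpoint-types';

// Only the field touched by this diff is shown; the slug is the newly documented example.
const routerModel: Pick<OpenrouterRouterVideoInput, 'model'> = {
  model: 'google/gemini-3.1-pro-preview',
};
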
@@ -24220,16 +24326,8 @@ export interface TriposrInput {
24220
24326
  }
24221
24327
 
24222
24328
  export interface TriposrOutput {
24223
- /**
24224
- * Model Mesh
24225
- * @description Generated 3D object file.
24226
- */
24227
- model_mesh: Components.File_1;
24228
- /**
24229
- * Remeshing Dir
24230
- * @description Directory containing textures for the remeshed model.
24231
- */
24232
- remeshing_dir?: Components.File_1;
24329
+ model_mesh: Components.File;
24330
+ remeshing_dir?: Components.File;
24233
24331
  /**
24234
24332
  * Timings
24235
24333
  * @description Inference timings.
@@ -27052,7 +27150,13 @@ export interface SeedvrUpscaleVideoOutput {
27052
27150
  video: Components.File;
27053
27151
  }
27054
27152
 
27055
- export interface SeedvrUpscaleImageInput {
27153
+ export interface SeedvrUpscaleImageSeamlessInput {
27154
+ /**
27155
+ * Enable Safety Checker
27156
+ * @description If set to true, the safety checker will be enabled.
27157
+ * @default true
27158
+ */
27159
+ enable_safety_checker?: boolean;
27056
27160
  /**
27057
27161
  * Image Url
27058
27162
  * @description The input image to be processed
@@ -27068,10 +27172,10 @@ export interface SeedvrUpscaleImageInput {
27068
27172
  /**
27069
27173
  * Output Format
27070
27174
  * @description The format of the output image.
27071
- * @default jpg
27175
+ * @default png
27072
27176
  * @enum {string}
27073
27177
  */
27074
- output_format?: 'png' | 'jpg' | 'webp';
27178
+ output_format?: 'png' | 'jpeg' | 'webp';
27075
27179
  /**
27076
27180
  * Seed
27077
27181
  * @description The random seed used for the generation process.
@@ -27105,207 +27209,33 @@ export interface SeedvrUpscaleImageInput {
27105
27209
  upscale_mode?: 'target' | 'factor';
27106
27210
  }
27107
27211
 
27108
- export interface SeedvrUpscaleImageOutput {
27109
- /**
27110
- * @description Upscaled image file after processing
27111
- * @example {
27112
- * "content_type": "image/png",
27113
- * "url": "https://storage.googleapis.com/falserverless/example_outputs/seedvr2/image_out.png"
27114
- * }
27115
- */
27116
- image: Components.ImageFile;
27117
- /**
27118
- * Seed
27119
- * @description The random seed used for the generation process.
27120
- */
27121
- seed: number;
27122
- }
27212
+ export interface SeedvrUpscaleImageSeamlessOutput extends SharedType_920 {}
27123
27213
 
27124
- export interface SdxlControlnetUnionInpaintingInput {
27125
- /**
27126
- * Canny Image Url
27127
- * @description The URL of the control image.
27128
- * @example https://fal-cdn.batuhan-941.workers.dev/files/rabbit/MiN_j3St9B8esJleCZKMU.jpeg
27129
- */
27130
- canny_image_url?: string;
27131
- /**
27132
- * Canny Preprocess
27133
- * @description Whether to preprocess the canny image.
27134
- * @default true
27135
- */
27136
- canny_preprocess?: boolean;
27137
- /**
27138
- * Controlnet Conditioning Scale
27139
- * @description The scale of the controlnet conditioning.
27140
- * @default 0.5
27141
- */
27142
- controlnet_conditioning_scale?: number;
27143
- /**
27144
- * Depth Image Url
27145
- * @description The URL of the control image.
27146
- * @example https://fal-cdn.batuhan-941.workers.dev/files/rabbit/MiN_j3St9B8esJleCZKMU.jpeg
27147
- */
27148
- depth_image_url?: string;
27149
- /**
27150
- * Depth Preprocess
27151
- * @description Whether to preprocess the depth image.
27152
- * @default true
27153
- */
27154
- depth_preprocess?: boolean;
27155
- /**
27156
- * Embeddings
27157
- * @description The list of embeddings to use.
27158
- * @default []
27159
- */
27160
- embeddings?: Components.Embedding[];
27161
- /**
27162
- * Enable Safety Checker
27163
- * @description If set to true, the safety checker will be enabled.
27164
- * @default true
27165
- */
27166
- enable_safety_checker?: boolean;
27167
- /**
27168
- * Expand Prompt
27169
- * @description If set to true, the prompt will be expanded with additional prompts.
27170
- * @default false
27171
- */
27172
- expand_prompt?: boolean;
27173
- /**
27174
- * Format
27175
- * @description The format of the generated image.
27176
- * @default jpeg
27177
- * @enum {string}
27178
- */
27179
- format?: 'jpeg' | 'png';
27180
- /**
27181
- * Guidance scale (CFG)
27182
- * @description The CFG (Classifier Free Guidance) scale is a measure of how close you want
27183
- * the model to stick to your prompt when looking for a related image to show you.
27184
- * @default 7.5
27185
- */
27186
- guidance_scale?: number;
27187
- /**
27188
- * Image Size
27189
- * @description The size of the generated image. Leave it none to automatically infer from the control image.
27190
- * @example null
27191
- */
27192
- image_size?:
27193
- | Components.ImageSize
27194
- | (
27195
- | 'square_hd'
27196
- | 'square'
27197
- | 'portrait_4_3'
27198
- | 'portrait_16_9'
27199
- | 'landscape_4_3'
27200
- | 'landscape_16_9'
27201
- );
27214
+ export interface SeedvrUpscaleImageInput {
27202
27215
  /**
27203
27216
  * Image Url
27204
- * @description The URL of the image to use as a starting point for the generation.
27205
- * @example https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png
27217
+ * @description The input image to be processed
27218
+ * @example https://storage.googleapis.com/falserverless/example_inputs/seedvr2/image_in.png
27206
27219
  */
27207
27220
  image_url: string;
27208
27221
  /**
27209
- * Loras
27210
- * @description The list of LoRA weights to use.
27211
- * @default []
27212
- */
27213
- loras?: Components.LoraWeight_1[];
27214
- /**
27215
- * Mask Url
27216
- * @description The URL of the mask to use for inpainting.
27217
- * @example https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png
27218
- */
27219
- mask_url: string;
27220
- /**
27221
- * Negative Prompt
27222
- * @description The negative prompt to use.Use it to address details that you don't want
27223
- * in the image. This could be colors, objects, scenery and even the small details
27224
- * (e.g. moustache, blurry, low resolution).
27225
- * @default
27226
- * @example cartoon, illustration, animation. face. male, female
27227
- */
27228
- negative_prompt?: string;
27229
- /**
27230
- * Normal Image Url
27231
- * @description The URL of the control image.
27232
- * @example https://fal-cdn.batuhan-941.workers.dev/files/rabbit/MiN_j3St9B8esJleCZKMU.jpeg
27233
- */
27234
- normal_image_url?: string;
27235
- /**
27236
- * Normal Preprocess
27237
- * @description Whether to preprocess the normal image.
27238
- * @default true
27239
- */
27240
- normal_preprocess?: boolean;
27241
- /**
27242
- * Num Images
27243
- * @description The number of images to generate.
27244
- * @default 1
27245
- */
27246
- num_images?: number;
27247
- /**
27248
- * Num Inference Steps
27249
- * @description The number of inference steps to perform.
27250
- * @default 35
27251
- */
27252
- num_inference_steps?: number;
27253
- /**
27254
- * Openpose Image Url
27255
- * @description The URL of the control image.
27256
- * @example https://fal-cdn.batuhan-941.workers.dev/files/rabbit/MiN_j3St9B8esJleCZKMU.jpeg
27257
- */
27258
- openpose_image_url?: string;
27259
- /**
27260
- * Openpose Preprocess
27261
- * @description Whether to preprocess the openpose image.
27262
- * @default true
27263
- */
27264
- openpose_preprocess?: boolean;
27265
- /**
27266
- * Prompt
27267
- * @description The prompt to use for generating the image. Be as descriptive as possible for best results.
27268
- * @example Ice fortress, aurora skies, polar wildlife, twilight
27269
- */
27270
- prompt: string;
27271
- /**
27272
- * Request Id
27273
- * @description An id bound to a request, can be used with response to identify the request
27274
- * itself.
27275
- * @default
27222
+ * Noise Scale
27223
+ * @description The noise scale to use for the generation process.
27224
+ * @default 0.1
27276
27225
  */
27277
- request_id?: string;
27226
+ noise_scale?: number;
27278
27227
  /**
27279
- * Safety Checker Version
27280
- * @description The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model.
27281
- * @default v1
27228
+ * Output Format
27229
+ * @description The format of the output image.
27230
+ * @default jpg
27282
27231
  * @enum {string}
27283
27232
  */
27284
- safety_checker_version?: 'v1' | 'v2';
27233
+ output_format?: 'png' | 'jpg' | 'webp';
27285
27234
  /**
27286
27235
  * Seed
27287
- * @description The same seed and the same prompt given to the same version of Stable Diffusion
27288
- * will output the same image every time.
27236
+ * @description The random seed used for the generation process.
27289
27237
  */
27290
27238
  seed?: number;
27291
- /**
27292
- * Segmentation Image Url
27293
- * @description The URL of the control image.
27294
- * @example https://fal-cdn.batuhan-941.workers.dev/files/rabbit/MiN_j3St9B8esJleCZKMU.jpeg
27295
- */
27296
- segmentation_image_url?: string;
27297
- /**
27298
- * Segmentation Preprocess
27299
- * @description Whether to preprocess the segmentation image.
27300
- * @default true
27301
- */
27302
- segmentation_preprocess?: boolean;
27303
- /**
27304
- * Strength
27305
- * @description determines how much the generated image resembles the initial image
27306
- * @default 0.95
27307
- */
27308
- strength?: number;
27309
27239
  /**
27310
27240
  * Sync Mode
27311
27241
  * @description If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
@@ -27313,22 +27243,30 @@ export interface SdxlControlnetUnionInpaintingInput {
27313
27243
  */
27314
27244
  sync_mode?: boolean;
27315
27245
  /**
27316
- * Teed Image Url
27317
- * @description The URL of the control image.
27318
- * @example https://fal-cdn.batuhan-941.workers.dev/files/rabbit/MiN_j3St9B8esJleCZKMU.jpeg
27246
+ * Target Resolution
27247
+ * @description The target resolution to upscale to when `upscale_mode` is `target`.
27248
+ * @default 1080p
27249
+ * @enum {string}
27319
27250
  */
27320
- teed_image_url?: string;
27251
+ target_resolution?: '720p' | '1080p' | '1440p' | '2160p';
27321
27252
  /**
27322
- * Teed Preprocess
27323
- * @description Whether to preprocess the teed image.
27324
- * @default true
27253
+ * Upscale Factor
27254
+ * @description Upscaling factor to be used. Will multiply the dimensions with this factor when `upscale_mode` is `factor`.
27255
+ * @default 2
27325
27256
  */
27326
- teed_preprocess?: boolean;
27257
+ upscale_factor?: number;
27258
+ /**
27259
+ * Upscale Mode
27260
+ * @description The mode to use for the upscale. If 'target', the upscale factor will be calculated based on the target resolution. If 'factor', the upscale factor will be used directly.
27261
+ * @default factor
27262
+ * @enum {string}
27263
+ */
27264
+ upscale_mode?: 'target' | 'factor';
27327
27265
  }
27328
27266
 
27329
- export interface SdxlControlnetUnionInpaintingOutput extends SharedType_7c6 {}
27267
+ export interface SeedvrUpscaleImageOutput extends SharedType_920 {}
27330
27268
 
27331
- export interface SdxlControlnetUnionImageToImageInput {
27269
+ export interface SdxlControlnetUnionInpaintingInput {
27332
27270
  /**
27333
27271
  * Canny Image Url
27334
27272
  * @description The URL of the control image.
@@ -27347,12 +27285,6 @@ export interface SdxlControlnetUnionImageToImageInput {
27347
27285
  * @default 0.5
27348
27286
  */
27349
27287
  controlnet_conditioning_scale?: number;
27350
- /**
27351
- * Crop Output
27352
- * @description If set to true, the output cropped to the proper aspect ratio after generating.
27353
- * @default false
27354
- */
27355
- crop_output?: boolean;
27356
27288
  /**
27357
27289
  * Depth Image Url
27358
27290
  * @description The URL of the control image.
@@ -27415,7 +27347,220 @@ export interface SdxlControlnetUnionImageToImageInput {
27415
27347
  /**
27416
27348
  * Image Url
27417
27349
  * @description The URL of the image to use as a starting point for the generation.
27418
- * @example https://fal-cdn.batuhan-941.workers.dev/files/tiger/IExuP-WICqaIesLZAZPur.jpeg
27350
+ * @example https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png
27351
+ */
27352
+ image_url: string;
27353
+ /**
27354
+ * Loras
27355
+ * @description The list of LoRA weights to use.
27356
+ * @default []
27357
+ */
27358
+ loras?: Components.LoraWeight_1[];
27359
+ /**
27360
+ * Mask Url
27361
+ * @description The URL of the mask to use for inpainting.
27362
+ * @example https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png
27363
+ */
27364
+ mask_url: string;
27365
+ /**
27366
+ * Negative Prompt
27367
+ * @description The negative prompt to use.Use it to address details that you don't want
27368
+ * in the image. This could be colors, objects, scenery and even the small details
27369
+ * (e.g. moustache, blurry, low resolution).
27370
+ * @default
27371
+ * @example cartoon, illustration, animation. face. male, female
27372
+ */
27373
+ negative_prompt?: string;
27374
+ /**
27375
+ * Normal Image Url
27376
+ * @description The URL of the control image.
27377
+ * @example https://fal-cdn.batuhan-941.workers.dev/files/rabbit/MiN_j3St9B8esJleCZKMU.jpeg
27378
+ */
27379
+ normal_image_url?: string;
27380
+ /**
27381
+ * Normal Preprocess
27382
+ * @description Whether to preprocess the normal image.
27383
+ * @default true
27384
+ */
27385
+ normal_preprocess?: boolean;
27386
+ /**
27387
+ * Num Images
27388
+ * @description The number of images to generate.
27389
+ * @default 1
27390
+ */
27391
+ num_images?: number;
27392
+ /**
27393
+ * Num Inference Steps
27394
+ * @description The number of inference steps to perform.
27395
+ * @default 35
27396
+ */
27397
+ num_inference_steps?: number;
27398
+ /**
27399
+ * Openpose Image Url
27400
+ * @description The URL of the control image.
27401
+ * @example https://fal-cdn.batuhan-941.workers.dev/files/rabbit/MiN_j3St9B8esJleCZKMU.jpeg
27402
+ */
27403
+ openpose_image_url?: string;
27404
+ /**
27405
+ * Openpose Preprocess
27406
+ * @description Whether to preprocess the openpose image.
27407
+ * @default true
27408
+ */
27409
+ openpose_preprocess?: boolean;
27410
+ /**
27411
+ * Prompt
27412
+ * @description The prompt to use for generating the image. Be as descriptive as possible for best results.
27413
+ * @example Ice fortress, aurora skies, polar wildlife, twilight
27414
+ */
27415
+ prompt: string;
27416
+ /**
27417
+ * Request Id
27418
+ * @description An id bound to a request, can be used with response to identify the request
27419
+ * itself.
27420
+ * @default
27421
+ */
27422
+ request_id?: string;
27423
+ /**
27424
+ * Safety Checker Version
27425
+ * @description The version of the safety checker to use. v1 is the default CompVis safety checker. v2 uses a custom ViT model.
27426
+ * @default v1
27427
+ * @enum {string}
27428
+ */
27429
+ safety_checker_version?: 'v1' | 'v2';
27430
+ /**
27431
+ * Seed
27432
+ * @description The same seed and the same prompt given to the same version of Stable Diffusion
27433
+ * will output the same image every time.
27434
+ */
27435
+ seed?: number;
27436
+ /**
27437
+ * Segmentation Image Url
27438
+ * @description The URL of the control image.
27439
+ * @example https://fal-cdn.batuhan-941.workers.dev/files/rabbit/MiN_j3St9B8esJleCZKMU.jpeg
27440
+ */
27441
+ segmentation_image_url?: string;
27442
+ /**
27443
+ * Segmentation Preprocess
27444
+ * @description Whether to preprocess the segmentation image.
27445
+ * @default true
27446
+ */
27447
+ segmentation_preprocess?: boolean;
27448
+ /**
27449
+ * Strength
27450
+ * @description determines how much the generated image resembles the initial image
27451
+ * @default 0.95
27452
+ */
27453
+ strength?: number;
27454
+ /**
27455
+ * Sync Mode
27456
+ * @description If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
27457
+ * @default false
27458
+ */
27459
+ sync_mode?: boolean;
27460
+ /**
27461
+ * Teed Image Url
27462
+ * @description The URL of the control image.
27463
+ * @example https://fal-cdn.batuhan-941.workers.dev/files/rabbit/MiN_j3St9B8esJleCZKMU.jpeg
27464
+ */
27465
+ teed_image_url?: string;
27466
+ /**
27467
+ * Teed Preprocess
27468
+ * @description Whether to preprocess the teed image.
27469
+ * @default true
27470
+ */
27471
+ teed_preprocess?: boolean;
27472
+ }
27473
+
27474
+ export interface SdxlControlnetUnionInpaintingOutput extends SharedType_7c6 {}
27475
+
27476
+ export interface SdxlControlnetUnionImageToImageInput {
27477
+ /**
27478
+ * Canny Image Url
27479
+ * @description The URL of the control image.
27480
+ * @example https://fal-cdn.batuhan-941.workers.dev/files/rabbit/MiN_j3St9B8esJleCZKMU.jpeg
27481
+ */
27482
+ canny_image_url?: string;
27483
+ /**
27484
+ * Canny Preprocess
27485
+ * @description Whether to preprocess the canny image.
27486
+ * @default true
27487
+ */
27488
+ canny_preprocess?: boolean;
27489
+ /**
27490
+ * Controlnet Conditioning Scale
27491
+ * @description The scale of the controlnet conditioning.
27492
+ * @default 0.5
27493
+ */
27494
+ controlnet_conditioning_scale?: number;
27495
+ /**
27496
+ * Crop Output
27497
+ * @description If set to true, the output cropped to the proper aspect ratio after generating.
27498
+ * @default false
27499
+ */
27500
+ crop_output?: boolean;
27501
+ /**
27502
+ * Depth Image Url
27503
+ * @description The URL of the control image.
27504
+ * @example https://fal-cdn.batuhan-941.workers.dev/files/rabbit/MiN_j3St9B8esJleCZKMU.jpeg
27505
+ */
27506
+ depth_image_url?: string;
27507
+ /**
27508
+ * Depth Preprocess
27509
+ * @description Whether to preprocess the depth image.
27510
+ * @default true
27511
+ */
27512
+ depth_preprocess?: boolean;
27513
+ /**
27514
+ * Embeddings
27515
+ * @description The list of embeddings to use.
27516
+ * @default []
27517
+ */
27518
+ embeddings?: Components.Embedding[];
27519
+ /**
27520
+ * Enable Safety Checker
27521
+ * @description If set to true, the safety checker will be enabled.
27522
+ * @default true
27523
+ */
27524
+ enable_safety_checker?: boolean;
27525
+ /**
27526
+ * Expand Prompt
27527
+ * @description If set to true, the prompt will be expanded with additional prompts.
27528
+ * @default false
27529
+ */
27530
+ expand_prompt?: boolean;
27531
+ /**
27532
+ * Format
27533
+ * @description The format of the generated image.
27534
+ * @default jpeg
27535
+ * @enum {string}
27536
+ */
27537
+ format?: 'jpeg' | 'png';
27538
+ /**
27539
+ * Guidance scale (CFG)
27540
+ * @description The CFG (Classifier Free Guidance) scale is a measure of how close you want
27541
+ * the model to stick to your prompt when looking for a related image to show you.
27542
+ * @default 7.5
27543
+ */
27544
+ guidance_scale?: number;
27545
+ /**
27546
+ * Image Size
27547
+ * @description The size of the generated image. Leave it none to automatically infer from the control image.
27548
+ * @example null
27549
+ */
27550
+ image_size?:
27551
+ | Components.ImageSize
27552
+ | (
27553
+ | 'square_hd'
27554
+ | 'square'
27555
+ | 'portrait_4_3'
27556
+ | 'portrait_16_9'
27557
+ | 'landscape_4_3'
27558
+ | 'landscape_16_9'
27559
+ );
27560
+ /**
27561
+ * Image Url
27562
+ * @description The URL of the image to use as a starting point for the generation.
27563
+ * @example https://fal-cdn.batuhan-941.workers.dev/files/tiger/IExuP-WICqaIesLZAZPur.jpeg
27419
27564
  */
27420
27565
  image_url: string;
27421
27566
  /**
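
Taken together, the Seedvr hunks above split the old image upscaler input in two: the renamed SeedvrUpscaleImageSeamlessInput gains an enable_safety_checker flag and switches its output_format enum to 'png' | 'jpeg' | 'webp' with a 'png' default, while the re-created SeedvrUpscaleImageInput keeps the 'png' | 'jpg' | 'webp' enum with a 'jpg' default and adds a noise_scale knob. Both outputs now extend the shared SharedType_920 (image plus seed). A hedged sketch of the two request shapes, assuming the interfaces are exported from the package root and using placeholder image URLs:

import type {
  SeedvrUpscaleImageInput,
  SeedvrUpscaleImageSeamlessInput,
} from 'fal-endpoint-types';

// Seamless variant: new safety-checker flag, 'jpeg' spelling in the format enum.
const seamlessRequest: SeedvrUpscaleImageSeamlessInput = {
  image_url: 'https://example.com/input.png',
  enable_safety_checker: true,
  output_format: 'jpeg',
  upscale_mode: 'factor',
};

// Plain variant: keeps the 'jpg' spelling and gains a noise_scale parameter.
const standardRequest: SeedvrUpscaleImageInput = {
  image_url: 'https://example.com/input.png',
  noise_scale: 0.1,
  output_format: 'jpg',
  upscale_mode: 'target',
  target_resolution: '1080p',
};
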
@@ -28138,15 +28283,15 @@ export interface Sam2VideoInput {
28138
28283
  * @example [
28139
28284
  * {
28140
28285
  * "y": 350,
28141
- * "label": 1,
28286
+ * "x": 210,
28142
28287
  * "frame_index": 0,
28143
- * "x": 210
28288
+ * "label": 1
28144
28289
  * },
28145
28290
  * {
28146
28291
  * "y": 220,
28147
- * "label": 1,
28292
+ * "x": 250,
28148
28293
  * "frame_index": 0,
28149
- * "x": 250
28294
+ * "label": 1
28150
28295
  * }
28151
28296
  * ]
28152
28297
  */
@@ -28205,8 +28350,8 @@ export interface Sam2ImageInput {
28205
28350
  * @example [
28206
28351
  * {
28207
28352
  * "y": 375,
28208
- * "label": 1,
28209
- * "x": 500
28353
+ * "x": 500,
28354
+ * "label": 1
28210
28355
  * }
28211
28356
  * ]
28212
28357
  */
@@ -35101,7 +35246,7 @@ export interface PixverseExtendFastInput {
35101
35246
  * @default v4.5
35102
35247
  * @enum {string}
35103
35248
  */
35104
- model?: 'v3.5' | 'v4' | 'v4.5' | 'v5' | 'v5.5' | 'v5.6';
35249
+ model?: 'v3.5' | 'v4' | 'v4.5' | 'v5' | 'v5.5' | 'v5.6' | 'v6';
35105
35250
  /**
35106
35251
  * Negative Prompt
35107
35252
  * @description Negative prompt to be used for the generation
@@ -35155,7 +35300,7 @@ export interface PixverseExtendInput {
35155
35300
  * @default v4.5
35156
35301
  * @enum {string}
35157
35302
  */
35158
- model?: 'v3.5' | 'v4' | 'v4.5' | 'v5' | 'v5.5' | 'v5.6';
35303
+ model?: 'v3.5' | 'v4' | 'v4.5' | 'v5' | 'v5.5' | 'v5.6' | 'v6';
35159
35304
  /**
35160
35305
  * Negative Prompt
35161
35306
  * @description Negative prompt to be used for the generation
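
Both Pixverse extend inputs (fast and standard) gain 'v6' as a new member of their model enum; the default stays at 'v4.5'. A minimal sketch, using Pick so no assumptions are made about the inputs' other required fields, and assuming the interface is exported from the package root:

import type { PixverseExtendInput } from 'fal-endpoint-types';

// 'v6' is the newly added enum member; all other model values are unchanged.
const pixverseModel: Pick<PixverseExtendInput, 'model'> = {
  model: 'v6',
};
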
@@ -36138,6 +36283,196 @@ export interface PhotomakerOutput {
36138
36283
  seed: number;
36139
36284
  }
36140
36285
 
36286
+ export interface PhotaEnhanceInput {
36287
+ /**
36288
+ * Image URL
36289
+ * @description URL/Base64 data URI of the image to enhance.
36290
+ * @example https://v3b.fal.media/files/b/0a8b911d/Abk8vStrvmSPlzUqI_NN3_image_043.png
36291
+ */
36292
+ image_url: string;
36293
+ /**
36294
+ * Number of Images
36295
+ * @description Number of images to generate.
36296
+ * @default 1
36297
+ */
36298
+ num_images?: number;
36299
+ /**
36300
+ * Output Format
36301
+ * @description The format of the generated image.
36302
+ * @default jpeg
36303
+ * @enum {string}
36304
+ */
36305
+ output_format?: 'jpeg' | 'png' | 'webp';
36306
+ /**
36307
+ * Profile Ids
36308
+ * @description List of profile IDs to use for the image enhancement. The profiles sent over will be used as candidates for identity preservation.
36309
+ */
36310
+ profile_ids?: string[];
36311
+ /**
36312
+ * Sync Mode
36313
+ * @description If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
36314
+ * @default false
36315
+ */
36316
+ sync_mode?: boolean;
36317
+ }
36318
+
36319
+ export interface PhotaEnhanceOutput {
36320
+ /**
36321
+ * Images
36322
+ * @description The URL of the enhanced image.
36323
+ * @example [
36324
+ * {
36325
+ * "url": "https://v3b.fal.media/files/b/0a8b911d/XMqiVoO2ECXUZEUYmPl2l.jpg"
36326
+ * }
36327
+ * ]
36328
+ */
36329
+ images: Components.ImageFile[];
36330
+ }
36331
+
36332
+ export interface PhotaEditInput {
36333
+ /**
36334
+ * Aspect Ratio
36335
+ * @description Aspect ratio of the generated image.
36336
+ * @default auto
36337
+ * @enum {string}
36338
+ */
36339
+ aspect_ratio?: 'auto' | '1:1' | '16:9' | '4:3' | '3:4' | '9:16';
36340
+ /**
36341
+ * Image URLs
36342
+ * @description List of URLs/ Base64 data URIs of the images to edit. A maximum of 10 images are supported, additional images will be ignored.
36343
+ * @example [
36344
+ * "https://v3b.fal.media/files/b/0a8b911d/Abk8vStrvmSPlzUqI_NN3_image_043.png"
36345
+ * ]
36346
+ */
36347
+ image_urls?: string[];
36348
+ /**
36349
+ * Number of Images
36350
+ * @description Number of images to generate.
36351
+ * @default 1
36352
+ */
36353
+ num_images?: number;
36354
+ /**
36355
+ * Output Format
36356
+ * @description The format of the generated image.
36357
+ * @default jpeg
36358
+ * @enum {string}
36359
+ */
36360
+ output_format?: 'jpeg' | 'png' | 'webp';
36361
+ /**
36362
+ * Profile Ids
36363
+ * @description List of profile IDs to use for the image generation. Profiles may be tagged in the prompt as @Profile1, @Profile2, etc.
36364
+ */
36365
+ profile_ids?: string[];
36366
+ /**
36367
+ * Prompt
36368
+ * @description Text description of the desired image. To refer to specific profiles, use @Profile1, @Profile2, etc. Profiles will not be applied if not referenced in the prompt.
36369
+ * @example Make this scene more realistic but still keep the game vibes
36370
+ */
36371
+ prompt: string;
36372
+ /**
36373
+ * Resolution
36374
+ * @description Resolution of the generated image.
36375
+ * @default 1K
36376
+ * @enum {string}
36377
+ */
36378
+ resolution?: '1K' | '4K';
36379
+ /**
36380
+ * Sync Mode
36381
+ * @description If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
36382
+ * @default false
36383
+ */
36384
+ sync_mode?: boolean;
36385
+ }
36386
+
36387
+ export interface PhotaEditOutput {
36388
+ /**
36389
+ * Images
36390
+ * @description The URL of the edited image.
36391
+ * @example [
36392
+ * {
36393
+ * "url": "https://v3b.fal.media/files/b/0a8b911d/XMqiVoO2ECXUZEUYmPl2l.jpg"
36394
+ * }
36395
+ * ]
36396
+ */
36397
+ images: Components.ImageFile[];
36398
+ }
36399
+
36400
+ export interface PhotaCreateProfileInput {
36401
+ /**
36402
+ * Image ZIP URL
36403
+ * @description URL to a ZIP archive containing the profile images.
36404
+ */
36405
+ image_data_url: string;
36406
+ }
36407
+
36408
+ export interface PhotaCreateProfileOutput {
36409
+ /**
36410
+ * Profile Id
36411
+ * @description The Photalabs profile ID.
36412
+ */
36413
+ profile_id: string;
36414
+ }
36415
+
36416
+ export interface PhotaInput {
36417
+ /**
36418
+ * Aspect Ratio
36419
+ * @description Aspect ratio of the generated image.
36420
+ * @default auto
36421
+ * @enum {string}
36422
+ */
36423
+ aspect_ratio?: 'auto' | '1:1' | '16:9' | '4:3' | '3:4' | '9:16';
36424
+ /**
36425
+ * Number of Images
36426
+ * @description Number of images to generate.
36427
+ * @default 1
36428
+ */
36429
+ num_images?: number;
36430
+ /**
36431
+ * Output Format
36432
+ * @description The format of the generated image.
36433
+ * @default jpeg
36434
+ * @enum {string}
36435
+ */
36436
+ output_format?: 'jpeg' | 'png' | 'webp';
36437
+ /**
36438
+ * Profile Ids
36439
+ * @description List of profile IDs to use for the image generation. Profiles may be tagged in the prompt as @Profile1, @Profile2, etc.
36440
+ */
36441
+ profile_ids?: string[];
36442
+ /**
36443
+ * Prompt
36444
+ * @description Text description of the desired image. In case you wish to use specific profiles, refer to them as @Profile1, @Profile2, etc. Profiles will not be applied if not referenced in the prompt.
36445
+ * @example Middle Eastern man in traditional clothing sitting in a cool tent in the desert with a laptop
36446
+ */
36447
+ prompt: string;
36448
+ /**
36449
+ * Resolution
36450
+ * @description Resolution of the generated image.
36451
+ * @default 1K
36452
+ * @enum {string}
36453
+ */
36454
+ resolution?: '1K' | '4K';
36455
+ /**
36456
+ * Sync Mode
36457
+ * @description If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
36458
+ * @default false
36459
+ */
36460
+ sync_mode?: boolean;
36461
+ }
36462
+
36463
+ export interface PhotaOutput {
36464
+ /**
36465
+ * Images
36466
+ * @description The URL of the generated image.
36467
+ * @example [
36468
+ * {
36469
+ * "url": "https://v3b.fal.media/files/b/0a8b90b7/9avg_nKJmcVinjQHJR_Ja.jpg"
36470
+ * }
36471
+ * ]
36472
+ */
36473
+ images: Components.ImageFile[];
36474
+ }
36475
+
36141
36476
  export interface PersonaplexRealtimeInput {
36142
36477
  /**
36143
36478
  * Audio
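
The new Phota interfaces form a small workflow: PhotaCreateProfile takes a ZIP of reference photos and returns a profile_id, which the generation (Phota), edit (PhotaEdit), and enhance (PhotaEnhance) inputs can then reference from the prompt as @Profile1, @Profile2, and so on. A hedged sketch of wiring a created profile into a generation request, assuming the interfaces are exported from the package root:

import type { PhotaCreateProfileOutput, PhotaInput } from 'fal-endpoint-types';

// Illustrative: the profile created earlier is referenced in the prompt as @Profile1.
function buildPhotaRequest(profile: PhotaCreateProfileOutput): PhotaInput {
  return {
    prompt:
      '@Profile1 sitting in a cool tent in the desert with a laptop, golden hour lighting',
    profile_ids: [profile.profile_id],
    aspect_ratio: '16:9',
    resolution: '1K',
    num_images: 1,
    output_format: 'jpeg',
  };
}
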
@@ -37315,7 +37650,7 @@ export interface ObjectRemovalMaskInput {
37315
37650
  model?: 'low_quality' | 'medium_quality' | 'high_quality' | 'best_quality';
37316
37651
  }
37317
37652
 
37318
- export interface ObjectRemovalMaskOutput extends SharedType_cf1 {}
37653
+ export interface ObjectRemovalMaskOutput extends SharedType_386 {}
37319
37654
 
37320
37655
  export interface ObjectRemovalBboxInput {
37321
37656
  /**
@@ -37352,7 +37687,7 @@ export interface ObjectRemovalBboxInput {
37352
37687
  model?: 'low_quality' | 'medium_quality' | 'high_quality' | 'best_quality';
37353
37688
  }
37354
37689
 
37355
- export interface ObjectRemovalBboxOutput extends SharedType_cf1 {}
37690
+ export interface ObjectRemovalBboxOutput extends SharedType_386 {}
37356
37691
 
37357
37692
  export interface ObjectRemovalInput {
37358
37693
  /**
@@ -37381,7 +37716,7 @@ export interface ObjectRemovalInput {
37381
37716
  prompt: string;
37382
37717
  }
37383
37718
 
37384
- export interface ObjectRemovalOutput extends SharedType_cf1 {}
37719
+ export interface ObjectRemovalOutput extends SharedType_386 {}
37385
37720
 
37386
37721
  export interface NovaSrInput {
37387
37722
  /**
@@ -49036,9 +49371,9 @@ export interface LightxRelightInput {
49036
49371
  /**
49037
49372
  * @description Relighting parameters (required for relight_condition_type='ic'). Not used for 'bg' (which expects a background image URL instead).
49038
49373
  * @example {
49039
- * "relight_prompt": "Sunlight",
49040
- * "bg_source": "Right",
49041
49374
  * "use_sky_mask": false,
49375
+ * "bg_source": "Right",
49376
+ * "relight_prompt": "Sunlight",
49042
49377
  * "cfg": 2
49043
49378
  * }
49044
49379
  */
@@ -50618,11 +50953,6 @@ export interface KlingVideoV3StandardTextToVideoInput {
50618
50953
  * @enum {string}
50619
50954
  */
50620
50955
  shot_type?: 'customize' | 'intelligent';
50621
- /**
50622
- * Voice Ids
50623
- * @description Optional Voice IDs for video generation. Reference voices in your prompt with <<<voice_1>>> and <<<voice_2>>> (maximum 2 voices per task). Get voice IDs from the kling video create-voice endpoint: https://fal.ai/models/fal-ai/kling-video/create-voice
50624
- */
50625
- voice_ids?: string[];
50626
50956
  }
50627
50957
 
50628
50958
  export interface KlingVideoV3StandardTextToVideoOutput {
@@ -50760,11 +51090,6 @@ export interface KlingVideoV3StandardImageToVideoInput {
50760
51090
  * @example https://storage.googleapis.com/falserverless/example_inputs/kling-v3/standard-i2v/start_image.png
50761
51091
  */
50762
51092
  start_image_url: string;
50763
- /**
50764
- * Voice Ids
50765
- * @description Optional Voice IDs for video generation. Reference voices in your prompt with <<<voice_1>>> and <<<voice_2>>> (maximum 2 voices per task). Get voice IDs from the kling video create-voice endpoint: https://fal.ai/models/fal-ai/kling-video/create-voice
50766
- */
50767
- voice_ids?: string[];
50768
51093
  }
50769
51094
 
50770
51095
  export interface KlingVideoV3StandardImageToVideoOutput {
@@ -50832,11 +51157,6 @@ export interface KlingVideoV3ProTextToVideoInput {
50832
51157
  * @enum {string}
50833
51158
  */
50834
51159
  shot_type?: 'customize' | 'intelligent';
50835
- /**
50836
- * Voice Ids
50837
- * @description Optional Voice IDs for video generation. Reference voices in your prompt with <<<voice_1>>> and <<<voice_2>>> (maximum 2 voices per task). Get voice IDs from the kling video create-voice endpoint: https://fal.ai/models/fal-ai/kling-video/create-voice
50838
- */
50839
- voice_ids?: string[];
50840
51160
  }
50841
51161
 
50842
51162
  export interface KlingVideoV3ProTextToVideoOutput {
@@ -50974,11 +51294,6 @@ export interface KlingVideoV3ProImageToVideoInput {
50974
51294
  * @example https://storage.googleapis.com/falserverless/example_inputs/kling-v3/pro-i2v/start_image.png
50975
51295
  */
50976
51296
  start_image_url: string;
50977
- /**
50978
- * Voice Ids
50979
- * @description Optional Voice IDs for video generation. Reference voices in your prompt with <<<voice_1>>> and <<<voice_2>>> (maximum 2 voices per task). Get voice IDs from the kling video create-voice endpoint: https://fal.ai/models/fal-ai/kling-video/create-voice
50980
- */
50981
- voice_ids?: string[];
50982
51297
  }
50983
51298
 
50984
51299
  export interface KlingVideoV3ProImageToVideoOutput {
@@ -52004,7 +52319,7 @@ export interface KlingVideoO3StandardVideoToVideoEditOutput {
52004
52319
  video: Components.File;
52005
52320
  }
52006
52321
 
52007
- export interface KlingVideoO3StandardTextToVideoInput extends SharedType_89f {}
52322
+ export interface KlingVideoO3StandardTextToVideoInput extends SharedType_25d {}
52008
52323
 
52009
52324
  export interface KlingVideoO3StandardTextToVideoOutput extends SharedType_723 {}
52010
52325
 
@@ -52246,7 +52561,7 @@ export interface KlingVideoO3ProVideoToVideoEditOutput {
52246
52561
  video: Components.File;
52247
52562
  }
52248
52563
 
52249
- export interface KlingVideoO3ProTextToVideoInput extends SharedType_89f {}
52564
+ export interface KlingVideoO3ProTextToVideoInput extends SharedType_25d {}
52250
52565
 
52251
52566
  export interface KlingVideoO3ProTextToVideoOutput extends SharedType_723 {}
52252
52567
 
@@ -53244,6 +53559,152 @@ export interface IpAdapterFaceIdInput {
53244
53559
 
53245
53560
  export interface IpAdapterFaceIdOutput extends SharedType_678 {}
53246
53561
 
53562
+ export interface InworldTtsInput {
53563
+ /**
53564
+ * Sample Rate Hertz
53565
+ * @description The sample rate in Hz for the output audio.
53566
+ * @default 48000
53567
+ * @enum {integer}
53568
+ */
53569
+ sample_rate_hertz?: 8000 | 16000 | 24000 | 32000 | 40000 | 48000;
53570
+ /**
53571
+ * Text
53572
+ * @description The text to synthesize into speech.
53573
+ * @example Hello! This is a demo of Inworld's TTS.
53574
+ */
53575
+ text: string;
53576
+ /**
53577
+ * Voice
53578
+ * @description The voice to use for synthesis.
53579
+ * @default Craig (en)
53580
+ * @enum {string}
53581
+ */
53582
+ voice?:
53583
+ | 'Loretta (en)'
53584
+ | 'Darlene (en)'
53585
+ | 'Marlene (en)'
53586
+ | 'Hank (en)'
53587
+ | 'Evelyn (en)'
53588
+ | 'Celeste (en)'
53589
+ | 'Pippa (en)'
53590
+ | 'Tessa (en)'
53591
+ | 'Liam (en)'
53592
+ | 'Callum (en)'
53593
+ | 'Hamish (en)'
53594
+ | 'Abby (en)'
53595
+ | 'Graham (en)'
53596
+ | 'Rupert (en)'
53597
+ | 'Mortimer (en)'
53598
+ | 'Snik (en)'
53599
+ | 'Anjali (en)'
53600
+ | 'Saanvi (en)'
53601
+ | 'Arjun (en)'
53602
+ | 'Claire (en)'
53603
+ | 'Oliver (en)'
53604
+ | 'Simon (en)'
53605
+ | 'Elliot (en)'
53606
+ | 'James (en)'
53607
+ | 'Serena (en)'
53608
+ | 'Gareth (en)'
53609
+ | 'Vinny (en)'
53610
+ | 'Lauren (en)'
53611
+ | 'Jessica (en)'
53612
+ | 'Ethan (en)'
53613
+ | 'Tyler (en)'
53614
+ | 'Jason (en)'
53615
+ | 'Chloe (en)'
53616
+ | 'Veronica (en)'
53617
+ | 'Victoria (en)'
53618
+ | 'Miranda (en)'
53619
+ | 'Sebastian (en)'
53620
+ | 'Victor (en)'
53621
+ | 'Malcolm (en)'
53622
+ | 'Kayla (en)'
53623
+ | 'Nate (en)'
53624
+ | 'Jake (en)'
53625
+ | 'Brian (en)'
53626
+ | 'Amina (en)'
53627
+ | 'Kelsey (en)'
53628
+ | 'Derek (en)'
53629
+ | 'Grant (en)'
53630
+ | 'Evan (en)'
53631
+ | 'Alex (en)'
53632
+ | 'Ashley (en)'
53633
+ | 'Craig (en)'
53634
+ | 'Deborah (en)'
53635
+ | 'Dennis (en)'
53636
+ | 'Edward (en)'
53637
+ | 'Elizabeth (en)'
53638
+ | 'Hades (en)'
53639
+ | 'Julia (en)'
53640
+ | 'Pixie (en)'
53641
+ | 'Mark (en)'
53642
+ | 'Olivia (en)'
53643
+ | 'Priya (en)'
53644
+ | 'Ronald (en)'
53645
+ | 'Sarah (en)'
53646
+ | 'Shaun (en)'
53647
+ | 'Theodore (en)'
53648
+ | 'Timothy (en)'
53649
+ | 'Wendy (en)'
53650
+ | 'Dominus (en)'
53651
+ | 'Hana (en)'
53652
+ | 'Clive (en)'
53653
+ | 'Carter (en)'
53654
+ | 'Blake (en)'
53655
+ | 'Luna (en)'
53656
+ | 'Yichen (zh)'
53657
+ | 'Xiaoyin (zh)'
53658
+ | 'Xinyi (zh)'
53659
+ | 'Jing (zh)'
53660
+ | 'Erik (nl)'
53661
+ | 'Katrien (nl)'
53662
+ | 'Lennart (nl)'
53663
+ | 'Lore (nl)'
53664
+ | 'Alain (fr)'
53665
+ | 'Hélène (fr)'
53666
+ | 'Mathieu (fr)'
53667
+ | 'Étienne (fr)'
53668
+ | 'Johanna (de)'
53669
+ | 'Josef (de)'
53670
+ | 'Gianni (it)'
53671
+ | 'Orietta (it)'
53672
+ | 'Asuka (ja)'
53673
+ | 'Satoshi (ja)'
53674
+ | 'Hyunwoo (ko)'
53675
+ | 'Minji (ko)'
53676
+ | 'Seojun (ko)'
53677
+ | 'Yoona (ko)'
53678
+ | 'Szymon (pl)'
53679
+ | 'Wojciech (pl)'
53680
+ | 'Heitor (pt)'
53681
+ | 'Maitê (pt)'
53682
+ | 'Diego (es)'
53683
+ | 'Lupita (es)'
53684
+ | 'Miguel (es)'
53685
+ | 'Rafael (es)'
53686
+ | 'Svetlana (ru)'
53687
+ | 'Elena (ru)'
53688
+ | 'Dmitry (ru)'
53689
+ | 'Nikolai (ru)'
53690
+ | 'Riya (hi)'
53691
+ | 'Manoj (hi)'
53692
+ | 'Yael (he)'
53693
+ | 'Oren (he)'
53694
+ | 'Nour (ar)'
53695
+ | 'Omar (ar)';
53696
+ }
53697
+
53698
+ export interface InworldTtsOutput {
53699
+ /**
53700
+ * @description Generated audio file.
53701
+ * @example {
53702
+ * "url": "https://v3b.fal.media/files/b/0a920730/38aud4s6sF7bOWFoQHaJk_tmpvv2htrpc.wav"
53703
+ * }
53704
+ */
53705
+ audio: Components.File;
53706
+ }
53707
+
53247
53708
  export interface InvisibleWatermarkInput {
53248
53709
  /**
53249
53710
  * Decode
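
InworldTtsInput takes the text to synthesize, a named voice from a large multilingual catalogue (default 'Craig (en)'), and an output sample rate; the output is a single audio file. A hedged sketch, assuming the interfaces are exported from the package root and that Components.File exposes a url field as in the example metadata:

import type { InworldTtsInput, InworldTtsOutput } from 'fal-endpoint-types';

// Illustrative request; the voice name must match one of the enum members above.
const ttsRequest: InworldTtsInput = {
  text: "Hello! This is a demo of Inworld's TTS.",
  voice: 'Ashley (en)',
  sample_rate_hertz: 48000, // 8000-48000 Hz, default 48000
};

function ttsAudioUrl(result: InworldTtsOutput): string {
  return result.audio.url;
}
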
@@ -58187,6 +58648,103 @@ export interface Hyper3dRodinOutput {
58187
58648
  textures: Components.Image[];
58188
58649
  }
58189
58650
 
58651
+ export interface HyWuEditInput {
58652
+ /**
58653
+ * Enable Safety Checker
58654
+ * @description If set to true, the safety checker will be enabled.
58655
+ * @default true
58656
+ */
58657
+ enable_safety_checker?: boolean;
58658
+ /**
58659
+ * Enable Thinking
58660
+ * @description Enable thinking mode. The model reasons about the edit before generating, producing higher quality results at the cost of longer inference time. Disable for faster results on straightforward edits.
58661
+ * @default true
58662
+ */
58663
+ enable_thinking?: boolean;
58664
+ /**
58665
+ * Image Size
58666
+ * @description The desired size of the generated image. If auto, image size will be determined by the model.
58667
+ * @default auto
58668
+ */
58669
+ image_size?:
58670
+ | Components.ImageSize
58671
+ | (
58672
+ | 'auto'
58673
+ | 'square_hd'
58674
+ | 'square'
58675
+ | 'portrait_4_3'
58676
+ | 'portrait_16_9'
58677
+ | 'landscape_4_3'
58678
+ | 'landscape_16_9'
58679
+ );
58680
+ /**
58681
+ * Image Urls
58682
+ * @description URLs of input images for editing. Typically 2 images: the base image and the reference image. Supports up to 3 images.
58683
+ * @example [
58684
+ * "https://v3b.fal.media/files/b/0a933dff/BE-FgBximAbCJzZSgDNNw_input_1_1.png",
58685
+ * "https://v3b.fal.media/files/b/0a933dff/fNUqzO_Lxwvr-_-4BLeCV_input_1_2.png"
58686
+ * ]
58687
+ */
58688
+ image_urls: string[];
58689
+ /**
58690
+ * Num Images
58691
+ * @description The number of images to generate.
58692
+ * @default 1
58693
+ */
58694
+ num_images?: number;
58695
+ /**
58696
+ * Num Inference Steps
58697
+ * @description Number of diffusion denoising steps.
58698
+ * @default 30
58699
+ */
58700
+ num_inference_steps?: number;
58701
+ /**
58702
+ * Output Format
58703
+ * @description The format of the generated image.
58704
+ * @default png
58705
+ * @enum {string}
58706
+ */
58707
+ output_format?: 'jpeg' | 'png';
58708
+ /**
58709
+ * Prompt
58710
+ * @description The text prompt describing the desired edit. Supports both English and Chinese. Use specific instructions like 'Replace the clothing on figure 1 with the outfit from figure 2'.
58711
+ * @example Using image 1 as the base image, replace the outfit with the clothing from image 2 while keeping the subject, pose, and background unchanged.
58712
+ */
58713
+ prompt: string;
58714
+ /**
58715
+ * Seed
58716
+ * @description Random seed for reproducible results. If None, a random seed is used.
58717
+ */
58718
+ seed?: number;
58719
+ /**
58720
+ * Sync Mode
58721
+ * @description If True, the media will be returned as a data URI.
58722
+ * @default false
58723
+ */
58724
+ sync_mode?: boolean;
58725
+ }
58726
+
58727
+ export interface HyWuEditOutput {
58728
+ /**
58729
+ * Images
58730
+ * @description A list of the generated/edited images.
58731
+ */
58732
+ images: Components.Image[];
58733
+ /**
58734
+ * Seed
58735
+ * @description The seed used for generation.
58736
+ */
58737
+ seed: number;
58738
+ /**
58739
+ * Timings
58740
+ * @description Performance timing breakdown.
58741
+ * @default {}
58742
+ */
58743
+ timings?: {
58744
+ [key: string]: number;
58745
+ };
58746
+ }
58747
+
58190
58748
  export interface Hunyuan3dV2TurboInput extends SharedType_df1 {}
58191
58749
 
58192
58750
  export interface Hunyuan3dV2TurboOutput extends SharedType_7c0 {}
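
HyWuEditInput describes a multi-image editing endpoint: up to three input images, an instruction-style prompt (English or Chinese), an optional thinking mode that trades latency for quality, and the usual size, step, seed, and format controls; the output returns the edited images along with the seed and a timing breakdown. A hedged sketch reusing the example URLs from the definition above and assuming the interface is exported from the package root:

import type { HyWuEditInput } from 'fal-endpoint-types';

// Illustrative edit request; both image URLs come from the @example block above.
const hyWuEditRequest: HyWuEditInput = {
  prompt:
    'Using image 1 as the base image, replace the outfit with the clothing from image 2 while keeping the subject, pose, and background unchanged.',
  image_urls: [
    'https://v3b.fal.media/files/b/0a933dff/BE-FgBximAbCJzZSgDNNw_input_1_1.png',
    'https://v3b.fal.media/files/b/0a933dff/fNUqzO_Lxwvr-_-4BLeCV_input_1_2.png',
  ],
  enable_thinking: true,    // reason about the edit first; slower but higher quality
  image_size: 'auto',       // or a Components.ImageSize object
  num_inference_steps: 30,
  output_format: 'png',
};
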
@@ -61787,70 +62345,268 @@ export interface GenfocusAllInFocusInput {
61787
62345
  target_long_side?: number;
61788
62346
  }
61789
62347
 
61790
- export interface GenfocusAllInFocusOutput extends SharedType_951 {}
62348
+ export interface GenfocusAllInFocusOutput extends SharedType_951 {}
62349
+
62350
+ export interface GenfocusInput {
62351
+ /**
62352
+ * Blur Strength (K)
62353
+ * @description Blur strength (K value). Higher values produce stronger bokeh/defocus. Set to 0 to only perform all-in-focus (deblur) estimation without bokeh.
62354
+ * @default 8
62355
+ */
62356
+ blur_strength?: number;
62357
+ /**
62358
+ * Enable Safety Checker
62359
+ * @description If set to true, the safety checker will be enabled.
62360
+ * @default true
62361
+ */
62362
+ enable_safety_checker?: boolean;
62363
+ /**
62364
+ * Focus Point
62365
+ * @description The [x, y] coordinates of the focus point as fractions of image dimensions (0.0 to 1.0). For example, [0.5, 0.5] means center. If not provided, the center of the image is used.
62366
+ * @default [
62367
+ * 0.5,
62368
+ * 0.5
62369
+ * ]
62370
+ */
62371
+ focus_point?: number[];
62372
+ /**
62373
+ * Image URL
62374
+ * @description The URL of the image to refocus.
62375
+ * @example https://v3b.fal.media/files/b/0a8e76a1/AOVtMeklSm7IGoCTMd2T3.jpg
62376
+ */
62377
+ image_url: string;
62378
+ /**
62379
+ * Num Inference Steps
62380
+ * @description The number of inference steps to perform. Lower values are faster but may reduce quality.
62381
+ * @default 28
62382
+ */
62383
+ num_inference_steps?: number;
62384
+ /**
62385
+ * Output Format
62386
+ * @description The format of the generated image.
62387
+ * @default jpeg
62388
+ * @enum {string}
62389
+ */
62390
+ output_format?: 'jpeg' | 'png';
62391
+ /**
62392
+ * Seed
62393
+ * @description The same seed and the same input given to the same version of the model will output the same image every time.
62394
+ */
62395
+ seed?: number;
62396
+ /**
62397
+ * Sync Mode
62398
+ * @description If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
62399
+ * @default false
62400
+ */
62401
+ sync_mode?: boolean;
62402
+ /**
62403
+ * Target Long Side
62404
+ * @description Resize the image so the longer side matches this value (in pixels). If not set, the original resolution is used (aligned to 16px). Recommended range: 512 to 2000.
62405
+ * @default 512
62406
+ */
62407
+ target_long_side?: number;
62408
+ }
62409
+
62410
+ export interface GenfocusOutput extends SharedType_951 {}
61791
62411
 
61792
- export interface GenfocusInput {
62412
+ export interface GeminiTtsInput {
61793
62413
  /**
61794
- * Blur Strength (K)
61795
- * @description Blur strength (K value). Higher values produce stronger bokeh/defocus. Set to 0 to only perform all-in-focus (deblur) estimation without bokeh.
61796
- * @default 8
62414
+ * Language Code
62415
+ * @description Language for multilingual synthesis. When set, steers the model to speak in the specified language. Supports 24 GA languages and 60+ Preview languages. If not set, the model auto-detects the language from the text.
62416
+ * @example English (US)
62417
+ * @example French (France)
62418
+ * @example Japanese (Japan)
61797
62419
  */
61798
- blur_strength?: number;
62420
+ language_code?:
62421
+ | 'Arabic (Egypt)'
62422
+ | 'Bangla (Bangladesh)'
62423
+ | 'Dutch (Netherlands)'
62424
+ | 'English (India)'
62425
+ | 'English (US)'
62426
+ | 'French (France)'
62427
+ | 'German (Germany)'
62428
+ | 'Hindi (India)'
62429
+ | 'Indonesian (Indonesia)'
62430
+ | 'Italian (Italy)'
62431
+ | 'Japanese (Japan)'
62432
+ | 'Korean (South Korea)'
62433
+ | 'Marathi (India)'
62434
+ | 'Polish (Poland)'
62435
+ | 'Portuguese (Brazil)'
62436
+ | 'Romanian (Romania)'
62437
+ | 'Russian (Russia)'
62438
+ | 'Spanish (Spain)'
62439
+ | 'Tamil (India)'
62440
+ | 'Telugu (India)'
62441
+ | 'Thai (Thailand)'
62442
+ | 'Turkish (Turkey)'
62443
+ | 'Ukrainian (Ukraine)'
62444
+ | 'Vietnamese (Vietnam)'
62445
+ | 'Afrikaans (South Africa)'
62446
+ | 'Albanian (Albania)'
62447
+ | 'Amharic (Ethiopia)'
62448
+ | 'Arabic (World)'
62449
+ | 'Armenian (Armenia)'
62450
+ | 'Azerbaijani (Azerbaijan)'
62451
+ | 'Basque (Spain)'
62452
+ | 'Belarusian (Belarus)'
62453
+ | 'Bulgarian (Bulgaria)'
62454
+ | 'Burmese (Myanmar)'
62455
+ | 'Catalan (Spain)'
62456
+ | 'Cebuano (Philippines)'
62457
+ | 'Chinese Mandarin (China)'
62458
+ | 'Chinese Mandarin (Taiwan)'
62459
+ | 'Croatian (Croatia)'
62460
+ | 'Czech (Czech Republic)'
62461
+ | 'Danish (Denmark)'
62462
+ | 'English (Australia)'
62463
+ | 'English (UK)'
62464
+ | 'Estonian (Estonia)'
62465
+ | 'Filipino (Philippines)'
62466
+ | 'Finnish (Finland)'
62467
+ | 'French (Canada)'
62468
+ | 'Galician (Spain)'
62469
+ | 'Georgian (Georgia)'
62470
+ | 'Greek (Greece)'
62471
+ | 'Gujarati (India)'
62472
+ | 'Haitian Creole (Haiti)'
62473
+ | 'Hebrew (Israel)'
62474
+ | 'Hungarian (Hungary)'
62475
+ | 'Icelandic (Iceland)'
62476
+ | 'Javanese (Java)'
62477
+ | 'Kannada (India)'
62478
+ | 'Konkani (India)'
62479
+ | 'Lao (Laos)'
62480
+ | 'Latin (Vatican City)'
62481
+ | 'Latvian (Latvia)'
62482
+ | 'Lithuanian (Lithuania)'
62483
+ | 'Luxembourgish (Luxembourg)'
62484
+ | 'Macedonian (North Macedonia)'
62485
+ | 'Maithili (India)'
62486
+ | 'Malagasy (Madagascar)'
62487
+ | 'Malay (Malaysia)'
62488
+ | 'Malayalam (India)'
62489
+ | 'Mongolian (Mongolia)'
62490
+ | 'Nepali (Nepal)'
62491
+ | 'Norwegian Bokmal (Norway)'
62492
+ | 'Norwegian Nynorsk (Norway)'
62493
+ | 'Odia (India)'
62494
+ | 'Pashto (Afghanistan)'
62495
+ | 'Persian (Iran)'
62496
+ | 'Portuguese (Portugal)'
62497
+ | 'Punjabi (India)'
62498
+ | 'Serbian (Serbia)'
62499
+ | 'Sindhi (India)'
62500
+ | 'Sinhala (Sri Lanka)'
62501
+ | 'Slovak (Slovakia)'
62502
+ | 'Slovenian (Slovenia)'
62503
+ | 'Spanish (Latin America)'
62504
+ | 'Spanish (Mexico)'
62505
+ | 'Swahili (Kenya)'
62506
+ | 'Swedish (Sweden)'
62507
+ | 'Urdu (Pakistan)';
61799
62508
  /**
61800
- * Enable Safety Checker
61801
- * @description If set to true, the safety checker will be enabled.
61802
- * @default true
62509
+ * Model
62510
+ * @description Which Gemini TTS model to use. gemini-2.5-flash-tts: low latency, cost-efficient for everyday applications (recommended). gemini-2.5-pro-tts: highest quality, best for structured workflows like podcasts, audiobooks, and customer support.
62511
+ * @default gemini-2.5-flash-tts
62512
+ * @enum {string}
61803
62513
  */
61804
- enable_safety_checker?: boolean;
62514
+ model?: 'gemini-2.5-flash-tts' | 'gemini-2.5-pro-tts';
61805
62515
  /**
61806
- * Focus Point
61807
- * @description The [x, y] coordinates of the focus point as fractions of image dimensions (0.0 to 1.0). For example, [0.5, 0.5] means center. If not provided, the center of the image is used.
61808
- * @default [
61809
- * 0.5,
61810
- * 0.5
61811
- * ]
62516
+ * Output Format
62517
+ * @description Audio output format. mp3: compressed, small file size (recommended). wav: uncompressed PCM wrapped in WAV (24 kHz, 16-bit mono). ogg_opus: Ogg container with Opus codec, good quality-to-size ratio.
62518
+ * @default mp3
62519
+ * @enum {string}
61812
62520
  */
61813
- focus_point?: number[];
62521
+ output_format?: 'wav' | 'mp3' | 'ogg_opus';
61814
62522
  /**
61815
- * Image URL
61816
- * @description The URL of the image to refocus.
61817
- * @example https://v3b.fal.media/files/b/0a8e76a1/AOVtMeklSm7IGoCTMd2T3.jpg
62523
+ * Prompt
62524
+ * @description The text to convert to speech. Gemini TTS supports natural-language prompting for style, pace, accent, and emotional expression — include delivery instructions inline with the text (e.g. 'Say cheerfully: Have a wonderful day!'). For multi-speaker synthesis, prefix lines with speaker aliases defined in the speakers field (e.g. 'Alice: Hello!\nBob: Hi!'). Supports inline pace/style markers like [slowly], [whispering], [excited], [extremely fast].
62525
+ * @example Host: Welcome back to AI Frontiers, the podcast where we explore the latest breakthroughs in artificial intelligence. Today we have a very special guest. Doctor Chen, thank you for joining us!
62526
+ * DrChen: Thanks for having me! I'm excited to be here.
62527
+ * Host: So, let's dive right in. Your recent paper on neural architecture search has been making waves. Can you tell our listeners what inspired this research?
62528
+ * DrChen: Absolutely. It all started when we noticed that most existing approaches were optimizing for the wrong metrics. We asked ourselves, what if we could let the model design itself?
61818
62529
  */
61819
- image_url: string;
62530
+ prompt: string;
61820
62531
  /**
61821
- * Num Inference Steps
61822
- * @description The number of inference steps to perform. Lower values are faster but may reduce quality.
61823
- * @default 28
62532
+ * Speakers
62533
+ * @description Multi-speaker voice configuration. When set, enables multi-speaker synthesis where different parts of the text are spoken by different voices. Each speaker needs a voice and a speaker_id (alias) that matches prefixes in the prompt. Requires gemini-2.5-pro-tts or gemini-2.5-flash-tts model. Not supported with gemini-2.5-flash-lite-preview-tts.
62534
+ * @example [
62535
+ * {
62536
+ * "voice": "Charon",
62537
+ * "speaker_id": "Host"
62538
+ * },
62539
+ * {
62540
+ * "voice": "Kore",
62541
+ * "speaker_id": "DrChen"
62542
+ * }
62543
+ * ]
61824
62544
  */
61825
- num_inference_steps?: number;
62545
+ speakers?: Components.SpeakerConfig[];
61826
62546
  /**
61827
- * Output Format
61828
- * @description The format of the generated image.
61829
- * @default jpeg
61830
- * @enum {string}
62547
+ * Style Instructions
62548
+ * @description Optional style and delivery instructions prepended to the prompt. Controls expressiveness, accent, pace, tone, and emotional expression using natural language. Use this to separate style control from the text content. Examples: 'Speak warmly and slowly', 'Read this as a dramatic newscast', 'Use a British accent with a cheerful tone', 'Whisper mysteriously'.
62549
+ * @example Say the following in a warm, conversational tone
62550
+ * @example Read this as a dramatic newscast with gravitas
62551
+ * @example Speak with a British accent, cheerfully and energetically
62552
+ * @example This is a podcast conversation. The host is enthusiastic and curious, the guest is knowledgeable and articulate
61831
62553
  */
61832
- output_format?: 'jpeg' | 'png';
62554
+ style_instructions?: string;
61833
62555
  /**
61834
- * Seed
61835
- * @description The same seed and the same input given to the same version of the model will output the same image every time.
62556
+ * Temperature
62557
+ * @description Controls the randomness of the speech output. Higher values produce more creative and varied delivery, while lower values make the output more predictable and focused.
62558
+ * @default 1
61836
62559
  */
61837
- seed?: number;
62560
+ temperature?: number;
61838
62561
  /**
61839
- * Sync Mode
61840
- * @description If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
61841
- * @default false
62562
+ * Voice
62563
+ * @description Voice preset for single-speaker synthesis. 30 distinct voices are available. Ignored when speakers is set. Popular choices: Kore (strong, firm female), Puck (upbeat, lively male), Charon (calm, professional male), Zephyr (bright, clear female), Aoede (warm, melodic female).
62564
+ * @default Kore
62565
+ * @enum {string}
61842
62566
  */
61843
- sync_mode?: boolean;
62567
+ voice?:
62568
+ | 'Achernar'
62569
+ | 'Achird'
62570
+ | 'Algenib'
62571
+ | 'Algieba'
62572
+ | 'Alnilam'
62573
+ | 'Aoede'
62574
+ | 'Autonoe'
62575
+ | 'Callirrhoe'
62576
+ | 'Charon'
62577
+ | 'Despina'
62578
+ | 'Enceladus'
62579
+ | 'Erinome'
62580
+ | 'Fenrir'
62581
+ | 'Gacrux'
62582
+ | 'Iapetus'
62583
+ | 'Kore'
62584
+ | 'Laomedeia'
62585
+ | 'Leda'
62586
+ | 'Orus'
62587
+ | 'Pulcherrima'
62588
+ | 'Puck'
62589
+ | 'Rasalgethi'
62590
+ | 'Sadachbia'
62591
+ | 'Sadaltager'
62592
+ | 'Schedar'
62593
+ | 'Sulafat'
62594
+ | 'Umbriel'
62595
+ | 'Vindemiatrix'
62596
+ | 'Zephyr'
62597
+ | 'Zubenelgenubi';
62598
+ }
62599
+
62600
+ export interface GeminiTtsOutput {
61844
62601
  /**
61845
- * Target Long Side
61846
- * @description Resize the image so the longer side matches this value (in pixels). If not set, the original resolution is used (aligned to 16px). Recommended range: 512 to 2000.
61847
- * @default 512
62602
+ * @description The generated audio file.
62603
+ * @example {
62604
+ * "url": "https://v3b.fal.media/files/b/0a935d4f/Ez4NpcnFTuGsu2FHDaJTR_gemini_tts_output.mp3"
62605
+ * }
61848
62606
  */
61849
- target_long_side?: number;
62607
+ audio: Components.File;
61850
62608
  }
61851
62609
 
61852
- export interface GenfocusOutput extends SharedType_951 {}
61853
-
61854
62610
  export interface GeminiFlashEditMultiInput {
61855
62611
  /**
61856
62612
  * Input Image Urls
@@ -75552,220 +76308,220 @@ export interface ElevenlabsSpeechToTextScribeV2Output {
75552
76308
  * Words
75553
76309
  * @description Word-level transcription details
75554
76310
  * @example {
75555
- * "end": 0.539,
76311
+ * "text": "Hey,",
75556
76312
  * "start": 0.079,
75557
76313
  * "type": "word",
75558
- * "text": "Hey,",
76314
+ * "end": 0.539,
75559
76315
  * "speaker_id": "speaker_0"
75560
76316
  * }
75561
76317
  * @example {
75562
- * "end": 0.599,
76318
+ * "text": " ",
75563
76319
  * "start": 0.539,
75564
76320
  * "type": "spacing",
75565
- * "text": " ",
76321
+ * "end": 0.599,
75566
76322
  * "speaker_id": "speaker_0"
75567
76323
  * }
75568
76324
  * @example {
75569
- * "end": 0.679,
76325
+ * "text": "this",
75570
76326
  * "start": 0.599,
75571
76327
  * "type": "word",
75572
- * "text": "this",
76328
+ * "end": 0.679,
75573
76329
  * "speaker_id": "speaker_0"
75574
76330
  * }
75575
76331
  * @example {
75576
- * "end": 0.739,
76332
+ * "text": " ",
75577
76333
  * "start": 0.679,
75578
76334
  * "type": "spacing",
75579
- * "text": " ",
76335
+ * "end": 0.739,
75580
76336
  * "speaker_id": "speaker_0"
75581
76337
  * }
75582
76338
  * @example {
75583
- * "end": 0.799,
76339
+ * "text": "is",
75584
76340
  * "start": 0.739,
75585
76341
  * "type": "word",
75586
- * "text": "is",
76342
+ * "end": 0.799,
75587
76343
  * "speaker_id": "speaker_0"
75588
76344
  * }
75589
76345
  * @example {
75590
- * "end": 0.939,
76346
+ * "text": " ",
75591
76347
  * "start": 0.799,
75592
76348
  * "type": "spacing",
75593
- * "text": " ",
76349
+ * "end": 0.939,
75594
76350
  * "speaker_id": "speaker_0"
75595
76351
  * }
75596
76352
  * @example {
75597
- * "end": 0.939,
76353
+ * "text": "a",
75598
76354
  * "start": 0.939,
75599
76355
  * "type": "word",
75600
- * "text": "a",
76356
+ * "end": 0.939,
75601
76357
  * "speaker_id": "speaker_0"
75602
76358
  * }
75603
76359
  * @example {
75604
- * "end": 0.959,
76360
+ * "text": " ",
75605
76361
  * "start": 0.939,
75606
76362
  * "type": "spacing",
75607
- * "text": " ",
76363
+ * "end": 0.959,
75608
76364
  * "speaker_id": "speaker_0"
75609
76365
  * }
75610
76366
  * @example {
75611
- * "end": 1.179,
76367
+ * "text": "test",
75612
76368
  * "start": 0.959,
75613
76369
  * "type": "word",
75614
- * "text": "test",
76370
+ * "end": 1.179,
75615
76371
  * "speaker_id": "speaker_0"
75616
76372
  * }
75617
76373
  * @example {
75618
- * "end": 1.219,
76374
+ * "text": " ",
75619
76375
  * "start": 1.179,
75620
76376
  * "type": "spacing",
75621
- * "text": " ",
76377
+ * "end": 1.219,
75622
76378
  * "speaker_id": "speaker_0"
75623
76379
  * }
75624
76380
  * @example {
75625
- * "end": 1.719,
76381
+ * "text": "recording",
75626
76382
  * "start": 1.22,
75627
76383
  * "type": "word",
75628
- * "text": "recording",
76384
+ * "end": 1.719,
75629
76385
  * "speaker_id": "speaker_0"
75630
76386
  * }
75631
76387
  * @example {
75632
- * "end": 1.719,
76388
+ * "text": " ",
75633
76389
  * "start": 1.719,
75634
76390
  * "type": "spacing",
75635
- * "text": " ",
76391
+ * "end": 1.719,
75636
76392
  * "speaker_id": "speaker_0"
75637
76393
  * }
75638
76394
  * @example {
75639
- * "end": 1.86,
76395
+ * "text": "for",
75640
76396
  * "start": 1.719,
75641
76397
  * "type": "word",
75642
- * "text": "for",
76398
+ * "end": 1.86,
75643
76399
  * "speaker_id": "speaker_0"
75644
76400
  * }
75645
76401
  * @example {
75646
- * "end": 1.879,
76402
+ * "text": " ",
75647
76403
  * "start": 1.86,
75648
76404
  * "type": "spacing",
75649
- * "text": " ",
76405
+ * "end": 1.879,
75650
76406
  * "speaker_id": "speaker_0"
75651
76407
  * }
75652
76408
  * @example {
75653
- * "end": 2.24,
76409
+ * "text": "Scribe",
75654
76410
  * "start": 1.879,
75655
76411
  * "type": "word",
75656
- * "text": "Scribe",
76412
+ * "end": 2.24,
75657
76413
  * "speaker_id": "speaker_0"
75658
76414
  * }
75659
76415
  * @example {
75660
- * "end": 2.319,
76416
+ * "text": " ",
75661
76417
  * "start": 2.24,
75662
76418
  * "type": "spacing",
75663
- * "text": " ",
76419
+ * "end": 2.319,
75664
76420
  * "speaker_id": "speaker_0"
75665
76421
  * }
75666
76422
  * @example {
75667
- * "end": 2.759,
76423
+ * "text": "version",
75668
76424
  * "start": 2.319,
75669
76425
  * "type": "word",
75670
- * "text": "version",
76426
+ * "end": 2.759,
75671
76427
  * "speaker_id": "speaker_0"
75672
76428
  * }
75673
76429
  * @example {
75674
- * "end": 2.779,
76430
+ * "text": " ",
75675
76431
  * "start": 2.759,
75676
76432
  * "type": "spacing",
75677
- * "text": " ",
76433
+ * "end": 2.779,
75678
76434
  * "speaker_id": "speaker_0"
75679
76435
  * }
75680
76436
  * @example {
75681
- * "end": 3.379,
76437
+ * "text": "two,",
75682
76438
  * "start": 2.779,
75683
76439
  * "type": "word",
75684
- * "text": "two,",
76440
+ * "end": 3.379,
75685
76441
  * "speaker_id": "speaker_0"
75686
76442
  * }
75687
76443
  * @example {
75688
- * "end": 3.399,
76444
+ * "text": " ",
75689
76445
  * "start": 3.379,
75690
76446
  * "type": "spacing",
75691
- * "text": " ",
76447
+ * "end": 3.399,
75692
76448
  * "speaker_id": "speaker_0"
75693
76449
  * }
75694
76450
  * @example {
75695
- * "end": 3.519,
76451
+ * "text": "which",
75696
76452
  * "start": 3.399,
75697
76453
  * "type": "word",
75698
- * "text": "which",
76454
+ * "end": 3.519,
75699
76455
  * "speaker_id": "speaker_0"
75700
76456
  * }
75701
76457
  * @example {
75702
- * "end": 3.539,
76458
+ * "text": " ",
75703
76459
  * "start": 3.519,
75704
76460
  * "type": "spacing",
75705
- * "text": " ",
76461
+ * "end": 3.539,
75706
76462
  * "speaker_id": "speaker_0"
75707
76463
  * }
75708
76464
  * @example {
75709
- * "end": 3.659,
76465
+ * "text": "is",
75710
76466
  * "start": 3.539,
75711
76467
  * "type": "word",
75712
- * "text": "is",
76468
+ * "end": 3.659,
75713
76469
  * "speaker_id": "speaker_0"
75714
76470
  * }
75715
76471
  * @example {
75716
- * "end": 3.699,
76472
+ * "text": " ",
75717
76473
  * "start": 3.659,
75718
76474
  * "type": "spacing",
75719
- * "text": " ",
76475
+ * "end": 3.699,
75720
76476
  * "speaker_id": "speaker_0"
75721
76477
  * }
75722
76478
  * @example {
75723
- * "end": 3.839,
76479
+ * "text": "now",
75724
76480
  * "start": 3.699,
75725
76481
  * "type": "word",
75726
- * "text": "now",
76482
+ * "end": 3.839,
75727
76483
  * "speaker_id": "speaker_0"
75728
76484
  * }
75729
76485
  * @example {
75730
- * "end": 3.839,
76486
+ * "text": " ",
75731
76487
  * "start": 3.839,
75732
76488
  * "type": "spacing",
75733
- * "text": " ",
76489
+ * "end": 3.839,
75734
76490
  * "speaker_id": "speaker_0"
75735
76491
  * }
75736
76492
  * @example {
75737
- * "end": 4.319,
76493
+ * "text": "available",
75738
76494
  * "start": 3.839,
75739
76495
  * "type": "word",
75740
- * "text": "available",
76496
+ * "end": 4.319,
75741
76497
  * "speaker_id": "speaker_0"
75742
76498
  * }
75743
76499
  * @example {
75744
- * "end": 4.339,
76500
+ * "text": " ",
75745
76501
  * "start": 4.319,
75746
76502
  * "type": "spacing",
75747
- * "text": " ",
76503
+ * "end": 4.339,
75748
76504
  * "speaker_id": "speaker_0"
75749
76505
  * }
75750
76506
  * @example {
75751
- * "end": 4.579,
76507
+ * "text": "on",
75752
76508
  * "start": 4.339,
75753
76509
  * "type": "word",
75754
- * "text": "on",
76510
+ * "end": 4.579,
75755
76511
  * "speaker_id": "speaker_0"
75756
76512
  * }
75757
76513
  * @example {
75758
- * "end": 4.599,
76514
+ * "text": " ",
75759
76515
  * "start": 4.579,
75760
76516
  * "type": "spacing",
75761
- * "text": " ",
76517
+ * "end": 4.599,
75762
76518
  * "speaker_id": "speaker_0"
75763
76519
  * }
75764
76520
  * @example {
75765
- * "end": 5.699,
76521
+ * "text": "fal.ai.",
75766
76522
  * "start": 4.599,
75767
76523
  * "type": "word",
75768
- * "text": "fal.ai.",
76524
+ * "end": 5.699,
75769
76525
  * "speaker_id": "speaker_0"
75770
76526
  * }
75771
76527
  */
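A short sketch, not part of the package, showing how word-level entries shaped like the examples above could be reassembled into plain text; the entry type is re-declared locally and covers only the shapes shown here.

type ScribeWordSketch = {
  text: string;
  start: number;
  end: number;
  type: 'word' | 'spacing';
  speaker_id: string;
};

// Spacing entries already carry the whitespace, so concatenating the text
// fields in order reproduces the transcript as spoken.
function toTranscript(words: ScribeWordSketch[]): string {
  return words.map((w) => w.text).join('');
}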
@@ -80409,6 +81165,32 @@ export interface BytedanceDreamactorV2Output {
80409
81165
  }
80410
81166
 
80411
81167
  export interface BytedanceUpscalerUpscaleVideoInput {
81168
+ /**
81169
+ * Enhancement Preset
81170
+ * @description The enhancement preset optimized for specific video scenarios. 'general' is a general-purpose template, 'ugc' targets user-generated short videos, 'short_series' is for short dramas, 'aigc' is for AI-generated content, and 'old_film' is for classic film restoration.
81171
+ * @default general
81172
+ * @enum {string}
81173
+ */
81174
+ enhancement_preset?: 'general' | 'ugc' | 'short_series' | 'aigc' | 'old_film';
81175
+ /**
81176
+ * Enhancement Tier
81177
+ * @description The enhancement quality tier. 'fast' provides essential upscaling with good speed, 'standard' uses adaptive algorithms for better visual texture, and 'pro' uses large-model restoration for cinematic quality (longer processing time, at 10 times the cost of `standard` and `fast`).
81178
+ * @default standard
81179
+ * @enum {string}
81180
+ */
81181
+ enhancement_tier?: 'fast' | 'standard' | 'pro';
81182
+ /**
81183
+ * Fidelity
81184
+ * @description The enhancement intensity. 'high' applies mild enhancement while keeping visual texture close to the source video. 'medium' provides a balanced image quality enhancement.
81185
+ * @default high
81186
+ * @enum {string}
81187
+ */
81188
+ fidelity?: 'high' | 'medium';
81189
+ /**
81190
+ * Scale Ratio
81191
+ * @description The scaling ratio for the output video resolution. When set, overrides target_resolution and scales the input resolution by this factor (e.g., 2.0 doubles the resolution). Range: 1.1 to 10.0. Note that this is valid only up to 4k resolution, and trying to scale beyond 4k will result in an error (4k is defined as a total pixel count of 3840x2160).
81192
+ */
81193
+ scale_ratio?: number;
80412
81194
  /**
80413
81195
  * Target Fps
80414
81196
  * @description The target FPS of the video to upscale.
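A hedged sketch (local re-declaration, not the package export) of how the new preset, tier, fidelity, and scale-ratio fields introduced above might be combined in an upscale request; all values are illustrative only.

type UpscaleVideoTuningSketch = {
  enhancement_preset?: 'general' | 'ugc' | 'short_series' | 'aigc' | 'old_film';
  enhancement_tier?: 'fast' | 'standard' | 'pro';
  fidelity?: 'high' | 'medium';
  scale_ratio?: number;
};

const upscaleTuning: UpscaleVideoTuningSketch = {
  enhancement_preset: 'old_film', // classic film restoration preset
  enhancement_tier: 'standard',   // 'pro' is documented at 10x the cost of 'standard'/'fast'
  fidelity: 'high',               // stays close to the source video's texture
  scale_ratio: 2.0,               // doubles the input resolution; capped at 4k (3840x2160) per the note above
};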
@@ -80448,7 +81230,7 @@ export interface BytedanceUpscalerUpscaleVideoOutput {
80448
81230
 
80449
81231
  export interface BriaTextToImageHdInput extends SharedType_411 {}
80450
81232
 
80451
- export interface BriaTextToImageHdOutput extends SharedType_e19 {}
81233
+ export interface BriaTextToImageHdOutput extends SharedType_a97 {}
80452
81234
 
80453
81235
  export interface BriaTextToImageFastInput {
80454
81236
  /**
@@ -80521,11 +81303,11 @@ export interface BriaTextToImageFastInput {
80521
81303
  sync_mode?: boolean;
80522
81304
  }
80523
81305
 
80524
- export interface BriaTextToImageFastOutput extends SharedType_e19 {}
81306
+ export interface BriaTextToImageFastOutput extends SharedType_a97 {}
80525
81307
 
80526
81308
  export interface BriaTextToImageBaseInput extends SharedType_411 {}
80527
81309
 
80528
- export interface BriaTextToImageBaseOutput extends SharedType_e19 {}
81310
+ export interface BriaTextToImageBaseOutput extends SharedType_a97 {}
80529
81311
 
80530
81312
  export interface BriaReimagineInput {
80531
81313
  /**
@@ -80754,8 +81536,8 @@ export interface BriaGenfillOutput {
80754
81536
  * @description Generated Images
80755
81537
  * @example [
80756
81538
  * {
80757
- * "height": 768,
80758
81539
  * "file_size": 1064550,
81540
+ * "height": 768,
80759
81541
  * "file_name": "a0d138e6820c4ad58f1fd3c758f16047.png",
80760
81542
  * "content_type": "image/png",
80761
81543
  * "url": "https://storage.googleapis.com/falserverless/bria/bria_genfill_res.png",
@@ -80839,8 +81621,8 @@ export interface BriaExpandOutput {
80839
81621
  /**
80840
81622
  * @description The generated image
80841
81623
  * @example {
80842
- * "height": 674,
80843
81624
  * "file_size": 1471342,
81625
+ * "height": 674,
80844
81626
  * "file_name": "afa402a35ea742cdb5c3e219b2b19bfb.png",
80845
81627
  * "content_type": "image/png",
80846
81628
  * "url": "https://v3.fal.media/files/koala/8np-spgxxG-I1r3cjthRV_afa402a35ea742cdb5c3e219b2b19bfb.png",
@@ -80996,8 +81778,8 @@ export interface BriaBackgroundRemoveOutput {
80996
81778
  /**
80997
81779
  * @description The generated image
80998
81780
  * @example {
80999
- * "height": 1024,
81000
81781
  * "file_size": 1076276,
81782
+ * "height": 1024,
81001
81783
  * "file_name": "070c731993e949d993c10ef6283d335d.png",
81002
81784
  * "content_type": "image/png",
81003
81785
  * "url": "https://v3.fal.media/files/tiger/GQEMNjRyxSoza7N8LPPqb_070c731993e949d993c10ef6283d335d.png",
@@ -81342,6 +82124,13 @@ export interface BenV2VideoInput {
81342
82124
  * @description Optional RGB values (0-255) for the background color. If not provided, the background will be transparent. For ex: [0, 0, 0]
81343
82125
  */
81344
82126
  background_color?: [number, number, number];
82127
+ /**
82128
+ * Output Format
82129
+ * @description Output video format. Use "webm" for true transparency support (VP9 codec with alpha channel). MP4 format does not support transparency and will render transparent areas as black.
82130
+ * @default mp4
82131
+ * @enum {string}
82132
+ */
82133
+ output_format?: 'mp4' | 'webm';
81345
82134
  /**
81346
82135
  * Seed
81347
82136
  * @description Random seed for reproducible generation.
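Illustrative only: picking the BenV2 output container based on whether transparency is needed, mirroring the documented behavior that MP4 renders transparent areas as black.

type BenV2OutputFormatSketch = 'mp4' | 'webm';

function pickOutputFormat(needsTransparency: boolean): BenV2OutputFormatSketch {
  return needsTransparency ? 'webm' : 'mp4'; // webm uses VP9 with an alpha channel per the description above
}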
@@ -81390,8 +82179,8 @@ export interface BenV2ImageOutput {
81390
82179
  /**
81391
82180
  * @description The output image after background removal.
81392
82181
  * @example {
81393
- * "height": 512,
81394
82182
  * "file_size": 423052,
82183
+ * "height": 512,
81395
82184
  * "file_name": "zrZNETpI_ul2jonraqpxN_a57c3f3825d9418f8b3d39cde87c3310.png",
81396
82185
  * "content_type": "image/png",
81397
82186
  * "url": "https://storage.googleapis.com/falserverless/gallery/Ben2/zrZNETpI_ul2jonraqpxN_a57c3f3825d9418f8b3d39cde87c3310.png",
@@ -84319,18 +85108,18 @@ export interface BriaEmbedProductInput {
84319
85108
  * {
84320
85109
  * "coordinates": {
84321
85110
  * "y": 317,
85111
+ * "width": 100,
84322
85112
  * "height": 300,
84323
- * "x": 300,
84324
- * "width": 100
85113
+ * "x": 300
84325
85114
  * },
84326
85115
  * "image_source": "https://bria-datasets.s3.us-east-1.amazonaws.com/embed-product/a_standing_lamp_over_white_background_0.png"
84327
85116
  * },
84328
85117
  * {
84329
85118
  * "coordinates": {
84330
85119
  * "y": 287,
85120
+ * "width": 120,
84331
85121
  * "height": 156,
84332
- * "x": 646,
84333
- * "width": 120
85122
+ * "x": 646
84334
85123
  * },
84335
85124
  * "image_source": "https://bria-datasets.s3.us-east-1.amazonaws.com/embed-product/a_wall_picture_on_white_background_0.png"
84336
85125
  * }