fal-endpoint-types 1.3.36 → 1.3.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -2292,6 +2292,53 @@ export interface SpeechTimestamp {
|
|
|
2292
2292
|
start: number;
|
|
2293
2293
|
}
|
|
2294
2294
|
|
|
2295
|
+
export interface SpeakerConfig {
|
|
2296
|
+
/**
|
|
2297
|
+
* Speaker Id
|
|
2298
|
+
* @description Alias used to identify this speaker in the prompt. Use this alias as a prefix in the prompt field, e.g. 'Alice: Hello! Bob: Hi there!'. Must be alphanumeric with no whitespace.
|
|
2299
|
+
* @example Speaker1
|
|
2300
|
+
* @example Alice
|
|
2301
|
+
* @example Narrator
|
|
2302
|
+
*/
|
|
2303
|
+
speaker_id: string;
|
|
2304
|
+
/**
|
|
2305
|
+
* Voice
|
|
2306
|
+
* @description Voice preset for this speaker.
|
|
2307
|
+
* @enum {string}
|
|
2308
|
+
*/
|
|
2309
|
+
voice:
|
|
2310
|
+
| 'Achernar'
|
|
2311
|
+
| 'Achird'
|
|
2312
|
+
| 'Algenib'
|
|
2313
|
+
| 'Algieba'
|
|
2314
|
+
| 'Alnilam'
|
|
2315
|
+
| 'Aoede'
|
|
2316
|
+
| 'Autonoe'
|
|
2317
|
+
| 'Callirrhoe'
|
|
2318
|
+
| 'Charon'
|
|
2319
|
+
| 'Despina'
|
|
2320
|
+
| 'Enceladus'
|
|
2321
|
+
| 'Erinome'
|
|
2322
|
+
| 'Fenrir'
|
|
2323
|
+
| 'Gacrux'
|
|
2324
|
+
| 'Iapetus'
|
|
2325
|
+
| 'Kore'
|
|
2326
|
+
| 'Laomedeia'
|
|
2327
|
+
| 'Leda'
|
|
2328
|
+
| 'Orus'
|
|
2329
|
+
| 'Pulcherrima'
|
|
2330
|
+
| 'Puck'
|
|
2331
|
+
| 'Rasalgethi'
|
|
2332
|
+
| 'Sadachbia'
|
|
2333
|
+
| 'Sadaltager'
|
|
2334
|
+
| 'Schedar'
|
|
2335
|
+
| 'Sulafat'
|
|
2336
|
+
| 'Umbriel'
|
|
2337
|
+
| 'Vindemiatrix'
|
|
2338
|
+
| 'Zephyr'
|
|
2339
|
+
| 'Zubenelgenubi';
|
|
2340
|
+
}
|
|
2341
|
+
|
|
2295
2342
|
export interface Speaker {
|
|
2296
2343
|
audio_url: string;
|
|
2297
2344
|
prompt: string;
|
|
@@ -3596,7 +3643,22 @@ export interface KlingV3MultiPromptElement {
|
|
|
3596
3643
|
* @default 5
|
|
3597
3644
|
* @enum {string}
|
|
3598
3645
|
*/
|
|
3599
|
-
duration?:
|
|
3646
|
+
duration?:
|
|
3647
|
+
| '1'
|
|
3648
|
+
| '2'
|
|
3649
|
+
| '3'
|
|
3650
|
+
| '4'
|
|
3651
|
+
| '5'
|
|
3652
|
+
| '6'
|
|
3653
|
+
| '7'
|
|
3654
|
+
| '8'
|
|
3655
|
+
| '9'
|
|
3656
|
+
| '10'
|
|
3657
|
+
| '11'
|
|
3658
|
+
| '12'
|
|
3659
|
+
| '13'
|
|
3660
|
+
| '14'
|
|
3661
|
+
| '15';
|
|
3600
3662
|
/**
|
|
3601
3663
|
* Prompt
|
|
3602
3664
|
* @description The prompt for this shot.
|
|
@@ -3633,6 +3695,11 @@ export interface KlingV3ComboElementInput {
|
|
|
3633
3695
|
* @description The video URL of the element. A request can only have one element with a video.
|
|
3634
3696
|
*/
|
|
3635
3697
|
video_url?: string;
|
|
3698
|
+
/**
|
|
3699
|
+
* Voice Id
|
|
3700
|
+
* @description The voice ID for this element. The voice will be binded to the element and references to this element will use the binded voice. Voice binding is only supported for video elements, and cannot be used with image elements. Get voice IDs from the following endpoint: https://fal.ai/models/fal-ai/kling-video/create-voice
|
|
3701
|
+
*/
|
|
3702
|
+
voice_id?: string;
|
|
3636
3703
|
}
|
|
3637
3704
|
|
|
3638
3705
|
export interface KeyframeTransition {
|
|
@@ -14,11 +14,21 @@ declare global {
|
|
|
14
14
|
output: falEndpoints.XaiGrokImagineVideoTextToVideoOutput;
|
|
15
15
|
};
|
|
16
16
|
|
|
17
|
+
'xai/grok-imagine-video/reference-to-video': {
|
|
18
|
+
input: falEndpoints.XaiGrokImagineVideoReferenceToVideoInput;
|
|
19
|
+
output: falEndpoints.XaiGrokImagineVideoReferenceToVideoOutput;
|
|
20
|
+
};
|
|
21
|
+
|
|
17
22
|
'xai/grok-imagine-video/image-to-video': {
|
|
18
23
|
input: falEndpoints.XaiGrokImagineVideoImageToVideoInput;
|
|
19
24
|
output: falEndpoints.XaiGrokImagineVideoImageToVideoOutput;
|
|
20
25
|
};
|
|
21
26
|
|
|
27
|
+
'xai/grok-imagine-video/extend-video': {
|
|
28
|
+
input: falEndpoints.XaiGrokImagineVideoExtendVideoInput;
|
|
29
|
+
output: falEndpoints.XaiGrokImagineVideoExtendVideoOutput;
|
|
30
|
+
};
|
|
31
|
+
|
|
22
32
|
'xai/grok-imagine-video/edit-video': {
|
|
23
33
|
input: falEndpoints.XaiGrokImagineVideoEditVideoInput;
|
|
24
34
|
output: falEndpoints.XaiGrokImagineVideoEditVideoOutput;
|
|
@@ -1244,6 +1254,11 @@ declare global {
|
|
|
1244
1254
|
output: falEndpoints.SeedvrUpscaleVideoOutput;
|
|
1245
1255
|
};
|
|
1246
1256
|
|
|
1257
|
+
'fal-ai/seedvr/upscale/image/seamless': {
|
|
1258
|
+
input: falEndpoints.SeedvrUpscaleImageSeamlessInput;
|
|
1259
|
+
output: falEndpoints.SeedvrUpscaleImageSeamlessOutput;
|
|
1260
|
+
};
|
|
1261
|
+
|
|
1247
1262
|
'fal-ai/seedvr/upscale/image': {
|
|
1248
1263
|
input: falEndpoints.SeedvrUpscaleImageInput;
|
|
1249
1264
|
output: falEndpoints.SeedvrUpscaleImageOutput;
|
|
@@ -3559,6 +3574,11 @@ declare global {
|
|
|
3559
3574
|
output: falEndpoints.IpAdapterFaceIdOutput;
|
|
3560
3575
|
};
|
|
3561
3576
|
|
|
3577
|
+
'fal-ai/inworld-tts': {
|
|
3578
|
+
input: falEndpoints.InworldTtsInput;
|
|
3579
|
+
output: falEndpoints.InworldTtsOutput;
|
|
3580
|
+
};
|
|
3581
|
+
|
|
3562
3582
|
'fal-ai/invisible-watermark': {
|
|
3563
3583
|
input: falEndpoints.InvisibleWatermarkInput;
|
|
3564
3584
|
output: falEndpoints.InvisibleWatermarkOutput;
|
|
@@ -4349,6 +4369,11 @@ declare global {
|
|
|
4349
4369
|
output: falEndpoints.GenfocusOutput;
|
|
4350
4370
|
};
|
|
4351
4371
|
|
|
4372
|
+
'fal-ai/gemini-tts': {
|
|
4373
|
+
input: falEndpoints.GeminiTtsInput;
|
|
4374
|
+
output: falEndpoints.GeminiTtsOutput;
|
|
4375
|
+
};
|
|
4376
|
+
|
|
4352
4377
|
'fal-ai/gemini-flash-edit/multi': {
|
|
4353
4378
|
input: falEndpoints.GeminiFlashEditMultiInput;
|
|
4354
4379
|
output: falEndpoints.GeminiFlashEditMultiOutput;
|
|
@@ -94,8 +94,62 @@ export interface XaiGrokImagineVideoTextToVideoOutput {
|
|
|
94
94
|
* "fps": 24,
|
|
95
95
|
* "width": 1280,
|
|
96
96
|
* "file_name": "RUAbFYlssdqnbjNLmE8qP_IX7BNYGP.mp4",
|
|
97
|
-
* "
|
|
98
|
-
* "
|
|
97
|
+
* "content_type": "video/mp4",
|
|
98
|
+
* "num_frames": 145
|
|
99
|
+
* }
|
|
100
|
+
*/
|
|
101
|
+
video: Components.VideoFile;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
export interface XaiGrokImagineVideoReferenceToVideoInput {
|
|
105
|
+
/**
|
|
106
|
+
* Aspect Ratio
|
|
107
|
+
* @description Aspect ratio of the generated video.
|
|
108
|
+
* @default 16:9
|
|
109
|
+
* @enum {string}
|
|
110
|
+
*/
|
|
111
|
+
aspect_ratio?: '16:9' | '4:3' | '3:2' | '1:1' | '2:3' | '3:4' | '9:16';
|
|
112
|
+
/**
|
|
113
|
+
* Duration
|
|
114
|
+
* @description Video duration in seconds.
|
|
115
|
+
* @default 8
|
|
116
|
+
*/
|
|
117
|
+
duration?: number;
|
|
118
|
+
/**
|
|
119
|
+
* Prompt
|
|
120
|
+
* @description Text prompt describing the video to generate. Use @Image1, @Image2, etc. to reference specific images from reference_image_urls in order.
|
|
121
|
+
* @example A @Image1 running through a sunlit meadow, cinematic slow motion
|
|
122
|
+
*/
|
|
123
|
+
prompt: string;
|
|
124
|
+
/**
|
|
125
|
+
* Reference Image URLs
|
|
126
|
+
* @description One or more reference image URLs to guide the video generation as style and content references. Reference in prompt as @Image1, @Image2, etc. Maximum 7 images.
|
|
127
|
+
* @example [
|
|
128
|
+
* "https://v3b.fal.media/files/b/0a8b90e0/BFLE9VDlZqsryU-UA3BoD_image_004.png"
|
|
129
|
+
* ]
|
|
130
|
+
*/
|
|
131
|
+
reference_image_urls: string[];
|
|
132
|
+
/**
|
|
133
|
+
* Resolution
|
|
134
|
+
* @description Resolution of the output video.
|
|
135
|
+
* @default 480p
|
|
136
|
+
* @enum {string}
|
|
137
|
+
*/
|
|
138
|
+
resolution?: '480p' | '720p';
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
export interface XaiGrokImagineVideoReferenceToVideoOutput {
|
|
142
|
+
/**
|
|
143
|
+
* @description The generated video.
|
|
144
|
+
* @example {
|
|
145
|
+
* "height": 720,
|
|
146
|
+
* "duration": 8,
|
|
147
|
+
* "url": "https://v3b.fal.media/files/b/0a8b90e4/r2v_output.mp4",
|
|
148
|
+
* "fps": 24,
|
|
149
|
+
* "width": 1280,
|
|
150
|
+
* "file_name": "r2v_output.mp4",
|
|
151
|
+
* "content_type": "video/mp4",
|
|
152
|
+
* "num_frames": 192
|
|
99
153
|
* }
|
|
100
154
|
*/
|
|
101
155
|
video: Components.VideoFile;
|
|
@@ -145,8 +199,46 @@ export interface XaiGrokImagineVideoImageToVideoOutput {
|
|
|
145
199
|
* "fps": 24,
|
|
146
200
|
* "width": 1280,
|
|
147
201
|
* "file_name": "0Ci1dviuSnEyUZzBUq-_5_nu7MrAAa.mp4",
|
|
148
|
-
* "
|
|
149
|
-
* "
|
|
202
|
+
* "content_type": "video/mp4",
|
|
203
|
+
* "num_frames": 145
|
|
204
|
+
* }
|
|
205
|
+
*/
|
|
206
|
+
video: Components.VideoFile;
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
export interface XaiGrokImagineVideoExtendVideoInput {
|
|
210
|
+
/**
|
|
211
|
+
* Duration
|
|
212
|
+
* @description Length of the extension in seconds.
|
|
213
|
+
* @default 6
|
|
214
|
+
*/
|
|
215
|
+
duration?: number;
|
|
216
|
+
/**
|
|
217
|
+
* Prompt
|
|
218
|
+
* @description Text description of what should happen next in the video.
|
|
219
|
+
* @example The camera slowly zooms out to reveal the city skyline at sunset
|
|
220
|
+
*/
|
|
221
|
+
prompt: string;
|
|
222
|
+
/**
|
|
223
|
+
* Video URL
|
|
224
|
+
* @description URL of the source video to extend. Must be MP4 format (H.264, H.265, or AV1 codec), 2-15 seconds long.
|
|
225
|
+
* @example https://v3b.fal.media/files/b/0a8b9112/V5Z_NIPE3ppMDWivNo6_q_video_019.mp4
|
|
226
|
+
*/
|
|
227
|
+
video_url: string;
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
export interface XaiGrokImagineVideoExtendVideoOutput {
|
|
231
|
+
/**
|
|
232
|
+
* @description The extended video (original + extension stitched together).
|
|
233
|
+
* @example {
|
|
234
|
+
* "height": 720,
|
|
235
|
+
* "duration": 16,
|
|
236
|
+
* "url": "https://v3b.fal.media/files/b/0a8b9113/extended_video.mp4",
|
|
237
|
+
* "fps": 24,
|
|
238
|
+
* "width": 1280,
|
|
239
|
+
* "file_name": "extended_video.mp4",
|
|
240
|
+
* "content_type": "video/mp4",
|
|
241
|
+
* "num_frames": 384
|
|
150
242
|
* }
|
|
151
243
|
*/
|
|
152
244
|
video: Components.VideoFile;
|
|
@@ -184,8 +276,8 @@ export interface XaiGrokImagineVideoEditVideoOutput {
|
|
|
184
276
|
* "fps": 24,
|
|
185
277
|
* "width": 1280,
|
|
186
278
|
* "file_name": "EuDrZuQTW9m1phBXOsauz_EpJH3s8X.mp4",
|
|
187
|
-
* "
|
|
188
|
-
* "
|
|
279
|
+
* "content_type": "video/mp4",
|
|
280
|
+
* "num_frames": 121
|
|
189
281
|
* }
|
|
190
282
|
*/
|
|
191
283
|
video: Components.VideoFile;
|
|
@@ -6232,6 +6324,22 @@ export interface SharedType_92a {
|
|
|
6232
6324
|
video: Components.File;
|
|
6233
6325
|
}
|
|
6234
6326
|
|
|
6327
|
+
export interface SharedType_920 {
|
|
6328
|
+
/**
|
|
6329
|
+
* @description Upscaled image file after processing
|
|
6330
|
+
* @example {
|
|
6331
|
+
* "content_type": "image/png",
|
|
6332
|
+
* "url": "https://storage.googleapis.com/falserverless/example_outputs/seedvr2/image_out.png"
|
|
6333
|
+
* }
|
|
6334
|
+
*/
|
|
6335
|
+
image: Components.ImageFile;
|
|
6336
|
+
/**
|
|
6337
|
+
* Seed
|
|
6338
|
+
* @description The random seed used for the generation process.
|
|
6339
|
+
*/
|
|
6340
|
+
seed: number;
|
|
6341
|
+
}
|
|
6342
|
+
|
|
6235
6343
|
export interface SharedType_91c {
|
|
6236
6344
|
/**
|
|
6237
6345
|
* Image Url
|
|
@@ -6849,53 +6957,6 @@ export interface SharedType_8a3 {
|
|
|
6849
6957
|
video: Components.File;
|
|
6850
6958
|
}
|
|
6851
6959
|
|
|
6852
|
-
export interface SharedType_89f {
|
|
6853
|
-
/**
|
|
6854
|
-
* Aspect Ratio
|
|
6855
|
-
* @description Aspect ratio of the generated video.
|
|
6856
|
-
* @default 16:9
|
|
6857
|
-
* @enum {string}
|
|
6858
|
-
*/
|
|
6859
|
-
aspect_ratio?: '16:9' | '9:16' | '1:1';
|
|
6860
|
-
/**
|
|
6861
|
-
* Duration
|
|
6862
|
-
* @description Video duration in seconds (3-15s).
|
|
6863
|
-
* @default 5
|
|
6864
|
-
* @enum {string}
|
|
6865
|
-
*/
|
|
6866
|
-
duration?: '3' | '4' | '5' | '6' | '7' | '8' | '9' | '10' | '11' | '12' | '13' | '14' | '15';
|
|
6867
|
-
/**
|
|
6868
|
-
* Generate Audio
|
|
6869
|
-
* @description Whether to generate native audio for the video.
|
|
6870
|
-
* @default false
|
|
6871
|
-
*/
|
|
6872
|
-
generate_audio?: boolean;
|
|
6873
|
-
/**
|
|
6874
|
-
* Multi Prompt
|
|
6875
|
-
* @description List of prompts for multi-shot video generation.
|
|
6876
|
-
* @example null
|
|
6877
|
-
*/
|
|
6878
|
-
multi_prompt?: Components.KlingV3MultiPromptElement[];
|
|
6879
|
-
/**
|
|
6880
|
-
* Prompt
|
|
6881
|
-
* @description Text prompt for video generation. Required unless multi_prompt is provided.
|
|
6882
|
-
* @example A mecha lands on the ground to save the city, and says "I'm here", in anime style
|
|
6883
|
-
*/
|
|
6884
|
-
prompt?: string;
|
|
6885
|
-
/**
|
|
6886
|
-
* Shot Type
|
|
6887
|
-
* @description The type of multi-shot video generation.
|
|
6888
|
-
* @default customize
|
|
6889
|
-
* @constant
|
|
6890
|
-
*/
|
|
6891
|
-
shot_type?: 'customize';
|
|
6892
|
-
/**
|
|
6893
|
-
* Voice Ids
|
|
6894
|
-
* @description Optional Voice IDs for video generation. Reference voices in your prompt with <<<voice_1>>> and <<<voice_2>>> (maximum 2 voices per task). Get voice IDs from the kling video create-voice endpoint: https://fal.ai/models/fal-ai/kling-video/create-voice
|
|
6895
|
-
*/
|
|
6896
|
-
voice_ids?: string[];
|
|
6897
|
-
}
|
|
6898
|
-
|
|
6899
6960
|
export interface SharedType_896 {
|
|
6900
6961
|
/**
|
|
6901
6962
|
* Aspect Ratio
|
|
@@ -12233,6 +12294,48 @@ export interface SharedType_266 {
|
|
|
12233
12294
|
video: Components.File;
|
|
12234
12295
|
}
|
|
12235
12296
|
|
|
12297
|
+
export interface SharedType_25d {
|
|
12298
|
+
/**
|
|
12299
|
+
* Aspect Ratio
|
|
12300
|
+
* @description Aspect ratio of the generated video.
|
|
12301
|
+
* @default 16:9
|
|
12302
|
+
* @enum {string}
|
|
12303
|
+
*/
|
|
12304
|
+
aspect_ratio?: '16:9' | '9:16' | '1:1';
|
|
12305
|
+
/**
|
|
12306
|
+
* Duration
|
|
12307
|
+
* @description Video duration in seconds (3-15s).
|
|
12308
|
+
* @default 5
|
|
12309
|
+
* @enum {string}
|
|
12310
|
+
*/
|
|
12311
|
+
duration?: '3' | '4' | '5' | '6' | '7' | '8' | '9' | '10' | '11' | '12' | '13' | '14' | '15';
|
|
12312
|
+
/**
|
|
12313
|
+
* Generate Audio
|
|
12314
|
+
* @description Whether to generate native audio for the video.
|
|
12315
|
+
* @default false
|
|
12316
|
+
*/
|
|
12317
|
+
generate_audio?: boolean;
|
|
12318
|
+
/**
|
|
12319
|
+
* Multi Prompt
|
|
12320
|
+
* @description List of prompts for multi-shot video generation.
|
|
12321
|
+
* @example null
|
|
12322
|
+
*/
|
|
12323
|
+
multi_prompt?: Components.KlingV3MultiPromptElement[];
|
|
12324
|
+
/**
|
|
12325
|
+
* Prompt
|
|
12326
|
+
* @description Text prompt for video generation. Required unless multi_prompt is provided.
|
|
12327
|
+
* @example A mecha lands on the ground to save the city, and says "I'm here", in anime style
|
|
12328
|
+
*/
|
|
12329
|
+
prompt?: string;
|
|
12330
|
+
/**
|
|
12331
|
+
* Shot Type
|
|
12332
|
+
* @description The type of multi-shot video generation.
|
|
12333
|
+
* @default customize
|
|
12334
|
+
* @constant
|
|
12335
|
+
*/
|
|
12336
|
+
shot_type?: 'customize';
|
|
12337
|
+
}
|
|
12338
|
+
|
|
12236
12339
|
export interface SharedType_23c {
|
|
12237
12340
|
/**
|
|
12238
12341
|
* @description The generated video.
|
|
@@ -27052,7 +27155,13 @@ export interface SeedvrUpscaleVideoOutput {
|
|
|
27052
27155
|
video: Components.File;
|
|
27053
27156
|
}
|
|
27054
27157
|
|
|
27055
|
-
export interface
|
|
27158
|
+
export interface SeedvrUpscaleImageSeamlessInput {
|
|
27159
|
+
/**
|
|
27160
|
+
* Enable Safety Checker
|
|
27161
|
+
* @description If set to true, the safety checker will be enabled.
|
|
27162
|
+
* @default true
|
|
27163
|
+
*/
|
|
27164
|
+
enable_safety_checker?: boolean;
|
|
27056
27165
|
/**
|
|
27057
27166
|
* Image Url
|
|
27058
27167
|
* @description The input image to be processed
|
|
@@ -27068,10 +27177,10 @@ export interface SeedvrUpscaleImageInput {
|
|
|
27068
27177
|
/**
|
|
27069
27178
|
* Output Format
|
|
27070
27179
|
* @description The format of the output image.
|
|
27071
|
-
* @default
|
|
27180
|
+
* @default png
|
|
27072
27181
|
* @enum {string}
|
|
27073
27182
|
*/
|
|
27074
|
-
output_format?: 'png' | '
|
|
27183
|
+
output_format?: 'png' | 'jpeg' | 'webp';
|
|
27075
27184
|
/**
|
|
27076
27185
|
* Seed
|
|
27077
27186
|
* @description The random seed used for the generation process.
|
|
@@ -27105,22 +27214,63 @@ export interface SeedvrUpscaleImageInput {
|
|
|
27105
27214
|
upscale_mode?: 'target' | 'factor';
|
|
27106
27215
|
}
|
|
27107
27216
|
|
|
27108
|
-
export interface
|
|
27217
|
+
export interface SeedvrUpscaleImageSeamlessOutput extends SharedType_920 {}
|
|
27218
|
+
|
|
27219
|
+
export interface SeedvrUpscaleImageInput {
|
|
27109
27220
|
/**
|
|
27110
|
-
*
|
|
27111
|
-
* @
|
|
27112
|
-
*
|
|
27113
|
-
* "url": "https://storage.googleapis.com/falserverless/example_outputs/seedvr2/image_out.png"
|
|
27114
|
-
* }
|
|
27221
|
+
* Image Url
|
|
27222
|
+
* @description The input image to be processed
|
|
27223
|
+
* @example https://storage.googleapis.com/falserverless/example_inputs/seedvr2/image_in.png
|
|
27115
27224
|
*/
|
|
27116
|
-
|
|
27225
|
+
image_url: string;
|
|
27226
|
+
/**
|
|
27227
|
+
* Noise Scale
|
|
27228
|
+
* @description The noise scale to use for the generation process.
|
|
27229
|
+
* @default 0.1
|
|
27230
|
+
*/
|
|
27231
|
+
noise_scale?: number;
|
|
27232
|
+
/**
|
|
27233
|
+
* Output Format
|
|
27234
|
+
* @description The format of the output image.
|
|
27235
|
+
* @default jpg
|
|
27236
|
+
* @enum {string}
|
|
27237
|
+
*/
|
|
27238
|
+
output_format?: 'png' | 'jpg' | 'webp';
|
|
27117
27239
|
/**
|
|
27118
27240
|
* Seed
|
|
27119
27241
|
* @description The random seed used for the generation process.
|
|
27120
27242
|
*/
|
|
27121
|
-
seed
|
|
27243
|
+
seed?: number;
|
|
27244
|
+
/**
|
|
27245
|
+
* Sync Mode
|
|
27246
|
+
* @description If `True`, the media will be returned as a data URI and the output data won't be available in the request history.
|
|
27247
|
+
* @default false
|
|
27248
|
+
*/
|
|
27249
|
+
sync_mode?: boolean;
|
|
27250
|
+
/**
|
|
27251
|
+
* Target Resolution
|
|
27252
|
+
* @description The target resolution to upscale to when `upscale_mode` is `target`.
|
|
27253
|
+
* @default 1080p
|
|
27254
|
+
* @enum {string}
|
|
27255
|
+
*/
|
|
27256
|
+
target_resolution?: '720p' | '1080p' | '1440p' | '2160p';
|
|
27257
|
+
/**
|
|
27258
|
+
* Upscale Factor
|
|
27259
|
+
* @description Upscaling factor to be used. Will multiply the dimensions with this factor when `upscale_mode` is `factor`.
|
|
27260
|
+
* @default 2
|
|
27261
|
+
*/
|
|
27262
|
+
upscale_factor?: number;
|
|
27263
|
+
/**
|
|
27264
|
+
* Upscale Mode
|
|
27265
|
+
* @description The mode to use for the upscale. If 'target', the upscale factor will be calculated based on the target resolution. If 'factor', the upscale factor will be used directly.
|
|
27266
|
+
* @default factor
|
|
27267
|
+
* @enum {string}
|
|
27268
|
+
*/
|
|
27269
|
+
upscale_mode?: 'target' | 'factor';
|
|
27122
27270
|
}
|
|
27123
27271
|
|
|
27272
|
+
export interface SeedvrUpscaleImageOutput extends SharedType_920 {}
|
|
27273
|
+
|
|
27124
27274
|
export interface SdxlControlnetUnionInpaintingInput {
|
|
27125
27275
|
/**
|
|
27126
27276
|
* Canny Image Url
|
|
@@ -50618,11 +50768,6 @@ export interface KlingVideoV3StandardTextToVideoInput {
|
|
|
50618
50768
|
* @enum {string}
|
|
50619
50769
|
*/
|
|
50620
50770
|
shot_type?: 'customize' | 'intelligent';
|
|
50621
|
-
/**
|
|
50622
|
-
* Voice Ids
|
|
50623
|
-
* @description Optional Voice IDs for video generation. Reference voices in your prompt with <<<voice_1>>> and <<<voice_2>>> (maximum 2 voices per task). Get voice IDs from the kling video create-voice endpoint: https://fal.ai/models/fal-ai/kling-video/create-voice
|
|
50624
|
-
*/
|
|
50625
|
-
voice_ids?: string[];
|
|
50626
50771
|
}
|
|
50627
50772
|
|
|
50628
50773
|
export interface KlingVideoV3StandardTextToVideoOutput {
|
|
@@ -50760,11 +50905,6 @@ export interface KlingVideoV3StandardImageToVideoInput {
|
|
|
50760
50905
|
* @example https://storage.googleapis.com/falserverless/example_inputs/kling-v3/standard-i2v/start_image.png
|
|
50761
50906
|
*/
|
|
50762
50907
|
start_image_url: string;
|
|
50763
|
-
/**
|
|
50764
|
-
* Voice Ids
|
|
50765
|
-
* @description Optional Voice IDs for video generation. Reference voices in your prompt with <<<voice_1>>> and <<<voice_2>>> (maximum 2 voices per task). Get voice IDs from the kling video create-voice endpoint: https://fal.ai/models/fal-ai/kling-video/create-voice
|
|
50766
|
-
*/
|
|
50767
|
-
voice_ids?: string[];
|
|
50768
50908
|
}
|
|
50769
50909
|
|
|
50770
50910
|
export interface KlingVideoV3StandardImageToVideoOutput {
|
|
@@ -50832,11 +50972,6 @@ export interface KlingVideoV3ProTextToVideoInput {
|
|
|
50832
50972
|
* @enum {string}
|
|
50833
50973
|
*/
|
|
50834
50974
|
shot_type?: 'customize' | 'intelligent';
|
|
50835
|
-
/**
|
|
50836
|
-
* Voice Ids
|
|
50837
|
-
* @description Optional Voice IDs for video generation. Reference voices in your prompt with <<<voice_1>>> and <<<voice_2>>> (maximum 2 voices per task). Get voice IDs from the kling video create-voice endpoint: https://fal.ai/models/fal-ai/kling-video/create-voice
|
|
50838
|
-
*/
|
|
50839
|
-
voice_ids?: string[];
|
|
50840
50975
|
}
|
|
50841
50976
|
|
|
50842
50977
|
export interface KlingVideoV3ProTextToVideoOutput {
|
|
@@ -50974,11 +51109,6 @@ export interface KlingVideoV3ProImageToVideoInput {
|
|
|
50974
51109
|
* @example https://storage.googleapis.com/falserverless/example_inputs/kling-v3/pro-i2v/start_image.png
|
|
50975
51110
|
*/
|
|
50976
51111
|
start_image_url: string;
|
|
50977
|
-
/**
|
|
50978
|
-
* Voice Ids
|
|
50979
|
-
* @description Optional Voice IDs for video generation. Reference voices in your prompt with <<<voice_1>>> and <<<voice_2>>> (maximum 2 voices per task). Get voice IDs from the kling video create-voice endpoint: https://fal.ai/models/fal-ai/kling-video/create-voice
|
|
50980
|
-
*/
|
|
50981
|
-
voice_ids?: string[];
|
|
50982
51112
|
}
|
|
50983
51113
|
|
|
50984
51114
|
export interface KlingVideoV3ProImageToVideoOutput {
|
|
@@ -52004,7 +52134,7 @@ export interface KlingVideoO3StandardVideoToVideoEditOutput {
|
|
|
52004
52134
|
video: Components.File;
|
|
52005
52135
|
}
|
|
52006
52136
|
|
|
52007
|
-
export interface KlingVideoO3StandardTextToVideoInput extends
|
|
52137
|
+
export interface KlingVideoO3StandardTextToVideoInput extends SharedType_25d {}
|
|
52008
52138
|
|
|
52009
52139
|
export interface KlingVideoO3StandardTextToVideoOutput extends SharedType_723 {}
|
|
52010
52140
|
|
|
@@ -52246,7 +52376,7 @@ export interface KlingVideoO3ProVideoToVideoEditOutput {
|
|
|
52246
52376
|
video: Components.File;
|
|
52247
52377
|
}
|
|
52248
52378
|
|
|
52249
|
-
export interface KlingVideoO3ProTextToVideoInput extends
|
|
52379
|
+
export interface KlingVideoO3ProTextToVideoInput extends SharedType_25d {}
|
|
52250
52380
|
|
|
52251
52381
|
export interface KlingVideoO3ProTextToVideoOutput extends SharedType_723 {}
|
|
52252
52382
|
|
|
@@ -53244,6 +53374,152 @@ export interface IpAdapterFaceIdInput {
|
|
|
53244
53374
|
|
|
53245
53375
|
export interface IpAdapterFaceIdOutput extends SharedType_678 {}
|
|
53246
53376
|
|
|
53377
|
+
export interface InworldTtsInput {
|
|
53378
|
+
/**
|
|
53379
|
+
* Sample Rate Hertz
|
|
53380
|
+
* @description The sample rate in Hz for the output audio.
|
|
53381
|
+
* @default 48000
|
|
53382
|
+
* @enum {integer}
|
|
53383
|
+
*/
|
|
53384
|
+
sample_rate_hertz?: 8000 | 16000 | 24000 | 32000 | 40000 | 48000;
|
|
53385
|
+
/**
|
|
53386
|
+
* Text
|
|
53387
|
+
* @description The text to synthesize into speech.
|
|
53388
|
+
* @example Hello! This is a demo of Inworld's TTS.
|
|
53389
|
+
*/
|
|
53390
|
+
text: string;
|
|
53391
|
+
/**
|
|
53392
|
+
* Voice
|
|
53393
|
+
* @description The voice to use for synthesis.
|
|
53394
|
+
* @default Craig (en)
|
|
53395
|
+
* @enum {string}
|
|
53396
|
+
*/
|
|
53397
|
+
voice?:
|
|
53398
|
+
| 'Loretta (en)'
|
|
53399
|
+
| 'Darlene (en)'
|
|
53400
|
+
| 'Marlene (en)'
|
|
53401
|
+
| 'Hank (en)'
|
|
53402
|
+
| 'Evelyn (en)'
|
|
53403
|
+
| 'Celeste (en)'
|
|
53404
|
+
| 'Pippa (en)'
|
|
53405
|
+
| 'Tessa (en)'
|
|
53406
|
+
| 'Liam (en)'
|
|
53407
|
+
| 'Callum (en)'
|
|
53408
|
+
| 'Hamish (en)'
|
|
53409
|
+
| 'Abby (en)'
|
|
53410
|
+
| 'Graham (en)'
|
|
53411
|
+
| 'Rupert (en)'
|
|
53412
|
+
| 'Mortimer (en)'
|
|
53413
|
+
| 'Snik (en)'
|
|
53414
|
+
| 'Anjali (en)'
|
|
53415
|
+
| 'Saanvi (en)'
|
|
53416
|
+
| 'Arjun (en)'
|
|
53417
|
+
| 'Claire (en)'
|
|
53418
|
+
| 'Oliver (en)'
|
|
53419
|
+
| 'Simon (en)'
|
|
53420
|
+
| 'Elliot (en)'
|
|
53421
|
+
| 'James (en)'
|
|
53422
|
+
| 'Serena (en)'
|
|
53423
|
+
| 'Gareth (en)'
|
|
53424
|
+
| 'Vinny (en)'
|
|
53425
|
+
| 'Lauren (en)'
|
|
53426
|
+
| 'Jessica (en)'
|
|
53427
|
+
| 'Ethan (en)'
|
|
53428
|
+
| 'Tyler (en)'
|
|
53429
|
+
| 'Jason (en)'
|
|
53430
|
+
| 'Chloe (en)'
|
|
53431
|
+
| 'Veronica (en)'
|
|
53432
|
+
| 'Victoria (en)'
|
|
53433
|
+
| 'Miranda (en)'
|
|
53434
|
+
| 'Sebastian (en)'
|
|
53435
|
+
| 'Victor (en)'
|
|
53436
|
+
| 'Malcolm (en)'
|
|
53437
|
+
| 'Kayla (en)'
|
|
53438
|
+
| 'Nate (en)'
|
|
53439
|
+
| 'Jake (en)'
|
|
53440
|
+
| 'Brian (en)'
|
|
53441
|
+
| 'Amina (en)'
|
|
53442
|
+
| 'Kelsey (en)'
|
|
53443
|
+
| 'Derek (en)'
|
|
53444
|
+
| 'Grant (en)'
|
|
53445
|
+
| 'Evan (en)'
|
|
53446
|
+
| 'Alex (en)'
|
|
53447
|
+
| 'Ashley (en)'
|
|
53448
|
+
| 'Craig (en)'
|
|
53449
|
+
| 'Deborah (en)'
|
|
53450
|
+
| 'Dennis (en)'
|
|
53451
|
+
| 'Edward (en)'
|
|
53452
|
+
| 'Elizabeth (en)'
|
|
53453
|
+
| 'Hades (en)'
|
|
53454
|
+
| 'Julia (en)'
|
|
53455
|
+
| 'Pixie (en)'
|
|
53456
|
+
| 'Mark (en)'
|
|
53457
|
+
| 'Olivia (en)'
|
|
53458
|
+
| 'Priya (en)'
|
|
53459
|
+
| 'Ronald (en)'
|
|
53460
|
+
| 'Sarah (en)'
|
|
53461
|
+
| 'Shaun (en)'
|
|
53462
|
+
| 'Theodore (en)'
|
|
53463
|
+
| 'Timothy (en)'
|
|
53464
|
+
| 'Wendy (en)'
|
|
53465
|
+
| 'Dominus (en)'
|
|
53466
|
+
| 'Hana (en)'
|
|
53467
|
+
| 'Clive (en)'
|
|
53468
|
+
| 'Carter (en)'
|
|
53469
|
+
| 'Blake (en)'
|
|
53470
|
+
| 'Luna (en)'
|
|
53471
|
+
| 'Yichen (zh)'
|
|
53472
|
+
| 'Xiaoyin (zh)'
|
|
53473
|
+
| 'Xinyi (zh)'
|
|
53474
|
+
| 'Jing (zh)'
|
|
53475
|
+
| 'Erik (nl)'
|
|
53476
|
+
| 'Katrien (nl)'
|
|
53477
|
+
| 'Lennart (nl)'
|
|
53478
|
+
| 'Lore (nl)'
|
|
53479
|
+
| 'Alain (fr)'
|
|
53480
|
+
| 'Hélène (fr)'
|
|
53481
|
+
| 'Mathieu (fr)'
|
|
53482
|
+
| 'Étienne (fr)'
|
|
53483
|
+
| 'Johanna (de)'
|
|
53484
|
+
| 'Josef (de)'
|
|
53485
|
+
| 'Gianni (it)'
|
|
53486
|
+
| 'Orietta (it)'
|
|
53487
|
+
| 'Asuka (ja)'
|
|
53488
|
+
| 'Satoshi (ja)'
|
|
53489
|
+
| 'Hyunwoo (ko)'
|
|
53490
|
+
| 'Minji (ko)'
|
|
53491
|
+
| 'Seojun (ko)'
|
|
53492
|
+
| 'Yoona (ko)'
|
|
53493
|
+
| 'Szymon (pl)'
|
|
53494
|
+
| 'Wojciech (pl)'
|
|
53495
|
+
| 'Heitor (pt)'
|
|
53496
|
+
| 'Maitê (pt)'
|
|
53497
|
+
| 'Diego (es)'
|
|
53498
|
+
| 'Lupita (es)'
|
|
53499
|
+
| 'Miguel (es)'
|
|
53500
|
+
| 'Rafael (es)'
|
|
53501
|
+
| 'Svetlana (ru)'
|
|
53502
|
+
| 'Elena (ru)'
|
|
53503
|
+
| 'Dmitry (ru)'
|
|
53504
|
+
| 'Nikolai (ru)'
|
|
53505
|
+
| 'Riya (hi)'
|
|
53506
|
+
| 'Manoj (hi)'
|
|
53507
|
+
| 'Yael (he)'
|
|
53508
|
+
| 'Oren (he)'
|
|
53509
|
+
| 'Nour (ar)'
|
|
53510
|
+
| 'Omar (ar)';
|
|
53511
|
+
}
|
|
53512
|
+
|
|
53513
|
+
export interface InworldTtsOutput {
|
|
53514
|
+
/**
|
|
53515
|
+
* @description Generated audio file.
|
|
53516
|
+
* @example {
|
|
53517
|
+
* "url": "https://v3b.fal.media/files/b/0a920730/38aud4s6sF7bOWFoQHaJk_tmpvv2htrpc.wav"
|
|
53518
|
+
* }
|
|
53519
|
+
*/
|
|
53520
|
+
audio: Components.File;
|
|
53521
|
+
}
|
|
53522
|
+
|
|
53247
53523
|
export interface InvisibleWatermarkInput {
|
|
53248
53524
|
/**
|
|
53249
53525
|
* Decode
|
|
@@ -61851,6 +62127,204 @@ export interface GenfocusInput {
|
|
|
61851
62127
|
|
|
61852
62128
|
export interface GenfocusOutput extends SharedType_951 {}
|
|
61853
62129
|
|
|
62130
|
+
export interface GeminiTtsInput {
|
|
62131
|
+
/**
|
|
62132
|
+
* Language Code
|
|
62133
|
+
* @description Language for multilingual synthesis. When set, steers the model to speak in the specified language. Supports 24 GA languages and 60+ Preview languages. If not set, the model auto-detects the language from the text.
|
|
62134
|
+
* @example English (US)
|
|
62135
|
+
* @example French (France)
|
|
62136
|
+
* @example Japanese (Japan)
|
|
62137
|
+
*/
|
|
62138
|
+
language_code?:
|
|
62139
|
+
| 'Arabic (Egypt)'
|
|
62140
|
+
| 'Bangla (Bangladesh)'
|
|
62141
|
+
| 'Dutch (Netherlands)'
|
|
62142
|
+
| 'English (India)'
|
|
62143
|
+
| 'English (US)'
|
|
62144
|
+
| 'French (France)'
|
|
62145
|
+
| 'German (Germany)'
|
|
62146
|
+
| 'Hindi (India)'
|
|
62147
|
+
| 'Indonesian (Indonesia)'
|
|
62148
|
+
| 'Italian (Italy)'
|
|
62149
|
+
| 'Japanese (Japan)'
|
|
62150
|
+
| 'Korean (South Korea)'
|
|
62151
|
+
| 'Marathi (India)'
|
|
62152
|
+
| 'Polish (Poland)'
|
|
62153
|
+
| 'Portuguese (Brazil)'
|
|
62154
|
+
| 'Romanian (Romania)'
|
|
62155
|
+
| 'Russian (Russia)'
|
|
62156
|
+
| 'Spanish (Spain)'
|
|
62157
|
+
| 'Tamil (India)'
|
|
62158
|
+
| 'Telugu (India)'
|
|
62159
|
+
| 'Thai (Thailand)'
|
|
62160
|
+
| 'Turkish (Turkey)'
|
|
62161
|
+
| 'Ukrainian (Ukraine)'
|
|
62162
|
+
| 'Vietnamese (Vietnam)'
|
|
62163
|
+
| 'Afrikaans (South Africa)'
|
|
62164
|
+
| 'Albanian (Albania)'
|
|
62165
|
+
| 'Amharic (Ethiopia)'
|
|
62166
|
+
| 'Arabic (World)'
|
|
62167
|
+
| 'Armenian (Armenia)'
|
|
62168
|
+
| 'Azerbaijani (Azerbaijan)'
|
|
62169
|
+
| 'Basque (Spain)'
|
|
62170
|
+
| 'Belarusian (Belarus)'
|
|
62171
|
+
| 'Bulgarian (Bulgaria)'
|
|
62172
|
+
| 'Burmese (Myanmar)'
|
|
62173
|
+
| 'Catalan (Spain)'
|
|
62174
|
+
| 'Cebuano (Philippines)'
|
|
62175
|
+
| 'Chinese Mandarin (China)'
|
|
62176
|
+
| 'Chinese Mandarin (Taiwan)'
|
|
62177
|
+
| 'Croatian (Croatia)'
|
|
62178
|
+
| 'Czech (Czech Republic)'
|
|
62179
|
+
| 'Danish (Denmark)'
|
|
62180
|
+
| 'English (Australia)'
|
|
62181
|
+
| 'English (UK)'
|
|
62182
|
+
| 'Estonian (Estonia)'
|
|
62183
|
+
| 'Filipino (Philippines)'
|
|
62184
|
+
| 'Finnish (Finland)'
|
|
62185
|
+
| 'French (Canada)'
|
|
62186
|
+
| 'Galician (Spain)'
|
|
62187
|
+
| 'Georgian (Georgia)'
|
|
62188
|
+
| 'Greek (Greece)'
|
|
62189
|
+
| 'Gujarati (India)'
|
|
62190
|
+
| 'Haitian Creole (Haiti)'
|
|
62191
|
+
| 'Hebrew (Israel)'
|
|
62192
|
+
| 'Hungarian (Hungary)'
|
|
62193
|
+
| 'Icelandic (Iceland)'
|
|
62194
|
+
| 'Javanese (Java)'
|
|
62195
|
+
| 'Kannada (India)'
|
|
62196
|
+
| 'Konkani (India)'
|
|
62197
|
+
| 'Lao (Laos)'
|
|
62198
|
+
| 'Latin (Vatican City)'
|
|
62199
|
+
| 'Latvian (Latvia)'
|
|
62200
|
+
| 'Lithuanian (Lithuania)'
|
|
62201
|
+
| 'Luxembourgish (Luxembourg)'
|
|
62202
|
+
| 'Macedonian (North Macedonia)'
|
|
62203
|
+
| 'Maithili (India)'
|
|
62204
|
+
| 'Malagasy (Madagascar)'
|
|
62205
|
+
| 'Malay (Malaysia)'
|
|
62206
|
+
| 'Malayalam (India)'
|
|
62207
|
+
| 'Mongolian (Mongolia)'
|
|
62208
|
+
| 'Nepali (Nepal)'
|
|
62209
|
+
| 'Norwegian Bokmal (Norway)'
|
|
62210
|
+
| 'Norwegian Nynorsk (Norway)'
|
|
62211
|
+
| 'Odia (India)'
|
|
62212
|
+
| 'Pashto (Afghanistan)'
|
|
62213
|
+
| 'Persian (Iran)'
|
|
62214
|
+
| 'Portuguese (Portugal)'
|
|
62215
|
+
| 'Punjabi (India)'
|
|
62216
|
+
| 'Serbian (Serbia)'
|
|
62217
|
+
| 'Sindhi (India)'
|
|
62218
|
+
| 'Sinhala (Sri Lanka)'
|
|
62219
|
+
| 'Slovak (Slovakia)'
|
|
62220
|
+
| 'Slovenian (Slovenia)'
|
|
62221
|
+
| 'Spanish (Latin America)'
|
|
62222
|
+
| 'Spanish (Mexico)'
|
|
62223
|
+
| 'Swahili (Kenya)'
|
|
62224
|
+
| 'Swedish (Sweden)'
|
|
62225
|
+
| 'Urdu (Pakistan)';
|
|
62226
|
+
/**
|
|
62227
|
+
* Model
|
|
62228
|
+
* @description Which Gemini TTS model to use. gemini-2.5-flash-tts: low latency, cost-efficient for everyday applications (recommended). gemini-2.5-pro-tts: highest quality, best for structured workflows like podcasts, audiobooks, and customer support.
|
|
62229
|
+
* @default gemini-2.5-flash-tts
|
|
62230
|
+
* @enum {string}
|
|
62231
|
+
*/
|
|
62232
|
+
model?: 'gemini-2.5-flash-tts' | 'gemini-2.5-pro-tts';
|
|
62233
|
+
/**
|
|
62234
|
+
* Output Format
|
|
62235
|
+
* @description Audio output format. mp3: compressed, small file size (recommended). wav: uncompressed PCM wrapped in WAV (24 kHz, 16-bit mono). ogg_opus: Ogg container with Opus codec, good quality-to-size ratio.
|
|
62236
|
+
* @default mp3
|
|
62237
|
+
* @enum {string}
|
|
62238
|
+
*/
|
|
62239
|
+
output_format?: 'wav' | 'mp3' | 'ogg_opus';
|
|
62240
|
+
/**
|
|
62241
|
+
* Prompt
|
|
62242
|
+
* @description The text to convert to speech. Gemini TTS supports natural-language prompting for style, pace, accent, and emotional expression — include delivery instructions inline with the text (e.g. 'Say cheerfully: Have a wonderful day!'). For multi-speaker synthesis, prefix lines with speaker aliases defined in the speakers field (e.g. 'Alice: Hello!\nBob: Hi!'). Supports inline pace/style markers like [slowly], [whispering], [excited], [extremely fast].
|
|
62243
|
+
* @example Host: Welcome back to AI Frontiers, the podcast where we explore the latest breakthroughs in artificial intelligence. Today we have a very special guest. Doctor Chen, thank you for joining us!
|
|
62244
|
+
* DrChen: Thanks for having me! I'm excited to be here.
|
|
62245
|
+
* Host: So, let's dive right in. Your recent paper on neural architecture search has been making waves. Can you tell our listeners what inspired this research?
|
|
62246
|
+
* DrChen: Absolutely. It all started when we noticed that most existing approaches were optimizing for the wrong metrics. We asked ourselves, what if we could let the model design itself?
|
|
62247
|
+
*/
|
|
62248
|
+
prompt: string;
|
|
62249
|
+
/**
|
|
62250
|
+
* Speakers
|
|
62251
|
+
* @description Multi-speaker voice configuration. When set, enables multi-speaker synthesis where different parts of the text are spoken by different voices. Each speaker needs a voice and a speaker_id (alias) that matches prefixes in the prompt. Requires gemini-2.5-pro-tts or gemini-2.5-flash-tts model. Not supported with gemini-2.5-flash-lite-preview-tts.
|
|
62252
|
+
* @example [
|
|
62253
|
+
* {
|
|
62254
|
+
* "voice": "Charon",
|
|
62255
|
+
* "speaker_id": "Host"
|
|
62256
|
+
* },
|
|
62257
|
+
* {
|
|
62258
|
+
* "voice": "Kore",
|
|
62259
|
+
* "speaker_id": "DrChen"
|
|
62260
|
+
* }
|
|
62261
|
+
* ]
|
|
62262
|
+
*/
|
|
62263
|
+
speakers?: Components.SpeakerConfig[];
|
|
62264
|
+
/**
|
|
62265
|
+
* Style Instructions
|
|
62266
|
+
* @description Optional style and delivery instructions prepended to the prompt. Controls expressiveness, accent, pace, tone, and emotional expression using natural language. Use this to separate style control from the text content. Examples: 'Speak warmly and slowly', 'Read this as a dramatic newscast', 'Use a British accent with a cheerful tone', 'Whisper mysteriously'.
|
|
62267
|
+
* @example Say the following in a warm, conversational tone
|
|
62268
|
+
* @example Read this as a dramatic newscast with gravitas
|
|
62269
|
+
* @example Speak with a British accent, cheerfully and energetically
|
|
62270
|
+
* @example This is a podcast conversation. The host is enthusiastic and curious, the guest is knowledgeable and articulate
|
|
62271
|
+
*/
|
|
62272
|
+
style_instructions?: string;
|
|
62273
|
+
/**
|
|
62274
|
+
* Temperature
|
|
62275
|
+
* @description Controls the randomness of the speech output. Higher values produce more creative and varied delivery, while lower values make the output more predictable and focused.
|
|
62276
|
+
* @default 1
|
|
62277
|
+
*/
|
|
62278
|
+
temperature?: number;
|
|
62279
|
+
/**
|
|
62280
|
+
* Voice
|
|
62281
|
+
* @description Voice preset for single-speaker synthesis. 30 distinct voices are available. Ignored when speakers is set. Popular choices: Kore (strong, firm female), Puck (upbeat, lively male), Charon (calm, professional male), Zephyr (bright, clear female), Aoede (warm, melodic female).
|
|
62282
|
+
* @default Kore
|
|
62283
|
+
* @enum {string}
|
|
62284
|
+
*/
|
|
62285
|
+
voice?:
|
|
62286
|
+
| 'Achernar'
|
|
62287
|
+
| 'Achird'
|
|
62288
|
+
| 'Algenib'
|
|
62289
|
+
| 'Algieba'
|
|
62290
|
+
| 'Alnilam'
|
|
62291
|
+
| 'Aoede'
|
|
62292
|
+
| 'Autonoe'
|
|
62293
|
+
| 'Callirrhoe'
|
|
62294
|
+
| 'Charon'
|
|
62295
|
+
| 'Despina'
|
|
62296
|
+
| 'Enceladus'
|
|
62297
|
+
| 'Erinome'
|
|
62298
|
+
| 'Fenrir'
|
|
62299
|
+
| 'Gacrux'
|
|
62300
|
+
| 'Iapetus'
|
|
62301
|
+
| 'Kore'
|
|
62302
|
+
| 'Laomedeia'
|
|
62303
|
+
| 'Leda'
|
|
62304
|
+
| 'Orus'
|
|
62305
|
+
| 'Pulcherrima'
|
|
62306
|
+
| 'Puck'
|
|
62307
|
+
| 'Rasalgethi'
|
|
62308
|
+
| 'Sadachbia'
|
|
62309
|
+
| 'Sadaltager'
|
|
62310
|
+
| 'Schedar'
|
|
62311
|
+
| 'Sulafat'
|
|
62312
|
+
| 'Umbriel'
|
|
62313
|
+
| 'Vindemiatrix'
|
|
62314
|
+
| 'Zephyr'
|
|
62315
|
+
| 'Zubenelgenubi';
|
|
62316
|
+
}
|
|
62317
|
+
|
|
62318
|
+
export interface GeminiTtsOutput {
|
|
62319
|
+
/**
|
|
62320
|
+
* @description The generated audio file.
|
|
62321
|
+
* @example {
|
|
62322
|
+
* "url": "https://v3b.fal.media/files/b/0a935d4f/Ez4NpcnFTuGsu2FHDaJTR_gemini_tts_output.mp3"
|
|
62323
|
+
* }
|
|
62324
|
+
*/
|
|
62325
|
+
audio: Components.File;
|
|
62326
|
+
}
|
|
62327
|
+
|
|
61854
62328
|
export interface GeminiFlashEditMultiInput {
|
|
61855
62329
|
/**
|
|
61856
62330
|
* Input Image Urls
|
|
@@ -75552,220 +76026,220 @@ export interface ElevenlabsSpeechToTextScribeV2Output {
|
|
|
75552
76026
|
* Words
|
|
75553
76027
|
* @description Word-level transcription details
|
|
75554
76028
|
* @example {
|
|
75555
|
-
* "
|
|
76029
|
+
* "text": "Hey,",
|
|
75556
76030
|
* "start": 0.079,
|
|
75557
76031
|
* "type": "word",
|
|
75558
|
-
* "
|
|
76032
|
+
* "end": 0.539,
|
|
75559
76033
|
* "speaker_id": "speaker_0"
|
|
75560
76034
|
* }
|
|
75561
76035
|
* @example {
|
|
75562
|
-
* "
|
|
76036
|
+
* "text": " ",
|
|
75563
76037
|
* "start": 0.539,
|
|
75564
76038
|
* "type": "spacing",
|
|
75565
|
-
* "
|
|
76039
|
+
* "end": 0.599,
|
|
75566
76040
|
* "speaker_id": "speaker_0"
|
|
75567
76041
|
* }
|
|
75568
76042
|
* @example {
|
|
75569
|
-
* "
|
|
76043
|
+
* "text": "this",
|
|
75570
76044
|
* "start": 0.599,
|
|
75571
76045
|
* "type": "word",
|
|
75572
|
-
* "
|
|
76046
|
+
* "end": 0.679,
|
|
75573
76047
|
* "speaker_id": "speaker_0"
|
|
75574
76048
|
* }
|
|
75575
76049
|
* @example {
|
|
75576
|
-
* "
|
|
76050
|
+
* "text": " ",
|
|
75577
76051
|
* "start": 0.679,
|
|
75578
76052
|
* "type": "spacing",
|
|
75579
|
-
* "
|
|
76053
|
+
* "end": 0.739,
|
|
75580
76054
|
* "speaker_id": "speaker_0"
|
|
75581
76055
|
* }
|
|
75582
76056
|
* @example {
|
|
75583
|
-
* "
|
|
76057
|
+
* "text": "is",
|
|
75584
76058
|
* "start": 0.739,
|
|
75585
76059
|
* "type": "word",
|
|
75586
|
-
* "
|
|
76060
|
+
* "end": 0.799,
|
|
75587
76061
|
* "speaker_id": "speaker_0"
|
|
75588
76062
|
* }
|
|
75589
76063
|
* @example {
|
|
75590
|
-
* "
|
|
76064
|
+
* "text": " ",
|
|
75591
76065
|
* "start": 0.799,
|
|
75592
76066
|
* "type": "spacing",
|
|
75593
|
-
* "
|
|
76067
|
+
* "end": 0.939,
|
|
75594
76068
|
* "speaker_id": "speaker_0"
|
|
75595
76069
|
* }
|
|
75596
76070
|
* @example {
|
|
75597
|
-
* "
|
|
76071
|
+
* "text": "a",
|
|
75598
76072
|
* "start": 0.939,
|
|
75599
76073
|
* "type": "word",
|
|
75600
|
-
* "
|
|
76074
|
+
* "end": 0.939,
|
|
75601
76075
|
* "speaker_id": "speaker_0"
|
|
75602
76076
|
* }
|
|
75603
76077
|
* @example {
|
|
75604
|
-
* "
|
|
76078
|
+
* "text": " ",
|
|
75605
76079
|
* "start": 0.939,
|
|
75606
76080
|
* "type": "spacing",
|
|
75607
|
-
* "
|
|
76081
|
+
* "end": 0.959,
|
|
75608
76082
|
* "speaker_id": "speaker_0"
|
|
75609
76083
|
* }
|
|
75610
76084
|
* @example {
|
|
75611
|
-
* "
|
|
76085
|
+
* "text": "test",
|
|
75612
76086
|
* "start": 0.959,
|
|
75613
76087
|
* "type": "word",
|
|
75614
|
-
* "
|
|
76088
|
+
* "end": 1.179,
|
|
75615
76089
|
* "speaker_id": "speaker_0"
|
|
75616
76090
|
* }
|
|
75617
76091
|
* @example {
|
|
75618
|
-
* "
|
|
76092
|
+
* "text": " ",
|
|
75619
76093
|
* "start": 1.179,
|
|
75620
76094
|
* "type": "spacing",
|
|
75621
|
-
* "
|
|
76095
|
+
* "end": 1.219,
|
|
75622
76096
|
* "speaker_id": "speaker_0"
|
|
75623
76097
|
* }
|
|
75624
76098
|
* @example {
|
|
75625
|
-
* "
|
|
76099
|
+
* "text": "recording",
|
|
75626
76100
|
* "start": 1.22,
|
|
75627
76101
|
* "type": "word",
|
|
75628
|
-
* "
|
|
76102
|
+
* "end": 1.719,
|
|
75629
76103
|
* "speaker_id": "speaker_0"
|
|
75630
76104
|
* }
|
|
75631
76105
|
* @example {
|
|
75632
|
-
* "
|
|
76106
|
+
* "text": " ",
|
|
75633
76107
|
* "start": 1.719,
|
|
75634
76108
|
* "type": "spacing",
|
|
75635
|
-
* "
|
|
76109
|
+
* "end": 1.719,
|
|
75636
76110
|
* "speaker_id": "speaker_0"
|
|
75637
76111
|
* }
|
|
75638
76112
|
* @example {
|
|
75639
|
-
* "
|
|
76113
|
+
* "text": "for",
|
|
75640
76114
|
* "start": 1.719,
|
|
75641
76115
|
* "type": "word",
|
|
75642
|
-
* "
|
|
76116
|
+
* "end": 1.86,
|
|
75643
76117
|
* "speaker_id": "speaker_0"
|
|
75644
76118
|
* }
|
|
75645
76119
|
* @example {
|
|
75646
|
-
* "
|
|
76120
|
+
* "text": " ",
|
|
75647
76121
|
* "start": 1.86,
|
|
75648
76122
|
* "type": "spacing",
|
|
75649
|
-
* "
|
|
76123
|
+
* "end": 1.879,
|
|
75650
76124
|
* "speaker_id": "speaker_0"
|
|
75651
76125
|
* }
|
|
75652
76126
|
* @example {
|
|
75653
|
-
* "
|
|
76127
|
+
* "text": "Scribe",
|
|
75654
76128
|
* "start": 1.879,
|
|
75655
76129
|
* "type": "word",
|
|
75656
|
-
* "
|
|
76130
|
+
* "end": 2.24,
|
|
75657
76131
|
* "speaker_id": "speaker_0"
|
|
75658
76132
|
* }
|
|
75659
76133
|
* @example {
|
|
75660
|
-
* "
|
|
76134
|
+
* "text": " ",
|
|
75661
76135
|
* "start": 2.24,
|
|
75662
76136
|
* "type": "spacing",
|
|
75663
|
-
* "
|
|
76137
|
+
* "end": 2.319,
|
|
75664
76138
|
* "speaker_id": "speaker_0"
|
|
75665
76139
|
* }
|
|
75666
76140
|
* @example {
|
|
75667
|
-
* "
|
|
76141
|
+
* "text": "version",
|
|
75668
76142
|
* "start": 2.319,
|
|
75669
76143
|
* "type": "word",
|
|
75670
|
-
* "
|
|
76144
|
+
* "end": 2.759,
|
|
75671
76145
|
* "speaker_id": "speaker_0"
|
|
75672
76146
|
* }
|
|
75673
76147
|
* @example {
|
|
75674
|
-
* "
|
|
76148
|
+
* "text": " ",
|
|
75675
76149
|
* "start": 2.759,
|
|
75676
76150
|
* "type": "spacing",
|
|
75677
|
-
* "
|
|
76151
|
+
* "end": 2.779,
|
|
75678
76152
|
* "speaker_id": "speaker_0"
|
|
75679
76153
|
* }
|
|
75680
76154
|
* @example {
|
|
75681
|
-
* "
|
|
76155
|
+
* "text": "two,",
|
|
75682
76156
|
* "start": 2.779,
|
|
75683
76157
|
* "type": "word",
|
|
75684
|
-
* "
|
|
76158
|
+
* "end": 3.379,
|
|
75685
76159
|
* "speaker_id": "speaker_0"
|
|
75686
76160
|
* }
|
|
75687
76161
|
* @example {
|
|
75688
|
-
* "
|
|
76162
|
+
* "text": " ",
|
|
75689
76163
|
* "start": 3.379,
|
|
75690
76164
|
* "type": "spacing",
|
|
75691
|
-
* "
|
|
76165
|
+
* "end": 3.399,
|
|
75692
76166
|
* "speaker_id": "speaker_0"
|
|
75693
76167
|
* }
|
|
75694
76168
|
* @example {
|
|
75695
|
-
* "
|
|
76169
|
+
* "text": "which",
|
|
75696
76170
|
* "start": 3.399,
|
|
75697
76171
|
* "type": "word",
|
|
75698
|
-
* "
|
|
76172
|
+
* "end": 3.519,
|
|
75699
76173
|
* "speaker_id": "speaker_0"
|
|
75700
76174
|
* }
|
|
75701
76175
|
* @example {
|
|
75702
|
-
* "
|
|
76176
|
+
* "text": " ",
|
|
75703
76177
|
* "start": 3.519,
|
|
75704
76178
|
* "type": "spacing",
|
|
75705
|
-
* "
|
|
76179
|
+
* "end": 3.539,
|
|
75706
76180
|
* "speaker_id": "speaker_0"
|
|
75707
76181
|
* }
|
|
75708
76182
|
* @example {
|
|
75709
|
-
* "
|
|
76183
|
+
* "text": "is",
|
|
75710
76184
|
* "start": 3.539,
|
|
75711
76185
|
* "type": "word",
|
|
75712
|
-
* "
|
|
76186
|
+
* "end": 3.659,
|
|
75713
76187
|
* "speaker_id": "speaker_0"
|
|
75714
76188
|
* }
|
|
75715
76189
|
* @example {
|
|
75716
|
-
* "
|
|
76190
|
+
* "text": " ",
|
|
75717
76191
|
* "start": 3.659,
|
|
75718
76192
|
* "type": "spacing",
|
|
75719
|
-
* "
|
|
76193
|
+
* "end": 3.699,
|
|
75720
76194
|
* "speaker_id": "speaker_0"
|
|
75721
76195
|
* }
|
|
75722
76196
|
* @example {
|
|
75723
|
-
* "
|
|
76197
|
+
* "text": "now",
|
|
75724
76198
|
* "start": 3.699,
|
|
75725
76199
|
* "type": "word",
|
|
75726
|
-
* "
|
|
76200
|
+
* "end": 3.839,
|
|
75727
76201
|
* "speaker_id": "speaker_0"
|
|
75728
76202
|
* }
|
|
75729
76203
|
* @example {
|
|
75730
|
-
* "
|
|
76204
|
+
* "text": " ",
|
|
75731
76205
|
* "start": 3.839,
|
|
75732
76206
|
* "type": "spacing",
|
|
75733
|
-
* "
|
|
76207
|
+
* "end": 3.839,
|
|
75734
76208
|
* "speaker_id": "speaker_0"
|
|
75735
76209
|
* }
|
|
75736
76210
|
* @example {
|
|
75737
|
-
* "
|
|
76211
|
+
* "text": "available",
|
|
75738
76212
|
* "start": 3.839,
|
|
75739
76213
|
* "type": "word",
|
|
75740
|
-
* "
|
|
76214
|
+
* "end": 4.319,
|
|
75741
76215
|
* "speaker_id": "speaker_0"
|
|
75742
76216
|
* }
|
|
75743
76217
|
* @example {
|
|
75744
|
-
* "
|
|
76218
|
+
* "text": " ",
|
|
75745
76219
|
* "start": 4.319,
|
|
75746
76220
|
* "type": "spacing",
|
|
75747
|
-
* "
|
|
76221
|
+
* "end": 4.339,
|
|
75748
76222
|
* "speaker_id": "speaker_0"
|
|
75749
76223
|
* }
|
|
75750
76224
|
* @example {
|
|
75751
|
-
* "
|
|
76225
|
+
* "text": "on",
|
|
75752
76226
|
* "start": 4.339,
|
|
75753
76227
|
* "type": "word",
|
|
75754
|
-
* "
|
|
76228
|
+
* "end": 4.579,
|
|
75755
76229
|
* "speaker_id": "speaker_0"
|
|
75756
76230
|
* }
|
|
75757
76231
|
* @example {
|
|
75758
|
-
* "
|
|
76232
|
+
* "text": " ",
|
|
75759
76233
|
* "start": 4.579,
|
|
75760
76234
|
* "type": "spacing",
|
|
75761
|
-
* "
|
|
76235
|
+
* "end": 4.599,
|
|
75762
76236
|
* "speaker_id": "speaker_0"
|
|
75763
76237
|
* }
|
|
75764
76238
|
* @example {
|
|
75765
|
-
* "
|
|
76239
|
+
* "text": "fal.ai.",
|
|
75766
76240
|
* "start": 4.599,
|
|
75767
76241
|
* "type": "word",
|
|
75768
|
-
* "
|
|
76242
|
+
* "end": 5.699,
|
|
75769
76243
|
* "speaker_id": "speaker_0"
|
|
75770
76244
|
* }
|
|
75771
76245
|
*/
|
|
@@ -80409,6 +80883,32 @@ export interface BytedanceDreamactorV2Output {
|
|
|
80409
80883
|
}
|
|
80410
80884
|
|
|
80411
80885
|
export interface BytedanceUpscalerUpscaleVideoInput {
|
|
80886
|
+
/**
|
|
80887
|
+
* Enhancement Preset
|
|
80888
|
+
* @description The enhancement preset optimized for specific video scenarios. 'general' is a general-purpose template, 'ugc' targets user-generated short videos, 'short_series' is for short dramas, 'aigc' is for AI-generated content, and 'old_film' is for classic film restoration.
|
|
80889
|
+
* @default general
|
|
80890
|
+
* @enum {string}
|
|
80891
|
+
*/
|
|
80892
|
+
enhancement_preset?: 'general' | 'ugc' | 'short_series' | 'aigc' | 'old_film';
|
|
80893
|
+
/**
|
|
80894
|
+
* Enhancement Tier
|
|
80895
|
+
* @description The enhancement quality tier. 'fast' provides essential upscaling with good speed, 'standard' uses adaptive algorithms for better visual texture, and 'pro' uses large-model restoration for cinematic quality (longer processing time, and 10 times the cost of `standard` and `fast`).
|
|
80896
|
+
* @default standard
|
|
80897
|
+
* @enum {string}
|
|
80898
|
+
*/
|
|
80899
|
+
enhancement_tier?: 'fast' | 'standard' | 'pro';
|
|
80900
|
+
/**
|
|
80901
|
+
* Fidelity
|
|
80902
|
+
* @description The enhancement intensity. 'high' applies mild enhancement while keeping visual texture close to the source video. 'medium' provides a balanced image quality enhancement.
|
|
80903
|
+
* @default high
|
|
80904
|
+
* @enum {string}
|
|
80905
|
+
*/
|
|
80906
|
+
fidelity?: 'high' | 'medium';
|
|
80907
|
+
/**
|
|
80908
|
+
* Scale Ratio
|
|
80909
|
+
* @description The scaling ratio for the output video resolution. When set, overrides target_resolution and scales the input resolution by this factor (e.g., 2.0 doubles the resolution). Range: 1.1 to 10.0. Please note that this is valid only up to 4k resolution, and trying to scale beyond 4k will result in an error. (4k is defined as having a total pixel count of 3840x2160).
|
|
80910
|
+
*/
|
|
80911
|
+
scale_ratio?: number;
|
|
80412
80912
|
/**
|
|
80413
80913
|
* Target Fps
|
|
80414
80914
|
* @description The target FPS of the video to upscale.
|
|
@@ -81342,6 +81842,13 @@ export interface BenV2VideoInput {
|
|
|
81342
81842
|
* @description Optional RGB values (0-255) for the background color. If not provided, the background will be transparent. For ex: [0, 0, 0]
|
|
81343
81843
|
*/
|
|
81344
81844
|
background_color?: [number, number, number];
|
|
81845
|
+
/**
|
|
81846
|
+
* Output Format
|
|
81847
|
+
* @description Output video format. Use "webm" for true transparency support (VP9 codec with alpha channel). MP4 format does not support transparency and will render transparent areas as black.
|
|
81848
|
+
* @default mp4
|
|
81849
|
+
* @enum {string}
|
|
81850
|
+
*/
|
|
81851
|
+
output_format?: 'mp4' | 'webm';
|
|
81345
81852
|
/**
|
|
81346
81853
|
* Seed
|
|
81347
81854
|
* @description Random seed for reproducible generation.
|
|
@@ -81390,8 +81897,8 @@ export interface BenV2ImageOutput {
|
|
|
81390
81897
|
/**
|
|
81391
81898
|
* @description The output image after background removal.
|
|
81392
81899
|
* @example {
|
|
81393
|
-
* "height": 512,
|
|
81394
81900
|
* "file_size": 423052,
|
|
81901
|
+
* "height": 512,
|
|
81395
81902
|
* "file_name": "zrZNETpI_ul2jonraqpxN_a57c3f3825d9418f8b3d39cde87c3310.png",
|
|
81396
81903
|
* "content_type": "image/png",
|
|
81397
81904
|
* "url": "https://storage.googleapis.com/falserverless/gallery/Ben2/zrZNETpI_ul2jonraqpxN_a57c3f3825d9418f8b3d39cde87c3310.png",
|
|
@@ -84319,18 +84826,18 @@ export interface BriaEmbedProductInput {
|
|
|
84319
84826
|
* {
|
|
84320
84827
|
* "coordinates": {
|
|
84321
84828
|
* "y": 317,
|
|
84829
|
+
* "width": 100,
|
|
84322
84830
|
* "height": 300,
|
|
84323
|
-
* "x": 300
|
|
84324
|
-
* "width": 100
|
|
84831
|
+
* "x": 300
|
|
84325
84832
|
* },
|
|
84326
84833
|
* "image_source": "https://bria-datasets.s3.us-east-1.amazonaws.com/embed-product/a_standing_lamp_over_white_background_0.png"
|
|
84327
84834
|
* },
|
|
84328
84835
|
* {
|
|
84329
84836
|
* "coordinates": {
|
|
84330
84837
|
* "y": 287,
|
|
84838
|
+
* "width": 120,
|
|
84331
84839
|
* "height": 156,
|
|
84332
|
-
* "x": 646
|
|
84333
|
-
* "width": 120
|
|
84840
|
+
* "x": 646
|
|
84334
84841
|
* },
|
|
84335
84842
|
* "image_source": "https://bria-datasets.s3.us-east-1.amazonaws.com/embed-product/a_wall_picture_on_white_background_0.png"
|
|
84336
84843
|
* }
|