vargai 0.4.0-alpha108 → 0.4.0-alpha111
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -1
- package/src/ai-sdk/generate-video.ts +14 -8
- package/src/ai-sdk/providers/editly/backends/types.ts +4 -0
- package/src/ai-sdk/providers/editly/layers.ts +39 -19
- package/src/ai-sdk/providers/editly/rendi/index.ts +214 -3
- package/src/ai-sdk/providers/fal.ts +133 -2
- package/src/ai-sdk/providers/model-rules.ts +18 -0
- package/src/ai-sdk/providers/varg.ts +7 -4
- package/src/core/registry/resolver.ts +4 -0
- package/src/core/schema/types.ts +65 -0
- package/src/definitions/actions/video.ts +24 -4
- package/src/definitions/models/elevenlabs.ts +14 -1
- package/src/definitions/models/flux.ts +17 -1
- package/src/definitions/models/heygen.ts +20 -1
- package/src/definitions/models/index.ts +68 -2
- package/src/definitions/models/kling.ts +326 -1
- package/src/definitions/models/llama.ts +13 -1
- package/src/definitions/models/ltx-a2v.ts +17 -1
- package/src/definitions/models/nano-banana-2.ts +23 -1
- package/src/definitions/models/nano-banana-pro.ts +17 -1
- package/src/definitions/models/omnihuman.ts +13 -1
- package/src/definitions/models/phota.ts +29 -1
- package/src/definitions/models/qwen-image-2.ts +14 -1
- package/src/definitions/models/recraft-v4.ts +13 -1
- package/src/definitions/models/reve.ts +13 -1
- package/src/definitions/models/seedance.ts +21 -1
- package/src/definitions/models/sonauto.ts +13 -1
- package/src/definitions/models/soul.ts +13 -1
- package/src/definitions/models/veed-fabric.ts +16 -1
- package/src/definitions/models/wan.ts +23 -3
- package/src/definitions/models/whisper.ts +25 -1
- package/src/providers/fal.ts +97 -0
- package/src/react/elements.ts +81 -0
- package/src/react/index.ts +8 -0
- package/src/react/renderers/burn-captions.ts +83 -19
- package/src/react/renderers/captions.ts +292 -25
- package/src/react/renderers/emoji.ts +256 -0
- package/src/react/renderers/fonts.ts +509 -0
- package/src/react/renderers/progress.ts +12 -1
- package/src/react/renderers/render.ts +83 -4
- package/src/react/renderers/video.ts +24 -3
- package/src/react/resolve.ts +295 -4
- package/src/react/resolved-element.ts +13 -6
- package/src/react/types.ts +87 -4
- package/src/speech/map-segments.ts +2 -1
- package/src/speech/parse-alignment.ts +111 -6
- package/src/speech/word-segmenter.ts +172 -0
package/package.json
CHANGED
|
@@ -28,6 +28,7 @@
|
|
|
28
28
|
"@commitlint/config-conventional": "^20.0.0",
|
|
29
29
|
"@size-limit/preset-small-lib": "^11.2.0",
|
|
30
30
|
"@types/bun": "latest",
|
|
31
|
+
"@types/opentype.js": "^1.3.9",
|
|
31
32
|
"@types/react": "^19.2.7",
|
|
32
33
|
"husky": "^9.1.7",
|
|
33
34
|
"lint-staged": "^16.2.7"
|
|
@@ -58,9 +59,11 @@
|
|
|
58
59
|
"ai": "^6.0.26",
|
|
59
60
|
"apify-client": "^2.20.0",
|
|
60
61
|
"citty": "^0.1.6",
|
|
62
|
+
"fflate": "^0.8.2",
|
|
61
63
|
"fluent-ffmpeg": "^2.1.3",
|
|
62
64
|
"groq-sdk": "^0.36.0",
|
|
63
65
|
"ink": "^6.5.1",
|
|
66
|
+
"opentype.js": "^1.3.4",
|
|
64
67
|
"p-limit": "^6.2.0",
|
|
65
68
|
"p-map": "^7.0.4",
|
|
66
69
|
"react": "^19.2.0",
|
|
@@ -104,7 +107,7 @@
|
|
|
104
107
|
"license": "Apache-2.0",
|
|
105
108
|
"author": "varg.ai <hello@varg.ai> (https://varg.ai)",
|
|
106
109
|
"sideEffects": false,
|
|
107
|
-
"version": "0.4.0-alpha108",
|
|
110
|
+
"version": "0.4.0-alpha111",
|
|
108
111
|
"exports": {
|
|
109
112
|
".": "./src/index.ts",
|
|
110
113
|
"./ai": "./src/ai-sdk/index.ts",
|
|
@@ -10,9 +10,9 @@ export type GenerateVideoPrompt =
|
|
|
10
10
|
| string
|
|
11
11
|
| {
|
|
12
12
|
text?: string;
|
|
13
|
-
images?: Array<DataContent>;
|
|
14
|
-
audio?: DataContent;
|
|
15
|
-
video?: DataContent;
|
|
13
|
+
images?: DataContent | Array<DataContent>;
|
|
14
|
+
audio?: DataContent | Array<DataContent>;
|
|
15
|
+
video?: DataContent | Array<DataContent>;
|
|
16
16
|
};
|
|
17
17
|
|
|
18
18
|
export interface GenerateVideoOptions {
|
|
@@ -76,6 +76,12 @@ function toUint8Array(data: DataContent): Uint8Array {
|
|
|
76
76
|
return data;
|
|
77
77
|
}
|
|
78
78
|
|
|
79
|
+
/** Normalize singular or array to array */
|
|
80
|
+
function toArray<T>(value: T | T[] | undefined): T[] {
|
|
81
|
+
if (value == null) return [];
|
|
82
|
+
return Array.isArray(value) ? value : [value];
|
|
83
|
+
}
|
|
84
|
+
|
|
79
85
|
function normalizePrompt(prompt: GenerateVideoPrompt): {
|
|
80
86
|
prompt: string | undefined;
|
|
81
87
|
files: ImageModelV3File[] | undefined;
|
|
@@ -86,7 +92,7 @@ function normalizePrompt(prompt: GenerateVideoPrompt): {
|
|
|
86
92
|
|
|
87
93
|
const files: ImageModelV3File[] = [];
|
|
88
94
|
|
|
89
|
-
for (const img of prompt.images
|
|
95
|
+
for (const img of toArray(prompt.images)) {
|
|
90
96
|
files.push({
|
|
91
97
|
type: "file",
|
|
92
98
|
mediaType: "image/png",
|
|
@@ -94,19 +100,19 @@ function normalizePrompt(prompt: GenerateVideoPrompt): {
|
|
|
94
100
|
});
|
|
95
101
|
}
|
|
96
102
|
|
|
97
|
-
|
|
103
|
+
for (const aud of toArray(prompt.audio)) {
|
|
98
104
|
files.push({
|
|
99
105
|
type: "file",
|
|
100
106
|
mediaType: "audio/mpeg",
|
|
101
|
-
data: toUint8Array(
|
|
107
|
+
data: toUint8Array(aud),
|
|
102
108
|
});
|
|
103
109
|
}
|
|
104
110
|
|
|
105
|
-
|
|
111
|
+
for (const vid of toArray(prompt.video)) {
|
|
106
112
|
files.push({
|
|
107
113
|
type: "file",
|
|
108
114
|
mediaType: "video/mp4",
|
|
109
|
-
data: toUint8Array(
|
|
115
|
+
data: toUint8Array(vid),
|
|
110
116
|
});
|
|
111
117
|
}
|
|
112
118
|
|
|
@@ -47,6 +47,10 @@ export interface FFmpegRunOptions {
|
|
|
47
47
|
verbose?: boolean;
|
|
48
48
|
/** Max execution time in seconds (used by cloud backends like Rendi, ignored by local) */
|
|
49
49
|
timeoutSeconds?: number;
|
|
50
|
+
/** Extra files (e.g. fonts) to include alongside inputs.
|
|
51
|
+
* When present, cloud backends like Rendi use compressed folder mode
|
|
52
|
+
* (input_compressed_folder) to bundle all files together. */
|
|
53
|
+
auxiliaryFiles?: { url: string; fileName: string }[];
|
|
50
54
|
}
|
|
51
55
|
|
|
52
56
|
export type FFmpegOutput =
|
|
@@ -43,12 +43,20 @@ function getCropPositionExpr(position: CropPosition | undefined): {
|
|
|
43
43
|
}
|
|
44
44
|
|
|
45
45
|
function escapeDrawText(text: string): string {
|
|
46
|
-
return
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
46
|
+
return (
|
|
47
|
+
text
|
|
48
|
+
.replace(/\\/g, "\\\\")
|
|
49
|
+
.replace(/'/g, "'\\''")
|
|
50
|
+
.replace(/:/g, "\\:")
|
|
51
|
+
.replace(/\[/g, "\\[")
|
|
52
|
+
.replace(/\]/g, "\\]")
|
|
53
|
+
// Replace straight double quotes with typographic curly quotes.
|
|
54
|
+
// Straight " breaks Rendi's command parser (the -filter_complex value is
|
|
55
|
+
// wrapped in double quotes, so an unescaped " inside it terminates the
|
|
56
|
+
// argument and causes ffmpeg to interpret the next word as a file path).
|
|
57
|
+
.replace(/\u201C|\u201D/g, "\u201C") // normalise any existing curly quotes
|
|
58
|
+
.replace(/"/g, "\u201C")
|
|
59
|
+
);
|
|
52
60
|
}
|
|
53
61
|
|
|
54
62
|
function parseSize(val: number | string | undefined, base: number): number {
|
|
@@ -161,11 +169,15 @@ export function getVideoFilter(
|
|
|
161
169
|
};
|
|
162
170
|
}
|
|
163
171
|
|
|
164
|
-
let scaleFilter
|
|
165
|
-
if (layer.resizeMode === "
|
|
166
|
-
scaleFilter = `scale=${width}:${height}:force_original_aspect_ratio=
|
|
172
|
+
let scaleFilter: string;
|
|
173
|
+
if (layer.resizeMode === "contain") {
|
|
174
|
+
scaleFilter = `scale=${width}:${height}:force_original_aspect_ratio=decrease`;
|
|
167
175
|
} else if (layer.resizeMode === "stretch") {
|
|
168
176
|
scaleFilter = `scale=${width}:${height}`;
|
|
177
|
+
} else {
|
|
178
|
+
// Default ("cover" or undefined): scale up to fill canvas, crop excess
|
|
179
|
+
const { x, y } = getCropPositionExpr(layer.cropPosition);
|
|
180
|
+
scaleFilter = `scale=${width}:${height}:force_original_aspect_ratio=increase,crop=${width}:${height}:${x}:${y}`;
|
|
169
181
|
}
|
|
170
182
|
|
|
171
183
|
filters.push(scaleFilter);
|
|
@@ -219,11 +231,15 @@ export function getVideoFilterWithTrim(
|
|
|
219
231
|
filters.push("fps=30");
|
|
220
232
|
filters.push("settb=1/30");
|
|
221
233
|
} else {
|
|
222
|
-
let scaleFilter
|
|
223
|
-
if (layer.resizeMode === "
|
|
224
|
-
scaleFilter = `scale=${width}:${height}:force_original_aspect_ratio=
|
|
234
|
+
let scaleFilter: string;
|
|
235
|
+
if (layer.resizeMode === "contain") {
|
|
236
|
+
scaleFilter = `scale=${width}:${height}:force_original_aspect_ratio=decrease`;
|
|
225
237
|
} else if (layer.resizeMode === "stretch") {
|
|
226
238
|
scaleFilter = `scale=${width}:${height}`;
|
|
239
|
+
} else {
|
|
240
|
+
// Default ("cover" or undefined): scale up to fill canvas, crop excess
|
|
241
|
+
const { x, y } = getCropPositionExpr(layer.cropPosition);
|
|
242
|
+
scaleFilter = `scale=${width}:${height}:force_original_aspect_ratio=increase,crop=${width}:${height}:${x}:${y}`;
|
|
227
243
|
}
|
|
228
244
|
|
|
229
245
|
filters.push(scaleFilter);
|
|
@@ -386,11 +402,14 @@ export function getImageFilter(
|
|
|
386
402
|
};
|
|
387
403
|
}
|
|
388
404
|
|
|
389
|
-
let scaleFilter
|
|
390
|
-
if (layer.resizeMode === "
|
|
391
|
-
scaleFilter = `scale=${width}:${height}:force_original_aspect_ratio=
|
|
405
|
+
let scaleFilter: string;
|
|
406
|
+
if (layer.resizeMode === "contain") {
|
|
407
|
+
scaleFilter = `scale=${width}:${height}:force_original_aspect_ratio=decrease`;
|
|
392
408
|
} else if (layer.resizeMode === "stretch") {
|
|
393
409
|
scaleFilter = `scale=${width}:${height}`;
|
|
410
|
+
} else {
|
|
411
|
+
// Default ("cover" or undefined): scale up to fill canvas, crop excess
|
|
412
|
+
scaleFilter = `scale=${width}:${height}:force_original_aspect_ratio=increase,crop=${width}:${height}`;
|
|
394
413
|
}
|
|
395
414
|
filters.push(scaleFilter);
|
|
396
415
|
filters.push(`pad=${width}:${height}:(ow-iw)/2:(oh-ih)/2:black`);
|
|
@@ -532,13 +551,14 @@ export function getImageOverlayFilter(
|
|
|
532
551
|
let scaleExpr: string;
|
|
533
552
|
if (!hasExplicitHeight) {
|
|
534
553
|
scaleExpr = `scale=${targetWidth}:-2`;
|
|
535
|
-
} else if (layer.resizeMode === "
|
|
536
|
-
|
|
537
|
-
scaleExpr = `scale=${targetWidth}:${targetHeight}:force_original_aspect_ratio=increase,crop=${targetWidth}:${targetHeight}:${x}:${y}`;
|
|
554
|
+
} else if (layer.resizeMode === "contain") {
|
|
555
|
+
scaleExpr = `scale=${targetWidth}:${targetHeight}:force_original_aspect_ratio=decrease,pad=${targetWidth}:${targetHeight}:(ow-iw)/2:(oh-ih)/2:black`;
|
|
538
556
|
} else if (layer.resizeMode === "stretch") {
|
|
539
557
|
scaleExpr = `scale=${targetWidth}:${targetHeight}`;
|
|
540
558
|
} else {
|
|
541
|
-
|
|
559
|
+
// Default ("cover" or undefined): scale up to fill, crop excess
|
|
560
|
+
const { x, y } = getCropPositionExpr(layer.cropPosition);
|
|
561
|
+
scaleExpr = `scale=${targetWidth}:${targetHeight}:force_original_aspect_ratio=increase,crop=${targetWidth}:${targetHeight}:${x}:${y}`;
|
|
542
562
|
}
|
|
543
563
|
|
|
544
564
|
const zoomDir = layer.zoomDirection ?? null;
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { zipSync } from "fflate";
|
|
1
2
|
import sharp from "sharp";
|
|
2
3
|
import { File } from "../../../file";
|
|
3
4
|
import type { StorageProvider } from "../../../storage/types";
|
|
@@ -128,6 +129,11 @@ export class RendiBackend implements FFmpegBackend {
|
|
|
128
129
|
}
|
|
129
130
|
|
|
130
131
|
async run(options: FFmpegRunOptions): Promise<FFmpegRunResult> {
|
|
132
|
+
// When auxiliary files (e.g. fonts) are present, use compressed folder mode
|
|
133
|
+
if (options.auxiliaryFiles && options.auxiliaryFiles.length > 0) {
|
|
134
|
+
return this.runWithCompressedFolder(options);
|
|
135
|
+
}
|
|
136
|
+
|
|
131
137
|
let {
|
|
132
138
|
inputs,
|
|
133
139
|
filterComplex,
|
|
@@ -287,6 +293,194 @@ export class RendiBackend implements FFmpegBackend {
|
|
|
287
293
|
throw new Error("Rendi command timed out");
|
|
288
294
|
}
|
|
289
295
|
|
|
296
|
+
/**
|
|
297
|
+
* Run an FFmpeg command using Rendi's input_compressed_folder mode.
|
|
298
|
+
*
|
|
299
|
+
* Used when auxiliary files (e.g. fonts for subtitle rendering) need to be
|
|
300
|
+
* bundled alongside regular inputs. Creates a ZIP containing all input files
|
|
301
|
+
* and auxiliary files, uploads it to storage, and submits to Rendi with
|
|
302
|
+
* `input_compressed_folder` instead of `input_files`.
|
|
303
|
+
*
|
|
304
|
+
* Inside the ZIP, all files are at the root level. The ffmpeg command
|
|
305
|
+
* references files by their bare filenames (not placeholders).
|
|
306
|
+
*/
|
|
307
|
+
private async runWithCompressedFolder(
|
|
308
|
+
options: FFmpegRunOptions,
|
|
309
|
+
): Promise<FFmpegRunResult> {
|
|
310
|
+
const {
|
|
311
|
+
inputs,
|
|
312
|
+
videoFilter,
|
|
313
|
+
filterComplex,
|
|
314
|
+
outputArgs = [],
|
|
315
|
+
outputPath,
|
|
316
|
+
verbose,
|
|
317
|
+
auxiliaryFiles = [],
|
|
318
|
+
} = options;
|
|
319
|
+
|
|
320
|
+
// 1. Resolve all input files to URLs
|
|
321
|
+
const inputEntries: { fileName: string; url: string }[] = [];
|
|
322
|
+
for (const input of inputs ?? []) {
|
|
323
|
+
const path = this.getInputPath(input);
|
|
324
|
+
const url = await this.resolvePath(path);
|
|
325
|
+
// Extract filename from URL or path
|
|
326
|
+
const fileName =
|
|
327
|
+
url.split("/").pop()?.split("?")[0] ?? `input_${inputEntries.length}`;
|
|
328
|
+
inputEntries.push({ fileName, url });
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
// 2. Download all files (inputs + auxiliary) into memory
|
|
332
|
+
const zipContents: Record<string, Uint8Array> = {};
|
|
333
|
+
|
|
334
|
+
const downloadTasks = [
|
|
335
|
+
...inputEntries.map(async (entry) => {
|
|
336
|
+
const res = await fetch(entry.url);
|
|
337
|
+
if (!res.ok)
|
|
338
|
+
throw new Error(
|
|
339
|
+
`Failed to download input ${entry.fileName}: ${res.status}`,
|
|
340
|
+
);
|
|
341
|
+
zipContents[entry.fileName] = new Uint8Array(await res.arrayBuffer());
|
|
342
|
+
}),
|
|
343
|
+
...auxiliaryFiles.map(async (file) => {
|
|
344
|
+
const res = await fetch(file.url);
|
|
345
|
+
if (!res.ok)
|
|
346
|
+
throw new Error(
|
|
347
|
+
`Failed to download auxiliary file ${file.fileName}: ${res.status}`,
|
|
348
|
+
);
|
|
349
|
+
zipContents[file.fileName] = new Uint8Array(await res.arrayBuffer());
|
|
350
|
+
}),
|
|
351
|
+
];
|
|
352
|
+
|
|
353
|
+
await Promise.all(downloadTasks);
|
|
354
|
+
|
|
355
|
+
if (verbose) {
|
|
356
|
+
const totalSize = Object.values(zipContents).reduce(
|
|
357
|
+
(sum, buf) => sum + buf.length,
|
|
358
|
+
0,
|
|
359
|
+
);
|
|
360
|
+
console.log(
|
|
361
|
+
`[rendi] creating ZIP with ${Object.keys(zipContents).length} files (${(totalSize / 1024 / 1024).toFixed(1)} MB)`,
|
|
362
|
+
);
|
|
363
|
+
}
|
|
364
|
+
|
|
365
|
+
// 3. Create ZIP
|
|
366
|
+
const zipData = zipSync(zipContents, { level: 1 }); // fast compression
|
|
367
|
+
|
|
368
|
+
// 4. Upload ZIP to storage
|
|
369
|
+
const zipKey = `internal/rendi-compressed-${Date.now()}.zip`;
|
|
370
|
+
const zipUrl = await this.storage.upload(
|
|
371
|
+
zipData,
|
|
372
|
+
zipKey,
|
|
373
|
+
"application/zip",
|
|
374
|
+
);
|
|
375
|
+
|
|
376
|
+
if (verbose) {
|
|
377
|
+
console.log(
|
|
378
|
+
`[rendi] uploaded ZIP (${(zipData.length / 1024 / 1024).toFixed(1)} MB) -> ${zipUrl}`,
|
|
379
|
+
);
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
// 5. Build ffmpeg command using bare filenames (not {{in_X}} placeholders)
|
|
383
|
+
const inputArgs: string[] = [];
|
|
384
|
+
for (const [i, input] of (inputs ?? []).entries()) {
|
|
385
|
+
if (typeof input !== "string" && "options" in input && input.options) {
|
|
386
|
+
inputArgs.push(...input.options);
|
|
387
|
+
}
|
|
388
|
+
inputArgs.push("-i", inputEntries[i]!.fileName);
|
|
389
|
+
}
|
|
390
|
+
|
|
391
|
+
const filterArgs: string[] = [];
|
|
392
|
+
if (filterComplex) {
|
|
393
|
+
filterArgs.push("-filter_complex", filterComplex);
|
|
394
|
+
}
|
|
395
|
+
if (videoFilter) {
|
|
396
|
+
// For compressed folder mode, the video filter references files by
|
|
397
|
+
// their bare filenames (already resolved in the working directory)
|
|
398
|
+
filterArgs.push("-vf", videoFilter);
|
|
399
|
+
}
|
|
400
|
+
|
|
401
|
+
const processedOutputArgs = outputArgs.filter((arg) => arg !== "-y");
|
|
402
|
+
|
|
403
|
+
const commandParts = [
|
|
404
|
+
...inputArgs,
|
|
405
|
+
...filterArgs,
|
|
406
|
+
...processedOutputArgs,
|
|
407
|
+
"{{out_1}}",
|
|
408
|
+
];
|
|
409
|
+
const ffmpegCommand = this.buildCommandString(commandParts);
|
|
410
|
+
const outputFilename = outputPath?.split("/").pop() ?? "output.mp4";
|
|
411
|
+
|
|
412
|
+
if (verbose) {
|
|
413
|
+
console.log("[rendi] input_compressed_folder:", zipUrl);
|
|
414
|
+
console.log("[rendi] ffmpeg_command:", ffmpegCommand);
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
// 6. Submit to Rendi with input_compressed_folder
|
|
418
|
+
const submitResponse = await fetch(`${RENDI_API_BASE}/run-ffmpeg-command`, {
|
|
419
|
+
method: "POST",
|
|
420
|
+
headers: {
|
|
421
|
+
"X-API-KEY": this.apiKey,
|
|
422
|
+
"Content-Type": "application/json",
|
|
423
|
+
},
|
|
424
|
+
body: JSON.stringify({
|
|
425
|
+
input_compressed_folder: zipUrl,
|
|
426
|
+
output_files: { out_1: outputFilename },
|
|
427
|
+
ffmpeg_command: ffmpegCommand,
|
|
428
|
+
max_command_run_seconds:
|
|
429
|
+
options.timeoutSeconds ?? this.maxCommandRunSeconds,
|
|
430
|
+
}),
|
|
431
|
+
});
|
|
432
|
+
|
|
433
|
+
if (!submitResponse.ok) {
|
|
434
|
+
const errorText = await submitResponse.text();
|
|
435
|
+
throw new Error(
|
|
436
|
+
`Rendi submit failed: ${submitResponse.status} - ${errorText}`,
|
|
437
|
+
);
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
const { command_id } =
|
|
441
|
+
(await submitResponse.json()) as RendiCommandResponse;
|
|
442
|
+
|
|
443
|
+
if (verbose) {
|
|
444
|
+
console.log("[rendi] command_id:", command_id);
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
// 7. Poll for completion (same as standard run)
|
|
448
|
+
let attempts = 0;
|
|
449
|
+
while (attempts < MAX_POLL_ATTEMPTS) {
|
|
450
|
+
const statusResponse = await fetch(
|
|
451
|
+
`${RENDI_API_BASE}/commands/${command_id}`,
|
|
452
|
+
{
|
|
453
|
+
headers: { "X-API-KEY": this.apiKey },
|
|
454
|
+
},
|
|
455
|
+
);
|
|
456
|
+
|
|
457
|
+
if (!statusResponse.ok) {
|
|
458
|
+
throw new Error(`Rendi poll failed: ${statusResponse.status}`);
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
const status = (await statusResponse.json()) as RendiStatusResponse;
|
|
462
|
+
|
|
463
|
+
if (status.status === "SUCCESS") {
|
|
464
|
+
const outputFile = status.output_files?.out_1;
|
|
465
|
+
if (!outputFile?.storage_url) {
|
|
466
|
+
throw new Error("Rendi completed but no output URL");
|
|
467
|
+
}
|
|
468
|
+
return { output: { type: "url", url: outputFile.storage_url } };
|
|
469
|
+
}
|
|
470
|
+
|
|
471
|
+
if (status.status === "FAILED") {
|
|
472
|
+
throw new Error(
|
|
473
|
+
`Rendi command failed: ${status.error_message ?? "unknown error"}`,
|
|
474
|
+
);
|
|
475
|
+
}
|
|
476
|
+
|
|
477
|
+
await this.sleep(POLL_INTERVAL_MS);
|
|
478
|
+
attempts++;
|
|
479
|
+
}
|
|
480
|
+
|
|
481
|
+
throw new Error("Rendi command timed out");
|
|
482
|
+
}
|
|
483
|
+
|
|
290
484
|
async resolvePath(input: FilePath): Promise<string> {
|
|
291
485
|
if (input instanceof File) {
|
|
292
486
|
return input.upload(this.storage);
|
|
@@ -326,13 +520,30 @@ export class RendiBackend implements FFmpegBackend {
|
|
|
326
520
|
private buildCommandString(args: string[]): string {
|
|
327
521
|
return args
|
|
328
522
|
.map((arg) => {
|
|
523
|
+
// Flags (e.g. -i, -filter_complex) and output placeholders pass through
|
|
329
524
|
if (arg.startsWith("-") || arg.startsWith("{{")) {
|
|
330
525
|
return arg;
|
|
331
526
|
}
|
|
332
|
-
|
|
333
|
-
|
|
527
|
+
// For values that need quoting (spaces, colons, single-quotes etc.):
|
|
528
|
+
// Rendi's server-side parser splits the command string like a POSIX
|
|
529
|
+
// shell. The old approach wrapped values in "..." and tried to escape
|
|
530
|
+
// inner " with \", but Rendi's parser does NOT reliably honour \"
|
|
531
|
+
// inside double-quoted strings — any literal " in user text (e.g.
|
|
532
|
+
// drawtext titles) would terminate the quoted arg and cause the next
|
|
533
|
+
// word to be treated as an output path.
|
|
534
|
+
//
|
|
535
|
+
// Defence-in-depth: replace any surviving straight " with the
|
|
536
|
+
// typographic curly-quote equivalent (the primary escaping happens in
|
|
537
|
+
// escapeDrawText, but filter strings can also come from other sources).
|
|
538
|
+
const sanitised = arg.replace(/"/g, "\u201C");
|
|
539
|
+
if (
|
|
540
|
+
sanitised.includes(" ") ||
|
|
541
|
+
sanitised.includes(":") ||
|
|
542
|
+
sanitised.includes("'")
|
|
543
|
+
) {
|
|
544
|
+
return `"${sanitised}"`;
|
|
334
545
|
}
|
|
335
|
-
return
|
|
546
|
+
return sanitised;
|
|
336
547
|
})
|
|
337
548
|
.join(" ");
|
|
338
549
|
}
|
|
@@ -107,6 +107,11 @@ const VIDEO_MODELS: Record<string, { t2v: string; i2v: string }> = {
|
|
|
107
107
|
t2v: "fal-ai/kling-video/o3/standard/text-to-video",
|
|
108
108
|
i2v: "fal-ai/kling-video/o3/standard/image-to-video",
|
|
109
109
|
},
|
|
110
|
+
// Kling O3 4K - native 4K output (i2v only, t2v falls back to pro)
|
|
111
|
+
"kling-v3-4k-image-to-video": {
|
|
112
|
+
t2v: "fal-ai/kling-video/o3/pro/text-to-video",
|
|
113
|
+
i2v: "fal-ai/kling-video/o3/4k/image-to-video",
|
|
114
|
+
},
|
|
110
115
|
// Kling v2.6 - with native audio generation
|
|
111
116
|
"kling-v2.6": {
|
|
112
117
|
t2v: "fal-ai/kling-video/v2.6/pro/text-to-video",
|
|
@@ -163,8 +168,25 @@ const VIDEO_EDIT_MODELS: Record<string, string> = {
|
|
|
163
168
|
"sora-2-remix": "fal-ai/sora-2/video-to-video/remix",
|
|
164
169
|
};
|
|
165
170
|
|
|
171
|
+
// Reference-to-video models - images/elements + prompt → video with character consistency
|
|
172
|
+
const REFERENCE_VIDEO_MODELS: Record<string, string> = {
|
|
173
|
+
"kling-v3-pro-reference-to-video":
|
|
174
|
+
"fal-ai/kling-video/o3/pro/reference-to-video",
|
|
175
|
+
"kling-v3-4k-reference-to-video":
|
|
176
|
+
"fal-ai/kling-video/o3/4k/reference-to-video",
|
|
177
|
+
};
|
|
178
|
+
|
|
179
|
+
// Video-to-video reference models - reference video + prompt → new video preserving motion/camera
|
|
180
|
+
const V2V_REFERENCE_MODELS: Record<string, string> = {
|
|
181
|
+
"kling-v3-standard-v2v-reference":
|
|
182
|
+
"fal-ai/kling-video/o3/standard/video-to-video/reference",
|
|
183
|
+
};
|
|
184
|
+
|
|
166
185
|
// Motion control models - video-to-video with motion transfer
|
|
167
186
|
const MOTION_CONTROL_MODELS: Record<string, string> = {
|
|
187
|
+
"kling-v3-pro-motion-control": "fal-ai/kling-video/v3/pro/motion-control",
|
|
188
|
+
"kling-v3-standard-motion-control":
|
|
189
|
+
"fal-ai/kling-video/v3/standard/motion-control",
|
|
168
190
|
"kling-v2.6-motion": "fal-ai/kling-video/v2.6/pro/motion-control",
|
|
169
191
|
"kling-v2.6-motion-standard":
|
|
170
192
|
"fal-ai/kling-video/v2.6/standard/motion-control",
|
|
@@ -520,8 +542,12 @@ class FalVideoModel implements VideoModelV3 {
|
|
|
520
542
|
const isMotionControl = MOTION_CONTROL_MODELS[this.modelId] !== undefined;
|
|
521
543
|
const isVideoEdit = VIDEO_EDIT_MODELS[this.modelId] !== undefined;
|
|
522
544
|
const isVideoUpscale = VIDEO_UPSCALE_MODELS[this.modelId] !== undefined;
|
|
545
|
+
const isReferenceVideo = REFERENCE_VIDEO_MODELS[this.modelId] !== undefined;
|
|
546
|
+
const isV2VReference = V2V_REFERENCE_MODELS[this.modelId] !== undefined;
|
|
523
547
|
const isKlingV3 =
|
|
524
|
-
this.modelId === "kling-v3" ||
|
|
548
|
+
this.modelId === "kling-v3" ||
|
|
549
|
+
this.modelId === "kling-v3-standard" ||
|
|
550
|
+
this.modelId === "kling-v3-4k-image-to-video";
|
|
525
551
|
const isKlingV26 = this.modelId === "kling-v2.6";
|
|
526
552
|
const isLtx2 = this.modelId === "ltx-2-19b-distilled";
|
|
527
553
|
const isGrokImagine = this.modelId === "grok-imagine";
|
|
@@ -537,7 +563,11 @@ class FalVideoModel implements VideoModelV3 {
|
|
|
537
563
|
? this.resolveVideoEditEndpoint()
|
|
538
564
|
: isVideoUpscale
|
|
539
565
|
? this.resolveVideoUpscaleEndpoint()
|
|
540
|
-
:
|
|
566
|
+
: isReferenceVideo
|
|
567
|
+
? this.resolveReferenceVideoEndpoint()
|
|
568
|
+
: isV2VReference
|
|
569
|
+
? this.resolveV2VReferenceEndpoint()
|
|
570
|
+
: this.resolveEndpoint(hasImageInput ?? false);
|
|
541
571
|
|
|
542
572
|
const input: Record<string, unknown> = {
|
|
543
573
|
...(providerOptions?.fal ?? {}),
|
|
@@ -600,6 +630,11 @@ class FalVideoModel implements VideoModelV3 {
|
|
|
600
630
|
if (input.keep_original_sound === undefined) {
|
|
601
631
|
input.keep_original_sound = true;
|
|
602
632
|
}
|
|
633
|
+
|
|
634
|
+
// Pass aspect ratio so the provider returns the correct output dimensions
|
|
635
|
+
if (aspectRatio && !input.aspect_ratio) {
|
|
636
|
+
input.aspect_ratio = aspectRatio;
|
|
637
|
+
}
|
|
603
638
|
} else if (isVideoEdit) {
|
|
604
639
|
// Video edit: video input + prompt for editing instruction
|
|
605
640
|
input.prompt = prompt;
|
|
@@ -625,6 +660,86 @@ class FalVideoModel implements VideoModelV3 {
|
|
|
625
660
|
if (videoFile) {
|
|
626
661
|
input.video_url = await fileToUrl(videoFile);
|
|
627
662
|
}
|
|
663
|
+
} else if (isReferenceVideo) {
|
|
664
|
+
// Reference-to-video: prompt + optional start/end images + reference images
|
|
665
|
+
// Elements and multi_prompt are passed via providerOptions.fal
|
|
666
|
+
if (prompt) {
|
|
667
|
+
input.prompt = prompt;
|
|
668
|
+
}
|
|
669
|
+
|
|
670
|
+
if (files) {
|
|
671
|
+
const imageFiles = files.filter((f) =>
|
|
672
|
+
getMediaType(f)?.startsWith("image/"),
|
|
673
|
+
);
|
|
674
|
+
// First image → start_image_url, second → end_image_url
|
|
675
|
+
if (imageFiles[0]) {
|
|
676
|
+
input.start_image_url = await fileToUrl(imageFiles[0]);
|
|
677
|
+
}
|
|
678
|
+
if (imageFiles[1]) {
|
|
679
|
+
input.end_image_url = await fileToUrl(imageFiles[1]);
|
|
680
|
+
}
|
|
681
|
+
// Additional images (3+) → image_urls for style/appearance reference
|
|
682
|
+
if (imageFiles.length > 2) {
|
|
683
|
+
const additionalUrls: string[] = [];
|
|
684
|
+
for (let i = 2; i < imageFiles.length; i++) {
|
|
685
|
+
additionalUrls.push(await fileToUrl(imageFiles[i]!));
|
|
686
|
+
}
|
|
687
|
+
input.image_urls = additionalUrls;
|
|
688
|
+
}
|
|
689
|
+
}
|
|
690
|
+
|
|
691
|
+
// Duration as string integer for Kling O3
|
|
692
|
+
const normalized = normalizeProviderInput(this.modelId, { duration });
|
|
693
|
+
input.duration = normalized.duration;
|
|
694
|
+
|
|
695
|
+
if (!input.aspect_ratio) {
|
|
696
|
+
input.aspect_ratio = aspectRatio ?? "16:9";
|
|
697
|
+
}
|
|
698
|
+
|
|
699
|
+
// Default to generating audio
|
|
700
|
+
if (input.generate_audio === undefined) {
|
|
701
|
+
input.generate_audio = true;
|
|
702
|
+
}
|
|
703
|
+
} else if (isV2VReference) {
|
|
704
|
+
// Video-to-video reference: reference video + prompt → new video preserving motion/camera
|
|
705
|
+
// Elements and image_urls are passed via providerOptions.fal
|
|
706
|
+
if (prompt) {
|
|
707
|
+
input.prompt = prompt;
|
|
708
|
+
}
|
|
709
|
+
|
|
710
|
+
const videoFile = files?.find((f) =>
|
|
711
|
+
getMediaType(f)?.startsWith("video/"),
|
|
712
|
+
);
|
|
713
|
+
if (videoFile) {
|
|
714
|
+
input.video_url = await fileToUrl(videoFile);
|
|
715
|
+
}
|
|
716
|
+
|
|
717
|
+
// Reference images from file inputs (for style/appearance)
|
|
718
|
+
if (files) {
|
|
719
|
+
const imageFiles = files.filter((f) =>
|
|
720
|
+
getMediaType(f)?.startsWith("image/"),
|
|
721
|
+
);
|
|
722
|
+
if (imageFiles.length > 0) {
|
|
723
|
+
const imageUrls: string[] = [];
|
|
724
|
+
for (const imgFile of imageFiles) {
|
|
725
|
+
imageUrls.push(await fileToUrl(imgFile));
|
|
726
|
+
}
|
|
727
|
+
input.image_urls = imageUrls;
|
|
728
|
+
}
|
|
729
|
+
}
|
|
730
|
+
|
|
731
|
+
// Duration as string integer for Kling O3
|
|
732
|
+
const normalized = normalizeProviderInput(this.modelId, { duration });
|
|
733
|
+
input.duration = normalized.duration;
|
|
734
|
+
|
|
735
|
+
if (!input.aspect_ratio) {
|
|
736
|
+
input.aspect_ratio = aspectRatio ?? "auto";
|
|
737
|
+
}
|
|
738
|
+
|
|
739
|
+
// Default to keeping original audio from reference video
|
|
740
|
+
if (input.keep_audio === undefined) {
|
|
741
|
+
input.keep_audio = true;
|
|
742
|
+
}
|
|
628
743
|
} else {
|
|
629
744
|
// Standard video generation
|
|
630
745
|
input.prompt = prompt;
|
|
@@ -825,6 +940,22 @@ class FalVideoModel implements VideoModelV3 {
|
|
|
825
940
|
|
|
826
941
|
return VIDEO_UPSCALE_MODELS[this.modelId] ?? this.modelId;
|
|
827
942
|
}
|
|
943
|
+
|
|
944
|
+
private resolveReferenceVideoEndpoint(): string {
|
|
945
|
+
if (this.modelId.startsWith("raw:")) {
|
|
946
|
+
return this.modelId.slice(4);
|
|
947
|
+
}
|
|
948
|
+
|
|
949
|
+
return REFERENCE_VIDEO_MODELS[this.modelId] ?? this.modelId;
|
|
950
|
+
}
|
|
951
|
+
|
|
952
|
+
private resolveV2VReferenceEndpoint(): string {
|
|
953
|
+
if (this.modelId.startsWith("raw:")) {
|
|
954
|
+
return this.modelId.slice(4);
|
|
955
|
+
}
|
|
956
|
+
|
|
957
|
+
return V2V_REFERENCE_MODELS[this.modelId] ?? this.modelId;
|
|
958
|
+
}
|
|
828
959
|
}
|
|
829
960
|
|
|
830
961
|
class FalImageModel implements ImageModelV3 {
|
|
@@ -70,6 +70,24 @@ const ModelDurationRules: Record<string, z.ZodType> = {
|
|
|
70
70
|
"kling-v3": z.object({ duration: stringIntDuration(3, 15, 5) }),
|
|
71
71
|
"kling-v3-standard": z.object({ duration: stringIntDuration(3, 15, 5) }),
|
|
72
72
|
|
|
73
|
+
// Kling O3 4K: same rules as v3
|
|
74
|
+
"kling-v3-4k-image-to-video": z.object({
|
|
75
|
+
duration: stringIntDuration(3, 15, 5),
|
|
76
|
+
}),
|
|
77
|
+
|
|
78
|
+
// Kling O3 reference-to-video: same duration range
|
|
79
|
+
"kling-v3-pro-reference-to-video": z.object({
|
|
80
|
+
duration: stringIntDuration(3, 15, 5),
|
|
81
|
+
}),
|
|
82
|
+
"kling-v3-4k-reference-to-video": z.object({
|
|
83
|
+
duration: stringIntDuration(3, 15, 5),
|
|
84
|
+
}),
|
|
85
|
+
|
|
86
|
+
// Kling O3 video-to-video reference: same duration range
|
|
87
|
+
"kling-v3-standard-v2v-reference": z.object({
|
|
88
|
+
duration: stringIntDuration(3, 15, 5),
|
|
89
|
+
}),
|
|
90
|
+
|
|
73
91
|
// Kling v2.6: same rules as v3
|
|
74
92
|
"kling-v2.6": z.object({ duration: stringIntDuration(3, 15, 5) }),
|
|
75
93
|
|