vargai 0.4.0-alpha81 → 0.4.0-alpha83
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/ai-sdk/examples/garry-tan-varg.ts +61 -0
- package/src/ai-sdk/middleware/placeholder.ts +21 -3
- package/src/ai-sdk/providers/editly/backends/local.ts +10 -2
- package/src/ai-sdk/providers/editly/editly.test.ts +110 -0
- package/src/ai-sdk/providers/editly/index.ts +61 -2
- package/src/ai-sdk/providers/editly/layers.ts +6 -2
- package/src/cli/commands/render.tsx +34 -7
- package/src/react/examples/omnihuman15-react-test.tsx +58 -0
- package/src/react/renderers/clip.ts +74 -5
- package/src/react/renderers/render.ts +15 -2
- package/src/react/resolve.ts +10 -1
- package/src/react/types.ts +4 -0
- package/src/react/warnings.test.ts +5 -3
- package/.claude/settings.local.json +0 -7
- package/.env.example +0 -33
- package/.github/workflows/ci.yml +0 -23
- package/.husky/README.md +0 -102
- package/.husky/commit-msg +0 -6
- package/.husky/pre-commit +0 -9
- package/.husky/pre-push +0 -6
package/package.json
CHANGED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Garry Tan Talking Head Video
|
|
3
|
+
* Generate a video of Garry Tan saying "varg.ai is cool!"
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import {
|
|
7
|
+
generateImage,
|
|
8
|
+
experimental_generateSpeech as generateSpeech,
|
|
9
|
+
} from "ai";
|
|
10
|
+
import { elevenlabs, File, fal, generateVideo } from "../index";
|
|
11
|
+
|
|
12
|
+
/**
 * Generates a portrait image and a speech clip in parallel, animates the
 * image into a 5-second video, and writes all three artifacts to `output/`.
 *
 * Throws if the image model returns no image.
 *
 * NOTE(review): the speech audio is written to disk but is not passed to
 * `generateVideo` below — confirm whether it is meant to be combined with the
 * video in a later step.
 */
async function main() {
  const script = "varg.ai is cool!";

  console.log("generating Garry Tan image and voice in parallel...");
  const [imageResult, speechResult] = await Promise.all([
    generateImage({
      model: fal.imageModel("flux-schnell"),
      prompt:
        "Garry Tan, Y Combinator CEO, Asian American man, short dark hair, glasses, friendly smile, professional headshot, studio lighting, clean background, looking at camera",
      n: 1,
    }),
    generateSpeech({
      model: elevenlabs.speechModel("turbo"),
      text: script,
      voice: "adam",
    }),
  ]);

  const firstImage = imageResult.images[0];
  if (!firstImage) throw new Error("No image generated");

  // Read each payload once and reuse the bytes for logging, saving, and as
  // the video model's input image.
  const imageBytes = await File.from(firstImage).data();
  const audioBytes = await File.from(speechResult.audio).data();

  console.log(`image: ${imageBytes.byteLength} bytes`);
  console.log(`audio: ${audioBytes.byteLength} bytes`);

  await Bun.write("output/garry-tan-image.png", imageBytes);
  await Bun.write("output/garry-tan-voice.mp3", audioBytes);

  console.log("\nanimating Garry Tan (5 seconds)...");
  const { video } = await generateVideo({
    model: fal.videoModel("wan-2.5"),
    prompt: {
      text: "man talking naturally, moving mouth while speaking, subtle head movements, professional demeanor, blinking naturally",
      images: [imageBytes],
    },
    duration: 5,
  });

  const videoBytes = await File.from(video).data();
  console.log(`video: ${videoBytes.byteLength} bytes`);
  await Bun.write("output/garry-tan-varg.mp4", videoBytes);

  console.log("\ndone! files saved to output/");
  console.log("- output/garry-tan-image.png");
  console.log("- output/garry-tan-voice.mp3");
  console.log("- output/garry-tan-varg.mp4");
}

main().catch((err: unknown) => {
  console.error(err);
  // Report failure to the shell instead of exiting 0 after a logged error.
  process.exitCode = 1;
});
|
|
@@ -84,22 +84,40 @@ export async function generatePlaceholder(
|
|
|
84
84
|
const { $ } = await import("bun");
|
|
85
85
|
|
|
86
86
|
try {
|
|
87
|
+
let ffmpegResult: Awaited<ReturnType<typeof $>>;
|
|
88
|
+
|
|
87
89
|
if (type === "audio") {
|
|
88
|
-
|
|
90
|
+
ffmpegResult =
|
|
91
|
+
await $`ffmpeg -y -f lavfi -i anullsrc=r=44100:cl=stereo -t ${duration} -c:a libmp3lame ${outputPath}`
|
|
92
|
+
.quiet()
|
|
93
|
+
.nothrow();
|
|
89
94
|
} else if (type === "image") {
|
|
90
95
|
const colorInput = `color=c=0x${hexColor}:s=${width}x${height}:d=1`;
|
|
91
96
|
const labelY = `(h/2)-${labelFontSize}`;
|
|
92
97
|
const promptY = `(h/2)+${Math.floor(labelFontSize * 0.5)}`;
|
|
93
98
|
const drawLabel = `drawtext=text='${typeLabel}':fontcolor=white:fontsize=${labelFontSize}:x=(w-text_w)/2:y=${labelY}`;
|
|
94
99
|
const drawPrompt = `drawtext=text='${promptText}':fontcolor=white@0.7:fontsize=${promptFontSize}:x=(w-text_w)/2:y=${promptY}`;
|
|
95
|
-
|
|
100
|
+
ffmpegResult =
|
|
101
|
+
await $`ffmpeg -y -f lavfi -i ${colorInput} -vf ${drawLabel},${drawPrompt} -frames:v 1 -update 1 ${outputPath}`
|
|
102
|
+
.quiet()
|
|
103
|
+
.nothrow();
|
|
96
104
|
} else {
|
|
97
105
|
const colorInput = `color=c=0x${hexColor}:s=${width}x${height}:d=${duration}:r=30`;
|
|
98
106
|
const labelY = `(h/2)-${labelFontSize}`;
|
|
99
107
|
const promptY = `(h/2)+${Math.floor(labelFontSize * 0.5)}`;
|
|
100
108
|
const drawLabel = `drawtext=text='${typeLabel}':fontcolor=white:fontsize=${labelFontSize}:x=(w-text_w)/2:y=${labelY}`;
|
|
101
109
|
const drawPrompt = `drawtext=text='${promptText}':fontcolor=white@0.7:fontsize=${promptFontSize}:x=(w-text_w)/2:y=${promptY}`;
|
|
102
|
-
|
|
110
|
+
ffmpegResult =
|
|
111
|
+
await $`ffmpeg -y -f lavfi -i ${colorInput} -vf ${drawLabel},${drawPrompt} -c:v libx264 -preset ultrafast -pix_fmt yuv420p ${outputPath}`
|
|
112
|
+
.quiet()
|
|
113
|
+
.nothrow();
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
if (ffmpegResult.exitCode !== 0) {
|
|
117
|
+
const stderr = ffmpegResult.stderr.toString().trim();
|
|
118
|
+
throw new Error(
|
|
119
|
+
`ffmpeg placeholder failed (exit ${ffmpegResult.exitCode}): ${stderr || "unknown error"}`,
|
|
120
|
+
);
|
|
103
121
|
}
|
|
104
122
|
|
|
105
123
|
const data = await Bun.file(outputPath).bytes();
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import { mkdirSync } from "node:fs";
|
|
2
|
+
import { dirname } from "node:path";
|
|
1
3
|
import { $ } from "bun";
|
|
2
4
|
import { File } from "../../../file";
|
|
3
5
|
import type {
|
|
@@ -86,14 +88,20 @@ export class LocalBackend implements FFmpegBackend {
|
|
|
86
88
|
outputPath,
|
|
87
89
|
];
|
|
88
90
|
|
|
91
|
+
// Ensure the output directory exists (ffmpeg cannot create directories)
|
|
92
|
+
mkdirSync(dirname(outputPath), { recursive: true });
|
|
93
|
+
|
|
89
94
|
if (verbose) {
|
|
90
95
|
console.log("ffmpeg", ffmpegArgs.join(" "));
|
|
91
96
|
}
|
|
92
97
|
|
|
93
|
-
const result = await $`ffmpeg ${ffmpegArgs}`.quiet();
|
|
98
|
+
const result = await $`ffmpeg ${ffmpegArgs}`.quiet().nothrow();
|
|
94
99
|
|
|
95
100
|
if (result.exitCode !== 0) {
|
|
96
|
-
|
|
101
|
+
const stderr = result.stderr.toString().trim();
|
|
102
|
+
throw new Error(
|
|
103
|
+
`ffmpeg failed (exit ${result.exitCode}): ${stderr || "unknown error"}`,
|
|
104
|
+
);
|
|
97
105
|
}
|
|
98
106
|
|
|
99
107
|
return { output: { type: "file", path: outputPath } };
|
|
@@ -1409,4 +1409,114 @@ describe("editly", () => {
|
|
|
1409
1409
|
}),
|
|
1410
1410
|
).rejects.toThrow("produced no video output");
|
|
1411
1411
|
});
|
|
1412
|
+
|
|
1413
|
+
// Per-clip overlay tests (feature/per-clip-overlay)
|
|
1414
|
+
|
|
1415
|
+
test("clip-local video overlay with start/stop timing", async () => {
|
|
1416
|
+
const outPath = "output/editly-test-clip-overlay-timing.mp4";
|
|
1417
|
+
if (existsSync(outPath)) unlinkSync(outPath);
|
|
1418
|
+
|
|
1419
|
+
await editly({
|
|
1420
|
+
outPath,
|
|
1421
|
+
width: 1280,
|
|
1422
|
+
height: 720,
|
|
1423
|
+
fps: 30,
|
|
1424
|
+
clips: [
|
|
1425
|
+
{
|
|
1426
|
+
duration: 4,
|
|
1427
|
+
layers: [
|
|
1428
|
+
{ type: "fill-color", color: "#1a1a2e" },
|
|
1429
|
+
{
|
|
1430
|
+
type: "video",
|
|
1431
|
+
path: VIDEO_1,
|
|
1432
|
+
width: "30%",
|
|
1433
|
+
height: "30%",
|
|
1434
|
+
left: "68%",
|
|
1435
|
+
top: "2%",
|
|
1436
|
+
start: 1,
|
|
1437
|
+
stop: 3,
|
|
1438
|
+
},
|
|
1439
|
+
],
|
|
1440
|
+
},
|
|
1441
|
+
],
|
|
1442
|
+
});
|
|
1443
|
+
|
|
1444
|
+
expect(existsSync(outPath)).toBe(true);
|
|
1445
|
+
const info = await ffprobe(outPath);
|
|
1446
|
+
expect(info.duration).toBeCloseTo(4, 0);
|
|
1447
|
+
});
|
|
1448
|
+
|
|
1449
|
+
test("clip-local image overlay with start/stop timing", async () => {
|
|
1450
|
+
const outPath = "output/editly-test-clip-image-overlay-timing.mp4";
|
|
1451
|
+
if (existsSync(outPath)) unlinkSync(outPath);
|
|
1452
|
+
|
|
1453
|
+
await editly({
|
|
1454
|
+
outPath,
|
|
1455
|
+
width: 1280,
|
|
1456
|
+
height: 720,
|
|
1457
|
+
fps: 30,
|
|
1458
|
+
clips: [
|
|
1459
|
+
{
|
|
1460
|
+
duration: 4,
|
|
1461
|
+
layers: [
|
|
1462
|
+
{ type: "fill-color", color: "#1a1a2e" },
|
|
1463
|
+
{
|
|
1464
|
+
type: "image-overlay",
|
|
1465
|
+
path: IMAGE_SQUARE,
|
|
1466
|
+
position: "top-right",
|
|
1467
|
+
width: "20%",
|
|
1468
|
+
start: 1,
|
|
1469
|
+
stop: 3,
|
|
1470
|
+
},
|
|
1471
|
+
],
|
|
1472
|
+
},
|
|
1473
|
+
],
|
|
1474
|
+
});
|
|
1475
|
+
|
|
1476
|
+
expect(existsSync(outPath)).toBe(true);
|
|
1477
|
+
const info = await ffprobe(outPath);
|
|
1478
|
+
expect(info.duration).toBeCloseTo(4, 0);
|
|
1479
|
+
});
|
|
1480
|
+
|
|
1481
|
+
test("clip-local overlay only appears in its clip, not in others", async () => {
|
|
1482
|
+
const outPath = "output/editly-test-clip-overlay-scoped.mp4";
|
|
1483
|
+
if (existsSync(outPath)) unlinkSync(outPath);
|
|
1484
|
+
|
|
1485
|
+
await editly({
|
|
1486
|
+
outPath,
|
|
1487
|
+
width: 1280,
|
|
1488
|
+
height: 720,
|
|
1489
|
+
fps: 30,
|
|
1490
|
+
clips: [
|
|
1491
|
+
{
|
|
1492
|
+
duration: 2,
|
|
1493
|
+
layers: [{ type: "fill-color", color: "#ff0000" }],
|
|
1494
|
+
transition: { name: "fade", duration: 0.3 },
|
|
1495
|
+
},
|
|
1496
|
+
{
|
|
1497
|
+
duration: 2,
|
|
1498
|
+
layers: [
|
|
1499
|
+
{ type: "fill-color", color: "#00ff00" },
|
|
1500
|
+
{
|
|
1501
|
+
type: "image-overlay",
|
|
1502
|
+
path: IMAGE_SQUARE,
|
|
1503
|
+
position: "center",
|
|
1504
|
+
width: "30%",
|
|
1505
|
+
start: 0.5,
|
|
1506
|
+
stop: 1.5,
|
|
1507
|
+
},
|
|
1508
|
+
],
|
|
1509
|
+
transition: { name: "fade", duration: 0.3 },
|
|
1510
|
+
},
|
|
1511
|
+
{
|
|
1512
|
+
duration: 2,
|
|
1513
|
+
layers: [{ type: "fill-color", color: "#0000ff" }],
|
|
1514
|
+
},
|
|
1515
|
+
],
|
|
1516
|
+
});
|
|
1517
|
+
|
|
1518
|
+
expect(existsSync(outPath)).toBe(true);
|
|
1519
|
+
const info = await ffprobe(outPath);
|
|
1520
|
+
expect(info.duration).toBeGreaterThan(4);
|
|
1521
|
+
});
|
|
1412
1522
|
});
|
|
@@ -168,6 +168,15 @@ function isImageOverlayLayer(layer: Layer): boolean {
|
|
|
168
168
|
return layer.type === "image-overlay";
|
|
169
169
|
}
|
|
170
170
|
|
|
171
|
+
/**
 * Reports whether `layer` is an image overlay scoped to its own clip: an
 * image-overlay layer that carries a `start` and/or `stop` time (as produced
 * by `<Overlay start end>` nested in a `<Clip>`). Such layers are meant to be
 * composited within their clip using enable expressions.
 */
function isClipLocalImageOverlay(layer: Layer): boolean {
  // Timing fields are only inspected once the layer is known to be an image overlay.
  return (
    isImageOverlayLayer(layer) &&
    !(layer.start === undefined && layer.stop === undefined)
  );
}
|
|
179
|
+
|
|
171
180
|
/** Returns true when `layer` is a video overlay or an image overlay. */
function isOverlayLayer(layer: Layer): boolean {
  if (isVideoOverlayLayer(layer)) return true;
  return isImageOverlayLayer(layer);
}
|
|
@@ -216,6 +225,10 @@ function buildBaseClipFilter(
|
|
|
216
225
|
(l) => l && isClipLocalVideoOverlay(l),
|
|
217
226
|
) as VideoLayer[];
|
|
218
227
|
|
|
228
|
+
const clipLocalImageOverlays = clip.layers.filter(
|
|
229
|
+
(l) => l && isClipLocalImageOverlay(l),
|
|
230
|
+
) as ImageOverlayLayer[];
|
|
231
|
+
|
|
219
232
|
for (let i = 0; i < baseLayers.length; i++) {
|
|
220
233
|
const layer = baseLayers[i];
|
|
221
234
|
if (!layer) continue;
|
|
@@ -253,7 +266,10 @@ function buildBaseClipFilter(
|
|
|
253
266
|
}
|
|
254
267
|
}
|
|
255
268
|
|
|
256
|
-
if (
|
|
269
|
+
if (
|
|
270
|
+
!baseLabel &&
|
|
271
|
+
(clipLocalOverlays.length > 0 || clipLocalImageOverlays.length > 0)
|
|
272
|
+
) {
|
|
257
273
|
const fillFilter = getFillColorFilter(
|
|
258
274
|
{ type: "fill-color", color: "#000000" },
|
|
259
275
|
inputIdx,
|
|
@@ -295,12 +311,50 @@ function buildBaseClipFilter(
|
|
|
295
311
|
width,
|
|
296
312
|
height,
|
|
297
313
|
outputLabel,
|
|
314
|
+
clip.duration,
|
|
298
315
|
);
|
|
299
316
|
filters.push(positionFilter);
|
|
300
317
|
baseLabel = outputLabel;
|
|
301
318
|
inputIdx++;
|
|
302
319
|
}
|
|
303
320
|
|
|
321
|
+
// Composite clip-local image overlays (from <Overlay start end> inside <Clip>)
|
|
322
|
+
for (let i = 0; i < clipLocalImageOverlays.length; i++) {
|
|
323
|
+
const layer = clipLocalImageOverlays[i];
|
|
324
|
+
if (!layer) continue;
|
|
325
|
+
|
|
326
|
+
if (!baseLabel) {
|
|
327
|
+
throw new Error(
|
|
328
|
+
`Clip ${clipIndex} is missing a base layer for image overlay placement`,
|
|
329
|
+
);
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
const imgFilter = getImageOverlayFilter(
|
|
333
|
+
layer,
|
|
334
|
+
inputIdx,
|
|
335
|
+
width,
|
|
336
|
+
height,
|
|
337
|
+
clip.duration,
|
|
338
|
+
);
|
|
339
|
+
|
|
340
|
+
inputs.push(layer.path);
|
|
341
|
+
filters.push(imgFilter.filterComplex);
|
|
342
|
+
|
|
343
|
+
const outputLabel = `clip${clipIndex}imgov${i}`;
|
|
344
|
+
const posFilter = getImageOverlayPositionFilter(
|
|
345
|
+
baseLabel,
|
|
346
|
+
imgFilter.outputLabel,
|
|
347
|
+
layer,
|
|
348
|
+
width,
|
|
349
|
+
height,
|
|
350
|
+
outputLabel,
|
|
351
|
+
clip.duration,
|
|
352
|
+
);
|
|
353
|
+
filters.push(posFilter);
|
|
354
|
+
baseLabel = outputLabel;
|
|
355
|
+
inputIdx++;
|
|
356
|
+
}
|
|
357
|
+
|
|
304
358
|
if (!baseLabel) {
|
|
305
359
|
throw new Error(
|
|
306
360
|
`Clip ${clipIndex} produced no video output — ensure it has at least one visual layer (video, image, or fill-color)`,
|
|
@@ -356,7 +410,12 @@ function collectImageOverlays(
|
|
|
356
410
|
|
|
357
411
|
for (const clip of clips) {
|
|
358
412
|
for (const layer of clip.layers) {
|
|
359
|
-
|
|
413
|
+
// Skip clip-local image overlays (with start/stop) — they are composited per-clip
|
|
414
|
+
if (
|
|
415
|
+
layer &&
|
|
416
|
+
isImageOverlayLayer(layer) &&
|
|
417
|
+
!isClipLocalImageOverlay(layer)
|
|
418
|
+
) {
|
|
360
419
|
const imgLayer = layer as ImageOverlayLayer;
|
|
361
420
|
const key = `${imgLayer.path}:${JSON.stringify(imgLayer.position ?? "")}:${imgLayer.width ?? ""}:${imgLayer.height ?? ""}`;
|
|
362
421
|
const existing = overlays.get(key);
|
|
@@ -247,6 +247,7 @@ export function getOverlayFilter(
|
|
|
247
247
|
width: number,
|
|
248
248
|
height: number,
|
|
249
249
|
outputLabel: string,
|
|
250
|
+
clipDuration?: number,
|
|
250
251
|
): string {
|
|
251
252
|
const baseX = layer.left !== undefined ? parseSize(layer.left, width) : 0;
|
|
252
253
|
const baseY = layer.top !== undefined ? parseSize(layer.top, height) : 0;
|
|
@@ -266,7 +267,8 @@ export function getOverlayFilter(
|
|
|
266
267
|
yExpr = `${baseY}-overlay_h`;
|
|
267
268
|
}
|
|
268
269
|
|
|
269
|
-
|
|
270
|
+
const enable = getEnableExpr(layer.start, layer.stop, clipDuration ?? 9999);
|
|
271
|
+
return `[${baseLabel}][${overlayLabel}]overlay=${xExpr}:${yExpr}:shortest=1${enable}[${outputLabel}]`;
|
|
270
272
|
}
|
|
271
273
|
|
|
272
274
|
export function getImageFilter(
|
|
@@ -594,9 +596,11 @@ export function getImageOverlayPositionFilter(
|
|
|
594
596
|
width: number,
|
|
595
597
|
height: number,
|
|
596
598
|
outputLabel: string,
|
|
599
|
+
clipDuration?: number,
|
|
597
600
|
): string {
|
|
598
601
|
const { x, y } = resolvePositionForOverlay(layer.position, width, height);
|
|
599
|
-
|
|
602
|
+
const enable = getEnableExpr(layer.start, layer.stop, clipDuration ?? 9999);
|
|
603
|
+
return `[${baseLabel}][${overlayLabel}]overlay=${x}:${y}:shortest=1${enable}[${outputLabel}]`;
|
|
600
604
|
}
|
|
601
605
|
|
|
602
606
|
function getEnableExpr(
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/** @jsxImportSource react */
|
|
2
2
|
|
|
3
|
-
import { existsSync, mkdirSync } from "node:fs";
|
|
4
|
-
import { resolve } from "node:path";
|
|
3
|
+
import { existsSync, mkdirSync, unlinkSync } from "node:fs";
|
|
4
|
+
import { dirname, resolve } from "node:path";
|
|
5
5
|
import { defineCommand } from "citty";
|
|
6
6
|
import { Box, Text } from "ink";
|
|
7
7
|
import { render } from "../../react/render";
|
|
@@ -11,12 +11,22 @@ import { renderStatic } from "../ui/render.ts";
|
|
|
11
11
|
|
|
12
12
|
const AUTO_IMPORTS = `/** @jsxImportSource vargai */
|
|
13
13
|
import { Captions, Clip, Image, Music, Overlay, Packshot, Render, Slider, Speech, Split, Subtitle, Swipe, TalkingHead, Title, Video, Grid } from "vargai/react";
|
|
14
|
-
import { fal, elevenlabs, replicate } from "vargai/ai";
|
|
14
|
+
import { fal, elevenlabs, replicate, varg } from "vargai/ai";
|
|
15
15
|
`;
|
|
16
16
|
|
|
17
17
|
async function detectDefaultModels(): Promise<DefaultModels | undefined> {
|
|
18
18
|
const defaults: DefaultModels = {};
|
|
19
19
|
|
|
20
|
+
// Gateway provider — single key for all models (recommended)
|
|
21
|
+
if (process.env.VARG_API_KEY) {
|
|
22
|
+
const { varg } = await import("../../ai-sdk/providers/varg");
|
|
23
|
+
defaults.image = varg.imageModel("nano-banana-pro");
|
|
24
|
+
defaults.video = varg.videoModel("kling-v3");
|
|
25
|
+
defaults.speech = varg.speechModel("eleven_v3");
|
|
26
|
+
defaults.music = varg.musicModel("music_v1");
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
// Direct providers override gateway when available
|
|
20
30
|
const falKey = process.env.FAL_API_KEY ?? process.env.FAL_KEY;
|
|
21
31
|
if (falKey) {
|
|
22
32
|
const { fal } = await import("../../ai-sdk/providers/fal");
|
|
@@ -26,7 +36,7 @@ async function detectDefaultModels(): Promise<DefaultModels | undefined> {
|
|
|
26
36
|
|
|
27
37
|
if (process.env.ELEVENLABS_API_KEY) {
|
|
28
38
|
const { elevenlabs } = await import("../../ai-sdk/providers/elevenlabs");
|
|
29
|
-
defaults.speech = elevenlabs.speechModel("
|
|
39
|
+
defaults.speech = elevenlabs.speechModel("eleven_v3");
|
|
30
40
|
defaults.music = elevenlabs.musicModel("music_v1");
|
|
31
41
|
}
|
|
32
42
|
|
|
@@ -73,13 +83,23 @@ async function loadComponent(filePath: string): Promise<VargElement> {
|
|
|
73
83
|
|
|
74
84
|
if (hasVargaiImport) {
|
|
75
85
|
const tmpFile = `${tmpDir}/${Date.now()}.tsx`;
|
|
76
|
-
|
|
86
|
+
// Resolve @jsxImportSource to absolute path so it works from the cache dir
|
|
87
|
+
const runtimeDir = resolve(pkgDir, "src/react/runtime");
|
|
88
|
+
const resolvedSource = source.replace(
|
|
89
|
+
/@jsxImportSource\s+vargai/,
|
|
90
|
+
`@jsxImportSource ${runtimeDir}`,
|
|
91
|
+
);
|
|
92
|
+
await Bun.write(tmpFile, resolvedSource);
|
|
77
93
|
|
|
78
94
|
try {
|
|
79
95
|
const mod = await import(tmpFile);
|
|
80
96
|
return resolveDefaultExport(mod);
|
|
81
97
|
} finally {
|
|
82
|
-
|
|
98
|
+
try {
|
|
99
|
+
unlinkSync(tmpFile);
|
|
100
|
+
} catch {
|
|
101
|
+
/* ignore cleanup errors */
|
|
102
|
+
}
|
|
83
103
|
}
|
|
84
104
|
}
|
|
85
105
|
|
|
@@ -96,7 +116,11 @@ async function loadComponent(filePath: string): Promise<VargElement> {
|
|
|
96
116
|
const mod = await import(tmpFile);
|
|
97
117
|
return resolveDefaultExport(mod);
|
|
98
118
|
} finally {
|
|
99
|
-
|
|
119
|
+
try {
|
|
120
|
+
unlinkSync(tmpFile);
|
|
121
|
+
} catch {
|
|
122
|
+
/* ignore cleanup errors */
|
|
123
|
+
}
|
|
100
124
|
}
|
|
101
125
|
}
|
|
102
126
|
|
|
@@ -166,6 +190,9 @@ async function runRender(
|
|
|
166
190
|
.pop();
|
|
167
191
|
const outputPath = (args.output as string) ?? `output/${basename}.mp4`;
|
|
168
192
|
|
|
193
|
+
// Ensure the output directory exists (ffmpeg cannot create directories)
|
|
194
|
+
mkdirSync(dirname(outputPath), { recursive: true });
|
|
195
|
+
|
|
169
196
|
if (!args.quiet) {
|
|
170
197
|
const modeLabel = mode === "preview" ? " (fast)" : "";
|
|
171
198
|
console.log(`rendering ${file} → ${outputPath}${modeLabel}`);
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OmniHuman v1.5 React syntax test
|
|
3
|
+
*
|
|
4
|
+
* Uses a local image + local audio file to generate a talking video.
|
|
5
|
+
*
|
|
6
|
+
* Run: bun run src/react/examples/omnihuman15-react-test.tsx
|
|
7
|
+
* Output: output/omnihuman15-react-test.mp4
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { fal } from "../../ai-sdk/providers/fal";
|
|
11
|
+
import { Clip, Render, render, Video } from "..";
|
|
12
|
+
|
|
13
|
+
// Local input files for the composition below. These are the same paths the
// garry-tan-varg.ts example writes its generated image and voice clip to.
const IMAGE_PATH = "output/garry-tan-image.png";
const AUDIO_PATH = "output/garry-tan-voice.mp3";

// A 720x1280 composition with a single 5-second clip. The clip holds one
// <Video> produced by fal's "omnihuman-v1.5" model from a text prompt, the
// still image, and the audio file; `providerOptions.fal` is passed alongside.
const video = (
  <Render width={720} height={1280}>
    <Clip duration={5}>
      <Video
        model={fal.videoModel("omnihuman-v1.5")}
        prompt={{
          text: "friendly professional talking head, natural blinking, subtle head movement",
          images: [IMAGE_PATH],
          audio: AUDIO_PATH,
        }}
        providerOptions={{
          fal: {
            resolution: "720p",
            turbo_mode: true,
          },
        }}
      />
    </Clip>
  </Render>
);
|
|
36
|
+
|
|
37
|
+
/**
 * Renders the `video` composition to disk and logs the resulting file size.
 *
 * Prints an error and exits with status 1 when no fal API key is set in the
 * environment or when either local input file is missing.
 */
async function main() {
  // Single source of truth for the output location (used for render and log).
  const outputPath = "output/omnihuman15-react-test.mp4";

  if (!process.env.FAL_API_KEY && !process.env.FAL_KEY) {
    console.error("ERROR: FAL_API_KEY/FAL_KEY not found in environment");
    process.exit(1);
  }

  // The composition reads local files; fail early with a clear message
  // before starting the render.
  for (const inputPath of [IMAGE_PATH, AUDIO_PATH]) {
    if (!(await Bun.file(inputPath).exists())) {
      console.error(`ERROR: input file not found: ${inputPath}`);
      process.exit(1);
    }
  }

  const result = await render(video, {
    output: outputPath,
    cache: ".cache/ai",
  });

  const sizeMb = (result.video.byteLength / 1024 / 1024).toFixed(2);
  console.log(`ok: ${outputPath} (${sizeMb} MB)`);
}

// Run only when this file is executed directly, not when it is imported.
if (import.meta.main) {
  main().catch((err) => {
    console.error(err);
    process.exit(1);
  });
}
|
|
@@ -12,6 +12,7 @@ import type {
|
|
|
12
12
|
ClipProps,
|
|
13
13
|
ImageProps,
|
|
14
14
|
MusicProps,
|
|
15
|
+
OverlayProps,
|
|
15
16
|
SpeechProps,
|
|
16
17
|
VargElement,
|
|
17
18
|
VargNode,
|
|
@@ -231,11 +232,79 @@ async function renderClipLayers(
|
|
|
231
232
|
}
|
|
232
233
|
|
|
233
234
|
case "overlay": {
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
235
|
+
const overlayProps = element.props as OverlayProps;
|
|
236
|
+
for (const overlayChild of element.children) {
|
|
237
|
+
if (
|
|
238
|
+
!overlayChild ||
|
|
239
|
+
typeof overlayChild !== "object" ||
|
|
240
|
+
!("type" in overlayChild)
|
|
241
|
+
)
|
|
242
|
+
continue;
|
|
243
|
+
const overlayChildElement = overlayChild as VargElement;
|
|
244
|
+
|
|
245
|
+
if (overlayChildElement.type === "image") {
|
|
246
|
+
const hasPosition =
|
|
247
|
+
overlayProps.left !== undefined ||
|
|
248
|
+
overlayProps.top !== undefined ||
|
|
249
|
+
overlayProps.width !== undefined ||
|
|
250
|
+
overlayProps.height !== undefined;
|
|
251
|
+
|
|
252
|
+
pending.push({
|
|
253
|
+
type: "async",
|
|
254
|
+
promise: renderImage(
|
|
255
|
+
overlayChildElement as VargElement<"image">,
|
|
256
|
+
ctx,
|
|
257
|
+
)
|
|
258
|
+
.then((file) => ctx.backend.resolvePath(file))
|
|
259
|
+
.then((path) =>
|
|
260
|
+
hasPosition
|
|
261
|
+
? ({
|
|
262
|
+
type: "image-overlay",
|
|
263
|
+
path,
|
|
264
|
+
width: overlayProps.width,
|
|
265
|
+
height: overlayProps.height,
|
|
266
|
+
position: {
|
|
267
|
+
x: overlayProps.left ?? 0,
|
|
268
|
+
y: overlayProps.top ?? 0,
|
|
269
|
+
},
|
|
270
|
+
start: overlayProps.start,
|
|
271
|
+
stop: overlayProps.end,
|
|
272
|
+
} as ImageOverlayLayer)
|
|
273
|
+
: ({
|
|
274
|
+
type: "image",
|
|
275
|
+
path,
|
|
276
|
+
start: overlayProps.start,
|
|
277
|
+
stop: overlayProps.end,
|
|
278
|
+
} as ImageLayer),
|
|
279
|
+
),
|
|
280
|
+
});
|
|
281
|
+
} else if (overlayChildElement.type === "video") {
|
|
282
|
+
pending.push({
|
|
283
|
+
type: "async",
|
|
284
|
+
promise: renderVideo(
|
|
285
|
+
overlayChildElement as VargElement<"video">,
|
|
286
|
+
ctx,
|
|
287
|
+
)
|
|
288
|
+
.then((file) => ctx.backend.resolvePath(file))
|
|
289
|
+
.then(
|
|
290
|
+
(path) =>
|
|
291
|
+
({
|
|
292
|
+
type: "video",
|
|
293
|
+
path,
|
|
294
|
+
mixVolume: overlayProps.keepAudio
|
|
295
|
+
? (overlayProps.volume ?? 1)
|
|
296
|
+
: 0,
|
|
297
|
+
left: overlayProps.left,
|
|
298
|
+
top: overlayProps.top,
|
|
299
|
+
width: overlayProps.width,
|
|
300
|
+
height: overlayProps.height,
|
|
301
|
+
start: overlayProps.start,
|
|
302
|
+
stop: overlayProps.end,
|
|
303
|
+
}) as VideoLayer,
|
|
304
|
+
),
|
|
305
|
+
});
|
|
306
|
+
}
|
|
307
|
+
}
|
|
239
308
|
break;
|
|
240
309
|
}
|
|
241
310
|
}
|
|
@@ -246,12 +246,24 @@ export async function renderRoot(
|
|
|
246
246
|
// offsets (they all start at the container's position in the timeline).
|
|
247
247
|
const firstLeafClipIndex = clipIndexCounter; // before recursion increments it
|
|
248
248
|
|
|
249
|
-
//
|
|
249
|
+
// Collect overlays from container level — these get injected into each
|
|
250
|
+
// child clip so the overlay appears across all inner clips.
|
|
251
|
+
const containerOverlays: VargElement[] = [];
|
|
252
|
+
for (const el of nonClipChildren) {
|
|
253
|
+
if (el.type === "overlay") {
|
|
254
|
+
containerOverlays.push(el);
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
// Recurse into child clips, injecting container-level overlays
|
|
250
259
|
for (const childClip of childClips) {
|
|
260
|
+
if (containerOverlays.length > 0) {
|
|
261
|
+
childClip.children = [...childClip.children, ...containerOverlays];
|
|
262
|
+
}
|
|
251
263
|
flattenClip(childClip);
|
|
252
264
|
}
|
|
253
265
|
|
|
254
|
-
// Process non-clip children at the container level
|
|
266
|
+
// Process remaining non-clip children at the container level
|
|
255
267
|
for (const el of nonClipChildren) {
|
|
256
268
|
if (el.type === "captions") {
|
|
257
269
|
hoistedCaptions.push({
|
|
@@ -264,6 +276,7 @@ export async function renderRoot(
|
|
|
264
276
|
clipIndex: firstLeafClipIndex,
|
|
265
277
|
});
|
|
266
278
|
}
|
|
279
|
+
// overlay: already handled above (distributed to child clips)
|
|
267
280
|
// Image/Video at container level: not supported yet (would need
|
|
268
281
|
// background layer spanning all child clips — a future feature)
|
|
269
282
|
}
|
package/src/react/resolve.ts
CHANGED
|
@@ -276,7 +276,16 @@ async function sliceAudio(
|
|
|
276
276
|
|
|
277
277
|
// Fallback: no backend (top-level `await` outside render()) — use local ffmpeg directly.
|
|
278
278
|
const inputPath = await file.toTempFile();
|
|
279
|
-
|
|
279
|
+
const sliceResult =
|
|
280
|
+
await $`ffmpeg -y -ss ${start} -i ${inputPath} -t ${duration} -acodec libmp3lame -q:a 2 ${outPath}`
|
|
281
|
+
.quiet()
|
|
282
|
+
.nothrow();
|
|
283
|
+
if (sliceResult.exitCode !== 0) {
|
|
284
|
+
const stderr = sliceResult.stderr.toString().trim();
|
|
285
|
+
throw new Error(
|
|
286
|
+
`ffmpeg audio slice failed (exit ${sliceResult.exitCode}): ${stderr || "unknown error"}`,
|
|
287
|
+
);
|
|
288
|
+
}
|
|
280
289
|
|
|
281
290
|
const sliced = await Bun.file(outPath).arrayBuffer();
|
|
282
291
|
try {
|
package/src/react/types.ts
CHANGED
|
@@ -121,6 +121,10 @@ export interface ClipProps extends BaseProps {
|
|
|
121
121
|
}
|
|
122
122
|
|
|
123
123
|
/**
 * Props for `<Overlay>`: the shared base, position, and audio props plus an
 * optional `start`/`end` time window and the overlay's child content.
 */
export interface OverlayProps extends BaseProps, PositionProps, AudioProps {
  /** Start time in seconds (relative to parent clip). Only used when inside a <Clip>. */
  start?: number;
  /** End time in seconds (relative to parent clip). Only used when inside a <Clip>. */
  end?: number;
  /** Child content placed in the overlay. */
  children?: VargNode;
}
|
|
126
130
|
|
|
@@ -3,7 +3,7 @@ import { existsSync, unlinkSync, writeFileSync } from "node:fs";
|
|
|
3
3
|
|
|
4
4
|
describe("warnings", () => {
|
|
5
5
|
test(
|
|
6
|
-
"issue #45:
|
|
6
|
+
"issue #45: Overlay inside Clip renders without warning",
|
|
7
7
|
async () => {
|
|
8
8
|
const script = `
|
|
9
9
|
import { Clip, Image, Overlay, Render, render } from "./src/react/index";
|
|
@@ -45,8 +45,10 @@ await render(
|
|
|
45
45
|
unlinkSync(tmpFile);
|
|
46
46
|
|
|
47
47
|
const output = stdout + stderr;
|
|
48
|
-
|
|
49
|
-
expect(output).toContain(
|
|
48
|
+
// <Overlay> inside <Clip> is now valid — no warning should be emitted
|
|
49
|
+
expect(output).not.toContain(
|
|
50
|
+
"<Overlay> placed inside <Clip> will be ignored",
|
|
51
|
+
);
|
|
50
52
|
expect(existsSync("output/test-issue-45.mp4")).toBe(true);
|
|
51
53
|
},
|
|
52
54
|
{ timeout: 15000 },
|
package/.env.example
DELETED
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
# fal.ai api key
|
|
2
|
-
FAL_API_KEY=fal_xxx
|
|
3
|
-
|
|
4
|
-
# higgsfield credentials
|
|
5
|
-
HIGGSFIELD_API_KEY=hf_xxx
|
|
6
|
-
HIGGSFIELD_SECRET=secret_xxx
|
|
7
|
-
|
|
8
|
-
# elevenlabs api key
|
|
9
|
-
ELEVENLABS_API_KEY=el_xxx
|
|
10
|
-
|
|
11
|
-
# groq api key (ultra-fast whisper transcription)
|
|
12
|
-
GROQ_API_KEY=gsk_xxx
|
|
13
|
-
|
|
14
|
-
# fireworks api key (word-level transcription with timestamps)
|
|
15
|
-
FIREWORKS_API_KEY=fw_xxx
|
|
16
|
-
|
|
17
|
-
# cloudflare r2 / s3 storage
|
|
18
|
-
CLOUDFLARE_R2_API_URL=https://xxx.r2.cloudflarestorage.com
|
|
19
|
-
CLOUDFLARE_ACCESS_KEY_ID=xxx
|
|
20
|
-
CLOUDFLARE_ACCESS_SECRET=xxx
|
|
21
|
-
CLOUDFLARE_R2_BUCKET=m
|
|
22
|
-
|
|
23
|
-
# replicate (optional)
|
|
24
|
-
REPLICATE_API_TOKEN=r8_xxx
|
|
25
|
-
|
|
26
|
-
# apify (web scraping actors)
|
|
27
|
-
APIFY_TOKEN=apify_api_xxx
|
|
28
|
-
|
|
29
|
-
# decart ai (real-time & batch video/image)
|
|
30
|
-
DECART_API_KEY=decart_xxx
|
|
31
|
-
|
|
32
|
-
# together ai (fast flux-schnell, no queue)
|
|
33
|
-
TOGETHER_API_KEY=together_xxx
|
package/.github/workflows/ci.yml
DELETED
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
name: CI
|
|
2
|
-
|
|
3
|
-
on:
|
|
4
|
-
push:
|
|
5
|
-
branches: [main]
|
|
6
|
-
pull_request:
|
|
7
|
-
branches: [main]
|
|
8
|
-
|
|
9
|
-
jobs:
|
|
10
|
-
lint-and-format:
|
|
11
|
-
runs-on: ubuntu-latest
|
|
12
|
-
steps:
|
|
13
|
-
- uses: actions/checkout@v4
|
|
14
|
-
|
|
15
|
-
- uses: oven-sh/setup-bun@v2
|
|
16
|
-
with:
|
|
17
|
-
bun-version: latest
|
|
18
|
-
|
|
19
|
-
- name: Install dependencies
|
|
20
|
-
run: bun install
|
|
21
|
-
|
|
22
|
-
- name: Check
|
|
23
|
-
run: bun run check
|
package/.husky/README.md
DELETED
|
@@ -1,102 +0,0 @@
|
|
|
1
|
-
# Git Hooks Configuration
|
|
2
|
-
|
|
3
|
-
This project uses [Husky](https://typicode.github.io/husky/) to manage Git hooks for maintaining code quality and security.
|
|
4
|
-
|
|
5
|
-
## Installed Hooks
|
|
6
|
-
|
|
7
|
-
### `pre-commit`
|
|
8
|
-
Runs before each commit:
|
|
9
|
-
- **Gitleaks** - Scans staged files for secrets and credentials
|
|
10
|
-
- **Lint-staged** - Runs Biome linter/formatter on staged files
|
|
11
|
-
|
|
12
|
-
### `commit-msg`
|
|
13
|
-
Validates commit messages:
|
|
14
|
-
- **Commitlint** - Enforces [Conventional Commits](https://www.conventionalcommits.org/) format
|
|
15
|
-
|
|
16
|
-
### `pre-push`
|
|
17
|
-
Runs before pushing to remote:
|
|
18
|
-
- **TypeScript type checking** - Ensures no type errors before push
|
|
19
|
-
|
|
20
|
-
## Commit Message Format
|
|
21
|
-
|
|
22
|
-
Follow the Conventional Commits specification:
|
|
23
|
-
|
|
24
|
-
```
|
|
25
|
-
<type>(<scope>): <subject>
|
|
26
|
-
|
|
27
|
-
<body>
|
|
28
|
-
|
|
29
|
-
<footer>
|
|
30
|
-
```
|
|
31
|
-
|
|
32
|
-
### Types
|
|
33
|
-
- `feat`: New feature
|
|
34
|
-
- `fix`: Bug fix
|
|
35
|
-
- `docs`: Documentation changes
|
|
36
|
-
- `style`: Code style changes (formatting, etc)
|
|
37
|
-
- `refactor`: Code refactoring
|
|
38
|
-
- `perf`: Performance improvements
|
|
39
|
-
- `test`: Test changes
|
|
40
|
-
- `build`: Build system changes
|
|
41
|
-
- `ci`: CI/CD changes
|
|
42
|
-
- `chore`: Other changes
|
|
43
|
-
- `revert`: Revert previous commit
|
|
44
|
-
|
|
45
|
-
### Examples
|
|
46
|
-
```bash
|
|
47
|
-
feat: add video generation API
|
|
48
|
-
fix(transcribe): handle empty audio files
|
|
49
|
-
docs: update installation guide
|
|
50
|
-
refactor: simplify audio processing pipeline
|
|
51
|
-
```
|
|
52
|
-
|
|
53
|
-
## Available Scripts
|
|
54
|
-
|
|
55
|
-
```bash
|
|
56
|
-
# Run linter
|
|
57
|
-
bun run lint
|
|
58
|
-
|
|
59
|
-
# Format code
|
|
60
|
-
bun run format
|
|
61
|
-
|
|
62
|
-
# Type check
|
|
63
|
-
bun run type-check
|
|
64
|
-
|
|
65
|
-
# Check bundle size
|
|
66
|
-
bun run size
|
|
67
|
-
```
|
|
68
|
-
|
|
69
|
-
## Bypassing Hooks
|
|
70
|
-
|
|
71
|
-
⚠️ **Not recommended** - Only use when absolutely necessary:
|
|
72
|
-
|
|
73
|
-
```bash
|
|
74
|
-
# Skip all hooks
|
|
75
|
-
git commit --no-verify -m "emergency fix"
|
|
76
|
-
|
|
77
|
-
# Skip specific checks by setting env vars
|
|
78
|
-
HUSKY=0 git commit -m "skip all hooks"
|
|
79
|
-
```
|
|
80
|
-
|
|
81
|
-
## Troubleshooting
|
|
82
|
-
|
|
83
|
-
If hooks aren't running:
|
|
84
|
-
|
|
85
|
-
```bash
|
|
86
|
-
# Reinstall hooks
|
|
87
|
-
rm -rf .husky/_
|
|
88
|
-
bun run prepare
|
|
89
|
-
chmod +x .husky/pre-commit .husky/commit-msg .husky/pre-push
|
|
90
|
-
```
|
|
91
|
-
|
|
92
|
-
## Size Limits
|
|
93
|
-
|
|
94
|
-
Bundle size limits are defined in `.size-limit.json`. Check size before publishing:
|
|
95
|
-
|
|
96
|
-
```bash
|
|
97
|
-
bun run size
|
|
98
|
-
```
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
package/.husky/commit-msg
DELETED
package/.husky/pre-commit
DELETED