vargai 0.4.0-alpha4 → 0.4.0-alpha40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +6 -0
- package/README.md +483 -61
- package/assets/fonts/TikTokSans-Bold.ttf +0 -0
- package/examples/grok-imagine-test.tsx +155 -0
- package/launch-videos/06-kawaii-fruits.tsx +93 -0
- package/launch-videos/07-ugc-weight-loss.tsx +132 -0
- package/launch-videos/08-talking-head-varg.tsx +107 -0
- package/launch-videos/09-girl.tsx +160 -0
- package/launch-videos/README.md +42 -0
- package/package.json +10 -4
- package/pipeline/cookbooks/round-video-character.md +1 -1
- package/skills/varg-video-generation/SKILL.md +224 -0
- package/skills/varg-video-generation/references/templates.md +380 -0
- package/skills/varg-video-generation/scripts/setup.ts +265 -0
- package/src/ai-sdk/cache.ts +1 -3
- package/src/ai-sdk/examples/google-image.ts +62 -0
- package/src/ai-sdk/index.ts +10 -0
- package/src/ai-sdk/middleware/wrap-image-model.ts +4 -21
- package/src/ai-sdk/middleware/wrap-music-model.ts +4 -16
- package/src/ai-sdk/middleware/wrap-video-model.ts +5 -17
- package/src/ai-sdk/providers/CONTRIBUTING.md +457 -0
- package/src/ai-sdk/providers/editly/backends/index.ts +8 -0
- package/src/ai-sdk/providers/editly/backends/local.ts +94 -0
- package/src/ai-sdk/providers/editly/backends/types.ts +74 -0
- package/src/ai-sdk/providers/editly/editly.test.ts +49 -1
- package/src/ai-sdk/providers/editly/index.ts +164 -80
- package/src/ai-sdk/providers/editly/layers.ts +58 -6
- package/src/ai-sdk/providers/editly/rendi/editly-with-rendi-backend.test.ts +335 -0
- package/src/ai-sdk/providers/editly/rendi/index.ts +289 -0
- package/src/ai-sdk/providers/editly/rendi/rendi.test.ts +35 -0
- package/src/ai-sdk/providers/editly/types.ts +30 -0
- package/src/ai-sdk/providers/elevenlabs.ts +10 -2
- package/src/ai-sdk/providers/fal.test.ts +214 -0
- package/src/ai-sdk/providers/fal.ts +435 -40
- package/src/ai-sdk/providers/google.ts +423 -0
- package/src/ai-sdk/providers/together.ts +191 -0
- package/src/cli/commands/find.tsx +1 -0
- package/src/cli/commands/frame.tsx +616 -0
- package/src/cli/commands/hello.ts +85 -0
- package/src/cli/commands/help.tsx +18 -30
- package/src/cli/commands/index.ts +11 -2
- package/src/cli/commands/init.tsx +570 -0
- package/src/cli/commands/list.tsx +1 -0
- package/src/cli/commands/render.tsx +322 -76
- package/src/cli/commands/run.tsx +1 -0
- package/src/cli/commands/storyboard.tsx +1714 -0
- package/src/cli/commands/which.tsx +1 -0
- package/src/cli/index.ts +23 -4
- package/src/cli/ui/components/Badge.tsx +1 -0
- package/src/cli/ui/components/DataTable.tsx +1 -0
- package/src/cli/ui/components/Header.tsx +1 -0
- package/src/cli/ui/components/HelpBlock.tsx +1 -0
- package/src/cli/ui/components/KeyValue.tsx +1 -0
- package/src/cli/ui/components/OptionRow.tsx +1 -0
- package/src/cli/ui/components/Separator.tsx +1 -0
- package/src/cli/ui/components/StatusBox.tsx +1 -0
- package/src/cli/ui/components/VargBox.tsx +1 -0
- package/src/cli/ui/components/VargProgress.tsx +1 -0
- package/src/cli/ui/components/VargSpinner.tsx +1 -0
- package/src/cli/ui/components/VargText.tsx +1 -0
- package/src/definitions/actions/grok-edit.ts +133 -0
- package/src/definitions/actions/index.ts +16 -0
- package/src/definitions/actions/qwen-angles.ts +218 -0
- package/src/index.ts +1 -0
- package/src/providers/fal.ts +196 -0
- package/src/react/assets.ts +9 -0
- package/src/react/elements.ts +0 -5
- package/src/react/examples/branching.tsx +6 -4
- package/src/react/examples/character-video.tsx +13 -10
- package/src/react/examples/local-files-test.tsx +19 -0
- package/src/react/examples/ltx2-test.tsx +25 -0
- package/src/react/examples/madi.tsx +13 -10
- package/src/react/examples/mcmeows.tsx +40 -0
- package/src/react/examples/music-defaults.tsx +24 -0
- package/src/react/examples/quickstart-test.tsx +101 -0
- package/src/react/examples/qwen-angles-test.tsx +72 -0
- package/src/react/index.ts +3 -3
- package/src/react/layouts/grid.tsx +1 -1
- package/src/react/layouts/index.ts +2 -1
- package/src/react/layouts/slot.tsx +85 -0
- package/src/react/layouts/split.tsx +18 -0
- package/src/react/react.test.ts +60 -11
- package/src/react/renderers/burn-captions.ts +95 -0
- package/src/react/renderers/cache.test.ts +182 -0
- package/src/react/renderers/captions.ts +25 -6
- package/src/react/renderers/clip.ts +56 -25
- package/src/react/renderers/context.ts +5 -2
- package/src/react/renderers/image.ts +5 -2
- package/src/react/renderers/index.ts +0 -1
- package/src/react/renderers/music.ts +8 -3
- package/src/react/renderers/packshot/blinking-button.ts +413 -0
- package/src/react/renderers/packshot.ts +170 -8
- package/src/react/renderers/progress.ts +4 -3
- package/src/react/renderers/render.ts +127 -71
- package/src/react/renderers/speech.ts +2 -2
- package/src/react/renderers/split.ts +34 -13
- package/src/react/renderers/utils.test.ts +80 -0
- package/src/react/renderers/utils.ts +37 -1
- package/src/react/renderers/video.ts +47 -9
- package/src/react/types.ts +70 -17
- package/src/studio/stages.ts +40 -39
- package/src/studio/step-renderer.ts +14 -24
- package/src/studio/ui/index.html +2 -2
- package/src/tests/all.test.ts +4 -4
- package/src/tests/index.ts +1 -1
- package/test-slot-grid.tsx +19 -0
- package/test-slot-userland.tsx +30 -0
- package/test-sync-v2.ts +30 -0
- package/test-sync-v2.tsx +29 -0
- package/tsconfig.json +1 -1
- package/video.tsx +7 -0
- package/src/ai-sdk/providers/editly/ffmpeg.ts +0 -60
- package/src/react/renderers/animate.ts +0 -59
- /package/src/cli/commands/{studio.tsx → studio.ts} +0 -0
|
package/src/react/renderers/split.ts
CHANGED
|
@@ -1,10 +1,21 @@
|
|
|
1
1
|
import { editly } from "../../ai-sdk/providers/editly";
|
|
2
|
-
import type {
|
|
2
|
+
import type {
|
|
3
|
+
Clip,
|
|
4
|
+
CropPosition,
|
|
5
|
+
Layer,
|
|
6
|
+
ResizeMode,
|
|
7
|
+
} from "../../ai-sdk/providers/editly/types";
|
|
3
8
|
import type { SplitProps, VargElement } from "../types";
|
|
4
9
|
import type { RenderContext } from "./context";
|
|
5
10
|
import { renderImage } from "./image";
|
|
6
11
|
import { renderVideo } from "./video";
|
|
7
12
|
|
|
13
|
+
interface SplitCell {
|
|
14
|
+
path: string;
|
|
15
|
+
resizeMode?: ResizeMode;
|
|
16
|
+
cropPosition?: CropPosition;
|
|
17
|
+
}
|
|
18
|
+
|
|
8
19
|
export async function renderSplit(
|
|
9
20
|
element: VargElement<"split">,
|
|
10
21
|
ctx: RenderContext,
|
|
@@ -12,30 +23,39 @@ export async function renderSplit(
|
|
|
12
23
|
const props = element.props as SplitProps;
|
|
13
24
|
const direction = props.direction ?? "horizontal";
|
|
14
25
|
|
|
15
|
-
const
|
|
26
|
+
const cells: SplitCell[] = [];
|
|
16
27
|
|
|
17
28
|
for (const child of element.children) {
|
|
18
29
|
if (!child || typeof child !== "object" || !("type" in child)) continue;
|
|
19
30
|
const childElement = child as VargElement;
|
|
31
|
+
const childProps = childElement.props as Record<string, unknown>;
|
|
20
32
|
|
|
21
33
|
if (childElement.type === "image") {
|
|
22
34
|
const path = await renderImage(childElement as VargElement<"image">, ctx);
|
|
23
|
-
|
|
35
|
+
cells.push({
|
|
36
|
+
path,
|
|
37
|
+
resizeMode: childProps.resize as ResizeMode | undefined,
|
|
38
|
+
cropPosition: childProps.cropPosition as CropPosition | undefined,
|
|
39
|
+
});
|
|
24
40
|
} else if (childElement.type === "video") {
|
|
25
41
|
const path = await renderVideo(childElement as VargElement<"video">, ctx);
|
|
26
|
-
|
|
42
|
+
cells.push({
|
|
43
|
+
path,
|
|
44
|
+
resizeMode: childProps.resize as ResizeMode | undefined,
|
|
45
|
+
cropPosition: childProps.cropPosition as CropPosition | undefined,
|
|
46
|
+
});
|
|
27
47
|
}
|
|
28
48
|
}
|
|
29
49
|
|
|
30
|
-
if (
|
|
50
|
+
if (cells.length === 0) {
|
|
31
51
|
throw new Error("Split element requires at least one image or video child");
|
|
32
52
|
}
|
|
33
53
|
|
|
34
|
-
if (
|
|
35
|
-
return
|
|
54
|
+
if (cells.length === 1) {
|
|
55
|
+
return cells[0]!.path;
|
|
36
56
|
}
|
|
37
57
|
|
|
38
|
-
const numChildren =
|
|
58
|
+
const numChildren = cells.length;
|
|
39
59
|
const cellWidth =
|
|
40
60
|
direction === "horizontal"
|
|
41
61
|
? Math.floor(ctx.width / numChildren)
|
|
@@ -45,24 +65,26 @@ export async function renderSplit(
|
|
|
45
65
|
? Math.floor(ctx.height / numChildren)
|
|
46
66
|
: ctx.height;
|
|
47
67
|
|
|
48
|
-
const layers: Layer[] =
|
|
49
|
-
const isVideo = path.endsWith(".mp4") || path.endsWith(".webm");
|
|
68
|
+
const layers: Layer[] = cells.map((cell, i) => {
|
|
69
|
+
const isVideo = cell.path.endsWith(".mp4") || cell.path.endsWith(".webm");
|
|
50
70
|
const left = direction === "horizontal" ? cellWidth * i : 0;
|
|
51
71
|
const top = direction === "vertical" ? cellHeight * i : 0;
|
|
52
72
|
|
|
53
73
|
if (isVideo) {
|
|
54
74
|
return {
|
|
55
75
|
type: "video" as const,
|
|
56
|
-
path,
|
|
76
|
+
path: cell.path,
|
|
57
77
|
left,
|
|
58
78
|
top,
|
|
59
79
|
width: cellWidth,
|
|
60
80
|
height: cellHeight,
|
|
81
|
+
resizeMode: cell.resizeMode,
|
|
82
|
+
cropPosition: cell.cropPosition,
|
|
61
83
|
};
|
|
62
84
|
}
|
|
63
85
|
return {
|
|
64
86
|
type: "image-overlay" as const,
|
|
65
|
-
path,
|
|
87
|
+
path: cell.path,
|
|
66
88
|
position: { x: left, y: top },
|
|
67
89
|
width: cellWidth,
|
|
68
90
|
height: cellHeight,
|
|
@@ -73,7 +95,6 @@ export async function renderSplit(
|
|
|
73
95
|
|
|
74
96
|
const clip: Clip = {
|
|
75
97
|
layers,
|
|
76
|
-
duration: 5,
|
|
77
98
|
};
|
|
78
99
|
|
|
79
100
|
const outPath = `/tmp/varg-split-${Date.now()}.mp4`;
|
|
package/src/react/renderers/utils.test.ts
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import { describe, expect, test } from "bun:test";
|
|
2
|
+
import { fal } from "../../ai-sdk/providers/fal";
|
|
3
|
+
import { Image, Video } from "../elements";
|
|
4
|
+
import { computeCacheKey } from "./utils";
|
|
5
|
+
|
|
6
|
+
describe("computeCacheKey", () => {
|
|
7
|
+
test("ignores layout props for images", () => {
|
|
8
|
+
const base = Image({
|
|
9
|
+
prompt: "lion on a couch",
|
|
10
|
+
model: fal.imageModel("flux-schnell"),
|
|
11
|
+
aspectRatio: "16:9",
|
|
12
|
+
});
|
|
13
|
+
|
|
14
|
+
const variant = Image({
|
|
15
|
+
prompt: "lion on a couch",
|
|
16
|
+
model: fal.imageModel("flux-schnell"),
|
|
17
|
+
aspectRatio: "16:9",
|
|
18
|
+
left: "10%",
|
|
19
|
+
top: "5%",
|
|
20
|
+
width: "50%",
|
|
21
|
+
height: "50%",
|
|
22
|
+
resize: "cover",
|
|
23
|
+
zoom: "in",
|
|
24
|
+
key: "layout-1",
|
|
25
|
+
});
|
|
26
|
+
|
|
27
|
+
expect(computeCacheKey(base)).toEqual(computeCacheKey(variant));
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
test("ignores trim/audio/layout props for videos", () => {
|
|
31
|
+
const base = Video({
|
|
32
|
+
prompt: "walk forward, confident stride",
|
|
33
|
+
model: fal.videoModel("kling-v2.5"),
|
|
34
|
+
aspectRatio: "9:16",
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
const variant = Video({
|
|
38
|
+
prompt: "walk forward, confident stride",
|
|
39
|
+
model: fal.videoModel("kling-v2.5"),
|
|
40
|
+
aspectRatio: "9:16",
|
|
41
|
+
cutFrom: 0.5,
|
|
42
|
+
cutTo: 2.5,
|
|
43
|
+
left: "15%",
|
|
44
|
+
width: "70%",
|
|
45
|
+
keepAudio: true,
|
|
46
|
+
volume: 0.5,
|
|
47
|
+
key: "clip-2",
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
expect(computeCacheKey(base)).toEqual(computeCacheKey(variant));
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
test("changes when prompt changes", () => {
|
|
54
|
+
const a = Image({
|
|
55
|
+
prompt: "lion on a couch",
|
|
56
|
+
model: fal.imageModel("flux-schnell"),
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
const b = Image({
|
|
60
|
+
prompt: "tiger on a couch",
|
|
61
|
+
model: fal.imageModel("flux-schnell"),
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
expect(computeCacheKey(a)).not.toEqual(computeCacheKey(b));
|
|
65
|
+
});
|
|
66
|
+
|
|
67
|
+
test("changes when model changes", () => {
|
|
68
|
+
const a = Video({
|
|
69
|
+
prompt: "walk forward",
|
|
70
|
+
model: fal.videoModel("kling-v2.5"),
|
|
71
|
+
});
|
|
72
|
+
|
|
73
|
+
const b = Video({
|
|
74
|
+
prompt: "walk forward",
|
|
75
|
+
model: fal.videoModel("wan-2.5"),
|
|
76
|
+
});
|
|
77
|
+
|
|
78
|
+
expect(computeCacheKey(a)).not.toEqual(computeCacheKey(b));
|
|
79
|
+
});
|
|
80
|
+
});
|
|
package/src/react/renderers/utils.ts
CHANGED
|
@@ -41,6 +41,42 @@ function getFileFingerprint(path: string): string {
|
|
|
41
41
|
return `${path}:${stat.mtimeMs}:${stat.size}`;
|
|
42
42
|
}
|
|
43
43
|
|
|
44
|
+
const COMMON_IGNORED_PROPS = new Set(["children", "key"]);
|
|
45
|
+
|
|
46
|
+
const IGNORED_PROPS_BY_TYPE: Partial<Record<VargElement["type"], Set<string>>> =
|
|
47
|
+
{
|
|
48
|
+
image: new Set([
|
|
49
|
+
"left",
|
|
50
|
+
"top",
|
|
51
|
+
"width",
|
|
52
|
+
"height",
|
|
53
|
+
"resize",
|
|
54
|
+
"position",
|
|
55
|
+
"size",
|
|
56
|
+
"zoom",
|
|
57
|
+
]),
|
|
58
|
+
video: new Set([
|
|
59
|
+
"left",
|
|
60
|
+
"top",
|
|
61
|
+
"width",
|
|
62
|
+
"height",
|
|
63
|
+
"resize",
|
|
64
|
+
"cutFrom",
|
|
65
|
+
"cutTo",
|
|
66
|
+
"volume",
|
|
67
|
+
"keepAudio",
|
|
68
|
+
]),
|
|
69
|
+
speech: new Set(["volume", "id"]),
|
|
70
|
+
};
|
|
71
|
+
|
|
72
|
+
function shouldIgnoreProp(
|
|
73
|
+
elementType: VargElement["type"],
|
|
74
|
+
key: string,
|
|
75
|
+
): boolean {
|
|
76
|
+
if (COMMON_IGNORED_PROPS.has(key)) return true;
|
|
77
|
+
return IGNORED_PROPS_BY_TYPE[elementType]?.has(key) ?? false;
|
|
78
|
+
}
|
|
79
|
+
|
|
44
80
|
function serializeValue(v: unknown): string {
|
|
45
81
|
if (typeof v === "string") {
|
|
46
82
|
if (isLocalFilePath(v)) {
|
|
@@ -67,7 +103,7 @@ export function computeCacheKey(element: VargElement): CacheKeyPart[] {
|
|
|
67
103
|
const key: CacheKeyPart[] = [element.type];
|
|
68
104
|
|
|
69
105
|
for (const [k, v] of Object.entries(element.props)) {
|
|
70
|
-
if (k
|
|
106
|
+
if (shouldIgnoreProp(element.type, k)) continue;
|
|
71
107
|
if (k === "model" && v && typeof v === "object" && "modelId" in v) {
|
|
72
108
|
const model = v as {
|
|
73
109
|
provider?: string;
|
|
package/src/react/renderers/video.ts
CHANGED
|
@@ -9,6 +9,7 @@ import type {
|
|
|
9
9
|
import type { RenderContext } from "./context";
|
|
10
10
|
import { renderImage } from "./image";
|
|
11
11
|
import { addTask, completeTask, startTask } from "./progress";
|
|
12
|
+
import { renderSpeech } from "./speech";
|
|
12
13
|
import { computeCacheKey, toFileUrl } from "./utils";
|
|
13
14
|
|
|
14
15
|
async function resolveImageInput(
|
|
@@ -27,13 +28,46 @@ async function resolveImageInput(
|
|
|
27
28
|
return new Uint8Array(await response.arrayBuffer());
|
|
28
29
|
}
|
|
29
30
|
|
|
30
|
-
async function
|
|
31
|
-
input: Uint8Array | string | undefined,
|
|
31
|
+
async function resolveAudioInput(
|
|
32
|
+
input: Uint8Array | string | VargElement<"speech"> | undefined,
|
|
33
|
+
ctx: RenderContext,
|
|
32
34
|
): Promise<Uint8Array | undefined> {
|
|
33
35
|
if (!input) return undefined;
|
|
34
36
|
if (input instanceof Uint8Array) return input;
|
|
35
|
-
|
|
36
|
-
|
|
37
|
+
if (typeof input === "string") {
|
|
38
|
+
const response = await fetch(toFileUrl(input));
|
|
39
|
+
return new Uint8Array(await response.arrayBuffer());
|
|
40
|
+
}
|
|
41
|
+
// It's a Speech element - render it first
|
|
42
|
+
if (input.type === "speech") {
|
|
43
|
+
const { path } = await renderSpeech(input, ctx);
|
|
44
|
+
const response = await fetch(toFileUrl(path));
|
|
45
|
+
return new Uint8Array(await response.arrayBuffer());
|
|
46
|
+
}
|
|
47
|
+
throw new Error(
|
|
48
|
+
`Unsupported audio input type: ${(input as VargElement).type}`,
|
|
49
|
+
);
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
async function resolveVideoInput(
|
|
53
|
+
input: Uint8Array | string | VargElement<"video"> | undefined,
|
|
54
|
+
ctx: RenderContext,
|
|
55
|
+
): Promise<Uint8Array | undefined> {
|
|
56
|
+
if (!input) return undefined;
|
|
57
|
+
if (input instanceof Uint8Array) return input;
|
|
58
|
+
if (typeof input === "string") {
|
|
59
|
+
const response = await fetch(toFileUrl(input));
|
|
60
|
+
return new Uint8Array(await response.arrayBuffer());
|
|
61
|
+
}
|
|
62
|
+
// It's a Video element - render it first
|
|
63
|
+
if (input.type === "video") {
|
|
64
|
+
const path = await renderVideo(input, ctx);
|
|
65
|
+
const response = await fetch(toFileUrl(path));
|
|
66
|
+
return new Uint8Array(await response.arrayBuffer());
|
|
67
|
+
}
|
|
68
|
+
throw new Error(
|
|
69
|
+
`Unsupported video input type: ${(input as VargElement).type}`,
|
|
70
|
+
);
|
|
37
71
|
}
|
|
38
72
|
|
|
39
73
|
async function resolvePrompt(
|
|
@@ -55,8 +89,8 @@ async function resolvePrompt(
|
|
|
55
89
|
prompt.images
|
|
56
90
|
? Promise.all(prompt.images.map((img) => resolveImageInput(img, ctx)))
|
|
57
91
|
: undefined,
|
|
58
|
-
|
|
59
|
-
|
|
92
|
+
resolveAudioInput(prompt.audio, ctx),
|
|
93
|
+
resolveVideoInput(prompt.video, ctx),
|
|
60
94
|
]);
|
|
61
95
|
return {
|
|
62
96
|
text: prompt.text,
|
|
@@ -81,9 +115,11 @@ export async function renderVideo(
|
|
|
81
115
|
throw new Error("Video element requires either 'prompt' or 'src'");
|
|
82
116
|
}
|
|
83
117
|
|
|
84
|
-
const model = props.model;
|
|
118
|
+
const model = props.model ?? ctx.defaults?.video;
|
|
85
119
|
if (!model) {
|
|
86
|
-
throw new Error(
|
|
120
|
+
throw new Error(
|
|
121
|
+
"Video element requires 'model' prop (or set defaults.video in render options)",
|
|
122
|
+
);
|
|
87
123
|
}
|
|
88
124
|
|
|
89
125
|
// Compute cache key for deduplication
|
|
@@ -109,7 +145,9 @@ export async function renderVideo(
|
|
|
109
145
|
const { video } = await ctx.generateVideo({
|
|
110
146
|
model,
|
|
111
147
|
prompt: resolvedPrompt,
|
|
112
|
-
duration: 5,
|
|
148
|
+
duration: props.duration ?? 5,
|
|
149
|
+
aspectRatio: props.aspectRatio,
|
|
150
|
+
providerOptions: props.providerOptions,
|
|
113
151
|
cacheKey,
|
|
114
152
|
} as Parameters<typeof generateVideo>[0]);
|
|
115
153
|
|
package/src/react/types.ts
CHANGED
|
@@ -1,6 +1,13 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type {
|
|
2
|
+
ImageModelV3,
|
|
3
|
+
SharedV3ProviderOptions,
|
|
4
|
+
SpeechModelV3,
|
|
5
|
+
} from "@ai-sdk/provider";
|
|
6
|
+
import type { FFmpegBackend } from "@/ai-sdk/providers/editly/backends";
|
|
7
|
+
import type { CacheStorage } from "../ai-sdk/cache";
|
|
2
8
|
import type { MusicModelV3 } from "../ai-sdk/music-model";
|
|
3
9
|
import type {
|
|
10
|
+
CropPosition,
|
|
4
11
|
Position,
|
|
5
12
|
ResizeMode,
|
|
6
13
|
SizeValue,
|
|
@@ -14,7 +21,6 @@ export type VargElementType =
|
|
|
14
21
|
| "overlay"
|
|
15
22
|
| "image"
|
|
16
23
|
| "video"
|
|
17
|
-
| "animate"
|
|
18
24
|
| "speech"
|
|
19
25
|
| "talking-head"
|
|
20
26
|
| "title"
|
|
@@ -69,6 +75,7 @@ export interface RenderProps extends BaseProps {
|
|
|
69
75
|
height?: number;
|
|
70
76
|
fps?: number;
|
|
71
77
|
normalize?: boolean;
|
|
78
|
+
shortest?: boolean;
|
|
72
79
|
children?: VargNode;
|
|
73
80
|
}
|
|
74
81
|
|
|
@@ -99,6 +106,8 @@ export interface ImageProps extends BaseProps, PositionProps {
|
|
|
99
106
|
position?: Position;
|
|
100
107
|
size?: { width: string; height: string };
|
|
101
108
|
removeBackground?: boolean;
|
|
109
|
+
/** Provider-specific options (e.g., fal: { acceleration: "high" }) */
|
|
110
|
+
providerOptions?: SharedV3ProviderOptions;
|
|
102
111
|
}
|
|
103
112
|
|
|
104
113
|
export type VideoPrompt =
|
|
@@ -106,8 +115,8 @@ export type VideoPrompt =
|
|
|
106
115
|
| {
|
|
107
116
|
text?: string;
|
|
108
117
|
images?: ImageInput[];
|
|
109
|
-
audio?: Uint8Array | string;
|
|
110
|
-
video?: Uint8Array | string;
|
|
118
|
+
audio?: Uint8Array | string | VargElement<"speech">;
|
|
119
|
+
video?: Uint8Array | string | VargElement<"video">;
|
|
111
120
|
};
|
|
112
121
|
|
|
113
122
|
export type VideoProps = BaseProps &
|
|
@@ -118,17 +127,12 @@ export type VideoProps = BaseProps &
|
|
|
118
127
|
src?: string;
|
|
119
128
|
model?: VideoModelV3;
|
|
120
129
|
resize?: ResizeMode;
|
|
130
|
+
cropPosition?: CropPosition;
|
|
131
|
+
aspectRatio?: `${number}:${number}`;
|
|
132
|
+
/** Provider-specific options (e.g., fal: { generate_audio: true }) */
|
|
133
|
+
providerOptions?: SharedV3ProviderOptions;
|
|
121
134
|
};
|
|
122
135
|
|
|
123
|
-
// Image-to-video animation
|
|
124
|
-
export interface AnimateProps extends BaseProps, PositionProps {
|
|
125
|
-
image?: VargElement<"image">;
|
|
126
|
-
src?: string;
|
|
127
|
-
model?: VideoModelV3;
|
|
128
|
-
motion?: string;
|
|
129
|
-
duration?: number;
|
|
130
|
-
}
|
|
131
|
-
|
|
132
136
|
export interface SpeechProps extends BaseProps, VolumeProps {
|
|
133
137
|
voice?: string;
|
|
134
138
|
model?: SpeechModelV3;
|
|
@@ -168,6 +172,8 @@ export type MusicProps = BaseProps &
|
|
|
168
172
|
prompt?: string;
|
|
169
173
|
model?: MusicModelV3;
|
|
170
174
|
src?: string;
|
|
175
|
+
/** Timeline offset in seconds — when in the video this audio starts playing */
|
|
176
|
+
start?: number;
|
|
171
177
|
loop?: boolean;
|
|
172
178
|
ducking?: boolean;
|
|
173
179
|
};
|
|
@@ -176,6 +182,7 @@ export interface CaptionsProps extends BaseProps {
|
|
|
176
182
|
src?: string | VargElement<"speech">;
|
|
177
183
|
srt?: string;
|
|
178
184
|
style?: "tiktok" | "karaoke" | "bounce" | "typewriter";
|
|
185
|
+
position?: "top" | "center" | "bottom";
|
|
179
186
|
color?: string;
|
|
180
187
|
activeColor?: string;
|
|
181
188
|
fontSize?: number;
|
|
@@ -200,23 +207,70 @@ export interface SwipeProps extends BaseProps {
|
|
|
200
207
|
export interface PackshotProps extends BaseProps {
|
|
201
208
|
background?: VargElement<"image"> | string;
|
|
202
209
|
logo?: string;
|
|
210
|
+
/**
|
|
211
|
+
* Logo position on screen.
|
|
212
|
+
*
|
|
213
|
+
* Accepts any {@link Position} value including PositionObject (`{ x, y }`).
|
|
214
|
+
* A PositionObject is normalised to the closest string position at render
|
|
215
|
+
* time (see ctaPosition docs for the conversion rules).
|
|
216
|
+
*/
|
|
203
217
|
logoPosition?: Position;
|
|
204
218
|
logoSize?: SizeValue;
|
|
219
|
+
/** Title text displayed below the logo (e.g. app name) */
|
|
220
|
+
title?: string;
|
|
221
|
+
/** Title text color (hex, default: "#FFFFFF") */
|
|
222
|
+
titleColor?: string;
|
|
223
|
+
/** Title position on screen (default: "center") */
|
|
224
|
+
titlePosition?: Position;
|
|
225
|
+
/** CTA button text */
|
|
205
226
|
cta?: string;
|
|
227
|
+
/**
|
|
228
|
+
* CTA button position on screen.
|
|
229
|
+
*
|
|
230
|
+
* Accepts any value from the {@link Position} union:
|
|
231
|
+
* - **String literals** (`"top"`, `"bottom"`, `"center"`, `"top-left"`, etc.)
|
|
232
|
+
* are used directly (compound positions like `"top-left"` are collapsed to
|
|
233
|
+
* their vertical component for the blinking-button renderer).
|
|
234
|
+
* - **PositionObject** (`{ x, y }` with optional `originX` / `originY`) is
|
|
235
|
+
* supported and will be **normalised** to the closest string position at
|
|
236
|
+
* render time. The y-coordinate is converted to a 0-1 fraction (pixels
|
|
237
|
+
* are divided by the video height; percentages are divided by 100) and
|
|
238
|
+
* mapped to `"top"` (< 33 %), `"center"` (33-67 %), or `"bottom"` (> 67 %).
|
|
239
|
+
* The x-coordinate follows the same logic for contexts that use the full
|
|
240
|
+
* nine-position grid.
|
|
241
|
+
*/
|
|
206
242
|
ctaPosition?: Position;
|
|
243
|
+
/** CTA button background color (hex, default: "#FF6B00") */
|
|
207
244
|
ctaColor?: string;
|
|
208
|
-
|
|
245
|
+
/** CTA button text color (hex, default: "#FFFFFF") */
|
|
246
|
+
ctaTextColor?: string;
|
|
247
|
+
/** CTA button size in pixels { width, height } */
|
|
248
|
+
ctaSize?: { width: number; height: number };
|
|
249
|
+
/** Enable blinking animation (scale + brightness pulse) */
|
|
209
250
|
blinkCta?: boolean;
|
|
251
|
+
/** Blink animation cycle duration in seconds (default: 0.8) */
|
|
252
|
+
blinkFrequency?: number;
|
|
253
|
+
/** Packshot duration in seconds */
|
|
210
254
|
duration?: number;
|
|
211
255
|
}
|
|
212
256
|
|
|
213
|
-
export type RenderMode = "strict" | "
|
|
257
|
+
export type RenderMode = "strict" | "preview";
|
|
258
|
+
|
|
259
|
+
export interface DefaultModels {
|
|
260
|
+
image?: ImageModelV3;
|
|
261
|
+
video?: VideoModelV3;
|
|
262
|
+
speech?: SpeechModelV3;
|
|
263
|
+
music?: MusicModelV3;
|
|
264
|
+
}
|
|
214
265
|
|
|
215
266
|
export interface RenderOptions {
|
|
216
267
|
output?: string;
|
|
217
|
-
cache?: string;
|
|
268
|
+
cache?: string | CacheStorage;
|
|
218
269
|
quiet?: boolean;
|
|
270
|
+
verbose?: boolean;
|
|
219
271
|
mode?: RenderMode;
|
|
272
|
+
defaults?: DefaultModels;
|
|
273
|
+
backend?: FFmpegBackend;
|
|
220
274
|
}
|
|
221
275
|
|
|
222
276
|
export interface ElementPropsMap {
|
|
@@ -225,7 +279,6 @@ export interface ElementPropsMap {
|
|
|
225
279
|
overlay: OverlayProps;
|
|
226
280
|
image: ImageProps;
|
|
227
281
|
video: VideoProps;
|
|
228
|
-
animate: AnimateProps;
|
|
229
282
|
speech: SpeechProps;
|
|
230
283
|
"talking-head": TalkingHeadProps;
|
|
231
284
|
title: TitleProps;
|
package/src/studio/stages.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { VargElement, VargNode } from "../react/types";
|
|
2
2
|
|
|
3
|
-
export type StageType = "image" | "video" | "animate" | "speech" | "music";
|
|
3
|
+
export type StageType = "image" | "video" | "speech" | "music";
|
|
4
4
|
|
|
5
5
|
export interface RenderStage {
|
|
6
6
|
id: string;
|
|
@@ -70,11 +70,6 @@ export function extractStages(element: VargElement): ExtractedStages {
|
|
|
70
70
|
return "video";
|
|
71
71
|
}
|
|
72
72
|
|
|
73
|
-
if (type === "animate") {
|
|
74
|
-
const motion = props.motion;
|
|
75
|
-
return motion ? `animate: ${motion}` : "animate";
|
|
76
|
-
}
|
|
77
|
-
|
|
78
73
|
if (type === "speech") {
|
|
79
74
|
const text = getTextContent(element.children);
|
|
80
75
|
return `speech: ${text.slice(0, 30)}${text.length > 30 ? "..." : ""}`;
|
|
@@ -120,13 +115,7 @@ export function extractStages(element: VargElement): ExtractedStages {
|
|
|
120
115
|
const collectedDeps: string[] = [...parentDeps];
|
|
121
116
|
|
|
122
117
|
// Check if this is a renderable stage
|
|
123
|
-
const stageTypes: StageType[] = [
|
|
124
|
-
"image",
|
|
125
|
-
"video",
|
|
126
|
-
"animate",
|
|
127
|
-
"speech",
|
|
128
|
-
"music",
|
|
129
|
-
];
|
|
118
|
+
const stageTypes: StageType[] = ["image", "video", "speech", "music"];
|
|
130
119
|
|
|
131
120
|
if (stageTypes.includes(element.type as StageType)) {
|
|
132
121
|
const stageType = element.type as StageType;
|
|
@@ -137,35 +126,47 @@ export function extractStages(element: VargElement): ExtractedStages {
|
|
|
137
126
|
return [];
|
|
138
127
|
}
|
|
139
128
|
|
|
140
|
-
|
|
141
|
-
const
|
|
142
|
-
|
|
143
|
-
if (
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
const imgElement = imgInput as VargElement;
|
|
154
|
-
if (imgElement.type === "image") {
|
|
155
|
-
const deps = walkTree(imgElement, currentPath, collectedDeps);
|
|
156
|
-
imageDeps.push(...deps);
|
|
157
|
-
}
|
|
129
|
+
const nestedDeps: string[] = [];
|
|
130
|
+
const prompt = props.prompt as Record<string, unknown> | undefined;
|
|
131
|
+
|
|
132
|
+
if (prompt && typeof prompt === "object") {
|
|
133
|
+
if (Array.isArray(prompt.images)) {
|
|
134
|
+
for (const input of prompt.images) {
|
|
135
|
+
if (input && typeof input === "object" && "type" in input) {
|
|
136
|
+
const deps = walkTree(
|
|
137
|
+
input as VargElement,
|
|
138
|
+
currentPath,
|
|
139
|
+
collectedDeps,
|
|
140
|
+
);
|
|
141
|
+
nestedDeps.push(...deps);
|
|
158
142
|
}
|
|
159
143
|
}
|
|
160
144
|
}
|
|
161
145
|
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
146
|
+
if (
|
|
147
|
+
prompt.video &&
|
|
148
|
+
typeof prompt.video === "object" &&
|
|
149
|
+
"type" in prompt.video
|
|
150
|
+
) {
|
|
151
|
+
const deps = walkTree(
|
|
152
|
+
prompt.video as VargElement,
|
|
153
|
+
currentPath,
|
|
154
|
+
collectedDeps,
|
|
155
|
+
);
|
|
156
|
+
nestedDeps.push(...deps);
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
if (
|
|
160
|
+
prompt.audio &&
|
|
161
|
+
typeof prompt.audio === "object" &&
|
|
162
|
+
"type" in prompt.audio
|
|
163
|
+
) {
|
|
164
|
+
const deps = walkTree(
|
|
165
|
+
prompt.audio as VargElement,
|
|
166
|
+
currentPath,
|
|
167
|
+
collectedDeps,
|
|
168
|
+
);
|
|
169
|
+
nestedDeps.push(...deps);
|
|
169
170
|
}
|
|
170
171
|
}
|
|
171
172
|
|
|
@@ -176,7 +177,7 @@ export function extractStages(element: VargElement): ExtractedStages {
|
|
|
176
177
|
label: getLabel(stageType, element),
|
|
177
178
|
element,
|
|
178
179
|
path: currentPath,
|
|
179
|
-
dependsOn: [...new Set([...collectedDeps, ...imageDeps])],
|
|
180
|
+
dependsOn: [...new Set([...collectedDeps, ...nestedDeps])],
|
|
180
181
|
status: "pending",
|
|
181
182
|
};
|
|
182
183
|
|
|
package/src/studio/step-renderer.ts
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
import { generateImage } from "ai";
|
|
2
|
-
import { withCache } from "../ai-sdk/cache";
|
|
2
|
+
import { type CacheStorage, withCache } from "../ai-sdk/cache";
|
|
3
3
|
import { fileCache } from "../ai-sdk/file-cache";
|
|
4
4
|
import { generateVideo } from "../ai-sdk/generate-video";
|
|
5
|
-
import { renderAnimate } from "../react/renderers/animate";
|
|
6
5
|
import type { RenderContext } from "../react/renderers/context";
|
|
7
6
|
import { renderImage } from "../react/renderers/image";
|
|
8
7
|
import { renderMusic } from "../react/renderers/music";
|
|
@@ -28,21 +27,26 @@ const sessions = new Map<string, StepSession>();
|
|
|
28
27
|
export function createStepSession(
|
|
29
28
|
code: string,
|
|
30
29
|
rootElement: VargElement,
|
|
31
|
-
|
|
30
|
+
cache?: string | CacheStorage,
|
|
32
31
|
): StepSession {
|
|
33
32
|
const props = rootElement.props as RenderProps;
|
|
34
|
-
const
|
|
33
|
+
const cacheStorage =
|
|
34
|
+
cache === undefined
|
|
35
|
+
? undefined
|
|
36
|
+
: typeof cache === "string"
|
|
37
|
+
? fileCache({ dir: cache })
|
|
38
|
+
: cache;
|
|
35
39
|
|
|
36
40
|
const ctx: RenderContext = {
|
|
37
41
|
width: props.width ?? 1920,
|
|
38
42
|
height: props.height ?? 1080,
|
|
39
43
|
fps: props.fps ?? 30,
|
|
40
|
-
cache,
|
|
41
|
-
generateImage:
|
|
42
|
-
? withCache(generateImage, { storage:
|
|
44
|
+
cache: cacheStorage,
|
|
45
|
+
generateImage: cacheStorage
|
|
46
|
+
? withCache(generateImage, { storage: cacheStorage })
|
|
43
47
|
: generateImage,
|
|
44
|
-
generateVideo:
|
|
45
|
-
? withCache(generateVideo, { storage:
|
|
48
|
+
generateVideo: cacheStorage
|
|
49
|
+
? withCache(generateVideo, { storage: cacheStorage })
|
|
46
50
|
: generateVideo,
|
|
47
51
|
tempFiles: [],
|
|
48
52
|
progress: createProgressTracker(false),
|
|
@@ -124,20 +128,6 @@ export async function executeStage(
|
|
|
124
128
|
break;
|
|
125
129
|
}
|
|
126
130
|
|
|
127
|
-
case "animate": {
|
|
128
|
-
const path = await renderAnimate(
|
|
129
|
-
stage.element as VargElement<"animate">,
|
|
130
|
-
session.ctx,
|
|
131
|
-
);
|
|
132
|
-
result = {
|
|
133
|
-
type: "video",
|
|
134
|
-
path,
|
|
135
|
-
previewUrl: `/api/step/preview/${session.id}/${stageId}`,
|
|
136
|
-
mimeType: "video/mp4",
|
|
137
|
-
};
|
|
138
|
-
break;
|
|
139
|
-
}
|
|
140
|
-
|
|
141
131
|
case "speech": {
|
|
142
132
|
const speechResult = await renderSpeech(
|
|
143
133
|
stage.element as VargElement<"speech">,
|
|
@@ -238,7 +228,7 @@ export async function finalizeRender(
|
|
|
238
228
|
|
|
239
229
|
await render(session.rootElement, {
|
|
240
230
|
output: outputPath,
|
|
241
|
-
cache: session.ctx.cache
|
|
231
|
+
cache: session.ctx.cache,
|
|
242
232
|
quiet: true,
|
|
243
233
|
});
|
|
244
234
|
|