vargai 0.4.0-alpha26 → 0.4.0-alpha28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -1
- package/src/cli/commands/frame.tsx +58 -7
- package/src/cli/commands/init.tsx +158 -0
- package/src/cli/commands/render.tsx +18 -0
- package/src/cli/commands/storyboard.tsx +929 -259
- package/src/studio/stages.ts +40 -19
package/package.json
CHANGED
|
@@ -43,6 +43,7 @@
|
|
|
43
43
|
"@ai-sdk/groq": "^3.0.12",
|
|
44
44
|
"@ai-sdk/openai": "^3.0.9",
|
|
45
45
|
"@ai-sdk/provider": "^3.0.2",
|
|
46
|
+
"@ai-sdk/provider-utils": "^4.0.4",
|
|
46
47
|
"@ai-sdk/replicate": "^2.0.5",
|
|
47
48
|
"@aws-sdk/client-s3": "^3.937.0",
|
|
48
49
|
"@aws-sdk/s3-request-presigner": "^3.937.0",
|
|
@@ -65,7 +66,7 @@
|
|
|
65
66
|
"vargai": "^0.4.0-alpha11",
|
|
66
67
|
"zod": "^4.2.1"
|
|
67
68
|
},
|
|
68
|
-
"version": "0.4.0-alpha26",
|
|
69
|
+
"version": "0.4.0-alpha28",
|
|
69
70
|
"exports": {
|
|
70
71
|
".": "./src/index.ts",
|
|
71
72
|
"./ai": "./src/ai-sdk/index.ts",
|
|
@@ -6,9 +6,9 @@ import { generateImage, wrapImageModel } from "ai";
|
|
|
6
6
|
import { defineCommand } from "citty";
|
|
7
7
|
import { Box, Text } from "ink";
|
|
8
8
|
import { withCache } from "../../ai-sdk/cache";
|
|
9
|
-
import { File } from "../../ai-sdk/file";
|
|
10
9
|
import { fileCache } from "../../ai-sdk/file-cache";
|
|
11
10
|
import { imagePlaceholderFallbackMiddleware } from "../../ai-sdk/middleware";
|
|
11
|
+
import { computeCacheKey } from "../../react/renderers/utils";
|
|
12
12
|
import type {
|
|
13
13
|
ClipProps,
|
|
14
14
|
ImageInput,
|
|
@@ -16,7 +16,6 @@ import type {
|
|
|
16
16
|
ImageProps,
|
|
17
17
|
RenderProps,
|
|
18
18
|
VargElement,
|
|
19
|
-
VargNode,
|
|
20
19
|
VideoProps,
|
|
21
20
|
} from "../../react/types";
|
|
22
21
|
import { Header, HelpBlock, VargBox, VargText } from "../ui/index.ts";
|
|
@@ -35,6 +34,7 @@ interface FrameInfo {
|
|
|
35
34
|
aspectRatio?: string;
|
|
36
35
|
duration: number;
|
|
37
36
|
startTime: number;
|
|
37
|
+
imageElement?: VargElement<"image">;
|
|
38
38
|
}
|
|
39
39
|
|
|
40
40
|
async function loadComponent(filePath: string): Promise<VargElement> {
|
|
@@ -101,7 +101,15 @@ function toFileUrl(pathOrUrl: string): string {
|
|
|
101
101
|
return `file://${resolved}`;
|
|
102
102
|
}
|
|
103
103
|
|
|
104
|
-
|
|
104
|
+
interface ImageGeneratorContext {
|
|
105
|
+
generateImage: typeof generateImage;
|
|
106
|
+
defaultModel?: unknown;
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
async function resolveImageInput(
|
|
110
|
+
input: ImageInput,
|
|
111
|
+
ctx: ImageGeneratorContext,
|
|
112
|
+
): Promise<Uint8Array> {
|
|
105
113
|
if (input instanceof Uint8Array) {
|
|
106
114
|
return input;
|
|
107
115
|
}
|
|
@@ -123,7 +131,34 @@ async function resolveImageInput(input: ImageInput): Promise<Uint8Array> {
|
|
|
123
131
|
return new Uint8Array(await response.arrayBuffer());
|
|
124
132
|
}
|
|
125
133
|
|
|
126
|
-
|
|
134
|
+
if (props.prompt) {
|
|
135
|
+
const model = props.model ?? ctx.defaultModel;
|
|
136
|
+
if (!model) {
|
|
137
|
+
throw new Error("Nested image requires model");
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
const resolvedPrompt = await resolvePromptForGeneration(
|
|
141
|
+
props.prompt,
|
|
142
|
+
ctx,
|
|
143
|
+
);
|
|
144
|
+
const cacheKey = computeCacheKey(imageElement);
|
|
145
|
+
|
|
146
|
+
const { images } = await ctx.generateImage({
|
|
147
|
+
model: model as Parameters<typeof generateImage>[0]["model"],
|
|
148
|
+
prompt: resolvedPrompt,
|
|
149
|
+
aspectRatio: props.aspectRatio as `${number}:${number}` | undefined,
|
|
150
|
+
n: 1,
|
|
151
|
+
cacheKey,
|
|
152
|
+
} as Parameters<typeof generateImage>[0]);
|
|
153
|
+
|
|
154
|
+
const firstImage = images[0];
|
|
155
|
+
if (!firstImage?.uint8Array) {
|
|
156
|
+
throw new Error("Nested image generation returned no data");
|
|
157
|
+
}
|
|
158
|
+
return firstImage.uint8Array;
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
throw new Error("Image element requires prompt or src");
|
|
127
162
|
}
|
|
128
163
|
throw new Error("Unknown image input type");
|
|
129
164
|
}
|
|
@@ -202,6 +237,7 @@ function extractFrames(element: VargElement): FrameInfo[] {
|
|
|
202
237
|
aspectRatio: props.aspectRatio,
|
|
203
238
|
duration,
|
|
204
239
|
startTime: currentTime,
|
|
240
|
+
imageElement: clipChildElement as VargElement<"image">,
|
|
205
241
|
});
|
|
206
242
|
break;
|
|
207
243
|
}
|
|
@@ -222,6 +258,7 @@ function extractFrames(element: VargElement): FrameInfo[] {
|
|
|
222
258
|
aspectRatio: imageProps.aspectRatio ?? videoProps.aspectRatio,
|
|
223
259
|
duration,
|
|
224
260
|
startTime: currentTime,
|
|
261
|
+
imageElement: nestedImage,
|
|
225
262
|
});
|
|
226
263
|
} else if (prompt) {
|
|
227
264
|
frames.push({
|
|
@@ -268,6 +305,7 @@ async function detectDefaultImageModel() {
|
|
|
268
305
|
|
|
269
306
|
async function resolvePromptForGeneration(
|
|
270
307
|
prompt: ImagePrompt,
|
|
308
|
+
ctx: ImageGeneratorContext,
|
|
271
309
|
): Promise<string | { text?: string; images: Uint8Array[] }> {
|
|
272
310
|
if (typeof prompt === "string") {
|
|
273
311
|
return prompt;
|
|
@@ -275,7 +313,7 @@ async function resolvePromptForGeneration(
|
|
|
275
313
|
|
|
276
314
|
const resolvedImages: Uint8Array[] = [];
|
|
277
315
|
for (const img of prompt.images) {
|
|
278
|
-
const resolved = await resolveImageInput(img);
|
|
316
|
+
const resolved = await resolveImageInput(img, ctx);
|
|
279
317
|
if (resolved) {
|
|
280
318
|
resolvedImages.push(resolved);
|
|
281
319
|
}
|
|
@@ -455,14 +493,27 @@ export const frameCmd = defineCommand({
|
|
|
455
493
|
process.exit(1);
|
|
456
494
|
}
|
|
457
495
|
|
|
458
|
-
const resolvedPrompt = await resolvePromptForGeneration(frame.prompt);
|
|
496
|
+
const generatorCtx: ImageGeneratorContext = {
|
|
497
|
+
generateImage: wrapGenerateImage,
|
|
498
|
+
defaultModel,
|
|
499
|
+
};
|
|
500
|
+
|
|
501
|
+
const resolvedPrompt = await resolvePromptForGeneration(
|
|
502
|
+
frame.prompt,
|
|
503
|
+
generatorCtx,
|
|
504
|
+
);
|
|
505
|
+
|
|
506
|
+
const cacheKey = frame.imageElement
|
|
507
|
+
? computeCacheKey(frame.imageElement)
|
|
508
|
+
: undefined;
|
|
459
509
|
|
|
460
510
|
const { images } = await wrapGenerateImage({
|
|
461
511
|
model: model as Parameters<typeof generateImage>[0]["model"],
|
|
462
512
|
prompt: resolvedPrompt,
|
|
463
513
|
aspectRatio: frame.aspectRatio as `${number}:${number}` | undefined,
|
|
464
514
|
n: 1,
|
|
465
|
-
|
|
515
|
+
cacheKey,
|
|
516
|
+
} as Parameters<typeof generateImage>[0]);
|
|
466
517
|
|
|
467
518
|
const firstImage = images[0];
|
|
468
519
|
if (!firstImage?.uint8Array) {
|
|
@@ -171,6 +171,159 @@ const character = Image({ prompt: "blue robot" });
|
|
|
171
171
|
- \`1:1\` - Instagram (square)
|
|
172
172
|
`;
|
|
173
173
|
|
|
174
|
+
const GUIDE_MD = `## vargai - AI Video Generation
|
|
175
|
+
|
|
176
|
+
### Setup
|
|
177
|
+
|
|
178
|
+
\`\`\`bash
|
|
179
|
+
bunx vargai init
|
|
180
|
+
bun install vargai ai
|
|
181
|
+
\`\`\`
|
|
182
|
+
|
|
183
|
+
Required: \`FAL_KEY\` in \`.env\`
|
|
184
|
+
Optional: \`ELEVENLABS_API_KEY\` (voice/music), \`REPLICATE_API_TOKEN\` (lipsync), \`GROQ_API_KEY\` (transcription)
|
|
185
|
+
|
|
186
|
+
### Render videos
|
|
187
|
+
|
|
188
|
+
\`\`\`bash
|
|
189
|
+
bunx vargai render video.tsx
|
|
190
|
+
\`\`\`
|
|
191
|
+
|
|
192
|
+
### Basic structure
|
|
193
|
+
|
|
194
|
+
Every file needs the JSX pragma and exports a default \`<Render>\`:
|
|
195
|
+
|
|
196
|
+
\`\`\`tsx
|
|
197
|
+
/** @jsxImportSource vargai */
|
|
198
|
+
import { Render, Clip, Image, Video, Speech, Captions, Music } from "vargai/react";
|
|
199
|
+
import { fal, elevenlabs } from "vargai/ai";
|
|
200
|
+
|
|
201
|
+
export default (
|
|
202
|
+
<Render width={1080} height={1920}>
|
|
203
|
+
<Clip duration={5}>
|
|
204
|
+
<Image prompt="..." model={fal.imageModel("flux-pro")} aspectRatio="9:16" />
|
|
205
|
+
</Clip>
|
|
206
|
+
</Render>
|
|
207
|
+
);
|
|
208
|
+
\`\`\`
|
|
209
|
+
|
|
210
|
+
### Quirks and gotchas
|
|
211
|
+
|
|
212
|
+
1. **Reusable character pattern** - Export character as named export so it can be imported by other files, then use \`<Image src={character} />\` in the Clip:
|
|
213
|
+
\`\`\`tsx
|
|
214
|
+
/** @jsxImportSource vargai */
|
|
215
|
+
import { Captions, Clip, Image, Render, Speech } from "vargai/react";
|
|
216
|
+
import { elevenlabs, fal } from "vargai/ai";
|
|
217
|
+
|
|
218
|
+
export const character = Image({
|
|
219
|
+
prompt: "character description",
|
|
220
|
+
model: fal.imageModel("nano-banana-pro"),
|
|
221
|
+
aspectRatio: "9:16",
|
|
222
|
+
});
|
|
223
|
+
|
|
224
|
+
export default (
|
|
225
|
+
<Render width={1080} height={1920}>
|
|
226
|
+
<Clip duration={21}>
|
|
227
|
+
<Image src={character} />
|
|
228
|
+
</Clip>
|
|
229
|
+
<Captions src={voiceover} style="tiktok" color="#ffffff" activeColor="#FFD700" />
|
|
230
|
+
</Render>
|
|
231
|
+
);
|
|
232
|
+
\`\`\`
|
|
233
|
+
This file can be both rendered directly (\`bunx vargai render file.tsx\`) and imported by other files (\`import { character } from "./file.tsx"\`).
|
|
234
|
+
|
|
235
|
+
2. **Captions include audio** - \`<Captions src={voiceover} />\` already plays the audio. No need for separate \`<Speech>\` in the clip.
|
|
236
|
+
|
|
237
|
+
3. **Clip duration** - Omit \`duration\` to auto-fit content. Set explicit \`duration={N}\` to lock length. If duration is shorter than content, you get black screen while audio continues.
|
|
238
|
+
|
|
239
|
+
4. **Model names must be exact**:
|
|
240
|
+
- Images: \`flux-pro\`, \`nano-banana-pro\`, \`nano-banana-pro/edit\`
|
|
241
|
+
- Videos: \`kling-v2.5\`, \`wan-2.5\`
|
|
242
|
+
- Lipsync: \`sync-v2-pro\` (NOT \`sync-lipsync\`)
|
|
243
|
+
- Speech: \`eleven_multilingual_v2\`
|
|
244
|
+
|
|
245
|
+
5. **Speech function syntax**:
|
|
246
|
+
\`\`\`tsx
|
|
247
|
+
// Correct - use for voiceover/captions source
|
|
248
|
+
const voiceover = Speech({
|
|
249
|
+
model: elevenlabs.speechModel("eleven_multilingual_v2"),
|
|
250
|
+
voice: "21m00Tcm4TlvDq8ikWAM",
|
|
251
|
+
children: "Text to speak",
|
|
252
|
+
});
|
|
253
|
+
|
|
254
|
+
// Inside Clip - use JSX with children
|
|
255
|
+
<Speech voice="21m00Tcm4TlvDq8ikWAM" model={elevenlabs.speechModel("eleven_multilingual_v2")}>
|
|
256
|
+
Text to speak
|
|
257
|
+
</Speech>
|
|
258
|
+
\`\`\`
|
|
259
|
+
|
|
260
|
+
6. **Image-to-image editing** - Use \`nano-banana-pro/edit\` with prompt object:
|
|
261
|
+
\`\`\`tsx
|
|
262
|
+
const edited = Image({
|
|
263
|
+
prompt: {
|
|
264
|
+
text: "new description",
|
|
265
|
+
images: [baseImage],
|
|
266
|
+
},
|
|
267
|
+
model: fal.imageModel("nano-banana-pro/edit"),
|
|
268
|
+
});
|
|
269
|
+
\`\`\`
|
|
270
|
+
|
|
271
|
+
7. **Lipsync videos** - Pass video and audio to sync:
|
|
272
|
+
\`\`\`tsx
|
|
273
|
+
<Video
|
|
274
|
+
prompt={{
|
|
275
|
+
video: generatedVideo,
|
|
276
|
+
audio: voiceover,
|
|
277
|
+
}}
|
|
278
|
+
model={fal.videoModel("sync-v2-pro")}
|
|
279
|
+
/>
|
|
280
|
+
\`\`\`
|
|
281
|
+
|
|
282
|
+
8. **Caching** - Same prompts/params hit cache automatically. Regenerate by changing prompts.
|
|
283
|
+
|
|
284
|
+
### Simple template (still image + voiceover + captions)
|
|
285
|
+
|
|
286
|
+
\`\`\`tsx
|
|
287
|
+
/** @jsxImportSource vargai */
|
|
288
|
+
import { Captions, Clip, Image, Render, Speech } from "vargai/react";
|
|
289
|
+
import { elevenlabs, fal } from "vargai/ai";
|
|
290
|
+
|
|
291
|
+
const SCRIPT = \\\`Your script here.\\\`;
|
|
292
|
+
|
|
293
|
+
const voiceover = Speech({
|
|
294
|
+
model: elevenlabs.speechModel("eleven_multilingual_v2"),
|
|
295
|
+
voice: "21m00Tcm4TlvDq8ikWAM",
|
|
296
|
+
children: SCRIPT,
|
|
297
|
+
});
|
|
298
|
+
|
|
299
|
+
export default (
|
|
300
|
+
<Render width={1080} height={1920}>
|
|
301
|
+
<Clip duration={21}>
|
|
302
|
+
<Image
|
|
303
|
+
prompt="character description"
|
|
304
|
+
model={fal.imageModel("nano-banana-pro")}
|
|
305
|
+
aspectRatio="9:16"
|
|
306
|
+
/>
|
|
307
|
+
</Clip>
|
|
308
|
+
<Captions src={voiceover} style="tiktok" color="#ffffff" activeColor="#FFD700" />
|
|
309
|
+
</Render>
|
|
310
|
+
);
|
|
311
|
+
\`\`\`
|
|
312
|
+
|
|
313
|
+
### Aspect ratios
|
|
314
|
+
|
|
315
|
+
- \`9:16\` - TikTok, Reels, Shorts (vertical)
|
|
316
|
+
- \`16:9\` - YouTube (horizontal)
|
|
317
|
+
- \`1:1\` - Instagram (square)
|
|
318
|
+
|
|
319
|
+
### Workflow
|
|
320
|
+
|
|
321
|
+
1. **Commit every change** - After each successful render or code change, commit to preserve progress.
|
|
322
|
+
2. **Test incrementally** - Start simple (still image + audio), add complexity (video, lipsync) step by step.
|
|
323
|
+
3. **Check duration** - Use \`ffprobe -v error -show_entries format=duration -of csv=p=0 output/file.mp4\` to verify video length.
|
|
324
|
+
4. **Open to preview** - Use \`open output/file.mp4\` to view rendered videos.
|
|
325
|
+
`;
|
|
326
|
+
|
|
174
327
|
const ENV_TEMPLATE = `# Varg AI Video Generation - API Keys
|
|
175
328
|
|
|
176
329
|
# REQUIRED - Fal.ai (image & video generation)
|
|
@@ -332,6 +485,10 @@ Get your free API key at: ${COLORS.cyan}https://fal.ai/dashboard/keys${COLORS.re
|
|
|
332
485
|
writeFileSync(skillPath, SKILL_MD);
|
|
333
486
|
log.success("Installed SKILL.md (Agent Skills format)");
|
|
334
487
|
|
|
488
|
+
const guidePath = join(skillsDir, "GUIDE.md");
|
|
489
|
+
writeFileSync(guidePath, GUIDE_MD);
|
|
490
|
+
log.success("Installed GUIDE.md (usage guide)");
|
|
491
|
+
|
|
335
492
|
const rulesDir = join(cwd, ".claude/rules");
|
|
336
493
|
const rulePath = join(rulesDir, "video-generation.md");
|
|
337
494
|
|
|
@@ -388,6 +545,7 @@ ${COLORS.green}${COLORS.bold}Setup complete!${COLORS.reset}
|
|
|
388
545
|
${COLORS.bold}What was installed:${COLORS.reset}
|
|
389
546
|
${COLORS.dim}├─${COLORS.reset} hello.tsx ${COLORS.dim}(starter video)${COLORS.reset}
|
|
390
547
|
${COLORS.dim}├─${COLORS.reset} .claude/skills/varg-video-generation/SKILL.md ${COLORS.dim}(Agent Skills)${COLORS.reset}
|
|
548
|
+
${COLORS.dim}├─${COLORS.reset} .claude/skills/varg-video-generation/GUIDE.md ${COLORS.dim}(usage guide)${COLORS.reset}
|
|
391
549
|
${COLORS.dim}├─${COLORS.reset} output/ ${COLORS.dim}(video output folder)${COLORS.reset}
|
|
392
550
|
${COLORS.dim}└─${COLORS.reset} .cache/ai/ ${COLORS.dim}(generation cache)${COLORS.reset}
|
|
393
551
|
|
|
@@ -120,6 +120,11 @@ const sharedArgs = {
|
|
|
120
120
|
description: "show ffmpeg commands",
|
|
121
121
|
default: false,
|
|
122
122
|
},
|
|
123
|
+
open: {
|
|
124
|
+
type: "boolean" as const,
|
|
125
|
+
description: "open video after generation",
|
|
126
|
+
default: false,
|
|
127
|
+
},
|
|
123
128
|
};
|
|
124
129
|
|
|
125
130
|
async function runRender(
|
|
@@ -167,6 +172,11 @@ async function runRender(
|
|
|
167
172
|
if (!args.quiet) {
|
|
168
173
|
console.log(`done! ${buffer.byteLength} bytes → ${outputPath}`);
|
|
169
174
|
}
|
|
175
|
+
|
|
176
|
+
if (args.open) {
|
|
177
|
+
const { $ } = await import("bun");
|
|
178
|
+
await $`open ${outputPath}`.quiet();
|
|
179
|
+
}
|
|
170
180
|
}
|
|
171
181
|
|
|
172
182
|
export const renderCmd = defineCommand({
|
|
@@ -242,6 +252,10 @@ function RenderHelpView() {
|
|
|
242
252
|
<VargText variant="accent">-v, --verbose </VargText>show ffmpeg
|
|
243
253
|
commands
|
|
244
254
|
</Text>
|
|
255
|
+
<Text>
|
|
256
|
+
<VargText variant="accent">--open </VargText>open video after
|
|
257
|
+
generation
|
|
258
|
+
</Text>
|
|
245
259
|
</Box>
|
|
246
260
|
|
|
247
261
|
<Header>COMPONENTS</Header>
|
|
@@ -303,6 +317,10 @@ function PreviewHelpView() {
|
|
|
303
317
|
<VargText variant="accent">-v, --verbose </VargText>show ffmpeg
|
|
304
318
|
commands
|
|
305
319
|
</Text>
|
|
320
|
+
<Text>
|
|
321
|
+
<VargText variant="accent">--open </VargText>open video after
|
|
322
|
+
generation
|
|
323
|
+
</Text>
|
|
306
324
|
</Box>
|
|
307
325
|
|
|
308
326
|
<Header>EXAMPLES</Header>
|