vargai 0.4.0-alpha26 → 0.4.0-alpha27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -65,7 +65,7 @@
65
65
  "vargai": "^0.4.0-alpha11",
66
66
  "zod": "^4.2.1"
67
67
  },
68
- "version": "0.4.0-alpha26",
68
+ "version": "0.4.0-alpha27",
69
69
  "exports": {
70
70
  ".": "./src/index.ts",
71
71
  "./ai": "./src/ai-sdk/index.ts",
@@ -6,9 +6,9 @@ import { generateImage, wrapImageModel } from "ai";
6
6
  import { defineCommand } from "citty";
7
7
  import { Box, Text } from "ink";
8
8
  import { withCache } from "../../ai-sdk/cache";
9
- import { File } from "../../ai-sdk/file";
10
9
  import { fileCache } from "../../ai-sdk/file-cache";
11
10
  import { imagePlaceholderFallbackMiddleware } from "../../ai-sdk/middleware";
11
+ import { computeCacheKey } from "../../react/renderers/utils";
12
12
  import type {
13
13
  ClipProps,
14
14
  ImageInput,
@@ -16,7 +16,6 @@ import type {
16
16
  ImageProps,
17
17
  RenderProps,
18
18
  VargElement,
19
- VargNode,
20
19
  VideoProps,
21
20
  } from "../../react/types";
22
21
  import { Header, HelpBlock, VargBox, VargText } from "../ui/index.ts";
@@ -35,6 +34,7 @@ interface FrameInfo {
35
34
  aspectRatio?: string;
36
35
  duration: number;
37
36
  startTime: number;
37
+ imageElement?: VargElement<"image">;
38
38
  }
39
39
 
40
40
  async function loadComponent(filePath: string): Promise<VargElement> {
@@ -101,7 +101,15 @@ function toFileUrl(pathOrUrl: string): string {
101
101
  return `file://${resolved}`;
102
102
  }
103
103
 
104
- async function resolveImageInput(input: ImageInput): Promise<Uint8Array> {
104
+ interface ImageGeneratorContext {
105
+ generateImage: typeof generateImage;
106
+ defaultModel?: unknown;
107
+ }
108
+
109
+ async function resolveImageInput(
110
+ input: ImageInput,
111
+ ctx: ImageGeneratorContext,
112
+ ): Promise<Uint8Array> {
105
113
  if (input instanceof Uint8Array) {
106
114
  return input;
107
115
  }
@@ -123,7 +131,34 @@ async function resolveImageInput(input: ImageInput): Promise<Uint8Array> {
123
131
  return new Uint8Array(await response.arrayBuffer());
124
132
  }
125
133
 
126
- return null as unknown as Uint8Array;
134
+ if (props.prompt) {
135
+ const model = props.model ?? ctx.defaultModel;
136
+ if (!model) {
137
+ throw new Error("Nested image requires model");
138
+ }
139
+
140
+ const resolvedPrompt = await resolvePromptForGeneration(
141
+ props.prompt,
142
+ ctx,
143
+ );
144
+ const cacheKey = computeCacheKey(imageElement);
145
+
146
+ const { images } = await ctx.generateImage({
147
+ model: model as Parameters<typeof generateImage>[0]["model"],
148
+ prompt: resolvedPrompt,
149
+ aspectRatio: props.aspectRatio as `${number}:${number}` | undefined,
150
+ n: 1,
151
+ cacheKey,
152
+ } as Parameters<typeof generateImage>[0]);
153
+
154
+ const firstImage = images[0];
155
+ if (!firstImage?.uint8Array) {
156
+ throw new Error("Nested image generation returned no data");
157
+ }
158
+ return firstImage.uint8Array;
159
+ }
160
+
161
+ throw new Error("Image element requires prompt or src");
127
162
  }
128
163
  throw new Error("Unknown image input type");
129
164
  }
@@ -202,6 +237,7 @@ function extractFrames(element: VargElement): FrameInfo[] {
202
237
  aspectRatio: props.aspectRatio,
203
238
  duration,
204
239
  startTime: currentTime,
240
+ imageElement: clipChildElement as VargElement<"image">,
205
241
  });
206
242
  break;
207
243
  }
@@ -222,6 +258,7 @@ function extractFrames(element: VargElement): FrameInfo[] {
222
258
  aspectRatio: imageProps.aspectRatio ?? videoProps.aspectRatio,
223
259
  duration,
224
260
  startTime: currentTime,
261
+ imageElement: nestedImage,
225
262
  });
226
263
  } else if (prompt) {
227
264
  frames.push({
@@ -268,6 +305,7 @@ async function detectDefaultImageModel() {
268
305
 
269
306
  async function resolvePromptForGeneration(
270
307
  prompt: ImagePrompt,
308
+ ctx: ImageGeneratorContext,
271
309
  ): Promise<string | { text?: string; images: Uint8Array[] }> {
272
310
  if (typeof prompt === "string") {
273
311
  return prompt;
@@ -275,7 +313,7 @@ async function resolvePromptForGeneration(
275
313
 
276
314
  const resolvedImages: Uint8Array[] = [];
277
315
  for (const img of prompt.images) {
278
- const resolved = await resolveImageInput(img);
316
+ const resolved = await resolveImageInput(img, ctx);
279
317
  if (resolved) {
280
318
  resolvedImages.push(resolved);
281
319
  }
@@ -455,14 +493,27 @@ export const frameCmd = defineCommand({
455
493
  process.exit(1);
456
494
  }
457
495
 
458
- const resolvedPrompt = await resolvePromptForGeneration(frame.prompt);
496
+ const generatorCtx: ImageGeneratorContext = {
497
+ generateImage: wrapGenerateImage,
498
+ defaultModel,
499
+ };
500
+
501
+ const resolvedPrompt = await resolvePromptForGeneration(
502
+ frame.prompt,
503
+ generatorCtx,
504
+ );
505
+
506
+ const cacheKey = frame.imageElement
507
+ ? computeCacheKey(frame.imageElement)
508
+ : undefined;
459
509
 
460
510
  const { images } = await wrapGenerateImage({
461
511
  model: model as Parameters<typeof generateImage>[0]["model"],
462
512
  prompt: resolvedPrompt,
463
513
  aspectRatio: frame.aspectRatio as `${number}:${number}` | undefined,
464
514
  n: 1,
465
- });
515
+ cacheKey,
516
+ } as Parameters<typeof generateImage>[0]);
466
517
 
467
518
  const firstImage = images[0];
468
519
  if (!firstImage?.uint8Array) {
@@ -171,6 +171,159 @@ const character = Image({ prompt: "blue robot" });
171
171
  - \`1:1\` - Instagram (square)
172
172
  `;
173
173
 
174
+ const GUIDE_MD = `## vargai - AI Video Generation
175
+
176
+ ### Setup
177
+
178
+ \`\`\`bash
179
+ bunx vargai init
180
+ bun install vargai ai
181
+ \`\`\`
182
+
183
+ Required: \`FAL_KEY\` in \`.env\`
184
+ Optional: \`ELEVENLABS_API_KEY\` (voice/music), \`REPLICATE_API_TOKEN\` (lipsync), \`GROQ_API_KEY\` (transcription)
185
+
186
+ ### Render videos
187
+
188
+ \`\`\`bash
189
+ bunx vargai render video.tsx
190
+ \`\`\`
191
+
192
+ ### Basic structure
193
+
194
+ Every file needs the JSX pragma and exports a default \`<Render>\`:
195
+
196
+ \`\`\`tsx
197
+ /** @jsxImportSource vargai */
198
+ import { Render, Clip, Image, Video, Speech, Captions, Music } from "vargai/react";
199
+ import { fal, elevenlabs } from "vargai/ai";
200
+
201
+ export default (
202
+ <Render width={1080} height={1920}>
203
+ <Clip duration={5}>
204
+ <Image prompt="..." model={fal.imageModel("flux-pro")} aspectRatio="9:16" />
205
+ </Clip>
206
+ </Render>
207
+ );
208
+ \`\`\`
209
+
210
+ ### Quirks and gotchas
211
+
212
+ 1. **Reusable character pattern** - Export character as named export so it can be imported by other files, then use \`<Image src={character} />\` in the Clip:
213
+ \`\`\`tsx
214
+ /** @jsxImportSource vargai */
215
+ import { Captions, Clip, Image, Render, Speech } from "vargai/react";
216
+ import { elevenlabs, fal } from "vargai/ai";
217
+
218
+ export const character = Image({
219
+ prompt: "character description",
220
+ model: fal.imageModel("nano-banana-pro"),
221
+ aspectRatio: "9:16",
222
+ });
223
+
224
+ export default (
225
+ <Render width={1080} height={1920}>
226
+ <Clip duration={21}>
227
+ <Image src={character} />
228
+ </Clip>
229
+ <Captions src={voiceover} style="tiktok" color="#ffffff" activeColor="#FFD700" />
230
+ </Render>
231
+ );
232
+ \`\`\`
233
+ This file can be both rendered directly (\`bunx vargai render file.tsx\`) and imported by other files (\`import { character } from "./file.tsx"\`). Note: \`voiceover\` referenced by \`<Captions>\` above must be defined with \`Speech({ ... })\` (see the simple template below) before the example renders.
234
+
235
+ 2. **Captions include audio** - \`<Captions src={voiceover} />\` already plays the audio. No need for separate \`<Speech>\` in the clip.
236
+
237
+ 3. **Clip duration** - Omit \`duration\` to auto-fit content. Set explicit \`duration={N}\` to lock length. If duration is shorter than content, you get black screen while audio continues.
238
+
239
+ 4. **Model names must be exact**:
240
+ - Images: \`flux-pro\`, \`nano-banana-pro\`, \`nano-banana-pro/edit\`
241
+ - Videos: \`kling-v2.5\`, \`wan-2.5\`
242
+ - Lipsync: \`sync-v2-pro\` (NOT \`sync-lipsync\`)
243
+ - Speech: \`eleven_multilingual_v2\`
244
+
245
+ 5. **Speech function syntax**:
246
+ \`\`\`tsx
247
+ // Correct - use for voiceover/captions source
248
+ const voiceover = Speech({
249
+ model: elevenlabs.speechModel("eleven_multilingual_v2"),
250
+ voice: "21m00Tcm4TlvDq8ikWAM",
251
+ children: "Text to speak",
252
+ });
253
+
254
+ // Inside Clip - use JSX with children
255
+ <Speech voice="21m00Tcm4TlvDq8ikWAM" model={elevenlabs.speechModel("eleven_multilingual_v2")}>
256
+ Text to speak
257
+ </Speech>
258
+ \`\`\`
259
+
260
+ 6. **Image-to-image editing** - Use \`nano-banana-pro/edit\` with prompt object:
261
+ \`\`\`tsx
262
+ const edited = Image({
263
+ prompt: {
264
+ text: "new description",
265
+ images: [baseImage],
266
+ },
267
+ model: fal.imageModel("nano-banana-pro/edit"),
268
+ });
269
+ \`\`\`
270
+
271
+ 7. **Lipsync videos** - Pass video and audio to sync:
272
+ \`\`\`tsx
273
+ <Video
274
+ prompt={{
275
+ video: generatedVideo,
276
+ audio: voiceover,
277
+ }}
278
+ model={fal.videoModel("sync-v2-pro")}
279
+ />
280
+ \`\`\`
281
+
282
+ 8. **Caching** - Same prompts/params hit cache automatically. Regenerate by changing prompts.
283
+
284
+ ### Simple template (still image + voiceover + captions)
285
+
286
+ \`\`\`tsx
287
+ /** @jsxImportSource vargai */
288
+ import { Captions, Clip, Image, Render, Speech } from "vargai/react";
289
+ import { elevenlabs, fal } from "vargai/ai";
290
+
291
+ const SCRIPT = \`Your script here.\`;
292
+
293
+ const voiceover = Speech({
294
+ model: elevenlabs.speechModel("eleven_multilingual_v2"),
295
+ voice: "21m00Tcm4TlvDq8ikWAM",
296
+ children: SCRIPT,
297
+ });
298
+
299
+ export default (
300
+ <Render width={1080} height={1920}>
301
+ <Clip duration={21}>
302
+ <Image
303
+ prompt="character description"
304
+ model={fal.imageModel("nano-banana-pro")}
305
+ aspectRatio="9:16"
306
+ />
307
+ </Clip>
308
+ <Captions src={voiceover} style="tiktok" color="#ffffff" activeColor="#FFD700" />
309
+ </Render>
310
+ );
311
+ \`\`\`
312
+
313
+ ### Aspect ratios
314
+
315
+ - \`9:16\` - TikTok, Reels, Shorts (vertical)
316
+ - \`16:9\` - YouTube (horizontal)
317
+ - \`1:1\` - Instagram (square)
318
+
319
+ ### Workflow
320
+
321
+ 1. **Commit every change** - After each successful render or code change, commit to preserve progress.
322
+ 2. **Test incrementally** - Start simple (still image + audio), add complexity (video, lipsync) step by step.
323
+ 3. **Check duration** - Use \`ffprobe -v error -show_entries format=duration -of csv=p=0 output/file.mp4\` to verify video length.
324
+ 4. **Open to preview** - Use \`open output/file.mp4\` (macOS; \`xdg-open\` on Linux) to view rendered videos.
325
+ `;
326
+
174
327
  const ENV_TEMPLATE = `# Varg AI Video Generation - API Keys
175
328
 
176
329
  # REQUIRED - Fal.ai (image & video generation)
@@ -332,6 +485,10 @@ Get your free API key at: ${COLORS.cyan}https://fal.ai/dashboard/keys${COLORS.re
332
485
  writeFileSync(skillPath, SKILL_MD);
333
486
  log.success("Installed SKILL.md (Agent Skills format)");
334
487
 
488
+ const guidePath = join(skillsDir, "GUIDE.md");
489
+ writeFileSync(guidePath, GUIDE_MD);
490
+ log.success("Installed GUIDE.md (usage guide)");
491
+
335
492
  const rulesDir = join(cwd, ".claude/rules");
336
493
  const rulePath = join(rulesDir, "video-generation.md");
337
494
 
@@ -388,6 +545,7 @@ ${COLORS.green}${COLORS.bold}Setup complete!${COLORS.reset}
388
545
  ${COLORS.bold}What was installed:${COLORS.reset}
389
546
  ${COLORS.dim}├─${COLORS.reset} hello.tsx ${COLORS.dim}(starter video)${COLORS.reset}
390
547
  ${COLORS.dim}├─${COLORS.reset} .claude/skills/varg-video-generation/SKILL.md ${COLORS.dim}(Agent Skills)${COLORS.reset}
548
+ ${COLORS.dim}├─${COLORS.reset} .claude/skills/varg-video-generation/GUIDE.md ${COLORS.dim}(usage guide)${COLORS.reset}
391
549
  ${COLORS.dim}├─${COLORS.reset} output/ ${COLORS.dim}(video output folder)${COLORS.reset}
392
550
  ${COLORS.dim}└─${COLORS.reset} .cache/ai/ ${COLORS.dim}(generation cache)${COLORS.reset}
393
551
 
@@ -120,6 +120,11 @@ const sharedArgs = {
120
120
  description: "show ffmpeg commands",
121
121
  default: false,
122
122
  },
123
+ open: {
124
+ type: "boolean" as const,
125
+ description: "open video after generation",
126
+ default: false,
127
+ },
123
128
  };
124
129
 
125
130
  async function runRender(
@@ -167,6 +172,11 @@ async function runRender(
167
172
  if (!args.quiet) {
168
173
  console.log(`done! ${buffer.byteLength} bytes → ${outputPath}`);
169
174
  }
175
+
176
+ if (args.open) {
177
+ const { $ } = await import("bun");
178
+ await $`open ${outputPath}`.quiet();
179
+ }
170
180
  }
171
181
 
172
182
  export const renderCmd = defineCommand({
@@ -242,6 +252,10 @@ function RenderHelpView() {
242
252
  <VargText variant="accent">-v, --verbose </VargText>show ffmpeg
243
253
  commands
244
254
  </Text>
255
+ <Text>
256
+ <VargText variant="accent">--open </VargText>open video after
257
+ generation
258
+ </Text>
245
259
  </Box>
246
260
 
247
261
  <Header>COMPONENTS</Header>
@@ -303,6 +317,10 @@ function PreviewHelpView() {
303
317
  <VargText variant="accent">-v, --verbose </VargText>show ffmpeg
304
318
  commands
305
319
  </Text>
320
+ <Text>
321
+ <VargText variant="accent">--open </VargText>open video after
322
+ generation
323
+ </Text>
306
324
  </Box>
307
325
 
308
326
  <Header>EXAMPLES</Header>