vargai 0.4.0-alpha26 → 0.4.0-alpha27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -65,7 +65,7 @@
65
65
  "vargai": "^0.4.0-alpha11",
66
66
  "zod": "^4.2.1"
67
67
  },
68
- "version": "0.4.0-alpha26",
68
+ "version": "0.4.0-alpha27",
69
69
  "exports": {
70
70
  ".": "./src/index.ts",
71
71
  "./ai": "./src/ai-sdk/index.ts",
@@ -6,9 +6,9 @@ import { generateImage, wrapImageModel } from "ai";
6
6
  import { defineCommand } from "citty";
7
7
  import { Box, Text } from "ink";
8
8
  import { withCache } from "../../ai-sdk/cache";
9
- import { File } from "../../ai-sdk/file";
10
9
  import { fileCache } from "../../ai-sdk/file-cache";
11
10
  import { imagePlaceholderFallbackMiddleware } from "../../ai-sdk/middleware";
11
+ import { computeCacheKey } from "../../react/renderers/utils";
12
12
  import type {
13
13
  ClipProps,
14
14
  ImageInput,
@@ -16,7 +16,6 @@ import type {
16
16
  ImageProps,
17
17
  RenderProps,
18
18
  VargElement,
19
- VargNode,
20
19
  VideoProps,
21
20
  } from "../../react/types";
22
21
  import { Header, HelpBlock, VargBox, VargText } from "../ui/index.ts";
@@ -35,6 +34,7 @@ interface FrameInfo {
35
34
  aspectRatio?: string;
36
35
  duration: number;
37
36
  startTime: number;
37
+ imageElement?: VargElement<"image">;
38
38
  }
39
39
 
40
40
  async function loadComponent(filePath: string): Promise<VargElement> {
@@ -101,7 +101,15 @@ function toFileUrl(pathOrUrl: string): string {
101
101
  return `file://${resolved}`;
102
102
  }
103
103
 
104
- async function resolveImageInput(input: ImageInput): Promise<Uint8Array> {
104
+ interface ImageGeneratorContext {
105
+ generateImage: typeof generateImage;
106
+ defaultModel?: unknown;
107
+ }
108
+
109
+ async function resolveImageInput(
110
+ input: ImageInput,
111
+ ctx: ImageGeneratorContext,
112
+ ): Promise<Uint8Array> {
105
113
  if (input instanceof Uint8Array) {
106
114
  return input;
107
115
  }
@@ -123,7 +131,34 @@ async function resolveImageInput(input: ImageInput): Promise<Uint8Array> {
123
131
  return new Uint8Array(await response.arrayBuffer());
124
132
  }
125
133
 
126
- return null as unknown as Uint8Array;
134
+ if (props.prompt) {
135
+ const model = props.model ?? ctx.defaultModel;
136
+ if (!model) {
137
+ throw new Error("Nested image requires model");
138
+ }
139
+
140
+ const resolvedPrompt = await resolvePromptForGeneration(
141
+ props.prompt,
142
+ ctx,
143
+ );
144
+ const cacheKey = computeCacheKey(imageElement);
145
+
146
+ const { images } = await ctx.generateImage({
147
+ model: model as Parameters<typeof generateImage>[0]["model"],
148
+ prompt: resolvedPrompt,
149
+ aspectRatio: props.aspectRatio as `${number}:${number}` | undefined,
150
+ n: 1,
151
+ cacheKey,
152
+ } as Parameters<typeof generateImage>[0]);
153
+
154
+ const firstImage = images[0];
155
+ if (!firstImage?.uint8Array) {
156
+ throw new Error("Nested image generation returned no data");
157
+ }
158
+ return firstImage.uint8Array;
159
+ }
160
+
161
+ throw new Error("Image element requires prompt or src");
127
162
  }
128
163
  throw new Error("Unknown image input type");
129
164
  }
@@ -202,6 +237,7 @@ function extractFrames(element: VargElement): FrameInfo[] {
202
237
  aspectRatio: props.aspectRatio,
203
238
  duration,
204
239
  startTime: currentTime,
240
+ imageElement: clipChildElement as VargElement<"image">,
205
241
  });
206
242
  break;
207
243
  }
@@ -222,6 +258,7 @@ function extractFrames(element: VargElement): FrameInfo[] {
222
258
  aspectRatio: imageProps.aspectRatio ?? videoProps.aspectRatio,
223
259
  duration,
224
260
  startTime: currentTime,
261
+ imageElement: nestedImage,
225
262
  });
226
263
  } else if (prompt) {
227
264
  frames.push({
@@ -268,6 +305,7 @@ async function detectDefaultImageModel() {
268
305
 
269
306
  async function resolvePromptForGeneration(
270
307
  prompt: ImagePrompt,
308
+ ctx: ImageGeneratorContext,
271
309
  ): Promise<string | { text?: string; images: Uint8Array[] }> {
272
310
  if (typeof prompt === "string") {
273
311
  return prompt;
@@ -275,7 +313,7 @@ async function resolvePromptForGeneration(
275
313
 
276
314
  const resolvedImages: Uint8Array[] = [];
277
315
  for (const img of prompt.images) {
278
- const resolved = await resolveImageInput(img);
316
+ const resolved = await resolveImageInput(img, ctx);
279
317
  if (resolved) {
280
318
  resolvedImages.push(resolved);
281
319
  }
@@ -455,14 +493,27 @@ export const frameCmd = defineCommand({
455
493
  process.exit(1);
456
494
  }
457
495
 
458
- const resolvedPrompt = await resolvePromptForGeneration(frame.prompt);
496
+ const generatorCtx: ImageGeneratorContext = {
497
+ generateImage: wrapGenerateImage,
498
+ defaultModel,
499
+ };
500
+
501
+ const resolvedPrompt = await resolvePromptForGeneration(
502
+ frame.prompt,
503
+ generatorCtx,
504
+ );
505
+
506
+ const cacheKey = frame.imageElement
507
+ ? computeCacheKey(frame.imageElement)
508
+ : undefined;
459
509
 
460
510
  const { images } = await wrapGenerateImage({
461
511
  model: model as Parameters<typeof generateImage>[0]["model"],
462
512
  prompt: resolvedPrompt,
463
513
  aspectRatio: frame.aspectRatio as `${number}:${number}` | undefined,
464
514
  n: 1,
465
- });
515
+ cacheKey,
516
+ } as Parameters<typeof generateImage>[0]);
466
517
 
467
518
  const firstImage = images[0];
468
519
  if (!firstImage?.uint8Array) {
@@ -171,6 +171,159 @@ const character = Image({ prompt: "blue robot" });
171
171
  - \`1:1\` - Instagram (square)
172
172
  `;
173
173
 
174
+ const GUIDE_MD = `## vargai - AI Video Generation
175
+
176
+ ### Setup
177
+
178
+ \`\`\`bash
179
+ bunx vargai init
180
+ bun install vargai ai
181
+ \`\`\`
182
+
183
+ Required: \`FAL_KEY\` in \`.env\`
184
+ Optional: \`ELEVENLABS_API_KEY\` (voice/music), \`REPLICATE_API_TOKEN\` (lipsync), \`GROQ_API_KEY\` (transcription)
185
+
186
+ ### Render videos
187
+
188
+ \`\`\`bash
189
+ bunx vargai render video.tsx
190
+ \`\`\`
191
+
192
+ ### Basic structure
193
+
194
+ Every file needs the JSX pragma and exports a default \`<Render>\`:
195
+
196
+ \`\`\`tsx
197
+ /** @jsxImportSource vargai */
198
+ import { Render, Clip, Image, Video, Speech, Captions, Music } from "vargai/react";
199
+ import { fal, elevenlabs } from "vargai/ai";
200
+
201
+ export default (
202
+ <Render width={1080} height={1920}>
203
+ <Clip duration={5}>
204
+ <Image prompt="..." model={fal.imageModel("flux-pro")} aspectRatio="9:16" />
205
+ </Clip>
206
+ </Render>
207
+ );
208
+ \`\`\`
209
+
210
+ ### Quirks and gotchas
211
+
212
+ 1. **Reusable character pattern** - Export character as named export so it can be imported by other files, then use \`<Image src={character} />\` in the Clip:
213
+ \`\`\`tsx
214
+ /** @jsxImportSource vargai */
215
+ import { Captions, Clip, Image, Render, Speech } from "vargai/react";
216
+ import { elevenlabs, fal } from "vargai/ai";
217
+
218
+ export const character = Image({
219
+ prompt: "character description",
220
+ model: fal.imageModel("nano-banana-pro"),
221
+ aspectRatio: "9:16",
222
+ });
223
+
224
+ export default (
225
+ <Render width={1080} height={1920}>
226
+ <Clip duration={21}>
227
+ <Image src={character} />
228
+ </Clip>
229
+ <Captions src={voiceover} style="tiktok" color="#ffffff" activeColor="#FFD700" />
230
+ </Render>
231
+ );
232
+ \`\`\`
233
+ This file can be both rendered directly (\`bunx vargai render file.tsx\`) and imported by other files (\`import { character } from "./file.tsx"\`). Note: \`voiceover\` referenced by \`<Captions>\` above must be defined with \`Speech({ ... })\` (see the simple template below) before the example renders.
234
+
235
+ 2. **Captions include audio** - \`<Captions src={voiceover} />\` already plays the audio. No need for separate \`<Speech>\` in the clip.
236
+
237
+ 3. **Clip duration** - Omit \`duration\` to auto-fit content. Set explicit \`duration={N}\` to lock length. If duration is shorter than content, you get black screen while audio continues.
238
+
239
+ 4. **Model names must be exact**:
240
+ - Images: \`flux-pro\`, \`nano-banana-pro\`, \`nano-banana-pro/edit\`
241
+ - Videos: \`kling-v2.5\`, \`wan-2.5\`
242
+ - Lipsync: \`sync-v2-pro\` (NOT \`sync-lipsync\`)
243
+ - Speech: \`eleven_multilingual_v2\`
244
+
245
+ 5. **Speech function syntax**:
246
+ \`\`\`tsx
247
+ // Correct - use for voiceover/captions source
248
+ const voiceover = Speech({
249
+ model: elevenlabs.speechModel("eleven_multilingual_v2"),
250
+ voice: "21m00Tcm4TlvDq8ikWAM",
251
+ children: "Text to speak",
252
+ });
253
+
254
+ // Inside Clip - use JSX with children
255
+ <Speech voice="21m00Tcm4TlvDq8ikWAM" model={elevenlabs.speechModel("eleven_multilingual_v2")}>
256
+ Text to speak
257
+ </Speech>
258
+ \`\`\`
259
+
260
+ 6. **Image-to-image editing** - Use \`nano-banana-pro/edit\` with prompt object:
261
+ \`\`\`tsx
262
+ const edited = Image({
263
+ prompt: {
264
+ text: "new description",
265
+ images: [baseImage],
266
+ },
267
+ model: fal.imageModel("nano-banana-pro/edit"),
268
+ });
269
+ \`\`\`
270
+
271
+ 7. **Lipsync videos** - Pass video and audio to sync:
272
+ \`\`\`tsx
273
+ <Video
274
+ prompt={{
275
+ video: generatedVideo,
276
+ audio: voiceover,
277
+ }}
278
+ model={fal.videoModel("sync-v2-pro")}
279
+ />
280
+ \`\`\`
281
+
282
+ 8. **Caching** - Same prompts/params hit cache automatically. Regenerate by changing prompts.
283
+
284
+ ### Simple template (still image + voiceover + captions)
285
+
286
+ \`\`\`tsx
287
+ /** @jsxImportSource vargai */
288
+ import { Captions, Clip, Image, Render, Speech } from "vargai/react";
289
+ import { elevenlabs, fal } from "vargai/ai";
290
+
291
+ const SCRIPT = \`Your script here.\`;
292
+
293
+ const voiceover = Speech({
294
+ model: elevenlabs.speechModel("eleven_multilingual_v2"),
295
+ voice: "21m00Tcm4TlvDq8ikWAM",
296
+ children: SCRIPT,
297
+ });
298
+
299
+ export default (
300
+ <Render width={1080} height={1920}>
301
+ <Clip duration={21}>
302
+ <Image
303
+ prompt="character description"
304
+ model={fal.imageModel("nano-banana-pro")}
305
+ aspectRatio="9:16"
306
+ />
307
+ </Clip>
308
+ <Captions src={voiceover} style="tiktok" color="#ffffff" activeColor="#FFD700" />
309
+ </Render>
310
+ );
311
+ \`\`\`
312
+
313
+ ### Aspect ratios
314
+
315
+ - \`9:16\` - TikTok, Reels, Shorts (vertical)
316
+ - \`16:9\` - YouTube (horizontal)
317
+ - \`1:1\` - Instagram (square)
318
+
319
+ ### Workflow
320
+
321
+ 1. **Commit every change** - After each successful render or code change, commit to preserve progress.
322
+ 2. **Test incrementally** - Start simple (still image + audio), add complexity (video, lipsync) step by step.
323
+ 3. **Check duration** - Use \`ffprobe -v error -show_entries format=duration -of csv=p=0 output/file.mp4\` to verify video length.
324
+ 4. **Open to preview** - Use \`open output/file.mp4\` (macOS; \`xdg-open\` on Linux) to view rendered videos.
325
+ `;
326
+
174
327
  const ENV_TEMPLATE = `# Varg AI Video Generation - API Keys
175
328
 
176
329
  # REQUIRED - Fal.ai (image & video generation)
@@ -332,6 +485,10 @@ Get your free API key at: ${COLORS.cyan}https://fal.ai/dashboard/keys${COLORS.re
332
485
  writeFileSync(skillPath, SKILL_MD);
333
486
  log.success("Installed SKILL.md (Agent Skills format)");
334
487
 
488
+ const guidePath = join(skillsDir, "GUIDE.md");
489
+ writeFileSync(guidePath, GUIDE_MD);
490
+ log.success("Installed GUIDE.md (usage guide)");
491
+
335
492
  const rulesDir = join(cwd, ".claude/rules");
336
493
  const rulePath = join(rulesDir, "video-generation.md");
337
494
 
@@ -388,6 +545,7 @@ ${COLORS.green}${COLORS.bold}Setup complete!${COLORS.reset}
388
545
  ${COLORS.bold}What was installed:${COLORS.reset}
389
546
  ${COLORS.dim}├─${COLORS.reset} hello.tsx ${COLORS.dim}(starter video)${COLORS.reset}
390
547
  ${COLORS.dim}├─${COLORS.reset} .claude/skills/varg-video-generation/SKILL.md ${COLORS.dim}(Agent Skills)${COLORS.reset}
548
+ ${COLORS.dim}├─${COLORS.reset} .claude/skills/varg-video-generation/GUIDE.md ${COLORS.dim}(usage guide)${COLORS.reset}
391
549
  ${COLORS.dim}├─${COLORS.reset} output/ ${COLORS.dim}(video output folder)${COLORS.reset}
392
550
  ${COLORS.dim}└─${COLORS.reset} .cache/ai/ ${COLORS.dim}(generation cache)${COLORS.reset}
393
551
 
@@ -120,6 +120,11 @@ const sharedArgs = {
120
120
  description: "show ffmpeg commands",
121
121
  default: false,
122
122
  },
123
+ open: {
124
+ type: "boolean" as const,
125
+ description: "open video after generation",
126
+ default: false,
127
+ },
123
128
  };
124
129
 
125
130
  async function runRender(
@@ -167,6 +172,11 @@ async function runRender(
167
172
  if (!args.quiet) {
168
173
  console.log(`done! ${buffer.byteLength} bytes → ${outputPath}`);
169
174
  }
175
+
176
+ if (args.open) {
177
+ const { $ } = await import("bun");
178
+ await $`open ${outputPath}`.quiet();
179
+ }
170
180
  }
171
181
 
172
182
  export const renderCmd = defineCommand({
@@ -242,6 +252,10 @@ function RenderHelpView() {
242
252
  <VargText variant="accent">-v, --verbose </VargText>show ffmpeg
243
253
  commands
244
254
  </Text>
255
+ <Text>
256
+ <VargText variant="accent">--open </VargText>open video after
257
+ generation
258
+ </Text>
245
259
  </Box>
246
260
 
247
261
  <Header>COMPONENTS</Header>
@@ -303,6 +317,10 @@ function PreviewHelpView() {
303
317
  <VargText variant="accent">-v, --verbose </VargText>show ffmpeg
304
318
  commands
305
319
  </Text>
320
+ <Text>
321
+ <VargText variant="accent">--open </VargText>open video after
322
+ generation
323
+ </Text>
306
324
  </Box>
307
325
 
308
326
  <Header>EXAMPLES</Header>