npm - vargai - Versions diffs - 0.4.0-alpha4 → 0.4.0-alpha40 - Mend

vargai 0.4.0-alpha4 → 0.4.0-alpha40

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (114)

package/.env.example +6 -0
package/README.md +483 -61
package/assets/fonts/TikTokSans-Bold.ttf +0 -0
package/examples/grok-imagine-test.tsx +155 -0
package/launch-videos/06-kawaii-fruits.tsx +93 -0
package/launch-videos/07-ugc-weight-loss.tsx +132 -0
package/launch-videos/08-talking-head-varg.tsx +107 -0
package/launch-videos/09-girl.tsx +160 -0
package/launch-videos/README.md +42 -0
package/package.json +10 -4
package/pipeline/cookbooks/round-video-character.md +1 -1
package/skills/varg-video-generation/SKILL.md +224 -0
package/skills/varg-video-generation/references/templates.md +380 -0
package/skills/varg-video-generation/scripts/setup.ts +265 -0
package/src/ai-sdk/cache.ts +1 -3
package/src/ai-sdk/examples/google-image.ts +62 -0
package/src/ai-sdk/index.ts +10 -0
package/src/ai-sdk/middleware/wrap-image-model.ts +4 -21
package/src/ai-sdk/middleware/wrap-music-model.ts +4 -16
package/src/ai-sdk/middleware/wrap-video-model.ts +5 -17
package/src/ai-sdk/providers/CONTRIBUTING.md +457 -0
package/src/ai-sdk/providers/editly/backends/index.ts +8 -0
package/src/ai-sdk/providers/editly/backends/local.ts +94 -0
package/src/ai-sdk/providers/editly/backends/types.ts +74 -0
package/src/ai-sdk/providers/editly/editly.test.ts +49 -1
package/src/ai-sdk/providers/editly/index.ts +164 -80
package/src/ai-sdk/providers/editly/layers.ts +58 -6
package/src/ai-sdk/providers/editly/rendi/editly-with-rendi-backend.test.ts +335 -0
package/src/ai-sdk/providers/editly/rendi/index.ts +289 -0
package/src/ai-sdk/providers/editly/rendi/rendi.test.ts +35 -0
package/src/ai-sdk/providers/editly/types.ts +30 -0
package/src/ai-sdk/providers/elevenlabs.ts +10 -2
package/src/ai-sdk/providers/fal.test.ts +214 -0
package/src/ai-sdk/providers/fal.ts +435 -40
package/src/ai-sdk/providers/google.ts +423 -0
package/src/ai-sdk/providers/together.ts +191 -0
package/src/cli/commands/find.tsx +1 -0
package/src/cli/commands/frame.tsx +616 -0
package/src/cli/commands/hello.ts +85 -0
package/src/cli/commands/help.tsx +18 -30
package/src/cli/commands/index.ts +11 -2
package/src/cli/commands/init.tsx +570 -0
package/src/cli/commands/list.tsx +1 -0
package/src/cli/commands/render.tsx +322 -76
package/src/cli/commands/run.tsx +1 -0
package/src/cli/commands/storyboard.tsx +1714 -0
package/src/cli/commands/which.tsx +1 -0
package/src/cli/index.ts +23 -4
package/src/cli/ui/components/Badge.tsx +1 -0
package/src/cli/ui/components/DataTable.tsx +1 -0
package/src/cli/ui/components/Header.tsx +1 -0
package/src/cli/ui/components/HelpBlock.tsx +1 -0
package/src/cli/ui/components/KeyValue.tsx +1 -0
package/src/cli/ui/components/OptionRow.tsx +1 -0
package/src/cli/ui/components/Separator.tsx +1 -0
package/src/cli/ui/components/StatusBox.tsx +1 -0
package/src/cli/ui/components/VargBox.tsx +1 -0
package/src/cli/ui/components/VargProgress.tsx +1 -0
package/src/cli/ui/components/VargSpinner.tsx +1 -0
package/src/cli/ui/components/VargText.tsx +1 -0
package/src/definitions/actions/grok-edit.ts +133 -0
package/src/definitions/actions/index.ts +16 -0
package/src/definitions/actions/qwen-angles.ts +218 -0
package/src/index.ts +1 -0
package/src/providers/fal.ts +196 -0
package/src/react/assets.ts +9 -0
package/src/react/elements.ts +0 -5
package/src/react/examples/branching.tsx +6 -4
package/src/react/examples/character-video.tsx +13 -10
package/src/react/examples/local-files-test.tsx +19 -0
package/src/react/examples/ltx2-test.tsx +25 -0
package/src/react/examples/madi.tsx +13 -10
package/src/react/examples/mcmeows.tsx +40 -0
package/src/react/examples/music-defaults.tsx +24 -0
package/src/react/examples/quickstart-test.tsx +101 -0
package/src/react/examples/qwen-angles-test.tsx +72 -0
package/src/react/index.ts +3 -3
package/src/react/layouts/grid.tsx +1 -1
package/src/react/layouts/index.ts +2 -1
package/src/react/layouts/slot.tsx +85 -0
package/src/react/layouts/split.tsx +18 -0
package/src/react/react.test.ts +60 -11
package/src/react/renderers/burn-captions.ts +95 -0
package/src/react/renderers/cache.test.ts +182 -0
package/src/react/renderers/captions.ts +25 -6
package/src/react/renderers/clip.ts +56 -25
package/src/react/renderers/context.ts +5 -2
package/src/react/renderers/image.ts +5 -2
package/src/react/renderers/index.ts +0 -1
package/src/react/renderers/music.ts +8 -3
package/src/react/renderers/packshot/blinking-button.ts +413 -0
package/src/react/renderers/packshot.ts +170 -8
package/src/react/renderers/progress.ts +4 -3
package/src/react/renderers/render.ts +127 -71
package/src/react/renderers/speech.ts +2 -2
package/src/react/renderers/split.ts +34 -13
package/src/react/renderers/utils.test.ts +80 -0
package/src/react/renderers/utils.ts +37 -1
package/src/react/renderers/video.ts +47 -9
package/src/react/types.ts +70 -17
package/src/studio/stages.ts +40 -39
package/src/studio/step-renderer.ts +14 -24
package/src/studio/ui/index.html +2 -2
package/src/tests/all.test.ts +4 -4
package/src/tests/index.ts +1 -1
package/test-slot-grid.tsx +19 -0
package/test-slot-userland.tsx +30 -0
package/test-sync-v2.ts +30 -0
package/test-sync-v2.tsx +29 -0
package/tsconfig.json +1 -1
package/video.tsx +7 -0
package/src/ai-sdk/providers/editly/ffmpeg.ts +0 -60
package/src/react/renderers/animate.ts +0 -59
package/src/cli/commands/{studio.tsx → studio.ts} +0 -0

package/src/react/renderers/split.ts CHANGED

@@ -1,10 +1,21 @@
 import { editly } from "../../ai-sdk/providers/editly";
-import type { Clip, Layer } from "../../ai-sdk/providers/editly/types";
+import type {
+  Clip,
+  CropPosition,
+  Layer,
+  ResizeMode,
+} from "../../ai-sdk/providers/editly/types";
 import type { SplitProps, VargElement } from "../types";
 import type { RenderContext } from "./context";
 import { renderImage } from "./image";
 import { renderVideo } from "./video";
+interface SplitCell {
+  path: string;
+  resizeMode?: ResizeMode;
+  cropPosition?: CropPosition;
+}
 export async function renderSplit(
   element: VargElement<"split">,
   ctx: RenderContext,
@@ -12,30 +23,39 @@ export async function renderSplit(
   const props = element.props as SplitProps;
   const direction = props.direction ?? "horizontal";
-  const childPaths: string[] = [];
+  const cells: SplitCell[] = [];
   for (const child of element.children) {
     if (!child || typeof child !== "object" || !("type" in child)) continue;
     const childElement = child as VargElement;
+    const childProps = childElement.props as Record<string, unknown>;
     if (childElement.type === "image") {
       const path = await renderImage(childElement as VargElement<"image">, ctx);
-      childPaths.push(path);
+      cells.push({
+        path,
+        resizeMode: childProps.resize as ResizeMode | undefined,
+        cropPosition: childProps.cropPosition as CropPosition | undefined,
+      });
     } else if (childElement.type === "video") {
       const path = await renderVideo(childElement as VargElement<"video">, ctx);
-      childPaths.push(path);
+      cells.push({
+        path,
+        resizeMode: childProps.resize as ResizeMode | undefined,
+        cropPosition: childProps.cropPosition as CropPosition | undefined,
+      });
     }
   }
-  if (childPaths.length === 0) {
+  if (cells.length === 0) {
     throw new Error("Split element requires at least one image or video child");
   }
-  if (childPaths.length === 1) {
-    return childPaths[0]!;
+  if (cells.length === 1) {
+    return cells[0]!.path;
   }
-  const numChildren = childPaths.length;
+  const numChildren = cells.length;
   const cellWidth =
     direction === "horizontal"
       ? Math.floor(ctx.width / numChildren)
@@ -45,24 +65,26 @@ export async function renderSplit(
       ? Math.floor(ctx.height / numChildren)
       : ctx.height;
-  const layers: Layer[] = childPaths.map((path, i) => {
-    const isVideo = path.endsWith(".mp4") || path.endsWith(".webm");
+  const layers: Layer[] = cells.map((cell, i) => {
+    const isVideo = cell.path.endsWith(".mp4") || cell.path.endsWith(".webm");
     const left = direction === "horizontal" ? cellWidth * i : 0;
     const top = direction === "vertical" ? cellHeight * i : 0;
     if (isVideo) {
       return {
         type: "video" as const,
-        path,
+        path: cell.path,
         left,
         top,
         width: cellWidth,
         height: cellHeight,
+        resizeMode: cell.resizeMode,
+        cropPosition: cell.cropPosition,
       };
     }
     return {
       type: "image-overlay" as const,
-      path,
+      path: cell.path,
       position: { x: left, y: top },
       width: cellWidth,
       height: cellHeight,
@@ -73,7 +95,6 @@ export async function renderSplit(
   const clip: Clip = {
     layers,
-    duration: 5,
   };
   const outPath = `/tmp/varg-split-${Date.now()}.mp4`;

package/src/react/renderers/utils.test.ts ADDED

@@ -0,0 +1,80 @@
+import { describe, expect, test } from "bun:test";
+import { fal } from "../../ai-sdk/providers/fal";
+import { Image, Video } from "../elements";
+import { computeCacheKey } from "./utils";
+describe("computeCacheKey", () => {
+  test("ignores layout props for images", () => {
+    const base = Image({
+      prompt: "lion on a couch",
+      model: fal.imageModel("flux-schnell"),
+      aspectRatio: "16:9",
+    });
+    const variant = Image({
+      prompt: "lion on a couch",
+      model: fal.imageModel("flux-schnell"),
+      aspectRatio: "16:9",
+      left: "10%",
+      top: "5%",
+      width: "50%",
+      height: "50%",
+      resize: "cover",
+      zoom: "in",
+      key: "layout-1",
+    });
+    expect(computeCacheKey(base)).toEqual(computeCacheKey(variant));
+  });
+  test("ignores trim/audio/layout props for videos", () => {
+    const base = Video({
+      prompt: "walk forward, confident stride",
+      model: fal.videoModel("kling-v2.5"),
+      aspectRatio: "9:16",
+    });
+    const variant = Video({
+      prompt: "walk forward, confident stride",
+      model: fal.videoModel("kling-v2.5"),
+      aspectRatio: "9:16",
+      cutFrom: 0.5,
+      cutTo: 2.5,
+      left: "15%",
+      width: "70%",
+      keepAudio: true,
+      volume: 0.5,
+      key: "clip-2",
+    });
+    expect(computeCacheKey(base)).toEqual(computeCacheKey(variant));
+  });
+  test("changes when prompt changes", () => {
+    const a = Image({
+      prompt: "lion on a couch",
+      model: fal.imageModel("flux-schnell"),
+    });
+    const b = Image({
+      prompt: "tiger on a couch",
+      model: fal.imageModel("flux-schnell"),
+    });
+    expect(computeCacheKey(a)).not.toEqual(computeCacheKey(b));
+  });
+  test("changes when model changes", () => {
+    const a = Video({
+      prompt: "walk forward",
+      model: fal.videoModel("kling-v2.5"),
+    });
+    const b = Video({
+      prompt: "walk forward",
+      model: fal.videoModel("wan-2.5"),
+    });
+    expect(computeCacheKey(a)).not.toEqual(computeCacheKey(b));
+  });
+});

package/src/react/renderers/utils.ts CHANGED

@@ -41,6 +41,42 @@ function getFileFingerprint(path: string): string {
   return `${path}:${stat.mtimeMs}:${stat.size}`;
 }
+const COMMON_IGNORED_PROPS = new Set(["children", "key"]);
+const IGNORED_PROPS_BY_TYPE: Partial<Record<VargElement["type"], Set<string>>> =
+  {
+    image: new Set([
+      "left",
+      "top",
+      "width",
+      "height",
+      "resize",
+      "position",
+      "size",
+      "zoom",
+    ]),
+    video: new Set([
+      "left",
+      "top",
+      "width",
+      "height",
+      "resize",
+      "cutFrom",
+      "cutTo",
+      "volume",
+      "keepAudio",
+    ]),
+    speech: new Set(["volume", "id"]),
+  };
+function shouldIgnoreProp(
+  elementType: VargElement["type"],
+  key: string,
+): boolean {
+  if (COMMON_IGNORED_PROPS.has(key)) return true;
+  return IGNORED_PROPS_BY_TYPE[elementType]?.has(key) ?? false;
+}
 function serializeValue(v: unknown): string {
   if (typeof v === "string") {
     if (isLocalFilePath(v)) {
@@ -67,7 +103,7 @@ export function computeCacheKey(element: VargElement): CacheKeyPart[] {
   const key: CacheKeyPart[] = [element.type];
   for (const [k, v] of Object.entries(element.props)) {
-    if (k === "children") continue;
+    if (shouldIgnoreProp(element.type, k)) continue;
     if (k === "model" && v && typeof v === "object" && "modelId" in v) {
       const model = v as {
         provider?: string;

package/src/react/renderers/video.ts CHANGED

@@ -9,6 +9,7 @@ import type {
 import type { RenderContext } from "./context";
 import { renderImage } from "./image";
 import { addTask, completeTask, startTask } from "./progress";
+import { renderSpeech } from "./speech";
 import { computeCacheKey, toFileUrl } from "./utils";
 async function resolveImageInput(
@@ -27,13 +28,46 @@ async function resolveImageInput(
   return new Uint8Array(await response.arrayBuffer());
 }
-async function resolveMediaInput(
-  input: Uint8Array | string | undefined,
+async function resolveAudioInput(
+  input: Uint8Array | string | VargElement<"speech"> | undefined,
+  ctx: RenderContext,
 ): Promise<Uint8Array | undefined> {
   if (!input) return undefined;
   if (input instanceof Uint8Array) return input;
-  const response = await fetch(toFileUrl(input));
-  return new Uint8Array(await response.arrayBuffer());
+  if (typeof input === "string") {
+    const response = await fetch(toFileUrl(input));
+    return new Uint8Array(await response.arrayBuffer());
+  }
+  // It's a Speech element - render it first
+  if (input.type === "speech") {
+    const { path } = await renderSpeech(input, ctx);
+    const response = await fetch(toFileUrl(path));
+    return new Uint8Array(await response.arrayBuffer());
+  }
+  throw new Error(
+    `Unsupported audio input type: ${(input as VargElement).type}`,
+  );
+}
+async function resolveVideoInput(
+  input: Uint8Array | string | VargElement<"video"> | undefined,
+  ctx: RenderContext,
+): Promise<Uint8Array | undefined> {
+  if (!input) return undefined;
+  if (input instanceof Uint8Array) return input;
+  if (typeof input === "string") {
+    const response = await fetch(toFileUrl(input));
+    return new Uint8Array(await response.arrayBuffer());
+  }
+  // It's a Video element - render it first
+  if (input.type === "video") {
+    const path = await renderVideo(input, ctx);
+    const response = await fetch(toFileUrl(path));
+    return new Uint8Array(await response.arrayBuffer());
+  }
+  throw new Error(
+    `Unsupported video input type: ${(input as VargElement).type}`,
+  );
 }
 async function resolvePrompt(
@@ -55,8 +89,8 @@ async function resolvePrompt(
     prompt.images
       ? Promise.all(prompt.images.map((img) => resolveImageInput(img, ctx)))
       : undefined,
-    resolveMediaInput(prompt.audio),
-    resolveMediaInput(prompt.video),
+    resolveAudioInput(prompt.audio, ctx),
+    resolveVideoInput(prompt.video, ctx),
   ]);
   return {
     text: prompt.text,
@@ -81,9 +115,11 @@ export async function renderVideo(
     throw new Error("Video element requires either 'prompt' or 'src'");
   }
-  const model = props.model;
+  const model = props.model ?? ctx.defaults?.video;
   if (!model) {
-    throw new Error("Video element requires 'model' prop when using prompt");
+    throw new Error(
+      "Video element requires 'model' prop (or set defaults.video in render options)",
+    );
   }
   // Compute cache key for deduplication
@@ -109,7 +145,9 @@ export async function renderVideo(
     const { video } = await ctx.generateVideo({
       model,
       prompt: resolvedPrompt,
-      duration: 5,
+      duration: props.duration ?? 5,
+      aspectRatio: props.aspectRatio,
+      providerOptions: props.providerOptions,
       cacheKey,
     } as Parameters<typeof generateVideo>[0]);

package/src/react/types.ts CHANGED

@@ -1,6 +1,13 @@
-import type { ImageModelV3, SpeechModelV3 } from "@ai-sdk/provider";
+import type {
+  ImageModelV3,
+  SharedV3ProviderOptions,
+  SpeechModelV3,
+} from "@ai-sdk/provider";
+import type { FFmpegBackend } from "@/ai-sdk/providers/editly/backends";
+import type { CacheStorage } from "../ai-sdk/cache";
 import type { MusicModelV3 } from "../ai-sdk/music-model";
 import type {
+  CropPosition,
   Position,
   ResizeMode,
   SizeValue,
@@ -14,7 +21,6 @@ export type VargElementType =
   | "overlay"
   | "image"
   | "video"
-  | "animate"
   | "speech"
   | "talking-head"
   | "title"
@@ -69,6 +75,7 @@ export interface RenderProps extends BaseProps {
   height?: number;
   fps?: number;
   normalize?: boolean;
+  shortest?: boolean;
   children?: VargNode;
 }
@@ -99,6 +106,8 @@ export interface ImageProps extends BaseProps, PositionProps {
   position?: Position;
   size?: { width: string; height: string };
   removeBackground?: boolean;
+  /** Provider-specific options (e.g., fal: { acceleration: "high" }) */
+  providerOptions?: SharedV3ProviderOptions;
 }
 export type VideoPrompt =
@@ -106,8 +115,8 @@ export type VideoPrompt =
   | {
       text?: string;
       images?: ImageInput[];
-      audio?: Uint8Array | string;
-      video?: Uint8Array | string;
+      audio?: Uint8Array | string | VargElement<"speech">;
+      video?: Uint8Array | string | VargElement<"video">;
     };
 export type VideoProps = BaseProps &
@@ -118,17 +127,12 @@ export type VideoProps = BaseProps &
     src?: string;
     model?: VideoModelV3;
     resize?: ResizeMode;
+    cropPosition?: CropPosition;
+    aspectRatio?: `${number}:${number}`;
+    /** Provider-specific options (e.g., fal: { generate_audio: true }) */
+    providerOptions?: SharedV3ProviderOptions;
   };
-// Image-to-video animation
-export interface AnimateProps extends BaseProps, PositionProps {
-  image?: VargElement<"image">;
-  src?: string;
-  model?: VideoModelV3;
-  motion?: string;
-  duration?: number;
-}
 export interface SpeechProps extends BaseProps, VolumeProps {
   voice?: string;
   model?: SpeechModelV3;
@@ -168,6 +172,8 @@ export type MusicProps = BaseProps &
     prompt?: string;
     model?: MusicModelV3;
     src?: string;
+    /** Timeline offset in seconds — when in the video this audio starts playing */
+    start?: number;
     loop?: boolean;
     ducking?: boolean;
   };
@@ -176,6 +182,7 @@ export interface CaptionsProps extends BaseProps {
   src?: string | VargElement<"speech">;
   srt?: string;
   style?: "tiktok" | "karaoke" | "bounce" | "typewriter";
+  position?: "top" | "center" | "bottom";
   color?: string;
   activeColor?: string;
   fontSize?: number;
@@ -200,23 +207,70 @@ export interface SwipeProps extends BaseProps {
 export interface PackshotProps extends BaseProps {
   background?: VargElement<"image"> | string;
   logo?: string;
+  /**
+   * Logo position on screen.
+   *
+   * Accepts any {@link Position} value including PositionObject (`{ x, y }`).
+   * A PositionObject is normalised to the closest string position at render
+   * time (see ctaPosition docs for the conversion rules).
+   */
   logoPosition?: Position;
   logoSize?: SizeValue;
+  /** Title text displayed below the logo (e.g. app name) */
+  title?: string;
+  /** Title text color (hex, default: "#FFFFFF") */
+  titleColor?: string;
+  /** Title position on screen (default: "center") */
+  titlePosition?: Position;
+  /** CTA button text */
   cta?: string;
+  /**
+   * CTA button position on screen.
+   *
+   * Accepts any value from the {@link Position} union:
+   * - **String literals** (`"top"`, `"bottom"`, `"center"`, `"top-left"`, etc.)
+   *   are used directly (compound positions like `"top-left"` are collapsed to
+   *   their vertical component for the blinking-button renderer).
+   * - **PositionObject** (`{ x, y }` with optional `originX` / `originY`) is
+   *   supported and will be **normalised** to the closest string position at
+   *   render time.  The y-coordinate is converted to a 0-1 fraction (pixels
+   *   are divided by the video height; percentages are divided by 100) and
+   *   mapped to `"top"` (< 33 %), `"center"` (33-67 %), or `"bottom"` (> 67 %).
+   *   The x-coordinate follows the same logic for contexts that use the full
+   *   nine-position grid.
+   */
   ctaPosition?: Position;
+  /** CTA button background color (hex, default: "#FF6B00") */
   ctaColor?: string;
-  ctaSize?: number;
+  /** CTA button text color (hex, default: "#FFFFFF") */
+  ctaTextColor?: string;
+  /** CTA button size in pixels { width, height } */
+  ctaSize?: { width: number; height: number };
+  /** Enable blinking animation (scale + brightness pulse) */
   blinkCta?: boolean;
+  /** Blink animation cycle duration in seconds (default: 0.8) */
+  blinkFrequency?: number;
+  /** Packshot duration in seconds */
   duration?: number;
 }
-export type RenderMode = "strict" | "default" | "preview";
+export type RenderMode = "strict" | "preview";
+export interface DefaultModels {
+  image?: ImageModelV3;
+  video?: VideoModelV3;
+  speech?: SpeechModelV3;
+  music?: MusicModelV3;
+}
 export interface RenderOptions {
   output?: string;
-  cache?: string;
+  cache?: string | CacheStorage;
   quiet?: boolean;
+  verbose?: boolean;
   mode?: RenderMode;
+  defaults?: DefaultModels;
+  backend?: FFmpegBackend;
 }
 export interface ElementPropsMap {
@@ -225,7 +279,6 @@ export interface ElementPropsMap {
   overlay: OverlayProps;
   image: ImageProps;
   video: VideoProps;
-  animate: AnimateProps;
   speech: SpeechProps;
   "talking-head": TalkingHeadProps;
   title: TitleProps;

package/src/studio/stages.ts CHANGED

@@ -1,6 +1,6 @@
 import type { VargElement, VargNode } from "../react/types";
-export type StageType = "image" | "video" | "animate" | "speech" | "music";
+export type StageType = "image" | "video" | "speech" | "music";
 export interface RenderStage {
   id: string;
@@ -70,11 +70,6 @@ export function extractStages(element: VargElement): ExtractedStages {
       return "video";
     }
-    if (type === "animate") {
-      const motion = props.motion;
-      return motion ? `animate: ${motion}` : "animate";
-    }
     if (type === "speech") {
       const text = getTextContent(element.children);
       return `speech: ${text.slice(0, 30)}${text.length > 30 ? "..." : ""}`;
@@ -120,13 +115,7 @@ export function extractStages(element: VargElement): ExtractedStages {
     const collectedDeps: string[] = [...parentDeps];
     // Check if this is a renderable stage
-    const stageTypes: StageType[] = [
-      "image",
-      "video",
-      "animate",
-      "speech",
-      "music",
-    ];
+    const stageTypes: StageType[] = ["image", "video", "speech", "music"];
     if (stageTypes.includes(element.type as StageType)) {
       const stageType = element.type as StageType;
@@ -137,35 +126,47 @@ export function extractStages(element: VargElement): ExtractedStages {
         return [];
       }
-      // For video/animate with image inputs, we need to find dependent images first
-      const imageDeps: string[] = [];
-      if (stageType === "video" || stageType === "animate") {
-        // Check prompt.images for nested Image elements
-        const prompt = props.prompt as { images?: VargNode[] } | undefined;
-        if (prompt?.images) {
-          for (const imgInput of prompt.images) {
-            if (
-              imgInput &&
-              typeof imgInput === "object" &&
-              "type" in imgInput
-            ) {
-              const imgElement = imgInput as VargElement;
-              if (imgElement.type === "image") {
-                const deps = walkTree(imgElement, currentPath, collectedDeps);
-                imageDeps.push(...deps);
-              }
+      const nestedDeps: string[] = [];
+      const prompt = props.prompt as Record<string, unknown> | undefined;
+      if (prompt && typeof prompt === "object") {
+        if (Array.isArray(prompt.images)) {
+          for (const input of prompt.images) {
+            if (input && typeof input === "object" && "type" in input) {
+              const deps = walkTree(
+                input as VargElement,
+                currentPath,
+                collectedDeps,
+              );
+              nestedDeps.push(...deps);
             }
           }
         }
-        // Check for image prop in animate
-        if (stageType === "animate" && props.image) {
-          const imgElement = props.image as VargElement;
-          if (imgElement.type === "image") {
-            const deps = walkTree(imgElement, currentPath, collectedDeps);
-            imageDeps.push(...deps);
-          }
+        if (
+          prompt.video &&
+          typeof prompt.video === "object" &&
+          "type" in prompt.video
+        ) {
+          const deps = walkTree(
+            prompt.video as VargElement,
+            currentPath,
+            collectedDeps,
+          );
+          nestedDeps.push(...deps);
+        }
+        if (
+          prompt.audio &&
+          typeof prompt.audio === "object" &&
+          "type" in prompt.audio
+        ) {
+          const deps = walkTree(
+            prompt.audio as VargElement,
+            currentPath,
+            collectedDeps,
+          );
+          nestedDeps.push(...deps);
         }
       }
@@ -176,7 +177,7 @@ export function extractStages(element: VargElement): ExtractedStages {
         label: getLabel(stageType, element),
         element,
         path: currentPath,
-        dependsOn: [...new Set([...collectedDeps, ...imageDeps])],
+        dependsOn: [...new Set([...collectedDeps, ...nestedDeps])],
         status: "pending",
       };

package/src/studio/step-renderer.ts CHANGED

@@ -1,8 +1,7 @@
 import { generateImage } from "ai";
-import { withCache } from "../ai-sdk/cache";
+import { type CacheStorage, withCache } from "../ai-sdk/cache";
 import { fileCache } from "../ai-sdk/file-cache";
 import { generateVideo } from "../ai-sdk/generate-video";
-import { renderAnimate } from "../react/renderers/animate";
 import type { RenderContext } from "../react/renderers/context";
 import { renderImage } from "../react/renderers/image";
 import { renderMusic } from "../react/renderers/music";
@@ -28,21 +27,26 @@ const sessions = new Map<string, StepSession>();
 export function createStepSession(
   code: string,
   rootElement: VargElement,
-  cacheDir?: string,
+  cache?: string | CacheStorage,
 ): StepSession {
   const props = rootElement.props as RenderProps;
-  const cache = cacheDir ? fileCache({ dir: cacheDir }) : undefined;
+  const cacheStorage =
+    cache === undefined
+      ? undefined
+      : typeof cache === "string"
+        ? fileCache({ dir: cache })
+        : cache;
   const ctx: RenderContext = {
     width: props.width ?? 1920,
     height: props.height ?? 1080,
     fps: props.fps ?? 30,
-    cache,
-    generateImage: cache
-      ? withCache(generateImage, { storage: cache })
+    cache: cacheStorage,
+    generateImage: cacheStorage
+      ? withCache(generateImage, { storage: cacheStorage })
       : generateImage,
-    generateVideo: cache
-      ? withCache(generateVideo, { storage: cache })
+    generateVideo: cacheStorage
+      ? withCache(generateVideo, { storage: cacheStorage })
       : generateVideo,
     tempFiles: [],
     progress: createProgressTracker(false),
@@ -124,20 +128,6 @@ export async function executeStage(
         break;
       }
-      case "animate": {
-        const path = await renderAnimate(
-          stage.element as VargElement<"animate">,
-          session.ctx,
-        );
-        result = {
-          type: "video",
-          path,
-          previewUrl: `/api/step/preview/${session.id}/${stageId}`,
-          mimeType: "video/mp4",
-        };
-        break;
-      }
       case "speech": {
         const speechResult = await renderSpeech(
           stage.element as VargElement<"speech">,
@@ -238,7 +228,7 @@ export async function finalizeRender(
   await render(session.rootElement, {
     output: outputPath,
-    cache: session.ctx.cache ? ".cache/ai" : undefined,
+    cache: session.ctx.cache,
     quiet: true,
   });