vargai 0.4.0-alpha54 → 0.4.0-alpha56
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/ai-sdk/providers/editly/editly.test.ts +75 -0
- package/src/ai-sdk/providers/editly/index.ts +21 -2
- package/src/ai-sdk/providers/editly/rendi/index.ts +39 -5
- package/src/ai-sdk/providers/editly/rendi/rendi.test.ts +42 -6
- package/src/react/renderers/context.ts +0 -1
- package/src/react/renderers/packshot/blinking-button.ts +245 -284
- package/src/react/renderers/packshot.ts +68 -27
- package/src/react/renderers/slider.ts +1 -0
- package/src/react/renderers/swipe.ts +1 -0
- package/src/react/types.ts +0 -1
- package/.env.example +0 -33
- package/garry-tan-varg.tsx +0 -46
- package/src/ai-sdk/examples/garry-tan-varg.ts +0 -61
- package/src/react/examples/garry-tan-varg.tsx +0 -52
package/package.json
CHANGED

package/src/ai-sdk/providers/editly/editly.test.ts
CHANGED

@@ -1334,4 +1334,79 @@ describe("editly", () => {
 
     expect(existsSync(outPath)).toBe(true);
   });
+
+  // Regression test for issue #123
+  // https://github.com/vargHQ/sdk/issues/123
+  test("issue #123: clip with only positioned videos (no base layer) generates valid filter", async () => {
+    // Bug: when a clip has only positioned videos (from Split/Slot) and no base layer,
+    // buildBaseClipFilter returns an empty outputLabel "", causing ffmpeg to crash with
+    // "Bad (empty?) label found" error like: "[]concat=n=2:v=1:a=0..."
+    // Fix: auto-insert fill-color base when clip has overlays but no base
+    const outPath = "output/editly-test-issue-123-no-base.mp4";
+    if (existsSync(outPath)) unlinkSync(outPath);
+
+    await editly({
+      outPath,
+      width: 1080,
+      height: 1920,
+      fps: 30,
+      clips: [
+        {
+          duration: 2,
+          layers: [{ type: "fill-color", color: "#ff0000" }],
+          transition: { name: "fade", duration: 0.5 },
+        },
+        {
+          duration: 2,
+          layers: [
+            // Only positioned videos — no base layer
+            {
+              type: "video",
+              path: VIDEO_1,
+              width: "50%",
+              height: "100%",
+              left: "0%",
+              top: "0%",
+              resizeMode: "cover",
+            },
+            {
+              type: "video",
+              path: VIDEO_2,
+              width: "50%",
+              height: "100%",
+              left: "50%",
+              top: "0%",
+              resizeMode: "cover",
+            },
+          ],
+          transition: { name: "fade", duration: 0.5 },
+        },
+        {
+          duration: 2,
+          layers: [{ type: "fill-color", color: "#0000ff" }],
+        },
+      ],
+    });
+
+    expect(existsSync(outPath)).toBe(true);
+    const info = await ffprobe(outPath);
+    expect(info.duration).toBeGreaterThan(4);
+  });
+
+  test("issue #123: clip with no layers at all throws clear error", async () => {
+    await expect(
+      editly({
+        outPath: "output/editly-test-issue-123-empty.mp4",
+        width: 640,
+        height: 480,
+        fps: 30,
+        clips: [
+          {
+            duration: 2,
+            layers: [],
+          },
+        ],
+      }),
+    ).rejects.toThrow("produced no video output");
+  });
 });
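To make the failure mode concrete: with no base layer, buildBaseClipFilter previously emitted an empty output label, so a downstream filter referenced `[]` and ffmpeg aborted. A sketch of the two filtergraph fragments involved — the label names here are hypothetical, only the error text comes from the test comments above:

// Before the fix: empty base label feeds concat — ffmpeg: "Bad (empty?) label found"
const broken = "[]concat=n=2:v=1:a=0[outv]";
// After the fix: a synthesized fill-color base supplies a real label first
const fixed = "color=c=#000000:s=1080x1920:d=2[fill0];[fill0][clip1ov0]overlay[outv]";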
package/src/ai-sdk/providers/editly/index.ts
CHANGED

@@ -1,5 +1,6 @@
 import { type FFmpegBackend, localBackend } from "./backends";
 import {
+  getFillColorFilter,
   getImageOverlayFilter,
   getImageOverlayPositionFilter,
   getNewsTitleFilter,
@@ -204,7 +205,7 @@ function buildBaseClipFilter(
     cutFrom: number;
     mixVolume?: number | string;
   }[] = [];
-  let baseLabel
+  let baseLabel: string | undefined;
   let inputIdx = inputOffset;
 
   const baseLayers = clip.layers.filter(
@@ -252,6 +253,18 @@ function buildBaseClipFilter(
     }
   }
 
+  if (!baseLabel && clipLocalOverlays.length > 0) {
+    const fillFilter = getFillColorFilter(
+      { type: "fill-color", color: "#000000" },
+      inputIdx,
+      width,
+      height,
+      clip.duration,
+    );
+    filters.push(fillFilter.filterComplex);
+    baseLabel = fillFilter.outputLabel;
+  }
+
   for (let i = 0; i < clipLocalOverlays.length; i++) {
     const layer = clipLocalOverlays[i];
     if (!layer) continue;
@@ -270,7 +283,7 @@ function buildBaseClipFilter(
 
     const outputLabel = `clip${clipIndex}ov${i}`;
     const positionFilter = getOverlayFilter(
-      baseLabel
+      baseLabel!,
       overlayFilter.outputLabel,
       layer,
       width,
@@ -282,6 +295,12 @@ function buildBaseClipFilter(
     inputIdx++;
   }
 
+  if (!baseLabel) {
+    throw new Error(
+      `Clip ${clipIndex} produced no video output — ensure it has at least one visual layer (video, image, or fill-color)`,
+    );
+  }
+
   return {
     filters,
     inputs,
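The call site above pins down getFillColorFilter's signature (layer, input index, width, height, duration → { filterComplex, outputLabel }), but its body is not part of this diff. A minimal sketch of what such a helper could look like, assuming a lavfi `color` source and an invented label scheme:

function getFillColorFilterSketch(
  layer: { type: "fill-color"; color: string },
  inputIdx: number,
  width: number,
  height: number,
  duration: number,
): { filterComplex: string; outputLabel: string } {
  // Hypothetical label scheme — the real implementation may differ.
  const outputLabel = `fill${inputIdx}`;
  return {
    // lavfi color source: a solid frame of the clip's size and duration
    filterComplex: `color=c=${layer.color}:s=${width}x${height}:d=${duration}[${outputLabel}]`,
    outputLabel,
  };
}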
package/src/ai-sdk/providers/editly/rendi/index.ts
CHANGED

@@ -1,3 +1,4 @@
+import sharp from "sharp";
 import { File } from "../../../file";
 import type { StorageProvider } from "../../../storage/types";
 import type {
@@ -122,7 +123,7 @@ export class RendiBackend implements FFmpegBackend {
   }
 
   async run(options: FFmpegRunOptions): Promise<FFmpegRunResult> {
-    const {
+    let {
       inputs,
       filterComplex,
       videoFilter,
@@ -131,11 +132,19 @@ export class RendiBackend implements FFmpegBackend {
       verbose,
     } = options;
 
+    // Synthetic-only commands (e.g. fill-color, gradient clips) produce a
+    // filterComplex that uses lavfi sources like `color=...` with zero file
+    // inputs. Rendi requires at least one input_file, so we upload a tiny
+    // 1×1 transparent PNG as a dummy input that ffmpeg silently ignores
+    // (the filterComplex never references [0:v]).
     if (!inputs || inputs.length === 0) {
-      …
+      if (!filterComplex) {
+        throw new Error(
+          "Rendi backend requires at least one input file or a filterComplex with synthetic sources.",
+        );
+      }
+      const dummyUrl = await this.getOrCreateDummyInput();
+      inputs = [dummyUrl];
     }
 
     const inputFiles: Record<string, string> = {};
@@ -283,6 +292,31 @@ export class RendiBackend implements FFmpegBackend {
     return file.upload(this.storage);
   }
 
+  /** Cached URL of the 1×1 dummy PNG so we upload it at most once per backend instance. */
+  private dummyInputUrl: string | null = null;
+
+  /**
+   * Generate a 1×1 transparent PNG via sharp, upload it to storage, and cache
+   * the URL. Used as a placeholder input for Rendi when the ffmpeg command
+   * has only synthetic (lavfi) sources and no real file inputs.
+   */
+  private async getOrCreateDummyInput(): Promise<string> {
+    if (this.dummyInputUrl) return this.dummyInputUrl;
+    const png = await sharp({
+      create: {
+        width: 1,
+        height: 1,
+        channels: 4,
+        background: { r: 0, g: 0, b: 0, alpha: 0 },
+      },
+    })
+      .png()
+      .toBuffer();
+    const key = "internal/rendi-dummy-1x1.png";
+    this.dummyInputUrl = await this.storage.upload(png, key, "image/png");
+    return this.dummyInputUrl;
+  }
+
   private buildCommandString(args: string[]): string {
     return args
       .map((arg) => {
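Put together, a synthetic-only render can now be submitted to Rendi. A usage sketch — option names come from the run() call sites in this diff; the storage value is an assumed StorageProvider whose upload(data, key, contentType) returns a URL:

const backend = createRendiBackend({
  apiKey: process.env.RENDI_API_KEY ?? "test-key",
  storage, // assumed StorageProvider instance
});

// Zero file inputs: the lavfi color source is the only video source, so the
// backend uploads its cached 1×1 transparent PNG as Rendi's dummy input_file
// before submitting the job to the Rendi API.
await backend.run({
  inputs: [],
  filterComplex: "color=c=#1a1a2e:s=1080x1920:d=5:r=30[color0]",
  outputArgs: ["-map", "[color0]", "-c:v", "libx264"],
  outputPath: "output/solid.mp4",
});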
package/src/ai-sdk/providers/editly/rendi/rendi.test.ts
CHANGED

@@ -10,9 +10,19 @@ const mockStorage: StorageProvider = {
   },
 };
 
+/** Mock storage that accepts uploads and returns a predictable URL. */
+const uploadableStorage: StorageProvider = {
+  async upload(_data: Uint8Array, key: string) {
+    return `https://mock-storage.test/${key}`;
+  },
+};
+
 describe("rendi backend validation", () => {
-  test("throws …
-  const backend = createRendiBackend({
+  test("throws when inputs empty and no filterComplex", async () => {
+    const backend = createRendiBackend({
+      apiKey: "test-key",
+      storage: mockStorage,
+    });
 
     await expect(
       backend.run({
@@ -20,11 +30,16 @@ describe("rendi backend validation", () => {
         outputArgs: ["-c:v", "libx264"],
         outputPath: "output.mp4",
       }),
-    ).rejects.toThrow(…
+    ).rejects.toThrow(
+      "Rendi backend requires at least one input file or a filterComplex",
+    );
   });
 
-  test("throws …
-  const backend = createRendiBackend({
+  test("throws when inputs undefined and no filterComplex", async () => {
+    const backend = createRendiBackend({
+      apiKey: "test-key",
+      storage: mockStorage,
+    });
 
     await expect(
       backend.run({
@@ -32,7 +47,28 @@ describe("rendi backend validation", () => {
         outputArgs: ["-c:v", "libx264"],
         outputPath: "output.mp4",
       }),
-    ).rejects.toThrow(…
+    ).rejects.toThrow(
+      "Rendi backend requires at least one input file or a filterComplex",
+    );
+  });
+
+  test("generates dummy input when inputs empty but filterComplex present", async () => {
+    // The run() call will still fail at the Rendi API fetch (no real server),
+    // but it should NOT throw the "requires at least one input" error.
+    // It should get past the validation and fail at the network call.
+    const backend = createRendiBackend({
+      apiKey: "test-key",
+      storage: uploadableStorage,
+    });
+
+    await expect(
+      backend.run({
+        inputs: [],
+        filterComplex: "color=c=#1a1a2e:s=1080x1920:d=5:r=30[color0]",
+        outputArgs: ["-map", "[color0]", "-c:v", "libx264"],
+        outputPath: "output.mp4",
+      }),
+    ).rejects.toThrow(/Rendi submit failed|fetch/);
   });
 });
package/src/react/renderers/packshot/blinking-button.ts
CHANGED

@@ -1,7 +1,10 @@
-import { spawn } from "node:child_process";
-import { mkdir, rm } from "node:fs/promises";
 import path from "node:path";
 import sharp from "sharp";
+import type {
+  FFmpegBackend,
+  FFmpegOutput,
+} from "../../../ai-sdk/providers/editly/backends/types";
+import { uploadBuffer } from "../../../providers/storage";
 
 export interface BlinkingButtonOptions {
   text: string;
@@ -17,14 +20,24 @@ export interface BlinkingButtonOptions {
   buttonHeight?: number; // Button height in pixels
 }
 
-…
+export interface BlinkingButtonResult {
+  /** Output video — local file path or cloud URL */
+  output: FFmpegOutput;
+  /** X offset for overlaying on the full video frame */
+  x: number;
+  /** Y offset for overlaying on the full video frame */
+  y: number;
+  /** Canvas width of the output video */
+  canvasWidth: number;
+  /** Canvas height of the output video */
+  canvasHeight: number;
+}
+
+// ─── Helpers ─────────────────────────────────────────────────────────────────
+
 function hexToRgb(hex: string): { r: number; g: number; b: number } {
   const result = /^#?([a-f\d]{2})([a-f\d]{2})([a-f\d]{2})$/i.exec(hex);
-  if (!result) {
-    return { r: 255, g: 107, b: 0 }; // Default orange
-  }
+  if (!result) return { r: 255, g: 107, b: 0 };
   return {
     r: parseInt(result[1] as string, 16),
     g: parseInt(result[2] as string, 16),
@@ -32,16 +45,10 @@ function hexToRgb(hex: string): { r: number; g: number; b: number } {
   };
 }
 
-/**
- * Clamp value to max (for color brightening)
- */
 function clamp(value: number, max = 255): number {
   return Math.min(Math.floor(value), max);
 }
 
-/**
- * Create SVG for button background with gradient and rounded corners
- */
 function createButtonSvg(
   width: number,
   height: number,
@@ -60,25 +67,81 @@ function createButtonSvg(
 </svg>`;
 }
 
+function escapeXml(text: string): string {
+  return text
+    .replace(/&/g, "&amp;")
+    .replace(/</g, "&lt;")
+    .replace(/>/g, "&gt;")
+    .replace(/"/g, "&quot;")
+    .replace(/'/g, "&apos;");
+}
+
+function getButtonYPosition(
+  position: "top" | "center" | "bottom",
+  videoHeight: number,
+  buttonHeight: number,
+): number {
+  switch (position) {
+    case "top":
+      return Math.floor(videoHeight * 0.15);
+    case "center":
+      return Math.floor((videoHeight - buttonHeight) / 2);
+    default:
+      return Math.floor(videoHeight * 0.78 - buttonHeight / 2);
+  }
+}
+
+/** Ensure even dimension for ffmpeg */
+function even(n: number): number {
+  return n % 2 === 0 ? n : n + 1;
+}
+
 /**
- *
- *
- *
- * Matches Python SDK quality:
- * - Gradient background (lighter top -> darker bottom)
- * - Rounded corners (45% of height)
- * - Scale animation (1.0 -> 1.03)
- * - Brightness animation (0.85 -> 1.2)
- * - Custom font support (TikTokSans-Bold)
+ * Elastic ease oscillator as an ffmpeg expression.
+ * Period P seconds, using time variable `tv` ("t" for scale/eq, "T" for geq).
+ * Returns 0 → 1.15 (overshoot) → 1.0 (settle) → 0 (fall) per cycle.
  */
-…
-  y: number;
+function oscExpr(tv: string, P: number): string {
+  const ph = `(mod(${tv},${P})/${P})`;
+  return `if(lt(${ph},0.25),sin(${ph}/0.25*PI/2)*1.15,if(lt(${ph},0.4),1.15-0.15*(${ph}-0.25)/0.15,cos((${ph}-0.4)/0.6*PI/2)))`;
 }
 
+/**
+ * Resolve a local file path to a URL for cloud backends.
+ * Local backend: returns the path as-is.
+ * Cloud backend: uploads the file and returns the URL.
+ */
+async function resolvePathForBackend(
+  localPath: string,
+  backend: FFmpegBackend,
+): Promise<string> {
+  if (backend.name === "local") return localPath;
+  const buffer = await Bun.file(localPath).arrayBuffer();
+  const key = `tmp/${Date.now()}-${localPath.split("/").pop()}`;
+  return uploadBuffer(buffer, key, "image/png");
+}
+
+// ─── Main ────────────────────────────────────────────────────────────────────
+
+/**
+ * Create a blinking CTA button video using Sharp for static PNG rendering
+ * and a single FFmpeg filter_complex for all animation.
+ *
+ * Architecture:
+ * 1. Sharp renders 2 static PNGs: button (native size) + glow (canvas size)
+ * 2. FFmpeg filter_complex does per-frame animation via expressions:
+ *    - eq(gamma, eval=frame) for brightness pulse (0.85x → 1.2x)
+ *    - scale(eval=frame) for elastic zoom pulse (1.0x → 1.14x)
+ *    - overlay with (W-w)/2 centering for perfect bbox alignment
+ *    - Glow scales 15% larger with 60% max opacity baked in
+ * 3. Output is ProRes 4444 with alpha channel
+ *
+ * Works on both local (ffmpeg binary) and cloud (rendi) backends
+ * via the FFmpegBackend abstraction.
+ */
 export async function createBlinkingButton(
   options: BlinkingButtonOptions,
+  backend: FFmpegBackend,
 ): Promise<BlinkingButtonResult> {
   const {
     text,
@@ -92,26 +155,24 @@ export async function createBlinkingButton(
     position = "bottom",
   } = options;
 
-  const totalFrames = Math.ceil(duration * fps);
-
-  // Button dimensions — large and prominent like app store CTAs
   const btnWidth = options.buttonWidth ?? Math.floor(width * 0.7);
   const btnHeight = options.buttonHeight ?? Math.floor(height * 0.09);
   const cornerRadius = Math.floor(btnHeight * 0.45);
-
-  // Animation padding (button can grow ~14% with overshoot + glow radius)
-  const maxScale = 1.14; // accounts for 1.12 * 1.15 overshoot peak
   const glowRadius = 18;
-  …
-  const
+
+  // Canvas sizing: must fit button at max animation scale + glow spread
+  const totalMaxScale = 1.14 * 1.15; // button overshoot * glow extra
+  const scalePad = Math.ceil(
+    Math.max(btnWidth, btnHeight) * (totalMaxScale - 1) * 2,
   );
-  const padding =
-  const
-  const
+  const padding = scalePad + glowRadius * 2;
+  const cw = even(btnWidth + padding * 2);
+  const ch = even(btnHeight + padding * 2);
+  const btnNativeW = even(btnWidth);
+  const btnNativeH = even(btnHeight);
+
+  // ── Step 1: Render PNGs with Sharp ─────────────────────────────────────────
 
-  // Parse colors and create gradient (lighter top, darker bottom)
   const rgb = hexToRgb(bgColor);
   const topColor = {
     r: clamp(rgb.r * 1.15),
@@ -124,289 +185,189 @@ export async function createBlinkingButton(
     b: Math.floor(rgb.b * 0.95),
   };
 
-  …
-    import.meta.dirname,
-    "../../../assets/fonts/TikTokSans-Bold.ttf",
+  const svgBuf = Buffer.from(
+    createButtonSvg(btnWidth, btnHeight, cornerRadius, topColor, bottomColor),
   );
 
-  …
-    btnHeight,
-    cornerRadius,
-    topColor,
-    bottomColor,
+  const fontPath = path.resolve(
+    import.meta.dirname,
+    "../../../../assets/fonts/TikTokSans-Bold.ttf",
   );
 
-  // Create text image using Sharp's text feature
   const fontSize = Math.floor(btnHeight * 0.55);
-  const
+  const textBuf = await sharp({
     text: {
       text: `<span foreground="${textColor}" font_weight="bold">${escapeXml(text)}</span>`,
       font: "TikTokSans",
       fontfile: fontPath,
       rgba: true,
       align: "center",
-      dpi: Math.floor(fontSize * 2.8),
+      dpi: Math.floor(fontSize * 2.8),
     },
   })
     .png()
     .toBuffer();
 
-  …
-  const
-  const
-  const textHeight = textMeta.height ?? 0;
+  const textMeta = await sharp(textBuf).metadata();
+  const tw = textMeta.width ?? 0;
+  const th = textMeta.height ?? 0;
 
-  //
-  const
+  // Button at native size (small, for fast eq/scale processing)
+  const btnNativeBuf = await sharp({
     create: {
-      width:
-      height:
+      width: btnNativeW,
+      height: btnNativeH,
       channels: 4,
       background: { r: 0, g: 0, b: 0, alpha: 0 },
     },
   })
     .composite([
-      …
+      { input: svgBuf, top: 0, left: 0 },
       {
-        input:
-        top:
-        left:
+        input: textBuf,
+        top: Math.floor((btnHeight - th) / 2),
+        left: Math.floor((btnWidth - tw) / 2),
       },
-      …
+    ])
+    .png()
+    .toBuffer();
+
+  // Button at canvas size (for glow generation — blur needs surrounding pixels)
+  const btnCenterX = Math.floor((cw - btnWidth) / 2);
+  const btnCenterY = Math.floor((ch - btnHeight) / 2);
+
+  const btnCanvasBuf = await sharp({
+    create: {
+      width: cw,
+      height: ch,
+      channels: 4,
+      background: { r: 0, g: 0, b: 0, alpha: 0 },
+    },
+  })
+    .composite([
+      { input: svgBuf, top: btnCenterY, left: btnCenterX },
       {
-        input:
-        top:
-        left:
+        input: textBuf,
+        top: btnCenterY + Math.floor((btnHeight - th) / 2),
+        left: btnCenterX + Math.floor((btnWidth - tw) / 2),
       },
     ])
     .png()
     .toBuffer();
 
-  //
-  const
+  // Glow: blur + brighten + bake 60% max opacity
+  const glowRaw = await sharp(btnCanvasBuf)
     .blur(glowRadius)
     .modulate({ brightness: 1.4 })
+    .raw()
+    .toBuffer({ resolveWithObject: true });
+
+  for (let i = 3; i < glowRaw.data.length; i += 4) {
+    glowRaw.data[i] = Math.round((glowRaw.data[i] as number) * 0.6);
+  }
+
+  const glowBuf = await sharp(glowRaw.data, {
+    raw: {
+      width: glowRaw.info.width,
+      height: glowRaw.info.height,
+      channels: 4,
+    },
+  })
     .png()
     .toBuffer();
 
-  //
-  const
-  const
-
-  // Create frames directory for intermediate files
-  const framesDir = `/tmp/varg-btn-frames-${Date.now()}`;
-  await mkdir(framesDir, { recursive: true });
-
-  // Generate animation frames
-  // Using file-based approach for reliability with alpha channel
-  for (let i = 0; i < totalFrames; i++) {
-    const t = i / fps;
-    // Elastic pulse curve: fast expand with overshoot, settle, slow contract
-    const phase = (t % blinkFrequency) / blinkFrequency; // 0 -> 1 within each cycle
-    let osc: number;
-    if (phase < 0.25) {
-      // Fast rise with overshoot to 1.15
-      osc = Math.sin((phase / 0.25) * Math.PI * 0.5) * 1.15;
-    } else if (phase < 0.4) {
-      // Settle back from 1.15 to 1.0
-      const settle = (phase - 0.25) / 0.15;
-      osc = 1.15 - 0.15 * settle;
-    } else {
-      // Slow ease-out fall back to 0
-      const fall = (phase - 0.4) / 0.6;
-      osc = Math.cos(fall * Math.PI * 0.5);
-    }
-
-    const scale = 1.0 + 0.12 * osc; // 1.0 -> 1.14 -> 1.12 -> 1.0
-    const brightness = 0.85 + 0.35 * Math.max(0, osc); // 0.85 -> 1.2 -> 0.85
-    const glowOpacity = Math.max(0, osc) * 0.6; // 0 -> 0.6 -> 0
-
-    const scaledW = Math.round(canvasWidth * scale);
-    const scaledH = Math.round(canvasHeight * scale);
-
-    // Calculate offset to keep button centered after scaling
-    const offsetX = Math.floor((canvasWidth - scaledW) / 2);
-    const offsetY = Math.floor((canvasHeight - scaledH) / 2);
-
-    // Scale button, apply brightness, then fit to canvas
-    let btnPipeline = sharp(baseButtonBuffer)
-      .resize(scaledW, scaledH, { kernel: "lanczos3" })
-      .modulate({ brightness });
-
-    if (scaledW > canvasWidth || scaledH > canvasHeight) {
-      // Button exceeds canvas during overshoot — crop from center
-      const cropLeft = Math.floor((scaledW - canvasWidth) / 2);
-      const cropTop = Math.floor((scaledH - canvasHeight) / 2);
-      btnPipeline = btnPipeline.extract({
-        left: Math.max(0, cropLeft),
-        top: Math.max(0, cropTop),
-        width: Math.min(scaledW, canvasWidth),
-        height: Math.min(scaledH, canvasHeight),
-      });
-    } else {
-      btnPipeline = btnPipeline.extend({
-        top: Math.max(0, offsetY),
-        bottom: Math.max(0, canvasHeight - scaledH - offsetY),
-        left: Math.max(0, offsetX),
-        right: Math.max(0, canvasWidth - scaledW - offsetX),
-        background: { r: 0, g: 0, b: 0, alpha: 0 },
-      });
-    }
-
-    const btnFrame = await btnPipeline.png().toBuffer();
-
-    // Scale glow slightly larger than button for halo effect
-    const glowScale = scale * 1.15;
-    const glowW = Math.round(canvasWidth * glowScale);
-    const glowH = Math.round(canvasHeight * glowScale);
-    const glowOffX = Math.floor((canvasWidth - glowW) / 2);
-    const glowOffY = Math.floor((canvasHeight - glowH) / 2);
-
-    // Render glow frame with animated opacity
-    // Scale alpha channel using raw pixel manipulation for precise opacity control
-    let glowResized: sharp.Sharp;
-    if (glowW > canvasWidth || glowH > canvasHeight) {
-      // Glow is larger than canvas — resize then crop to canvas from center
-      const cropLeft = Math.floor((glowW - canvasWidth) / 2);
-      const cropTop = Math.floor((glowH - canvasHeight) / 2);
-      glowResized = sharp(glowBuffer)
-        .resize(glowW, glowH, { kernel: "lanczos3" })
-        .extract({
-          left: Math.max(0, cropLeft),
-          top: Math.max(0, cropTop),
-          width: canvasWidth,
-          height: canvasHeight,
-        });
-    } else {
-      // Glow fits — extend with transparent padding
-      glowResized = sharp(glowBuffer)
-        .resize(glowW, glowH, { kernel: "lanczos3" })
-        .extend({
-          top: Math.max(0, glowOffY),
-          bottom: Math.max(0, canvasHeight - glowH - glowOffY),
-          left: Math.max(0, glowOffX),
-          right: Math.max(0, canvasWidth - glowW - glowOffX),
-          background: { r: 0, g: 0, b: 0, alpha: 0 },
-        });
-    }
-
-    const { data: glowPixels, info: glowInfo } = await glowResized
-      .raw()
-      .toBuffer({ resolveWithObject: true });
-
-    // Multiply alpha channel by glowOpacity
-    for (let p = 3; p < glowPixels.length; p += 4) {
-      glowPixels[p] = Math.round((glowPixels[p] as number) * glowOpacity);
-    }
-
-    const glowFrame = await sharp(glowPixels, {
-      raw: {
-        width: glowInfo.width,
-        height: glowInfo.height,
-        channels: 4,
-      },
-    })
-      .png()
-      .toBuffer();
-
-    // Composite: transparent canvas <- glow (behind) <- button (on top)
-    await sharp({
-      create: {
-        width: canvasWidth,
-        height: canvasHeight,
-        channels: 4,
-        background: { r: 0, g: 0, b: 0, alpha: 0 },
-      },
-    })
-      .composite([
-        { input: glowFrame, top: 0, left: 0 },
-        { input: btnFrame, top: 0, left: 0 },
-      ])
-      .png()
-      .toFile(`${framesDir}/frame_${String(i).padStart(5, "0")}.png`);
-  }
+  // Write PNGs to temp files
+  const ts = Date.now();
+  const btnPngPath = `/tmp/varg-btn-${ts}.png`;
+  const glowPngPath = `/tmp/varg-glow-${ts}.png`;
 
-  …
-  await runFfmpeg([
-    "-y",
-    "-framerate",
-    String(fps),
-    "-i",
-    `${framesDir}/frame_%05d.png`,
-    "-c:v",
-    "prores_ks",
-    "-profile:v",
-    "4444",
-    "-pix_fmt",
-    "yuva444p10le",
-    "-t",
-    String(duration),
-    outputPath,
+  await Promise.all([
+    Bun.write(btnPngPath, btnNativeBuf),
+    Bun.write(glowPngPath, glowBuf),
   ]);
 
-  //
-  …
+  // ── Step 2: Build ffmpeg filter_complex ────────────────────────────────────
+
+  const P = blinkFrequency;
+  const osc = oscExpr("t", P);
+
+  // eq gamma for brightness: 0.85 at rest → 1.2 at peak
+  const gammaExpr = `0.85+0.35*max(0,${osc})`;
+
+  // Button scale (on native-size input)
+  const btnSW = `ceil(${btnNativeW}*(1.0+0.12*(${osc}))/2)*2`;
+  const btnSH = `ceil(${btnNativeH}*(1.0+0.12*(${osc}))/2)*2`;
+
+  // Glow scale (15% larger, on canvas-size input)
+  const glowSW = `ceil(${cw}*(1.0+0.12*(${osc}))*1.15/2)*2`;
+  const glowSH = `ceil(${ch}*(1.0+0.12*(${osc}))*1.15/2)*2`;
+
+  // Filter complex: uses overlay for centering (no crop+pad drift)
+  const filterComplex = [
+    // Three transparent canvases (base + one per animated layer)
+    `color=0x00000000:s=${cw}x${ch}:r=${fps}:d=${duration},format=rgba[base]`,
+    `color=0x00000000:s=${cw}x${ch}:r=${fps}:d=${duration},format=rgba[btn_canvas]`,
+    `color=0x00000000:s=${cw}x${ch}:r=${fps}:d=${duration},format=rgba[glow_canvas]`,
+
+    // Button: split alpha → eq(gamma) → merge alpha → scale → center on canvas
+    `[0:v]format=rgba,split[btn_rgb][btn_a]`,
+    `[btn_a]alphaextract[alpha]`,
+    `[btn_rgb]eq=gamma='${gammaExpr}':eval=frame[btn_eq]`,
+    `[btn_eq][alpha]alphamerge,format=rgba,` +
+      `scale=w='${btnSW}':h='${btnSH}':eval=frame:flags=lanczos` +
+      `[btn_scaled]`,
+    `[btn_canvas][btn_scaled]overlay=x='(W-w)/2':y='(H-h)/2':format=auto:eval=frame:shortest=1[btn]`,
+
+    // Glow: scale → center on canvas (opacity baked in PNG)
+    `[1:v]format=rgba,` +
+      `scale=w='${glowSW}':h='${glowSH}':eval=frame:flags=lanczos` +
+      `[glow_scaled]`,
+    `[glow_canvas][glow_scaled]overlay=x='(W-w)/2':y='(H-h)/2':format=auto:eval=frame:shortest=1[glow]`,
+
+    // Final composite: base → glow → button
+    `[base][glow]overlay=format=auto:shortest=1[bg]`,
+    `[bg][btn]overlay=format=auto:shortest=1[out]`,
+  ].join(";");
+
+  // ── Step 3: Run ffmpeg via backend ─────────────────────────────────────────
+
+  // Resolve PNG paths for cloud backends (uploads to storage)
+  const btnInput = await resolvePathForBackend(btnPngPath, backend);
+  const glowInput = await resolvePathForBackend(glowPngPath, backend);
+
+  const outputPath = `/tmp/varg-blink-btn-${ts}.mov`;
+
+  const result = await backend.run({
+    inputs: [
+      { path: btnInput, options: ["-loop", "1"] },
+      { path: glowInput, options: ["-loop", "1"] },
+    ],
+    filterComplex,
+    outputArgs: [
+      "-map",
+      "[out]",
+      "-c:v",
+      "prores_ks",
+      "-profile:v",
+      "4444",
+      "-pix_fmt",
+      "yuva444p10le",
+      "-t",
+      String(duration),
+    ],
+    outputPath,
+  });
 
-/**
- * Calculate button Y position based on position prop
- */
-function getButtonYPosition(
-  position: "top" | "center" | "bottom",
-  videoHeight: number,
-  buttonHeight: number,
-): number {
-  switch (position) {
-    case "top":
-      return Math.floor(videoHeight * 0.15);
-    case "center":
-      return Math.floor((videoHeight - buttonHeight) / 2);
-    default:
-      return Math.floor(videoHeight * 0.78 - buttonHeight / 2);
-  }
-}
+  // ── Calculate overlay position on full video frame ─────────────────────────
 
-…
- */
-function escapeXml(text: string): string {
-  return text
-    .replace(/&/g, "&amp;")
-    .replace(/</g, "&lt;")
-    .replace(/>/g, "&gt;")
-    .replace(/"/g, "&quot;")
-    .replace(/'/g, "&apos;");
-}
+  const btnY = getButtonYPosition(position, height, ch);
+  const btnX = Math.floor((width - cw) / 2);
 
-…
-  });
-
-  let stderr = "";
-  ffmpeg.stderr?.on("data", (data) => {
-    stderr += data.toString();
-  });
-
-  ffmpeg.on("close", (code) => {
-    if (code === 0) {
-      resolve();
-    } else {
-      reject(new Error(`ffmpeg exited with code ${code}: ${stderr}`));
-    }
-  });
-
-  ffmpeg.on("error", reject);
-  });
+  return {
+    output: result.output,
+    x: btnX,
+    y: btnY,
+    canvasWidth: cw,
+    canvasHeight: ch,
+  };
 }
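The expression returned by oscExpr encodes the same piecewise curve that the removed per-frame loop computed in JavaScript; restated in plain TypeScript (transcribed directly from the removed code above), the easing is easy to verify:

// Same elastic-pulse curve as oscExpr, from the removed per-frame loop.
function osc(t: number, P: number): number {
  const phase = (t % P) / P; // 0 → 1 within each blink cycle
  if (phase < 0.25) return Math.sin((phase / 0.25) * Math.PI * 0.5) * 1.15; // fast rise, overshoot to 1.15
  if (phase < 0.4) return 1.15 - 0.15 * ((phase - 0.25) / 0.15); // settle 1.15 → 1.0
  return Math.cos(((phase - 0.4) / 0.6) * Math.PI * 0.5); // slow ease-out fall → 0
}

// Derived per-frame values, identical in the old loop and the new expressions:
//   scale      = 1.0  + 0.12 * osc(t, P)              // ≈ 1.14 peak during overshoot
//   brightness = 0.85 + 0.35 * Math.max(0, osc(t, P)) // 0.85 at rest → 1.2 at peak
// The glow's 60% max opacity is now baked into its PNG instead of being
// re-multiplied per frame.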
package/src/react/renderers/packshot.ts
CHANGED

@@ -1,4 +1,5 @@
 import { editly } from "../../ai-sdk/providers/editly";
+import type { FFmpegOutput } from "../../ai-sdk/providers/editly/backends/types";
 import type {
   Clip,
   ImageOverlayLayer,
@@ -8,11 +9,29 @@ import type {
   SizeValue,
   TitleLayer,
 } from "../../ai-sdk/providers/editly/types";
+import { uploadBuffer } from "../../providers/storage";
 import type { PackshotProps, VargElement } from "../types";
 import type { RenderContext } from "./context";
 import { renderImage } from "./image";
 import { createBlinkingButton } from "./packshot/blinking-button";
 
+/**
+ * Resolve an FFmpegOutput to a string path/URL, uploading local files for cloud backends.
+ */
+async function resolveInputMaybeUpload(
+  input: FFmpegOutput,
+  shouldUpload: boolean,
+): Promise<string> {
+  if (input.type === "url") return input.url;
+  if (!shouldUpload) return input.path;
+  const buffer = await Bun.file(input.path).arrayBuffer();
+  return uploadBuffer(
+    buffer,
+    `tmp/${Date.now()}-${input.path.split("/").pop()}`,
+    "application/octet-stream",
+  );
+}
+
 /**
  * Type guard: returns true if `pos` is a PositionObject ({ x, y }).
  */
@@ -165,39 +184,61 @@ export async function renderPackshot(
     height: ctx.height,
     fps: ctx.fps,
     clips: [clip],
+    backend: ctx.backend,
   });
 
   // ===== BLINKING CTA OVERLAY =====
   if (props.cta && props.blinkCta) {
-    …
+    const btn = await createBlinkingButton(
+      {
+        text: props.cta,
+        width: ctx.width,
+        height: ctx.height,
+        duration,
+        fps: ctx.fps,
+        bgColor: props.ctaColor ?? "#FF6B00",
+        textColor: props.ctaTextColor ?? "#FFFFFF",
+        blinkFrequency: props.blinkFrequency ?? 0.8,
+        position: mapCtaPosition(props.ctaPosition, ctx.height),
+        buttonWidth: props.ctaSize?.width,
+        buttonHeight: props.ctaSize?.height,
+      },
+      ctx.backend,
+    );
+
+    // Composite button overlay at correct position on base video via backend
+    const isCloud = ctx.backend.name !== "local";
+    const baseInput = await resolveInputMaybeUpload(
+      { type: "file", path: basePath },
+      isCloud,
+    );
+    const btnInput = await resolveInputMaybeUpload(btn.output, isCloud);
 
-    // Composite button-sized overlay at correct position on base video
     const finalPath = `/tmp/varg-packshot-final-${Date.now()}.mp4`;
-    …
+
+    const overlayResult = await ctx.backend.run({
+      inputs: [baseInput, btnInput],
+      filterComplex: `[0:v][1:v]overlay=${btn.x}:${btn.y}:format=auto`,
+      outputArgs: [
+        "-c:v",
+        "libx264",
+        "-preset",
+        "fast",
+        "-crf",
+        "18",
+        "-pix_fmt",
+        "yuv420p",
+      ],
+      outputPath: finalPath,
+    });
+
+    if (overlayResult.output.type === "file") {
+      ctx.tempFiles.push(basePath, overlayResult.output.path);
+      return overlayResult.output.path;
+    }
+    // Cloud backend returns URL
+    ctx.tempFiles.push(basePath);
+    return overlayResult.output.url;
   }
 
   ctx.tempFiles.push(basePath);
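resolveInputMaybeUpload and the result handling above imply the shape of FFmpegOutput; a sketch of the assumed union (the real definition lives in backends/types, which this diff does not show):

// Assumed discriminated union, inferred from the input.type checks in this diff.
type FFmpegOutputSketch =
  | { type: "file"; path: string } // local backend: path on disk
  | { type: "url"; url: string };  // cloud backend (rendi): uploaded result URL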
package/src/react/types.ts
CHANGED
package/.env.example
DELETED

@@ -1,33 +0,0 @@
-# fal.ai api key
-FAL_API_KEY=fal_xxx
-
-# higgsfield credentials
-HIGGSFIELD_API_KEY=hf_xxx
-HIGGSFIELD_SECRET=secret_xxx
-
-# elevenlabs api key
-ELEVENLABS_API_KEY=el_xxx
-
-# groq api key (ultra-fast whisper transcription)
-GROQ_API_KEY=gsk_xxx
-
-# fireworks api key (word-level transcription with timestamps)
-FIREWORKS_API_KEY=fw_xxx
-
-# cloudflare r2 / s3 storage
-CLOUDFLARE_R2_API_URL=https://xxx.r2.cloudflarestorage.com
-CLOUDFLARE_ACCESS_KEY_ID=xxx
-CLOUDFLARE_ACCESS_SECRET=xxx
-CLOUDFLARE_R2_BUCKET=m
-
-# replicate (optional)
-REPLICATE_API_TOKEN=r8_xxx
-
-# apify (web scraping actors)
-APIFY_TOKEN=apify_api_xxx
-
-# decart ai (real-time & batch video/image)
-DECART_API_KEY=decart_xxx
-
-# together ai (fast flux-schnell, no queue)
-TOGETHER_API_KEY=together_xxx
package/garry-tan-varg.tsx
DELETED

@@ -1,46 +0,0 @@
-import { elevenlabs, fal } from "vargai/ai";
-import {
-  Captions,
-  Clip,
-  Image,
-  Render,
-  render,
-  Speech,
-  Video,
-} from "vargai/react";
-
-// Garry Tan's face image
-const GARRY_TAN_IMAGE =
-  "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRLInrQmohhXbLY10G90yT0AJJCJdArYifp-IDKUmJQlwui0tnLihiHw9OzLfLfbofr5chH2s4&s=10";
-
-// Create speech first
-const voiceover = Speech({
-  model: elevenlabs.speechModel("eleven_v3"),
-  voice: "adam",
-  children: "varg.ai is cool!",
-});
-
-// Animate Garry's face to talk
-const animatedGarry = Video({
-  prompt: {
-    text: "man speaking naturally, subtle head movements, friendly expression",
-    images: [GARRY_TAN_IMAGE],
-  },
-  model: fal.videoModel("kling-v2.5"),
-});
-
-// Sync lip movements with audio
-const syncedVideo = Video({
-  prompt: { video: animatedGarry, audio: voiceover },
-  model: fal.videoModel("sync-v2-pro"),
-});
-
-await render(
-  <Render width={1080} height={1920}>
-    <Clip duration={5}>{syncedVideo}</Clip>
-    <Captions src={voiceover} style="tiktok" color="#ffffff" />
-  </Render>,
-  { output: "output/garry-tan-varg.mp4" },
-);
-
-console.log("Done! Video saved to output/garry-tan-varg.mp4");
package/src/ai-sdk/examples/garry-tan-varg.ts
DELETED

@@ -1,61 +0,0 @@
-/**
- * Garry Tan Talking Head Video
- * Generate a video of Garry Tan saying "varg.ai is cool!"
- */
-
-import {
-  generateImage,
-  experimental_generateSpeech as generateSpeech,
-} from "ai";
-import { elevenlabs, File, fal, generateVideo } from "../index";
-
-async function main() {
-  const script = `varg.ai is cool!`;
-
-  console.log("generating Garry Tan image and voice in parallel...");
-  const [imageResult, speechResult] = await Promise.all([
-    generateImage({
-      model: fal.imageModel("flux-schnell"),
-      prompt:
-        "Garry Tan, Y Combinator CEO, Asian American man, short dark hair, glasses, friendly smile, professional headshot, studio lighting, clean background, looking at camera",
-      n: 1,
-    }),
-    generateSpeech({
-      model: elevenlabs.speechModel("turbo"),
-      text: script,
-      voice: "adam",
-    }),
-  ]);
-
-  const firstImage = imageResult.images[0];
-  if (!firstImage) throw new Error("No image generated");
-  const image = File.from(firstImage);
-  const audio = File.from(speechResult.audio);
-
-  console.log(`image: ${(await image.data()).byteLength} bytes`);
-  console.log(`audio: ${(await audio.data()).byteLength} bytes`);
-
-  await Bun.write("output/garry-tan-image.png", await image.data());
-  await Bun.write("output/garry-tan-voice.mp3", await audio.data());
-
-  console.log("\nanimating Garry Tan (5 seconds)...");
-  const { video } = await generateVideo({
-    model: fal.videoModel("wan-2.5"),
-    prompt: {
-      text: "man talking naturally, moving mouth while speaking, subtle head movements, professional demeanor, blinking naturally",
-      images: [await image.data()],
-    },
-    duration: 5,
-  });
-
-  const output = File.from(video);
-  console.log(`video: ${(await output.data()).byteLength} bytes`);
-  await Bun.write("output/garry-tan-varg.mp4", await output.data());
-
-  console.log("\ndone! files saved to output/");
-  console.log("- output/garry-tan-image.png");
-  console.log("- output/garry-tan-voice.mp3");
-  console.log("- output/garry-tan-varg.mp4");
-}
-
-main().catch(console.error);
package/src/react/examples/garry-tan-varg.tsx
DELETED

@@ -1,52 +0,0 @@
-/**
- * Garry Tan Talking Head Video
- * Using vargai/react JSX syntax with lipsync
- */
-
-import { elevenlabs, fal, higgsfield } from "../../ai-sdk";
-import { Captions, Clip, Image, Music, Render, render, Speech, Video } from "..";
-
-const CHARACTER = "Garry Tan, Y Combinator CEO, Asian American man in his 40s, short dark hair, modern glasses, friendly confident smile, professional headshot, studio lighting, clean modern office background";
-
-const baseCharacter = Image({
-  prompt: CHARACTER,
-  model: higgsfield.imageModel("soul", { styleId: higgsfield.styles.REALISTIC }),
-  aspectRatio: "9:16",
-});
-
-const animatedCharacter = Video({
-  prompt: { text: "man speaking naturally, subtle head movements, friendly professional expression, blinking naturally", images: [baseCharacter] },
-  model: fal.videoModel("kling-v2.5"),
-});
-
-const voiceover = Speech({
-  model: elevenlabs.speechModel("eleven_v3"),
-  voice: "adam",
-  children: "varg.ai is cool!"
-});
-
-async function main() {
-  console.log("Creating Garry Tan talking head video...\n");
-
-  const video = (
-    <Render width={1080} height={1920}>
-      <Music prompt="modern tech ambient, subtle electronic, minimal, professional" model={elevenlabs.musicModel()} volume={0.1} />
-      <Clip duration={5}>
-        <Video prompt={{ video: animatedCharacter, audio: voiceover }} model={fal.videoModel("sync-v2-pro")} />
-      </Clip>
-      <Captions src={voiceover} style="tiktok" color="#ffffff" />
-    </Render>
-  );
-
-  console.log("Rendering video with lipsync...");
-
-  const buffer = await render(video, {
-    output: "output/garry-tan-varg-react.mp4",
-    cache: ".cache/ai",
-  });
-
-  console.log(`\nDone! ${buffer.byteLength} bytes`);
-  console.log("Output: output/garry-tan-varg-react.mp4");
-}
-
-main().catch(console.error);