npm - vargai - Versions diffs - 0.4.0-alpha104 → 0.4.0-alpha106 - Mend

vargai 0.4.0-alpha104 → 0.4.0-alpha106

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (18) hide show

package/package.json +4 -1
package/src/ai-sdk/providers/editly/backends/types.ts +5 -0
package/src/ai-sdk/providers/editly/rendi/index.ts +204 -0
package/src/ai-sdk/providers/elevenlabs.ts +1 -1
package/src/ai-sdk/providers/fal.ts +21 -11
package/src/definitions/models/nano-banana-2.ts +6 -5
package/src/providers/elevenlabs.ts +2 -1
package/src/providers/fal.ts +6 -2
package/src/react/renderers/burn-captions.ts +224 -30
package/src/react/renderers/captions.ts +276 -26
package/src/react/renderers/emoji-position-test.ts +458 -0
package/src/react/renderers/emoji.ts +297 -0
package/src/react/renderers/fonts.ts +509 -0
package/src/react/renderers/image.ts +3 -3
package/src/react/renderers/render.ts +31 -0
package/src/react/renderers/text-measure.ts +645 -0
package/src/react/resolve.ts +5 -3
package/src/react/types.ts +4 -1

package/package.json CHANGED Viewed

@@ -28,6 +28,7 @@
     "@commitlint/config-conventional": "^20.0.0",
     "@size-limit/preset-small-lib": "^11.2.0",
     "@types/bun": "latest",
+    "@types/opentype.js": "^1.3.9",
     "@types/react": "^19.2.7",
     "husky": "^9.1.7",
     "lint-staged": "^16.2.7"
@@ -58,9 +59,11 @@
     "ai": "^6.0.26",
     "apify-client": "^2.20.0",
     "citty": "^0.1.6",
+    "fflate": "^0.8.2",
     "fluent-ffmpeg": "^2.1.3",
     "groq-sdk": "^0.36.0",
     "ink": "^6.5.1",
+    "opentype.js": "^1.3.4",
     "p-limit": "^6.2.0",
     "p-map": "^7.0.4",
     "react": "^19.2.0",
@@ -104,7 +107,7 @@
   "license": "Apache-2.0",
   "author": "varg.ai <hello@varg.ai> (https://varg.ai)",
   "sideEffects": false,
-  "version": "0.4.0-alpha104",
+  "version": "0.4.0-alpha106",
   "exports": {
     ".": "./src/index.ts",
     "./ai": "./src/ai-sdk/index.ts",

package/src/ai-sdk/providers/editly/backends/types.ts CHANGED Viewed

@@ -47,6 +47,11 @@ export interface FFmpegRunOptions {
   verbose?: boolean;
   /** Max execution time in seconds (used by cloud backends like Rendi, ignored by local) */
   timeoutSeconds?: number;
+  /** Extra files (e.g. fonts, ASS subtitles) to include alongside inputs.
+   *  When present, cloud backends like Rendi use compressed folder mode
+   *  (input_compressed_folder) to bundle all files together.
+   *  Each entry provides either a `url` to download or raw `data` bytes. */
+  auxiliaryFiles?: { url?: string; data?: Uint8Array; fileName: string }[];
 }
 export type FFmpegOutput =

package/src/ai-sdk/providers/editly/rendi/index.ts CHANGED Viewed

@@ -1,3 +1,4 @@
+import { zipSync } from "fflate";
 import sharp from "sharp";
 import { File } from "../../../file";
 import type { StorageProvider } from "../../../storage/types";
@@ -128,6 +129,11 @@ export class RendiBackend implements FFmpegBackend {
   }
   async run(options: FFmpegRunOptions): Promise<FFmpegRunResult> {
+    // When auxiliary files (e.g. fonts) are present, use compressed folder mode
+    if (options.auxiliaryFiles && options.auxiliaryFiles.length > 0) {
+      return this.runWithCompressedFolder(options);
+    }
     let {
       inputs,
       filterComplex,
@@ -287,6 +293,204 @@ export class RendiBackend implements FFmpegBackend {
     throw new Error("Rendi command timed out");
   }
+  /**
+   * Run an FFmpeg command using Rendi's input_compressed_folder mode.
+   *
+   * Used when auxiliary files (e.g. fonts for subtitle rendering) need to be
+   * bundled alongside regular inputs. Creates a ZIP containing all input files
+   * and auxiliary files, uploads it to storage, and submits to Rendi with
+   * `input_compressed_folder` instead of `input_files`.
+   *
+   * Inside the ZIP, all files are at the root level. The ffmpeg command
+   * references files by their bare filenames (not placeholders).
+   */
+  private async runWithCompressedFolder(
+    options: FFmpegRunOptions,
+  ): Promise<FFmpegRunResult> {
+    const {
+      inputs,
+      videoFilter,
+      filterComplex,
+      outputArgs = [],
+      outputPath,
+      verbose,
+      auxiliaryFiles = [],
+    } = options;
+    // 1. Resolve all input files to URLs
+    const inputEntries: { fileName: string; url: string }[] = [];
+    for (const input of inputs ?? []) {
+      const path = this.getInputPath(input);
+      const url = await this.resolvePath(path);
+      // Extract filename from URL or path
+      const fileName =
+        url.split("/").pop()?.split("?")[0] ?? `input_${inputEntries.length}`;
+      inputEntries.push({ fileName, url });
+    }
+    // 2. Download all files (inputs + auxiliary) into memory
+    const zipContents: Record<string, Uint8Array> = {};
+    const downloadTasks = [
+      ...inputEntries.map(async (entry) => {
+        const res = await fetch(entry.url);
+        if (!res.ok)
+          throw new Error(
+            `Failed to download input ${entry.fileName}: ${res.status}`,
+          );
+        zipContents[entry.fileName] = new Uint8Array(await res.arrayBuffer());
+      }),
+      ...auxiliaryFiles.map(async (file) => {
+        if (file.data) {
+          // Inline data — no download needed
+          zipContents[file.fileName] = file.data;
+          return;
+        }
+        if (!file.url) {
+          throw new Error(
+            `Auxiliary file ${file.fileName} has neither url nor data`,
+          );
+        }
+        const res = await fetch(file.url);
+        if (!res.ok)
+          throw new Error(
+            `Failed to download auxiliary file ${file.fileName}: ${res.status}`,
+          );
+        zipContents[file.fileName] = new Uint8Array(await res.arrayBuffer());
+      }),
+    ];
+    await Promise.all(downloadTasks);
+    if (verbose) {
+      const totalSize = Object.values(zipContents).reduce(
+        (sum, buf) => sum + buf.length,
+        0,
+      );
+      console.log(
+        `[rendi] creating ZIP with ${Object.keys(zipContents).length} files (${(totalSize / 1024 / 1024).toFixed(1)} MB)`,
+      );
+    }
+    // 3. Create ZIP
+    const zipData = zipSync(zipContents, { level: 1 }); // fast compression
+    // 4. Upload ZIP to storage
+    const zipKey = `internal/rendi-compressed-${Date.now()}.zip`;
+    const zipUrl = await this.storage.upload(
+      zipData,
+      zipKey,
+      "application/zip",
+    );
+    if (verbose) {
+      console.log(
+        `[rendi] uploaded ZIP (${(zipData.length / 1024 / 1024).toFixed(1)} MB) -> ${zipUrl}`,
+      );
+    }
+    // 5. Build ffmpeg command using bare filenames (not {{in_X}} placeholders)
+    const inputArgs: string[] = [];
+    for (const [i, input] of (inputs ?? []).entries()) {
+      if (typeof input !== "string" && "options" in input && input.options) {
+        inputArgs.push(...input.options);
+      }
+      inputArgs.push("-i", inputEntries[i]!.fileName);
+    }
+    const filterArgs: string[] = [];
+    if (filterComplex) {
+      filterArgs.push("-filter_complex", filterComplex);
+    }
+    if (videoFilter) {
+      // For compressed folder mode, the video filter references files by
+      // their bare filenames (already resolved in the working directory)
+      filterArgs.push("-vf", videoFilter);
+    }
+    const processedOutputArgs = outputArgs.filter((arg) => arg !== "-y");
+    const commandParts = [
+      ...inputArgs,
+      ...filterArgs,
+      ...processedOutputArgs,
+      "{{out_1}}",
+    ];
+    const ffmpegCommand = this.buildCommandString(commandParts);
+    const outputFilename = outputPath?.split("/").pop() ?? "output.mp4";
+    if (verbose) {
+      console.log("[rendi] input_compressed_folder:", zipUrl);
+      console.log("[rendi] ffmpeg_command:", ffmpegCommand);
+    }
+    // 6. Submit to Rendi with input_compressed_folder
+    const submitResponse = await fetch(`${RENDI_API_BASE}/run-ffmpeg-command`, {
+      method: "POST",
+      headers: {
+        "X-API-KEY": this.apiKey,
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({
+        input_compressed_folder: zipUrl,
+        output_files: { out_1: outputFilename },
+        ffmpeg_command: ffmpegCommand,
+        max_command_run_seconds:
+          options.timeoutSeconds ?? this.maxCommandRunSeconds,
+      }),
+    });
+    if (!submitResponse.ok) {
+      const errorText = await submitResponse.text();
+      throw new Error(
+        `Rendi submit failed: ${submitResponse.status} - ${errorText}`,
+      );
+    }
+    const { command_id } =
+      (await submitResponse.json()) as RendiCommandResponse;
+    if (verbose) {
+      console.log("[rendi] command_id:", command_id);
+    }
+    // 7. Poll for completion (same as standard run)
+    let attempts = 0;
+    while (attempts < MAX_POLL_ATTEMPTS) {
+      const statusResponse = await fetch(
+        `${RENDI_API_BASE}/commands/${command_id}`,
+        {
+          headers: { "X-API-KEY": this.apiKey },
+        },
+      );
+      if (!statusResponse.ok) {
+        throw new Error(`Rendi poll failed: ${statusResponse.status}`);
+      }
+      const status = (await statusResponse.json()) as RendiStatusResponse;
+      if (status.status === "SUCCESS") {
+        const outputFile = status.output_files?.out_1;
+        if (!outputFile?.storage_url) {
+          throw new Error("Rendi completed but no output URL");
+        }
+        return { output: { type: "url", url: outputFile.storage_url } };
+      }
+      if (status.status === "FAILED") {
+        throw new Error(
+          `Rendi command failed: ${status.error_message ?? "unknown error"}`,
+        );
+      }
+      await this.sleep(POLL_INTERVAL_MS);
+      attempts++;
+    }
+    throw new Error("Rendi command timed out");
+  }
   async resolvePath(input: FilePath): Promise<string> {
     if (input instanceof File) {
       return input.upload(this.storage);

package/src/ai-sdk/providers/elevenlabs.ts CHANGED Viewed

@@ -89,7 +89,7 @@ class ElevenLabsMusicModel implements MusicModelV3 {
     const elevenLabsOptions = providerOptions?.elevenlabs ?? {};
     const audio = await this.client.music.compose({
       prompt,
-      musicLengthMs: duration ? duration * 1000 : undefined,
+      musicLengthMs: duration ? Math.round(duration * 1000) : undefined,
       modelId: this.modelId,
       ...elevenLabsOptions,
     } as Parameters<typeof this.client.music.compose>[0]);

package/src/ai-sdk/providers/fal.ts CHANGED Viewed

@@ -196,7 +196,7 @@ const IMAGE_MODELS: Record<string, string> = {
   "recraft-v3": "fal-ai/recraft/v3/text-to-image",
   "nano-banana-pro": "fal-ai/nano-banana-pro",
   "nano-banana-pro/edit": "fal-ai/nano-banana-pro/edit",
-  "nano-banana-2": "fal-ai/nano-banana-2/edit",
+  "nano-banana-2": "fal-ai/nano-banana-2",
   "nano-banana-2/edit": "fal-ai/nano-banana-2/edit",
   "seedream-v4.5/edit": "fal-ai/bytedance/seedream/v4.5/edit",
   // Qwen Image 2 - text-to-image and image-to-image editing (standard + pro)
@@ -924,13 +924,21 @@ class FalImageModel implements ImageModelV3 {
     }
     const hasFiles = files && files.length > 0;
-    const finalEndpoint = this.resolveEndpoint();
     let stableKey: string | undefined;
     if (hasFiles && files) {
       const fileHashes = await computeFileHashes(files);
+      const imageUrls = await pMap(files, fileToUrl, { concurrency: 2 });
+      // Reve uses singular image_url instead of image_urls array
+      if (SINGULAR_IMAGE_URL_MODELS.has(this.modelId)) {
+        input.image_url = imageUrls[0];
+      } else {
+        input.image_urls = imageUrls;
+      }
+      // Compute stable key after files are resolved
+      const finalEndpointForKey = this.resolveEndpoint(hasFiles);
       stableKey = JSON.stringify({
-        endpoint: finalEndpoint,
+        endpoint: finalEndpointForKey,
         prompt,
         n,
         size,
@@ -940,13 +948,6 @@ class FalImageModel implements ImageModelV3 {
         modelId: this.modelId,
         fileHashes,
       });
-      const imageUrls = await pMap(files, fileToUrl, { concurrency: 2 });
-      // Reve uses singular image_url instead of image_urls array
-      if (SINGULAR_IMAGE_URL_MODELS.has(this.modelId)) {
-        input.image_url = imageUrls[0];
-      } else {
-        input.image_urls = imageUrls;
-      }
     }
     if (isQwenAngles && !input.image_urls) {
@@ -962,6 +963,10 @@ class FalImageModel implements ImageModelV3 {
       }
     }
+    // Resolve endpoint after file processing so dual-endpoint models
+    // (e.g. nano-banana-2 vs nano-banana-2/edit) route correctly
+    const finalEndpoint = this.resolveEndpoint(hasFiles);
     const result = await executeWithQueueRecovery<{ data: unknown }>(
       finalEndpoint,
       input,
@@ -998,11 +1003,16 @@ class FalImageModel implements ImageModelV3 {
     };
   }
-  private resolveEndpoint(): string {
+  private resolveEndpoint(hasFiles?: boolean): string {
     if (this.modelId.startsWith("raw:")) {
       return this.modelId.slice(4);
     }
+    // Nano Banana 2: route to /edit when images are provided, base endpoint for t2i
+    if (this.modelId === "nano-banana-2" && hasFiles) {
+      return "fal-ai/nano-banana-2/edit";
+    }
     return IMAGE_MODELS[this.modelId] ?? this.modelId;
   }
 }

package/src/definitions/models/nano-banana-2.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 /**
- * Nano Banana 2 image editing model (Google's next-gen image generation/editing)
- * Edit-only model requiring image_urls input
+ * Nano Banana 2 image model (Google's next-gen image generation/editing)
+ * Supports both text-to-image (no images) and image editing (with image_urls)
  */
 import { z } from "zod";
@@ -35,8 +35,9 @@ const nanoBanana2InputSchema = z.object({
   prompt: z.string().describe("Text description for image editing"),
   image_urls: z
     .array(z.string().url())
+    .optional()
     .describe(
-      "Input image URLs for image-to-image editing. Required for this model.",
+      "Input image URLs for image editing. When provided, routes to the /edit endpoint. Omit for text-to-image generation.",
     ),
   resolution: nanoBanana2ResolutionSchema
     .default("1K")
@@ -103,11 +104,11 @@ export const definition: ModelDefinition<typeof schema> = {
   type: "model",
   name: "nano-banana-2",
   description:
-    "Google Nano Banana 2 - next-gen image editing model. Requires image_urls for all operations.",
+    "Google Nano Banana 2 - next-gen image generation and editing model. Supports text-to-image and image editing (with image_urls).",
   providers: ["fal"],
   defaultProvider: "fal",
   providerModels: {
-    fal: "fal-ai/nano-banana-2/edit",
+    fal: "fal-ai/nano-banana-2",
   },
   schema,
 };

package/src/providers/elevenlabs.ts CHANGED Viewed

@@ -117,7 +117,8 @@ export class ElevenLabsProvider extends BaseProvider {
     const audio = await this.client.music.compose({
       prompt,
-      musicLengthMs,
+      musicLengthMs:
+        musicLengthMs != null ? Math.round(musicLengthMs) : undefined,
       modelId: "music_v1",
     });

package/src/providers/fal.ts CHANGED Viewed

@@ -54,9 +54,13 @@ export class FalProvider extends BaseProvider {
         return "fal-ai/nano-banana-pro/edit";
       }
     }
-    // Nano Banana 2: always route to /edit endpoint (edit-only model)
+    // Nano Banana 2: route to /edit when image_urls are provided, otherwise use base t2i endpoint
     if (model === "fal-ai/nano-banana-2") {
-      return "fal-ai/nano-banana-2/edit";
+      const imageUrls = inputs.image_urls as string[] | undefined;
+      if (imageUrls && imageUrls.length > 0) {
+        return "fal-ai/nano-banana-2/edit";
+      }
+      return "fal-ai/nano-banana-2";
     }
     // Qwen Image 2: route to /edit endpoint when image_urls are provided
     if (model === "fal-ai/qwen-image-2/text-to-image") {