vargai 0.4.0-alpha104 → 0.4.0-alpha106

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/package.json CHANGED
@@ -28,6 +28,7 @@
28
28
  "@commitlint/config-conventional": "^20.0.0",
29
29
  "@size-limit/preset-small-lib": "^11.2.0",
30
30
  "@types/bun": "latest",
31
+ "@types/opentype.js": "^1.3.9",
31
32
  "@types/react": "^19.2.7",
32
33
  "husky": "^9.1.7",
33
34
  "lint-staged": "^16.2.7"
@@ -58,9 +59,11 @@
58
59
  "ai": "^6.0.26",
59
60
  "apify-client": "^2.20.0",
60
61
  "citty": "^0.1.6",
62
+ "fflate": "^0.8.2",
61
63
  "fluent-ffmpeg": "^2.1.3",
62
64
  "groq-sdk": "^0.36.0",
63
65
  "ink": "^6.5.1",
66
+ "opentype.js": "^1.3.4",
64
67
  "p-limit": "^6.2.0",
65
68
  "p-map": "^7.0.4",
66
69
  "react": "^19.2.0",
@@ -104,7 +107,7 @@
104
107
  "license": "Apache-2.0",
105
108
  "author": "varg.ai <hello@varg.ai> (https://varg.ai)",
106
109
  "sideEffects": false,
107
- "version": "0.4.0-alpha104",
110
+ "version": "0.4.0-alpha106",
108
111
  "exports": {
109
112
  ".": "./src/index.ts",
110
113
  "./ai": "./src/ai-sdk/index.ts",
@@ -47,6 +47,11 @@ export interface FFmpegRunOptions {
47
47
  verbose?: boolean;
48
48
  /** Max execution time in seconds (used by cloud backends like Rendi, ignored by local) */
49
49
  timeoutSeconds?: number;
50
+ /** Extra files (e.g. fonts, ASS subtitles) to include alongside inputs.
51
+ * When present, cloud backends like Rendi use compressed folder mode
52
+ * (input_compressed_folder) to bundle all files together.
53
+ * Each entry provides either a `url` to download or raw `data` bytes. */
54
+ auxiliaryFiles?: { url?: string; data?: Uint8Array; fileName: string }[];
50
55
  }
51
56
 
52
57
  export type FFmpegOutput =
@@ -1,3 +1,4 @@
1
+ import { zipSync } from "fflate";
1
2
  import sharp from "sharp";
2
3
  import { File } from "../../../file";
3
4
  import type { StorageProvider } from "../../../storage/types";
@@ -128,6 +129,11 @@ export class RendiBackend implements FFmpegBackend {
128
129
  }
129
130
 
130
131
  async run(options: FFmpegRunOptions): Promise<FFmpegRunResult> {
132
+ // When auxiliary files (e.g. fonts) are present, use compressed folder mode
133
+ if (options.auxiliaryFiles && options.auxiliaryFiles.length > 0) {
134
+ return this.runWithCompressedFolder(options);
135
+ }
136
+
131
137
  let {
132
138
  inputs,
133
139
  filterComplex,
@@ -287,6 +293,204 @@ export class RendiBackend implements FFmpegBackend {
287
293
  throw new Error("Rendi command timed out");
288
294
  }
289
295
 
296
+ /**
297
+ * Run an FFmpeg command using Rendi's input_compressed_folder mode.
298
+ *
299
+ * Used when auxiliary files (e.g. fonts for subtitle rendering) need to be
300
+ * bundled alongside regular inputs. Creates a ZIP containing all input files
301
+ * and auxiliary files, uploads it to storage, and submits to Rendi with
302
+ * `input_compressed_folder` instead of `input_files`.
303
+ *
304
+ * Inside the ZIP, all files are at the root level. The ffmpeg command
305
+ * references files by their bare filenames (not placeholders).
306
+ */
307
+ private async runWithCompressedFolder(
308
+ options: FFmpegRunOptions,
309
+ ): Promise<FFmpegRunResult> {
310
+ const {
311
+ inputs,
312
+ videoFilter,
313
+ filterComplex,
314
+ outputArgs = [],
315
+ outputPath,
316
+ verbose,
317
+ auxiliaryFiles = [],
318
+ } = options;
319
+
320
+ // 1. Resolve all input files to URLs
321
+ const inputEntries: { fileName: string; url: string }[] = [];
322
+ for (const input of inputs ?? []) {
323
+ const path = this.getInputPath(input);
324
+ const url = await this.resolvePath(path);
325
+ // Extract filename from URL or path
326
+ const fileName =
327
+ url.split("/").pop()?.split("?")[0] ?? `input_${inputEntries.length}`;
328
+ inputEntries.push({ fileName, url });
329
+ }
330
+
331
+ // 2. Download all files (inputs + auxiliary) into memory
332
+ const zipContents: Record<string, Uint8Array> = {};
333
+
334
+ const downloadTasks = [
335
+ ...inputEntries.map(async (entry) => {
336
+ const res = await fetch(entry.url);
337
+ if (!res.ok)
338
+ throw new Error(
339
+ `Failed to download input ${entry.fileName}: ${res.status}`,
340
+ );
341
+ zipContents[entry.fileName] = new Uint8Array(await res.arrayBuffer());
342
+ }),
343
+ ...auxiliaryFiles.map(async (file) => {
344
+ if (file.data) {
345
+ // Inline data — no download needed
346
+ zipContents[file.fileName] = file.data;
347
+ return;
348
+ }
349
+ if (!file.url) {
350
+ throw new Error(
351
+ `Auxiliary file ${file.fileName} has neither url nor data`,
352
+ );
353
+ }
354
+ const res = await fetch(file.url);
355
+ if (!res.ok)
356
+ throw new Error(
357
+ `Failed to download auxiliary file ${file.fileName}: ${res.status}`,
358
+ );
359
+ zipContents[file.fileName] = new Uint8Array(await res.arrayBuffer());
360
+ }),
361
+ ];
362
+
363
+ await Promise.all(downloadTasks);
364
+
365
+ if (verbose) {
366
+ const totalSize = Object.values(zipContents).reduce(
367
+ (sum, buf) => sum + buf.length,
368
+ 0,
369
+ );
370
+ console.log(
371
+ `[rendi] creating ZIP with ${Object.keys(zipContents).length} files (${(totalSize / 1024 / 1024).toFixed(1)} MB)`,
372
+ );
373
+ }
374
+
375
+ // 3. Create ZIP
376
+ const zipData = zipSync(zipContents, { level: 1 }); // fast compression
377
+
378
+ // 4. Upload ZIP to storage
379
+ const zipKey = `internal/rendi-compressed-${Date.now()}.zip`;
380
+ const zipUrl = await this.storage.upload(
381
+ zipData,
382
+ zipKey,
383
+ "application/zip",
384
+ );
385
+
386
+ if (verbose) {
387
+ console.log(
388
+ `[rendi] uploaded ZIP (${(zipData.length / 1024 / 1024).toFixed(1)} MB) -> ${zipUrl}`,
389
+ );
390
+ }
391
+
392
+ // 5. Build ffmpeg command using bare filenames (not {{in_X}} placeholders)
393
+ const inputArgs: string[] = [];
394
+ for (const [i, input] of (inputs ?? []).entries()) {
395
+ if (typeof input !== "string" && "options" in input && input.options) {
396
+ inputArgs.push(...input.options);
397
+ }
398
+ inputArgs.push("-i", inputEntries[i]!.fileName);
399
+ }
400
+
401
+ const filterArgs: string[] = [];
402
+ if (filterComplex) {
403
+ filterArgs.push("-filter_complex", filterComplex);
404
+ }
405
+ if (videoFilter) {
406
+ // For compressed folder mode, the video filter references files by
407
+ // their bare filenames (already resolved in the working directory)
408
+ filterArgs.push("-vf", videoFilter);
409
+ }
410
+
411
+ const processedOutputArgs = outputArgs.filter((arg) => arg !== "-y");
412
+
413
+ const commandParts = [
414
+ ...inputArgs,
415
+ ...filterArgs,
416
+ ...processedOutputArgs,
417
+ "{{out_1}}",
418
+ ];
419
+ const ffmpegCommand = this.buildCommandString(commandParts);
420
+ const outputFilename = outputPath?.split("/").pop() ?? "output.mp4";
421
+
422
+ if (verbose) {
423
+ console.log("[rendi] input_compressed_folder:", zipUrl);
424
+ console.log("[rendi] ffmpeg_command:", ffmpegCommand);
425
+ }
426
+
427
+ // 6. Submit to Rendi with input_compressed_folder
428
+ const submitResponse = await fetch(`${RENDI_API_BASE}/run-ffmpeg-command`, {
429
+ method: "POST",
430
+ headers: {
431
+ "X-API-KEY": this.apiKey,
432
+ "Content-Type": "application/json",
433
+ },
434
+ body: JSON.stringify({
435
+ input_compressed_folder: zipUrl,
436
+ output_files: { out_1: outputFilename },
437
+ ffmpeg_command: ffmpegCommand,
438
+ max_command_run_seconds:
439
+ options.timeoutSeconds ?? this.maxCommandRunSeconds,
440
+ }),
441
+ });
442
+
443
+ if (!submitResponse.ok) {
444
+ const errorText = await submitResponse.text();
445
+ throw new Error(
446
+ `Rendi submit failed: ${submitResponse.status} - ${errorText}`,
447
+ );
448
+ }
449
+
450
+ const { command_id } =
451
+ (await submitResponse.json()) as RendiCommandResponse;
452
+
453
+ if (verbose) {
454
+ console.log("[rendi] command_id:", command_id);
455
+ }
456
+
457
+ // 7. Poll for completion (same as standard run)
458
+ let attempts = 0;
459
+ while (attempts < MAX_POLL_ATTEMPTS) {
460
+ const statusResponse = await fetch(
461
+ `${RENDI_API_BASE}/commands/${command_id}`,
462
+ {
463
+ headers: { "X-API-KEY": this.apiKey },
464
+ },
465
+ );
466
+
467
+ if (!statusResponse.ok) {
468
+ throw new Error(`Rendi poll failed: ${statusResponse.status}`);
469
+ }
470
+
471
+ const status = (await statusResponse.json()) as RendiStatusResponse;
472
+
473
+ if (status.status === "SUCCESS") {
474
+ const outputFile = status.output_files?.out_1;
475
+ if (!outputFile?.storage_url) {
476
+ throw new Error("Rendi completed but no output URL");
477
+ }
478
+ return { output: { type: "url", url: outputFile.storage_url } };
479
+ }
480
+
481
+ if (status.status === "FAILED") {
482
+ throw new Error(
483
+ `Rendi command failed: ${status.error_message ?? "unknown error"}`,
484
+ );
485
+ }
486
+
487
+ await this.sleep(POLL_INTERVAL_MS);
488
+ attempts++;
489
+ }
490
+
491
+ throw new Error("Rendi command timed out");
492
+ }
493
+
290
494
  async resolvePath(input: FilePath): Promise<string> {
291
495
  if (input instanceof File) {
292
496
  return input.upload(this.storage);
@@ -89,7 +89,7 @@ class ElevenLabsMusicModel implements MusicModelV3 {
89
89
  const elevenLabsOptions = providerOptions?.elevenlabs ?? {};
90
90
  const audio = await this.client.music.compose({
91
91
  prompt,
92
- musicLengthMs: duration ? duration * 1000 : undefined,
92
+ musicLengthMs: duration ? Math.round(duration * 1000) : undefined,
93
93
  modelId: this.modelId,
94
94
  ...elevenLabsOptions,
95
95
  } as Parameters<typeof this.client.music.compose>[0]);
@@ -196,7 +196,7 @@ const IMAGE_MODELS: Record<string, string> = {
196
196
  "recraft-v3": "fal-ai/recraft/v3/text-to-image",
197
197
  "nano-banana-pro": "fal-ai/nano-banana-pro",
198
198
  "nano-banana-pro/edit": "fal-ai/nano-banana-pro/edit",
199
- "nano-banana-2": "fal-ai/nano-banana-2/edit",
199
+ "nano-banana-2": "fal-ai/nano-banana-2",
200
200
  "nano-banana-2/edit": "fal-ai/nano-banana-2/edit",
201
201
  "seedream-v4.5/edit": "fal-ai/bytedance/seedream/v4.5/edit",
202
202
  // Qwen Image 2 - text-to-image and image-to-image editing (standard + pro)
@@ -924,13 +924,21 @@ class FalImageModel implements ImageModelV3 {
924
924
  }
925
925
 
926
926
  const hasFiles = files && files.length > 0;
927
- const finalEndpoint = this.resolveEndpoint();
928
927
 
929
928
  let stableKey: string | undefined;
930
929
  if (hasFiles && files) {
931
930
  const fileHashes = await computeFileHashes(files);
931
+ const imageUrls = await pMap(files, fileToUrl, { concurrency: 2 });
932
+ // Reve uses singular image_url instead of image_urls array
933
+ if (SINGULAR_IMAGE_URL_MODELS.has(this.modelId)) {
934
+ input.image_url = imageUrls[0];
935
+ } else {
936
+ input.image_urls = imageUrls;
937
+ }
938
+ // Compute stable key after files are resolved
939
+ const finalEndpointForKey = this.resolveEndpoint(hasFiles);
932
940
  stableKey = JSON.stringify({
933
- endpoint: finalEndpoint,
941
+ endpoint: finalEndpointForKey,
934
942
  prompt,
935
943
  n,
936
944
  size,
@@ -940,13 +948,6 @@ class FalImageModel implements ImageModelV3 {
940
948
  modelId: this.modelId,
941
949
  fileHashes,
942
950
  });
943
- const imageUrls = await pMap(files, fileToUrl, { concurrency: 2 });
944
- // Reve uses singular image_url instead of image_urls array
945
- if (SINGULAR_IMAGE_URL_MODELS.has(this.modelId)) {
946
- input.image_url = imageUrls[0];
947
- } else {
948
- input.image_urls = imageUrls;
949
- }
950
951
  }
951
952
 
952
953
  if (isQwenAngles && !input.image_urls) {
@@ -962,6 +963,10 @@ class FalImageModel implements ImageModelV3 {
962
963
  }
963
964
  }
964
965
 
966
+ // Resolve endpoint after file processing so dual-endpoint models
967
+ // (e.g. nano-banana-2 vs nano-banana-2/edit) route correctly
968
+ const finalEndpoint = this.resolveEndpoint(hasFiles);
969
+
965
970
  const result = await executeWithQueueRecovery<{ data: unknown }>(
966
971
  finalEndpoint,
967
972
  input,
@@ -998,11 +1003,16 @@ class FalImageModel implements ImageModelV3 {
998
1003
  };
999
1004
  }
1000
1005
 
1001
- private resolveEndpoint(): string {
1006
+ private resolveEndpoint(hasFiles?: boolean): string {
1002
1007
  if (this.modelId.startsWith("raw:")) {
1003
1008
  return this.modelId.slice(4);
1004
1009
  }
1005
1010
 
1011
+ // Nano Banana 2: route to /edit when images are provided, base endpoint for t2i
1012
+ if (this.modelId === "nano-banana-2" && hasFiles) {
1013
+ return "fal-ai/nano-banana-2/edit";
1014
+ }
1015
+
1006
1016
  return IMAGE_MODELS[this.modelId] ?? this.modelId;
1007
1017
  }
1008
1018
  }
@@ -1,6 +1,6 @@
1
1
  /**
2
- * Nano Banana 2 image editing model (Google's next-gen image generation/editing)
3
- * Edit-only model requiring image_urls input
2
+ * Nano Banana 2 image model (Google's next-gen image generation/editing)
3
+ * Supports both text-to-image (no images) and image editing (with image_urls)
4
4
  */
5
5
 
6
6
  import { z } from "zod";
@@ -35,8 +35,9 @@ const nanoBanana2InputSchema = z.object({
35
35
  prompt: z.string().describe("Text description for image editing"),
36
36
  image_urls: z
37
37
  .array(z.string().url())
38
+ .optional()
38
39
  .describe(
39
- "Input image URLs for image-to-image editing. Required for this model.",
40
+ "Input image URLs for image editing. When provided, routes to the /edit endpoint. Omit for text-to-image generation.",
40
41
  ),
41
42
  resolution: nanoBanana2ResolutionSchema
42
43
  .default("1K")
@@ -103,11 +104,11 @@ export const definition: ModelDefinition<typeof schema> = {
103
104
  type: "model",
104
105
  name: "nano-banana-2",
105
106
  description:
106
- "Google Nano Banana 2 - next-gen image editing model. Requires image_urls for all operations.",
107
+ "Google Nano Banana 2 - next-gen image generation and editing model. Supports text-to-image and image editing (with image_urls).",
107
108
  providers: ["fal"],
108
109
  defaultProvider: "fal",
109
110
  providerModels: {
110
- fal: "fal-ai/nano-banana-2/edit",
111
+ fal: "fal-ai/nano-banana-2",
111
112
  },
112
113
  schema,
113
114
  };
@@ -117,7 +117,8 @@ export class ElevenLabsProvider extends BaseProvider {
117
117
 
118
118
  const audio = await this.client.music.compose({
119
119
  prompt,
120
- musicLengthMs,
120
+ musicLengthMs:
121
+ musicLengthMs != null ? Math.round(musicLengthMs) : undefined,
121
122
  modelId: "music_v1",
122
123
  });
123
124
 
@@ -54,9 +54,13 @@ export class FalProvider extends BaseProvider {
54
54
  return "fal-ai/nano-banana-pro/edit";
55
55
  }
56
56
  }
57
- // Nano Banana 2: always route to /edit endpoint (edit-only model)
57
+ // Nano Banana 2: route to /edit when image_urls are provided, otherwise use base t2i endpoint
58
58
  if (model === "fal-ai/nano-banana-2") {
59
- return "fal-ai/nano-banana-2/edit";
59
+ const imageUrls = inputs.image_urls as string[] | undefined;
60
+ if (imageUrls && imageUrls.length > 0) {
61
+ return "fal-ai/nano-banana-2/edit";
62
+ }
63
+ return "fal-ai/nano-banana-2";
60
64
  }
61
65
  // Qwen Image 2: route to /edit endpoint when image_urls are provided
62
66
  if (model === "fal-ai/qwen-image-2/text-to-image") {