vargai 0.4.0-alpha100 → 0.4.0-alpha101

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -315,6 +315,13 @@ See the [BYOK docs](https://docs.varg.ai/sdk/byok) for details.
 
 A typical 3-clip video costs $2-5. Cache hits are always free.
 
+## Star History
+
+<img width="2832" height="2253" alt="star-history-202643" src="https://github.com/user-attachments/assets/63e84279-d756-43a9-b328-118fb69ed2d5" />
+
+
+
+
 
 ## Contributing
 
 See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup.
package/package.json CHANGED
@@ -104,7 +104,7 @@
   "license": "Apache-2.0",
   "author": "varg.ai <hello@varg.ai> (https://varg.ai)",
   "sideEffects": false,
-  "version": "0.4.0-alpha100",
+  "version": "0.4.0-alpha101",
   "exports": {
     ".": "./src/index.ts",
     "./ai": "./src/ai-sdk/index.ts",
@@ -17,6 +17,8 @@ export interface FileMetadata {
   model?: string;
   /** Original prompt used */
   prompt?: string;
+  /** Duration in seconds (for video/audio files) */
+  duration?: number;
 }
 
 export class File {
@@ -158,11 +160,16 @@ export class File {
     return this._url;
   }
 
-  /** Get file metadata (type, model, prompt) */
+  /** Get file metadata (type, model, prompt, duration) */
   get metadata(): FileMetadata {
     return this._metadata;
   }
 
+  /** Duration in seconds (for video/audio files) */
+  get duration(): number | undefined {
+    return this._metadata.duration;
+  }
+
   /** Set metadata and return this for chaining */
   withMetadata(metadata: FileMetadata): this {
     this._metadata = { ...this._metadata, ...metadata };
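
For context, a quick usage sketch of the new metadata field and getter. This is illustrative only: the File constructor arguments are not shown in this diff and are elided here, and the prompt string is made up; the metadata fields and the duration getter come from the hunk above.

// Sketch only: constructor arguments omitted (not part of this diff).
const clip = new File(/* ... */).withMetadata({
  type: "video",
  model: "kling-v3",
  prompt: "aerial shot of a coastline at dusk",
  duration: 8,
});

clip.duration; // 8, read back through the new getter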
@@ -99,12 +99,34 @@ async function processClips(
   let duration = clip.duration ?? defaultDuration;
 
   for (const layer of layers) {
-    if (layer.type === "video" && !clip.duration) {
+    if (layer.type === "video") {
       const videoLayer = layer as VideoLayer;
-      const videoDuration = await getVideoDuration(videoLayer.path, backend);
+
+      // Use pre-propagated duration when available (avoids ffprobe HTTP
+      // round-trip for remote URLs). Fall back to ffprobe otherwise.
+      const videoDuration =
+        videoLayer.sourceDuration ??
+        (await getVideoDuration(videoLayer.path, backend));
+
       const cutFrom = videoLayer.cutFrom ?? 0;
-      const cutTo = videoLayer.cutTo ?? videoDuration;
-      duration = cutTo - cutFrom;
+      const cutTo = Math.min(
+        videoLayer.cutTo ?? videoDuration,
+        videoDuration,
+      );
+
+      // Clamp the layer's cutTo so the FFmpeg trim filter also respects
+      // the actual source duration (prevents freeze frames at the tail)
+      videoLayer.cutTo = cutTo;
+
+      const effectiveDuration = cutTo - cutFrom;
+      if (!clip.duration) {
+        // No explicit duration — derive from the video layer
+        duration = effectiveDuration;
+      } else if (effectiveDuration < duration) {
+        // Explicit duration exceeds actual video length — clamp to avoid
+        // freeze frames and xfade offset misalignment
+        duration = effectiveDuration;
+      }
       break;
     }
   }
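
As a sanity check on the arithmetic above, here is a standalone sketch of the clamping rule. It is illustrative only, not the package's processClips implementation; the names ClipTimingSketch and clampClipDuration are made up for the example.

// Illustrative helper, not part of the package. Mirrors the logic above:
// cutTo can never exceed the real source length, and an explicit clip
// duration is reduced when the trimmed video is shorter than requested.
interface ClipTimingSketch {
  cutFrom?: number;
  cutTo?: number;
  requestedDuration?: number; // corresponds to clip.duration above
}

function clampClipDuration(sourceDuration: number, t: ClipTimingSketch): number {
  const cutFrom = t.cutFrom ?? 0;
  const cutTo = Math.min(t.cutTo ?? sourceDuration, sourceDuration);
  const effective = cutTo - cutFrom;
  if (t.requestedDuration === undefined) return effective;
  return Math.min(t.requestedDuration, effective);
}

clampClipDuration(6, { requestedDuration: 10 }); // 6, avoids a 4s freeze frame
clampClipDuration(6, { cutFrom: 1, cutTo: 12 }); // 5, cutTo clamped to the 6s source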
@@ -117,6 +117,8 @@ export interface VideoLayer extends BaseLayer {
   cropPosition?: CropPosition;
   cutFrom?: number;
   cutTo?: number;
+  /** Known source video duration in seconds (avoids ffprobe when set). */
+  sourceDuration?: number;
   width?: SizeValue;
   height?: SizeValue;
   left?: SizeValue;
@@ -21,16 +21,30 @@ import type { MusicModelV3, MusicModelV3CallOptions } from "../music-model";
  * call the gateway's GET /v1/voices endpoint to browse/search.
  */
 const VOICES: Record<string, string> = {
-  rachel: "21m00Tcm4TlvDq8ikWAM",
-  domi: "AZnzlk1XvdvUeBnXmlld",
-  sarah: "EXAVITQu4vr4xnSDxMaL",
-  bella: "EXAVITQu4vr4xnSDxMaL", // alias — ElevenLabs calls this voice "Sarah"
-  antoni: "ErXwobaYiN019PkySvjV",
-  elli: "MF3mGyEYCl7XYWbV9V6O",
-  josh: "TxGEqnHWrfWFTfGW9XjX",
-  arnold: "VR6AewLTigWG4xSOukaG",
+  // Current ElevenLabs premade voices (source: skills/varg-ai/references/models.md)
   adam: "pNInz6obpgDQGcFmaJgB",
-  sam: "yoZ06aMxZJJ28mfd3POQ",
+  alice: "Xb7hH8MSUJpSbSDYk0k2",
+  bella: "hpp4J3VqNfWAUOO0d1Us",
+  bill: "pqHfZKP75CvOlQylNhV4",
+  brian: "nPczCjzI2devNBz1zQrb",
+  callum: "N2lVS1w4EtoT3dr4eOWO",
+  charlie: "IKne3meq5aSn9XLyUdCD",
+  chris: "iP95p4xoKVk53GoZ742B",
+  daniel: "onwK4e9ZLuTAKqWW03F9",
+  eric: "cjVigY5qzO86Huf0OWal",
+  george: "JBFqnCBsd6RMkjVDRZzb",
+  harry: "SOYHLrjzK2X1ezoPC6cr",
+  jessica: "cgSgspJ2msm6clMCkdW9",
+  laura: "FGY2WhTYpPnrIDTdsKH5",
+  liam: "TX3LPaxmHKxFdv7VOQHJ",
+  lily: "pFZP5JQG7iQjIQuC4Bku",
+  matilda: "XrExE9yKIg1WjnnlVkGX",
+  river: "SAz9YHcvj6GT2YYXdXww",
+  roger: "CwhRBWXzGAHq8TQ4Fs17",
+  sarah: "EXAVITQu4vr4xnSDxMaL",
+  will: "bIHbv24MWmeRgasZH58o",
+  // Legacy
+  rachel: "21m00Tcm4TlvDq8ikWAM",
 };
 
 const TTS_MODELS: Record<string, string> = {
@@ -15,6 +15,7 @@ import pMap from "p-map";
 import type { CacheStorage } from "../cache";
 import { fileCache } from "../file-cache";
 import type { VideoModelV3, VideoModelV3CallOptions } from "../video-model";
+import { normalizeProviderInput } from "./model-rules";
 
 interface PendingRequest {
   request_id: string;
@@ -640,35 +641,22 @@ class FalVideoModel implements VideoModelV3 {
       if (input.video_size === undefined) {
         input.video_size = "auto";
       }
-    } else if (isKlingV3 || isKlingV26) {
-      // Duration must be string for Kling v2.6+ and O3 (v3)
-      input.duration = String(duration ?? 5);
-    } else if (isGrokImagine) {
-      // Grok Imagine: duration 1-15 seconds (default 6)
-      input.duration = duration ?? 6;
-      // Grok Imagine supports resolution: "480p", "720p" (default "720p")
-      if (!input.resolution) {
-        input.resolution = "720p";
-      }
-    } else if (isSora2) {
-      // Sora 2: only supports 4, 8, 12, 16, 20 second durations
-      const allowedDurations = [4, 8, 12, 16, 20];
-      const d = duration ?? 4;
-      if (!allowedDurations.includes(d)) {
-        warnings.push({
-          type: "other",
-          message: `Sora 2 only supports durations: ${allowedDurations.join(", ")}s. Got ${d}s, defaulting to 4s.`,
-        });
-        input.duration = 4;
-      } else {
-        input.duration = d;
-      }
-      // Disable video deletion so generated video URLs remain accessible
-      if (input.delete_video === undefined) {
-        input.delete_video = false;
-      }
     } else {
-      input.duration = duration ?? 5;
+      // Apply model-specific duration normalization via Zod schemas
+      // (clamp to valid range, round floats, convert type e.g. number → string for Kling v3)
+      const normalized = normalizeProviderInput(this.modelId, { duration });
+      input.duration = normalized.duration;
+
+      // Model-specific non-duration defaults
+      if (isGrokImagine) {
+        if (!input.resolution) {
+          input.resolution = "720p";
+        }
+      } else if (isSora2) {
+        if (input.delete_video === undefined) {
+          input.delete_video = false;
+        }
+      }
     }
 
     if (hasImageInput && files) {
@@ -0,0 +1,129 @@
+/**
+ * Per-model provider input validation schemas.
+ *
+ * Each model that has provider-specific input constraints (duration format,
+ * allowed ranges, type coercion) gets a Zod schema here. The schemas use
+ * `.transform()` to auto-fix invalid inputs — rounding floats, clamping to
+ * valid ranges, and converting types (e.g. number → string for Kling v3).
+ *
+ * Usage:
+ *   const fixed = normalizeProviderInput("kling-v3", { duration: 2.34 });
+ *   // → { duration: "3" } (rounded to 2, clamped to min 3, stringified)
+ *
+ * NOTE: This file is kept in sync with gateway/packages/schemas/src/model-rules.ts.
+ * When adding new model rules, update both files.
+ */
+
+import { z } from "zod";
+
+// ---------------------------------------------------------------------------
+// Duration schema builders
+// ---------------------------------------------------------------------------
+
+/** Duration as string integer clamped to [min, max]. Accepts number, outputs string. */
+function stringIntDuration(min: number, max: number, defaultVal: number) {
+  return z
+    .number()
+    .optional()
+    .transform((v) =>
+      String(Math.max(min, Math.min(max, Math.round(v ?? defaultVal)))),
+    );
+}
+
+/** Duration snapped to nearest allowed value. Accepts number, outputs number. */
+function enumDuration(allowed: number[], defaultVal: number) {
+  return z
+    .number()
+    .optional()
+    .transform((v) => {
+      const raw = v ?? defaultVal;
+      return allowed.reduce((prev, curr) =>
+        Math.abs(curr - raw) < Math.abs(prev - raw) ? curr : prev,
+      );
+    });
+}
+
+/** Duration as integer clamped to [min, max]. Accepts number, outputs number. */
+function rangeDuration(min: number, max: number, defaultVal: number) {
+  return z
+    .number()
+    .optional()
+    .transform((v) =>
+      Math.max(min, Math.min(max, Math.round(v ?? defaultVal))),
+    );
+}
+
+/** Passthrough duration rounded to integer. */
+function intDuration(defaultVal: number) {
+  return z
+    .number()
+    .optional()
+    .transform((v) => Math.round(v ?? defaultVal));
+}
+
+// ---------------------------------------------------------------------------
+// Per-model provider input schemas
+// ---------------------------------------------------------------------------
+
+const ModelDurationRules: Record<string, z.ZodType> = {
+  // Kling O3 (v3): fal expects string integer "3"–"15"
+  "kling-v3": z.object({ duration: stringIntDuration(3, 15, 5) }),
+  "kling-v3-standard": z.object({ duration: stringIntDuration(3, 15, 5) }),
+
+  // Kling v2.6: same rules as v3
+  "kling-v2.6": z.object({ duration: stringIntDuration(3, 15, 5) }),
+
+  // Kling legacy: exactly 5 or 10
+  "kling-v2.5": z.object({ duration: enumDuration([5, 10], 5) }),
+  "kling-v2.1": z.object({ duration: enumDuration([5, 10], 5) }),
+  "kling-v2": z.object({ duration: enumDuration([5, 10], 5) }),
+
+  // Wan: 5 or 10
+  "wan-2.5": z.object({ duration: enumDuration([5, 10], 5) }),
+  "wan-2.5-preview": z.object({ duration: enumDuration([5, 10], 5) }),
+
+  // Minimax: round to integer
+  minimax: z.object({ duration: intDuration(5) }),
+
+  // Grok Imagine: integer 1–15
+  "grok-imagine": z.object({ duration: rangeDuration(1, 15, 6) }),
+
+  // Sora 2: only 4, 8, 12, 16, 20
+  "sora-2": z.object({ duration: enumDuration([4, 8, 12, 16, 20], 4) }),
+  "sora-2-pro": z.object({ duration: enumDuration([4, 8, 12, 16, 20], 4) }),
+
+  // Seedance (piapi): 5, 10, or 15
+  "seedance-2-preview": z.object({ duration: enumDuration([5, 10, 15], 5) }),
+  "seedance-2-fast-preview": z.object({
+    duration: enumDuration([5, 10, 15], 5),
+  }),
+};
+
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
+
+/**
+ * Normalize provider input for a given model.
+ *
+ * Validates and transforms fields (currently `duration`) to match what the
+ * provider API expects — correct type, clamped to valid range, rounded to
+ * integer.
+ *
+ * - Unknown models: input returned as-is (passthrough).
+ * - Parse failures: input returned as-is (defensive — never throws).
+ */
+export function normalizeProviderInput(
+  model: string,
+  input: Record<string, unknown>,
+): Record<string, unknown> {
+  const schema = ModelDurationRules[model];
+  if (!schema) return input;
+
+  const result = schema.safeParse({ duration: input.duration });
+  if (!result.success) return input;
+
+  return { ...input, ...(result.data as Record<string, unknown>) };
+}
+
+export { ModelDurationRules };
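
The doc comment's usage example generalizes as follows. This is a quick sanity sketch of the behavior the schemas above imply, not code shipped in the package; the model ids come from the rule table above.

import { normalizeProviderInput } from "./model-rules";

// Kling v3 wants a string integer in [3, 15]: 2.34 rounds to 2, clamps to 3, stringifies
console.assert(
  normalizeProviderInput("kling-v3", { duration: 2.34 }).duration === "3",
);

// Sora 2 snaps to the nearest allowed duration: 7 becomes 8
console.assert(
  normalizeProviderInput("sora-2", { duration: 7 }).duration === 8,
);

// Models without a rule pass through unchanged
console.assert(
  normalizeProviderInput("unknown-model", { duration: 2.34 }).duration === 2.34,
);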
@@ -62,16 +62,30 @@ export interface VoiceResult {
 
 // Voice name to ID mapping. Unknown names pass through as voice_ids.
 const VOICE_MAP: Record<string, string> = {
-  rachel: VOICES.RACHEL,
-  domi: VOICES.DOMI,
-  sarah: VOICES.SARAH,
-  bella: VOICES.BELLA,
-  antoni: VOICES.ANTONI,
-  elli: VOICES.ELLI,
-  josh: VOICES.JOSH,
-  arnold: VOICES.ARNOLD,
+  // Current ElevenLabs premade voices (source: skills/varg-ai/references/models.md)
   adam: VOICES.ADAM,
-  sam: VOICES.SAM,
+  alice: VOICES.ALICE,
+  bella: VOICES.BELLA,
+  bill: VOICES.BILL,
+  brian: VOICES.BRIAN,
+  callum: VOICES.CALLUM,
+  charlie: VOICES.CHARLIE,
+  chris: VOICES.CHRIS,
+  daniel: VOICES.DANIEL,
+  eric: VOICES.ERIC,
+  george: VOICES.GEORGE,
+  harry: VOICES.HARRY,
+  jessica: VOICES.JESSICA,
+  laura: VOICES.LAURA,
+  liam: VOICES.LIAM,
+  lily: VOICES.LILY,
+  matilda: VOICES.MATILDA,
+  river: VOICES.RIVER,
+  roger: VOICES.ROGER,
+  sarah: VOICES.SARAH,
+  will: VOICES.WILL,
+  // Legacy
+  rachel: VOICES.RACHEL,
 };
 
 export async function generateVoice(
@@ -192,16 +192,30 @@ export class ElevenLabsProvider extends BaseProvider {
  * call the gateway's GET /v1/voices endpoint to browse/search.
  */
 export const VOICES = {
-  RACHEL: "21m00Tcm4TlvDq8ikWAM",
-  DOMI: "AZnzlk1XvdvUeBnXmlld",
-  SARAH: "EXAVITQu4vr4xnSDxMaL",
-  BELLA: "EXAVITQu4vr4xnSDxMaL", // alias — ElevenLabs calls this voice "Sarah"
-  ANTONI: "ErXwobaYiN019PkySvjV",
-  ELLI: "MF3mGyEYCl7XYWbV9V6O",
-  JOSH: "TxGEqnHWrfWFTfGW9XjX",
-  ARNOLD: "VR6AewLTigWG4xSOukaG",
+  // Current ElevenLabs premade voices (source: skills/varg-ai/references/models.md)
   ADAM: "pNInz6obpgDQGcFmaJgB",
-  SAM: "yoZ06aMxZJJ28mfd3POQ",
+  ALICE: "Xb7hH8MSUJpSbSDYk0k2",
+  BELLA: "hpp4J3VqNfWAUOO0d1Us",
+  BILL: "pqHfZKP75CvOlQylNhV4",
+  BRIAN: "nPczCjzI2devNBz1zQrb",
+  CALLUM: "N2lVS1w4EtoT3dr4eOWO",
+  CHARLIE: "IKne3meq5aSn9XLyUdCD",
+  CHRIS: "iP95p4xoKVk53GoZ742B",
+  DANIEL: "onwK4e9ZLuTAKqWW03F9",
+  ERIC: "cjVigY5qzO86Huf0OWal",
+  GEORGE: "JBFqnCBsd6RMkjVDRZzb",
+  HARRY: "SOYHLrjzK2X1ezoPC6cr",
+  JESSICA: "cgSgspJ2msm6clMCkdW9",
+  LAURA: "FGY2WhTYpPnrIDTdsKH5",
+  LIAM: "TX3LPaxmHKxFdv7VOQHJ",
+  LILY: "pFZP5JQG7iQjIQuC4Bku",
+  MATILDA: "XrExE9yKIg1WjnnlVkGX",
+  RIVER: "SAz9YHcvj6GT2YYXdXww",
+  ROGER: "CwhRBWXzGAHq8TQ4Fs17",
+  SARAH: "EXAVITQu4vr4xnSDxMaL",
+  WILL: "bIHbv24MWmeRgasZH58o",
+  // Legacy
+  RACHEL: "21m00Tcm4TlvDq8ikWAM",
 };
 
 // Export singleton instance (lazy initialization means no error on import)
@@ -92,24 +92,25 @@ async function renderClipLayers(
       const props = element.props as VideoProps;
       pending.push({
         type: "async",
-        promise: renderVideo(element as VargElement<"video">, ctx)
-          .then((file) => ctx.backend.resolvePath(file))
-          .then(
-            (path) =>
-              ({
-                type: "video",
-                path,
-                resizeMode: props.resize,
-                cropPosition: props.cropPosition,
-                cutFrom: props.cutFrom ?? clipOptions?.cutFrom,
-                cutTo: props.cutTo ?? clipOptions?.cutTo,
-                mixVolume: props.keepAudio ? (props.volume ?? 1) : 0,
-                left: props.left,
-                top: props.top,
-                width: props.width,
-                height: props.height,
-              }) as VideoLayer,
-          ),
+        promise: renderVideo(element as VargElement<"video">, ctx).then(
+          async (file) => {
+            const path = await ctx.backend.resolvePath(file);
+            return {
+              type: "video",
+              path,
+              sourceDuration: file.duration,
+              resizeMode: props.resize,
+              cropPosition: props.cropPosition,
+              cutFrom: props.cutFrom ?? clipOptions?.cutFrom,
+              cutTo: props.cutTo ?? clipOptions?.cutTo,
+              mixVolume: props.keepAudio ? (props.volume ?? 1) : 0,
+              left: props.left,
+              top: props.top,
+              width: props.width,
+              height: props.height,
+            } as VideoLayer;
+          },
+        ),
       });
       break;
     }
@@ -158,17 +159,16 @@ async function renderClipLayers(
         promise: renderTalkingHead(
           element as VargElement<"talking-head">,
           ctx,
-        )
-          .then((file) => ctx.backend.resolvePath(file))
-          .then(
-            (path) =>
-              ({
-                type: "video",
-                path,
-                resizeMode: "cover",
-                mixVolume: 1,
-              }) as VideoLayer,
-          ),
+        ).then(async (file) => {
+          const path = await ctx.backend.resolvePath(file);
+          return {
+            type: "video",
+            path,
+            sourceDuration: file.duration,
+            resizeMode: "cover",
+            mixVolume: 1,
+          } as VideoLayer;
+        }),
       });
       break;
     }
@@ -170,6 +170,7 @@ export async function renderVideo(
     type: "video",
     model: modelId,
     prompt: promptText,
+    duration: props.duration,
   });
 
   if (!file.url && ctx.storage) {