vargai 0.4.0-alpha100 → 0.4.0-alpha101

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -315,6 +315,13 @@ See the [BYOK docs](https://docs.varg.ai/sdk/byok) for details.
 
 A typical 3-clip video costs $2-5. Cache hits are always free.
 
+## Star History
+
+<img width="2832" height="2253" alt="star-history-202643" src="https://github.com/user-attachments/assets/63e84279-d756-43a9-b328-118fb69ed2d5" />
+
+
+
+
 
 ## Contributing
 
 See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup.
package/package.json CHANGED
@@ -104,7 +104,7 @@
   "license": "Apache-2.0",
   "author": "varg.ai <hello@varg.ai> (https://varg.ai)",
   "sideEffects": false,
-  "version": "0.4.0-alpha100",
+  "version": "0.4.0-alpha101",
   "exports": {
     ".": "./src/index.ts",
     "./ai": "./src/ai-sdk/index.ts",
@@ -17,6 +17,8 @@ export interface FileMetadata {
   model?: string;
   /** Original prompt used */
   prompt?: string;
+  /** Duration in seconds (for video/audio files) */
+  duration?: number;
 }
 
 export class File {
@@ -158,11 +160,16 @@ export class File {
     return this._url;
   }
 
-  /** Get file metadata (type, model, prompt) */
+  /** Get file metadata (type, model, prompt, duration) */
   get metadata(): FileMetadata {
     return this._metadata;
   }
 
+  /** Duration in seconds (for video/audio files) */
+  get duration(): number | undefined {
+    return this._metadata.duration;
+  }
+
   /** Set metadata and return this for chaining */
   withMetadata(metadata: FileMetadata): this {
     this._metadata = { ...this._metadata, ...metadata };
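
For context, a quick usage sketch of the new metadata field and getter. This is illustrative only: the File constructor arguments are not shown in this diff and are elided here, and the prompt string is made up; the metadata fields and the duration getter come from the hunk above.

// Sketch only: constructor arguments omitted (not part of this diff).
const clip = new File(/* ... */).withMetadata({
  type: "video",
  model: "kling-v3",
  prompt: "aerial shot of a coastline at dusk",
  duration: 8,
});

clip.duration; // 8, read back through the new getter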
@@ -99,12 +99,34 @@ async function processClips(
   let duration = clip.duration ?? defaultDuration;
 
   for (const layer of layers) {
-    if (layer.type === "video" && !clip.duration) {
+    if (layer.type === "video") {
       const videoLayer = layer as VideoLayer;
-      const videoDuration = await getVideoDuration(videoLayer.path, backend);
+
+      // Use pre-propagated duration when available (avoids ffprobe HTTP
+      // round-trip for remote URLs). Fall back to ffprobe otherwise.
+      const videoDuration =
+        videoLayer.sourceDuration ??
+        (await getVideoDuration(videoLayer.path, backend));
+
       const cutFrom = videoLayer.cutFrom ?? 0;
-      const cutTo = videoLayer.cutTo ?? videoDuration;
-      duration = cutTo - cutFrom;
+      const cutTo = Math.min(
+        videoLayer.cutTo ?? videoDuration,
+        videoDuration,
+      );
+
+      // Clamp the layer's cutTo so the FFmpeg trim filter also respects
+      // the actual source duration (prevents freeze frames at the tail)
+      videoLayer.cutTo = cutTo;
+
+      const effectiveDuration = cutTo - cutFrom;
+      if (!clip.duration) {
+        // No explicit duration — derive from the video layer
+        duration = effectiveDuration;
+      } else if (effectiveDuration < duration) {
+        // Explicit duration exceeds actual video length — clamp to avoid
+        // freeze frames and xfade offset misalignment
+        duration = effectiveDuration;
+      }
       break;
     }
   }
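
As a sanity check on the arithmetic above, here is a standalone sketch of the clamping rule. It is illustrative only, not the package's processClips implementation; the names ClipTimingSketch and clampClipDuration are made up for the example.

// Illustrative helper, not part of the package. Mirrors the logic above:
// cutTo can never exceed the real source length, and an explicit clip
// duration is reduced when the trimmed video is shorter than requested.
interface ClipTimingSketch {
  cutFrom?: number;
  cutTo?: number;
  requestedDuration?: number; // corresponds to clip.duration above
}

function clampClipDuration(sourceDuration: number, t: ClipTimingSketch): number {
  const cutFrom = t.cutFrom ?? 0;
  const cutTo = Math.min(t.cutTo ?? sourceDuration, sourceDuration);
  const effective = cutTo - cutFrom;
  if (t.requestedDuration === undefined) return effective;
  return Math.min(t.requestedDuration, effective);
}

clampClipDuration(6, { requestedDuration: 10 }); // 6, avoids a 4s freeze frame
clampClipDuration(6, { cutFrom: 1, cutTo: 12 }); // 5, cutTo clamped to the 6s source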
@@ -117,6 +117,8 @@ export interface VideoLayer extends BaseLayer {
   cropPosition?: CropPosition;
   cutFrom?: number;
   cutTo?: number;
+  /** Known source video duration in seconds (avoids ffprobe when set). */
+  sourceDuration?: number;
   width?: SizeValue;
   height?: SizeValue;
   left?: SizeValue;
@@ -21,16 +21,30 @@ import type { MusicModelV3, MusicModelV3CallOptions } from "../music-model";
  * call the gateway's GET /v1/voices endpoint to browse/search.
  */
 const VOICES: Record<string, string> = {
-  rachel: "21m00Tcm4TlvDq8ikWAM",
-  domi: "AZnzlk1XvdvUeBnXmlld",
-  sarah: "EXAVITQu4vr4xnSDxMaL",
-  bella: "EXAVITQu4vr4xnSDxMaL", // alias — ElevenLabs calls this voice "Sarah"
-  antoni: "ErXwobaYiN019PkySvjV",
-  elli: "MF3mGyEYCl7XYWbV9V6O",
-  josh: "TxGEqnHWrfWFTfGW9XjX",
-  arnold: "VR6AewLTigWG4xSOukaG",
+  // Current ElevenLabs premade voices (source: skills/varg-ai/references/models.md)
   adam: "pNInz6obpgDQGcFmaJgB",
-  sam: "yoZ06aMxZJJ28mfd3POQ",
+  alice: "Xb7hH8MSUJpSbSDYk0k2",
+  bella: "hpp4J3VqNfWAUOO0d1Us",
+  bill: "pqHfZKP75CvOlQylNhV4",
+  brian: "nPczCjzI2devNBz1zQrb",
+  callum: "N2lVS1w4EtoT3dr4eOWO",
+  charlie: "IKne3meq5aSn9XLyUdCD",
+  chris: "iP95p4xoKVk53GoZ742B",
+  daniel: "onwK4e9ZLuTAKqWW03F9",
+  eric: "cjVigY5qzO86Huf0OWal",
+  george: "JBFqnCBsd6RMkjVDRZzb",
+  harry: "SOYHLrjzK2X1ezoPC6cr",
+  jessica: "cgSgspJ2msm6clMCkdW9",
+  laura: "FGY2WhTYpPnrIDTdsKH5",
+  liam: "TX3LPaxmHKxFdv7VOQHJ",
+  lily: "pFZP5JQG7iQjIQuC4Bku",
+  matilda: "XrExE9yKIg1WjnnlVkGX",
+  river: "SAz9YHcvj6GT2YYXdXww",
+  roger: "CwhRBWXzGAHq8TQ4Fs17",
+  sarah: "EXAVITQu4vr4xnSDxMaL",
+  will: "bIHbv24MWmeRgasZH58o",
+  // Legacy
+  rachel: "21m00Tcm4TlvDq8ikWAM",
 };
 
 const TTS_MODELS: Record<string, string> = {
@@ -15,6 +15,7 @@ import pMap from "p-map";
 import type { CacheStorage } from "../cache";
 import { fileCache } from "../file-cache";
 import type { VideoModelV3, VideoModelV3CallOptions } from "../video-model";
+import { normalizeProviderInput } from "./model-rules";
 
 interface PendingRequest {
   request_id: string;
@@ -640,35 +641,22 @@ class FalVideoModel implements VideoModelV3 {
       if (input.video_size === undefined) {
         input.video_size = "auto";
       }
-    } else if (isKlingV3 || isKlingV26) {
-      // Duration must be string for Kling v2.6+ and O3 (v3)
-      input.duration = String(duration ?? 5);
-    } else if (isGrokImagine) {
-      // Grok Imagine: duration 1-15 seconds (default 6)
-      input.duration = duration ?? 6;
-      // Grok Imagine supports resolution: "480p", "720p" (default "720p")
-      if (!input.resolution) {
-        input.resolution = "720p";
-      }
-    } else if (isSora2) {
-      // Sora 2: only supports 4, 8, 12, 16, 20 second durations
-      const allowedDurations = [4, 8, 12, 16, 20];
-      const d = duration ?? 4;
-      if (!allowedDurations.includes(d)) {
-        warnings.push({
-          type: "other",
-          message: `Sora 2 only supports durations: ${allowedDurations.join(", ")}s. Got ${d}s, defaulting to 4s.`,
-        });
-        input.duration = 4;
-      } else {
-        input.duration = d;
-      }
-      // Disable video deletion so generated video URLs remain accessible
-      if (input.delete_video === undefined) {
-        input.delete_video = false;
-      }
     } else {
-      input.duration = duration ?? 5;
+      // Apply model-specific duration normalization via Zod schemas
+      // (clamp to valid range, round floats, convert type e.g. number → string for Kling v3)
+      const normalized = normalizeProviderInput(this.modelId, { duration });
+      input.duration = normalized.duration;
+
+      // Model-specific non-duration defaults
+      if (isGrokImagine) {
+        if (!input.resolution) {
+          input.resolution = "720p";
+        }
+      } else if (isSora2) {
+        if (input.delete_video === undefined) {
+          input.delete_video = false;
+        }
+      }
     }
 
     if (hasImageInput && files) {
@@ -0,0 +1,129 @@
+/**
+ * Per-model provider input validation schemas.
+ *
+ * Each model that has provider-specific input constraints (duration format,
+ * allowed ranges, type coercion) gets a Zod schema here. The schemas use
+ * `.transform()` to auto-fix invalid inputs — rounding floats, clamping to
+ * valid ranges, and converting types (e.g. number → string for Kling v3).
+ *
+ * Usage:
+ *   const fixed = normalizeProviderInput("kling-v3", { duration: 2.34 });
+ *   // → { duration: "3" } (rounded to 2, clamped to min 3, stringified)
+ *
+ * NOTE: This file is kept in sync with gateway/packages/schemas/src/model-rules.ts.
+ * When adding new model rules, update both files.
+ */
+
+import { z } from "zod";
+
+// ---------------------------------------------------------------------------
+// Duration schema builders
+// ---------------------------------------------------------------------------
+
+/** Duration as string integer clamped to [min, max]. Accepts number, outputs string. */
+function stringIntDuration(min: number, max: number, defaultVal: number) {
+  return z
+    .number()
+    .optional()
+    .transform((v) =>
+      String(Math.max(min, Math.min(max, Math.round(v ?? defaultVal)))),
+    );
+}
+
+/** Duration snapped to nearest allowed value. Accepts number, outputs number. */
+function enumDuration(allowed: number[], defaultVal: number) {
+  return z
+    .number()
+    .optional()
+    .transform((v) => {
+      const raw = v ?? defaultVal;
+      return allowed.reduce((prev, curr) =>
+        Math.abs(curr - raw) < Math.abs(prev - raw) ? curr : prev,
+      );
+    });
+}
+
+/** Duration as integer clamped to [min, max]. Accepts number, outputs number. */
+function rangeDuration(min: number, max: number, defaultVal: number) {
+  return z
+    .number()
+    .optional()
+    .transform((v) =>
+      Math.max(min, Math.min(max, Math.round(v ?? defaultVal))),
+    );
+}
+
+/** Passthrough duration rounded to integer. */
+function intDuration(defaultVal: number) {
+  return z
+    .number()
+    .optional()
+    .transform((v) => Math.round(v ?? defaultVal));
+}
+
+// ---------------------------------------------------------------------------
+// Per-model provider input schemas
+// ---------------------------------------------------------------------------
+
+const ModelDurationRules: Record<string, z.ZodType> = {
+  // Kling O3 (v3): fal expects string integer "3"–"15"
+  "kling-v3": z.object({ duration: stringIntDuration(3, 15, 5) }),
+  "kling-v3-standard": z.object({ duration: stringIntDuration(3, 15, 5) }),
+
+  // Kling v2.6: same rules as v3
+  "kling-v2.6": z.object({ duration: stringIntDuration(3, 15, 5) }),
+
+  // Kling legacy: exactly 5 or 10
+  "kling-v2.5": z.object({ duration: enumDuration([5, 10], 5) }),
+  "kling-v2.1": z.object({ duration: enumDuration([5, 10], 5) }),
+  "kling-v2": z.object({ duration: enumDuration([5, 10], 5) }),
+
+  // Wan: 5 or 10
+  "wan-2.5": z.object({ duration: enumDuration([5, 10], 5) }),
+  "wan-2.5-preview": z.object({ duration: enumDuration([5, 10], 5) }),
+
+  // Minimax: round to integer
+  minimax: z.object({ duration: intDuration(5) }),
+
+  // Grok Imagine: integer 1–15
+  "grok-imagine": z.object({ duration: rangeDuration(1, 15, 6) }),
+
+  // Sora 2: only 4, 8, 12, 16, 20
+  "sora-2": z.object({ duration: enumDuration([4, 8, 12, 16, 20], 4) }),
+  "sora-2-pro": z.object({ duration: enumDuration([4, 8, 12, 16, 20], 4) }),
+
+  // Seedance (piapi): 5, 10, or 15
+  "seedance-2-preview": z.object({ duration: enumDuration([5, 10, 15], 5) }),
+  "seedance-2-fast-preview": z.object({
+    duration: enumDuration([5, 10, 15], 5),
+  }),
+};
+
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
+
+/**
+ * Normalize provider input for a given model.
+ *
+ * Validates and transforms fields (currently `duration`) to match what the
+ * provider API expects — correct type, clamped to valid range, rounded to
+ * integer.
+ *
+ * - Unknown models: input returned as-is (passthrough).
+ * - Parse failures: input returned as-is (defensive — never throws).
+ */
+export function normalizeProviderInput(
+  model: string,
+  input: Record<string, unknown>,
+): Record<string, unknown> {
+  const schema = ModelDurationRules[model];
+  if (!schema) return input;
+
+  const result = schema.safeParse({ duration: input.duration });
+  if (!result.success) return input;
+
+  return { ...input, ...(result.data as Record<string, unknown>) };
+}
+
+export { ModelDurationRules };
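
The doc comment's usage example generalizes as follows. This is a quick sanity sketch of the behavior the schemas above imply, not code shipped in the package; the model ids come from the rule table above.

import { normalizeProviderInput } from "./model-rules";

// Kling v3 wants a string integer in [3, 15]: 2.34 rounds to 2, clamps to 3, stringifies
console.assert(
  normalizeProviderInput("kling-v3", { duration: 2.34 }).duration === "3",
);

// Sora 2 snaps to the nearest allowed duration: 7 becomes 8
console.assert(
  normalizeProviderInput("sora-2", { duration: 7 }).duration === 8,
);

// Models without a rule pass through unchanged
console.assert(
  normalizeProviderInput("unknown-model", { duration: 2.34 }).duration === 2.34,
);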
@@ -62,16 +62,30 @@ export interface VoiceResult {
 
 // Voice name to ID mapping. Unknown names pass through as voice_ids.
 const VOICE_MAP: Record<string, string> = {
-  rachel: VOICES.RACHEL,
-  domi: VOICES.DOMI,
-  sarah: VOICES.SARAH,
-  bella: VOICES.BELLA,
-  antoni: VOICES.ANTONI,
-  elli: VOICES.ELLI,
-  josh: VOICES.JOSH,
-  arnold: VOICES.ARNOLD,
+  // Current ElevenLabs premade voices (source: skills/varg-ai/references/models.md)
   adam: VOICES.ADAM,
-  sam: VOICES.SAM,
+  alice: VOICES.ALICE,
+  bella: VOICES.BELLA,
+  bill: VOICES.BILL,
+  brian: VOICES.BRIAN,
+  callum: VOICES.CALLUM,
+  charlie: VOICES.CHARLIE,
+  chris: VOICES.CHRIS,
+  daniel: VOICES.DANIEL,
+  eric: VOICES.ERIC,
+  george: VOICES.GEORGE,
+  harry: VOICES.HARRY,
+  jessica: VOICES.JESSICA,
+  laura: VOICES.LAURA,
+  liam: VOICES.LIAM,
+  lily: VOICES.LILY,
+  matilda: VOICES.MATILDA,
+  river: VOICES.RIVER,
+  roger: VOICES.ROGER,
+  sarah: VOICES.SARAH,
+  will: VOICES.WILL,
+  // Legacy
+  rachel: VOICES.RACHEL,
 };
 
 export async function generateVoice(
@@ -192,16 +192,30 @@ export class ElevenLabsProvider extends BaseProvider {
  * call the gateway's GET /v1/voices endpoint to browse/search.
  */
 export const VOICES = {
-  RACHEL: "21m00Tcm4TlvDq8ikWAM",
-  DOMI: "AZnzlk1XvdvUeBnXmlld",
-  SARAH: "EXAVITQu4vr4xnSDxMaL",
-  BELLA: "EXAVITQu4vr4xnSDxMaL", // alias — ElevenLabs calls this voice "Sarah"
-  ANTONI: "ErXwobaYiN019PkySvjV",
-  ELLI: "MF3mGyEYCl7XYWbV9V6O",
-  JOSH: "TxGEqnHWrfWFTfGW9XjX",
-  ARNOLD: "VR6AewLTigWG4xSOukaG",
+  // Current ElevenLabs premade voices (source: skills/varg-ai/references/models.md)
   ADAM: "pNInz6obpgDQGcFmaJgB",
-  SAM: "yoZ06aMxZJJ28mfd3POQ",
+  ALICE: "Xb7hH8MSUJpSbSDYk0k2",
+  BELLA: "hpp4J3VqNfWAUOO0d1Us",
+  BILL: "pqHfZKP75CvOlQylNhV4",
+  BRIAN: "nPczCjzI2devNBz1zQrb",
+  CALLUM: "N2lVS1w4EtoT3dr4eOWO",
+  CHARLIE: "IKne3meq5aSn9XLyUdCD",
+  CHRIS: "iP95p4xoKVk53GoZ742B",
+  DANIEL: "onwK4e9ZLuTAKqWW03F9",
+  ERIC: "cjVigY5qzO86Huf0OWal",
+  GEORGE: "JBFqnCBsd6RMkjVDRZzb",
+  HARRY: "SOYHLrjzK2X1ezoPC6cr",
+  JESSICA: "cgSgspJ2msm6clMCkdW9",
+  LAURA: "FGY2WhTYpPnrIDTdsKH5",
+  LIAM: "TX3LPaxmHKxFdv7VOQHJ",
+  LILY: "pFZP5JQG7iQjIQuC4Bku",
+  MATILDA: "XrExE9yKIg1WjnnlVkGX",
+  RIVER: "SAz9YHcvj6GT2YYXdXww",
+  ROGER: "CwhRBWXzGAHq8TQ4Fs17",
+  SARAH: "EXAVITQu4vr4xnSDxMaL",
+  WILL: "bIHbv24MWmeRgasZH58o",
+  // Legacy
+  RACHEL: "21m00Tcm4TlvDq8ikWAM",
 };
 
 // Export singleton instance (lazy initialization means no error on import)
@@ -92,24 +92,25 @@ async function renderClipLayers(
       const props = element.props as VideoProps;
       pending.push({
         type: "async",
-        promise: renderVideo(element as VargElement<"video">, ctx)
-          .then((file) => ctx.backend.resolvePath(file))
-          .then(
-            (path) =>
-              ({
-                type: "video",
-                path,
-                resizeMode: props.resize,
-                cropPosition: props.cropPosition,
-                cutFrom: props.cutFrom ?? clipOptions?.cutFrom,
-                cutTo: props.cutTo ?? clipOptions?.cutTo,
-                mixVolume: props.keepAudio ? (props.volume ?? 1) : 0,
-                left: props.left,
-                top: props.top,
-                width: props.width,
-                height: props.height,
-              }) as VideoLayer,
-          ),
+        promise: renderVideo(element as VargElement<"video">, ctx).then(
+          async (file) => {
+            const path = await ctx.backend.resolvePath(file);
+            return {
+              type: "video",
+              path,
+              sourceDuration: file.duration,
+              resizeMode: props.resize,
+              cropPosition: props.cropPosition,
+              cutFrom: props.cutFrom ?? clipOptions?.cutFrom,
+              cutTo: props.cutTo ?? clipOptions?.cutTo,
+              mixVolume: props.keepAudio ? (props.volume ?? 1) : 0,
+              left: props.left,
+              top: props.top,
+              width: props.width,
+              height: props.height,
+            } as VideoLayer;
+          },
+        ),
       });
       break;
     }
@@ -158,17 +159,16 @@ async function renderClipLayers(
         promise: renderTalkingHead(
           element as VargElement<"talking-head">,
           ctx,
-        )
-          .then((file) => ctx.backend.resolvePath(file))
-          .then(
-            (path) =>
-              ({
-                type: "video",
-                path,
-                resizeMode: "cover",
-                mixVolume: 1,
-              }) as VideoLayer,
-          ),
+        ).then(async (file) => {
+          const path = await ctx.backend.resolvePath(file);
+          return {
+            type: "video",
+            path,
+            sourceDuration: file.duration,
+            resizeMode: "cover",
+            mixVolume: 1,
+          } as VideoLayer;
+        }),
       });
       break;
     }
@@ -170,6 +170,7 @@ export async function renderVideo(
     type: "video",
     model: modelId,
     prompt: promptText,
+    duration: props.duration,
   });
 
   if (!file.url && ctx.storage) {