vargai 0.4.0-alpha13 → 0.4.0-alpha15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -64,7 +64,7 @@
64
64
  "replicate": "^1.4.0",
65
65
  "zod": "^4.2.1"
66
66
  },
67
- "version": "0.4.0-alpha13",
67
+ "version": "0.4.0-alpha15",
68
68
  "exports": {
69
69
  ".": "./src/index.ts",
70
70
  "./ai": "./src/ai-sdk/index.ts",
@@ -137,6 +137,15 @@ function isOverlayLayer(layer: Layer): boolean {
137
137
  return isVideoOverlayLayer(layer) || isImageOverlayLayer(layer);
138
138
  }
139
139
 
140
+ function isTextOverlayLayer(layer: Layer): boolean {
141
+ return (
142
+ layer.type === "title" ||
143
+ layer.type === "subtitle" ||
144
+ layer.type === "news-title" ||
145
+ layer.type === "slide-in-text"
146
+ );
147
+ }
148
+
140
149
  function buildBaseClipFilter(
141
150
  clip: ProcessedClip,
142
151
  clipIndex: number,
@@ -164,7 +173,10 @@ function buildBaseClipFilter(
164
173
  let baseLabel = "";
165
174
  let inputIdx = inputOffset;
166
175
 
167
- const baseLayers = clip.layers.filter((l) => l && !isOverlayLayer(l));
176
+ // Filter out overlay layers AND text overlay layers (text will be applied after image overlays)
177
+ const baseLayers = clip.layers.filter(
178
+ (l) => l && !isOverlayLayer(l) && !isTextOverlayLayer(l),
179
+ );
168
180
 
169
181
  for (let i = 0; i < baseLayers.length; i++) {
170
182
  const layer = baseLayers[i];
@@ -201,58 +213,6 @@ function buildBaseClipFilter(
201
213
  inputIdx++;
202
214
  }
203
215
  }
204
-
205
- if (layer.type === "title") {
206
- const titleFilter = getTitleFilter(
207
- layer as TitleLayer,
208
- baseLabel,
209
- width,
210
- height,
211
- clip.duration,
212
- );
213
- const newLabel = `title${clipIndex}_${i}`;
214
- filters.push(`${titleFilter}[${newLabel}]`);
215
- baseLabel = newLabel;
216
- }
217
-
218
- if (layer.type === "subtitle") {
219
- const subtitleFilter = getSubtitleFilter(
220
- layer as SubtitleLayer,
221
- baseLabel,
222
- width,
223
- height,
224
- clip.duration,
225
- );
226
- const newLabel = `sub${clipIndex}_${i}`;
227
- filters.push(`${subtitleFilter}[${newLabel}]`);
228
- baseLabel = newLabel;
229
- }
230
-
231
- if (layer.type === "news-title") {
232
- const newsFilter = getNewsTitleFilter(
233
- layer as NewsTitleLayer,
234
- baseLabel,
235
- width,
236
- height,
237
- clip.duration,
238
- );
239
- const newLabel = `news${clipIndex}_${i}`;
240
- filters.push(`${newsFilter}[${newLabel}]`);
241
- baseLabel = newLabel;
242
- }
243
-
244
- if (layer.type === "slide-in-text") {
245
- const slideFilter = getSlideInTextFilter(
246
- layer as SlideInTextLayer,
247
- baseLabel,
248
- width,
249
- height,
250
- clip.duration,
251
- );
252
- const newLabel = `slide${clipIndex}_${i}`;
253
- filters.push(`${slideFilter}[${newLabel}]`);
254
- baseLabel = newLabel;
255
- }
256
216
  }
257
217
 
258
218
  return {
@@ -358,6 +318,41 @@ function collectAudioLayers(
358
318
  return audioLayers;
359
319
  }
360
320
 
321
+ type TextLayer = TitleLayer | SubtitleLayer | NewsTitleLayer | SlideInTextLayer;
322
+
323
+ interface TimedTextLayer {
324
+ layer: TextLayer;
325
+ startTime: number;
326
+ duration: number;
327
+ }
328
+
329
+ function collectTextLayers(clips: ProcessedClip[]): TimedTextLayer[] {
330
+ const textLayers: TimedTextLayer[] = [];
331
+ let currentTime = 0;
332
+
333
+ for (let i = 0; i < clips.length; i++) {
334
+ const clip = clips[i];
335
+ if (!clip) continue;
336
+
337
+ for (const layer of clip.layers) {
338
+ if (layer && isTextOverlayLayer(layer)) {
339
+ textLayers.push({
340
+ layer: layer as TextLayer,
341
+ startTime: currentTime,
342
+ duration: clip.duration,
343
+ });
344
+ }
345
+ }
346
+
347
+ currentTime += clip.duration;
348
+ if (i < clips.length - 1) {
349
+ currentTime -= clip.transition.duration;
350
+ }
351
+ }
352
+
353
+ return textLayers;
354
+ }
355
+
361
356
  function buildTransitionFilter(
362
357
  fromLabel: string,
363
358
  toLabel: string,
@@ -744,6 +739,67 @@ export async function editly(config: EditlyConfig): Promise<void> {
744
739
  finalVideoLabel = currentBase;
745
740
  }
746
741
 
742
+ const textLayers = collectTextLayers(clips);
743
+ if (textLayers.length > 0) {
744
+ let currentBase = finalVideoLabel;
745
+
746
+ for (let i = 0; i < textLayers.length; i++) {
747
+ const timedLayer = textLayers[i];
748
+ if (!timedLayer) continue;
749
+
750
+ const { layer, startTime, duration } = timedLayer;
751
+ const outputLabel = `vwithtext${i}`;
752
+
753
+ const timedLayerWithEnable = {
754
+ ...layer,
755
+ start: layer.start ?? startTime,
756
+ stop: layer.stop ?? startTime + duration,
757
+ };
758
+
759
+ if (layer.type === "title") {
760
+ const titleFilter = getTitleFilter(
761
+ timedLayerWithEnable as TitleLayer,
762
+ currentBase,
763
+ width,
764
+ height,
765
+ totalDuration,
766
+ );
767
+ allFilters.push(`${titleFilter}[${outputLabel}]`);
768
+ } else if (layer.type === "subtitle") {
769
+ const subtitleFilter = getSubtitleFilter(
770
+ timedLayerWithEnable as SubtitleLayer,
771
+ currentBase,
772
+ width,
773
+ height,
774
+ totalDuration,
775
+ );
776
+ allFilters.push(`${subtitleFilter}[${outputLabel}]`);
777
+ } else if (layer.type === "news-title") {
778
+ const newsFilter = getNewsTitleFilter(
779
+ timedLayerWithEnable as NewsTitleLayer,
780
+ currentBase,
781
+ width,
782
+ height,
783
+ totalDuration,
784
+ );
785
+ allFilters.push(`${newsFilter}[${outputLabel}]`);
786
+ } else if (layer.type === "slide-in-text") {
787
+ const slideFilter = getSlideInTextFilter(
788
+ timedLayerWithEnable as SlideInTextLayer,
789
+ currentBase,
790
+ width,
791
+ height,
792
+ totalDuration,
793
+ );
794
+ allFilters.push(`${slideFilter}[${outputLabel}]`);
795
+ }
796
+
797
+ currentBase = outputLabel;
798
+ }
799
+
800
+ finalVideoLabel = currentBase;
801
+ }
802
+
747
803
  const clipAudioLayers = collectAudioLayers(clips);
748
804
  const videoInputCount = allInputs.length;
749
805
  const audioFilter = buildAudioFilter(
@@ -1,19 +1,38 @@
1
1
  import { existsSync, mkdirSync } from "node:fs";
2
- import { dirname, resolve } from "node:path";
2
+ import { resolve } from "node:path";
3
3
  import { defineCommand } from "citty";
4
4
  import { render } from "../../react/render";
5
- import type { RenderMode, VargElement } from "../../react/types";
5
+ import type { DefaultModels, RenderMode, VargElement } from "../../react/types";
6
6
 
7
7
  const AUTO_IMPORTS = `/** @jsxImportSource vargai */
8
8
  import { Animate, Captions, Clip, Image, Music, Overlay, Packshot, Render, Slider, Speech, Split, Subtitle, Swipe, TalkingHead, Title, Video, Grid, SplitLayout } from "vargai/react";
9
9
  import { fal, elevenlabs, replicate } from "vargai/ai";
10
10
  `;
11
11
 
12
+ async function detectDefaultModels(): Promise<DefaultModels | undefined> {
13
+ const defaults: DefaultModels = {};
14
+
15
+ if (process.env.FAL_KEY) {
16
+ const { fal } = await import("../../ai-sdk/providers/fal");
17
+ defaults.image = fal.imageModel("flux-schnell");
18
+ defaults.video = fal.videoModel("wan-2.5");
19
+ }
20
+
21
+ if (process.env.ELEVENLABS_API_KEY) {
22
+ const { elevenlabs } = await import("../../ai-sdk/providers/elevenlabs");
23
+ defaults.speech = elevenlabs.speechModel("eleven_multilingual_v2");
24
+ defaults.music = elevenlabs.musicModel("music_v1");
25
+ }
26
+
27
+ return Object.keys(defaults).length > 0 ? defaults : undefined;
28
+ }
29
+
12
30
  async function loadComponent(filePath: string): Promise<VargElement> {
13
31
  const resolvedPath = resolve(filePath);
14
32
  const source = await Bun.file(resolvedPath).text();
15
33
 
16
- const hasImports =
34
+ const hasAnyImport = source.includes(" from ");
35
+ const hasVargaiImport =
17
36
  source.includes("from 'vargai") ||
18
37
  source.includes('from "vargai') ||
19
38
  source.includes("from '@vargai") ||
@@ -22,22 +41,21 @@ async function loadComponent(filePath: string): Promise<VargElement> {
22
41
  const hasJsxPragma =
23
42
  source.includes("@jsxImportSource") || source.includes("@jsx ");
24
43
 
25
- if (hasImports && hasJsxPragma) {
44
+ // file has imports (relative or absolute) - import directly to preserve paths
45
+ if (hasAnyImport) {
26
46
  const mod = await import(resolvedPath);
27
47
  return mod.default;
28
48
  }
29
49
 
50
+ // no imports - inject auto-imports and jsx pragma
30
51
  const pkgDir = new URL("../../..", import.meta.url).pathname;
31
52
  const tmpDir = `${pkgDir}/.cache/varg-render`;
32
53
  if (!existsSync(tmpDir)) {
33
54
  mkdirSync(tmpDir, { recursive: true });
34
55
  }
35
56
 
36
- const prepended = hasImports
37
- ? `/** @jsxImportSource vargai */\n`
38
- : AUTO_IMPORTS;
39
57
  const tmpFile = `${tmpDir}/${Date.now()}.tsx`;
40
- await Bun.write(tmpFile, prepended + source);
58
+ await Bun.write(tmpFile, AUTO_IMPORTS + source);
41
59
 
42
60
  try {
43
61
  const mod = await import(tmpFile);
@@ -130,10 +148,13 @@ export const renderCmd = defineCommand({
130
148
 
131
149
  const useCache = !args["no-cache"] && mode !== "preview";
132
150
 
151
+ const defaults = await detectDefaultModels();
152
+
133
153
  const buffer = await render(component, {
134
154
  output: outputPath,
135
155
  cache: useCache ? args.cache : undefined,
136
156
  mode,
157
+ defaults,
137
158
  });
138
159
 
139
160
  if (!args.quiet) {
@@ -1,6 +1,7 @@
1
1
  import type { generateImage } from "ai";
2
2
  import type { fileCache } from "../../ai-sdk/file-cache";
3
3
  import type { generateVideo } from "../../ai-sdk/generate-video";
4
+ import type { DefaultModels } from "../types";
4
5
  import type { ProgressTracker } from "./progress";
5
6
 
6
7
  export interface RenderContext {
@@ -14,4 +15,6 @@ export interface RenderContext {
14
15
  progress?: ProgressTracker;
15
16
  /** In-memory deduplication for concurrent renders of the same element */
16
17
  pending: Map<string, Promise<string>>;
18
+ /** Default models for elements that don't specify one */
19
+ defaults?: DefaultModels;
17
20
  }
@@ -54,9 +54,11 @@ export async function renderImage(
54
54
  throw new Error("Image element requires either 'prompt' or 'src'");
55
55
  }
56
56
 
57
- const model = props.model;
57
+ const model = props.model ?? ctx.defaults?.image;
58
58
  if (!model) {
59
- throw new Error("Image element requires 'model' prop when using prompt");
59
+ throw new Error(
60
+ "Image element requires 'model' prop (or set defaults.image in render options)",
61
+ );
60
62
  }
61
63
 
62
64
  // Compute cache key for deduplication
@@ -10,9 +10,9 @@ export async function renderMusic(
10
10
  const props = element.props as MusicProps;
11
11
 
12
12
  const prompt = props.prompt;
13
- const model = props.model;
13
+ const model = props.model ?? ctx.defaults?.music;
14
14
  if (!prompt || !model) {
15
- throw new Error("Music generation requires both prompt and model");
15
+ throw new Error("Music requires prompt and model (or set defaults.music)");
16
16
  }
17
17
 
18
18
  const cacheKey = JSON.stringify({
@@ -70,42 +70,63 @@ export async function renderRoot(
70
70
  placeholderCount.total++;
71
71
  };
72
72
 
73
+ const cachedGenerateImage = options.cache
74
+ ? withCache(generateImage, { storage: fileCache({ dir: options.cache }) })
75
+ : generateImage;
76
+
77
+ const cachedGenerateVideo = options.cache
78
+ ? withCache(generateVideo, { storage: fileCache({ dir: options.cache }) })
79
+ : generateVideo;
80
+
73
81
  const wrapGenerateImage: typeof generateImage = async (opts) => {
74
82
  if (
75
83
  typeof opts.model === "string" ||
76
84
  opts.model.specificationVersion !== "v3"
77
85
  ) {
78
- return generateImage(opts);
86
+ return cachedGenerateImage(opts);
87
+ }
88
+
89
+ if (mode === "preview") {
90
+ trackPlaceholder("image");
91
+ }
92
+
93
+ try {
94
+ return await cachedGenerateImage(opts);
95
+ } catch (error) {
96
+ if (mode === "strict") throw error;
97
+ trackPlaceholder("image");
98
+ onFallback(error as Error, String(opts.prompt));
99
+ const wrappedModel = wrapImageModel({
100
+ model: opts.model,
101
+ middleware: imagePlaceholderFallbackMiddleware({
102
+ mode: "preview",
103
+ onFallback: () => {},
104
+ }),
105
+ });
106
+ return generateImage({ ...opts, model: wrappedModel });
79
107
  }
80
- const wrappedModel = wrapImageModel({
81
- model: opts.model,
82
- middleware: imagePlaceholderFallbackMiddleware({
83
- mode,
84
- onFallback: (error, prompt) => {
85
- trackPlaceholder("image");
86
- onFallback(error, prompt);
87
- },
88
- }),
89
- });
90
- const result = await generateImage({ ...opts, model: wrappedModel });
91
- if (mode === "preview") trackPlaceholder("image");
92
- return result;
93
108
  };
94
109
 
95
110
  const wrapGenerateVideo: typeof generateVideo = async (opts) => {
96
- const wrappedModel = wrapVideoModel({
97
- model: opts.model,
98
- middleware: placeholderFallbackMiddleware({
99
- mode,
100
- onFallback: (error, prompt) => {
101
- trackPlaceholder("video");
102
- onFallback(error, prompt);
103
- },
104
- }),
105
- });
106
- const result = await generateVideo({ ...opts, model: wrappedModel });
107
- if (mode === "preview") trackPlaceholder("video");
108
- return result;
111
+ if (mode === "preview") {
112
+ trackPlaceholder("video");
113
+ }
114
+
115
+ try {
116
+ return await cachedGenerateVideo(opts);
117
+ } catch (error) {
118
+ if (mode === "strict") throw error;
119
+ trackPlaceholder("video");
120
+ onFallback(error as Error, String(opts.prompt));
121
+ const wrappedModel = wrapVideoModel({
122
+ model: opts.model,
123
+ middleware: placeholderFallbackMiddleware({
124
+ mode: "preview",
125
+ onFallback: () => {},
126
+ }),
127
+ });
128
+ return generateVideo({ ...opts, model: wrappedModel });
129
+ }
109
130
  };
110
131
 
111
132
  const ctx: RenderContext = {
@@ -113,19 +134,12 @@ export async function renderRoot(
113
134
  height: props.height ?? 1080,
114
135
  fps: props.fps ?? 30,
115
136
  cache: options.cache ? fileCache({ dir: options.cache }) : undefined,
116
- generateImage: options.cache
117
- ? withCache(wrapGenerateImage, {
118
- storage: fileCache({ dir: options.cache }),
119
- })
120
- : wrapGenerateImage,
121
- generateVideo: options.cache
122
- ? withCache(wrapGenerateVideo, {
123
- storage: fileCache({ dir: options.cache }),
124
- })
125
- : wrapGenerateVideo,
137
+ generateImage: wrapGenerateImage,
138
+ generateVideo: wrapGenerateVideo,
126
139
  tempFiles: [],
127
140
  progress,
128
141
  pending: new Map(),
142
+ defaults: options.defaults,
129
143
  };
130
144
 
131
145
  const clipElements: VargElement<"clip">[] = [];
@@ -287,6 +301,7 @@ export async function renderRoot(
287
301
  fps: ctx.fps,
288
302
  clips,
289
303
  audioTracks: audioTracks.length > 0 ? audioTracks : undefined,
304
+ verbose: options.verbose,
290
305
  });
291
306
 
292
307
  completeTask(progress, editlyTaskId);
@@ -21,9 +21,9 @@ export async function renderSpeech(
21
21
  throw new Error("Speech element requires text content");
22
22
  }
23
23
 
24
- const model = props.model;
24
+ const model = props.model ?? ctx.defaults?.speech;
25
25
  if (!model) {
26
- throw new Error("Speech element requires 'model' prop");
26
+ throw new Error("Speech requires 'model' prop (or set defaults.speech)");
27
27
  }
28
28
 
29
29
  const cacheKey = computeCacheKey(element);
@@ -81,9 +81,11 @@ export async function renderVideo(
81
81
  throw new Error("Video element requires either 'prompt' or 'src'");
82
82
  }
83
83
 
84
- const model = props.model;
84
+ const model = props.model ?? ctx.defaults?.video;
85
85
  if (!model) {
86
- throw new Error("Video element requires 'model' prop when using prompt");
86
+ throw new Error(
87
+ "Video element requires 'model' prop (or set defaults.video in render options)",
88
+ );
87
89
  }
88
90
 
89
91
  // Compute cache key for deduplication
@@ -212,11 +212,20 @@ export interface PackshotProps extends BaseProps {
212
212
 
213
213
  export type RenderMode = "strict" | "default" | "preview";
214
214
 
215
+ export interface DefaultModels {
216
+ image?: ImageModelV3;
217
+ video?: VideoModelV3;
218
+ speech?: SpeechModelV3;
219
+ music?: MusicModelV3;
220
+ }
221
+
215
222
  export interface RenderOptions {
216
223
  output?: string;
217
224
  cache?: string;
218
225
  quiet?: boolean;
226
+ verbose?: boolean;
219
227
  mode?: RenderMode;
228
+ defaults?: DefaultModels;
220
229
  }
221
230
 
222
231
  export interface ElementPropsMap {