vargai 0.4.0-alpha30 → 0.4.0-alpha31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
package/package.json CHANGED
@@ -65,10 +65,11 @@
65
65
  "react-dom": "^19.2.0",
66
66
  "remotion": "^4.0.377",
67
67
  "replicate": "^1.4.0",
68
+ "sharp": "^0.34.5",
68
69
  "vargai": "^0.4.0-alpha11",
69
70
  "zod": "^4.2.1"
70
71
  },
71
- "version": "0.4.0-alpha30",
72
+ "version": "0.4.0-alpha31",
72
73
  "exports": {
73
74
  ".": "./src/index.ts",
74
75
  "./ai": "./src/ai-sdk/index.ts",
@@ -1105,4 +1105,50 @@ describe("editly", () => {
1105
1105
  expect(info.height).toBe(1920);
1106
1106
  expect(info.duration).toBeCloseTo(3, 0);
1107
1107
  });
1108
+
1109
+ test("video overlay with cropPosition", async () => {
1110
+ const outPath = "output/editly-test-crop-position.mp4";
1111
+ if (existsSync(outPath)) unlinkSync(outPath);
1112
+
1113
+ await editly({
1114
+ outPath,
1115
+ width: 1080,
1116
+ height: 1920,
1117
+ fps: 30,
1118
+ clips: [
1119
+ {
1120
+ duration: 3,
1121
+ layers: [
1122
+ { type: "fill-color", color: "#000000" },
1123
+ {
1124
+ type: "video",
1125
+ path: VIDEO_1,
1126
+ width: 1080,
1127
+ height: 960,
1128
+ left: 0,
1129
+ top: 0,
1130
+ resizeMode: "cover",
1131
+ cropPosition: "top",
1132
+ },
1133
+ {
1134
+ type: "video",
1135
+ path: VIDEO_2,
1136
+ width: 1080,
1137
+ height: 960,
1138
+ left: 0,
1139
+ top: 960,
1140
+ resizeMode: "cover",
1141
+ cropPosition: "bottom",
1142
+ },
1143
+ ],
1144
+ },
1145
+ ],
1146
+ });
1147
+
1148
+ expect(existsSync(outPath)).toBe(true);
1149
+ const info = await ffprobe(outPath);
1150
+ expect(info.width).toBe(1080);
1151
+ expect(info.height).toBe(1920);
1152
+ expect(info.duration).toBeCloseTo(3, 0);
1153
+ });
1108
1154
  });
@@ -1,4 +1,5 @@
1
1
  import type {
2
+ CropPosition,
2
3
  FillColorLayer,
3
4
  ImageLayer,
4
5
  ImageOverlayLayer,
@@ -15,6 +16,33 @@ import type {
15
16
  VideoLayer,
16
17
  } from "./types";
17
18
 
19
/**
 * Map a crop anchor to the `x`/`y` offset expressions placed in the
 * `crop=w:h:x:y` filter used for cover-mode overlays.
 *
 * The expressions use the crop filter's own variables (`iw`/`ih` for the
 * input size, `ow`/`oh` for the cropped size), so they are evaluated by
 * ffmpeg rather than computed here.
 *
 * @param position Anchor to keep in view; `undefined` (or any unrecognized
 *   value) falls back to a centered crop.
 * @returns The horizontal (`x`) and vertical (`y`) offset expressions.
 */
function getCropPositionExpr(position: CropPosition | undefined): {
  x: string;
  y: string;
} {
  // Per-axis offsets: flush to the start, centered, or flush to the end.
  const xStart = "0";
  const xCenter = "(iw-ow)/2";
  const xEnd = "iw-ow";
  const yStart = "0";
  const yCenter = "(ih-oh)/2";
  const yEnd = "ih-oh";

  const centered: readonly [string, string] = [xCenter, yCenter];
  const anchors = new Map<string | undefined, readonly [string, string]>([
    ["top-left", [xStart, yStart]],
    ["top", [xCenter, yStart]],
    ["top-right", [xEnd, yStart]],
    ["left", [xStart, yCenter]],
    ["center", centered],
    ["right", [xEnd, yCenter]],
    ["bottom-left", [xStart, yEnd]],
    ["bottom", [xCenter, yEnd]],
    ["bottom-right", [xEnd, yEnd]],
  ]);

  const [x, y] = anchors.get(position) ?? centered;
  return { x, y };
}
45
+
18
46
  function escapeDrawText(text: string): string {
19
47
  return text
20
48
  .replace(/\\/g, "\\\\")
@@ -161,9 +189,14 @@ export function getVideoFilterWithTrim(
161
189
  const layerHeight = parseSize(layer.height, height);
162
190
 
163
191
  if (isOverlay) {
164
- filters.push(
165
- `scale=${layerWidth}:${layerHeight}:force_original_aspect_ratio=decrease`,
166
- );
192
+ let scaleFilter = `scale=${layerWidth}:${layerHeight}:force_original_aspect_ratio=decrease`;
193
+ if (layer.resizeMode === "cover") {
194
+ const { x, y } = getCropPositionExpr(layer.cropPosition);
195
+ scaleFilter = `scale=${layerWidth}:${layerHeight}:force_original_aspect_ratio=increase,crop=${layerWidth}:${layerHeight}:${x}:${y}`;
196
+ } else if (layer.resizeMode === "stretch") {
197
+ scaleFilter = `scale=${layerWidth}:${layerHeight}`;
198
+ }
199
+ filters.push(scaleFilter);
167
200
  filters.push("setsar=1");
168
201
  filters.push("fps=30");
169
202
  filters.push("settb=1/30");
@@ -555,7 +588,13 @@ export function getTitleFilter(
555
588
  ): string {
556
589
  const text = escapeDrawText(layer.text);
557
590
  const color = layer.textColor ?? "white";
558
- const fontSize = Math.round(Math.min(width, height) * 0.08);
591
+
592
+ // Auto-size font to fit within 90% of frame width (same approach as subtitle)
593
+ const maxFontSize = Math.round(Math.min(width, height) * 0.08);
594
+ const maxTextWidth = width * 0.9;
595
+ // Average char width ≈ fontSize * 0.55 for sans-serif fonts
596
+ const fittedFontSize = Math.floor(maxTextWidth / (layer.text.length * 0.55));
597
+ const fontSize = Math.max(16, Math.min(maxFontSize, fittedFontSize));
559
598
 
560
599
  let x = "(w-text_w)/2";
561
600
  let y = "(h-text_h)/2";
@@ -587,7 +626,13 @@ export function getSubtitleFilter(
587
626
  const text = escapeDrawText(layer.text);
588
627
  const textColor = layer.textColor ?? "white";
589
628
  const bgColor = layer.backgroundColor ?? "black@0.7";
590
- const fontSize = Math.round(Math.min(width, height) * 0.05);
629
+
630
+ // Auto-size font to fit within 90% of frame width
631
+ const maxFontSize = Math.round(Math.min(width, height) * 0.05);
632
+ const maxTextWidth = width * 0.9;
633
+ // Average char width ≈ fontSize * 0.55 for sans-serif fonts
634
+ const fittedFontSize = Math.floor(maxTextWidth / (layer.text.length * 0.55));
635
+ const fontSize = Math.max(16, Math.min(maxFontSize, fittedFontSize));
591
636
  const boxPadding = Math.round(fontSize * 0.4);
592
637
 
593
638
  const fontFile = layer.fontPath
@@ -626,7 +671,14 @@ export function getTitleBackgroundFilter(
626
671
 
627
672
  const text = escapeDrawText(layer.text);
628
673
  const textColor = layer.textColor ?? "white";
629
- const fontSize = Math.round(Math.min(width, height) * 0.1);
674
+
675
+ // Auto-size font to fit within 90% of frame width
676
+ const maxFontSizeBg = Math.round(Math.min(width, height) * 0.1);
677
+ const maxTextWidthBg = width * 0.9;
678
+ const fittedFontSizeBg = Math.floor(
679
+ maxTextWidthBg / (layer.text.length * 0.55),
680
+ );
681
+ const fontSize = Math.max(16, Math.min(maxFontSizeBg, fittedFontSizeBg));
630
682
 
631
683
  const fontFile = layer.fontPath
632
684
  ? `:fontfile='${escapeDrawText(layer.fontPath)}'`
@@ -87,6 +87,21 @@ export interface TextLayer extends BaseLayer {
87
87
  fontFamily?: string;
88
88
  }
89
89
 
90
/** Vertical edges a crop can be pinned to. */
type CropVerticalAnchor = "top" | "bottom";

/** Horizontal edges a crop can be pinned to. */
type CropHorizontalAnchor = "left" | "right";

/**
 * Crop position anchor for cover mode: the centre, one of the four edges,
 * or one of the four `<vertical>-<horizontal>` corners.
 * NOTE: This is a varg extension to editly, not in the original.
 */
export type CropPosition =
  | "center"
  | CropVerticalAnchor
  | CropHorizontalAnchor
  | `${CropVerticalAnchor}-${CropHorizontalAnchor}`;
104
+
90
105
  /**
91
106
  * For video layers, if parent `clip.duration` is specified, the video will be slowed/sped-up to match `clip.duration`.
92
107
  * If `cutFrom`/`cutTo` is set, the resulting segment (`cutTo`-`cutFrom`) will be slowed/sped-up to fit `clip.duration`.
@@ -95,6 +110,7 @@ export interface VideoLayer extends BaseLayer {
95
110
  type: "video";
96
111
  path: string;
97
112
  resizeMode?: ResizeMode;
113
+ cropPosition?: CropPosition;
98
114
  cutFrom?: number;
99
115
  cutTo?: number;
100
116
  width?: SizeValue;
@@ -14,6 +14,11 @@ import { fal } from "@fal-ai/client";
14
14
  import type { VideoModelV3, VideoModelV3CallOptions } from "../video-model";
15
15
 
16
16
  const VIDEO_MODELS: Record<string, { t2v: string; i2v: string }> = {
17
+ // Kling v2.6 - latest with native audio generation
18
+ "kling-v2.6": {
19
+ t2v: "fal-ai/kling-video/v2.6/pro/text-to-video",
20
+ i2v: "fal-ai/kling-video/v2.6/pro/image-to-video",
21
+ },
17
22
  "kling-v2.5": {
18
23
  t2v: "fal-ai/kling-video/v2.5-turbo/pro/text-to-video",
19
24
  i2v: "fal-ai/kling-video/v2.5-turbo/pro/image-to-video",
@@ -40,6 +45,13 @@ const VIDEO_MODELS: Record<string, { t2v: string; i2v: string }> = {
40
45
  },
41
46
  };
42
47
 
48
/**
 * Motion control models - video-to-video with motion transfer.
 * Keys are the model ids accepted by this provider; values are the fal
 * endpoints they resolve to.
 */
const MOTION_CONTROL_MODELS: Record<string, string> = {
  // Pro endpoint
  "kling-v2.6-motion": "fal-ai/kling-video/v2.6/pro/motion-control",
  // Standard endpoint
  "kling-v2.6-motion-standard": "fal-ai/kling-video/v2.6/standard/motion-control",
};
54
+
43
55
  // lipsync models - video + audio input
44
56
  const LIPSYNC_MODELS: Record<string, string> = {
45
57
  "sync-v2": "fal-ai/sync-lipsync",
@@ -157,7 +169,7 @@ class FalVideoModel implements VideoModelV3 {
157
169
  } = options;
158
170
  const warnings: SharedV3Warning[] = [];
159
171
 
160
- const _hasVideoInput = files?.some((f) =>
172
+ const hasVideoInput = files?.some((f) =>
161
173
  getMediaType(f)?.startsWith("video/"),
162
174
  );
163
175
  const hasImageInput = files?.some((f) =>
@@ -168,15 +180,21 @@ class FalVideoModel implements VideoModelV3 {
168
180
  );
169
181
 
170
182
  const isLipsync = LIPSYNC_MODELS[this.modelId] !== undefined;
183
+ const isMotionControl = MOTION_CONTROL_MODELS[this.modelId] !== undefined;
184
+ const isKlingV26 = this.modelId === "kling-v2.6";
185
+
171
186
  const endpoint = isLipsync
172
187
  ? this.resolveLipsyncEndpoint()
173
- : this.resolveEndpoint(hasImageInput ?? false);
188
+ : isMotionControl
189
+ ? this.resolveMotionControlEndpoint()
190
+ : this.resolveEndpoint(hasImageInput ?? false);
174
191
 
175
192
  const input: Record<string, unknown> = {
176
193
  ...(providerOptions?.fal ?? {}),
177
194
  };
178
195
 
179
196
  if (isLipsync) {
197
+ // Lipsync: video + audio input
180
198
  const videoFile = files?.find((f) =>
181
199
  getMediaType(f)?.startsWith("video/"),
182
200
  );
@@ -190,21 +208,70 @@ class FalVideoModel implements VideoModelV3 {
190
208
  if (audioFile) {
191
209
  input.audio_url = await fileToUrl(audioFile);
192
210
  }
211
+ } else if (isMotionControl) {
212
+ // Motion control: image + reference video input
213
+ if (prompt) {
214
+ input.prompt = prompt;
215
+ }
216
+
217
+ const imageFile = files?.find((f) =>
218
+ getMediaType(f)?.startsWith("image/"),
219
+ );
220
+ const videoFile = files?.find((f) =>
221
+ getMediaType(f)?.startsWith("video/"),
222
+ );
223
+
224
+ if (imageFile) {
225
+ input.image_url = await fileToUrl(imageFile);
226
+ }
227
+ if (videoFile) {
228
+ input.video_url = await fileToUrl(videoFile);
229
+ }
230
+
231
+ // Default character orientation to 'video' for better motion matching
232
+ if (!input.character_orientation) {
233
+ input.character_orientation = "video";
234
+ }
235
+
236
+ // Default to keeping original sound
237
+ if (input.keep_original_sound === undefined) {
238
+ input.keep_original_sound = true;
239
+ }
193
240
  } else {
241
+ // Standard video generation
194
242
  input.prompt = prompt;
195
- input.duration = duration ?? 5;
243
+
244
+ // Duration must be string "5" or "10" for Kling v2.6
245
+ if (isKlingV26) {
246
+ input.duration = String(duration ?? 5);
247
+ } else {
248
+ input.duration = duration ?? 5;
249
+ }
196
250
 
197
251
  if (hasImageInput && files) {
198
- const imageFile = files.find((f) =>
252
+ const imageFiles = files.filter((f) =>
199
253
  getMediaType(f)?.startsWith("image/"),
200
254
  );
201
- if (imageFile) {
202
- input.image_url = await fileToUrl(imageFile);
255
+ if (imageFiles.length > 0) {
256
+ // First image is start image
257
+ input.image_url = await fileToUrl(imageFiles[0]!);
258
+ // Second image (if provided) is end image for Kling v2.6
259
+ if (isKlingV26 && imageFiles.length > 1) {
260
+ input.end_image_url = await fileToUrl(imageFiles[1]!);
261
+ }
203
262
  }
204
263
  } else {
205
264
  input.aspect_ratio = aspectRatio ?? "16:9";
206
265
  }
207
266
 
267
+ // Kling v2.6 supports native audio generation
268
+ if (isKlingV26) {
269
+ // Default to generating audio unless explicitly disabled
270
+ if (input.generate_audio === undefined) {
271
+ input.generate_audio = true;
272
+ }
273
+ }
274
+
208
275
  const audioFile = files?.find((f) =>
209
276
  getMediaType(f)?.startsWith("audio/"),
210
277
  );
@@ -283,6 +350,14 @@ class FalVideoModel implements VideoModelV3 {
283
350
 
284
351
  return LIPSYNC_MODELS[this.modelId] ?? this.modelId;
285
352
  }
353
+
354
/**
 * Resolve the endpoint for a motion-control request.
 *
 * A model id prefixed with `raw:` is used verbatim with the prefix removed;
 * otherwise the id is looked up in MOTION_CONTROL_MODELS, falling back to
 * the id itself when there is no entry.
 */
private resolveMotionControlEndpoint(): string {
  const id = this.modelId;
  const rawPrefix = "raw:";
  return id.startsWith(rawPrefix)
    ? id.slice(rawPrefix.length)
    : (MOTION_CONTROL_MODELS[id] ?? id);
}
286
361
  }
287
362
 
288
363
  class FalImageModel implements ImageModelV3 {
@@ -17,7 +17,7 @@ export {
17
17
  Title,
18
18
  Video,
19
19
  } from "./elements";
20
- export { Grid, SplitLayout } from "./layouts";
20
+ export { Grid, Slot, SplitLayout } from "./layouts";
21
21
  export { render, renderStream } from "./render";
22
22
  export type {
23
23
  CaptionsProps,
@@ -21,7 +21,7 @@ export const Grid = ({
21
21
  top: `${(Math.floor(i / cols) / rowCount) * 100}%`,
22
22
  width: `${(1 / cols) * 100}%`,
23
23
  height: `${(1 / rowCount) * 100}%`,
24
- resize,
24
+ resize: (el.props as Record<string, unknown>).resize ?? resize,
25
25
  },
26
26
  }));
27
27
  return <>{positioned}</>;
@@ -1,2 +1,3 @@
1
1
  export { Grid } from "./grid";
2
- export { SplitLayout } from "./split";
2
+ export { Slot } from "./slot";
3
+ export { Split, SplitLayout } from "./split";
@@ -0,0 +1,85 @@
1
+ import type {
2
+ CropPosition,
3
+ ResizeMode,
4
+ } from "../../ai-sdk/providers/editly/types";
5
+ import type { VargElement } from "../types";
6
+
7
/** How the slotted element is fitted into its cell. */
type SlotFit = "cover" | "contain" | "contain-blur" | "fill";

type SlotVerticalAnchor = "top" | "bottom";
type SlotHorizontalAnchor = "left" | "right";

/** Anchor forwarded to the slotted element as its `cropPosition`. */
type SlotPosition =
  | "center"
  | SlotVerticalAnchor
  | SlotHorizontalAnchor
  | `${SlotVerticalAnchor}-${SlotHorizontalAnchor}`;

/** Props accepted by `Slot`. */
interface SlotProps {
  /** Space-separated utility classes, e.g. `fit-cover pos-top`. */
  class?: string;
  /** Explicit fit; wins over any `fit-*` class. */
  fit?: SlotFit;
  /** Explicit anchor; wins over any `pos-*` class. */
  position?: SlotPosition;
  /** The single element whose props are augmented. */
  children: VargElement;
}

/** The subset of slot settings that can be expressed through `class`. */
type ParsedSlotClass = Pick<SlotProps, "fit" | "position">;
31
+
32
/** `fit-*` utility classes and the fit each one selects. */
const SLOT_FIT_CLASSES = new Map<string, SlotFit>([
  ["fit-cover", "cover"],
  ["fit-contain", "contain"],
  ["fit-contain-blur", "contain-blur"],
  ["fit-fill", "fill"],
]);

/** `pos-*` utility classes and the anchor each one selects. */
const SLOT_POSITION_CLASSES = new Map<string, SlotPosition>([
  ["pos-center", "center"],
  ["pos-top", "top"],
  ["pos-bottom", "bottom"],
  ["pos-left", "left"],
  ["pos-right", "right"],
  ["pos-top-left", "top-left"],
  ["pos-top-right", "top-right"],
  ["pos-bottom-left", "bottom-left"],
  ["pos-bottom-right", "bottom-right"],
]);

/**
 * Extract fit and position settings from a whitespace-separated class string.
 *
 * Unrecognized classes are ignored. When several classes of the same family
 * appear, the last one wins. A missing or empty string yields `{}`.
 */
function parseSlotClass(classString?: string): ParsedSlotClass {
  if (!classString) return {};

  const parsed: ParsedSlotClass = {};
  for (const token of classString.trim().split(/\s+/)) {
    const fit = SLOT_FIT_CLASSES.get(token);
    if (fit !== undefined) {
      parsed.fit = fit;
      continue;
    }
    const position = SLOT_POSITION_CLASSES.get(token);
    if (position !== undefined) {
      parsed.position = position;
    }
  }
  return parsed;
}
53
+
54
/**
 * Translate a slot fit into the resize mode set on the slotted element.
 * Every fit maps to the mode of the same name except `fill`, which becomes
 * `stretch`.
 */
function slotFitToResize(fit: SlotFit): ResizeMode {
  const resizeByFit: Record<SlotFit, ResizeMode> = {
    cover: "cover",
    contain: "contain",
    "contain-blur": "contain-blur",
    fill: "stretch",
  };
  return resizeByFit[fit];
}
66
+
67
/**
 * Wrap a single element and stamp `resize` and `cropPosition` onto its props.
 *
 * Settings are resolved in this order: explicit `fit`/`position` props, then
 * `fit-*`/`pos-*` classes from `class`, then the defaults `cover`/`center`.
 * The child is returned as a shallow copy; the original element is not
 * mutated.
 */
export const Slot = ({
  class: className,
  fit,
  position,
  children,
}: SlotProps) => {
  const fromClass = parseSlotClass(className);
  const resize = slotFitToResize(fit ?? fromClass.fit ?? "cover");
  const cropPosition: CropPosition =
    position ?? fromClass.position ?? "center";

  const props = { ...children.props, resize, cropPosition };
  return { ...children, props } as VargElement;
};
@@ -18,3 +18,21 @@ export const SplitLayout = ({
18
18
  {right}
19
19
  </Grid>
20
20
  );
21
+
22
/**
 * Lay children out side by side (`horizontal`, the default) or stacked
 * (`vertical`) by delegating to `Grid` with one cell per child.
 * Renders nothing when there are no children.
 */
export const Split = ({
  direction = "horizontal",
  children,
}: {
  direction?: "horizontal" | "vertical";
  children: VargElement[];
}) => {
  const cellCount = children.length;
  if (cellCount === 0) return null;

  // One axis gets a cell per child; the other axis stays at a single cell.
  const columns = direction === "horizontal" ? cellCount : 1;
  const rows = direction === "vertical" ? cellCount : 1;

  return (
    <Grid columns={columns} rows={rows}>
      {children}
    </Grid>
  );
};
@@ -69,14 +69,14 @@ interface SubtitleStyle {
69
69
  const STYLE_PRESETS: Record<string, SubtitleStyle> = {
70
70
  tiktok: {
71
71
  fontName: "Montserrat",
72
- fontSize: 32,
72
+ fontSize: 72,
73
73
  primaryColor: "&HFFFFFF",
74
74
  outlineColor: "&H000000",
75
- backColor: "&H80000000",
75
+ backColor: "&H00000000",
76
76
  bold: true,
77
- outline: 3,
77
+ outline: 4,
78
78
  shadow: 0,
79
- marginV: 50,
79
+ marginV: 480,
80
80
  alignment: 2,
81
81
  },
82
82
  karaoke: {
@@ -164,10 +164,17 @@ function formatAssTime(seconds: number): string {
164
164
  return `${h}:${String(m).padStart(2, "0")}:${String(s).padStart(2, "0")}.${String(cs).padStart(2, "0")}`;
165
165
  }
166
166
 
167
- function convertSrtToAss(srtContent: string, style: SubtitleStyle): string {
167
+ function convertSrtToAss(
168
+ srtContent: string,
169
+ style: SubtitleStyle,
170
+ width: number,
171
+ height: number,
172
+ ): string {
168
173
  const assHeader = `[Script Info]
169
174
  Title: Generated Subtitles
170
175
  ScriptType: v4.00+
176
+ PlayResX: ${width}
177
+ PlayResY: ${height}
171
178
  WrapStyle: 0
172
179
  ScaledBorderAndShadow: yes
173
180
  YCbCr Matrix: TV.601
@@ -193,6 +200,12 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
193
200
  return assHeader + assDialogues;
194
201
  }
195
202
 
203
/**
 * Caption position name → ASS style `Alignment` value.
 * The numbers follow the numpad layout used by ASS v4+ styles; all three
 * entries are horizontally centred.
 */
const POSITION_ALIGNMENT: Record<string, number> = {
  top: 8, // top-centre
  center: 5, // middle-centre
  bottom: 2, // bottom-centre
};
208
+
196
209
  function colorToAss(color: string): string {
197
210
  if (color.startsWith("&H")) return color;
198
211
 
@@ -280,15 +293,21 @@ export async function renderCaptions(
280
293
  const styleName = props.style ?? "tiktok";
281
294
  const baseStyle = STYLE_PRESETS[styleName] ?? STYLE_PRESETS.tiktok!;
282
295
 
296
+ const alignment = props.position
297
+ ? (POSITION_ALIGNMENT[props.position] ?? baseStyle.alignment)
298
+ : baseStyle.alignment;
299
+
283
300
  const style: SubtitleStyle = {
284
301
  ...baseStyle,
285
302
  fontSize: props.fontSize ?? baseStyle.fontSize,
286
303
  primaryColor: props.color
287
304
  ? colorToAss(props.color)
288
305
  : baseStyle.primaryColor,
306
+ alignment,
307
+ marginV: props.position === "center" ? 0 : baseStyle.marginV,
289
308
  };
290
309
 
291
- const assContent = convertSrtToAss(srtContent, style);
310
+ const assContent = convertSrtToAss(srtContent, style, ctx.width, ctx.height);
292
311
  const assPath = `/tmp/varg-captions-${Date.now()}.ass`;
293
312
  writeFileSync(assPath, assContent);
294
313
  ctx.tempFiles.push(assPath);
@@ -10,6 +10,7 @@ import type {
10
10
  import type {
11
11
  ClipProps,
12
12
  ImageProps,
13
+ MusicProps,
13
14
  SpeechProps,
14
15
  VargElement,
15
16
  VargNode,
@@ -17,6 +18,7 @@ import type {
17
18
  } from "../types";
18
19
  import type { RenderContext } from "./context";
19
20
  import { renderImage } from "./image";
21
+ import { renderMusic } from "./music";
20
22
  import { renderPackshot } from "./packshot";
21
23
  import { renderSlider } from "./slider";
22
24
  import { renderSpeech } from "./speech";
@@ -24,6 +26,7 @@ import { renderSplit } from "./split";
24
26
  import { renderSubtitle } from "./subtitle";
25
27
  import { renderSwipe } from "./swipe";
26
28
  import { renderTitle } from "./title";
29
+ import { resolvePath } from "./utils";
27
30
  import { renderVideo } from "./video";
28
31
 
29
32
  type PendingLayer =
@@ -90,7 +93,7 @@ async function renderClipLayers(
90
93
  type: "video",
91
94
  path,
92
95
  resizeMode: props.resize,
93
- // Video-level cutFrom/cutTo take precedence over clip-level
96
+ cropPosition: props.cropPosition,
94
97
  cutFrom: props.cutFrom ?? clipOptions?.cutFrom,
95
98
  cutTo: props.cutTo ?? clipOptions?.cutTo,
96
99
  mixVolume: props.keepAudio ? (props.volume ?? 1) : 0,
@@ -136,6 +139,35 @@ async function renderClipLayers(
136
139
  break;
137
140
  }
138
141
 
142
+ case "music": {
143
+ const props = element.props as MusicProps;
144
+ pending.push({
145
+ type: "async",
146
+ promise: (async () => {
147
+ let path: string;
148
+ if (props.src) {
149
+ path = resolvePath(props.src);
150
+ } else if (props.prompt) {
151
+ const result = await renderMusic(
152
+ element as VargElement<"music">,
153
+ ctx,
154
+ );
155
+ path = result.path;
156
+ } else {
157
+ throw new Error("Music requires either src or prompt");
158
+ }
159
+ return {
160
+ type: "audio",
161
+ path,
162
+ mixVolume: props.volume ?? 1,
163
+ cutFrom: props.cutFrom,
164
+ cutTo: props.cutTo,
165
+ } as AudioLayer;
166
+ })(),
167
+ });
168
+ break;
169
+ }
170
+
139
171
  case "split": {
140
172
  pending.push({
141
173
  type: "async",
@@ -40,6 +40,11 @@ export async function renderMusic(
40
40
  const cached = await ctx.cache.get(cacheKey);
41
41
  if (cached) {
42
42
  audioData = cached as Uint8Array;
43
+ // Signal cache hit to progress tracker
44
+ if (taskId && ctx.progress) {
45
+ startTask(ctx.progress, taskId);
46
+ completeTask(ctx.progress, taskId);
47
+ }
43
48
  } else {
44
49
  if (taskId && ctx.progress) startTask(ctx.progress, taskId);
45
50
  audioData = await generateFn();