vargai 0.4.0-alpha54 → 0.4.0-alpha55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -70,7 +70,7 @@
     "zod": "^4.2.1"
   },
   "sideEffects": false,
-  "version": "0.4.0-alpha54",
+  "version": "0.4.0-alpha55",
   "exports": {
     ".": "./src/index.ts",
     "./ai": "./src/ai-sdk/index.ts",
@@ -1334,4 +1334,79 @@ describe("editly", () => {
 
     expect(existsSync(outPath)).toBe(true);
   });
+
+  // Regression test for issue #123
+  // https://github.com/vargHQ/sdk/issues/123
+  test("issue #123: clip with only positioned videos (no base layer) generates valid filter", async () => {
+    // Bug: when a clip has only positioned videos (from Split/Slot) and no base layer,
+    // buildBaseClipFilter returns an empty outputLabel "", causing ffmpeg to crash with
+    // a "Bad (empty?) label found" error like "[]concat=n=2:v=1:a=0..."
+    // Fix: auto-insert a fill-color base when the clip has overlays but no base
+    const outPath = "output/editly-test-issue-123-no-base.mp4";
+    if (existsSync(outPath)) unlinkSync(outPath);
+
+    await editly({
+      outPath,
+      width: 1080,
+      height: 1920,
+      fps: 30,
+      clips: [
+        {
+          duration: 2,
+          layers: [{ type: "fill-color", color: "#ff0000" }],
+          transition: { name: "fade", duration: 0.5 },
+        },
+        {
+          duration: 2,
+          layers: [
+            // Only positioned videos — no base layer
+            {
+              type: "video",
+              path: VIDEO_1,
+              width: "50%",
+              height: "100%",
+              left: "0%",
+              top: "0%",
+              resizeMode: "cover",
+            },
+            {
+              type: "video",
+              path: VIDEO_2,
+              width: "50%",
+              height: "100%",
+              left: "50%",
+              top: "0%",
+              resizeMode: "cover",
+            },
+          ],
+          transition: { name: "fade", duration: 0.5 },
+        },
+        {
+          duration: 2,
+          layers: [{ type: "fill-color", color: "#0000ff" }],
+        },
+      ],
+    });
+
+    expect(existsSync(outPath)).toBe(true);
+    const info = await ffprobe(outPath);
+    expect(info.duration).toBeGreaterThan(4);
+  });
+
+  test("issue #123: clip with no layers at all throws clear error", async () => {
+    await expect(
+      editly({
+        outPath: "output/editly-test-issue-123-empty.mp4",
+        width: 640,
+        height: 480,
+        fps: 30,
+        clips: [
+          {
+            duration: 2,
+            layers: [],
+          },
+        ],
+      }),
+    ).rejects.toThrow("produced no video output");
+  });
 });
@@ -1,5 +1,6 @@
 import { type FFmpegBackend, localBackend } from "./backends";
 import {
+  getFillColorFilter,
   getImageOverlayFilter,
   getImageOverlayPositionFilter,
   getNewsTitleFilter,
@@ -204,7 +205,7 @@ function buildBaseClipFilter(
     cutFrom: number;
     mixVolume?: number | string;
   }[] = [];
-  let baseLabel = "";
+  let baseLabel: string | undefined;
   let inputIdx = inputOffset;
 
   const baseLayers = clip.layers.filter(
@@ -252,6 +253,18 @@ function buildBaseClipFilter(
     }
   }
 
+  if (!baseLabel && clipLocalOverlays.length > 0) {
+    const fillFilter = getFillColorFilter(
+      { type: "fill-color", color: "#000000" },
+      inputIdx,
+      width,
+      height,
+      clip.duration,
+    );
+    filters.push(fillFilter.filterComplex);
+    baseLabel = fillFilter.outputLabel;
+  }
+
   for (let i = 0; i < clipLocalOverlays.length; i++) {
     const layer = clipLocalOverlays[i];
     if (!layer) continue;
@@ -270,7 +283,7 @@ function buildBaseClipFilter(
 
     const outputLabel = `clip${clipIndex}ov${i}`;
     const positionFilter = getOverlayFilter(
-      baseLabel,
+      baseLabel!,
      overlayFilter.outputLabel,
      layer,
      width,
@@ -282,6 +295,12 @@ function buildBaseClipFilter(
     inputIdx++;
   }
 
+  if (!baseLabel) {
+    throw new Error(
+      `Clip ${clipIndex} produced no video output — ensure it has at least one visual layer (video, image, or fill-color)`,
+    );
+  }
+
   return {
     filters,
     inputs,
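
Condensed, the three hunks above give buildBaseClipFilter this control flow (a sketch assembled from the diff, with surrounding code elided):

let baseLabel: string | undefined; // was: let baseLabel = ""

// ...base layers, if any, set baseLabel...

// New: synthesize a black fill-color base when a clip has only positioned
// overlays, so downstream overlay/concat filters never see an empty "[]" label.
if (!baseLabel && clipLocalOverlays.length > 0) {
  const fill = getFillColorFilter(
    { type: "fill-color", color: "#000000" },
    inputIdx,
    width,
    height,
    clip.duration,
  );
  filters.push(fill.filterComplex);
  baseLabel = fill.outputLabel;
}

// ...overlays are composited onto baseLabel!...

// New: a clip with no visual layers at all now fails fast with a clear error
// instead of emitting a broken filter graph.
if (!baseLabel) throw new Error(`Clip ${clipIndex} produced no video output ...`);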
@@ -19,5 +19,4 @@ export interface RenderContext {
   defaults?: DefaultModels;
   backend: FFmpegBackend;
   generatedFiles: File[];
-  providerKeys?: Record<string, string | undefined>;
 }
@@ -1,7 +1,10 @@
-import { spawn } from "node:child_process";
-import { mkdir, rm } from "node:fs/promises";
 import path from "node:path";
 import sharp from "sharp";
+import type {
+  FFmpegBackend,
+  FFmpegOutput,
+} from "../../../ai-sdk/providers/editly/backends/types";
+import { uploadBuffer } from "../../../providers/storage";
 
 export interface BlinkingButtonOptions {
   text: string;
@@ -17,14 +20,24 @@ export interface BlinkingButtonOptions {
   buttonHeight?: number; // Button height in pixels
 }
 
-/**
- * Parse hex color to RGB values
- */
+export interface BlinkingButtonResult {
+  /** Output video local file path or cloud URL */
+  output: FFmpegOutput;
+  /** X offset for overlaying on the full video frame */
+  x: number;
+  /** Y offset for overlaying on the full video frame */
+  y: number;
+  /** Canvas width of the output video */
+  canvasWidth: number;
+  /** Canvas height of the output video */
+  canvasHeight: number;
+}
+
+// ─── Helpers ─────────────────────────────────────────────────────────────────
+
 function hexToRgb(hex: string): { r: number; g: number; b: number } {
   const result = /^#?([a-f\d]{2})([a-f\d]{2})([a-f\d]{2})$/i.exec(hex);
-  if (!result) {
-    return { r: 255, g: 107, b: 0 }; // Default orange
-  }
+  if (!result) return { r: 255, g: 107, b: 0 };
   return {
     r: parseInt(result[1] as string, 16),
     g: parseInt(result[2] as string, 16),
@@ -32,16 +45,10 @@ function hexToRgb(hex: string): { r: number; g: number; b: number } {
   };
 }
 
-/**
- * Clamp value to max (for color brightening)
- */
 function clamp(value: number, max = 255): number {
   return Math.min(Math.floor(value), max);
 }
 
-/**
- * Create SVG for button background with gradient and rounded corners
- */
 function createButtonSvg(
   width: number,
   height: number,
@@ -60,25 +67,81 @@ function createButtonSvg(
   </svg>`;
 }
 
+function escapeXml(text: string): string {
+  return text
+    .replace(/&/g, "&amp;")
+    .replace(/</g, "&lt;")
+    .replace(/>/g, "&gt;")
+    .replace(/"/g, "&quot;")
+    .replace(/'/g, "&apos;");
+}
+
+function getButtonYPosition(
+  position: "top" | "center" | "bottom",
+  videoHeight: number,
+  buttonHeight: number,
+): number {
+  switch (position) {
+    case "top":
+      return Math.floor(videoHeight * 0.15);
+    case "center":
+      return Math.floor((videoHeight - buttonHeight) / 2);
+    default:
+      return Math.floor(videoHeight * 0.78 - buttonHeight / 2);
+  }
+}
+
+/** Ensure even dimension for ffmpeg */
+function even(n: number): number {
+  return n % 2 === 0 ? n : n + 1;
+}
+
 /**
- * Create a blinking CTA button video using Sharp for image generation
- * and ffmpeg for video assembly.
- *
- * Matches Python SDK quality:
- * - Gradient background (lighter top -> darker bottom)
- * - Rounded corners (45% of height)
- * - Scale animation (1.0 -> 1.03)
- * - Brightness animation (0.85 -> 1.2)
- * - Custom font support (TikTokSans-Bold)
+ * Elastic ease oscillator as an ffmpeg expression.
+ * Period P seconds, using time variable `tv` ("t" for scale/eq, "T" for geq).
+ * Returns 0 → 1.15 (overshoot) → 1.0 (settle) → 0 (fall) per cycle.
  */
-export interface BlinkingButtonResult {
-  path: string;
-  x: number;
-  y: number;
+function oscExpr(tv: string, P: number): string {
+  const ph = `(mod(${tv},${P})/${P})`;
+  return `if(lt(${ph},0.25),sin(${ph}/0.25*PI/2)*1.15,if(lt(${ph},0.4),1.15-0.15*(${ph}-0.25)/0.15,cos((${ph}-0.4)/0.6*PI/2)))`;
 }
 
+/**
+ * Resolve a local file path to a URL for cloud backends.
+ * Local backend: returns the path as-is.
+ * Cloud backend: uploads the file and returns the URL.
+ */
+async function resolvePathForBackend(
+  localPath: string,
+  backend: FFmpegBackend,
+): Promise<string> {
+  if (backend.name === "local") return localPath;
+  const buffer = await Bun.file(localPath).arrayBuffer();
+  const key = `tmp/${Date.now()}-${localPath.split("/").pop()}`;
+  return uploadBuffer(buffer, key, "image/png");
+}
+
+// ─── Main ────────────────────────────────────────────────────────────────────
+
+/**
+ * Create a blinking CTA button video using Sharp for static PNG rendering
+ * and a single FFmpeg filter_complex for all animation.
+ *
+ * Architecture:
+ *   1. Sharp renders 2 static PNGs: button (native size) + glow (canvas size)
+ *   2. FFmpeg filter_complex does per-frame animation via expressions:
+ *      - eq(gamma, eval=frame) for brightness pulse (0.85x → 1.2x)
+ *      - scale(eval=frame) for elastic zoom pulse (1.0x → 1.14x)
+ *      - overlay with (W-w)/2 centering for perfect bbox alignment
+ *      - Glow scales 15% larger with 60% max opacity baked in
+ *   3. Output is ProRes 4444 with alpha channel
+ *
+ * Works on both local (ffmpeg binary) and cloud (rendi) backends
+ * via the FFmpegBackend abstraction.
+ */
 export async function createBlinkingButton(
   options: BlinkingButtonOptions,
+  backend: FFmpegBackend,
 ): Promise<BlinkingButtonResult> {
   const {
     text,
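
The string oscExpr emits is the ffmpeg-expression form of the per-frame curve that the deleted frame loop below computed in JavaScript. As a plain-TypeScript rendering of the same piecewise curve (hypothetical name oscAt; the constants match both the old loop and the new expression):

function oscAt(t: number, P: number): number {
  const ph = (t % P) / P; // 0 → 1 within each cycle
  // Fast rise with overshoot to 1.15
  if (ph < 0.25) return Math.sin((ph / 0.25) * Math.PI * 0.5) * 1.15;
  // Settle back from 1.15 to 1.0
  if (ph < 0.4) return 1.15 - 0.15 * ((ph - 0.25) / 0.15);
  // Slow ease-out fall back to 0
  return Math.cos(((ph - 0.4) / 0.6) * Math.PI * 0.5);
}

Scale is then 1.0 + 0.12 * osc and brightness 0.85 + 0.35 * max(0, osc), exactly as in the gammaExpr and btnSW/btnSH expressions in the hunk below.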
@@ -92,26 +155,24 @@ export async function createBlinkingButton(
     position = "bottom",
   } = options;
 
-  const totalFrames = Math.ceil(duration * fps);
-
-  // Button dimensions — large and prominent like app store CTAs
   const btnWidth = options.buttonWidth ?? Math.floor(width * 0.7);
   const btnHeight = options.buttonHeight ?? Math.floor(height * 0.09);
   const cornerRadius = Math.floor(btnHeight * 0.45);
-
-  // Animation padding (button can grow ~14% with overshoot + glow radius)
-  const maxScale = 1.14; // accounts for 1.12 * 1.15 overshoot peak
   const glowRadius = 18;
-  const glowExtraScale = 1.15; // glow is 15% larger than button
-  const totalMaxScale = maxScale * glowExtraScale; // ~1.31 for glow bounds
-  const scalePadding = Math.ceil(
-    Math.max(btnWidth, btnHeight) * (totalMaxScale - 1.0) * 2,
+
+  // Canvas sizing: must fit button at max animation scale + glow spread
+  const totalMaxScale = 1.14 * 1.15; // button overshoot * glow extra
+  const scalePad = Math.ceil(
+    Math.max(btnWidth, btnHeight) * (totalMaxScale - 1) * 2,
   );
-  const padding = scalePadding + glowRadius * 2;
-  const canvasWidth = btnWidth + padding * 2;
-  const canvasHeight = btnHeight + padding * 2;
+  const padding = scalePad + glowRadius * 2;
+  const cw = even(btnWidth + padding * 2);
+  const ch = even(btnHeight + padding * 2);
+  const btnNativeW = even(btnWidth);
+  const btnNativeH = even(btnHeight);
+
+  // ── Step 1: Render PNGs with Sharp ─────────────────────────────────────────
 
-  // Parse colors and create gradient (lighter top, darker bottom)
   const rgb = hexToRgb(bgColor);
   const topColor = {
     r: clamp(rgb.r * 1.15),
@@ -124,289 +185,189 @@ export async function createBlinkingButton(
     b: Math.floor(rgb.b * 0.95),
   };
 
-  // Font path (relative to this file's compiled location)
-  const fontPath = path.resolve(
-    import.meta.dirname,
-    "../../../assets/fonts/TikTokSans-Bold.ttf",
+  const svgBuf = Buffer.from(
+    createButtonSvg(btnWidth, btnHeight, cornerRadius, topColor, bottomColor),
   );
 
-  // Create button SVG with gradient
-  const buttonSvg = createButtonSvg(
-    btnWidth,
-    btnHeight,
-    cornerRadius,
-    topColor,
-    bottomColor,
+  const fontPath = path.resolve(
+    import.meta.dirname,
+    "../../../../assets/fonts/TikTokSans-Bold.ttf",
   );
 
-  // Create text image using Sharp's text feature
   const fontSize = Math.floor(btnHeight * 0.55);
-  const textBuffer = await sharp({
+  const textBuf = await sharp({
     text: {
       text: `<span foreground="${textColor}" font_weight="bold">${escapeXml(text)}</span>`,
       font: "TikTokSans",
       fontfile: fontPath,
       rgba: true,
       align: "center",
-      dpi: Math.floor(fontSize * 2.8), // Larger DPI for bolder text
+      dpi: Math.floor(fontSize * 2.8),
     },
   })
     .png()
     .toBuffer();
 
-  // Get text dimensions for centering
-  const textMeta = await sharp(textBuffer).metadata();
-  const textWidth = textMeta.width ?? 0;
-  const textHeight = textMeta.height ?? 0;
+  const textMeta = await sharp(textBuf).metadata();
+  const tw = textMeta.width ?? 0;
+  const th = textMeta.height ?? 0;
 
-  // Create base button frame (button + text on transparent canvas)
-  const baseButtonBuffer = await sharp({
+  // Button at native size (small, for fast eq/scale processing)
+  const btnNativeBuf = await sharp({
     create: {
-      width: canvasWidth,
-      height: canvasHeight,
+      width: btnNativeW,
+      height: btnNativeH,
       channels: 4,
       background: { r: 0, g: 0, b: 0, alpha: 0 },
     },
   })
    .composite([
-      // Button background (centered in canvas)
+      { input: svgBuf, top: 0, left: 0 },
       {
-        input: Buffer.from(buttonSvg),
-        top: padding,
-        left: padding,
+        input: textBuf,
+        top: Math.floor((btnHeight - th) / 2),
+        left: Math.floor((btnWidth - tw) / 2),
       },
-      // Text centered on button
+    ])
+    .png()
+    .toBuffer();
+
+  // Button at canvas size (for glow generation — blur needs surrounding pixels)
+  const btnCenterX = Math.floor((cw - btnWidth) / 2);
+  const btnCenterY = Math.floor((ch - btnHeight) / 2);
+
+  const btnCanvasBuf = await sharp({
+    create: {
+      width: cw,
+      height: ch,
+      channels: 4,
+      background: { r: 0, g: 0, b: 0, alpha: 0 },
+    },
+  })
+    .composite([
+      { input: svgBuf, top: btnCenterY, left: btnCenterX },
       {
-        input: textBuffer,
-        top: padding + Math.floor((btnHeight - textHeight) / 2),
-        left: padding + Math.floor((btnWidth - textWidth) / 2),
+        input: textBuf,
+        top: btnCenterY + Math.floor((btnHeight - th) / 2),
+        left: btnCenterX + Math.floor((btnWidth - tw) / 2),
       },
     ])
     .png()
     .toBuffer();
 
-  // Pre-render glow buffer: blurred, brightened copy of the button for halo effect
-  const glowBuffer = await sharp(baseButtonBuffer)
+  // Glow: blur + brighten + bake 60% max opacity
+  const glowRaw = await sharp(btnCanvasBuf)
     .blur(glowRadius)
     .modulate({ brightness: 1.4 })
+    .raw()
+    .toBuffer({ resolveWithObject: true });
+
+  for (let i = 3; i < glowRaw.data.length; i += 4) {
+    glowRaw.data[i] = Math.round((glowRaw.data[i] as number) * 0.6);
+  }
+
+  const glowBuf = await sharp(glowRaw.data, {
+    raw: {
+      width: glowRaw.info.width,
+      height: glowRaw.info.height,
+      channels: 4,
+    },
+  })
    .png()
    .toBuffer();
 
-  // Calculate button position on full frame
-  const btnY = getButtonYPosition(position, height, canvasHeight);
-  const btnX = Math.floor((width - canvasWidth) / 2);
-
-  // Create frames directory for intermediate files
-  const framesDir = `/tmp/varg-btn-frames-${Date.now()}`;
-  await mkdir(framesDir, { recursive: true });
-
-  // Generate animation frames
-  // Using file-based approach for reliability with alpha channel
-  for (let i = 0; i < totalFrames; i++) {
-    const t = i / fps;
-    // Elastic pulse curve: fast expand with overshoot, settle, slow contract
-    const phase = (t % blinkFrequency) / blinkFrequency; // 0 -> 1 within each cycle
-    let osc: number;
-    if (phase < 0.25) {
-      // Fast rise with overshoot to 1.15
-      osc = Math.sin((phase / 0.25) * Math.PI * 0.5) * 1.15;
-    } else if (phase < 0.4) {
-      // Settle back from 1.15 to 1.0
-      const settle = (phase - 0.25) / 0.15;
-      osc = 1.15 - 0.15 * settle;
-    } else {
-      // Slow ease-out fall back to 0
-      const fall = (phase - 0.4) / 0.6;
-      osc = Math.cos(fall * Math.PI * 0.5);
-    }
-
-    const scale = 1.0 + 0.12 * osc; // 1.0 -> 1.14 -> 1.12 -> 1.0
-    const brightness = 0.85 + 0.35 * Math.max(0, osc); // 0.85 -> 1.2 -> 0.85
-    const glowOpacity = Math.max(0, osc) * 0.6; // 0 -> 0.6 -> 0
-
-    const scaledW = Math.round(canvasWidth * scale);
-    const scaledH = Math.round(canvasHeight * scale);
-
-    // Calculate offset to keep button centered after scaling
-    const offsetX = Math.floor((canvasWidth - scaledW) / 2);
-    const offsetY = Math.floor((canvasHeight - scaledH) / 2);
-
-    // Scale button, apply brightness, then fit to canvas
-    let btnPipeline = sharp(baseButtonBuffer)
-      .resize(scaledW, scaledH, { kernel: "lanczos3" })
-      .modulate({ brightness });
-
-    if (scaledW > canvasWidth || scaledH > canvasHeight) {
-      // Button exceeds canvas during overshoot — crop from center
-      const cropLeft = Math.floor((scaledW - canvasWidth) / 2);
-      const cropTop = Math.floor((scaledH - canvasHeight) / 2);
-      btnPipeline = btnPipeline.extract({
-        left: Math.max(0, cropLeft),
-        top: Math.max(0, cropTop),
-        width: Math.min(scaledW, canvasWidth),
-        height: Math.min(scaledH, canvasHeight),
-      });
-    } else {
-      btnPipeline = btnPipeline.extend({
-        top: Math.max(0, offsetY),
-        bottom: Math.max(0, canvasHeight - scaledH - offsetY),
-        left: Math.max(0, offsetX),
-        right: Math.max(0, canvasWidth - scaledW - offsetX),
-        background: { r: 0, g: 0, b: 0, alpha: 0 },
-      });
-    }
-
-    const btnFrame = await btnPipeline.png().toBuffer();
-
-    // Scale glow slightly larger than button for halo effect
-    const glowScale = scale * 1.15;
-    const glowW = Math.round(canvasWidth * glowScale);
-    const glowH = Math.round(canvasHeight * glowScale);
-    const glowOffX = Math.floor((canvasWidth - glowW) / 2);
-    const glowOffY = Math.floor((canvasHeight - glowH) / 2);
-
-    // Render glow frame with animated opacity
-    // Scale alpha channel using raw pixel manipulation for precise opacity control
-    let glowResized: sharp.Sharp;
-    if (glowW > canvasWidth || glowH > canvasHeight) {
-      // Glow is larger than canvas — resize then crop to canvas from center
-      const cropLeft = Math.floor((glowW - canvasWidth) / 2);
-      const cropTop = Math.floor((glowH - canvasHeight) / 2);
-      glowResized = sharp(glowBuffer)
-        .resize(glowW, glowH, { kernel: "lanczos3" })
-        .extract({
-          left: Math.max(0, cropLeft),
-          top: Math.max(0, cropTop),
-          width: canvasWidth,
-          height: canvasHeight,
-        });
-    } else {
-      // Glow fits — extend with transparent padding
-      glowResized = sharp(glowBuffer)
-        .resize(glowW, glowH, { kernel: "lanczos3" })
-        .extend({
-          top: Math.max(0, glowOffY),
-          bottom: Math.max(0, canvasHeight - glowH - glowOffY),
-          left: Math.max(0, glowOffX),
-          right: Math.max(0, canvasWidth - glowW - glowOffX),
-          background: { r: 0, g: 0, b: 0, alpha: 0 },
-        });
-    }
-
-    const { data: glowPixels, info: glowInfo } = await glowResized
-      .raw()
-      .toBuffer({ resolveWithObject: true });
-
-    // Multiply alpha channel by glowOpacity
-    for (let p = 3; p < glowPixels.length; p += 4) {
-      glowPixels[p] = Math.round((glowPixels[p] as number) * glowOpacity);
-    }
-
-    const glowFrame = await sharp(glowPixels, {
-      raw: {
-        width: glowInfo.width,
-        height: glowInfo.height,
-        channels: 4,
-      },
-    })
-      .png()
-      .toBuffer();
-
-    // Composite: transparent canvas <- glow (behind) <- button (on top)
-    await sharp({
-      create: {
-        width: canvasWidth,
-        height: canvasHeight,
-        channels: 4,
-        background: { r: 0, g: 0, b: 0, alpha: 0 },
-      },
-    })
-      .composite([
-        { input: glowFrame, top: 0, left: 0 },
-        { input: btnFrame, top: 0, left: 0 },
-      ])
-      .png()
-      .toFile(`${framesDir}/frame_${String(i).padStart(5, "0")}.png`);
-  }
+  // Write PNGs to temp files
+  const ts = Date.now();
+  const btnPngPath = `/tmp/varg-btn-${ts}.png`;
+  const glowPngPath = `/tmp/varg-glow-${ts}.png`;
 
-  // Combine frames into video with alpha channel (ProRes 4444)
-  const outputPath = `/tmp/varg-blink-btn-${Date.now()}.mov`;
-
-  await runFfmpeg([
-    "-y",
-    "-framerate",
-    String(fps),
-    "-i",
-    `${framesDir}/frame_%05d.png`,
-    "-c:v",
-    "prores_ks",
-    "-profile:v",
-    "4444",
-    "-pix_fmt",
-    "yuva444p10le",
-    "-t",
-    String(duration),
-    outputPath,
+  await Promise.all([
+    Bun.write(btnPngPath, btnNativeBuf),
+    Bun.write(glowPngPath, glowBuf),
   ]);
 
-  // Cleanup frames directory
-  await rm(framesDir, { recursive: true, force: true });
-
-  return { path: outputPath, x: btnX, y: btnY };
-}
+  // ── Step 2: Build ffmpeg filter_complex ────────────────────────────────────
+
+  const P = blinkFrequency;
+  const osc = oscExpr("t", P);
+
+  // eq gamma for brightness: 0.85 at rest → 1.2 at peak
+  const gammaExpr = `0.85+0.35*max(0,${osc})`;
+
+  // Button scale (on native-size input)
+  const btnSW = `ceil(${btnNativeW}*(1.0+0.12*(${osc}))/2)*2`;
+  const btnSH = `ceil(${btnNativeH}*(1.0+0.12*(${osc}))/2)*2`;
+
+  // Glow scale (15% larger, on canvas-size input)
+  const glowSW = `ceil(${cw}*(1.0+0.12*(${osc}))*1.15/2)*2`;
+  const glowSH = `ceil(${ch}*(1.0+0.12*(${osc}))*1.15/2)*2`;
+
+  // Filter complex: uses overlay for centering (no crop+pad drift)
+  const filterComplex = [
+    // Three transparent canvases (base + one per animated layer)
+    `color=0x00000000:s=${cw}x${ch}:r=${fps}:d=${duration},format=rgba[base]`,
+    `color=0x00000000:s=${cw}x${ch}:r=${fps}:d=${duration},format=rgba[btn_canvas]`,
+    `color=0x00000000:s=${cw}x${ch}:r=${fps}:d=${duration},format=rgba[glow_canvas]`,
+
+    // Button: split alpha → eq(gamma) → merge alpha → scale → center on canvas
+    `[0:v]format=rgba,split[btn_rgb][btn_a]`,
+    `[btn_a]alphaextract[alpha]`,
+    `[btn_rgb]eq=gamma='${gammaExpr}':eval=frame[btn_eq]`,
+    `[btn_eq][alpha]alphamerge,format=rgba,` +
+      `scale=w='${btnSW}':h='${btnSH}':eval=frame:flags=lanczos` +
+      `[btn_scaled]`,
+    `[btn_canvas][btn_scaled]overlay=x='(W-w)/2':y='(H-h)/2':format=auto:eval=frame:shortest=1[btn]`,
+
+    // Glow: scale → center on canvas (opacity baked in PNG)
+    `[1:v]format=rgba,` +
+      `scale=w='${glowSW}':h='${glowSH}':eval=frame:flags=lanczos` +
+      `[glow_scaled]`,
+    `[glow_canvas][glow_scaled]overlay=x='(W-w)/2':y='(H-h)/2':format=auto:eval=frame:shortest=1[glow]`,
+
+    // Final composite: base → glow → button
+    `[base][glow]overlay=format=auto:shortest=1[bg]`,
+    `[bg][btn]overlay=format=auto:shortest=1[out]`,
+  ].join(";");
+
+  // ── Step 3: Run ffmpeg via backend ─────────────────────────────────────────
+
+  // Resolve PNG paths for cloud backends (uploads to storage)
+  const btnInput = await resolvePathForBackend(btnPngPath, backend);
+  const glowInput = await resolvePathForBackend(glowPngPath, backend);
+
+  const outputPath = `/tmp/varg-blink-btn-${ts}.mov`;
+
+  const result = await backend.run({
+    inputs: [
+      { path: btnInput, options: ["-loop", "1"] },
+      { path: glowInput, options: ["-loop", "1"] },
+    ],
+    filterComplex,
+    outputArgs: [
+      "-map",
+      "[out]",
+      "-c:v",
+      "prores_ks",
+      "-profile:v",
+      "4444",
+      "-pix_fmt",
+      "yuva444p10le",
+      "-t",
+      String(duration),
+    ],
+    outputPath,
+  });
 
-/**
- * Calculate button Y position based on position prop
- */
-function getButtonYPosition(
-  position: "top" | "center" | "bottom",
-  videoHeight: number,
-  buttonHeight: number,
-): number {
-  switch (position) {
-    case "top":
-      return Math.floor(videoHeight * 0.15);
-    case "center":
-      return Math.floor((videoHeight - buttonHeight) / 2);
-    default:
-      return Math.floor(videoHeight * 0.78 - buttonHeight / 2);
-  }
-}
+  // ── Calculate overlay position on full video frame ─────────────────────────
 
-/**
- * Escape XML special characters for SVG/Pango text
- */
-function escapeXml(text: string): string {
-  return text
-    .replace(/&/g, "&amp;")
-    .replace(/</g, "&lt;")
-    .replace(/>/g, "&gt;")
-    .replace(/"/g, "&quot;")
-    .replace(/'/g, "&apos;");
-}
+  const btnY = getButtonYPosition(position, height, ch);
+  const btnX = Math.floor((width - cw) / 2);
 
-/**
- * Run ffmpeg command and wait for completion
- */
-function runFfmpeg(args: string[]): Promise<void> {
-  return new Promise((resolve, reject) => {
-    const ffmpeg = spawn("ffmpeg", args, {
-      stdio: ["pipe", "pipe", "pipe"],
-    });
-
-    let stderr = "";
-    ffmpeg.stderr?.on("data", (data) => {
-      stderr += data.toString();
-    });
-
-    ffmpeg.on("close", (code) => {
-      if (code === 0) {
-        resolve();
-      } else {
-        reject(new Error(`ffmpeg exited with code ${code}: ${stderr}`));
-      }
-    });
-
-    ffmpeg.on("error", reject);
-  });
+  return {
    output: result.output,
+    x: btnX,
+    y: btnY,
+    canvasWidth: cw,
+    canvasHeight: ch,
+  };
 }
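
A minimal call-site sketch for the new two-argument signature, mirroring the options renderPackshot passes below; localBackend is the export seen in the editly imports earlier in this diff, and the import paths here are assumed:

import { localBackend } from "./backends"; // path assumed
import { createBlinkingButton } from "./packshot/blinking-button"; // path assumed

const btn = await createBlinkingButton(
  {
    text: "Install now",
    width: 1080,
    height: 1920,
    duration: 3,
    fps: 30,
    bgColor: "#FF6B00",
    textColor: "#FFFFFF",
    blinkFrequency: 0.8,
    position: "bottom",
  },
  localBackend, // any FFmpegBackend; cloud backends get the PNG inputs uploaded first
);
// btn.output is { type: "file", path } locally (or { type: "url", url } on cloud);
// btn.x / btn.y place the btn.canvasWidth x btn.canvasHeight canvas on the frame.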
@@ -1,4 +1,5 @@
 import { editly } from "../../ai-sdk/providers/editly";
+import type { FFmpegOutput } from "../../ai-sdk/providers/editly/backends/types";
 import type {
   Clip,
   ImageOverlayLayer,
@@ -8,11 +9,29 @@ import type {
   SizeValue,
   TitleLayer,
 } from "../../ai-sdk/providers/editly/types";
+import { uploadBuffer } from "../../providers/storage";
 import type { PackshotProps, VargElement } from "../types";
 import type { RenderContext } from "./context";
 import { renderImage } from "./image";
 import { createBlinkingButton } from "./packshot/blinking-button";
 
+/**
+ * Resolve an FFmpegOutput to a string path/URL, uploading local files for cloud backends.
+ */
+async function resolveInputMaybeUpload(
+  input: FFmpegOutput,
+  shouldUpload: boolean,
+): Promise<string> {
+  if (input.type === "url") return input.url;
+  if (!shouldUpload) return input.path;
+  const buffer = await Bun.file(input.path).arrayBuffer();
+  return uploadBuffer(
+    buffer,
+    `tmp/${Date.now()}-${input.path.split("/").pop()}`,
+    "application/octet-stream",
+  );
+}
+
 /**
  * Type guard: returns true if `pos` is a PositionObject ({ x, y }).
@@ -165,39 +184,61 @@ export async function renderPackshot(
     height: ctx.height,
     fps: ctx.fps,
     clips: [clip],
+    backend: ctx.backend,
   });
 
   // ===== BLINKING CTA OVERLAY =====
   if (props.cta && props.blinkCta) {
-    // Create animated button with Sharp at button-size canvas (fast)
-    const btn = await createBlinkingButton({
-      text: props.cta,
-      width: ctx.width,
-      height: ctx.height,
-      duration,
-      fps: ctx.fps,
-      bgColor: props.ctaColor ?? "#FF6B00",
-      textColor: props.ctaTextColor ?? "#FFFFFF",
-      blinkFrequency: props.blinkFrequency ?? 0.8,
-      position: mapCtaPosition(props.ctaPosition, ctx.height),
-      buttonWidth: props.ctaSize?.width,
-      buttonHeight: props.ctaSize?.height,
-    });
+    const btn = await createBlinkingButton(
+      {
+        text: props.cta,
+        width: ctx.width,
+        height: ctx.height,
+        duration,
+        fps: ctx.fps,
+        bgColor: props.ctaColor ?? "#FF6B00",
+        textColor: props.ctaTextColor ?? "#FFFFFF",
+        blinkFrequency: props.blinkFrequency ?? 0.8,
+        position: mapCtaPosition(props.ctaPosition, ctx.height),
+        buttonWidth: props.ctaSize?.width,
+        buttonHeight: props.ctaSize?.height,
+      },
+      ctx.backend,
+    );
+
+    // Composite button overlay at correct position on base video via backend
+    const isCloud = ctx.backend.name !== "local";
+    const baseInput = await resolveInputMaybeUpload(
+      { type: "file", path: basePath },
+      isCloud,
+    );
+    const btnInput = await resolveInputMaybeUpload(btn.output, isCloud);
 
-    // Composite button-sized overlay at correct position on base video
     const finalPath = `/tmp/varg-packshot-final-${Date.now()}.mp4`;
-    const { $ } = await import("bun");
-
-    // Overlay the blinking button (with alpha) on the packshot
-    await $`ffmpeg -y \
-      -i ${basePath} \
-      -i ${btn.path} \
-      -filter_complex "[0:v][1:v]overlay=${btn.x}:${btn.y}:format=auto" \
-      -c:v libx264 -preset fast -crf 18 -pix_fmt yuv420p \
-      ${finalPath}`.quiet();
-
-    ctx.tempFiles.push(basePath, btn.path);
-    return finalPath;
+
+    const overlayResult = await ctx.backend.run({
+      inputs: [baseInput, btnInput],
+      filterComplex: `[0:v][1:v]overlay=${btn.x}:${btn.y}:format=auto`,
+      outputArgs: [
+        "-c:v",
+        "libx264",
+        "-preset",
+        "fast",
+        "-crf",
+        "18",
+        "-pix_fmt",
+        "yuv420p",
+      ],
+      outputPath: finalPath,
+    });
+
+    if (overlayResult.output.type === "file") {
+      ctx.tempFiles.push(basePath, overlayResult.output.path);
+      return overlayResult.output.path;
+    }
+    // Cloud backend returns URL
+    ctx.tempFiles.push(basePath);
+    return overlayResult.output.url;
   }
 
   ctx.tempFiles.push(basePath);
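
Both call sites in this diff drive ffmpeg through the same backend.run() contract. Inferred from those two invocations (not the published declaration, which may differ), the interface looks roughly like:

interface FFmpegRunArgs {
  // plain path/URL strings, or { path, options } for per-input flags such as -loop
  inputs: Array<string | { path: string; options?: string[] }>;
  filterComplex: string;
  outputArgs: string[]; // codec flags, e.g. ["-c:v", "libx264", "-crf", "18"]
  outputPath: string;
}

interface FFmpegBackend {
  name: string; // "local", or a cloud backend such as "rendi"
  run(args: FFmpegRunArgs): Promise<{ output: FFmpegOutput }>;
}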
@@ -72,6 +72,7 @@ export async function renderSlider(
     height: ctx.height,
     fps: ctx.fps,
     clips,
+    backend: ctx.backend,
   });
 
   ctx.tempFiles.push(outPath);
@@ -83,6 +83,7 @@ export async function renderSwipe(
     height: ctx.height,
     fps: ctx.fps,
     clips,
+    backend: ctx.backend,
   });
 
   ctx.tempFiles.push(outPath);
@@ -274,7 +274,6 @@ export interface RenderOptions {
   defaults?: DefaultModels;
   backend?: FFmpegBackend;
   storage?: StorageProvider;
-  providerKeys?: Record<string, string | undefined>;
 }
 
 // Re-export from file module for convenience
package/.env.example DELETED
@@ -1,33 +0,0 @@
1
- # fal.ai api key
2
- FAL_API_KEY=fal_xxx
3
-
4
- # higgsfield credentials
5
- HIGGSFIELD_API_KEY=hf_xxx
6
- HIGGSFIELD_SECRET=secret_xxx
7
-
8
- # elevenlabs api key
9
- ELEVENLABS_API_KEY=el_xxx
10
-
11
- # groq api key (ultra-fast whisper transcription)
12
- GROQ_API_KEY=gsk_xxx
13
-
14
- # fireworks api key (word-level transcription with timestamps)
15
- FIREWORKS_API_KEY=fw_xxx
16
-
17
- # cloudflare r2 / s3 storage
18
- CLOUDFLARE_R2_API_URL=https://xxx.r2.cloudflarestorage.com
19
- CLOUDFLARE_ACCESS_KEY_ID=xxx
20
- CLOUDFLARE_ACCESS_SECRET=xxx
21
- CLOUDFLARE_R2_BUCKET=m
22
-
23
- # replicate (optional)
24
- REPLICATE_API_TOKEN=r8_xxx
25
-
26
- # apify (web scraping actors)
27
- APIFY_TOKEN=apify_api_xxx
28
-
29
- # decart ai (real-time & batch video/image)
30
- DECART_API_KEY=decart_xxx
31
-
32
- # together ai (fast flux-schnell, no queue)
33
- TOGETHER_API_KEY=together_xxx
@@ -1,46 +0,0 @@
1
- import { elevenlabs, fal } from "vargai/ai";
2
- import {
3
- Captions,
4
- Clip,
5
- Image,
6
- Render,
7
- render,
8
- Speech,
9
- Video,
10
- } from "vargai/react";
11
-
12
- // Garry Tan's face image
13
- const GARRY_TAN_IMAGE =
14
- "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRLInrQmohhXbLY10G90yT0AJJCJdArYifp-IDKUmJQlwui0tnLihiHw9OzLfLfbofr5chH2s4&s=10";
15
-
16
- // Create speech first
17
- const voiceover = Speech({
18
- model: elevenlabs.speechModel("eleven_v3"),
19
- voice: "adam",
20
- children: "varg.ai is cool!",
21
- });
22
-
23
- // Animate Garry's face to talk
24
- const animatedGarry = Video({
25
- prompt: {
26
- text: "man speaking naturally, subtle head movements, friendly expression",
27
- images: [GARRY_TAN_IMAGE],
28
- },
29
- model: fal.videoModel("kling-v2.5"),
30
- });
31
-
32
- // Sync lip movements with audio
33
- const syncedVideo = Video({
34
- prompt: { video: animatedGarry, audio: voiceover },
35
- model: fal.videoModel("sync-v2-pro"),
36
- });
37
-
38
- await render(
39
- <Render width={1080} height={1920}>
40
- <Clip duration={5}>{syncedVideo}</Clip>
41
- <Captions src={voiceover} style="tiktok" color="#ffffff" />
42
- </Render>,
43
- { output: "output/garry-tan-varg.mp4" },
44
- );
45
-
46
- console.log("Done! Video saved to output/garry-tan-varg.mp4");
@@ -1,61 +0,0 @@
1
- /**
2
- * Garry Tan Talking Head Video
3
- * Generate a video of Garry Tan saying "varg.ai is cool!"
4
- */
5
-
6
- import {
7
- generateImage,
8
- experimental_generateSpeech as generateSpeech,
9
- } from "ai";
10
- import { elevenlabs, File, fal, generateVideo } from "../index";
11
-
12
- async function main() {
13
- const script = `varg.ai is cool!`;
14
-
15
- console.log("generating Garry Tan image and voice in parallel...");
16
- const [imageResult, speechResult] = await Promise.all([
17
- generateImage({
18
- model: fal.imageModel("flux-schnell"),
19
- prompt:
20
- "Garry Tan, Y Combinator CEO, Asian American man, short dark hair, glasses, friendly smile, professional headshot, studio lighting, clean background, looking at camera",
21
- n: 1,
22
- }),
23
- generateSpeech({
24
- model: elevenlabs.speechModel("turbo"),
25
- text: script,
26
- voice: "adam",
27
- }),
28
- ]);
29
-
30
- const firstImage = imageResult.images[0];
31
- if (!firstImage) throw new Error("No image generated");
32
- const image = File.from(firstImage);
33
- const audio = File.from(speechResult.audio);
34
-
35
- console.log(`image: ${(await image.data()).byteLength} bytes`);
36
- console.log(`audio: ${(await audio.data()).byteLength} bytes`);
37
-
38
- await Bun.write("output/garry-tan-image.png", await image.data());
39
- await Bun.write("output/garry-tan-voice.mp3", await audio.data());
40
-
41
- console.log("\nanimating Garry Tan (5 seconds)...");
42
- const { video } = await generateVideo({
43
- model: fal.videoModel("wan-2.5"),
44
- prompt: {
45
- text: "man talking naturally, moving mouth while speaking, subtle head movements, professional demeanor, blinking naturally",
46
- images: [await image.data()],
47
- },
48
- duration: 5,
49
- });
50
-
51
- const output = File.from(video);
52
- console.log(`video: ${(await output.data()).byteLength} bytes`);
53
- await Bun.write("output/garry-tan-varg.mp4", await output.data());
54
-
55
- console.log("\ndone! files saved to output/");
56
- console.log("- output/garry-tan-image.png");
57
- console.log("- output/garry-tan-voice.mp3");
58
- console.log("- output/garry-tan-varg.mp4");
59
- }
60
-
61
- main().catch(console.error);
@@ -1,52 +0,0 @@
1
- /**
2
- * Garry Tan Talking Head Video
3
- * Using vargai/react JSX syntax with lipsync
4
- */
5
-
6
- import { elevenlabs, fal, higgsfield } from "../../ai-sdk";
7
- import { Captions, Clip, Image, Music, Render, render, Speech, Video } from "..";
8
-
9
- const CHARACTER = "Garry Tan, Y Combinator CEO, Asian American man in his 40s, short dark hair, modern glasses, friendly confident smile, professional headshot, studio lighting, clean modern office background";
10
-
11
- const baseCharacter = Image({
12
- prompt: CHARACTER,
13
- model: higgsfield.imageModel("soul", { styleId: higgsfield.styles.REALISTIC }),
14
- aspectRatio: "9:16",
15
- });
16
-
17
- const animatedCharacter = Video({
18
- prompt: { text: "man speaking naturally, subtle head movements, friendly professional expression, blinking naturally", images: [baseCharacter] },
19
- model: fal.videoModel("kling-v2.5"),
20
- });
21
-
22
- const voiceover = Speech({
23
- model: elevenlabs.speechModel("eleven_v3"),
24
- voice: "adam",
25
- children: "varg.ai is cool!"
26
- });
27
-
28
- async function main() {
29
- console.log("Creating Garry Tan talking head video...\n");
30
-
31
- const video = (
32
- <Render width={1080} height={1920}>
33
- <Music prompt="modern tech ambient, subtle electronic, minimal, professional" model={elevenlabs.musicModel()} volume={0.1} />
34
- <Clip duration={5}>
35
- <Video prompt={{ video: animatedCharacter, audio: voiceover }} model={fal.videoModel("sync-v2-pro")} />
36
- </Clip>
37
- <Captions src={voiceover} style="tiktok" color="#ffffff" />
38
- </Render>
39
- );
40
-
41
- console.log("Rendering video with lipsync...");
42
-
43
- const buffer = await render(video, {
44
- output: "output/garry-tan-varg-react.mp4",
45
- cache: ".cache/ai",
46
- });
47
-
48
- console.log(`\nDone! ${buffer.byteLength} bytes`);
49
- console.log("Output: output/garry-tan-varg-react.mp4");
50
- }
51
-
52
- main().catch(console.error);