vargai 0.4.0-alpha4 → 0.4.0-alpha40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. package/.env.example +6 -0
  2. package/README.md +483 -61
  3. package/assets/fonts/TikTokSans-Bold.ttf +0 -0
  4. package/examples/grok-imagine-test.tsx +155 -0
  5. package/launch-videos/06-kawaii-fruits.tsx +93 -0
  6. package/launch-videos/07-ugc-weight-loss.tsx +132 -0
  7. package/launch-videos/08-talking-head-varg.tsx +107 -0
  8. package/launch-videos/09-girl.tsx +160 -0
  9. package/launch-videos/README.md +42 -0
  10. package/package.json +10 -4
  11. package/pipeline/cookbooks/round-video-character.md +1 -1
  12. package/skills/varg-video-generation/SKILL.md +224 -0
  13. package/skills/varg-video-generation/references/templates.md +380 -0
  14. package/skills/varg-video-generation/scripts/setup.ts +265 -0
  15. package/src/ai-sdk/cache.ts +1 -3
  16. package/src/ai-sdk/examples/google-image.ts +62 -0
  17. package/src/ai-sdk/index.ts +10 -0
  18. package/src/ai-sdk/middleware/wrap-image-model.ts +4 -21
  19. package/src/ai-sdk/middleware/wrap-music-model.ts +4 -16
  20. package/src/ai-sdk/middleware/wrap-video-model.ts +5 -17
  21. package/src/ai-sdk/providers/CONTRIBUTING.md +457 -0
  22. package/src/ai-sdk/providers/editly/backends/index.ts +8 -0
  23. package/src/ai-sdk/providers/editly/backends/local.ts +94 -0
  24. package/src/ai-sdk/providers/editly/backends/types.ts +74 -0
  25. package/src/ai-sdk/providers/editly/editly.test.ts +49 -1
  26. package/src/ai-sdk/providers/editly/index.ts +164 -80
  27. package/src/ai-sdk/providers/editly/layers.ts +58 -6
  28. package/src/ai-sdk/providers/editly/rendi/editly-with-rendi-backend.test.ts +335 -0
  29. package/src/ai-sdk/providers/editly/rendi/index.ts +289 -0
  30. package/src/ai-sdk/providers/editly/rendi/rendi.test.ts +35 -0
  31. package/src/ai-sdk/providers/editly/types.ts +30 -0
  32. package/src/ai-sdk/providers/elevenlabs.ts +10 -2
  33. package/src/ai-sdk/providers/fal.test.ts +214 -0
  34. package/src/ai-sdk/providers/fal.ts +435 -40
  35. package/src/ai-sdk/providers/google.ts +423 -0
  36. package/src/ai-sdk/providers/together.ts +191 -0
  37. package/src/cli/commands/find.tsx +1 -0
  38. package/src/cli/commands/frame.tsx +616 -0
  39. package/src/cli/commands/hello.ts +85 -0
  40. package/src/cli/commands/help.tsx +18 -30
  41. package/src/cli/commands/index.ts +11 -2
  42. package/src/cli/commands/init.tsx +570 -0
  43. package/src/cli/commands/list.tsx +1 -0
  44. package/src/cli/commands/render.tsx +322 -76
  45. package/src/cli/commands/run.tsx +1 -0
  46. package/src/cli/commands/storyboard.tsx +1714 -0
  47. package/src/cli/commands/which.tsx +1 -0
  48. package/src/cli/index.ts +23 -4
  49. package/src/cli/ui/components/Badge.tsx +1 -0
  50. package/src/cli/ui/components/DataTable.tsx +1 -0
  51. package/src/cli/ui/components/Header.tsx +1 -0
  52. package/src/cli/ui/components/HelpBlock.tsx +1 -0
  53. package/src/cli/ui/components/KeyValue.tsx +1 -0
  54. package/src/cli/ui/components/OptionRow.tsx +1 -0
  55. package/src/cli/ui/components/Separator.tsx +1 -0
  56. package/src/cli/ui/components/StatusBox.tsx +1 -0
  57. package/src/cli/ui/components/VargBox.tsx +1 -0
  58. package/src/cli/ui/components/VargProgress.tsx +1 -0
  59. package/src/cli/ui/components/VargSpinner.tsx +1 -0
  60. package/src/cli/ui/components/VargText.tsx +1 -0
  61. package/src/definitions/actions/grok-edit.ts +133 -0
  62. package/src/definitions/actions/index.ts +16 -0
  63. package/src/definitions/actions/qwen-angles.ts +218 -0
  64. package/src/index.ts +1 -0
  65. package/src/providers/fal.ts +196 -0
  66. package/src/react/assets.ts +9 -0
  67. package/src/react/elements.ts +0 -5
  68. package/src/react/examples/branching.tsx +6 -4
  69. package/src/react/examples/character-video.tsx +13 -10
  70. package/src/react/examples/local-files-test.tsx +19 -0
  71. package/src/react/examples/ltx2-test.tsx +25 -0
  72. package/src/react/examples/madi.tsx +13 -10
  73. package/src/react/examples/mcmeows.tsx +40 -0
  74. package/src/react/examples/music-defaults.tsx +24 -0
  75. package/src/react/examples/quickstart-test.tsx +101 -0
  76. package/src/react/examples/qwen-angles-test.tsx +72 -0
  77. package/src/react/index.ts +3 -3
  78. package/src/react/layouts/grid.tsx +1 -1
  79. package/src/react/layouts/index.ts +2 -1
  80. package/src/react/layouts/slot.tsx +85 -0
  81. package/src/react/layouts/split.tsx +18 -0
  82. package/src/react/react.test.ts +60 -11
  83. package/src/react/renderers/burn-captions.ts +95 -0
  84. package/src/react/renderers/cache.test.ts +182 -0
  85. package/src/react/renderers/captions.ts +25 -6
  86. package/src/react/renderers/clip.ts +56 -25
  87. package/src/react/renderers/context.ts +5 -2
  88. package/src/react/renderers/image.ts +5 -2
  89. package/src/react/renderers/index.ts +0 -1
  90. package/src/react/renderers/music.ts +8 -3
  91. package/src/react/renderers/packshot/blinking-button.ts +413 -0
  92. package/src/react/renderers/packshot.ts +170 -8
  93. package/src/react/renderers/progress.ts +4 -3
  94. package/src/react/renderers/render.ts +127 -71
  95. package/src/react/renderers/speech.ts +2 -2
  96. package/src/react/renderers/split.ts +34 -13
  97. package/src/react/renderers/utils.test.ts +80 -0
  98. package/src/react/renderers/utils.ts +37 -1
  99. package/src/react/renderers/video.ts +47 -9
  100. package/src/react/types.ts +70 -17
  101. package/src/studio/stages.ts +40 -39
  102. package/src/studio/step-renderer.ts +14 -24
  103. package/src/studio/ui/index.html +2 -2
  104. package/src/tests/all.test.ts +4 -4
  105. package/src/tests/index.ts +1 -1
  106. package/test-slot-grid.tsx +19 -0
  107. package/test-slot-userland.tsx +30 -0
  108. package/test-sync-v2.ts +30 -0
  109. package/test-sync-v2.tsx +29 -0
  110. package/tsconfig.json +1 -1
  111. package/video.tsx +7 -0
  112. package/src/ai-sdk/providers/editly/ffmpeg.ts +0 -60
  113. package/src/react/renderers/animate.ts +0 -59
  114. /package/src/cli/commands/{studio.tsx → studio.ts} +0 -0
@@ -0,0 +1,457 @@
1
+ # Adding Models & Providers
2
+
3
+ This guide explains how to add new AI models and providers to the varg SDK.
4
+
5
+ ## Overview
6
+
7
+ Providers in varg extend the [Vercel AI SDK](https://sdk.vercel.ai/) with additional model types for video, music, and other media generation. Each provider implements a consistent interface pattern.
8
+
9
+ ## Architecture
10
+
11
+ ```
12
+ src/ai-sdk/providers/
13
+ ├── fal.ts # Full provider (video, image, transcription)
14
+ ├── elevenlabs.ts # Speech & music provider
15
+ ├── openai.ts # Extends @ai-sdk/openai with video
16
+ ├── google.ts # Image & video provider
17
+ ├── higgsfield.ts # Image-only provider
18
+ ├── replicate.ts # Re-exports @ai-sdk/replicate
19
+ └── CONTRIBUTING.md # This file
20
+ ```
21
+
22
+ ## Model Types
23
+
24
+ | Type | Interface | Use Case |
25
+ |------|-----------|----------|
26
+ | `VideoModelV3` | `../video-model.ts` | Video generation (t2v, i2v, lipsync) |
27
+ | `ImageModelV3` | `@ai-sdk/provider` | Image generation |
28
+ | `SpeechModelV3` | `@ai-sdk/provider` | Text-to-speech |
29
+ | `MusicModelV3` | `../music-model.ts` | Music generation |
30
+ | `TranscriptionModelV3` | `@ai-sdk/provider` | Speech-to-text |
31
+ | `LanguageModelV3` | `@ai-sdk/provider` | LLM text generation |
32
+ | `EmbeddingModelV3` | `@ai-sdk/provider` | Text embeddings |
33
+
34
+ ## Adding a New Model to an Existing Provider
35
+
36
+ ### Example: Adding a new video model to fal.ts
37
+
38
+ 1. **Add to the model mapping:**
39
+
40
+ ```typescript
41
+ const VIDEO_MODELS: Record<string, { t2v: string; i2v: string }> = {
42
+ // existing models...
43
+ "new-model-v1": {
44
+ t2v: "fal-ai/new-model/text-to-video",
45
+ i2v: "fal-ai/new-model/image-to-video",
46
+ },
47
+ };
48
+ ```
49
+
50
+ 2. **That's it!** The existing `FalVideoModel` class handles the rest.
51
+
52
+ ### Example: Adding a model with special handling
53
+
54
+ If the new model needs custom logic, add conditional handling in `doGenerate()`:
55
+
56
+ ```typescript
57
+ async doGenerate(options: VideoModelV3CallOptions) {
58
+ const isNewModel = this.modelId === "new-model-v1";
59
+
60
+ if (isNewModel) {
61
+ // Custom input handling for this model
62
+ input.special_param = options.providerOptions?.fal?.specialParam;
63
+ }
64
+
65
+ // ... rest of generation logic
66
+ }
67
+ ```
68
+
69
+ ## Creating a New Provider
70
+
71
+ ### Step 1: Define the Provider Interface
72
+
73
+ ```typescript
74
+ import {
75
+ type EmbeddingModelV3,
76
+ type ImageModelV3,
77
+ type LanguageModelV3,
78
+ NoSuchModelError,
79
+ type ProviderV3,
80
+ } from "@ai-sdk/provider";
81
+ import type { VideoModelV3 } from "../video-model";
82
+
83
+ export interface MyProviderSettings {
84
+ apiKey?: string;
85
+ baseURL?: string;
86
+ }
87
+
88
+ export interface MyProvider extends ProviderV3 {
89
+ // Add methods for each model type you support
90
+ videoModel(modelId: string): VideoModelV3;
91
+ imageModel(modelId: string): ImageModelV3;
92
+ }
93
+ ```
94
+
95
+ ### Step 2: Implement Model Classes
96
+
97
+ Each model class must implement the corresponding interface:
98
+
99
+ ```typescript
100
+ class MyVideoModel implements VideoModelV3 {
101
+ readonly specificationVersion = "v3" as const;
102
+ readonly provider = "myprovider";
103
+ readonly modelId: string;
104
+ readonly maxVideosPerCall = 1;
105
+
106
+ private apiKey: string;
107
+
108
+ constructor(modelId: string, options: { apiKey?: string } = {}) {
109
+ this.modelId = modelId;
110
+ this.apiKey = options.apiKey ?? process.env.MY_PROVIDER_API_KEY ?? "";
111
+ }
112
+
113
+ async doGenerate(options: VideoModelV3CallOptions) {
114
+ const {
115
+ prompt,
116
+ duration,
117
+ aspectRatio,
118
+ files,
119
+ providerOptions,
120
+ abortSignal,
121
+ } = options;
122
+
123
+ const warnings: SharedV3Warning[] = [];
124
+
125
+ // 1. Build API request
126
+ const input: Record<string, unknown> = {
127
+ prompt,
128
+ duration: duration ?? 5,
129
+ ...(providerOptions?.myprovider ?? {}),
130
+ };
131
+
132
+ // 2. Handle file inputs (for image-to-video, etc.)
133
+ if (files && files.length > 0) {
134
+ const imageFile = files.find(f =>
135
+ f.type === "file"
136
+ ? f.mediaType?.startsWith("image/")
137
+ : /\.(jpg|jpeg|png|webp)$/i.test(f.url)
138
+ );
139
+ if (imageFile) {
140
+ input.image_url = await this.uploadFile(imageFile);
141
+ }
142
+ }
143
+
144
+ // 3. Call the API
145
+ const response = await fetch("https://api.myprovider.com/v1/generate", {
146
+ method: "POST",
147
+ headers: {
148
+ "Authorization": `Bearer ${this.apiKey}`,
149
+ "Content-Type": "application/json",
150
+ },
151
+ body: JSON.stringify(input),
152
+ signal: abortSignal,
153
+ });
154
+
155
+ if (!response.ok) {
156
+ throw new Error(`API error: ${await response.text()}`);
157
+ }
158
+
159
+ const data = await response.json();
160
+
161
+ // 4. Download the result
162
+ const videoResponse = await fetch(data.video_url, { signal: abortSignal });
163
+ const videoBuffer = new Uint8Array(await videoResponse.arrayBuffer());
164
+
165
+ // 5. Return in standard format
166
+ return {
167
+ videos: [videoBuffer],
168
+ warnings,
169
+ response: {
170
+ timestamp: new Date(),
171
+ modelId: this.modelId,
172
+ headers: undefined,
173
+ },
174
+ };
175
+ }
176
+
177
+ private async uploadFile(file: ImageModelV3File): Promise<string> {
178
+ // Implementation depends on provider's upload mechanism
179
+ }
180
+ }
181
+ ```
182
+
183
+ ### Step 3: Create the Provider Factory
184
+
185
+ ```typescript
186
+ export function createMyProvider(
187
+ settings: MyProviderSettings = {},
188
+ ): MyProvider {
189
+ const apiKey = settings.apiKey ?? process.env.MY_PROVIDER_API_KEY;
190
+
191
+ if (!apiKey) {
192
+ throw new Error("MY_PROVIDER_API_KEY not set");
193
+ }
194
+
195
+ return {
196
+ specificationVersion: "v3",
197
+
198
+ videoModel(modelId: string): VideoModelV3 {
199
+ return new MyVideoModel(modelId, { apiKey });
200
+ },
201
+
202
+ imageModel(modelId: string): ImageModelV3 {
203
+ return new MyImageModel(modelId, { apiKey });
204
+ },
205
+
206
+ // Throw NoSuchModelError for unsupported model types
207
+ languageModel(modelId: string): LanguageModelV3 {
208
+ throw new NoSuchModelError({ modelId, modelType: "languageModel" });
209
+ },
210
+
211
+ embeddingModel(modelId: string): EmbeddingModelV3 {
212
+ throw new NoSuchModelError({ modelId, modelType: "embeddingModel" });
213
+ },
214
+ };
215
+ }
216
+ ```
217
+
218
+ ### Step 4: Export a Lazy Singleton
219
+
220
+ ```typescript
221
+ // Lazy initialization - only creates client when first accessed
222
+ let _myprovider: MyProvider | undefined;
223
+
224
+ export const myprovider = new Proxy({} as MyProvider, {
225
+ get(_, prop) {
226
+ if (!_myprovider) {
227
+ _myprovider = createMyProvider();
228
+ }
229
+ return _myprovider[prop as keyof MyProvider];
230
+ },
231
+ });
232
+ ```
233
+
234
+ ### Step 5: Re-export from index
235
+
236
+ Add to `src/ai-sdk/index.ts`:
237
+
238
+ ```typescript
239
+ export { createMyProvider, myprovider } from "./providers/myprovider";
240
+ export type { MyProvider, MyProviderSettings } from "./providers/myprovider";
241
+ ```
242
+
243
+ ## Handling Warnings
244
+
245
+ Use warnings to communicate unsupported features without failing:
246
+
247
+ ```typescript
248
+ if (options.seed !== undefined) {
249
+ warnings.push({
250
+ type: "unsupported",
251
+ feature: "seed",
252
+ details: "Seed is not supported by this model",
253
+ });
254
+ }
255
+
256
+ if (options.fps !== undefined) {
257
+ warnings.push({
258
+ type: "unsupported",
259
+ feature: "fps",
260
+ details: "FPS is not configurable, using provider default",
261
+ });
262
+ }
263
+ ```
264
+
265
+ ## Provider Options Passthrough
266
+
267
+ Allow provider-specific options via `providerOptions`:
268
+
269
+ ```typescript
270
+ // User code:
271
+ await generateVideo({
272
+ model: myprovider.videoModel("model-v1"),
273
+ prompt: "a cat",
274
+ providerOptions: {
275
+ myprovider: {
276
+ customParam: "value",
277
+ negativePrompt: "blurry",
278
+ },
279
+ },
280
+ });
281
+
282
+ // In your model:
283
+ const customOptions = providerOptions?.myprovider ?? {};
284
+ input.custom_param = customOptions.customParam;
285
+ input.negative_prompt = customOptions.negativePrompt;
286
+ ```
287
+
288
+ ## Async Job Polling
289
+
290
+ Many video APIs are async. Here's the standard polling pattern:
291
+
292
+ ```typescript
293
+ async doGenerate(options: VideoModelV3CallOptions) {
294
+ // 1. Create job
295
+ const createResponse = await fetch(`${this.baseURL}/jobs`, {
296
+ method: "POST",
297
+ headers: { Authorization: `Bearer ${this.apiKey}` },
298
+ body: JSON.stringify(input),
299
+ signal: options.abortSignal,
300
+ });
301
+
302
+ const job = await createResponse.json();
303
+
304
+ // 2. Poll for completion
305
+ let latest = job; // most recent job state; fields like error/output_url come from the final poll
306
+ while (latest.status === "queued" || latest.status === "processing") {
307
+ await new Promise(resolve => setTimeout(resolve, 2000));
308
+
309
+ const statusResponse = await fetch(`${this.baseURL}/jobs/${job.id}`, {
310
+ headers: { Authorization: `Bearer ${this.apiKey}` },
311
+ signal: options.abortSignal,
312
+ });
313
+
314
+ // Keep the whole status payload, not just its `status` field
315
+ latest = await statusResponse.json();
316
+ }
317
+
318
+ if (latest.status === "failed") {
319
+ throw new Error(`Generation failed: ${latest.error}`);
320
+ }
321
+
322
+ // 3. Download result
323
+ const videoResponse = await fetch(latest.output_url, { signal: options.abortSignal });
324
+ return { videos: [new Uint8Array(await videoResponse.arrayBuffer())] };
325
+ }
326
+ ```
327
+
328
+ ## File Upload Helpers
329
+
330
+ Common pattern for handling file inputs:
331
+
332
+ ```typescript
333
+ import type { ImageModelV3File } from "@ai-sdk/provider";
334
+
335
+ async function fileToUrl(file: ImageModelV3File): Promise<string> {
336
+ if (file.type === "url") {
337
+ return file.url;
338
+ }
339
+
340
+ // Normalize the data to bytes for upload: decode base64 strings, use a Uint8Array as-is
341
+ const bytes = typeof file.data === "string"
342
+ ? Uint8Array.from(atob(file.data), c => c.charCodeAt(0))
343
+ : file.data;
344
+
345
+ const blob = new Blob([bytes], { type: file.mediaType ?? "image/png" });
346
+
347
+ // Upload to provider's storage (or use data URL for small files)
348
+ return await uploadToStorage(blob);
349
+ }
350
+
351
+ function getMediaType(file: ImageModelV3File): string | undefined {
352
+ if (file.type === "file") return file.mediaType;
353
+
354
+ const ext = file.url.split(".").pop()?.toLowerCase();
355
+ const mimeTypes: Record<string, string> = {
356
+ png: "image/png",
357
+ jpg: "image/jpeg",
358
+ jpeg: "image/jpeg",
359
+ mp3: "audio/mpeg",
360
+ wav: "audio/wav",
361
+ mp4: "video/mp4",
362
+ };
363
+ return mimeTypes[ext ?? ""];
364
+ }
365
+ ```
366
+
367
+ ## Extending Existing Providers
368
+
369
+ To add video support to an existing AI SDK provider (like OpenAI):
370
+
371
+ ```typescript
372
+ import {
373
+ createOpenAI as createOpenAIBase,
374
+ type OpenAIProvider as OpenAIProviderBase,
375
+ } from "@ai-sdk/openai";
376
+
377
+ // Extend the base provider interface
378
+ export interface OpenAIProvider extends OpenAIProviderBase {
379
+ videoModel(modelId: string): VideoModelV3;
380
+ }
381
+
382
+ export function createOpenAI(settings = {}): OpenAIProvider {
383
+ const base = createOpenAIBase(settings);
384
+
385
+ // Create callable function with all base methods
386
+ const provider = ((modelId: string) => base(modelId)) as OpenAIProvider;
387
+ Object.assign(provider, base);
388
+
389
+ // Add video support
390
+ provider.videoModel = (modelId: string): VideoModelV3 =>
391
+ new OpenAIVideoModel(modelId, settings);
392
+
393
+ return provider;
394
+ }
395
+ ```
396
+
397
+ ## Re-exporting External Providers
398
+
399
+ For providers that work as-is from `@ai-sdk/*`:
400
+
401
+ ```typescript
402
+ // replicate.ts - simple re-export
403
+ export {
404
+ createReplicate,
405
+ replicate,
406
+ type ReplicateProvider,
407
+ type ReplicateProviderSettings,
408
+ } from "@ai-sdk/replicate";
409
+ ```
410
+
411
+ ## Testing Your Provider
412
+
413
+ ```typescript
414
+ import { describe, test, expect } from "bun:test";
415
+ import { createMyProvider } from "./myprovider";
416
+
417
+ describe("MyProvider", () => {
418
+ test("creates video model", () => {
419
+ const provider = createMyProvider({ apiKey: "test-key" });
420
+ const model = provider.videoModel("model-v1");
421
+
422
+ expect(model.provider).toBe("myprovider");
423
+ expect(model.modelId).toBe("model-v1");
424
+ expect(model.specificationVersion).toBe("v3");
425
+ });
426
+
427
+ test("throws on missing api key", () => {
428
+ delete process.env.MY_PROVIDER_API_KEY;
429
+ expect(() => createMyProvider()).toThrow("MY_PROVIDER_API_KEY not set");
430
+ });
431
+ });
432
+ ```
433
+
434
+ ## Checklist for New Providers
435
+
436
+ - [ ] Implements `ProviderV3` interface
437
+ - [ ] Model classes implement correct `*ModelV3` interfaces
438
+ - [ ] `specificationVersion` is `"v3"`
439
+ - [ ] Factory function `createProvider(settings)`
440
+ - [ ] Lazy singleton export for convenience
441
+ - [ ] API key from settings OR environment variable
442
+ - [ ] `NoSuchModelError` for unsupported model types
443
+ - [ ] Warnings for unsupported features (don't fail silently)
444
+ - [ ] `providerOptions` passthrough for provider-specific params
445
+ - [ ] `abortSignal` support for cancellation
446
+ - [ ] Proper error handling with descriptive messages
447
+ - [ ] Re-exported from `src/ai-sdk/index.ts`
448
+ - [ ] Environment variable documented in README
449
+
450
+ ## Questions?
451
+
452
+ Check existing providers for reference implementations:
453
+ - **Full provider**: `fal.ts` (video, image, transcription)
454
+ - **Audio provider**: `elevenlabs.ts` (speech, music)
455
+ - **Extended provider**: `openai.ts` (adds video to base)
456
+ - **Simple provider**: `higgsfield.ts` (image only)
457
+ - **Re-export**: `replicate.ts`
@@ -0,0 +1,8 @@
1
+ export { LocalBackend, localBackend } from "./local";
2
+ export type {
3
+ FFmpegBackend,
4
+ FFmpegInput,
5
+ FFmpegRunOptions,
6
+ FFmpegRunResult,
7
+ VideoInfo,
8
+ } from "./types";
@@ -0,0 +1,94 @@
1
+ import { $ } from "bun";
2
+ import type {
3
+ FFmpegBackend,
4
+ FFmpegInput,
5
+ FFmpegRunOptions,
6
+ FFmpegRunResult,
7
+ VideoInfo,
8
+ } from "./types";
9
+
10
/**
 * FFmpeg backend that runs the `ffmpeg` and `ffprobe` binaries on the local
 * machine through Bun's `$` shell.
 */
export class LocalBackend implements FFmpegBackend {
  /** Backend identifier. */
  readonly name = "local";

  /**
   * Probe a media file with `ffprobe` and summarise the result.
   *
   * @param input - Path or URL, handed to ffprobe unchanged.
   * @returns `duration` (0 when ffprobe reports none or it does not parse),
   *   the first video stream's `width`/`height`, and its frame rate both as a
   *   number (`fps`) and as the raw `r_frame_rate` string (`framerateStr`).
   *   Stream-derived fields are `undefined` when there is no video stream.
   */
  async ffprobe(input: string): Promise<VideoInfo> {
    const result =
      await $`ffprobe -v error -show_entries stream=width,height,r_frame_rate,codec_type -show_entries format=duration -of json ${input}`.json();

    const videoStream = result.streams?.find(
      (s: { codec_type: string }) => s.codec_type === "video",
    );
    const parsedDuration = parseFloat(result.format?.duration ?? "0");
    const duration = Number.isFinite(parsedDuration) ? parsedDuration : 0;

    // r_frame_rate is split on "/" into numerator and denominator. A missing,
    // zero, or non-numeric part leaves `fps` undefined rather than NaN/Infinity.
    let fps: number | undefined;
    const framerateStr: string | undefined = videoStream?.r_frame_rate;
    if (framerateStr) {
      const parts = framerateStr.split("/").map(Number);
      const num = parts[0];
      const den = parts[1];
      if (den && den > 0 && num) fps = num / den;
    }

    return {
      duration,
      width: videoStream?.width,
      height: videoStream?.height,
      fps,
      framerateStr,
    };
  }

  /**
   * Flatten the structured inputs into ffmpeg command-line arguments.
   *
   * - a string becomes `-i <string>`
   * - `{ raw }` is emitted verbatim (no `-i` is added)
   * - `{ path, options }` becomes `<options...> -i <path>`
   */
  private buildInputArgs(inputs: FFmpegInput[]): string[] {
    const args: string[] = [];
    for (const input of inputs) {
      if (typeof input === "string") {
        args.push("-i", input);
      } else if ("raw" in input) {
        args.push(...input.raw);
      } else {
        if (input.options) args.push(...input.options);
        args.push("-i", input.path);
      }
    }
    return args;
  }

  /**
   * Run ffmpeg locally and write the result to `options.outputPath`.
   *
   * Argument order: logging flags, inputs, `-filter_complex`, `-vf`,
   * `outputArgs`, `-y`, output path. `-y` means an existing output file is
   * overwritten.
   *
   * @param options - Inputs, optional filters, extra output args, output path
   *   and verbose flag.
   * @returns `{ output: { type: "file", path: outputPath } }` on success.
   * @throws Error when ffmpeg exits with a non-zero code; the message starts
   *   with `ffmpeg failed with exit code <n>` and, when ffmpeg wrote anything
   *   to stderr, continues with that text.
   */
  async run(options: FFmpegRunOptions): Promise<FFmpegRunResult> {
    const {
      inputs,
      filterComplex,
      videoFilter,
      outputArgs = [],
      outputPath,
      verbose,
    } = options;

    const inputArgs = this.buildInputArgs(inputs);

    const ffmpegArgs = [
      "-hide_banner",
      "-loglevel",
      verbose ? "info" : "error",
      ...inputArgs,
      ...(filterComplex ? ["-filter_complex", filterComplex] : []),
      ...(videoFilter ? ["-vf", videoFilter] : []),
      ...outputArgs,
      "-y",
      outputPath,
    ];

    if (verbose) {
      console.log("ffmpeg", ffmpegArgs.join(" "));
    }

    // Bun's shell rejects on a non-zero exit code by default, which would make
    // the check below unreachable. `.nothrow()` lets us inspect the exit code
    // ourselves and report ffmpeg's own stderr, which `.quiet()` captures
    // instead of printing.
    const result = await $`ffmpeg ${ffmpegArgs}`.quiet().nothrow();

    if (result.exitCode !== 0) {
      const stderr = result.stderr.toString().trim();
      throw new Error(
        `ffmpeg failed with exit code ${result.exitCode}${stderr ? `: ${stderr}` : ""}`,
      );
    }

    return { output: { type: "file", path: outputPath } };
  }
}

/** Shared default instance of the local backend. */
export const localBackend = new LocalBackend();
@@ -0,0 +1,74 @@
1
+ /**
2
+ * FFmpeg backend abstraction for dependency injection
3
+ * Allows switching between local ffmpeg and cloud services like Rendi
4
+ */
5
+
6
+ import type { VideoInfo } from "../types";
7
+
8
+ /**
9
+ * Represents the result of running ffprobe
10
+ */
11
+ export type { VideoInfo };
12
+
13
+ /**
14
+ * One input to ffmpeg: a bare path/URL string, a path with per-input options, or a raw argument list
15
+ */
16
+ export type FFmpegInput =
17
+ | string
18
+ | {
19
+ /** Path or URL to the input file */
20
+ path: string;
21
+ /** Options emitted immediately BEFORE this input's -i flag (e.g. ["-ss", "5"] for seeking) */
22
+ options?: string[];
23
+ }
24
+ | {
25
+ /** Raw ffmpeg args; the local backend emits them verbatim and adds no -i of its own, so include one if needed (e.g. ["-f", "lavfi", "-i", "color=black"]) */
26
+ raw: string[];
27
+ };
28
+
29
+ /**
30
+ * Options for one ffmpeg invocation. Callers describe inputs structurally; the backend builds the -i flags itself
31
+ */
32
+ export interface FFmpegRunOptions {
33
+ /** Inputs, in order - the backend builds -i flags from these */
34
+ inputs: FFmpegInput[];
35
+ /** Filter complex string (uses input indices like [0:v], [1:a]); the local backend passes it as -filter_complex */
36
+ filterComplex?: string;
37
+ /** Video filter string for single-input operations; the local backend passes it as -vf */
38
+ videoFilter?: string;
39
+ /** Arguments placed after the inputs and filters but before the output path (codec, map, etc.) */
40
+ outputArgs?: string[];
41
+ /** Output file path */
42
+ outputPath: string;
43
+ /** Enable verbose logging */
44
+ verbose?: boolean;
45
+ }
46
+
47
+ export type FFmpegOutput =
48
+ | { type: "file"; path: string } // output is a file at `path` (what LocalBackend.run returns)
49
+ | { type: "url"; url: string }; // output is referenced by `url` instead of a local path
50
+
51
+ export interface FFmpegRunResult {
52
+ output: FFmpegOutput; // where the output of the run can be found
53
+ }
54
+
55
+ /**
56
+ * Backend interface for ffmpeg/ffprobe execution, so callers can swap local ffmpeg for another implementation
57
+ */
58
+ export interface FFmpegBackend {
59
+ /** Backend name for identification */
60
+ readonly name: string;
61
+
62
+ /**
63
+ * Run ffprobe to get media file info
64
+ * @param input - File path (local) or URL
65
+ */
66
+ ffprobe(input: string): Promise<VideoInfo>;
67
+
68
+ /**
69
+ * Run ffmpeg command
70
+ * @param options - Inputs, optional filters, output args, output path, and verbose flag
71
+ * @returns Result whose `output` is either a local file path or a URL (e.g. from a cloud backend)
72
+ */
73
+ run(options: FFmpegRunOptions): Promise<FFmpegRunResult>;
74
+ }
@@ -1,6 +1,6 @@
1
1
  import { describe, expect, test } from "bun:test";
2
2
  import { existsSync, unlinkSync } from "node:fs";
3
- import { ffprobe } from "./ffmpeg";
3
+ import { localBackend } from "./backends/local";
4
4
  import { editly } from "./index";
5
5
 
6
6
  const VIDEO_1 = "output/sora-landscape.mp4";
@@ -9,6 +9,8 @@ const VIDEO_TALKING = "output/workflow-talking-synced.mp4";
9
9
  const IMAGE_SQUARE = "media/replicate-forest.png";
10
10
  const IMAGE_PORTRAIT = "media/madi-portrait.png";
11
11
 
12
// Bind to the backend instance: a detached method reference is called with
// `this === undefined`, which would break as soon as `ffprobe` touches
// instance state.
const ffprobe = localBackend.ffprobe.bind(localBackend);
13
+
12
14
  describe("editly", () => {
13
15
  test("requires outPath", async () => {
14
16
  await expect(
@@ -1105,4 +1107,50 @@ describe("editly", () => {
1105
1107
  expect(info.height).toBe(1920);
1106
1108
  expect(info.duration).toBeCloseTo(3, 0);
1107
1109
  });
1110
+
1111
+ test("video overlay with cropPosition", async () => {
1112
+ const outPath = "output/editly-test-crop-position.mp4";
1113
+ if (existsSync(outPath)) unlinkSync(outPath);
1114
+
1115
+ await editly({
1116
+ outPath,
1117
+ width: 1080,
1118
+ height: 1920,
1119
+ fps: 30,
1120
+ clips: [
1121
+ {
1122
+ duration: 3,
1123
+ layers: [
1124
+ { type: "fill-color", color: "#000000" },
1125
+ {
1126
+ type: "video",
1127
+ path: VIDEO_1,
1128
+ width: 1080,
1129
+ height: 960,
1130
+ left: 0,
1131
+ top: 0,
1132
+ resizeMode: "cover",
1133
+ cropPosition: "top",
1134
+ },
1135
+ {
1136
+ type: "video",
1137
+ path: VIDEO_2,
1138
+ width: 1080,
1139
+ height: 960,
1140
+ left: 0,
1141
+ top: 960,
1142
+ resizeMode: "cover",
1143
+ cropPosition: "bottom",
1144
+ },
1145
+ ],
1146
+ },
1147
+ ],
1148
+ });
1149
+
1150
+ expect(existsSync(outPath)).toBe(true);
1151
+ const info = await ffprobe(outPath);
1152
+ expect(info.width).toBe(1080);
1153
+ expect(info.height).toBe(1920);
1154
+ expect(info.duration).toBeCloseTo(3, 0);
1155
+ });
1108
1156
  });