vargai 0.4.0-alpha4 → 0.4.0-alpha40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +6 -0
- package/README.md +483 -61
- package/assets/fonts/TikTokSans-Bold.ttf +0 -0
- package/examples/grok-imagine-test.tsx +155 -0
- package/launch-videos/06-kawaii-fruits.tsx +93 -0
- package/launch-videos/07-ugc-weight-loss.tsx +132 -0
- package/launch-videos/08-talking-head-varg.tsx +107 -0
- package/launch-videos/09-girl.tsx +160 -0
- package/launch-videos/README.md +42 -0
- package/package.json +10 -4
- package/pipeline/cookbooks/round-video-character.md +1 -1
- package/skills/varg-video-generation/SKILL.md +224 -0
- package/skills/varg-video-generation/references/templates.md +380 -0
- package/skills/varg-video-generation/scripts/setup.ts +265 -0
- package/src/ai-sdk/cache.ts +1 -3
- package/src/ai-sdk/examples/google-image.ts +62 -0
- package/src/ai-sdk/index.ts +10 -0
- package/src/ai-sdk/middleware/wrap-image-model.ts +4 -21
- package/src/ai-sdk/middleware/wrap-music-model.ts +4 -16
- package/src/ai-sdk/middleware/wrap-video-model.ts +5 -17
- package/src/ai-sdk/providers/CONTRIBUTING.md +457 -0
- package/src/ai-sdk/providers/editly/backends/index.ts +8 -0
- package/src/ai-sdk/providers/editly/backends/local.ts +94 -0
- package/src/ai-sdk/providers/editly/backends/types.ts +74 -0
- package/src/ai-sdk/providers/editly/editly.test.ts +49 -1
- package/src/ai-sdk/providers/editly/index.ts +164 -80
- package/src/ai-sdk/providers/editly/layers.ts +58 -6
- package/src/ai-sdk/providers/editly/rendi/editly-with-rendi-backend.test.ts +335 -0
- package/src/ai-sdk/providers/editly/rendi/index.ts +289 -0
- package/src/ai-sdk/providers/editly/rendi/rendi.test.ts +35 -0
- package/src/ai-sdk/providers/editly/types.ts +30 -0
- package/src/ai-sdk/providers/elevenlabs.ts +10 -2
- package/src/ai-sdk/providers/fal.test.ts +214 -0
- package/src/ai-sdk/providers/fal.ts +435 -40
- package/src/ai-sdk/providers/google.ts +423 -0
- package/src/ai-sdk/providers/together.ts +191 -0
- package/src/cli/commands/find.tsx +1 -0
- package/src/cli/commands/frame.tsx +616 -0
- package/src/cli/commands/hello.ts +85 -0
- package/src/cli/commands/help.tsx +18 -30
- package/src/cli/commands/index.ts +11 -2
- package/src/cli/commands/init.tsx +570 -0
- package/src/cli/commands/list.tsx +1 -0
- package/src/cli/commands/render.tsx +322 -76
- package/src/cli/commands/run.tsx +1 -0
- package/src/cli/commands/storyboard.tsx +1714 -0
- package/src/cli/commands/which.tsx +1 -0
- package/src/cli/index.ts +23 -4
- package/src/cli/ui/components/Badge.tsx +1 -0
- package/src/cli/ui/components/DataTable.tsx +1 -0
- package/src/cli/ui/components/Header.tsx +1 -0
- package/src/cli/ui/components/HelpBlock.tsx +1 -0
- package/src/cli/ui/components/KeyValue.tsx +1 -0
- package/src/cli/ui/components/OptionRow.tsx +1 -0
- package/src/cli/ui/components/Separator.tsx +1 -0
- package/src/cli/ui/components/StatusBox.tsx +1 -0
- package/src/cli/ui/components/VargBox.tsx +1 -0
- package/src/cli/ui/components/VargProgress.tsx +1 -0
- package/src/cli/ui/components/VargSpinner.tsx +1 -0
- package/src/cli/ui/components/VargText.tsx +1 -0
- package/src/definitions/actions/grok-edit.ts +133 -0
- package/src/definitions/actions/index.ts +16 -0
- package/src/definitions/actions/qwen-angles.ts +218 -0
- package/src/index.ts +1 -0
- package/src/providers/fal.ts +196 -0
- package/src/react/assets.ts +9 -0
- package/src/react/elements.ts +0 -5
- package/src/react/examples/branching.tsx +6 -4
- package/src/react/examples/character-video.tsx +13 -10
- package/src/react/examples/local-files-test.tsx +19 -0
- package/src/react/examples/ltx2-test.tsx +25 -0
- package/src/react/examples/madi.tsx +13 -10
- package/src/react/examples/mcmeows.tsx +40 -0
- package/src/react/examples/music-defaults.tsx +24 -0
- package/src/react/examples/quickstart-test.tsx +101 -0
- package/src/react/examples/qwen-angles-test.tsx +72 -0
- package/src/react/index.ts +3 -3
- package/src/react/layouts/grid.tsx +1 -1
- package/src/react/layouts/index.ts +2 -1
- package/src/react/layouts/slot.tsx +85 -0
- package/src/react/layouts/split.tsx +18 -0
- package/src/react/react.test.ts +60 -11
- package/src/react/renderers/burn-captions.ts +95 -0
- package/src/react/renderers/cache.test.ts +182 -0
- package/src/react/renderers/captions.ts +25 -6
- package/src/react/renderers/clip.ts +56 -25
- package/src/react/renderers/context.ts +5 -2
- package/src/react/renderers/image.ts +5 -2
- package/src/react/renderers/index.ts +0 -1
- package/src/react/renderers/music.ts +8 -3
- package/src/react/renderers/packshot/blinking-button.ts +413 -0
- package/src/react/renderers/packshot.ts +170 -8
- package/src/react/renderers/progress.ts +4 -3
- package/src/react/renderers/render.ts +127 -71
- package/src/react/renderers/speech.ts +2 -2
- package/src/react/renderers/split.ts +34 -13
- package/src/react/renderers/utils.test.ts +80 -0
- package/src/react/renderers/utils.ts +37 -1
- package/src/react/renderers/video.ts +47 -9
- package/src/react/types.ts +70 -17
- package/src/studio/stages.ts +40 -39
- package/src/studio/step-renderer.ts +14 -24
- package/src/studio/ui/index.html +2 -2
- package/src/tests/all.test.ts +4 -4
- package/src/tests/index.ts +1 -1
- package/test-slot-grid.tsx +19 -0
- package/test-slot-userland.tsx +30 -0
- package/test-sync-v2.ts +30 -0
- package/test-sync-v2.tsx +29 -0
- package/tsconfig.json +1 -1
- package/video.tsx +7 -0
- package/src/ai-sdk/providers/editly/ffmpeg.ts +0 -60
- package/src/react/renderers/animate.ts +0 -59
- /package/src/cli/commands/{studio.tsx → studio.ts} +0 -0
|
@@ -0,0 +1,457 @@
|
|
|
1
|
+
# Adding Models & Providers
|
|
2
|
+
|
|
3
|
+
This guide explains how to add new AI models and providers to the varg SDK.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
Providers in varg extend the [Vercel AI SDK](https://sdk.vercel.ai/) with additional model types for video, music, and other media generation. Each provider implements a consistent interface pattern.
|
|
8
|
+
|
|
9
|
+
## Architecture
|
|
10
|
+
|
|
11
|
+
```
|
|
12
|
+
src/ai-sdk/providers/
|
|
13
|
+
├── fal.ts # Full provider (video, image, transcription)
|
|
14
|
+
├── elevenlabs.ts # Speech & music provider
|
|
15
|
+
├── openai.ts # Extends @ai-sdk/openai with video
|
|
16
|
+
├── google.ts # Image & video provider
|
|
17
|
+
├── higgsfield.ts # Image-only provider
|
|
18
|
+
├── replicate.ts # Re-exports @ai-sdk/replicate
|
|
19
|
+
└── CONTRIBUTING.md # This file
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Model Types
|
|
23
|
+
|
|
24
|
+
| Type | Interface | Use Case |
|
|
25
|
+
|------|-----------|----------|
|
|
26
|
+
| `VideoModelV3` | `../video-model.ts` | Video generation (t2v, i2v, lipsync) |
|
|
27
|
+
| `ImageModelV3` | `@ai-sdk/provider` | Image generation |
|
|
28
|
+
| `SpeechModelV3` | `@ai-sdk/provider` | Text-to-speech |
|
|
29
|
+
| `MusicModelV3` | `../music-model.ts` | Music generation |
|
|
30
|
+
| `TranscriptionModelV3` | `@ai-sdk/provider` | Speech-to-text |
|
|
31
|
+
| `LanguageModelV3` | `@ai-sdk/provider` | LLM text generation |
|
|
32
|
+
| `EmbeddingModelV3` | `@ai-sdk/provider` | Text embeddings |
|
|
33
|
+
|
|
34
|
+
## Adding a New Model to an Existing Provider
|
|
35
|
+
|
|
36
|
+
### Example: Adding a new video model to fal.ts
|
|
37
|
+
|
|
38
|
+
1. **Add to the model mapping:**
|
|
39
|
+
|
|
40
|
+
```typescript
|
|
41
|
+
const VIDEO_MODELS: Record<string, { t2v: string; i2v: string }> = {
|
|
42
|
+
// existing models...
|
|
43
|
+
"new-model-v1": {
|
|
44
|
+
t2v: "fal-ai/new-model/text-to-video",
|
|
45
|
+
i2v: "fal-ai/new-model/image-to-video",
|
|
46
|
+
},
|
|
47
|
+
};
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
2. **That's it!** The existing `FalVideoModel` class handles the rest.
|
|
51
|
+
|
|
52
|
+
### Example: Adding a model with special handling
|
|
53
|
+
|
|
54
|
+
If the new model needs custom logic, add conditional handling in `doGenerate()`:
|
|
55
|
+
|
|
56
|
+
```typescript
|
|
57
|
+
async doGenerate(options: VideoModelV3CallOptions) {
|
|
58
|
+
const isNewModel = this.modelId === "new-model-v1";
|
|
59
|
+
|
|
60
|
+
if (isNewModel) {
|
|
61
|
+
// Custom input handling for this model
|
|
62
|
+
input.special_param = options.providerOptions?.fal?.specialParam;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
// ... rest of generation logic
|
|
66
|
+
}
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Creating a New Provider
|
|
70
|
+
|
|
71
|
+
### Step 1: Define the Provider Interface
|
|
72
|
+
|
|
73
|
+
```typescript
|
|
74
|
+
import {
|
|
75
|
+
type EmbeddingModelV3,
|
|
76
|
+
type ImageModelV3,
|
|
77
|
+
type LanguageModelV3,
|
|
78
|
+
NoSuchModelError,
|
|
79
|
+
type ProviderV3,
|
|
80
|
+
} from "@ai-sdk/provider";
|
|
81
|
+
import type { VideoModelV3 } from "../video-model";
|
|
82
|
+
|
|
83
|
+
export interface MyProviderSettings {
|
|
84
|
+
apiKey?: string;
|
|
85
|
+
baseURL?: string;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
export interface MyProvider extends ProviderV3 {
|
|
89
|
+
// Add methods for each model type you support
|
|
90
|
+
videoModel(modelId: string): VideoModelV3;
|
|
91
|
+
imageModel(modelId: string): ImageModelV3;
|
|
92
|
+
}
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### Step 2: Implement Model Classes
|
|
96
|
+
|
|
97
|
+
Each model class must implement the corresponding interface:
|
|
98
|
+
|
|
99
|
+
```typescript
|
|
100
|
+
/**
 * Example video model for a fictional "myprovider" HTTP API.
 *
 * Field names sent to the API (`aspect_ratio`, `image_url`, `video_url`) are
 * illustrative; use whatever your provider's API actually expects.
 */
class MyVideoModel implements VideoModelV3 {
  readonly specificationVersion = "v3" as const;
  readonly provider = "myprovider";
  readonly modelId: string;
  readonly maxVideosPerCall = 1;

  private apiKey: string;

  constructor(modelId: string, options: { apiKey?: string } = {}) {
    this.modelId = modelId;
    this.apiKey = options.apiKey ?? process.env.MY_PROVIDER_API_KEY ?? "";
  }

  /**
   * Generates one video: builds the request, optionally attaches an input
   * image, calls the API, then downloads the resulting file.
   *
   * @throws Error when the API call or the video download returns a non-2xx status.
   */
  async doGenerate(options: VideoModelV3CallOptions) {
    const {
      prompt,
      duration,
      aspectRatio,
      files,
      providerOptions,
      abortSignal,
    } = options;

    const warnings: SharedV3Warning[] = [];

    // 1. Build API request. Provider options are spread last so callers can
    //    override any of the defaults above them.
    const input: Record<string, unknown> = {
      prompt,
      duration: duration ?? 5,
      ...(aspectRatio !== undefined ? { aspect_ratio: aspectRatio } : {}),
      ...(providerOptions?.myprovider ?? {}),
    };

    // 2. Handle file inputs (for image-to-video, etc.)
    if (files && files.length > 0) {
      const imageFile = files.find((f) =>
        f.type === "file"
          ? f.mediaType?.startsWith("image/")
          : /\.(jpg|jpeg|png|webp)$/i.test(f.url),
      );
      if (imageFile) {
        input.image_url = await this.uploadFile(imageFile);
      }
    }

    // 3. Call the API
    const response = await fetch("https://api.myprovider.com/v1/generate", {
      method: "POST",
      headers: {
        Authorization: `Bearer ${this.apiKey}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify(input),
      signal: abortSignal,
    });

    if (!response.ok) {
      throw new Error(`API error (${response.status}): ${await response.text()}`);
    }

    const data = await response.json();

    // 4. Download the result. Check the status so an error page is never
    //    returned to the caller as if it were video bytes.
    const videoResponse = await fetch(data.video_url, { signal: abortSignal });
    if (!videoResponse.ok) {
      throw new Error(
        `Video download failed (${videoResponse.status}): ${data.video_url}`,
      );
    }
    const videoBuffer = new Uint8Array(await videoResponse.arrayBuffer());

    // 5. Return in standard format
    return {
      videos: [videoBuffer],
      warnings,
      response: {
        timestamp: new Date(),
        modelId: this.modelId,
        headers: undefined,
      },
    };
  }

  /**
   * Turns an input file into a URL the provider can fetch.
   *
   * Placeholder: URL inputs pass through unchanged; inline data must be
   * uploaded with the provider's own mechanism, so replace the `throw` below.
   */
  private async uploadFile(file: ImageModelV3File): Promise<string> {
    if (file.type === "url") {
      return file.url;
    }
    throw new Error("uploadFile: uploading inline file data is not implemented");
  }
}
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
### Step 3: Create the Provider Factory
|
|
184
|
+
|
|
185
|
+
```typescript
|
|
186
|
+
export function createMyProvider(
|
|
187
|
+
settings: MyProviderSettings = {},
|
|
188
|
+
): MyProvider {
|
|
189
|
+
const apiKey = settings.apiKey ?? process.env.MY_PROVIDER_API_KEY;
|
|
190
|
+
|
|
191
|
+
if (!apiKey) {
|
|
192
|
+
throw new Error("MY_PROVIDER_API_KEY not set");
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
return {
|
|
196
|
+
specificationVersion: "v3",
|
|
197
|
+
|
|
198
|
+
videoModel(modelId: string): VideoModelV3 {
|
|
199
|
+
return new MyVideoModel(modelId, { apiKey });
|
|
200
|
+
},
|
|
201
|
+
|
|
202
|
+
imageModel(modelId: string): ImageModelV3 {
|
|
203
|
+
return new MyImageModel(modelId, { apiKey });
|
|
204
|
+
},
|
|
205
|
+
|
|
206
|
+
// Throw NoSuchModelError for unsupported model types
|
|
207
|
+
languageModel(modelId: string): LanguageModelV3 {
|
|
208
|
+
throw new NoSuchModelError({ modelId, modelType: "languageModel" });
|
|
209
|
+
},
|
|
210
|
+
|
|
211
|
+
embeddingModel(modelId: string): EmbeddingModelV3 {
|
|
212
|
+
throw new NoSuchModelError({ modelId, modelType: "embeddingModel" });
|
|
213
|
+
},
|
|
214
|
+
};
|
|
215
|
+
}
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
### Step 4: Export a Lazy Singleton
|
|
219
|
+
|
|
220
|
+
```typescript
|
|
221
|
+
// Lazy initialization - only creates client when first accessed
|
|
222
|
+
let _myprovider: MyProvider | undefined;
|
|
223
|
+
|
|
224
|
+
export const myprovider = new Proxy({} as MyProvider, {
|
|
225
|
+
get(_, prop) {
|
|
226
|
+
if (!_myprovider) {
|
|
227
|
+
_myprovider = createMyProvider();
|
|
228
|
+
}
|
|
229
|
+
return _myprovider[prop as keyof MyProvider];
|
|
230
|
+
},
|
|
231
|
+
});
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
### Step 5: Re-export from index
|
|
235
|
+
|
|
236
|
+
Add to `src/ai-sdk/index.ts`:
|
|
237
|
+
|
|
238
|
+
```typescript
|
|
239
|
+
export { createMyProvider, myprovider } from "./providers/myprovider";
|
|
240
|
+
export type { MyProvider, MyProviderSettings } from "./providers/myprovider";
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
## Handling Warnings
|
|
244
|
+
|
|
245
|
+
Use warnings to communicate unsupported features without failing:
|
|
246
|
+
|
|
247
|
+
```typescript
|
|
248
|
+
if (options.seed !== undefined) {
|
|
249
|
+
warnings.push({
|
|
250
|
+
type: "unsupported",
|
|
251
|
+
feature: "seed",
|
|
252
|
+
details: "Seed is not supported by this model",
|
|
253
|
+
});
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
if (options.fps !== undefined) {
|
|
257
|
+
warnings.push({
|
|
258
|
+
type: "unsupported",
|
|
259
|
+
feature: "fps",
|
|
260
|
+
details: "FPS is not configurable, using provider default",
|
|
261
|
+
});
|
|
262
|
+
}
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
## Provider Options Passthrough
|
|
266
|
+
|
|
267
|
+
Allow provider-specific options via `providerOptions`:
|
|
268
|
+
|
|
269
|
+
```typescript
|
|
270
|
+
// User code:
|
|
271
|
+
await generateVideo({
|
|
272
|
+
model: myprovider.videoModel("model-v1"),
|
|
273
|
+
prompt: "a cat",
|
|
274
|
+
providerOptions: {
|
|
275
|
+
myprovider: {
|
|
276
|
+
customParam: "value",
|
|
277
|
+
negativePrompt: "blurry",
|
|
278
|
+
},
|
|
279
|
+
},
|
|
280
|
+
});
|
|
281
|
+
|
|
282
|
+
// In your model:
|
|
283
|
+
const customOptions = providerOptions?.myprovider ?? {};
|
|
284
|
+
input.custom_param = customOptions.customParam;
|
|
285
|
+
input.negative_prompt = customOptions.negativePrompt;
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
## Async Job Polling
|
|
289
|
+
|
|
290
|
+
Many video APIs are async. Here's the standard polling pattern:
|
|
291
|
+
|
|
292
|
+
```typescript
|
|
293
|
+
/**
 * Polling variant of `doGenerate`: create a job, poll until it leaves the
 * "queued"/"processing" states, then download the output.
 *
 * `input` is the request payload, built the same way as in Step 2.
 */
async doGenerate(options: VideoModelV3CallOptions) {
  const authHeaders = { Authorization: `Bearer ${this.apiKey}` };

  // 1. Create job
  const createResponse = await fetch(`${this.baseURL}/jobs`, {
    method: "POST",
    headers: { ...authHeaders, "Content-Type": "application/json" },
    body: JSON.stringify(input),
    signal: options.abortSignal,
  });
  if (!createResponse.ok) {
    throw new Error(`Job creation failed: ${await createResponse.text()}`);
  }

  // `job` always holds the most recent payload, so `error` and `output_url`
  // are read from the final status rather than from the creation response.
  let job = await createResponse.json();

  // 2. Poll for completion
  while (job.status === "queued" || job.status === "processing") {
    await new Promise((resolve) => setTimeout(resolve, 2000));

    const statusResponse = await fetch(`${this.baseURL}/jobs/${job.id}`, {
      headers: authHeaders,
      signal: options.abortSignal,
    });
    if (!statusResponse.ok) {
      throw new Error(`Status check failed: ${await statusResponse.text()}`);
    }

    // Merge so fields the status endpoint omits (e.g. `id`) are kept.
    job = { ...job, ...(await statusResponse.json()) };
  }

  if (job.status === "failed") {
    throw new Error(`Generation failed: ${job.error}`);
  }

  // 3. Download result (also cancellable via the abort signal)
  const videoResponse = await fetch(job.output_url, {
    signal: options.abortSignal,
  });
  if (!videoResponse.ok) {
    throw new Error(`Video download failed (${videoResponse.status})`);
  }
  return { videos: [new Uint8Array(await videoResponse.arrayBuffer())] };
}
|
|
326
|
+
```
|
|
327
|
+
|
|
328
|
+
## File Upload Helpers
|
|
329
|
+
|
|
330
|
+
Common pattern for handling file inputs:
|
|
331
|
+
|
|
332
|
+
```typescript
|
|
333
|
+
import type { ImageModelV3File } from "@ai-sdk/provider";
|
|
334
|
+
|
|
335
|
+
async function fileToUrl(file: ImageModelV3File): Promise<string> {
|
|
336
|
+
if (file.type === "url") {
|
|
337
|
+
return file.url;
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
// Convert base64/Uint8Array to upload
|
|
341
|
+
const bytes = typeof file.data === "string"
|
|
342
|
+
? Uint8Array.from(atob(file.data), c => c.charCodeAt(0))
|
|
343
|
+
: file.data;
|
|
344
|
+
|
|
345
|
+
const blob = new Blob([bytes], { type: file.mediaType ?? "image/png" });
|
|
346
|
+
|
|
347
|
+
// Upload to provider's storage (or use data URL for small files)
|
|
348
|
+
return await uploadToStorage(blob);
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
/**
 * Best-effort media type for a file input.
 *
 * Inline files report their own `mediaType`. For URL inputs the type is
 * guessed from the extension of the last path segment, ignoring any query
 * string or fragment (e.g. signed URLs like `photo.png?sig=...`).
 *
 * @returns The MIME type, or `undefined` when the extension is missing or unknown.
 */
function getMediaType(file: ImageModelV3File): string | undefined {
  if (file.type === "file") return file.mediaType;

  const mimeTypes: Record<string, string> = {
    png: "image/png",
    jpg: "image/jpeg",
    jpeg: "image/jpeg",
    webp: "image/webp",
    mp3: "audio/mpeg",
    wav: "audio/wav",
    mp4: "video/mp4",
  };

  // Absolute URLs are parsed properly; relative paths make `new URL` throw,
  // so fall back to cutting off everything from the first `?` or `#`.
  let pathname: string;
  try {
    pathname = new URL(file.url).pathname;
  } catch {
    pathname = file.url.split(/[?#]/)[0] ?? "";
  }

  const lastSegment = pathname.split("/").pop() ?? "";
  const dotIndex = lastSegment.lastIndexOf(".");
  if (dotIndex === -1) return undefined;

  const ext = lastSegment.slice(dotIndex + 1).toLowerCase();
  // Own-property check so names like "constructor" never resolve to
  // members inherited from Object.prototype.
  return Object.prototype.hasOwnProperty.call(mimeTypes, ext)
    ? mimeTypes[ext]
    : undefined;
}
|
|
365
|
+
```
|
|
366
|
+
|
|
367
|
+
## Extending Existing Providers
|
|
368
|
+
|
|
369
|
+
To add video support to an existing AI SDK provider (like OpenAI):
|
|
370
|
+
|
|
371
|
+
```typescript
|
|
372
|
+
import {
|
|
373
|
+
createOpenAI as createOpenAIBase,
|
|
374
|
+
type OpenAIProvider as OpenAIProviderBase,
|
|
375
|
+
} from "@ai-sdk/openai";
|
|
376
|
+
|
|
377
|
+
// Extend the base provider interface
|
|
378
|
+
export interface OpenAIProvider extends OpenAIProviderBase {
|
|
379
|
+
videoModel(modelId: string): VideoModelV3;
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
export function createOpenAI(settings = {}): OpenAIProvider {
|
|
383
|
+
const base = createOpenAIBase(settings);
|
|
384
|
+
|
|
385
|
+
// Create callable function with all base methods
|
|
386
|
+
const provider = ((modelId: string) => base(modelId)) as OpenAIProvider;
|
|
387
|
+
Object.assign(provider, base);
|
|
388
|
+
|
|
389
|
+
// Add video support
|
|
390
|
+
provider.videoModel = (modelId: string): VideoModelV3 =>
|
|
391
|
+
new OpenAIVideoModel(modelId, settings);
|
|
392
|
+
|
|
393
|
+
return provider;
|
|
394
|
+
}
|
|
395
|
+
```
|
|
396
|
+
|
|
397
|
+
## Re-exporting External Providers
|
|
398
|
+
|
|
399
|
+
For providers that work as-is from `@ai-sdk/*`:
|
|
400
|
+
|
|
401
|
+
```typescript
|
|
402
|
+
// replicate.ts - simple re-export
|
|
403
|
+
export {
|
|
404
|
+
createReplicate,
|
|
405
|
+
replicate,
|
|
406
|
+
type ReplicateProvider,
|
|
407
|
+
type ReplicateProviderSettings,
|
|
408
|
+
} from "@ai-sdk/replicate";
|
|
409
|
+
```
|
|
410
|
+
|
|
411
|
+
## Testing Your Provider
|
|
412
|
+
|
|
413
|
+
```typescript
|
|
414
|
+
import { describe, test, expect } from "bun:test";
|
|
415
|
+
import { createMyProvider } from "./myprovider";
|
|
416
|
+
|
|
417
|
+
describe("MyProvider", () => {
|
|
418
|
+
test("creates video model", () => {
|
|
419
|
+
const provider = createMyProvider({ apiKey: "test-key" });
|
|
420
|
+
const model = provider.videoModel("model-v1");
|
|
421
|
+
|
|
422
|
+
expect(model.provider).toBe("myprovider");
|
|
423
|
+
expect(model.modelId).toBe("model-v1");
|
|
424
|
+
expect(model.specificationVersion).toBe("v3");
|
|
425
|
+
});
|
|
426
|
+
|
|
427
|
+
test("throws on missing api key", () => {
|
|
428
|
+
delete process.env.MY_PROVIDER_API_KEY;
|
|
429
|
+
expect(() => createMyProvider()).toThrow("MY_PROVIDER_API_KEY not set");
|
|
430
|
+
});
|
|
431
|
+
});
|
|
432
|
+
```
|
|
433
|
+
|
|
434
|
+
## Checklist for New Providers
|
|
435
|
+
|
|
436
|
+
- [ ] Implements `ProviderV3` interface
|
|
437
|
+
- [ ] Model classes implement correct `*ModelV3` interfaces
|
|
438
|
+
- [ ] `specificationVersion` is `"v3"`
|
|
439
|
+
- [ ] Factory function `create<Name>(settings)` (e.g. `createMyProvider(settings)`)
|
|
440
|
+
- [ ] Lazy singleton export for convenience
|
|
441
|
+
- [ ] API key from settings OR environment variable
|
|
442
|
+
- [ ] `NoSuchModelError` for unsupported model types
|
|
443
|
+
- [ ] Warnings for unsupported features (don't fail silently)
|
|
444
|
+
- [ ] `providerOptions` passthrough for provider-specific params
|
|
445
|
+
- [ ] `abortSignal` support for cancellation
|
|
446
|
+
- [ ] Proper error handling with descriptive messages
|
|
447
|
+
- [ ] Re-exported from `src/ai-sdk/index.ts`
|
|
448
|
+
- [ ] Environment variable documented in README
|
|
449
|
+
|
|
450
|
+
## Questions?
|
|
451
|
+
|
|
452
|
+
Check existing providers for reference implementations:
|
|
453
|
+
- **Full provider**: `fal.ts` (video, image, transcription)
|
|
454
|
+
- **Audio provider**: `elevenlabs.ts` (speech, music)
|
|
455
|
+
- **Extended provider**: `openai.ts` (adds video to base)
|
|
456
|
+
- **Simple provider**: `higgsfield.ts` (image only)
|
|
457
|
+
- **Re-export**: `replicate.ts`
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import { $ } from "bun";
|
|
2
|
+
import type {
|
|
3
|
+
FFmpegBackend,
|
|
4
|
+
FFmpegInput,
|
|
5
|
+
FFmpegRunOptions,
|
|
6
|
+
FFmpegRunResult,
|
|
7
|
+
VideoInfo,
|
|
8
|
+
} from "./types";
|
|
9
|
+
|
|
10
|
+
/**
 * FFmpegBackend implementation that shells out to the `ffprobe` and `ffmpeg`
 * executables through Bun Shell.
 */
export class LocalBackend implements FFmpegBackend {
  readonly name = "local";

  /**
   * Probes a media file and returns its duration and video stream geometry.
   *
   * - `duration` falls back to 0 when ffprobe reports none or a non-numeric value.
   * - `width`, `height` and `framerateStr` come from the first stream whose
   *   `codec_type` is "video"; they are `undefined` when there is none.
   * - `fps` is only set when `r_frame_rate` parses as `num/den` with both
   *   parts non-zero.
   *
   * This method does not read `this`, so it keeps working when detached
   * from the instance.
   *
   * @param input - Path or URL handed to ffprobe as a single argument.
   */
  async ffprobe(input: string): Promise<VideoInfo> {
    const result =
      await $`ffprobe -v error -show_entries stream=width,height,r_frame_rate,codec_type -show_entries format=duration -of json ${input}`.json();

    const videoStream = result.streams?.find(
      (s: { codec_type: string }) => s.codec_type === "video",
    );
    const parsedDuration = parseFloat(result.format?.duration ?? "0");
    const duration = Number.isFinite(parsedDuration) ? parsedDuration : 0;

    let fps: number | undefined;
    const framerateStr: string | undefined = videoStream?.r_frame_rate;
    if (framerateStr) {
      const [num, den] = framerateStr.split("/").map(Number);
      // Rejects NaN, zero and a missing denominator in one go.
      if (den && den > 0 && num) fps = num / den;
    }

    return {
      duration,
      width: videoStream?.width,
      height: videoStream?.height,
      fps,
      framerateStr,
    };
  }

  /**
   * Flattens the structured inputs into ffmpeg arguments:
   * a string becomes `-i <string>`, `{ raw }` is passed through verbatim,
   * and `{ path, options }` becomes `<options...> -i <path>`.
   */
  private buildInputArgs(inputs: FFmpegInput[]): string[] {
    const args: string[] = [];
    for (const input of inputs) {
      if (typeof input === "string") {
        args.push("-i", input);
      } else if ("raw" in input) {
        args.push(...input.raw);
      } else {
        if (input.options) args.push(...input.options);
        args.push("-i", input.path);
      }
    }
    return args;
  }

  /**
   * Runs ffmpeg and resolves with the local output file.
   *
   * Argument order: global flags, inputs, `-filter_complex`, `-vf`,
   * `outputArgs`, then `-y <outputPath>` (existing output is overwritten).
   *
   * @throws Error when ffmpeg exits with a non-zero code; the message
   *         includes whatever ffmpeg wrote to stderr.
   */
  async run(options: FFmpegRunOptions): Promise<FFmpegRunResult> {
    const {
      inputs,
      filterComplex,
      videoFilter,
      outputArgs = [],
      outputPath,
      verbose,
    } = options;

    const inputArgs = this.buildInputArgs(inputs);

    const ffmpegArgs = [
      "-hide_banner",
      "-loglevel",
      verbose ? "info" : "error",
      ...inputArgs,
      ...(filterComplex ? ["-filter_complex", filterComplex] : []),
      ...(videoFilter ? ["-vf", videoFilter] : []),
      ...outputArgs,
      "-y",
      outputPath,
    ];

    if (verbose) {
      console.log("ffmpeg", ffmpegArgs.join(" "));
    }

    // Bun Shell throws on a non-zero exit unless `.nothrow()` is set; without
    // it the exit-code check below could never run.
    const result = await $`ffmpeg ${ffmpegArgs}`.quiet().nothrow();

    if (result.exitCode !== 0) {
      const stderr = result.stderr.toString().trim();
      const details = stderr ? `: ${stderr}` : "";
      throw new Error(`ffmpeg failed with exit code ${result.exitCode}${details}`);
    }

    return { output: { type: "file", path: outputPath } };
  }
}
|
|
93
|
+
|
|
94
|
+
/** Module-level LocalBackend instance, created once when this module loads. */
export const localBackend = new LocalBackend();
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* FFmpeg backend abstraction for dependency injection
|
|
3
|
+
* Allows switching between local ffmpeg and cloud services like Rendi
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import type { VideoInfo } from "../types";
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Represents the result of running ffprobe
|
|
10
|
+
*/
|
|
11
|
+
export type { VideoInfo };
|
|
12
|
+
|
|
13
|
+
/**
 * A single ffmpeg input. Three shapes are accepted:
 *
 * - `string`: path or URL of the input file.
 * - `{ path, options }`: path or URL, plus options that must appear
 *   BEFORE its `-i` flag (e.g. `-ss 5` for seeking).
 * - `{ raw }`: a complete argument list for inputs that are not plain files,
 *   including any `-i` it needs (e.g. `["-f", "lavfi", "-i", "color=black"]`).
 */
export type FFmpegInput =
  | string
  | { path: string; options?: Array<string> }
  | { raw: Array<string> };
|
|
28
|
+
|
|
29
|
+
/**
 * Options for one ffmpeg invocation. Callers describe the inputs; the
 * backend is responsible for turning them into `-i` flags.
 */
export interface FFmpegRunOptions {
  /** Input files/sources, in the order filters refer to them. */
  inputs: Array<FFmpegInput>;

  /** `-filter_complex` graph; addresses inputs by index, e.g. `[0:v]`, `[1:a]`. */
  filterComplex?: string;

  /** Simple video filter string, for single-input operations. */
  videoFilter?: string;

  /** Arguments placed after the inputs and before the output (codec, map, etc). */
  outputArgs?: Array<string>;

  /** Path of the output file. */
  outputPath: string;

  /** Turns on verbose logging. */
  verbose?: boolean;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Location of a finished render, discriminated by `type`:
 * a file `path` or a `url`.
 */
export type FFmpegOutput =
  | {
      type: "file";
      path: string;
    }
  | {
      type: "url";
      url: string;
    };
|
|
50
|
+
|
|
51
|
+
/** Value a backend resolves with after a successful ffmpeg run. */
export interface FFmpegRunResult {
  /** Where the rendered output can be found. */
  output: FFmpegOutput;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Contract every ffmpeg/ffprobe execution backend has to fulfil.
 */
export interface FFmpegBackend {
  /** Name used to identify the backend. */
  readonly name: string;

  /**
   * Inspects a media file with ffprobe.
   *
   * @param input - File path (local) or URL
   * @returns Information about the probed media
   */
  ffprobe(input: string): Promise<VideoInfo>;

  /**
   * Executes an ffmpeg command.
   *
   * @param options - Inputs, filters, extra output arguments and output path
   * @returns The output location: a file path or a URL (see `FFmpegOutput`)
   */
  run(options: FFmpegRunOptions): Promise<FFmpegRunResult>;
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { describe, expect, test } from "bun:test";
|
|
2
2
|
import { existsSync, unlinkSync } from "node:fs";
|
|
3
|
-
import {
|
|
3
|
+
import { localBackend } from "./backends/local";
|
|
4
4
|
import { editly } from "./index";
|
|
5
5
|
|
|
6
6
|
const VIDEO_1 = "output/sora-landscape.mp4";
|
|
@@ -9,6 +9,8 @@ const VIDEO_TALKING = "output/workflow-talking-synced.mp4";
|
|
|
9
9
|
const IMAGE_SQUARE = "media/replicate-forest.png";
|
|
10
10
|
const IMAGE_PORTRAIT = "media/madi-portrait.png";
|
|
11
11
|
|
|
12
|
+
// Call through the instance instead of detaching the method, so the helper
// keeps working even if `LocalBackend.ffprobe` starts relying on `this`.
const ffprobe = (input: string) => localBackend.ffprobe(input);
|
|
13
|
+
|
|
12
14
|
describe("editly", () => {
|
|
13
15
|
test("requires outPath", async () => {
|
|
14
16
|
await expect(
|
|
@@ -1105,4 +1107,50 @@ describe("editly", () => {
|
|
|
1105
1107
|
expect(info.height).toBe(1920);
|
|
1106
1108
|
expect(info.duration).toBeCloseTo(3, 0);
|
|
1107
1109
|
});
|
|
1110
|
+
|
|
1111
|
+
test("video overlay with cropPosition", async () => {
  // Renders two 1080x960 "cover" video layers, one at top 0 and one at
  // top 960, over a black fill, then checks the rendered file's metadata.
  const outPath = "output/editly-test-crop-position.mp4";
  if (existsSync(outPath)) {
    unlinkSync(outPath);
  }

  // Geometry shared by both video layers.
  const halfFrame = {
    width: 1080,
    height: 960,
    left: 0,
    resizeMode: "cover" as const,
  };

  await editly({
    outPath,
    width: 1080,
    height: 1920,
    fps: 30,
    clips: [
      {
        duration: 3,
        layers: [
          { type: "fill-color", color: "#000000" },
          {
            type: "video",
            path: VIDEO_1,
            ...halfFrame,
            top: 0,
            cropPosition: "top",
          },
          {
            type: "video",
            path: VIDEO_2,
            ...halfFrame,
            top: 960,
            cropPosition: "bottom",
          },
        ],
      },
    ],
  });

  expect(existsSync(outPath)).toBe(true);

  const { width, height, duration } = await ffprobe(outPath);
  expect(width).toBe(1080);
  expect(height).toBe(1920);
  expect(duration).toBeCloseTo(3, 0);
});
|
|
1108
1156
|
});
|