vargai 0.4.0-alpha4 → 0.4.0-alpha40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +6 -0
- package/README.md +483 -61
- package/assets/fonts/TikTokSans-Bold.ttf +0 -0
- package/examples/grok-imagine-test.tsx +155 -0
- package/launch-videos/06-kawaii-fruits.tsx +93 -0
- package/launch-videos/07-ugc-weight-loss.tsx +132 -0
- package/launch-videos/08-talking-head-varg.tsx +107 -0
- package/launch-videos/09-girl.tsx +160 -0
- package/launch-videos/README.md +42 -0
- package/package.json +10 -4
- package/pipeline/cookbooks/round-video-character.md +1 -1
- package/skills/varg-video-generation/SKILL.md +224 -0
- package/skills/varg-video-generation/references/templates.md +380 -0
- package/skills/varg-video-generation/scripts/setup.ts +265 -0
- package/src/ai-sdk/cache.ts +1 -3
- package/src/ai-sdk/examples/google-image.ts +62 -0
- package/src/ai-sdk/index.ts +10 -0
- package/src/ai-sdk/middleware/wrap-image-model.ts +4 -21
- package/src/ai-sdk/middleware/wrap-music-model.ts +4 -16
- package/src/ai-sdk/middleware/wrap-video-model.ts +5 -17
- package/src/ai-sdk/providers/CONTRIBUTING.md +457 -0
- package/src/ai-sdk/providers/editly/backends/index.ts +8 -0
- package/src/ai-sdk/providers/editly/backends/local.ts +94 -0
- package/src/ai-sdk/providers/editly/backends/types.ts +74 -0
- package/src/ai-sdk/providers/editly/editly.test.ts +49 -1
- package/src/ai-sdk/providers/editly/index.ts +164 -80
- package/src/ai-sdk/providers/editly/layers.ts +58 -6
- package/src/ai-sdk/providers/editly/rendi/editly-with-rendi-backend.test.ts +335 -0
- package/src/ai-sdk/providers/editly/rendi/index.ts +289 -0
- package/src/ai-sdk/providers/editly/rendi/rendi.test.ts +35 -0
- package/src/ai-sdk/providers/editly/types.ts +30 -0
- package/src/ai-sdk/providers/elevenlabs.ts +10 -2
- package/src/ai-sdk/providers/fal.test.ts +214 -0
- package/src/ai-sdk/providers/fal.ts +435 -40
- package/src/ai-sdk/providers/google.ts +423 -0
- package/src/ai-sdk/providers/together.ts +191 -0
- package/src/cli/commands/find.tsx +1 -0
- package/src/cli/commands/frame.tsx +616 -0
- package/src/cli/commands/hello.ts +85 -0
- package/src/cli/commands/help.tsx +18 -30
- package/src/cli/commands/index.ts +11 -2
- package/src/cli/commands/init.tsx +570 -0
- package/src/cli/commands/list.tsx +1 -0
- package/src/cli/commands/render.tsx +322 -76
- package/src/cli/commands/run.tsx +1 -0
- package/src/cli/commands/storyboard.tsx +1714 -0
- package/src/cli/commands/which.tsx +1 -0
- package/src/cli/index.ts +23 -4
- package/src/cli/ui/components/Badge.tsx +1 -0
- package/src/cli/ui/components/DataTable.tsx +1 -0
- package/src/cli/ui/components/Header.tsx +1 -0
- package/src/cli/ui/components/HelpBlock.tsx +1 -0
- package/src/cli/ui/components/KeyValue.tsx +1 -0
- package/src/cli/ui/components/OptionRow.tsx +1 -0
- package/src/cli/ui/components/Separator.tsx +1 -0
- package/src/cli/ui/components/StatusBox.tsx +1 -0
- package/src/cli/ui/components/VargBox.tsx +1 -0
- package/src/cli/ui/components/VargProgress.tsx +1 -0
- package/src/cli/ui/components/VargSpinner.tsx +1 -0
- package/src/cli/ui/components/VargText.tsx +1 -0
- package/src/definitions/actions/grok-edit.ts +133 -0
- package/src/definitions/actions/index.ts +16 -0
- package/src/definitions/actions/qwen-angles.ts +218 -0
- package/src/index.ts +1 -0
- package/src/providers/fal.ts +196 -0
- package/src/react/assets.ts +9 -0
- package/src/react/elements.ts +0 -5
- package/src/react/examples/branching.tsx +6 -4
- package/src/react/examples/character-video.tsx +13 -10
- package/src/react/examples/local-files-test.tsx +19 -0
- package/src/react/examples/ltx2-test.tsx +25 -0
- package/src/react/examples/madi.tsx +13 -10
- package/src/react/examples/mcmeows.tsx +40 -0
- package/src/react/examples/music-defaults.tsx +24 -0
- package/src/react/examples/quickstart-test.tsx +101 -0
- package/src/react/examples/qwen-angles-test.tsx +72 -0
- package/src/react/index.ts +3 -3
- package/src/react/layouts/grid.tsx +1 -1
- package/src/react/layouts/index.ts +2 -1
- package/src/react/layouts/slot.tsx +85 -0
- package/src/react/layouts/split.tsx +18 -0
- package/src/react/react.test.ts +60 -11
- package/src/react/renderers/burn-captions.ts +95 -0
- package/src/react/renderers/cache.test.ts +182 -0
- package/src/react/renderers/captions.ts +25 -6
- package/src/react/renderers/clip.ts +56 -25
- package/src/react/renderers/context.ts +5 -2
- package/src/react/renderers/image.ts +5 -2
- package/src/react/renderers/index.ts +0 -1
- package/src/react/renderers/music.ts +8 -3
- package/src/react/renderers/packshot/blinking-button.ts +413 -0
- package/src/react/renderers/packshot.ts +170 -8
- package/src/react/renderers/progress.ts +4 -3
- package/src/react/renderers/render.ts +127 -71
- package/src/react/renderers/speech.ts +2 -2
- package/src/react/renderers/split.ts +34 -13
- package/src/react/renderers/utils.test.ts +80 -0
- package/src/react/renderers/utils.ts +37 -1
- package/src/react/renderers/video.ts +47 -9
- package/src/react/types.ts +70 -17
- package/src/studio/stages.ts +40 -39
- package/src/studio/step-renderer.ts +14 -24
- package/src/studio/ui/index.html +2 -2
- package/src/tests/all.test.ts +4 -4
- package/src/tests/index.ts +1 -1
- package/test-slot-grid.tsx +19 -0
- package/test-slot-userland.tsx +30 -0
- package/test-sync-v2.ts +30 -0
- package/test-sync-v2.tsx +29 -0
- package/tsconfig.json +1 -1
- package/video.tsx +7 -0
- package/src/ai-sdk/providers/editly/ffmpeg.ts +0 -60
- package/src/react/renderers/animate.ts +0 -59
- /package/src/cli/commands/{studio.tsx → studio.ts} +0 -0
package/.env.example
CHANGED
package/README.md
CHANGED
|
@@ -1,115 +1,537 @@
|
|
|
1
1
|
# varg
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
ai video generation sdk. jsx for videos, built on vercel ai sdk.
|
|
4
4
|
|
|
5
|
-
##
|
|
5
|
+
## quickstart
|
|
6
6
|
|
|
7
7
|
```bash
|
|
8
8
|
bun install vargai ai
|
|
9
9
|
```
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
set your api key:
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
export FAL_API_KEY=fal_xxx # required
|
|
15
|
+
export ELEVENLABS_API_KEY=xxx # optional, for voice/music
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
create `hello.tsx`:
|
|
19
|
+
|
|
20
|
+
```tsx
|
|
21
|
+
import { render, Render, Clip, Image, Video } from "vargai/react";
|
|
22
|
+
import { fal } from "vargai/ai";
|
|
23
|
+
|
|
24
|
+
const fruit = Image({
|
|
25
|
+
prompt: "cute kawaii fluffy orange fruit character, round plush body, small black dot eyes, tiny smile, Pixar style",
|
|
26
|
+
model: fal.imageModel("nano-banana-pro"),
|
|
27
|
+
aspectRatio: "9:16",
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
await render(
|
|
31
|
+
<Render width={1080} height={1920}>
|
|
32
|
+
<Clip duration={3}>
|
|
33
|
+
<Video
|
|
34
|
+
prompt={{
|
|
35
|
+
text: "character waves hello enthusiastically, bounces up and down, eyes squint with joy",
|
|
36
|
+
images: [fruit],
|
|
37
|
+
}}
|
|
38
|
+
model={fal.videoModel("kling-v2.5")}
|
|
39
|
+
/>
|
|
40
|
+
</Clip>
|
|
41
|
+
</Render>,
|
|
42
|
+
{ output: "output/hello.mp4" }
|
|
43
|
+
);
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
run it:
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
bun run hello.tsx
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## installation
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
# with bun (recommended)
|
|
56
|
+
bun install vargai ai
|
|
57
|
+
|
|
58
|
+
# with npm
|
|
59
|
+
npm install vargai ai
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## ai sdk
|
|
63
|
+
|
|
64
|
+
varg extends vercel's ai sdk with video, music, and lipsync. use familiar patterns:
|
|
12
65
|
|
|
13
66
|
```typescript
|
|
14
67
|
import { generateImage } from "ai";
|
|
15
|
-
import {
|
|
68
|
+
import { generateVideo, generateMusic, generateElement, scene, fal, elevenlabs } from "vargai/ai";
|
|
69
|
+
|
|
70
|
+
// generate image
|
|
71
|
+
const { image } = await generateImage({
|
|
72
|
+
model: fal.imageModel("flux-schnell"),
|
|
73
|
+
prompt: "cyberpunk cityscape at night",
|
|
74
|
+
aspectRatio: "16:9",
|
|
75
|
+
});
|
|
76
|
+
|
|
77
|
+
// animate to video
|
|
78
|
+
const { video } = await generateVideo({
|
|
79
|
+
model: fal.videoModel("kling-v2.5"),
|
|
80
|
+
prompt: {
|
|
81
|
+
images: [image.uint8Array],
|
|
82
|
+
text: "camera slowly pans across the city",
|
|
83
|
+
},
|
|
84
|
+
duration: 5,
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
// generate music
|
|
88
|
+
const { audio } = await generateMusic({
|
|
89
|
+
model: elevenlabs.musicModel(),
|
|
90
|
+
prompt: "cyberpunk ambient music, electronic",
|
|
91
|
+
duration: 10,
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
// save output
|
|
95
|
+
await Bun.write("output/city.mp4", video.uint8Array);
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### character consistency with elements
|
|
99
|
+
|
|
100
|
+
create reusable elements for consistent generation across scenes:
|
|
101
|
+
|
|
102
|
+
```typescript
|
|
103
|
+
import { generateElement, scene, fal } from "vargai/ai";
|
|
104
|
+
import { generateImage } from "ai";
|
|
16
105
|
|
|
17
|
-
//
|
|
106
|
+
// create character from reference
|
|
18
107
|
const { element: character } = await generateElement({
|
|
19
108
|
model: fal.imageModel("nano-banana-pro/edit"),
|
|
20
109
|
type: "character",
|
|
21
110
|
prompt: {
|
|
22
|
-
text: "
|
|
23
|
-
images: [
|
|
111
|
+
text: "woman in her 30s, brown hair, green eyes",
|
|
112
|
+
images: [referenceImageData],
|
|
24
113
|
},
|
|
25
114
|
});
|
|
26
115
|
|
|
27
|
-
//
|
|
28
|
-
const { image:
|
|
116
|
+
// use in scenes - same character every time
|
|
117
|
+
const { image: frame1 } = await generateImage({
|
|
29
118
|
model: fal.imageModel("nano-banana-pro"),
|
|
30
|
-
prompt: scene`${character}
|
|
119
|
+
prompt: scene`${character} waves hello`,
|
|
31
120
|
});
|
|
32
121
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
122
|
+
const { image: frame2 } = await generateImage({
|
|
123
|
+
model: fal.imageModel("nano-banana-pro"),
|
|
124
|
+
prompt: scene`${character} gives thumbs up`,
|
|
125
|
+
});
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### file handling
|
|
129
|
+
|
|
130
|
+
```typescript
|
|
131
|
+
import { File } from "vargai/ai";
|
|
132
|
+
|
|
133
|
+
// load from disk
|
|
134
|
+
const file = File.fromPath("media/portrait.jpg");
|
|
135
|
+
|
|
136
|
+
// load from url
|
|
137
|
+
const remoteFile = await File.fromUrl("https://example.com/video.mp4");
|
|
138
|
+
|
|
139
|
+
// load from buffer
|
|
140
|
+
const bufferFile = File.fromBuffer(uint8Array, "image/png");
|
|
141
|
+
|
|
142
|
+
// get contents
|
|
143
|
+
const buffer = await file.arrayBuffer();
|
|
144
|
+
const base64 = await file.base64();
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
## jsx / react
|
|
148
|
+
|
|
149
|
+
compose videos declaratively with jsx. everything is cached - same props = instant cache hit.
|
|
150
|
+
|
|
151
|
+
```tsx
|
|
152
|
+
import { render, Render, Clip, Image, Video, Music } from "vargai/react";
|
|
153
|
+
import { fal, elevenlabs } from "vargai/ai";
|
|
154
|
+
|
|
155
|
+
// kawaii fruit characters
|
|
156
|
+
const CHARACTERS = [
|
|
157
|
+
{ name: "orange", prompt: "cute kawaii fluffy orange fruit character, round plush body, Pixar style" },
|
|
158
|
+
{ name: "strawberry", prompt: "cute kawaii fluffy strawberry fruit character, round plush body, Pixar style" },
|
|
159
|
+
{ name: "lemon", prompt: "cute kawaii fluffy lemon fruit character, round plush body, Pixar style" },
|
|
160
|
+
];
|
|
161
|
+
|
|
162
|
+
const characterImages = CHARACTERS.map(char =>
|
|
163
|
+
Image({
|
|
164
|
+
prompt: char.prompt,
|
|
165
|
+
model: fal.imageModel("nano-banana-pro"),
|
|
166
|
+
aspectRatio: "9:16",
|
|
167
|
+
})
|
|
168
|
+
);
|
|
169
|
+
|
|
170
|
+
await render(
|
|
171
|
+
<Render width={1080} height={1920}>
|
|
172
|
+
<Music prompt="cute baby song, playful xylophone, kawaii vibes" model={elevenlabs.musicModel()} />
|
|
173
|
+
|
|
174
|
+
{CHARACTERS.map((char, i) => (
|
|
175
|
+
<Clip key={char.name} duration={2.5}>
|
|
176
|
+
<Video
|
|
177
|
+
prompt={{
|
|
178
|
+
text: "character waves hello, bounces up and down, eyes squint with joy",
|
|
179
|
+
images: [characterImages[i]],
|
|
180
|
+
}}
|
|
181
|
+
model={fal.videoModel("kling-v2.5")}
|
|
182
|
+
/>
|
|
183
|
+
</Clip>
|
|
184
|
+
))}
|
|
185
|
+
</Render>,
|
|
186
|
+
{ output: "output/kawaii-fruits.mp4" }
|
|
187
|
+
);
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
### components
|
|
191
|
+
|
|
192
|
+
| component | purpose | key props |
|
|
193
|
+
|-----------|---------|-----------|
|
|
194
|
+
| `<Render>` | root container | `width`, `height`, `fps` |
|
|
195
|
+
| `<Clip>` | time segment | `duration`, `transition`, `cutFrom`, `cutTo` |
|
|
196
|
+
| `<Image>` | ai or static image | `prompt`, `src`, `model`, `zoom`, `aspectRatio`, `resize` |
|
|
197
|
+
| `<Video>` | ai or source video | `prompt`, `src`, `model`, `volume`, `cutFrom`, `cutTo` |
|
|
198
|
+
| `<Speech>` | text-to-speech | `voice`, `model`, `volume`, `children` |
|
|
199
|
+
| `<Music>` | background music | `prompt`, `src`, `model`, `volume`, `loop`, `ducking` |
|
|
200
|
+
| `<Title>` | text overlay | `position`, `color`, `start`, `end` |
|
|
201
|
+
| `<Subtitle>` | subtitle text | `backgroundColor` |
|
|
202
|
+
| `<Captions>` | auto-generated subs | `src`, `srt`, `style`, `color`, `activeColor` |
|
|
203
|
+
| `<Overlay>` | positioned layer | `left`, `top`, `width`, `height`, `keepAudio` |
|
|
204
|
+
| `<Split>` | side-by-side | `direction` |
|
|
205
|
+
| `<Slider>` | before/after reveal | `direction` |
|
|
206
|
+
| `<Swipe>` | tinder-style cards | `direction`, `interval` |
|
|
207
|
+
| `<TalkingHead>` | animated character | `character`, `src`, `voice`, `model`, `lipsyncModel` |
|
|
208
|
+
| `<Packshot>` | end card with cta | `background`, `logo`, `cta`, `blinkCta` |
|
|
209
|
+
|
|
210
|
+
### layout helpers
|
|
211
|
+
|
|
212
|
+
```tsx
|
|
213
|
+
import { Grid, SplitLayout } from "vargai/react";
|
|
214
|
+
|
|
215
|
+
// grid layout
|
|
216
|
+
<Grid columns={2}>
|
|
217
|
+
<Video prompt="scene 1" />
|
|
218
|
+
<Video prompt="scene 2" />
|
|
219
|
+
</Grid>
|
|
220
|
+
|
|
221
|
+
// split layout (before/after)
|
|
222
|
+
<SplitLayout left={beforeVideo} right={afterVideo} />
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
### transitions
|
|
226
|
+
|
|
227
|
+
67 gl-transitions available:
|
|
228
|
+
|
|
229
|
+
```tsx
|
|
230
|
+
<Clip transition={{ name: "fade", duration: 0.5 }}>
|
|
231
|
+
<Clip transition={{ name: "crossfade", duration: 0.5 }}>
|
|
232
|
+
<Clip transition={{ name: "wipeleft", duration: 0.5 }}>
|
|
233
|
+
<Clip transition={{ name: "cube", duration: 0.8 }}>
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
### caption styles
|
|
237
|
+
|
|
238
|
+
```tsx
|
|
239
|
+
<Captions src={voiceover} style="tiktok" /> // word-by-word highlight
|
|
240
|
+
<Captions src={voiceover} style="karaoke" /> // fill left-to-right
|
|
241
|
+
<Captions src={voiceover} style="bounce" /> // words bounce in
|
|
242
|
+
<Captions src={voiceover} style="typewriter" /> // typing effect
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
### talking head with lipsync
|
|
246
|
+
|
|
247
|
+
```tsx
|
|
248
|
+
import { render, Render, Clip, Image, Video, Speech, Captions, Music } from "vargai/react";
|
|
249
|
+
import { fal, elevenlabs, higgsfield } from "vargai/ai";
|
|
250
|
+
|
|
251
|
+
const voiceover = Speech({
|
|
252
|
+
model: elevenlabs.speechModel("eleven_v3"),
|
|
253
|
+
voice: "5l5f8iK3YPeGga21rQIX",
|
|
254
|
+
children: "With varg, you can create any videos at scale!",
|
|
255
|
+
});
|
|
256
|
+
|
|
257
|
+
// base character with higgsfield soul (realistic)
|
|
258
|
+
const baseCharacter = Image({
|
|
259
|
+
prompt: "beautiful East Asian woman, sleek black bob hair, fitted black t-shirt, iPhone selfie, minimalist bedroom",
|
|
260
|
+
model: higgsfield.imageModel("soul", { styleId: higgsfield.styles.REALISTIC }),
|
|
261
|
+
aspectRatio: "9:16",
|
|
262
|
+
});
|
|
263
|
+
|
|
264
|
+
// animate the character
|
|
265
|
+
const animatedCharacter = Video({
|
|
36
266
|
prompt: {
|
|
37
|
-
text:
|
|
38
|
-
images: [
|
|
267
|
+
text: "woman speaking naturally, subtle head movements, friendly expression",
|
|
268
|
+
images: [baseCharacter],
|
|
39
269
|
},
|
|
40
|
-
|
|
270
|
+
model: fal.videoModel("kling-v2.5"),
|
|
41
271
|
});
|
|
42
272
|
|
|
43
|
-
await
|
|
273
|
+
await render(
|
|
274
|
+
<Render width={1080} height={1920}>
|
|
275
|
+
<Music prompt="modern tech ambient, subtle electronic" model={elevenlabs.musicModel()} volume={0.1} />
|
|
276
|
+
|
|
277
|
+
<Clip duration={5}>
|
|
278
|
+
{/* lipsync: animated video + speech audio -> sync-v2 */}
|
|
279
|
+
<Video
|
|
280
|
+
prompt={{ video: animatedCharacter, audio: voiceover }}
|
|
281
|
+
model={fal.videoModel("sync-v2-pro")}
|
|
282
|
+
/>
|
|
283
|
+
</Clip>
|
|
284
|
+
|
|
285
|
+
<Captions src={voiceover} style="tiktok" color="#ffffff" />
|
|
286
|
+
</Render>,
|
|
287
|
+
{ output: "output/talking-head.mp4" }
|
|
288
|
+
);
|
|
44
289
|
```
|
|
45
290
|
|
|
46
|
-
###
|
|
291
|
+
### ugc transformation video
|
|
292
|
+
|
|
293
|
+
```tsx
|
|
294
|
+
import { render, Render, Clip, Image, Video, Speech, Captions, Music, Title, SplitLayout } from "vargai/react";
|
|
295
|
+
import { fal, elevenlabs, higgsfield } from "vargai/ai";
|
|
296
|
+
|
|
297
|
+
const CHARACTER = "woman in her 30s, brown hair, green eyes";
|
|
298
|
+
|
|
299
|
+
// before: generated with higgsfield soul
|
|
300
|
+
const beforeImage = Image({
|
|
301
|
+
prompt: `${CHARACTER}, overweight, tired expression, loose grey t-shirt, bathroom mirror selfie`,
|
|
302
|
+
model: higgsfield.imageModel("soul", { styleId: higgsfield.styles.REALISTIC }),
|
|
303
|
+
aspectRatio: "9:16",
|
|
304
|
+
});
|
|
305
|
+
|
|
306
|
+
// after: edit with nano-banana-pro using before as reference
|
|
307
|
+
const afterImage = Image({
|
|
308
|
+
prompt: {
|
|
309
|
+
text: `${CHARACTER}, fit slim, confident smile, fitted black tank top, same bathroom, same woman 40 pounds lighter`,
|
|
310
|
+
images: [beforeImage]
|
|
311
|
+
},
|
|
312
|
+
model: fal.imageModel("nano-banana-pro/edit"),
|
|
313
|
+
aspectRatio: "9:16",
|
|
314
|
+
});
|
|
315
|
+
|
|
316
|
+
const beforeVideo = Video({
|
|
317
|
+
prompt: { text: "woman looks down sadly, sighs, tired expression", images: [beforeImage] },
|
|
318
|
+
model: fal.videoModel("kling-v2.5"),
|
|
319
|
+
});
|
|
320
|
+
|
|
321
|
+
const afterVideo = Video({
|
|
322
|
+
prompt: { text: "woman smiles confidently, touches hair, proud expression", images: [afterImage] },
|
|
323
|
+
model: fal.videoModel("kling-v2.5"),
|
|
324
|
+
});
|
|
325
|
+
|
|
326
|
+
const voiceover = Speech({
|
|
327
|
+
model: elevenlabs.speechModel("eleven_multilingual_v2"),
|
|
328
|
+
children: "With this technique I lost 40 pounds in just 3 months!",
|
|
329
|
+
});
|
|
330
|
+
|
|
331
|
+
await render(
|
|
332
|
+
<Render width={1080 * 2} height={1920}>
|
|
333
|
+
<Music prompt="upbeat motivational pop, inspiring transformation" model={elevenlabs.musicModel()} volume={0.15} />
|
|
334
|
+
|
|
335
|
+
<Clip duration={5}>
|
|
336
|
+
<SplitLayout direction="horizontal" left={beforeVideo} right={afterVideo} />
|
|
337
|
+
<Title position="top" color="#ffffff">My 3-Month Transformation</Title>
|
|
338
|
+
</Clip>
|
|
339
|
+
|
|
340
|
+
<Captions src={voiceover} style="tiktok" color="#ffffff" />
|
|
341
|
+
</Render>,
|
|
342
|
+
{ output: "output/transformation.mp4" }
|
|
343
|
+
);
|
|
344
|
+
```
|
|
345
|
+
|
|
346
|
+
### render options
|
|
347
|
+
|
|
348
|
+
```tsx
|
|
349
|
+
// save to file
|
|
350
|
+
await render(<Render>...</Render>, { output: "output/video.mp4" });
|
|
351
|
+
|
|
352
|
+
// with cache directory
|
|
353
|
+
await render(<Render>...</Render>, {
|
|
354
|
+
output: "output/video.mp4",
|
|
355
|
+
cache: ".cache/ai"
|
|
356
|
+
});
|
|
357
|
+
|
|
358
|
+
// get buffer directly
|
|
359
|
+
const buffer = await render(<Render>...</Render>);
|
|
360
|
+
await Bun.write("video.mp4", buffer);
|
|
361
|
+
```
|
|
362
|
+
|
|
363
|
+
## studio
|
|
364
|
+
|
|
365
|
+
visual editor for video workflows. write code or use the node-based interface.
|
|
47
366
|
|
|
48
367
|
```bash
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
368
|
+
bun run studio
|
|
369
|
+
# opens http://localhost:8282
|
|
370
|
+
```
|
|
371
|
+
|
|
372
|
+
features:
|
|
373
|
+
- monaco code editor with typescript support
|
|
374
|
+
- node graph visualization of workflow
|
|
375
|
+
- step-by-step execution with previews
|
|
376
|
+
- cache viewer for generated media
|
|
377
|
+
|
|
378
|
+
## skills
|
|
379
|
+
|
|
380
|
+
skills are multi-step workflows that combine actions into pipelines. they are located in the `skills/` directory.
|
|
381
|
+
|
|
382
|
+
## supported providers
|
|
383
|
+
|
|
384
|
+
### fal (primary)
|
|
385
|
+
|
|
386
|
+
```typescript
|
|
387
|
+
import { fal } from "vargai/ai";
|
|
388
|
+
|
|
389
|
+
// image models
|
|
390
|
+
fal.imageModel("flux-schnell") // fast generation
|
|
391
|
+
fal.imageModel("flux-pro") // high quality
|
|
392
|
+
fal.imageModel("flux-dev") // development
|
|
393
|
+
fal.imageModel("nano-banana-pro") // versatile
|
|
394
|
+
fal.imageModel("nano-banana-pro/edit") // image-to-image editing
|
|
395
|
+
fal.imageModel("recraft-v3") // alternative
|
|
396
|
+
|
|
397
|
+
// video models
|
|
398
|
+
fal.videoModel("kling-v2.5") // high quality video
|
|
399
|
+
fal.videoModel("kling-v2.1") // previous version
|
|
400
|
+
fal.videoModel("wan-2.5") // good for characters
|
|
401
|
+
fal.videoModel("minimax") // alternative
|
|
402
|
+
|
|
403
|
+
// lipsync models
|
|
404
|
+
fal.videoModel("sync-v2") // lip sync
|
|
405
|
+
fal.videoModel("sync-v2-pro") // pro lip sync
|
|
406
|
+
|
|
407
|
+
// transcription
|
|
408
|
+
fal.transcriptionModel("whisper")
|
|
52
409
|
```
|
|
53
410
|
|
|
54
|
-
|
|
411
|
+
### elevenlabs
|
|
412
|
+
|
|
413
|
+
```typescript
|
|
414
|
+
import { elevenlabs } from "vargai/ai";
|
|
55
415
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
| `varg list` | List all available actions |
|
|
60
|
-
| `varg find <query>` | Search actions by keyword |
|
|
61
|
-
| `varg which <action>` | Show action details and options |
|
|
62
|
-
| `varg help` | Show help |
|
|
416
|
+
// speech models
|
|
417
|
+
elevenlabs.speechModel("eleven_turbo_v2") // fast tts (default)
|
|
418
|
+
elevenlabs.speechModel("eleven_multilingual_v2") // multilingual
|
|
63
419
|
|
|
64
|
-
|
|
420
|
+
// music model
|
|
421
|
+
elevenlabs.musicModel() // music generation
|
|
65
422
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
| `image` | Generate image from text | `varg run image --prompt "sunset"` |
|
|
69
|
-
| `video` | Generate video from text or image | `varg run video --prompt "ocean waves" --image ./photo.jpg` |
|
|
70
|
-
| `voice` | Text-to-speech | `varg run voice --text "Hello" --voice sam` |
|
|
71
|
-
| `music` | Generate music | `varg run music --prompt "upbeat electronic"` |
|
|
72
|
-
| `transcribe` | Audio to text/subtitles | `varg run transcribe --audio ./speech.mp3` |
|
|
73
|
-
| `captions` | Add subtitles to video | `varg run captions --video ./clip.mp4` |
|
|
74
|
-
| `sync` | Lipsync audio to video | `varg run sync --video ./face.mp4 --audio ./voice.mp3` |
|
|
75
|
-
| `trim` | Trim video | `varg run trim --input ./video.mp4 --start 0 --end 10` |
|
|
76
|
-
| `cut` | Remove section from video | `varg run cut --input ./video.mp4 --start 5 --end 8` |
|
|
77
|
-
| `merge` | Combine videos | `varg run merge --inputs ./a.mp4 ./b.mp4` |
|
|
78
|
-
| `split` | Split video at timestamps | `varg run split --input ./video.mp4 --timestamps 10,20,30` |
|
|
79
|
-
| `fade` | Add fade in/out | `varg run fade --input ./video.mp4 --type both` |
|
|
80
|
-
| `transition` | Add transitions between clips | `varg run transition --inputs ./a.mp4 ./b.mp4` |
|
|
81
|
-
| `upload` | Upload file to S3 | `varg run upload --file ./video.mp4` |
|
|
423
|
+
// available voices: rachel, adam, bella, josh, sam, antoni, elli, arnold, domi
|
|
424
|
+
```
|
|
82
425
|
|
|
83
|
-
|
|
426
|
+
### higgsfield
|
|
84
427
|
|
|
85
|
-
|
|
428
|
+
```typescript
|
|
429
|
+
import { higgsfield } from "vargai/ai";
|
|
86
430
|
|
|
87
|
-
|
|
88
|
-
|
|
431
|
+
// character-focused image generation with 100+ styles
|
|
432
|
+
higgsfield.imageModel("soul")
|
|
433
|
+
higgsfield.imageModel("soul", {
|
|
434
|
+
styleId: higgsfield.styles.REALISTIC,
|
|
435
|
+
quality: "1080p"
|
|
436
|
+
})
|
|
437
|
+
|
|
438
|
+
// styles include: REALISTIC, ANIME, EDITORIAL_90S, Y2K, GRUNGE, etc.
|
|
439
|
+
```
|
|
440
|
+
|
|
441
|
+
### openai
|
|
442
|
+
|
|
443
|
+
```typescript
|
|
444
|
+
import { openai } from "vargai/ai";
|
|
445
|
+
|
|
446
|
+
// sora video generation
|
|
447
|
+
openai.videoModel("sora-2")
|
|
448
|
+
openai.videoModel("sora-2-pro")
|
|
449
|
+
|
|
450
|
+
// also supports all standard openai models via @ai-sdk/openai
|
|
451
|
+
```
|
|
452
|
+
|
|
453
|
+
### replicate
|
|
454
|
+
|
|
455
|
+
```typescript
|
|
456
|
+
import { replicate } from "vargai/ai";
|
|
457
|
+
|
|
458
|
+
// background removal
|
|
459
|
+
replicate.imageModel("851-labs/background-remover")
|
|
460
|
+
|
|
461
|
+
// any replicate model
|
|
462
|
+
replicate.imageModel("owner/model-name")
|
|
463
|
+
```
|
|
464
|
+
|
|
465
|
+
## supported models
|
|
466
|
+
|
|
467
|
+
### video generation
|
|
468
|
+
|
|
469
|
+
| model | provider | capabilities |
|
|
470
|
+
|-------|----------|--------------|
|
|
471
|
+
| kling-v2.5 | fal | text-to-video, image-to-video |
|
|
472
|
+
| kling-v2.1 | fal | text-to-video, image-to-video |
|
|
473
|
+
| wan-2.5 | fal | image-to-video, good for characters |
|
|
474
|
+
| minimax | fal | text-to-video, image-to-video |
|
|
475
|
+
| sora-2 | openai | text-to-video, image-to-video |
|
|
476
|
+
| sync-v2-pro | fal | lipsync (video + audio input) |
|
|
477
|
+
|
|
478
|
+
### image generation
|
|
479
|
+
|
|
480
|
+
| model | provider | capabilities |
|
|
481
|
+
|-------|----------|--------------|
|
|
482
|
+
| flux-schnell | fal | fast text-to-image |
|
|
483
|
+
| flux-pro | fal | high quality text-to-image |
|
|
484
|
+
| nano-banana-pro | fal | text-to-image, versatile |
|
|
485
|
+
| nano-banana-pro/edit | fal | image-to-image editing |
|
|
486
|
+
| recraft-v3 | fal | text-to-image |
|
|
487
|
+
| soul | higgsfield | character-focused, 100+ styles |
|
|
488
|
+
|
|
489
|
+
### audio
|
|
490
|
+
|
|
491
|
+
| model | provider | capabilities |
|
|
492
|
+
|-------|----------|--------------|
|
|
493
|
+
| eleven_turbo_v2 | elevenlabs | fast text-to-speech |
|
|
494
|
+
| eleven_multilingual_v2 | elevenlabs | multilingual tts |
|
|
495
|
+
| music_v1 | elevenlabs | text-to-music |
|
|
496
|
+
| whisper | fal | speech-to-text |
|
|
497
|
+
|
|
498
|
+
## environment variables
|
|
89
499
|
|
|
90
500
|
```bash
|
|
91
|
-
#
|
|
501
|
+
# required
|
|
92
502
|
FAL_API_KEY=fal_xxx
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
503
|
+
|
|
504
|
+
# optional - enable additional features
|
|
505
|
+
ELEVENLABS_API_KEY=xxx # voice and music
|
|
506
|
+
REPLICATE_API_TOKEN=r8_xxx # background removal, other models
|
|
507
|
+
OPENAI_API_KEY=sk_xxx # sora video
|
|
508
|
+
HIGGSFIELD_API_KEY=hf_xxx # soul character images
|
|
98
509
|
HIGGSFIELD_SECRET=secret_xxx
|
|
510
|
+
GROQ_API_KEY=gsk_xxx # fast transcription
|
|
99
511
|
|
|
100
|
-
#
|
|
512
|
+
# storage (for upload)
|
|
101
513
|
CLOUDFLARE_R2_API_URL=https://xxx.r2.cloudflarestorage.com
|
|
102
514
|
CLOUDFLARE_ACCESS_KEY_ID=xxx
|
|
103
515
|
CLOUDFLARE_ACCESS_SECRET=xxx
|
|
104
516
|
CLOUDFLARE_R2_BUCKET=bucket-name
|
|
105
517
|
```
|
|
106
518
|
|
|
107
|
-
|
|
519
|
+
## cli
|
|
108
520
|
|
|
109
|
-
|
|
521
|
+
```bash
|
|
522
|
+
varg run image --prompt "sunset over mountains"
|
|
523
|
+
varg run video --prompt "ocean waves" --duration 5
|
|
524
|
+
varg run voice --text "Hello world" --voice rachel
|
|
525
|
+
varg list # list all actions
|
|
526
|
+
varg studio # open visual editor
|
|
527
|
+
```
|
|
528
|
+
|
|
529
|
+
## contributing
|
|
110
530
|
|
|
111
|
-
|
|
531
|
+
see [CONTRIBUTING.md](CONTRIBUTING.md) for development setup.
|
|
112
532
|
|
|
113
|
-
##
|
|
533
|
+
## license
|
|
114
534
|
|
|
115
535
|
Apache-2.0 — see [LICENSE.md](LICENSE.md)
|
|
536
|
+
|
|
537
|
+
|
|
Binary file
|