vargai 0.4.0-alpha4 → 0.4.0-alpha40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. package/.env.example +6 -0
  2. package/README.md +483 -61
  3. package/assets/fonts/TikTokSans-Bold.ttf +0 -0
  4. package/examples/grok-imagine-test.tsx +155 -0
  5. package/launch-videos/06-kawaii-fruits.tsx +93 -0
  6. package/launch-videos/07-ugc-weight-loss.tsx +132 -0
  7. package/launch-videos/08-talking-head-varg.tsx +107 -0
  8. package/launch-videos/09-girl.tsx +160 -0
  9. package/launch-videos/README.md +42 -0
  10. package/package.json +10 -4
  11. package/pipeline/cookbooks/round-video-character.md +1 -1
  12. package/skills/varg-video-generation/SKILL.md +224 -0
  13. package/skills/varg-video-generation/references/templates.md +380 -0
  14. package/skills/varg-video-generation/scripts/setup.ts +265 -0
  15. package/src/ai-sdk/cache.ts +1 -3
  16. package/src/ai-sdk/examples/google-image.ts +62 -0
  17. package/src/ai-sdk/index.ts +10 -0
  18. package/src/ai-sdk/middleware/wrap-image-model.ts +4 -21
  19. package/src/ai-sdk/middleware/wrap-music-model.ts +4 -16
  20. package/src/ai-sdk/middleware/wrap-video-model.ts +5 -17
  21. package/src/ai-sdk/providers/CONTRIBUTING.md +457 -0
  22. package/src/ai-sdk/providers/editly/backends/index.ts +8 -0
  23. package/src/ai-sdk/providers/editly/backends/local.ts +94 -0
  24. package/src/ai-sdk/providers/editly/backends/types.ts +74 -0
  25. package/src/ai-sdk/providers/editly/editly.test.ts +49 -1
  26. package/src/ai-sdk/providers/editly/index.ts +164 -80
  27. package/src/ai-sdk/providers/editly/layers.ts +58 -6
  28. package/src/ai-sdk/providers/editly/rendi/editly-with-rendi-backend.test.ts +335 -0
  29. package/src/ai-sdk/providers/editly/rendi/index.ts +289 -0
  30. package/src/ai-sdk/providers/editly/rendi/rendi.test.ts +35 -0
  31. package/src/ai-sdk/providers/editly/types.ts +30 -0
  32. package/src/ai-sdk/providers/elevenlabs.ts +10 -2
  33. package/src/ai-sdk/providers/fal.test.ts +214 -0
  34. package/src/ai-sdk/providers/fal.ts +435 -40
  35. package/src/ai-sdk/providers/google.ts +423 -0
  36. package/src/ai-sdk/providers/together.ts +191 -0
  37. package/src/cli/commands/find.tsx +1 -0
  38. package/src/cli/commands/frame.tsx +616 -0
  39. package/src/cli/commands/hello.ts +85 -0
  40. package/src/cli/commands/help.tsx +18 -30
  41. package/src/cli/commands/index.ts +11 -2
  42. package/src/cli/commands/init.tsx +570 -0
  43. package/src/cli/commands/list.tsx +1 -0
  44. package/src/cli/commands/render.tsx +322 -76
  45. package/src/cli/commands/run.tsx +1 -0
  46. package/src/cli/commands/storyboard.tsx +1714 -0
  47. package/src/cli/commands/which.tsx +1 -0
  48. package/src/cli/index.ts +23 -4
  49. package/src/cli/ui/components/Badge.tsx +1 -0
  50. package/src/cli/ui/components/DataTable.tsx +1 -0
  51. package/src/cli/ui/components/Header.tsx +1 -0
  52. package/src/cli/ui/components/HelpBlock.tsx +1 -0
  53. package/src/cli/ui/components/KeyValue.tsx +1 -0
  54. package/src/cli/ui/components/OptionRow.tsx +1 -0
  55. package/src/cli/ui/components/Separator.tsx +1 -0
  56. package/src/cli/ui/components/StatusBox.tsx +1 -0
  57. package/src/cli/ui/components/VargBox.tsx +1 -0
  58. package/src/cli/ui/components/VargProgress.tsx +1 -0
  59. package/src/cli/ui/components/VargSpinner.tsx +1 -0
  60. package/src/cli/ui/components/VargText.tsx +1 -0
  61. package/src/definitions/actions/grok-edit.ts +133 -0
  62. package/src/definitions/actions/index.ts +16 -0
  63. package/src/definitions/actions/qwen-angles.ts +218 -0
  64. package/src/index.ts +1 -0
  65. package/src/providers/fal.ts +196 -0
  66. package/src/react/assets.ts +9 -0
  67. package/src/react/elements.ts +0 -5
  68. package/src/react/examples/branching.tsx +6 -4
  69. package/src/react/examples/character-video.tsx +13 -10
  70. package/src/react/examples/local-files-test.tsx +19 -0
  71. package/src/react/examples/ltx2-test.tsx +25 -0
  72. package/src/react/examples/madi.tsx +13 -10
  73. package/src/react/examples/mcmeows.tsx +40 -0
  74. package/src/react/examples/music-defaults.tsx +24 -0
  75. package/src/react/examples/quickstart-test.tsx +101 -0
  76. package/src/react/examples/qwen-angles-test.tsx +72 -0
  77. package/src/react/index.ts +3 -3
  78. package/src/react/layouts/grid.tsx +1 -1
  79. package/src/react/layouts/index.ts +2 -1
  80. package/src/react/layouts/slot.tsx +85 -0
  81. package/src/react/layouts/split.tsx +18 -0
  82. package/src/react/react.test.ts +60 -11
  83. package/src/react/renderers/burn-captions.ts +95 -0
  84. package/src/react/renderers/cache.test.ts +182 -0
  85. package/src/react/renderers/captions.ts +25 -6
  86. package/src/react/renderers/clip.ts +56 -25
  87. package/src/react/renderers/context.ts +5 -2
  88. package/src/react/renderers/image.ts +5 -2
  89. package/src/react/renderers/index.ts +0 -1
  90. package/src/react/renderers/music.ts +8 -3
  91. package/src/react/renderers/packshot/blinking-button.ts +413 -0
  92. package/src/react/renderers/packshot.ts +170 -8
  93. package/src/react/renderers/progress.ts +4 -3
  94. package/src/react/renderers/render.ts +127 -71
  95. package/src/react/renderers/speech.ts +2 -2
  96. package/src/react/renderers/split.ts +34 -13
  97. package/src/react/renderers/utils.test.ts +80 -0
  98. package/src/react/renderers/utils.ts +37 -1
  99. package/src/react/renderers/video.ts +47 -9
  100. package/src/react/types.ts +70 -17
  101. package/src/studio/stages.ts +40 -39
  102. package/src/studio/step-renderer.ts +14 -24
  103. package/src/studio/ui/index.html +2 -2
  104. package/src/tests/all.test.ts +4 -4
  105. package/src/tests/index.ts +1 -1
  106. package/test-slot-grid.tsx +19 -0
  107. package/test-slot-userland.tsx +30 -0
  108. package/test-sync-v2.ts +30 -0
  109. package/test-sync-v2.tsx +29 -0
  110. package/tsconfig.json +1 -1
  111. package/video.tsx +7 -0
  112. package/src/ai-sdk/providers/editly/ffmpeg.ts +0 -60
  113. package/src/react/renderers/animate.ts +0 -59
  114. package/src/cli/commands/{studio.tsx → studio.ts} +0 -0
package/.env.example CHANGED
@@ -25,3 +25,9 @@ REPLICATE_API_TOKEN=r8_xxx
25
25
 
26
26
  # apify (web scraping actors)
27
27
  APIFY_TOKEN=apify_api_xxx
28
+
29
+ # decart ai (real-time & batch video/image)
30
+ DECART_API_KEY=decart_xxx
31
+
32
+ # together ai (fast flux-schnell, no queue)
33
+ TOGETHER_API_KEY=together_xxx
package/README.md CHANGED
@@ -1,115 +1,537 @@
1
1
  # varg
2
2
 
3
- AI video generation from your terminal.
3
+ ai video generation sdk. jsx for videos, built on vercel ai sdk.
4
4
 
5
- ## Quick Start
5
+ ## quickstart
6
6
 
7
7
  ```bash
8
8
  bun install vargai ai
9
9
  ```
10
10
 
11
- ### SDK Usage
11
+ set your api key:
12
+
13
+ ```bash
14
+ export FAL_API_KEY=fal_xxx # required
15
+ export ELEVENLABS_API_KEY=xxx # optional, for voice/music
16
+ ```
17
+
18
+ create `hello.tsx`:
19
+
20
+ ```tsx
21
+ import { render, Render, Clip, Image, Video } from "vargai/react";
22
+ import { fal } from "vargai/ai";
23
+
24
+ const fruit = Image({
25
+ prompt: "cute kawaii fluffy orange fruit character, round plush body, small black dot eyes, tiny smile, Pixar style",
26
+ model: fal.imageModel("nano-banana-pro"),
27
+ aspectRatio: "9:16",
28
+ });
29
+
30
+ await render(
31
+ <Render width={1080} height={1920}>
32
+ <Clip duration={3}>
33
+ <Video
34
+ prompt={{
35
+ text: "character waves hello enthusiastically, bounces up and down, eyes squint with joy",
36
+ images: [fruit],
37
+ }}
38
+ model={fal.videoModel("kling-v2.5")}
39
+ />
40
+ </Clip>
41
+ </Render>,
42
+ { output: "output/hello.mp4" }
43
+ );
44
+ ```
45
+
46
+ run it:
47
+
48
+ ```bash
49
+ bun run hello.tsx
50
+ ```
51
+
52
+ ## installation
53
+
54
+ ```bash
55
+ # with bun (recommended)
56
+ bun install vargai ai
57
+
58
+ # with npm
59
+ npm install vargai ai
60
+ ```
61
+
62
+ ## ai sdk
63
+
64
+ varg extends vercel's ai sdk with video, music, and lipsync. use familiar patterns:
12
65
 
13
66
  ```typescript
14
67
  import { generateImage } from "ai";
15
- import { File, fal, generateElement, generateVideo, scene } from "vargai";
68
+ import { generateVideo, generateMusic, generateElement, scene, fal, elevenlabs } from "vargai/ai";
69
+
70
+ // generate image
71
+ const { image } = await generateImage({
72
+ model: fal.imageModel("flux-schnell"),
73
+ prompt: "cyberpunk cityscape at night",
74
+ aspectRatio: "16:9",
75
+ });
76
+
77
+ // animate to video
78
+ const { video } = await generateVideo({
79
+ model: fal.videoModel("kling-v2.5"),
80
+ prompt: {
81
+ images: [image.uint8Array],
82
+ text: "camera slowly pans across the city",
83
+ },
84
+ duration: 5,
85
+ });
86
+
87
+ // generate music
88
+ const { audio } = await generateMusic({
89
+ model: elevenlabs.musicModel(),
90
+ prompt: "cyberpunk ambient music, electronic",
91
+ duration: 10,
92
+ });
93
+
94
+ // save output
95
+ await Bun.write("output/city.mp4", video.uint8Array);
96
+ ```
97
+
98
+ ### character consistency with elements
99
+
100
+ create reusable elements for consistent generation across scenes:
101
+
102
+ ```typescript
103
+ import { generateElement, scene, fal } from "vargai/ai";
104
+ import { generateImage, generateVideo } from "ai";
16
105
 
17
- // generate a character from reference image
106
+ // create character from reference
18
107
  const { element: character } = await generateElement({
19
108
  model: fal.imageModel("nano-banana-pro/edit"),
20
109
  type: "character",
21
110
  prompt: {
22
- text: "cartoon character, simple style",
23
- images: [await File.fromPath("media/reference.jpg").arrayBuffer()],
111
+ text: "woman in her 30s, brown hair, green eyes",
112
+ images: [referenceImageData],
24
113
  },
25
114
  });
26
115
 
27
- // generate scene with character
28
- const { image: frame } = await generateImage({
116
+ // use in scenes - same character every time
117
+ const { image: frame1 } = await generateImage({
29
118
  model: fal.imageModel("nano-banana-pro"),
30
- prompt: scene`${character} walks through a forest`,
119
+ prompt: scene`${character} waves hello`,
31
120
  });
32
121
 
33
- // animate the frame
34
- const { video } = await generateVideo({
35
- model: fal.videoModel("wan-2.5"),
122
+ const { image: frame2 } = await generateImage({
123
+ model: fal.imageModel("nano-banana-pro"),
124
+ prompt: scene`${character} gives thumbs up`,
125
+ });
126
+ ```
127
+
128
+ ### file handling
129
+
130
+ ```typescript
131
+ import { File } from "vargai/ai";
132
+
133
+ // load from disk
134
+ const file = File.fromPath("media/portrait.jpg");
135
+
136
+ // load from url
137
+ const file = await File.fromUrl("https://example.com/video.mp4");
138
+
139
+ // load from buffer
140
+ const file = File.fromBuffer(uint8Array, "image/png");
141
+
142
+ // get contents
143
+ const buffer = await file.arrayBuffer();
144
+ const base64 = await file.base64();
145
+ ```
146
+
147
+ ## jsx / react
148
+
149
+ compose videos declaratively with jsx. everything is cached - same props = instant cache hit.
150
+
151
+ ```tsx
152
+ import { render, Render, Clip, Image, Video, Music } from "vargai/react";
153
+ import { fal, elevenlabs } from "vargai/ai";
154
+
155
+ // kawaii fruit characters
156
+ const CHARACTERS = [
157
+ { name: "orange", prompt: "cute kawaii fluffy orange fruit character, round plush body, Pixar style" },
158
+ { name: "strawberry", prompt: "cute kawaii fluffy strawberry fruit character, round plush body, Pixar style" },
159
+ { name: "lemon", prompt: "cute kawaii fluffy lemon fruit character, round plush body, Pixar style" },
160
+ ];
161
+
162
+ const characterImages = CHARACTERS.map(char =>
163
+ Image({
164
+ prompt: char.prompt,
165
+ model: fal.imageModel("nano-banana-pro"),
166
+ aspectRatio: "9:16",
167
+ })
168
+ );
169
+
170
+ await render(
171
+ <Render width={1080} height={1920}>
172
+ <Music prompt="cute baby song, playful xylophone, kawaii vibes" model={elevenlabs.musicModel()} />
173
+
174
+ {CHARACTERS.map((char, i) => (
175
+ <Clip key={char.name} duration={2.5}>
176
+ <Video
177
+ prompt={{
178
+ text: "character waves hello, bounces up and down, eyes squint with joy",
179
+ images: [characterImages[i]],
180
+ }}
181
+ model={fal.videoModel("kling-v2.5")}
182
+ />
183
+ </Clip>
184
+ ))}
185
+ </Render>,
186
+ { output: "output/kawaii-fruits.mp4" }
187
+ );
188
+ ```
189
+
190
+ ### components
191
+
192
+ | component | purpose | key props |
193
+ |-----------|---------|-----------|
194
+ | `<Render>` | root container | `width`, `height`, `fps` |
195
+ | `<Clip>` | time segment | `duration`, `transition`, `cutFrom`, `cutTo` |
196
+ | `<Image>` | ai or static image | `prompt`, `src`, `model`, `zoom`, `aspectRatio`, `resize` |
197
+ | `<Video>` | ai or source video | `prompt`, `src`, `model`, `volume`, `cutFrom`, `cutTo` |
198
+ | `<Speech>` | text-to-speech | `voice`, `model`, `volume`, `children` |
199
+ | `<Music>` | background music | `prompt`, `src`, `model`, `volume`, `loop`, `ducking` |
200
+ | `<Title>` | text overlay | `position`, `color`, `start`, `end` |
201
+ | `<Subtitle>` | subtitle text | `backgroundColor` |
202
+ | `<Captions>` | auto-generated subs | `src`, `srt`, `style`, `color`, `activeColor` |
203
+ | `<Overlay>` | positioned layer | `left`, `top`, `width`, `height`, `keepAudio` |
204
+ | `<Split>` | side-by-side | `direction` |
205
+ | `<Slider>` | before/after reveal | `direction` |
206
+ | `<Swipe>` | tinder-style cards | `direction`, `interval` |
207
+ | `<TalkingHead>` | animated character | `character`, `src`, `voice`, `model`, `lipsyncModel` |
208
+ | `<Packshot>` | end card with cta | `background`, `logo`, `cta`, `blinkCta` |
209
+
210
+ ### layout helpers
211
+
212
+ ```tsx
213
+ import { Grid, SplitLayout } from "vargai/react";
214
+
215
+ // grid layout
216
+ <Grid columns={2}>
217
+ <Video prompt="scene 1" />
218
+ <Video prompt="scene 2" />
219
+ </Grid>
220
+
221
+ // split layout (before/after)
222
+ <SplitLayout left={beforeVideo} right={afterVideo} />
223
+ ```
224
+
225
+ ### transitions
226
+
227
+ 67 gl-transitions available:
228
+
229
+ ```tsx
230
+ <Clip transition={{ name: "fade", duration: 0.5 }}>
231
+ <Clip transition={{ name: "crossfade", duration: 0.5 }}>
232
+ <Clip transition={{ name: "wipeleft", duration: 0.5 }}>
233
+ <Clip transition={{ name: "cube", duration: 0.8 }}>
234
+ ```
235
+
236
+ ### caption styles
237
+
238
+ ```tsx
239
+ <Captions src={voiceover} style="tiktok" /> // word-by-word highlight
240
+ <Captions src={voiceover} style="karaoke" /> // fill left-to-right
241
+ <Captions src={voiceover} style="bounce" /> // words bounce in
242
+ <Captions src={voiceover} style="typewriter" /> // typing effect
243
+ ```
244
+
245
+ ### talking head with lipsync
246
+
247
+ ```tsx
248
+ import { render, Render, Clip, Image, Video, Speech, Captions, Music } from "vargai/react";
249
+ import { fal, elevenlabs, higgsfield } from "vargai/ai";
250
+
251
+ const voiceover = Speech({
252
+ model: elevenlabs.speechModel("eleven_v3"),
253
+ voice: "5l5f8iK3YPeGga21rQIX",
254
+ children: "With varg, you can create any videos at scale!",
255
+ });
256
+
257
+ // base character with higgsfield soul (realistic)
258
+ const baseCharacter = Image({
259
+ prompt: "beautiful East Asian woman, sleek black bob hair, fitted black t-shirt, iPhone selfie, minimalist bedroom",
260
+ model: higgsfield.imageModel("soul", { styleId: higgsfield.styles.REALISTIC }),
261
+ aspectRatio: "9:16",
262
+ });
263
+
264
+ // animate the character
265
+ const animatedCharacter = Video({
36
266
  prompt: {
37
- text: `${character.text} walks through a forest`,
38
- images: [frame.base64],
267
+ text: "woman speaking naturally, subtle head movements, friendly expression",
268
+ images: [baseCharacter],
39
269
  },
40
- duration: 5,
270
+ model: fal.videoModel("kling-v2.5"),
41
271
  });
42
272
 
43
- await Bun.write("output/scene.mp4", video.uint8Array);
273
+ await render(
274
+ <Render width={1080} height={1920}>
275
+ <Music prompt="modern tech ambient, subtle electronic" model={elevenlabs.musicModel()} volume={0.1} />
276
+
277
+ <Clip duration={5}>
278
+ {/* lipsync: animated video + speech audio -> sync-v2-pro */}
279
+ <Video
280
+ prompt={{ video: animatedCharacter, audio: voiceover }}
281
+ model={fal.videoModel("sync-v2-pro")}
282
+ />
283
+ </Clip>
284
+
285
+ <Captions src={voiceover} style="tiktok" color="#ffffff" />
286
+ </Render>,
287
+ { output: "output/talking-head.mp4" }
288
+ );
44
289
  ```
45
290
 
46
- ### CLI Usage
291
+ ### ugc transformation video
292
+
293
+ ```tsx
294
+ import { render, Render, Clip, Image, Video, Speech, Captions, Music, Title, SplitLayout } from "vargai/react";
295
+ import { fal, elevenlabs, higgsfield } from "vargai/ai";
296
+
297
+ const CHARACTER = "woman in her 30s, brown hair, green eyes";
298
+
299
+ // before: generated with higgsfield soul
300
+ const beforeImage = Image({
301
+ prompt: `${CHARACTER}, overweight, tired expression, loose grey t-shirt, bathroom mirror selfie`,
302
+ model: higgsfield.imageModel("soul", { styleId: higgsfield.styles.REALISTIC }),
303
+ aspectRatio: "9:16",
304
+ });
305
+
306
+ // after: edit with nano-banana-pro using before as reference
307
+ const afterImage = Image({
308
+ prompt: {
309
+ text: `${CHARACTER}, fit slim, confident smile, fitted black tank top, same bathroom, same woman 40 pounds lighter`,
310
+ images: [beforeImage]
311
+ },
312
+ model: fal.imageModel("nano-banana-pro/edit"),
313
+ aspectRatio: "9:16",
314
+ });
315
+
316
+ const beforeVideo = Video({
317
+ prompt: { text: "woman looks down sadly, sighs, tired expression", images: [beforeImage] },
318
+ model: fal.videoModel("kling-v2.5"),
319
+ });
320
+
321
+ const afterVideo = Video({
322
+ prompt: { text: "woman smiles confidently, touches hair, proud expression", images: [afterImage] },
323
+ model: fal.videoModel("kling-v2.5"),
324
+ });
325
+
326
+ const voiceover = Speech({
327
+ model: elevenlabs.speechModel("eleven_multilingual_v2"),
328
+ children: "With this technique I lost 40 pounds in just 3 months!",
329
+ });
330
+
331
+ await render(
332
+ <Render width={1080 * 2} height={1920}>
333
+ <Music prompt="upbeat motivational pop, inspiring transformation" model={elevenlabs.musicModel()} volume={0.15} />
334
+
335
+ <Clip duration={5}>
336
+ <SplitLayout direction="horizontal" left={beforeVideo} right={afterVideo} />
337
+ <Title position="top" color="#ffffff">My 3-Month Transformation</Title>
338
+ </Clip>
339
+
340
+ <Captions src={voiceover} style="tiktok" color="#ffffff" />
341
+ </Render>,
342
+ { output: "output/transformation.mp4" }
343
+ );
344
+ ```
345
+
346
+ ### render options
347
+
348
+ ```tsx
349
+ // save to file
350
+ await render(<Render>...</Render>, { output: "output/video.mp4" });
351
+
352
+ // with cache directory
353
+ await render(<Render>...</Render>, {
354
+ output: "output/video.mp4",
355
+ cache: ".cache/ai"
356
+ });
357
+
358
+ // get buffer directly
359
+ const buffer = await render(<Render>...</Render>);
360
+ await Bun.write("video.mp4", buffer);
361
+ ```
362
+
363
+ ## studio
364
+
365
+ visual editor for video workflows. write code or use the node-based interface.
47
366
 
48
367
  ```bash
49
- varg run image --prompt "cyberpunk cityscape at night"
50
- varg run video --prompt "camera flies through clouds" --duration 5
51
- varg run voice --text "Hello world" --voice rachel
368
+ bun run studio
369
+ # opens http://localhost:8282
370
+ ```
371
+
372
+ features:
373
+ - monaco code editor with typescript support
374
+ - node graph visualization of workflow
375
+ - step-by-step execution with previews
376
+ - cache viewer for generated media
377
+
378
+ ## skills
379
+
380
+ skills are multi-step workflows that combine actions into pipelines. they are located in the `skills/` directory.
381
+
382
+ ## supported providers
383
+
384
+ ### fal (primary)
385
+
386
+ ```typescript
387
+ import { fal } from "vargai/ai";
388
+
389
+ // image models
390
+ fal.imageModel("flux-schnell") // fast generation
391
+ fal.imageModel("flux-pro") // high quality
392
+ fal.imageModel("flux-dev") // development
393
+ fal.imageModel("nano-banana-pro") // versatile
394
+ fal.imageModel("nano-banana-pro/edit") // image-to-image editing
395
+ fal.imageModel("recraft-v3") // alternative
396
+
397
+ // video models
398
+ fal.videoModel("kling-v2.5") // high quality video
399
+ fal.videoModel("kling-v2.1") // previous version
400
+ fal.videoModel("wan-2.5") // good for characters
401
+ fal.videoModel("minimax") // alternative
402
+
403
+ // lipsync models
404
+ fal.videoModel("sync-v2") // lip sync
405
+ fal.videoModel("sync-v2-pro") // pro lip sync
406
+
407
+ // transcription
408
+ fal.transcriptionModel("whisper")
52
409
  ```
53
410
 
54
- ## Commands
411
+ ### elevenlabs
412
+
413
+ ```typescript
414
+ import { elevenlabs } from "vargai/ai";
55
415
 
56
- | Command | Description |
57
- |---------|-------------|
58
- | `varg run <action>` | Run an action |
59
- | `varg list` | List all available actions |
60
- | `varg find <query>` | Search actions by keyword |
61
- | `varg which <action>` | Show action details and options |
62
- | `varg help` | Show help |
416
+ // speech models
417
+ elevenlabs.speechModel("eleven_turbo_v2") // fast tts (default)
418
+ elevenlabs.speechModel("eleven_multilingual_v2") // multilingual
63
419
 
64
- ## Actions
420
+ // music model
421
+ elevenlabs.musicModel() // music generation
65
422
 
66
- | Action | Description | Example |
67
- |--------|-------------|---------|
68
- | `image` | Generate image from text | `varg run image --prompt "sunset"` |
69
- | `video` | Generate video from text or image | `varg run video --prompt "ocean waves" --image ./photo.jpg` |
70
- | `voice` | Text-to-speech | `varg run voice --text "Hello" --voice sam` |
71
- | `music` | Generate music | `varg run music --prompt "upbeat electronic"` |
72
- | `transcribe` | Audio to text/subtitles | `varg run transcribe --audio ./speech.mp3` |
73
- | `captions` | Add subtitles to video | `varg run captions --video ./clip.mp4` |
74
- | `sync` | Lipsync audio to video | `varg run sync --video ./face.mp4 --audio ./voice.mp3` |
75
- | `trim` | Trim video | `varg run trim --input ./video.mp4 --start 0 --end 10` |
76
- | `cut` | Remove section from video | `varg run cut --input ./video.mp4 --start 5 --end 8` |
77
- | `merge` | Combine videos | `varg run merge --inputs ./a.mp4 ./b.mp4` |
78
- | `split` | Split video at timestamps | `varg run split --input ./video.mp4 --timestamps 10,20,30` |
79
- | `fade` | Add fade in/out | `varg run fade --input ./video.mp4 --type both` |
80
- | `transition` | Add transitions between clips | `varg run transition --inputs ./a.mp4 ./b.mp4` |
81
- | `upload` | Upload file to S3 | `varg run upload --file ./video.mp4` |
423
+ // available voices: rachel, adam, bella, josh, sam, antoni, elli, arnold, domi
424
+ ```
82
425
 
83
- Use `varg run <action> --help` for all options.
426
+ ### higgsfield
84
427
 
85
- ## Environment Variables
428
+ ```typescript
429
+ import { higgsfield } from "vargai/ai";
86
430
 
87
- <details>
88
- <summary>Required API keys</summary>
431
+ // character-focused image generation with 100+ styles
432
+ higgsfield.imageModel("soul")
433
+ higgsfield.imageModel("soul", {
434
+ styleId: higgsfield.styles.REALISTIC,
435
+ quality: "1080p"
436
+ })
437
+
438
+ // styles include: REALISTIC, ANIME, EDITORIAL_90S, Y2K, GRUNGE, etc.
439
+ ```
440
+
441
+ ### openai
442
+
443
+ ```typescript
444
+ import { openai } from "vargai/ai";
445
+
446
+ // sora video generation
447
+ openai.videoModel("sora-2")
448
+ openai.videoModel("sora-2-pro")
449
+
450
+ // also supports all standard openai models via @ai-sdk/openai
451
+ ```
452
+
453
+ ### replicate
454
+
455
+ ```typescript
456
+ import { replicate } from "vargai/ai";
457
+
458
+ // background removal
459
+ replicate.imageModel("851-labs/background-remover")
460
+
461
+ // any replicate model
462
+ replicate.imageModel("owner/model-name")
463
+ ```
464
+
465
+ ## supported models
466
+
467
+ ### video generation
468
+
469
+ | model | provider | capabilities |
470
+ |-------|----------|--------------|
471
+ | kling-v2.5 | fal | text-to-video, image-to-video |
472
+ | kling-v2.1 | fal | text-to-video, image-to-video |
473
+ | wan-2.5 | fal | image-to-video, good for characters |
474
+ | minimax | fal | text-to-video, image-to-video |
475
+ | sora-2 | openai | text-to-video, image-to-video |
476
+ | sync-v2-pro | fal | lipsync (video + audio input) |
477
+
478
+ ### image generation
479
+
480
+ | model | provider | capabilities |
481
+ |-------|----------|--------------|
482
+ | flux-schnell | fal | fast text-to-image |
483
+ | flux-pro | fal | high quality text-to-image |
484
+ | nano-banana-pro | fal | text-to-image, versatile |
485
+ | nano-banana-pro/edit | fal | image-to-image editing |
486
+ | recraft-v3 | fal | text-to-image |
487
+ | soul | higgsfield | character-focused, 100+ styles |
488
+
489
+ ### audio
490
+
491
+ | model | provider | capabilities |
492
+ |-------|----------|--------------|
493
+ | eleven_turbo_v2 | elevenlabs | fast text-to-speech |
494
+ | eleven_multilingual_v2 | elevenlabs | multilingual tts |
495
+ | music_v1 | elevenlabs | text-to-music |
496
+ | whisper | fal | speech-to-text |
497
+
498
+ ## environment variables
89
499
 
90
500
  ```bash
91
- # AI Providers
501
+ # required
92
502
  FAL_API_KEY=fal_xxx
93
- REPLICATE_API_TOKEN=r8_xxx
94
- ELEVENLABS_API_KEY=xxx
95
- GROQ_API_KEY=gsk_xxx
96
- FIREWORKS_API_KEY=fw_xxx
97
- HIGGSFIELD_API_KEY=hf_xxx
503
+
504
+ # optional - enable additional features
505
+ ELEVENLABS_API_KEY=xxx # voice and music
506
+ REPLICATE_API_TOKEN=r8_xxx # background removal, other models
507
+ OPENAI_API_KEY=sk-xxx # sora video
508
+ HIGGSFIELD_API_KEY=hf_xxx # soul character images
98
509
  HIGGSFIELD_SECRET=secret_xxx
510
+ GROQ_API_KEY=gsk_xxx # fast transcription
99
511
 
100
- # Storage (Cloudflare R2)
512
+ # storage (for upload)
101
513
  CLOUDFLARE_R2_API_URL=https://xxx.r2.cloudflarestorage.com
102
514
  CLOUDFLARE_ACCESS_KEY_ID=xxx
103
515
  CLOUDFLARE_ACCESS_SECRET=xxx
104
516
  CLOUDFLARE_R2_BUCKET=bucket-name
105
517
  ```
106
518
 
107
- </details>
519
+ ## cli
108
520
 
109
- ## Contributing
521
+ ```bash
522
+ varg run image --prompt "sunset over mountains"
523
+ varg run video --prompt "ocean waves" --duration 5
524
+ varg run voice --text "Hello world" --voice rachel
525
+ varg list # list all actions
526
+ varg studio # open visual editor
527
+ ```
528
+
529
+ ## contributing
110
530
 
111
- See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup.
531
+ see [CONTRIBUTING.md](CONTRIBUTING.md) for development setup.
112
532
 
113
- ## License
533
+ ## license
114
534
 
115
535
  Apache-2.0 — see [LICENSE.md](LICENSE.md)
536
+
537
+
Binary file