varg.ai-sdk 0.1.0 → 0.4.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +1 -1
- package/.env.example +3 -0
- package/.github/workflows/ci.yml +23 -0
- package/.husky/README.md +102 -0
- package/.husky/commit-msg +6 -0
- package/.husky/pre-commit +9 -0
- package/.husky/pre-push +6 -0
- package/.size-limit.json +8 -0
- package/.test-hooks.ts +5 -0
- package/CLAUDE.md +10 -3
- package/CONTRIBUTING.md +150 -0
- package/LICENSE.md +53 -0
- package/README.md +56 -209
- package/SKILLS.md +26 -10
- package/biome.json +7 -1
- package/bun.lock +1286 -0
- package/commitlint.config.js +22 -0
- package/docs/index.html +1130 -0
- package/docs/prompting.md +326 -0
- package/docs/react.md +834 -0
- package/docs/sdk.md +812 -0
- package/ffmpeg/CLAUDE.md +68 -0
- package/package.json +48 -8
- package/pipeline/cookbooks/scripts/animate-frames-parallel.ts +84 -0
- package/pipeline/cookbooks/scripts/combine-scenes.sh +53 -0
- package/pipeline/cookbooks/scripts/generate-frames-parallel.ts +99 -0
- package/pipeline/cookbooks/scripts/still-to-video.sh +37 -0
- package/pipeline/cookbooks/text-to-tiktok.md +669 -0
- package/pipeline/cookbooks/trendwatching.md +156 -0
- package/plan.md +281 -0
- package/scripts/.gitkeep +0 -0
- package/src/ai-sdk/cache.ts +142 -0
- package/src/ai-sdk/examples/cached-generation.ts +53 -0
- package/src/ai-sdk/examples/duet-scene-4.ts +53 -0
- package/src/ai-sdk/examples/duet-scene-5-audio.ts +32 -0
- package/src/ai-sdk/examples/duet-video.ts +56 -0
- package/src/ai-sdk/examples/editly-composition.ts +63 -0
- package/src/ai-sdk/examples/editly-test.ts +57 -0
- package/src/ai-sdk/examples/editly-video-test.ts +52 -0
- package/src/ai-sdk/examples/fal-lipsync.ts +43 -0
- package/src/ai-sdk/examples/higgsfield-image.ts +61 -0
- package/src/ai-sdk/examples/music-generation.ts +19 -0
- package/src/ai-sdk/examples/openai-sora.ts +34 -0
- package/src/ai-sdk/examples/replicate-bg-removal.ts +52 -0
- package/src/ai-sdk/examples/simpsons-scene.ts +61 -0
- package/src/ai-sdk/examples/talking-lion.ts +55 -0
- package/src/ai-sdk/examples/video-generation.ts +39 -0
- package/src/ai-sdk/examples/workflow-animated-girl.ts +104 -0
- package/src/ai-sdk/examples/workflow-before-after.ts +114 -0
- package/src/ai-sdk/examples/workflow-character-grid.ts +112 -0
- package/src/ai-sdk/examples/workflow-slideshow.ts +161 -0
- package/src/ai-sdk/file-cache.ts +112 -0
- package/src/ai-sdk/file.ts +238 -0
- package/src/ai-sdk/generate-element.ts +92 -0
- package/src/ai-sdk/generate-music.ts +46 -0
- package/src/ai-sdk/generate-video.ts +165 -0
- package/src/ai-sdk/index.ts +72 -0
- package/src/ai-sdk/music-model.ts +110 -0
- package/src/ai-sdk/providers/editly/editly.test.ts +1108 -0
- package/src/ai-sdk/providers/editly/ffmpeg.ts +60 -0
- package/src/ai-sdk/providers/editly/index.ts +817 -0
- package/src/ai-sdk/providers/editly/layers.ts +776 -0
- package/src/ai-sdk/providers/editly/plan.md +144 -0
- package/src/ai-sdk/providers/editly/types.ts +328 -0
- package/src/ai-sdk/providers/elevenlabs-provider.ts +255 -0
- package/src/ai-sdk/providers/fal-provider.ts +512 -0
- package/src/ai-sdk/providers/higgsfield.ts +379 -0
- package/src/ai-sdk/providers/openai.ts +251 -0
- package/src/ai-sdk/providers/replicate.ts +16 -0
- package/src/ai-sdk/video-model.ts +185 -0
- package/src/cli/commands/find.tsx +137 -0
- package/src/cli/commands/help.tsx +85 -0
- package/src/cli/commands/index.ts +6 -0
- package/src/cli/commands/list.tsx +238 -0
- package/src/cli/commands/render.tsx +71 -0
- package/src/cli/commands/run.tsx +511 -0
- package/src/cli/commands/which.tsx +253 -0
- package/src/cli/index.ts +114 -0
- package/src/cli/quiet.ts +44 -0
- package/src/cli/types.ts +32 -0
- package/src/cli/ui/components/Badge.tsx +29 -0
- package/src/cli/ui/components/DataTable.tsx +51 -0
- package/src/cli/ui/components/Header.tsx +23 -0
- package/src/cli/ui/components/HelpBlock.tsx +44 -0
- package/src/cli/ui/components/KeyValue.tsx +33 -0
- package/src/cli/ui/components/OptionRow.tsx +81 -0
- package/src/cli/ui/components/Separator.tsx +23 -0
- package/src/cli/ui/components/StatusBox.tsx +108 -0
- package/src/cli/ui/components/VargBox.tsx +51 -0
- package/src/cli/ui/components/VargProgress.tsx +36 -0
- package/src/cli/ui/components/VargSpinner.tsx +34 -0
- package/src/cli/ui/components/VargText.tsx +56 -0
- package/src/cli/ui/components/index.ts +19 -0
- package/src/cli/ui/index.ts +12 -0
- package/src/cli/ui/render.ts +35 -0
- package/src/cli/ui/theme.ts +63 -0
- package/src/cli/utils.ts +78 -0
- package/src/core/executor/executor.ts +201 -0
- package/src/core/executor/index.ts +13 -0
- package/src/core/executor/job.ts +214 -0
- package/src/core/executor/pipeline.ts +222 -0
- package/src/core/index.ts +11 -0
- package/src/core/registry/index.ts +9 -0
- package/src/core/registry/loader.ts +149 -0
- package/src/core/registry/registry.ts +221 -0
- package/src/core/registry/resolver.ts +206 -0
- package/src/core/schema/helpers.ts +134 -0
- package/src/core/schema/index.ts +8 -0
- package/src/core/schema/shared.ts +102 -0
- package/src/core/schema/types.ts +279 -0
- package/src/core/schema/validator.ts +92 -0
- package/src/definitions/actions/captions.ts +261 -0
- package/src/definitions/actions/edit.ts +298 -0
- package/src/definitions/actions/image.ts +125 -0
- package/src/definitions/actions/index.ts +114 -0
- package/src/definitions/actions/music.ts +205 -0
- package/src/definitions/actions/sync.ts +128 -0
- package/{action/transcribe/index.ts → src/definitions/actions/transcribe.ts} +63 -90
- package/src/definitions/actions/upload.ts +111 -0
- package/src/definitions/actions/video.ts +163 -0
- package/src/definitions/actions/voice.ts +119 -0
- package/src/definitions/index.ts +23 -0
- package/src/definitions/models/elevenlabs.ts +50 -0
- package/src/definitions/models/flux.ts +56 -0
- package/src/definitions/models/index.ts +36 -0
- package/src/definitions/models/kling.ts +56 -0
- package/src/definitions/models/llama.ts +54 -0
- package/src/definitions/models/nano-banana-pro.ts +102 -0
- package/src/definitions/models/sonauto.ts +68 -0
- package/src/definitions/models/soul.ts +65 -0
- package/src/definitions/models/wan.ts +54 -0
- package/src/definitions/models/whisper.ts +44 -0
- package/src/definitions/skills/index.ts +12 -0
- package/src/definitions/skills/talking-character.ts +87 -0
- package/src/definitions/skills/text-to-tiktok.ts +97 -0
- package/src/index.ts +118 -0
- package/src/providers/apify.ts +269 -0
- package/src/providers/base.ts +264 -0
- package/src/providers/elevenlabs.ts +217 -0
- package/src/providers/fal.ts +392 -0
- package/src/providers/ffmpeg.ts +544 -0
- package/src/providers/fireworks.ts +193 -0
- package/src/providers/groq.ts +149 -0
- package/src/providers/higgsfield.ts +145 -0
- package/src/providers/index.ts +143 -0
- package/src/providers/replicate.ts +147 -0
- package/src/providers/storage.ts +206 -0
- package/src/react/cli.ts +52 -0
- package/src/react/elements.ts +146 -0
- package/src/react/examples/branching.tsx +66 -0
- package/src/react/examples/captions-demo.tsx +37 -0
- package/src/react/examples/character-video.tsx +84 -0
- package/src/react/examples/grid.tsx +53 -0
- package/src/react/examples/layouts-demo.tsx +57 -0
- package/src/react/examples/madi.tsx +60 -0
- package/src/react/examples/music-test.tsx +35 -0
- package/src/react/examples/onlyfans-1m/workflow.tsx +88 -0
- package/src/react/examples/orange-portrait.tsx +41 -0
- package/src/react/examples/split-element-demo.tsx +60 -0
- package/src/react/examples/split-layout-demo.tsx +60 -0
- package/src/react/examples/split.tsx +41 -0
- package/src/react/examples/video-grid.tsx +46 -0
- package/src/react/index.ts +43 -0
- package/src/react/layouts/grid.tsx +28 -0
- package/src/react/layouts/index.ts +2 -0
- package/src/react/layouts/split.tsx +20 -0
- package/src/react/react.test.ts +309 -0
- package/src/react/render.ts +21 -0
- package/src/react/renderers/animate.ts +59 -0
- package/src/react/renderers/captions.ts +297 -0
- package/src/react/renderers/clip.ts +248 -0
- package/src/react/renderers/context.ts +17 -0
- package/src/react/renderers/image.ts +109 -0
- package/src/react/renderers/index.ts +22 -0
- package/src/react/renderers/music.ts +60 -0
- package/src/react/renderers/packshot.ts +84 -0
- package/src/react/renderers/progress.ts +173 -0
- package/src/react/renderers/render.ts +243 -0
- package/src/react/renderers/slider.ts +69 -0
- package/src/react/renderers/speech.ts +53 -0
- package/src/react/renderers/split.ts +91 -0
- package/src/react/renderers/subtitle.ts +16 -0
- package/src/react/renderers/swipe.ts +75 -0
- package/src/react/renderers/title.ts +17 -0
- package/src/react/renderers/utils.ts +124 -0
- package/src/react/renderers/video.ts +127 -0
- package/src/react/runtime/jsx-dev-runtime.ts +43 -0
- package/src/react/runtime/jsx-runtime.ts +35 -0
- package/src/react/types.ts +232 -0
- package/src/studio/index.ts +26 -0
- package/src/studio/scanner.ts +102 -0
- package/src/studio/server.ts +554 -0
- package/src/studio/stages.ts +251 -0
- package/src/studio/step-renderer.ts +279 -0
- package/src/studio/types.ts +60 -0
- package/src/studio/ui/cache.html +303 -0
- package/src/studio/ui/index.html +1820 -0
- package/src/tests/all.test.ts +509 -0
- package/src/tests/index.ts +33 -0
- package/src/tests/unit.test.ts +403 -0
- package/tsconfig.cli.json +8 -0
- package/tsconfig.json +21 -3
- package/TEST_RESULTS.md +0 -122
- package/action/captions/SKILL.md +0 -170
- package/action/captions/index.ts +0 -227
- package/action/edit/SKILL.md +0 -235
- package/action/edit/index.ts +0 -493
- package/action/image/SKILL.md +0 -140
- package/action/image/index.ts +0 -112
- package/action/sync/SKILL.md +0 -136
- package/action/sync/index.ts +0 -187
- package/action/transcribe/SKILL.md +0 -179
- package/action/video/SKILL.md +0 -116
- package/action/video/index.ts +0 -135
- package/action/voice/SKILL.md +0 -125
- package/action/voice/index.ts +0 -201
- package/index.ts +0 -38
- package/lib/README.md +0 -144
- package/lib/ai-sdk/fal.ts +0 -106
- package/lib/ai-sdk/replicate.ts +0 -107
- package/lib/elevenlabs.ts +0 -382
- package/lib/fal.ts +0 -478
- package/lib/ffmpeg.ts +0 -467
- package/lib/fireworks.ts +0 -235
- package/lib/groq.ts +0 -246
- package/lib/higgsfield.ts +0 -176
- package/lib/remotion/SKILL.md +0 -823
- package/lib/remotion/cli.ts +0 -115
- package/lib/remotion/functions.ts +0 -283
- package/lib/remotion/index.ts +0 -19
- package/lib/remotion/templates.ts +0 -73
- package/lib/replicate.ts +0 -304
- package/output.txt +0 -1
- package/test-import.ts +0 -7
- package/test-services.ts +0 -97
- package/utilities/s3.ts +0 -147
package/action/video/SKILL.md
DELETED
|
@@ -1,116 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: video-generation
|
|
3
|
-
description: generate videos from images or text prompts using fal.ai. use when user wants to animate images, create videos from text, or needs ai video generation with 5-10 second clips.
|
|
4
|
-
allowed-tools: Read, Bash
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
# video generation
|
|
8
|
-
|
|
9
|
-
generate ai videos from images or text using fal.ai with automatic s3 upload support.
|
|
10
|
-
|
|
11
|
-
## capabilities
|
|
12
|
-
|
|
13
|
-
- **image-to-video**: animate static images with motion prompts
|
|
14
|
-
- **text-to-video**: generate videos directly from text descriptions
|
|
15
|
-
- supports 5 or 10 second duration
|
|
16
|
-
- automatic s3 upload
|
|
17
|
-
|
|
18
|
-
## usage
|
|
19
|
-
|
|
20
|
-
### generate from image
|
|
21
|
-
```bash
|
|
22
|
-
bun run service/video.ts from_image <prompt> <imageUrl> [duration] [upload]
|
|
23
|
-
```
|
|
24
|
-
|
|
25
|
-
**parameters:**
|
|
26
|
-
- `prompt` (required): motion description (e.g., "camera pan left")
|
|
27
|
-
- `imageUrl` (required): url of the source image
|
|
28
|
-
- `duration` (optional): 5 or 10 seconds (default: 5)
|
|
29
|
-
- `upload` (optional): "true" to upload to s3
|
|
30
|
-
|
|
31
|
-
**example:**
|
|
32
|
-
```bash
|
|
33
|
-
bun run service/video.ts from_image "person talking naturally" https://example.com/headshot.jpg 5 true
|
|
34
|
-
```
|
|
35
|
-
|
|
36
|
-
### generate from text
|
|
37
|
-
```bash
|
|
38
|
-
bun run service/video.ts from_text <prompt> [duration] [upload]
|
|
39
|
-
```
|
|
40
|
-
|
|
41
|
-
**parameters:**
|
|
42
|
-
- `prompt` (required): video scene description
|
|
43
|
-
- `duration` (optional): 5 or 10 seconds (default: 5)
|
|
44
|
-
- `upload` (optional): "true" to upload to s3
|
|
45
|
-
|
|
46
|
-
**example:**
|
|
47
|
-
```bash
|
|
48
|
-
bun run service/video.ts from_text "waves crashing on beach at sunset" 10 true
|
|
49
|
-
```
|
|
50
|
-
|
|
51
|
-
## as library
|
|
52
|
-
|
|
53
|
-
```typescript
|
|
54
|
-
import { generateVideoFromImage, generateVideoFromText } from "./service/video"
|
|
55
|
-
|
|
56
|
-
// animate an image
|
|
57
|
-
const videoResult = await generateVideoFromImage(
|
|
58
|
-
"camera zoom in slowly",
|
|
59
|
-
"https://example.com/portrait.jpg",
|
|
60
|
-
{ duration: 5, upload: true }
|
|
61
|
-
)
|
|
62
|
-
console.log(videoResult.videoUrl)
|
|
63
|
-
console.log(videoResult.uploaded) // s3 url if upload=true
|
|
64
|
-
|
|
65
|
-
// generate from text
|
|
66
|
-
const textVideo = await generateVideoFromText(
|
|
67
|
-
"forest path with sunlight filtering through trees",
|
|
68
|
-
{ duration: 10, upload: true }
|
|
69
|
-
)
|
|
70
|
-
```
|
|
71
|
-
|
|
72
|
-
## output
|
|
73
|
-
|
|
74
|
-
returns `VideoGenerationResult`:
|
|
75
|
-
```typescript
|
|
76
|
-
{
|
|
77
|
-
videoUrl: string, // direct video url
|
|
78
|
-
duration?: number, // actual video duration
|
|
79
|
-
uploaded?: string // s3 url if upload requested
|
|
80
|
-
}
|
|
81
|
-
```
|
|
82
|
-
|
|
83
|
-
## when to use
|
|
84
|
-
|
|
85
|
-
use this skill when:
|
|
86
|
-
- animating character headshots or portraits
|
|
87
|
-
- creating motion from static images
|
|
88
|
-
- generating video clips from text descriptions
|
|
89
|
-
- preparing videos for lipsync or editing pipeline
|
|
90
|
-
- need short form video content (5-10s)
|
|
91
|
-
|
|
92
|
-
## tips
|
|
93
|
-
|
|
94
|
-
**for character animation:**
|
|
95
|
-
- use subtle prompts like "person talking naturally" or "slight head movement"
|
|
96
|
-
- keep duration at 5 seconds for character shots
|
|
97
|
-
- combine with lipsync for talking videos
|
|
98
|
-
|
|
99
|
-
**for scene generation:**
|
|
100
|
-
- be descriptive about camera movement and scene dynamics
|
|
101
|
-
- 10 seconds works better for landscape/scene videos
|
|
102
|
-
|
|
103
|
-
## environment variables
|
|
104
|
-
|
|
105
|
-
required:
|
|
106
|
-
- `FAL_API_KEY` - for fal video generation
|
|
107
|
-
|
|
108
|
-
optional (for s3 upload):
|
|
109
|
-
- `CLOUDFLARE_R2_API_URL`
|
|
110
|
-
- `CLOUDFLARE_ACCESS_KEY_ID`
|
|
111
|
-
- `CLOUDFLARE_ACCESS_SECRET`
|
|
112
|
-
- `CLOUDFLARE_R2_BUCKET`
|
|
113
|
-
|
|
114
|
-
## generation time
|
|
115
|
-
|
|
116
|
-
expect 2-3 minutes per video clip
|
package/action/video/index.ts
DELETED
|
@@ -1,135 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env bun
|
|
2
|
-
/**
|
|
3
|
-
* video generation service combining fal and higgsfield
|
|
4
|
-
* usage: bun run service/video.ts <command> <args>
|
|
5
|
-
*/
|
|
6
|
-
|
|
7
|
-
import { imageToVideo, textToVideo } from "../../lib/fal";
|
|
8
|
-
import { uploadFromUrl } from "../../utilities/s3";
|
|
9
|
-
|
|
10
|
-
export interface VideoGenerationResult {
|
|
11
|
-
videoUrl: string;
|
|
12
|
-
duration?: number;
|
|
13
|
-
uploaded?: string;
|
|
14
|
-
}
|
|
15
|
-
|
|
16
|
-
export async function generateVideoFromImage(
|
|
17
|
-
prompt: string,
|
|
18
|
-
imageUrl: string,
|
|
19
|
-
options: { duration?: 5 | 10; upload?: boolean } = {},
|
|
20
|
-
): Promise<VideoGenerationResult> {
|
|
21
|
-
console.log("[service/video] generating video from image");
|
|
22
|
-
|
|
23
|
-
const result = await imageToVideo({
|
|
24
|
-
prompt,
|
|
25
|
-
imageUrl,
|
|
26
|
-
duration: options.duration,
|
|
27
|
-
});
|
|
28
|
-
|
|
29
|
-
const videoUrl = result.data?.video?.url;
|
|
30
|
-
if (!videoUrl) {
|
|
31
|
-
throw new Error("no video url in result");
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
let uploaded: string | undefined;
|
|
35
|
-
if (options.upload) {
|
|
36
|
-
const timestamp = Date.now();
|
|
37
|
-
const objectKey = `videos/generated/${timestamp}.mp4`;
|
|
38
|
-
uploaded = await uploadFromUrl(videoUrl, objectKey);
|
|
39
|
-
console.log(`[service/video] uploaded to ${uploaded}`);
|
|
40
|
-
}
|
|
41
|
-
|
|
42
|
-
return {
|
|
43
|
-
videoUrl,
|
|
44
|
-
duration: result.data?.duration,
|
|
45
|
-
uploaded,
|
|
46
|
-
};
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
export async function generateVideoFromText(
|
|
50
|
-
prompt: string,
|
|
51
|
-
options: { duration?: 5 | 10; upload?: boolean } = {},
|
|
52
|
-
): Promise<VideoGenerationResult> {
|
|
53
|
-
console.log("[service/video] generating video from text");
|
|
54
|
-
|
|
55
|
-
const result = await textToVideo({
|
|
56
|
-
prompt,
|
|
57
|
-
duration: options.duration,
|
|
58
|
-
});
|
|
59
|
-
|
|
60
|
-
const videoUrl = result.data?.video?.url;
|
|
61
|
-
if (!videoUrl) {
|
|
62
|
-
throw new Error("no video url in result");
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
let uploaded: string | undefined;
|
|
66
|
-
if (options.upload) {
|
|
67
|
-
const timestamp = Date.now();
|
|
68
|
-
const objectKey = `videos/generated/${timestamp}.mp4`;
|
|
69
|
-
uploaded = await uploadFromUrl(videoUrl, objectKey);
|
|
70
|
-
console.log(`[service/video] uploaded to ${uploaded}`);
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
return {
|
|
74
|
-
videoUrl,
|
|
75
|
-
duration: result.data?.duration,
|
|
76
|
-
uploaded,
|
|
77
|
-
};
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
// cli runner
|
|
81
|
-
if (import.meta.main) {
|
|
82
|
-
const [command, ...args] = process.argv.slice(2);
|
|
83
|
-
|
|
84
|
-
switch (command) {
|
|
85
|
-
case "from_image": {
|
|
86
|
-
if (!args[0] || !args[1]) {
|
|
87
|
-
console.log(`
|
|
88
|
-
usage:
|
|
89
|
-
bun run service/video.ts from_image <prompt> <imageUrl> [duration] [upload]
|
|
90
|
-
`);
|
|
91
|
-
process.exit(1);
|
|
92
|
-
}
|
|
93
|
-
const duration = args[2];
|
|
94
|
-
if (duration && duration !== "5" && duration !== "10") {
|
|
95
|
-
console.error("duration must be 5 or 10");
|
|
96
|
-
process.exit(1);
|
|
97
|
-
}
|
|
98
|
-
const imgResult = await generateVideoFromImage(args[0], args[1], {
|
|
99
|
-
duration: duration === "10" ? 10 : 5,
|
|
100
|
-
upload: args[3] === "true",
|
|
101
|
-
});
|
|
102
|
-
console.log(JSON.stringify(imgResult, null, 2));
|
|
103
|
-
break;
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
case "from_text": {
|
|
107
|
-
if (!args[0]) {
|
|
108
|
-
console.log(`
|
|
109
|
-
usage:
|
|
110
|
-
bun run service/video.ts from_text <prompt> [duration] [upload]
|
|
111
|
-
`);
|
|
112
|
-
process.exit(1);
|
|
113
|
-
}
|
|
114
|
-
const duration = args[1];
|
|
115
|
-
if (duration && duration !== "5" && duration !== "10") {
|
|
116
|
-
console.error("duration must be 5 or 10");
|
|
117
|
-
process.exit(1);
|
|
118
|
-
}
|
|
119
|
-
const txtResult = await generateVideoFromText(args[0], {
|
|
120
|
-
duration: duration === "10" ? 10 : 5,
|
|
121
|
-
upload: args[2] === "true",
|
|
122
|
-
});
|
|
123
|
-
console.log(JSON.stringify(txtResult, null, 2));
|
|
124
|
-
break;
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
default:
|
|
128
|
-
console.log(`
|
|
129
|
-
usage:
|
|
130
|
-
bun run service/video.ts from_image <prompt> <imageUrl> [duration] [upload]
|
|
131
|
-
bun run service/video.ts from_text <prompt> [duration] [upload]
|
|
132
|
-
`);
|
|
133
|
-
process.exit(1);
|
|
134
|
-
}
|
|
135
|
-
}
|
package/action/voice/SKILL.md
DELETED
|
@@ -1,125 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: voice-synthesis
|
|
3
|
-
description: generate realistic text-to-speech audio using elevenlabs with multiple voice options. use when user needs voiceovers, narration, character voices, or audio for lipsync videos.
|
|
4
|
-
allowed-tools: Read, Bash
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
# voice synthesis
|
|
8
|
-
|
|
9
|
-
generate high-quality text-to-speech audio with elevenlabs.
|
|
10
|
-
|
|
11
|
-
## available voices
|
|
12
|
-
|
|
13
|
-
- **rachel** - clear, professional female voice
|
|
14
|
-
- **domi** - warm, friendly female voice
|
|
15
|
-
- **bella** - energetic female voice
|
|
16
|
-
- **antoni** - friendly male voice
|
|
17
|
-
- **elli** - young, clear female voice
|
|
18
|
-
- **josh** - deep, clear male voice
|
|
19
|
-
- **arnold** - strong, authoritative male voice
|
|
20
|
-
- **adam** - natural, conversational male voice
|
|
21
|
-
- **sam** - raspy, character male voice
|
|
22
|
-
|
|
23
|
-
## usage
|
|
24
|
-
|
|
25
|
-
### generate voice
|
|
26
|
-
```bash
|
|
27
|
-
bun run service/voice.ts generate <text> [voice] [provider] [upload]
|
|
28
|
-
```
|
|
29
|
-
|
|
30
|
-
**parameters:**
|
|
31
|
-
- `text` (required): text to convert to speech
|
|
32
|
-
- `voice` (optional): voice name (default: rachel)
|
|
33
|
-
- `provider` (optional): elevenlabs (default)
|
|
34
|
-
- `upload` (optional): "true" to upload to s3
|
|
35
|
-
|
|
36
|
-
**example:**
|
|
37
|
-
```bash
|
|
38
|
-
bun run service/voice.ts generate "hello world, this is my voice" rachel elevenlabs true
|
|
39
|
-
```
|
|
40
|
-
|
|
41
|
-
### shorthand for elevenlabs
|
|
42
|
-
```bash
|
|
43
|
-
bun run service/voice.ts elevenlabs <text> [voice] [upload]
|
|
44
|
-
```
|
|
45
|
-
|
|
46
|
-
**example:**
|
|
47
|
-
```bash
|
|
48
|
-
bun run service/voice.ts elevenlabs "welcome to our video" josh true
|
|
49
|
-
```
|
|
50
|
-
|
|
51
|
-
## as library
|
|
52
|
-
|
|
53
|
-
```typescript
|
|
54
|
-
import { generateVoice } from "./service/voice"
|
|
55
|
-
|
|
56
|
-
const result = await generateVoice({
|
|
57
|
-
text: "hello world",
|
|
58
|
-
voice: "rachel",
|
|
59
|
-
provider: "elevenlabs",
|
|
60
|
-
upload: true,
|
|
61
|
-
outputPath: "media/voiceover.mp3"
|
|
62
|
-
})
|
|
63
|
-
|
|
64
|
-
console.log(result.provider)
|
|
65
|
-
console.log(result.voiceId)
|
|
66
|
-
console.log(result.uploadUrl)
|
|
67
|
-
```
|
|
68
|
-
|
|
69
|
-
## output
|
|
70
|
-
|
|
71
|
-
returns `VoiceResult`:
|
|
72
|
-
```typescript
|
|
73
|
-
{
|
|
74
|
-
audio: Buffer, // raw audio buffer
|
|
75
|
-
provider: string, // "elevenlabs"
|
|
76
|
-
voiceId: string, // actual voice id used
|
|
77
|
-
uploadUrl?: string // s3 url if upload requested
|
|
78
|
-
}
|
|
79
|
-
```
|
|
80
|
-
|
|
81
|
-
saves audio file to `media/voice-{timestamp}.mp3`
|
|
82
|
-
|
|
83
|
-
## when to use
|
|
84
|
-
|
|
85
|
-
use this skill when:
|
|
86
|
-
- creating voiceovers for videos
|
|
87
|
-
- generating narration or character dialogue
|
|
88
|
-
- preparing audio for lipsync videos
|
|
89
|
-
- need text-to-speech for talking character pipeline
|
|
90
|
-
- testing different voice options
|
|
91
|
-
|
|
92
|
-
## tips
|
|
93
|
-
|
|
94
|
-
**voice selection:**
|
|
95
|
-
- use **rachel** or **josh** for professional narration
|
|
96
|
-
- use **bella** or **antoni** for friendly, casual content
|
|
97
|
-
- use **arnold** for authoritative or dramatic content
|
|
98
|
-
- use **sam** for character or stylized voices
|
|
99
|
-
|
|
100
|
-
**text formatting:**
|
|
101
|
-
- add punctuation for natural pauses
|
|
102
|
-
- use shorter sentences for clearer speech
|
|
103
|
-
- spell out numbers and abbreviations
|
|
104
|
-
|
|
105
|
-
## integration with other services
|
|
106
|
-
|
|
107
|
-
perfect companion for:
|
|
108
|
-
- **lipsync service** - sync generated voice with video
|
|
109
|
-
- **video generation** - create talking character videos
|
|
110
|
-
- **captions service** - auto-generate subtitles from voiceover
|
|
111
|
-
|
|
112
|
-
## environment variables
|
|
113
|
-
|
|
114
|
-
required:
|
|
115
|
-
- `ELEVENLABS_API_KEY` - for voice generation
|
|
116
|
-
|
|
117
|
-
optional (for s3 upload):
|
|
118
|
-
- `CLOUDFLARE_R2_API_URL`
|
|
119
|
-
- `CLOUDFLARE_ACCESS_KEY_ID`
|
|
120
|
-
- `CLOUDFLARE_ACCESS_SECRET`
|
|
121
|
-
- `CLOUDFLARE_R2_BUCKET`
|
|
122
|
-
|
|
123
|
-
## generation time
|
|
124
|
-
|
|
125
|
-
expect 5-15 seconds depending on text length
|
package/action/voice/index.ts
DELETED
|
@@ -1,201 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env bun
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* voice service - high-level voice generation combining multiple providers
|
|
5
|
-
* supports elevenlabs and future providers
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
import { textToSpeech, VOICES } from "../../lib/elevenlabs";
|
|
9
|
-
import { uploadFile } from "../../utilities/s3";
|
|
10
|
-
|
|
11
|
-
// types
|
|
12
|
-
export interface GenerateVoiceOptions {
|
|
13
|
-
text: string;
|
|
14
|
-
voice?: string;
|
|
15
|
-
provider?: "elevenlabs";
|
|
16
|
-
upload?: boolean;
|
|
17
|
-
outputPath?: string;
|
|
18
|
-
}
|
|
19
|
-
|
|
20
|
-
export interface VoiceResult {
|
|
21
|
-
audio: Buffer;
|
|
22
|
-
provider: string;
|
|
23
|
-
voiceId: string;
|
|
24
|
-
uploadUrl?: string;
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
// core functions
|
|
28
|
-
export async function generateVoice(
|
|
29
|
-
options: GenerateVoiceOptions,
|
|
30
|
-
): Promise<VoiceResult> {
|
|
31
|
-
const {
|
|
32
|
-
text,
|
|
33
|
-
voice = "rachel",
|
|
34
|
-
provider = "elevenlabs",
|
|
35
|
-
upload = false,
|
|
36
|
-
outputPath,
|
|
37
|
-
} = options;
|
|
38
|
-
|
|
39
|
-
if (!text) {
|
|
40
|
-
throw new Error("text is required");
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
console.log(`[voice] generating with ${provider} (${voice})...`);
|
|
44
|
-
|
|
45
|
-
let audio: Buffer;
|
|
46
|
-
let voiceId: string;
|
|
47
|
-
|
|
48
|
-
switch (provider) {
|
|
49
|
-
case "elevenlabs": {
|
|
50
|
-
// map friendly names to voice ids
|
|
51
|
-
const voiceMap: Record<string, string> = {
|
|
52
|
-
rachel: VOICES.RACHEL,
|
|
53
|
-
domi: VOICES.DOMI,
|
|
54
|
-
bella: VOICES.BELLA,
|
|
55
|
-
antoni: VOICES.ANTONI,
|
|
56
|
-
elli: VOICES.ELLI,
|
|
57
|
-
josh: VOICES.JOSH,
|
|
58
|
-
arnold: VOICES.ARNOLD,
|
|
59
|
-
adam: VOICES.ADAM,
|
|
60
|
-
sam: VOICES.SAM,
|
|
61
|
-
};
|
|
62
|
-
|
|
63
|
-
voiceId = voiceMap[voice.toLowerCase()] || voice;
|
|
64
|
-
|
|
65
|
-
audio = await textToSpeech({
|
|
66
|
-
text,
|
|
67
|
-
voiceId,
|
|
68
|
-
outputPath,
|
|
69
|
-
});
|
|
70
|
-
break;
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
default:
|
|
74
|
-
throw new Error(`unsupported provider: ${provider}`);
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
const result: VoiceResult = {
|
|
78
|
-
audio,
|
|
79
|
-
provider,
|
|
80
|
-
voiceId,
|
|
81
|
-
};
|
|
82
|
-
|
|
83
|
-
// upload to s3 if requested
|
|
84
|
-
if (upload && outputPath) {
|
|
85
|
-
const objectKey = `voice/${Date.now()}-${voice}.mp3`;
|
|
86
|
-
const uploadUrl = await uploadFile(outputPath, objectKey);
|
|
87
|
-
result.uploadUrl = uploadUrl;
|
|
88
|
-
console.log(`[voice] uploaded to ${uploadUrl}`);
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
return result;
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
// cli
|
|
95
|
-
async function cli() {
|
|
96
|
-
const args = process.argv.slice(2);
|
|
97
|
-
const command = args[0];
|
|
98
|
-
|
|
99
|
-
if (!command || command === "help") {
|
|
100
|
-
console.log(`
|
|
101
|
-
usage:
|
|
102
|
-
bun run service/voice.ts <command> [args]
|
|
103
|
-
|
|
104
|
-
commands:
|
|
105
|
-
generate <text> [voice] [provider] [upload] generate voice from text
|
|
106
|
-
elevenlabs <text> [voice] [upload] generate with elevenlabs
|
|
107
|
-
help show this help
|
|
108
|
-
|
|
109
|
-
examples:
|
|
110
|
-
bun run service/voice.ts generate "hello world" rachel elevenlabs false
|
|
111
|
-
bun run service/voice.ts elevenlabs "hello world" josh true
|
|
112
|
-
bun run service/voice.ts generate "welcome to ai" bella
|
|
113
|
-
|
|
114
|
-
available voices:
|
|
115
|
-
rachel, domi, bella, antoni, elli, josh, arnold, adam, sam
|
|
116
|
-
|
|
117
|
-
providers:
|
|
118
|
-
elevenlabs (default)
|
|
119
|
-
|
|
120
|
-
environment:
|
|
121
|
-
ELEVENLABS_API_KEY - required for elevenlabs
|
|
122
|
-
CLOUDFLARE_* - required for upload
|
|
123
|
-
`);
|
|
124
|
-
process.exit(0);
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
try {
|
|
128
|
-
switch (command) {
|
|
129
|
-
case "generate": {
|
|
130
|
-
const text = args[1];
|
|
131
|
-
const voice = args[2];
|
|
132
|
-
const provider = (args[3] || "elevenlabs") as "elevenlabs";
|
|
133
|
-
const upload = args[4] === "true";
|
|
134
|
-
|
|
135
|
-
if (!text) {
|
|
136
|
-
throw new Error("text is required");
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
const outputPath = `media/voice-${Date.now()}.mp3`;
|
|
140
|
-
|
|
141
|
-
const result = await generateVoice({
|
|
142
|
-
text,
|
|
143
|
-
voice,
|
|
144
|
-
provider,
|
|
145
|
-
upload,
|
|
146
|
-
outputPath,
|
|
147
|
-
});
|
|
148
|
-
|
|
149
|
-
console.log(`[voice] result:`, {
|
|
150
|
-
provider: result.provider,
|
|
151
|
-
voiceId: result.voiceId,
|
|
152
|
-
audioSize: result.audio.length,
|
|
153
|
-
outputPath,
|
|
154
|
-
uploadUrl: result.uploadUrl,
|
|
155
|
-
});
|
|
156
|
-
break;
|
|
157
|
-
}
|
|
158
|
-
|
|
159
|
-
case "elevenlabs": {
|
|
160
|
-
const text = args[1];
|
|
161
|
-
const voice = args[2];
|
|
162
|
-
const upload = args[3] === "true";
|
|
163
|
-
|
|
164
|
-
if (!text) {
|
|
165
|
-
throw new Error("text is required");
|
|
166
|
-
}
|
|
167
|
-
|
|
168
|
-
const outputPath = `media/voice-${Date.now()}.mp3`;
|
|
169
|
-
|
|
170
|
-
const result = await generateVoice({
|
|
171
|
-
text,
|
|
172
|
-
voice,
|
|
173
|
-
provider: "elevenlabs",
|
|
174
|
-
upload,
|
|
175
|
-
outputPath,
|
|
176
|
-
});
|
|
177
|
-
|
|
178
|
-
console.log(`[voice] result:`, {
|
|
179
|
-
provider: result.provider,
|
|
180
|
-
voiceId: result.voiceId,
|
|
181
|
-
audioSize: result.audio.length,
|
|
182
|
-
outputPath,
|
|
183
|
-
uploadUrl: result.uploadUrl,
|
|
184
|
-
});
|
|
185
|
-
break;
|
|
186
|
-
}
|
|
187
|
-
|
|
188
|
-
default:
|
|
189
|
-
console.error(`unknown command: ${command}`);
|
|
190
|
-
console.log(`run 'bun run service/voice.ts help' for usage`);
|
|
191
|
-
process.exit(1);
|
|
192
|
-
}
|
|
193
|
-
} catch (error) {
|
|
194
|
-
console.error(`[voice] error:`, error);
|
|
195
|
-
process.exit(1);
|
|
196
|
-
}
|
|
197
|
-
}
|
|
198
|
-
|
|
199
|
-
if (import.meta.main) {
|
|
200
|
-
cli();
|
|
201
|
-
}
|
package/index.ts
DELETED
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* varg.ai sdk
|
|
3
|
-
* video generation and editing tools
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
// re-export external clients
|
|
7
|
-
export { fal } from "@ai-sdk/fal";
|
|
8
|
-
export { replicate } from "@ai-sdk/replicate";
|
|
9
|
-
export { fal as falClient } from "@fal-ai/client";
|
|
10
|
-
export { HiggsfieldClient } from "@higgsfield/client";
|
|
11
|
-
// lib exports - ai-sdk/fal (provider)
|
|
12
|
-
export * as aiSdkFal from "./lib/ai-sdk/fal";
|
|
13
|
-
// lib exports - ai-sdk/replicate (provider)
|
|
14
|
-
export * as aiSdkReplicate from "./lib/ai-sdk/replicate";
|
|
15
|
-
// lib exports - elevenlabs
|
|
16
|
-
export * from "./lib/elevenlabs";
|
|
17
|
-
// lib exports - fal (client)
|
|
18
|
-
export * from "./lib/fal";
|
|
19
|
-
// lib exports - ffmpeg
|
|
20
|
-
export * from "./lib/ffmpeg";
|
|
21
|
-
// lib exports - fireworks
|
|
22
|
-
export * from "./lib/fireworks";
|
|
23
|
-
// lib exports - groq
|
|
24
|
-
export * from "./lib/groq";
|
|
25
|
-
// lib exports - higgsfield
|
|
26
|
-
export * from "./lib/higgsfield";
|
|
27
|
-
// lib exports - replicate
|
|
28
|
-
export * from "./lib/replicate";
|
|
29
|
-
// service exports
|
|
30
|
-
export * from "./service/captions";
|
|
31
|
-
export * from "./service/edit";
|
|
32
|
-
export * from "./service/image";
|
|
33
|
-
export * from "./service/sync";
|
|
34
|
-
export * from "./service/transcribe";
|
|
35
|
-
export * from "./service/video";
|
|
36
|
-
export * from "./service/voice";
|
|
37
|
-
// utilities exports
|
|
38
|
-
export * from "./utilities/s3";
|