vargai 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +7 -0
- package/.env.example +27 -0
- package/.github/workflows/ci.yml +23 -0
- package/.husky/README.md +102 -0
- package/.husky/commit-msg +6 -0
- package/.husky/pre-commit +9 -0
- package/.husky/pre-push +6 -0
- package/.size-limit.json +8 -0
- package/.test-hooks.ts +5 -0
- package/CLAUDE.md +125 -0
- package/CONTRIBUTING.md +150 -0
- package/LICENSE.md +53 -0
- package/README.md +78 -0
- package/SKILLS.md +173 -0
- package/STRUCTURE.md +92 -0
- package/biome.json +34 -0
- package/bun.lock +1254 -0
- package/commitlint.config.js +22 -0
- package/docs/plan.md +66 -0
- package/docs/todo.md +14 -0
- package/docs/varg-sdk.md +812 -0
- package/ffmpeg/CLAUDE.md +68 -0
- package/package.json +69 -0
- package/pipeline/cookbooks/SKILL.md +285 -0
- package/pipeline/cookbooks/remotion-video.md +585 -0
- package/pipeline/cookbooks/round-video-character.md +337 -0
- package/pipeline/cookbooks/scripts/animate-frames-parallel.ts +84 -0
- package/pipeline/cookbooks/scripts/combine-scenes.sh +53 -0
- package/pipeline/cookbooks/scripts/generate-frames-parallel.ts +99 -0
- package/pipeline/cookbooks/scripts/still-to-video.sh +37 -0
- package/pipeline/cookbooks/talking-character.md +59 -0
- package/pipeline/cookbooks/text-to-tiktok.md +669 -0
- package/pipeline/cookbooks/trendwatching.md +156 -0
- package/plan.md +281 -0
- package/scripts/.gitkeep +0 -0
- package/src/ai-sdk/cache.ts +142 -0
- package/src/ai-sdk/examples/cached-generation.ts +53 -0
- package/src/ai-sdk/examples/duet-scene-4.ts +53 -0
- package/src/ai-sdk/examples/duet-scene-5-audio.ts +32 -0
- package/src/ai-sdk/examples/duet-video.ts +56 -0
- package/src/ai-sdk/examples/editly-composition.ts +63 -0
- package/src/ai-sdk/examples/editly-test.ts +57 -0
- package/src/ai-sdk/examples/editly-video-test.ts +52 -0
- package/src/ai-sdk/examples/fal-lipsync.ts +43 -0
- package/src/ai-sdk/examples/higgsfield-image.ts +61 -0
- package/src/ai-sdk/examples/music-generation.ts +19 -0
- package/src/ai-sdk/examples/openai-sora.ts +34 -0
- package/src/ai-sdk/examples/replicate-bg-removal.ts +52 -0
- package/src/ai-sdk/examples/simpsons-scene.ts +61 -0
- package/src/ai-sdk/examples/talking-lion.ts +55 -0
- package/src/ai-sdk/examples/video-generation.ts +39 -0
- package/src/ai-sdk/examples/workflow-animated-girl.ts +104 -0
- package/src/ai-sdk/examples/workflow-before-after.ts +114 -0
- package/src/ai-sdk/examples/workflow-character-grid.ts +112 -0
- package/src/ai-sdk/examples/workflow-slideshow.ts +161 -0
- package/src/ai-sdk/file-cache.ts +112 -0
- package/src/ai-sdk/file.ts +238 -0
- package/src/ai-sdk/generate-element.ts +92 -0
- package/src/ai-sdk/generate-music.ts +46 -0
- package/src/ai-sdk/generate-video.ts +165 -0
- package/src/ai-sdk/index.ts +72 -0
- package/src/ai-sdk/music-model.ts +110 -0
- package/src/ai-sdk/providers/editly/editly.test.ts +1108 -0
- package/src/ai-sdk/providers/editly/ffmpeg.ts +60 -0
- package/src/ai-sdk/providers/editly/index.ts +817 -0
- package/src/ai-sdk/providers/editly/layers.ts +772 -0
- package/src/ai-sdk/providers/editly/plan.md +144 -0
- package/src/ai-sdk/providers/editly/types.ts +328 -0
- package/src/ai-sdk/providers/elevenlabs-provider.ts +255 -0
- package/src/ai-sdk/providers/fal-provider.ts +512 -0
- package/src/ai-sdk/providers/higgsfield.ts +379 -0
- package/src/ai-sdk/providers/openai.ts +251 -0
- package/src/ai-sdk/providers/replicate.ts +16 -0
- package/src/ai-sdk/video-model.ts +185 -0
- package/src/cli/commands/find.tsx +137 -0
- package/src/cli/commands/help.tsx +85 -0
- package/src/cli/commands/index.ts +9 -0
- package/src/cli/commands/list.tsx +238 -0
- package/src/cli/commands/run.tsx +511 -0
- package/src/cli/commands/which.tsx +253 -0
- package/src/cli/index.ts +112 -0
- package/src/cli/quiet.ts +44 -0
- package/src/cli/types.ts +32 -0
- package/src/cli/ui/components/Badge.tsx +29 -0
- package/src/cli/ui/components/DataTable.tsx +51 -0
- package/src/cli/ui/components/Header.tsx +23 -0
- package/src/cli/ui/components/HelpBlock.tsx +44 -0
- package/src/cli/ui/components/KeyValue.tsx +33 -0
- package/src/cli/ui/components/OptionRow.tsx +81 -0
- package/src/cli/ui/components/Separator.tsx +23 -0
- package/src/cli/ui/components/StatusBox.tsx +108 -0
- package/src/cli/ui/components/VargBox.tsx +51 -0
- package/src/cli/ui/components/VargProgress.tsx +36 -0
- package/src/cli/ui/components/VargSpinner.tsx +34 -0
- package/src/cli/ui/components/VargText.tsx +56 -0
- package/src/cli/ui/components/index.ts +19 -0
- package/src/cli/ui/index.ts +12 -0
- package/src/cli/ui/render.ts +35 -0
- package/src/cli/ui/theme.ts +63 -0
- package/src/cli/utils.ts +78 -0
- package/src/core/executor/executor.ts +201 -0
- package/src/core/executor/index.ts +13 -0
- package/src/core/executor/job.ts +214 -0
- package/src/core/executor/pipeline.ts +222 -0
- package/src/core/index.ts +11 -0
- package/src/core/registry/index.ts +9 -0
- package/src/core/registry/loader.ts +149 -0
- package/src/core/registry/registry.ts +221 -0
- package/src/core/registry/resolver.ts +206 -0
- package/src/core/schema/helpers.ts +134 -0
- package/src/core/schema/index.ts +8 -0
- package/src/core/schema/shared.ts +102 -0
- package/src/core/schema/types.ts +279 -0
- package/src/core/schema/validator.ts +92 -0
- package/src/definitions/actions/captions.ts +261 -0
- package/src/definitions/actions/edit.ts +298 -0
- package/src/definitions/actions/image.ts +125 -0
- package/src/definitions/actions/index.ts +114 -0
- package/src/definitions/actions/music.ts +205 -0
- package/src/definitions/actions/sync.ts +128 -0
- package/src/definitions/actions/transcribe.ts +200 -0
- package/src/definitions/actions/upload.ts +111 -0
- package/src/definitions/actions/video.ts +163 -0
- package/src/definitions/actions/voice.ts +119 -0
- package/src/definitions/index.ts +23 -0
- package/src/definitions/models/elevenlabs.ts +50 -0
- package/src/definitions/models/flux.ts +56 -0
- package/src/definitions/models/index.ts +36 -0
- package/src/definitions/models/kling.ts +56 -0
- package/src/definitions/models/llama.ts +54 -0
- package/src/definitions/models/nano-banana-pro.ts +102 -0
- package/src/definitions/models/sonauto.ts +68 -0
- package/src/definitions/models/soul.ts +65 -0
- package/src/definitions/models/wan.ts +54 -0
- package/src/definitions/models/whisper.ts +44 -0
- package/src/definitions/skills/index.ts +12 -0
- package/src/definitions/skills/talking-character.ts +87 -0
- package/src/definitions/skills/text-to-tiktok.ts +97 -0
- package/src/index.ts +118 -0
- package/src/providers/apify.ts +269 -0
- package/src/providers/base.ts +264 -0
- package/src/providers/elevenlabs.ts +217 -0
- package/src/providers/fal.ts +392 -0
- package/src/providers/ffmpeg.ts +544 -0
- package/src/providers/fireworks.ts +193 -0
- package/src/providers/groq.ts +149 -0
- package/src/providers/higgsfield.ts +145 -0
- package/src/providers/index.ts +143 -0
- package/src/providers/replicate.ts +147 -0
- package/src/providers/storage.ts +206 -0
- package/src/tests/all.test.ts +509 -0
- package/src/tests/index.ts +33 -0
- package/src/tests/unit.test.ts +403 -0
- package/tsconfig.json +45 -0
package/ffmpeg/CLAUDE.md
ADDED
@@ -0,0 +1,68 @@
# ffmpeg video mixing lessons

## problem: audio/video desync when mixing clips

### what went wrong

using `-ss X -t Y` to pre-trim input, then applying relative trim filters caused timing drift:

```bash
# BAD: relative timestamps after pre-trim
ffmpeg -ss 64 -t 36 -i original.mp4 ...
-filter_complex "
[0:v]split=5[orig1][orig2]...;
[orig1]trim=0:4,setpts=PTS-STARTPTS[o1];
[orig2]trim=4:11,setpts=PTS-STARTPTS[o2];
..."
```

this produced wrong duration (38s instead of 36s) with audio desync.

### solution: use absolute timestamps from full input

trim directly from full original using absolute timestamps:

```bash
# GOOD: absolute timestamps from full file
ffmpeg -i original.mp4 -i scene1.mp4 -i scene2.mp4 ...
-filter_complex "
[0:v]trim=64:68,setpts=PTS-STARTPTS[o1];
[1:v]scale=1280:720,trim=4:6,setpts=PTS-STARTPTS[s1];
[0:v]trim=70:75,setpts=PTS-STARTPTS[o2];
...
[o1][s1][o2]...concat=n=N:v=1:a=0[outv];
[0:a]atrim=64:100,asetpts=PTS-STARTPTS[outa]
"
-map "[outv]" -map "[outa]"
```

### key points

1. **absolute timestamps**: trim from full input file, not pre-trimmed
2. **separate audio handling**: use `atrim` on audio stream independently
3. **setpts reset**: always use `setpts=PTS-STARTPTS` after trim to reset timestamps
4. **scale before trim**: when mixing different resolutions, scale first then trim
5. **video duration = audio duration**: ensure total video segments match audio segment length

### example: inserting clips into original

to insert generated clips at specific timestamps while keeping continuous audio:

```bash
ffmpeg -y \
  -i original.mp4 \
  -i generated-scene.mp4 \
  -filter_complex "
    [0:v]trim=START1:END1,setpts=PTS-STARTPTS[o1];
    [1:v]scale=1280:720,trim=0:DURATION,setpts=PTS-STARTPTS[s1];
    [0:v]trim=START2:END2,setpts=PTS-STARTPTS[o2];
    [o1][s1][o2]concat=n=3:v=1:a=0[outv];
    [0:a]atrim=AUDIO_START:AUDIO_END,asetpts=PTS-STARTPTS[outa]
  " \
  -map "[outv]" -map "[outa]" \
  -c:v libx264 -preset fast -crf 18 \
  -c:a aac -b:a 192k \
  output.mp4
```

timestamps must add up: `(END1-START1) + DURATION + (END2-START2) = AUDIO_END - AUDIO_START`
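for example, with hypothetical values START1=64, END1=68, DURATION=2, START2=70, END2=100 and audio trimmed from 64 to 100, both sides work out to 36 seconds:

```bash
# sanity check with hypothetical values (not taken from a real project)
echo $(( (68-64) + 2 + (100-70) ))   # video segments: 4 + 2 + 30 = 36
echo $(( 100 - 64 ))                 # audio trim:               36 -> matches, no desync
```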
package/package.json
ADDED
@@ -0,0 +1,69 @@
{
  "name": "vargai",
  "module": "src/index.ts",
  "type": "module",
  "bin": {
    "varg": "./src/cli/index.ts"
  },
  "scripts": {
    "check": "biome check . && tsc --noEmit",
    "lint": "biome check .",
    "format": "biome format --write .",
    "type-check": "tsc --noEmit",
    "prepare": "husky install",
    "size": "size-limit"
  },
  "lint-staged": {
    "*.{js,ts,tsx}": [
      "biome check --write --no-errors-on-unmatched"
    ],
    "*.json": [
      "biome format --write"
    ]
  },
  "devDependencies": {
    "@biomejs/biome": "^2.3.7",
    "@commitlint/cli": "^20.1.0",
    "@commitlint/config-conventional": "^20.0.0",
    "@size-limit/preset-small-lib": "^11.2.0",
    "@types/bun": "latest",
    "@types/react": "^19.2.7",
    "husky": "^9.1.7",
    "lint-staged": "^16.2.7"
  },
  "peerDependencies": {
    "typescript": "^5"
  },
  "dependencies": {
    "@ai-sdk/fal": "^1.0.23",
    "@ai-sdk/openai": "^3.0.9",
    "@ai-sdk/provider": "^3.0.2",
    "@ai-sdk/replicate": "^2.0.5",
    "@aws-sdk/client-s3": "^3.937.0",
    "@aws-sdk/s3-request-presigner": "^3.937.0",
    "@elevenlabs/elevenlabs-js": "^2.28.0",
    "@fal-ai/client": "^1.7.2",
    "@higgsfield/client": "^0.1.2",
    "@inkjs/ui": "^2.0.0",
    "@remotion/cli": "^4.0.377",
    "@types/fluent-ffmpeg": "^2.1.28",
    "ai": "^6.0.26",
    "apify-client": "^2.20.0",
    "citty": "^0.1.6",
    "fluent-ffmpeg": "^2.1.3",
    "groq-sdk": "^0.36.0",
    "ink": "^6.5.1",
    "react": "^19.2.0",
    "react-dom": "^19.2.0",
    "remotion": "^4.0.377",
    "replicate": "^1.4.0",
    "zod": "^4.2.1"
  },
  "version": "0.3.0",
  "exports": {
    ".": "./src/index.ts",
    "./core": "./src/core/index.ts",
    "./providers": "./src/providers/index.ts",
    "./definitions": "./src/definitions/index.ts"
  }
}
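the `exports` map above exposes subpath entry points, and `bin` provides a `varg` command; a minimal consumer sketch, assuming the package is installed in a Bun/TypeScript project (the entries point at `.ts` sources, so a TS-aware runtime is needed):

```typescript
// sketch: importing the subpath exports declared in the manifest above
// (namespace imports only; the actual exported members live in src/)
import * as vargai from "vargai"                   // "."             -> ./src/index.ts
import * as core from "vargai/core"                // "./core"        -> ./src/core/index.ts
import * as providers from "vargai/providers"      // "./providers"   -> ./src/providers/index.ts
import * as definitions from "vargai/definitions"  // "./definitions" -> ./src/definitions/index.ts

console.log(Object.keys(vargai), Object.keys(core), Object.keys(providers), Object.keys(definitions))
```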
package/pipeline/cookbooks/SKILL.md
ADDED
@@ -0,0 +1,285 @@
---
name: talking-character-pipeline
description: complete workflow to create talking character videos with lipsync and captions. use when creating ai character videos, talking avatars, narrated content, or social media character content with voiceover.
allowed-tools: Read, Bash
---

# talking character pipeline

create professional talking character videos from scratch using the complete varg.ai sdk workflow.

## overview

this pipeline combines multiple services to create a fully produced talking character video:
1. character headshot generation
2. voiceover synthesis
3. character animation
4. lipsync
5. auto-generated captions
6. social media optimization

**total time**: ~4-5 minutes per video

## step-by-step workflow

### 1. create character headshot
```bash
bun run service/image.ts soul "professional headshot of a friendly person, studio lighting" true
```

**output**: character image url + s3 url
**time**: ~30 seconds

**tip**: be specific about character appearance, lighting, and style for best results

### 2. generate voiceover
```bash
bun run service/voice.ts elevenlabs "hello world, this is my character speaking" rachel true
```

**output**: `media/voice-{timestamp}.mp3` + s3 url
**time**: ~10 seconds

**tip**: choose voice that matches character (rachel/bella for female, josh/antoni for male)

### 3. animate character
```bash
bun run service/video.ts from_image "person talking naturally, professional demeanor" <headshot_url> 5 true
```

**output**: animated video url + s3 url
**time**: ~2-3 minutes

**tip**: use subtle motion prompts like "person talking naturally" or "slight head movement"

### 4. add lipsync
```bash
bun run service/sync.ts wav2lip <video_url> <audio_url>
```

**output**: lipsynced video url
**time**: ~30 seconds

**tip**: wav2lip works best with close-up character shots and clear audio

### 5. add captions
```bash
bun run service/captions.ts <video_path> captioned.mp4 --provider fireworks
```

**output**: `captioned.mp4` with subtitles
**time**: ~15 seconds (includes transcription)

**tip**: fireworks provider gives word-level timing for professional captions

### 6. prepare for social media
```bash
bun run service/edit.ts social captioned.mp4 final-tiktok.mp4 tiktok
```

**output**: `final-tiktok.mp4` optimized for platform
**time**: ~5 seconds

**platforms**: tiktok, instagram, youtube-shorts, youtube, twitter

## complete example

```bash
# step 1: generate character
bun run service/image.ts soul \
  "professional business woman, friendly smile, studio lighting" \
  true

# step 2: create voiceover
bun run service/voice.ts elevenlabs \
  "welcome to our company. we're excited to show you our new product" \
  rachel \
  true

# step 3: animate character
bun run service/video.ts from_image \
  "person talking professionally" \
  https://your-s3-url/character.jpg \
  5 \
  true

# step 4: sync lips
bun run service/sync.ts wav2lip \
  https://your-s3-url/animated.mp4 \
  https://your-s3-url/voice.mp3

# step 5: add captions
bun run service/captions.ts \
  synced-video.mp4 \
  captioned.mp4 \
  --provider fireworks \
  --font "Arial Black" \
  --size 32

# step 6: optimize for tiktok
bun run service/edit.ts social \
  captioned.mp4 \
  final-tiktok.mp4 \
  tiktok
```

## programmatic workflow

```typescript
import { generateWithSoul } from "./service/image"
import { generateVoice } from "./service/voice"
import { generateVideoFromImage } from "./service/video"
import { lipsyncWav2Lip } from "./service/sync"
import { addCaptions } from "./service/captions"
import { prepareForSocial } from "./service/edit"

// 1. character
const character = await generateWithSoul(
  "friendly business person, professional",
  { upload: true }
)

// 2. voice
const voice = await generateVoice({
  text: "hello, welcome to our video",
  voice: "rachel",
  upload: true,
  outputPath: "media/voice.mp3"
})

// 3. animate
const video = await generateVideoFromImage(
  "person talking naturally",
  character.uploaded!,
  { duration: 5, upload: true }
)

// 4. lipsync
const synced = await lipsyncWav2Lip({
  videoUrl: video.uploaded!,
  audioUrl: voice.uploadUrl!
})

// 5. captions
const captioned = await addCaptions({
  videoPath: synced,
  output: "captioned.mp4",
  provider: "fireworks"
})

// 6. social media
const final = await prepareForSocial({
  input: captioned,
  output: "final.mp4",
  platform: "tiktok"
})
```

## use cases

### marketing content
- product announcements
- brand messaging
- explainer videos
- social media ads

### educational content
- course introductions
- tutorial narration
- lesson summaries
- educational social media

### social media
- tiktok character content
- instagram reels with narration
- youtube shorts
- twitter video posts

## tips for best results

**character creation:**
- be specific about appearance, expression, lighting
- "professional", "friendly", "casual" work well
- mention "studio lighting" for clean backgrounds

**voiceover:**
- write natural, conversational scripts
- add punctuation for natural pauses
- keep sentences short and clear
- match voice gender to character

**animation:**
- use subtle motion prompts
- 5 seconds is perfect for character talking shots
- avoid complex camera movements

**lipsync:**
- wav2lip works best with frontal face views
- ensure audio is clear and well-paced
- close-up shots give better results

**captions:**
- use fireworks for word-level timing
- larger font sizes (28-32) work better on mobile
- white text with black outline is most readable

**social media:**
- vertical (9:16) for tiktok/instagram/shorts
- landscape (16:9) for youtube/twitter
- keep total video under 60 seconds for best engagement

## estimated costs

per video (approximate):
- character image: $0.05 (higgsfield soul)
- voiceover: $0.10 (elevenlabs)
- animation: $0.20 (fal image-to-video)
- lipsync: $0.10 (replicate wav2lip)
- transcription: $0.02 (fireworks)

**total**: ~$0.47 per video

## troubleshooting

**character doesn't look consistent:**
- use higgsfield soul instead of fal for characters
- save character image and reuse for consistency

**lipsync doesn't match well:**
- ensure video shows face clearly
- use close-up shots
- check audio quality and clarity

**animation looks unnatural:**
- simplify motion prompt
- use "person talking naturally" or "slight movement"
- avoid dramatic camera movements

**captions are off-sync:**
- use fireworks provider for better timing
- check audio quality
- verify video fps is standard (24/30fps)

## required environment variables

```bash
HIGGSFIELD_API_KEY=hf_xxx
HIGGSFIELD_SECRET=secret_xxx
ELEVENLABS_API_KEY=el_xxx
FAL_API_KEY=fal_xxx
REPLICATE_API_TOKEN=r8_xxx
FIREWORKS_API_KEY=fw_xxx
CLOUDFLARE_R2_API_URL=https://xxx.r2.cloudflarestorage.com
CLOUDFLARE_ACCESS_KEY_ID=xxx
CLOUDFLARE_ACCESS_SECRET=xxx
CLOUDFLARE_R2_BUCKET=m
```
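a minimal pre-flight sketch, assuming a bun/node runtime where these keys are read from `process.env` (the sdk may do its own validation):

```typescript
// fail fast if any key from the env block above is missing (sketch)
const required = [
  "HIGGSFIELD_API_KEY", "HIGGSFIELD_SECRET", "ELEVENLABS_API_KEY",
  "FAL_API_KEY", "REPLICATE_API_TOKEN", "FIREWORKS_API_KEY",
  "CLOUDFLARE_R2_API_URL", "CLOUDFLARE_ACCESS_KEY_ID",
  "CLOUDFLARE_ACCESS_SECRET", "CLOUDFLARE_R2_BUCKET",
]

const missing = required.filter((name) => !process.env[name])
if (missing.length > 0) {
  throw new Error(`missing required env vars: ${missing.join(", ")}`)
}
```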
## next steps

after creating your talking character video:
- upload to social platforms
- analyze performance metrics
- iterate on character design and scripts
- create series with consistent character
- experiment with different voices and styles