vargai 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +7 -0
- package/.env.example +27 -0
- package/.github/workflows/ci.yml +23 -0
- package/.husky/README.md +102 -0
- package/.husky/commit-msg +6 -0
- package/.husky/pre-commit +9 -0
- package/.husky/pre-push +6 -0
- package/.size-limit.json +8 -0
- package/.test-hooks.ts +5 -0
- package/CLAUDE.md +125 -0
- package/CONTRIBUTING.md +150 -0
- package/LICENSE.md +53 -0
- package/README.md +78 -0
- package/SKILLS.md +173 -0
- package/STRUCTURE.md +92 -0
- package/biome.json +34 -0
- package/bun.lock +1254 -0
- package/commitlint.config.js +22 -0
- package/docs/plan.md +66 -0
- package/docs/todo.md +14 -0
- package/docs/varg-sdk.md +812 -0
- package/ffmpeg/CLAUDE.md +68 -0
- package/package.json +69 -0
- package/pipeline/cookbooks/SKILL.md +285 -0
- package/pipeline/cookbooks/remotion-video.md +585 -0
- package/pipeline/cookbooks/round-video-character.md +337 -0
- package/pipeline/cookbooks/scripts/animate-frames-parallel.ts +84 -0
- package/pipeline/cookbooks/scripts/combine-scenes.sh +53 -0
- package/pipeline/cookbooks/scripts/generate-frames-parallel.ts +99 -0
- package/pipeline/cookbooks/scripts/still-to-video.sh +37 -0
- package/pipeline/cookbooks/talking-character.md +59 -0
- package/pipeline/cookbooks/text-to-tiktok.md +669 -0
- package/pipeline/cookbooks/trendwatching.md +156 -0
- package/plan.md +281 -0
- package/scripts/.gitkeep +0 -0
- package/src/ai-sdk/cache.ts +142 -0
- package/src/ai-sdk/examples/cached-generation.ts +53 -0
- package/src/ai-sdk/examples/duet-scene-4.ts +53 -0
- package/src/ai-sdk/examples/duet-scene-5-audio.ts +32 -0
- package/src/ai-sdk/examples/duet-video.ts +56 -0
- package/src/ai-sdk/examples/editly-composition.ts +63 -0
- package/src/ai-sdk/examples/editly-test.ts +57 -0
- package/src/ai-sdk/examples/editly-video-test.ts +52 -0
- package/src/ai-sdk/examples/fal-lipsync.ts +43 -0
- package/src/ai-sdk/examples/higgsfield-image.ts +61 -0
- package/src/ai-sdk/examples/music-generation.ts +19 -0
- package/src/ai-sdk/examples/openai-sora.ts +34 -0
- package/src/ai-sdk/examples/replicate-bg-removal.ts +52 -0
- package/src/ai-sdk/examples/simpsons-scene.ts +61 -0
- package/src/ai-sdk/examples/talking-lion.ts +55 -0
- package/src/ai-sdk/examples/video-generation.ts +39 -0
- package/src/ai-sdk/examples/workflow-animated-girl.ts +104 -0
- package/src/ai-sdk/examples/workflow-before-after.ts +114 -0
- package/src/ai-sdk/examples/workflow-character-grid.ts +112 -0
- package/src/ai-sdk/examples/workflow-slideshow.ts +161 -0
- package/src/ai-sdk/file-cache.ts +112 -0
- package/src/ai-sdk/file.ts +238 -0
- package/src/ai-sdk/generate-element.ts +92 -0
- package/src/ai-sdk/generate-music.ts +46 -0
- package/src/ai-sdk/generate-video.ts +165 -0
- package/src/ai-sdk/index.ts +72 -0
- package/src/ai-sdk/music-model.ts +110 -0
- package/src/ai-sdk/providers/editly/editly.test.ts +1108 -0
- package/src/ai-sdk/providers/editly/ffmpeg.ts +60 -0
- package/src/ai-sdk/providers/editly/index.ts +817 -0
- package/src/ai-sdk/providers/editly/layers.ts +772 -0
- package/src/ai-sdk/providers/editly/plan.md +144 -0
- package/src/ai-sdk/providers/editly/types.ts +328 -0
- package/src/ai-sdk/providers/elevenlabs-provider.ts +255 -0
- package/src/ai-sdk/providers/fal-provider.ts +512 -0
- package/src/ai-sdk/providers/higgsfield.ts +379 -0
- package/src/ai-sdk/providers/openai.ts +251 -0
- package/src/ai-sdk/providers/replicate.ts +16 -0
- package/src/ai-sdk/video-model.ts +185 -0
- package/src/cli/commands/find.tsx +137 -0
- package/src/cli/commands/help.tsx +85 -0
- package/src/cli/commands/index.ts +9 -0
- package/src/cli/commands/list.tsx +238 -0
- package/src/cli/commands/run.tsx +511 -0
- package/src/cli/commands/which.tsx +253 -0
- package/src/cli/index.ts +112 -0
- package/src/cli/quiet.ts +44 -0
- package/src/cli/types.ts +32 -0
- package/src/cli/ui/components/Badge.tsx +29 -0
- package/src/cli/ui/components/DataTable.tsx +51 -0
- package/src/cli/ui/components/Header.tsx +23 -0
- package/src/cli/ui/components/HelpBlock.tsx +44 -0
- package/src/cli/ui/components/KeyValue.tsx +33 -0
- package/src/cli/ui/components/OptionRow.tsx +81 -0
- package/src/cli/ui/components/Separator.tsx +23 -0
- package/src/cli/ui/components/StatusBox.tsx +108 -0
- package/src/cli/ui/components/VargBox.tsx +51 -0
- package/src/cli/ui/components/VargProgress.tsx +36 -0
- package/src/cli/ui/components/VargSpinner.tsx +34 -0
- package/src/cli/ui/components/VargText.tsx +56 -0
- package/src/cli/ui/components/index.ts +19 -0
- package/src/cli/ui/index.ts +12 -0
- package/src/cli/ui/render.ts +35 -0
- package/src/cli/ui/theme.ts +63 -0
- package/src/cli/utils.ts +78 -0
- package/src/core/executor/executor.ts +201 -0
- package/src/core/executor/index.ts +13 -0
- package/src/core/executor/job.ts +214 -0
- package/src/core/executor/pipeline.ts +222 -0
- package/src/core/index.ts +11 -0
- package/src/core/registry/index.ts +9 -0
- package/src/core/registry/loader.ts +149 -0
- package/src/core/registry/registry.ts +221 -0
- package/src/core/registry/resolver.ts +206 -0
- package/src/core/schema/helpers.ts +134 -0
- package/src/core/schema/index.ts +8 -0
- package/src/core/schema/shared.ts +102 -0
- package/src/core/schema/types.ts +279 -0
- package/src/core/schema/validator.ts +92 -0
- package/src/definitions/actions/captions.ts +261 -0
- package/src/definitions/actions/edit.ts +298 -0
- package/src/definitions/actions/image.ts +125 -0
- package/src/definitions/actions/index.ts +114 -0
- package/src/definitions/actions/music.ts +205 -0
- package/src/definitions/actions/sync.ts +128 -0
- package/src/definitions/actions/transcribe.ts +200 -0
- package/src/definitions/actions/upload.ts +111 -0
- package/src/definitions/actions/video.ts +163 -0
- package/src/definitions/actions/voice.ts +119 -0
- package/src/definitions/index.ts +23 -0
- package/src/definitions/models/elevenlabs.ts +50 -0
- package/src/definitions/models/flux.ts +56 -0
- package/src/definitions/models/index.ts +36 -0
- package/src/definitions/models/kling.ts +56 -0
- package/src/definitions/models/llama.ts +54 -0
- package/src/definitions/models/nano-banana-pro.ts +102 -0
- package/src/definitions/models/sonauto.ts +68 -0
- package/src/definitions/models/soul.ts +65 -0
- package/src/definitions/models/wan.ts +54 -0
- package/src/definitions/models/whisper.ts +44 -0
- package/src/definitions/skills/index.ts +12 -0
- package/src/definitions/skills/talking-character.ts +87 -0
- package/src/definitions/skills/text-to-tiktok.ts +97 -0
- package/src/index.ts +118 -0
- package/src/providers/apify.ts +269 -0
- package/src/providers/base.ts +264 -0
- package/src/providers/elevenlabs.ts +217 -0
- package/src/providers/fal.ts +392 -0
- package/src/providers/ffmpeg.ts +544 -0
- package/src/providers/fireworks.ts +193 -0
- package/src/providers/groq.ts +149 -0
- package/src/providers/higgsfield.ts +145 -0
- package/src/providers/index.ts +143 -0
- package/src/providers/replicate.ts +147 -0
- package/src/providers/storage.ts +206 -0
- package/src/tests/all.test.ts +509 -0
- package/src/tests/index.ts +33 -0
- package/src/tests/unit.test.ts +403 -0
- package/tsconfig.json +45 -0
|
@@ -0,0 +1,669 @@
|
|
|
1
|
+
# text to tiktok pipeline
|
|
2
|
+
|
|
3
|
+
turn text (reddit post, script, etc) into a tiktok with ai-generated looping background and voiceover
|
|
4
|
+
|
|
5
|
+
## overview
|
|
6
|
+
|
|
7
|
+
1. generate voiceover from text (elevenlabs)
|
|
8
|
+
2. get word-level timestamps (fireworks whisper)
|
|
9
|
+
3. generate looping background video (kling on fal.ai)
|
|
10
|
+
4. combine with captions (ffmpeg)
|
|
11
|
+
|
|
12
|
+
## step 1: prepare content
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
mkdir -p media/your-project
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
put your text in `media/your-project/post.md`
|
|
19
|
+
|
|
20
|
+
## step 2: generate voiceover
|
|
21
|
+
|
|
22
|
+
**important:** strip markdown formatting (like `#`) before sending to elevenlabs - it will read them aloud!
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
# sam voice - relaxed male (good for philosophical/alan watts style content)
|
|
26
|
+
TEXT=$(cat media/your-project/post.md | sed 's/^# //')
|
|
27
|
+
bun run lib/elevenlabs.ts tts "$TEXT" sam media/your-project/voiceover.mp3
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
voice options:
|
|
31
|
+
- `sam` - relaxed american male (best for philosophical/calm content)
|
|
32
|
+
- `adam` - deep american male (more intense/pushy)
|
|
33
|
+
- `antoni` - mature male
|
|
34
|
+
- `josh` - american male
|
|
35
|
+
- `rachel` - american female
|
|
36
|
+
|
|
37
|
+
model used: `eleven_multilingual_v2`
|
|
38
|
+
|
|
39
|
+
check duration:
|
|
40
|
+
```bash
|
|
41
|
+
ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 media/your-project/voiceover.mp3
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## step 3: get word timestamps
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
bun run lib/fireworks.ts media/your-project/voiceover.mp3 media/your-project/segments.json
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
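outputs json with word-level timestamps for captions. note: step 5 reads `media/your-project/captions.srt` - the full example below passes a `.srt` output path to this same script to produce that file.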
|
|
51
|
+
|
|
52
|
+
## step 4: generate looping background video
|
|
53
|
+
|
|
54
|
+
### 4a: generate first frame with motion (important for loops)
|
|
55
|
+
|
|
56
|
+
for driving/movement scenes, bake motion blur into the first frame so the video starts already moving:
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
bun run lib/fal.ts generate_image "POV from inside moving car driving through rainy city at night, motion blur on streetlights and neon signs, raindrops streaking on windshield from speed, dashboard visible at bottom, blurred city lights rushing past, cinematic motion blur, photorealistic" "fal-ai/flux-pro/v1.1" "portrait_16_9"
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
download and save to `media/your-project/frame.jpg`
|
|
63
|
+
|
|
64
|
+
### 4b: generate looping video with tail_image_url
|
|
65
|
+
|
|
66
|
+
use the same image for start and end frame to create seamless loop:
|
|
67
|
+
|
|
68
|
+
```typescript
|
|
69
|
+
import { imageToVideo } from "./lib/fal.ts";
|
|
70
|
+
|
|
71
|
+
const result = await imageToVideo({
|
|
72
|
+
prompt: "POV from inside car driving through rainy city at night, continuous forward motion, passing neon signs and streetlights, raindrops streaming on windshield, steady driving pace, seamless loop",
|
|
73
|
+
imageUrl: "media/your-project/frame.jpg",
|
|
74
|
+
tailImageUrl: "media/your-project/frame.jpg", // same as start = loop!
|
|
75
|
+
duration: 10,
|
|
76
|
+
});
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### 4c: extend to full length
|
|
80
|
+
|
|
81
|
+
loop the 10sec clip from step 4b (saved as `media/your-project/bg_10sec.mp4`) to match the voiceover duration:
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
# for a 3 min voiceover the 10sec clip must play 18 times (-stream_loop 17 = 17 repeats after the first play)
|
|
85
|
+
ffmpeg -y -stream_loop 17 -i media/your-project/bg_10sec.mp4 -t 180 -c copy media/your-project/bg_full.mp4
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## step 5: combine video + audio + captions + screenshot overlay
|
|
89
|
+
|
|
90
|
+
combine everything with optional screenshot overlay for first few seconds:
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
ffmpeg -y \
|
|
94
|
+
-i media/your-project/bg_full.mp4 \
|
|
95
|
+
-i media/your-project/voiceover.mp3 \
|
|
96
|
+
-loop 1 -t 4 -i media/your-project/screenshot.png \
|
|
97
|
+
-filter_complex "\
|
|
98
|
+
[2:v]scale=900:-1,format=yuva420p,fade=t=out:st=3:d=1:alpha=1[screenshot]; \
|
|
99
|
+
[0:v][screenshot]overlay=(W-w)/2:(H-h)/2:shortest=0:eof_action=pass[vbase]; \
|
|
100
|
+
[vbase]subtitles=media/your-project/captions.srt:force_style='FontName=Arial,FontSize=20,PrimaryColour=&HFFFFFF,OutlineColour=&H000000,Bold=-1,Alignment=2,MarginV=60,Outline=3'[vout]" \
|
|
101
|
+
-map "[vout]" -map 1:a \
|
|
102
|
+
-c:v libx264 -preset faster -crf 20 \
|
|
103
|
+
-c:a aac -b:a 128k \
|
|
104
|
+
-shortest \
|
|
105
|
+
-movflags +faststart \
|
|
106
|
+
media/your-project/final.mp4
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
this command:
|
|
110
|
+
- overlays screenshot for first 4 seconds with fade out
|
|
111
|
+
- adds word-by-word SRT captions
|
|
112
|
+
- combines background video + voiceover audio
|
|
113
|
+
|
|
114
|
+
### simpler version (no screenshot overlay)
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
ffmpeg -y -i media/your-project/bg_full.mp4 \
|
|
118
|
+
-i media/your-project/voiceover.mp3 \
|
|
119
|
+
-vf "subtitles=media/your-project/captions.srt:force_style='FontName=Arial,FontSize=20,Bold=-1,Outline=3'" \
|
|
120
|
+
-c:v libx264 -preset faster -crf 20 \
|
|
121
|
+
-c:a aac -b:a 128k \
|
|
122
|
+
-shortest \
|
|
123
|
+
-movflags +faststart \
|
|
124
|
+
media/your-project/final.mp4
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
## tips for looping backgrounds
|
|
128
|
+
|
|
129
|
+
### prompt tips for seamless loops
|
|
130
|
+
- "continuous forward motion" - keeps direction consistent
|
|
131
|
+
- "seamless loop" - hints at looping intent
|
|
132
|
+
- "steady pace" - avoids acceleration/deceleration
|
|
133
|
+
- "never stops" - prevents pausing
|
|
134
|
+
|
|
135
|
+
### motion blur in first frame
|
|
136
|
+
if your scene has movement (driving, walking, flying), generate the first frame WITH motion blur. otherwise kling will animate from a still start.
|
|
137
|
+
|
|
138
|
+
bad: "car on rainy street" (static frame = car starts from stop)
|
|
139
|
+
good: "moving car, motion blur on lights, raindrops streaking" (motion frame = already driving)
|
|
140
|
+
|
|
141
|
+
### use tail_image_url for loops
|
|
142
|
+
kling's `tail_image_url` parameter forces the video to end on a specific frame. set it to the same as `image_url` to create a seamless loop.
|
|
143
|
+
|
|
144
|
+
### background ideas that loop well
|
|
145
|
+
- driving POV (city, highway, rain)
|
|
146
|
+
- walking POV (city streets, forest path)
|
|
147
|
+
- clouds/sky timelapse
|
|
148
|
+
- ocean waves
|
|
149
|
+
- abstract particles/smoke
|
|
150
|
+
|
|
151
|
+
### backgrounds that DON'T loop well
|
|
152
|
+
- scenes with specific landmarks
|
|
153
|
+
- conversations/interactions
|
|
154
|
+
- anything with a narrative arc
|
|
155
|
+
|
|
156
|
+
## tips for text prep
|
|
157
|
+
|
|
158
|
+
### strip markdown before tts
|
|
159
|
+
elevenlabs reads markdown literally - `# heading` becomes "hashtag heading". always strip:
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
TEXT=$(cat post.md | sed 's/^# //')
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
### run grammar check
|
|
166
|
+
before generating voiceover, check for:
|
|
167
|
+
- typos ("she'll met" vs "she'll meet")
|
|
168
|
+
- missing apostrophes ("its" vs "it's")
|
|
169
|
+
- repeated words ("into to")
|
|
170
|
+
- missing commas ("After all she" vs "After all, she")
|
|
171
|
+
|
|
172
|
+
## example project structure
|
|
173
|
+
|
|
174
|
+
```
|
|
175
|
+
media/your-project/
|
|
176
|
+
├── post.md # original text
|
|
177
|
+
├── voiceover.mp3 # elevenlabs output
|
|
178
|
+
├── segments.json # word timestamps
|
|
179
|
+
├── frame.jpg # first frame (with motion blur)
|
|
180
|
+
├── bg_10sec.mp4 # 10sec looping clip
|
|
181
|
+
├── bg_full.mp4 # extended to voiceover length
|
|
182
|
+
├── captions.srt # generated captions
|
|
183
|
+
└── final.mp4 # final tiktok
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
## full example: philosophical reddit post
|
|
187
|
+
|
|
188
|
+
```bash
|
|
189
|
+
# 1. voiceover (strip markdown, use relaxed sam voice)
|
|
190
|
+
TEXT=$(cat media/girl-ruined-you/post.md | sed 's/^# //')
|
|
191
|
+
bun run lib/elevenlabs.ts tts "$TEXT" sam media/girl-ruined-you/voiceover.mp3
|
|
192
|
+
|
|
193
|
+
# 2. get word timestamps for captions (outputs .srt)
|
|
194
|
+
bun run lib/fireworks.ts media/girl-ruined-you/voiceover.mp3 media/girl-ruined-you/captions.srt
|
|
195
|
+
|
|
196
|
+
# 3. first frame with motion blur
|
|
197
|
+
bun run lib/fal.ts generate_image "POV from inside moving car driving through rainy city at night, motion blur on streetlights, raindrops streaking on windshield, cinematic" "fal-ai/flux-pro/v1.1" "portrait_16_9"
|
|
198
|
+
|
|
199
|
+
# 4. looping video (run as ts script for tail_image_url)
|
|
200
|
+
# see step 4b above
|
|
201
|
+
|
|
202
|
+
# 5. extend loop to match voiceover duration
|
|
203
|
+
ffmpeg -y -stream_loop 17 -i media/girl-ruined-you/bg_car_final.mp4 -t 177 -c copy media/girl-ruined-you/bg_full.mp4
|
|
204
|
+
|
|
205
|
+
# 6. combine with screenshot overlay + captions
|
|
206
|
+
ffmpeg -y \
|
|
207
|
+
-i media/girl-ruined-you/bg_full.mp4 \
|
|
208
|
+
-i media/girl-ruined-you/voiceover.mp3 \
|
|
209
|
+
-loop 1 -t 4 -i media/girl-ruined-you/shot.png \
|
|
210
|
+
-filter_complex "\
|
|
211
|
+
[2:v]scale=900:-1,format=yuva420p,fade=t=out:st=3:d=1:alpha=1[screenshot]; \
|
|
212
|
+
[0:v][screenshot]overlay=(W-w)/2:(H-h)/2:shortest=0:eof_action=pass[vbase]; \
|
|
213
|
+
[vbase]subtitles=media/girl-ruined-you/captions.srt:force_style='FontName=Arial,FontSize=20,PrimaryColour=&HFFFFFF,OutlineColour=&H000000,Bold=-1,Alignment=2,MarginV=60,Outline=3'[vout]" \
|
|
214
|
+
-map "[vout]" -map 1:a \
|
|
215
|
+
-c:v libx264 -preset faster -crf 20 \
|
|
216
|
+
-c:a aac -b:a 128k \
|
|
217
|
+
-shortest \
|
|
218
|
+
-movflags +faststart \
|
|
219
|
+
media/girl-ruined-you/final_with_captions.mp4
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
## alternative: minecraft/subway surfers background
|
|
223
|
+
|
|
224
|
+
if you have a minecraft parkour or subway surfers video:
|
|
225
|
+
|
|
226
|
+
```bash
|
|
227
|
+
# loop existing background video
|
|
228
|
+
ffmpeg -stream_loop -1 -i media/backgrounds/minecraft_parkour.mp4 \
|
|
229
|
+
-i media/your-project/voiceover.mp3 \
|
|
230
|
+
-vf "scale=1080:1920:force_original_aspect_ratio=increase,crop=1080:1920" \
|
|
231
|
+
-c:v libx264 -preset faster -crf 23 \
|
|
232
|
+
-c:a aac -b:a 128k \
|
|
233
|
+
-shortest \
|
|
234
|
+
-movflags +faststart \
|
|
235
|
+
media/your-project/final.mp4
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
note: yt-dlp is currently broken for youtube downloads. use cobalt.tools or a screen recording instead.
|
|
239
|
+
|
|
240
|
+
---
|
|
241
|
+
|
|
242
|
+
# helper scripts
|
|
243
|
+
|
|
244
|
+
scripts in `pipeline/cookbooks/scripts/` to speed up common tasks:
|
|
245
|
+
|
|
246
|
+
| script | description | usage |
|
|
247
|
+
|--------|-------------|-------|
|
|
248
|
+
| `generate-frames-parallel.ts` | generate multiple scene frames in parallel using flux kontext | `bun run pipeline/cookbooks/scripts/generate-frames-parallel.ts` |
|
|
249
|
+
| `animate-frames-parallel.ts` | animate multiple frames in parallel using kling | `bun run pipeline/cookbooks/scripts/animate-frames-parallel.ts` |
|
|
250
|
+
| `combine-scenes.sh` | combine scene videos with audio clips using ffmpeg | `./pipeline/cookbooks/scripts/combine-scenes.sh media/your-project` |
|
|
251
|
+
| `still-to-video.sh` | convert still image to video with ken burns effect | `./pipeline/cookbooks/scripts/still-to-video.sh input.jpg output.mp4 5 in` |
|
|
252
|
+
|
|
253
|
+
**generate-frames-parallel.ts** - edit the `configs` array to define your scenes, supports single character (kontext) and multi-character (kontext/multi) frames.
|
|
254
|
+
|
|
255
|
+
**animate-frames-parallel.ts** - edit the `configs` array with frame paths and prompts. all kling requests run in parallel.
|
|
256
|
+
|
|
257
|
+
**combine-scenes.sh** - edit the `SCENES` array to define scene timing. handles audio extraction, video looping, and concatenation.
|
|
258
|
+
|
|
259
|
+
**still-to-video.sh** - useful when kling fails or for simple scenes. creates slow zoom (ken burns) effect from a still frame.
|
|
260
|
+
|
|
261
|
+
---
|
|
262
|
+
|
|
263
|
+
# advanced: character-based storytelling videos
|
|
264
|
+
|
|
265
|
+
for narrative content with consistent characters (like animated story videos)
|
|
266
|
+
|
|
267
|
+
## character generation
|
|
268
|
+
|
|
269
|
+
### generate consistent characters with nano banana pro
|
|
270
|
+
|
|
271
|
+
```bash
|
|
272
|
+
# protagonist (male)
|
|
273
|
+
bun run lib/fal.ts generate_image "3D cartoon anthropomorphic male cat character, cute stylized, standing pose, young adult male energy, slightly guarded but hopeful expression, big expressive eyes, wearing hoodie and jeans, muted earth tones, pixar dreamworks style, full body shot, white background, character reference sheet" "fal-ai/nano-banana-pro" "portrait_16_9"
|
|
274
|
+
|
|
275
|
+
# love interest (female)
|
|
276
|
+
bun run lib/fal.ts generate_image "3D cartoon anthropomorphic female cat character, cute stylized, standing pose, mature confident woman energy, warm loving expression, big expressive eyes, wearing elegant but casual dress, warm colors, pixar dreamworks style, full body shot, white background, character reference sheet" "fal-ai/nano-banana-pro" "portrait_16_9"
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
save as `cat_protagonist.png`, `cat_love_interest.png`, etc.
|
|
280
|
+
|
|
281
|
+
## video generation models comparison
|
|
282
|
+
|
|
283
|
+
### for character consistency (multiple characters in one scene)
|
|
284
|
+
|
|
285
|
+
| model | reference support | best for |
|
|
286
|
+
|-------|------------------|----------|
|
|
287
|
+
| **veo3.1/reference-to-video** | multiple `image_urls` | best character consistency across multiple refs |
|
|
288
|
+
| **vidu/q2/reference-to-video** | up to 7 `reference_image_urls` | good consistency (had api issues) |
|
|
289
|
+
| **bytedance/lynx** | subject reference | designed for subject consistency |
|
|
290
|
+
|
|
291
|
+
### for single character animation
|
|
292
|
+
|
|
293
|
+
| model | notes |
|
|
294
|
+
|-------|-------|
|
|
295
|
+
| **kling-video/v2.5-turbo/pro** | reliable, good motion, supports `tail_image_url` for loops |
|
|
296
|
+
| **sora-2/image-to-video** | up to 12 sec, great quality, no reference support |
|
|
297
|
+
| **veo3.1/image-to-video** | good quality, uses image as literal first frame |
|
|
298
|
+
|
|
299
|
+
## workflow: scene-by-scene generation
|
|
300
|
+
|
|
301
|
+
### step 1: create scene script with timestamps
|
|
302
|
+
|
|
303
|
+
use the SRT file to understand timing:
|
|
304
|
+
```bash
|
|
305
|
+
head -50 media/your-project/captions.srt
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
create a scene breakdown markdown file (`scene_script.md`):
|
|
309
|
+
|
|
310
|
+
```markdown
|
|
311
|
+
# Scene Script - Cat Story (2:56 total)
|
|
312
|
+
|
|
313
|
+
Characters:
|
|
314
|
+
- **Protagonist** (male cat in hoodie) - the "you/brother"
|
|
315
|
+
- **First Girl** (young playful female cat in pastel) - the immature one
|
|
316
|
+
- **Second Girl/Amelia** (mature elegant female cat in orange) - "The One"
|
|
317
|
+
|
|
318
|
+
All scenes: 9:16 portrait, pixar 3d style, no talking/lip movement, cinematic
|
|
319
|
+
|
|
320
|
+
---
|
|
321
|
+
|
|
322
|
+
## Scene 1: Title/Hook (0:00-0:03)
|
|
323
|
+
**Text:** "How a girl that didn't love you ruined your future marriage"
|
|
324
|
+
**Visual:** Protagonist cat sitting alone on a bench at night, city lights behind, looking down sadly.
|
|
325
|
+
**Duration:** 3 sec
|
|
326
|
+
|
|
327
|
+
## Scene 2: Young Love (0:03-0:09)
|
|
328
|
+
**Text:** "When you were young, you fell in love..."
|
|
329
|
+
**Visual:** Protagonist and First Girl walking together in a park, sunny day.
|
|
330
|
+
**Duration:** 6 sec
|
|
331
|
+
|
|
332
|
+
## Scene 7: Flashback (0:54-1:01)
|
|
333
|
+
**Text:** "...your training kicks in..."
|
|
334
|
+
**Visual:** FLASHBACK - reuse Scene 5 video in B&W jittery style
|
|
335
|
+
**Duration:** 6.5 sec
|
|
336
|
+
```
|
|
337
|
+
|
|
338
|
+
key tips:
|
|
339
|
+
- include character descriptions at top
|
|
340
|
+
- note when to reuse videos (flashbacks, repeated scenes)
|
|
341
|
+
- specify visual style effects (B&W, jitter, etc)
|
|
342
|
+
- include exact timestamps from SRT
|
|
343
|
+
|
|
344
|
+
### step 2: generate scene frames with flux kontext
|
|
345
|
+
|
|
346
|
+
**important:** veo3.1 image-to-video uses the reference as the literal first frame. generate proper scene frames first!
|
|
347
|
+
|
|
348
|
+
**use the helper script for parallel generation:**
|
|
349
|
+
```bash
|
|
350
|
+
# edit the configs in the script first, then run:
|
|
351
|
+
bun run pipeline/cookbooks/scripts/generate-frames-parallel.ts
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
or manually:
|
|
355
|
+
```typescript
|
|
356
|
+
import { fal } from "@fal-ai/client";
|
|
357
|
+
|
|
358
|
+
// upload character reference
|
|
359
|
+
const protagonist = await fal.storage.upload(Bun.file("media/your-project/cat_protagonist.png"));
|
|
360
|
+
|
|
361
|
+
// use flux kontext to place character in scene
|
|
362
|
+
const result = await fal.subscribe("fal-ai/flux-pro/kontext", {
|
|
363
|
+
input: {
|
|
364
|
+
prompt: "Place this 3D cartoon cat character sitting alone on a park bench at night, city lights bokeh in background, looking down sadly, melancholy mood, cinematic lighting, pixar style, 9:16 portrait vertical composition",
|
|
365
|
+
image_url: protagonist,
|
|
366
|
+
aspect_ratio: "9:16"
|
|
367
|
+
}
|
|
368
|
+
});
|
|
369
|
+
```
|
|
370
|
+
|
|
371
|
+
### step 3: animate frames with kling
|
|
372
|
+
|
|
373
|
+
**use the helper script for parallel animation:**
|
|
374
|
+
```bash
|
|
375
|
+
# edit the configs in the script first, then run:
|
|
376
|
+
bun run pipeline/cookbooks/scripts/animate-frames-parallel.ts
|
|
377
|
+
```
|
|
378
|
+
|
|
379
|
+
or manually:
|
|
380
|
+
```typescript
|
|
381
|
+
const result = await fal.subscribe("fal-ai/kling-video/v2.5-turbo/pro/image-to-video", {
|
|
382
|
+
input: {
|
|
383
|
+
prompt: "3D pixar animation, the cat character sits still on bench looking down sadly, subtle breathing movement, city lights twinkle softly in background, slow gentle camera push in, melancholy cinematic mood, no talking",
|
|
384
|
+
image_url: sceneFrameUrl,
|
|
385
|
+
duration: "5",
|
|
386
|
+
aspect_ratio: "9:16"
|
|
387
|
+
}
|
|
388
|
+
});
|
|
389
|
+
```
|
|
390
|
+
|
|
391
|
+
**for static frames (ken burns effect):**
|
|
392
|
+
```bash
|
|
393
|
+
# convert still image to video with slow zoom
|
|
394
|
+
./pipeline/cookbooks/scripts/still-to-video.sh media/your-project/scene1_frame.jpg media/your-project/scene1_video.mp4 5 in
|
|
395
|
+
```
|
|
396
|
+
|
|
397
|
+
### step 4: for scenes with multiple characters
|
|
398
|
+
|
|
399
|
+
use veo3.1 reference-to-video:
|
|
400
|
+
|
|
401
|
+
```typescript
|
|
402
|
+
const result = await fal.subscribe("fal-ai/veo3.1/reference-to-video", {
|
|
403
|
+
input: {
|
|
404
|
+
prompt: "3D pixar style animation, two anthropomorphic cats walking together in a sunny park - male cat looks at female cat with love, she looks distracted, sunny golden hour lighting, no talking",
|
|
405
|
+
image_urls: [protagonistUrl, loveInterestUrl],
|
|
406
|
+
duration: "8s",
|
|
407
|
+
resolution: "720p",
|
|
408
|
+
generate_audio: false
|
|
409
|
+
}
|
|
410
|
+
});
|
|
411
|
+
```
|
|
412
|
+
|
|
413
|
+
### step 5: assemble with remotion
|
|
414
|
+
|
|
415
|
+
after generating all scene videos, use remotion for final assembly:
|
|
416
|
+
|
|
417
|
+
```bash
|
|
418
|
+
# copy all scene videos to remotion public folder
|
|
419
|
+
cp media/your-project/scene*_video.mp4 lib/remotion/public/your-project/
|
|
420
|
+
cp media/your-project/voiceover.mp3 lib/remotion/public/your-project/
|
|
421
|
+
|
|
422
|
+
# create composition
|
|
423
|
+
bun run lib/remotion/index.ts create YourProject
|
|
424
|
+
|
|
425
|
+
# edit composition with scene timeline (see remotion workflow section)
|
|
426
|
+
# preview in studio
|
|
427
|
+
bun remotion studio lib/remotion/compositions/YourProject.root.tsx --public-dir=lib/remotion/public
|
|
428
|
+
|
|
429
|
+
# render
|
|
430
|
+
bun remotion render lib/remotion/compositions/YourProject.root.tsx YourProject final.mp4 --public-dir=lib/remotion/public
|
|
431
|
+
```
|
|
432
|
+
|
|
433
|
+
**alternative: simple ffmpeg assembly (no caption styling):**
|
|
434
|
+
```bash
|
|
435
|
+
# use the combine-scenes script
|
|
436
|
+
./pipeline/cookbooks/scripts/combine-scenes.sh media/your-project
|
|
437
|
+
```
|
|
438
|
+
|
|
439
|
+
## tips for character videos
|
|
440
|
+
|
|
441
|
+
### no talking/lip sync
|
|
442
|
+
- prompt with "no talking", "no lip movement", "silent"
|
|
443
|
+
- characters express emotion through body language
|
|
444
|
+
- voiceover is added in post
|
|
445
|
+
|
|
446
|
+
### portrait format (9:16)
|
|
447
|
+
- flux kontext supports `aspect_ratio: "9:16"`
|
|
448
|
+
- kling supports `aspect_ratio: "9:16"`
|
|
449
|
+
- veo3.1 reference-to-video outputs landscape by default (16:9)
|
|
450
|
+
|
|
451
|
+
### scene continuity
|
|
452
|
+
- keep same character references across all scenes
|
|
453
|
+
- use similar lighting descriptions ("cinematic", "golden hour", "moody blue")
|
|
454
|
+
- match camera style ("slow push in", "static shot")
|
|
455
|
+
|
|
456
|
+
### stitching scenes (simple ffmpeg)
|
|
457
|
+
```bash
|
|
458
|
+
# create file list
|
|
459
|
+
echo "file 'scene1.mp4'" > scenes.txt
|
|
460
|
+
echo "file 'scene2.mp4'" >> scenes.txt
|
|
461
|
+
echo "file 'scene3.mp4'" >> scenes.txt
|
|
462
|
+
|
|
463
|
+
# concatenate
|
|
464
|
+
ffmpeg -f concat -safe 0 -i scenes.txt -c copy combined_scenes.mp4
|
|
465
|
+
```
|
|
466
|
+
|
|
467
|
+
### stitching scenes (remotion - recommended)
|
|
468
|
+
use remotion for more control:
|
|
469
|
+
- reuse same video in multiple scenes (flashbacks)
|
|
470
|
+
- add effects (B&W, jitter for flashbacks)
|
|
471
|
+
- tiktok-style word-by-word captions
|
|
472
|
+
- named sequences visible in studio
|
|
473
|
+
|
|
474
|
+
```typescript
|
|
475
|
+
// separate video file from scene id - allows reuse
|
|
476
|
+
const SCENES = [
|
|
477
|
+
{ id: 1, video: 1, name: "1. Title", start: 0, duration: 3.5 },
|
|
478
|
+
{ id: 7, video: 5, name: "7. Flashback", start: 54.5, duration: 6.5, flashback: true },
|
|
479
|
+
{ id: 12, video: 13, name: "12. Years Pass", start: 120, duration: 15 },
|
|
480
|
+
{ id: 14, video: 13, name: "14. His Love", start: 155, duration: 10 }, // same video!
|
|
481
|
+
];
|
|
482
|
+
```
|
|
483
|
+
|
|
484
|
+
### flashback effect
|
|
485
|
+
for memory/past scenes, add B&W + jitter:
|
|
486
|
+
```typescript
|
|
487
|
+
const flashbackStyle = flashback ? {
|
|
488
|
+
filter: "grayscale(100%) contrast(1.2) brightness(0.9)",
|
|
489
|
+
transform: `translate(${Math.sin(frame * 0.5) * 2}px, ${Math.cos(frame * 0.7) * 1.5}px)`,
|
|
490
|
+
} : {};
|
|
491
|
+
```
|
|
492
|
+
|
|
493
|
+
### video looping (critical!)
|
|
494
|
+
when scene duration > video duration, video freezes on last frame. fix with `<Loop>`:
|
|
495
|
+
|
|
496
|
+
```typescript
|
|
497
|
+
import { Loop } from "remotion";
|
|
498
|
+
|
|
499
|
+
// define actual video file durations (from ffprobe)
|
|
500
|
+
const VIDEO_DURATIONS_SECONDS: Record<number, number> = {
|
|
501
|
+
1: 5.041667,
|
|
502
|
+
2: 10.041667,
|
|
503
|
+
3: 5.041667,
|
|
504
|
+
// ... etc
|
|
505
|
+
};
|
|
506
|
+
|
|
507
|
+
// looping video component
|
|
508
|
+
const LoopingVideo: React.FC<{
|
|
509
|
+
src: string;
|
|
510
|
+
flashback?: boolean;
|
|
511
|
+
loopDurationInFrames: number;
|
|
512
|
+
}> = ({ src, flashback, loopDurationInFrames }) => {
|
|
513
|
+
const frame = useCurrentFrame();
|
|
514
|
+
|
|
515
|
+
const flashbackStyle = flashback ? {
|
|
516
|
+
filter: "grayscale(100%) contrast(1.2) brightness(0.9)",
|
|
517
|
+
transform: `translate(${Math.sin(frame * 0.5) * 2}px, ${Math.cos(frame * 0.7) * 1.5}px)`,
|
|
518
|
+
} : {};
|
|
519
|
+
|
|
520
|
+
return (
|
|
521
|
+
<Loop durationInFrames={loopDurationInFrames}>
|
|
522
|
+
<OffthreadVideo src={src} style={{ ...flashbackStyle }} muted />
|
|
523
|
+
</Loop>
|
|
524
|
+
);
|
|
525
|
+
};
|
|
526
|
+
|
|
527
|
+
// usage: pass video's actual duration, not scene duration
|
|
528
|
+
const loopDuration = VIDEO_DURATIONS_SECONDS[scene.video];
|
|
529
|
+
const loopDurationInFrames = Math.round(loopDuration * fps);
|
|
530
|
+
|
|
531
|
+
<LoopingVideo
|
|
532
|
+
src={staticFile(`scene${scene.video}_video.mp4`)}
|
|
533
|
+
loopDurationInFrames={loopDurationInFrames}
|
|
534
|
+
/>
|
|
535
|
+
```
|
|
536
|
+
|
|
537
|
+
**key insight**: `<Loop durationInFrames>` needs the VIDEO's duration (converted to frames), not the scene's duration. the video then repeats for as long as the scene lasts.
|
|
538
|
+
|
|
539
|
+
### get video durations
|
|
540
|
+
```bash
|
|
541
|
+
# probe all scene videos
|
|
542
|
+
for f in lib/remotion/public/your-project/scene*_video.mp4; do
|
|
543
|
+
echo -n "$(basename $f): "
|
|
544
|
+
ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "$f"
|
|
545
|
+
done
|
|
546
|
+
```
|
|
547
|
+
|
|
548
|
+
## project structure for character videos
|
|
549
|
+
|
|
550
|
+
```
|
|
551
|
+
media/your-project/
|
|
552
|
+
├── post.md # script/text
|
|
553
|
+
├── scene_script.md # scene breakdown with timestamps
|
|
554
|
+
├── characters/
|
|
555
|
+
│ ├── cat_protagonist.png # character reference
|
|
556
|
+
│ ├── cat_first_girl.png
|
|
557
|
+
│ └── cat_second_girl.png
|
|
558
|
+
├── frames/
|
|
559
|
+
│ ├── scene1_frame.jpg # generated scene frames
|
|
560
|
+
│ ├── scene2_frame.jpg
|
|
561
|
+
│ └── ...
|
|
562
|
+
├── scenes/
|
|
563
|
+
│ ├── scene1.mp4 # animated scenes
|
|
564
|
+
│ ├── scene2.mp4
|
|
565
|
+
│ └── ...
|
|
566
|
+
├── voiceover.mp3
|
|
567
|
+
├── captions.srt
|
|
568
|
+
└── final.mp4
|
|
569
|
+
```
|
|
570
|
+
|
|
571
|
+
---
|
|
572
|
+
|
|
573
|
+
## remotion workflow (advanced: beautiful tiktok-style captions)
|
|
574
|
+
|
|
575
|
+
for professional captions with word-by-word highlighting, use remotion instead of ffmpeg:
|
|
576
|
+
|
|
577
|
+
### step 1: copy assets to remotion public folder
|
|
578
|
+
|
|
579
|
+
```bash
|
|
580
|
+
mkdir -p lib/remotion/public/your-project
|
|
581
|
+
cp media/your-project/*.mp4 lib/remotion/public/your-project/
|
|
582
|
+
cp media/your-project/voiceover.mp3 lib/remotion/public/your-project/
|
|
583
|
+
```
|
|
584
|
+
|
|
585
|
+
### step 2: create remotion composition
|
|
586
|
+
|
|
587
|
+
```bash
|
|
588
|
+
bun run lib/remotion/index.ts create YourProject
|
|
589
|
+
```
|
|
590
|
+
|
|
591
|
+
### step 3: edit composition with scene timeline
|
|
592
|
+
|
|
593
|
+
key features:
|
|
594
|
+
- **named sequences** - each scene gets a name visible in Studio
|
|
595
|
+
- **separate video id from scene id** - allows reusing videos (flashbacks, repeated scenes)
|
|
596
|
+
- **flashback effects** - B&W + jitter for memory/past scenes
|
|
597
|
+
- **tiktok captions** - word-by-word highlighting with active word glow
|
|
598
|
+
|
|
599
|
+
example scene structure:
|
|
600
|
+
```typescript
|
|
601
|
+
const SCENES = [
|
|
602
|
+
// protagonist alone on bench at night
|
|
603
|
+
{ id: 1, video: 1, name: "1. Title/Hook", start: 0, duration: 3.5 },
|
|
604
|
+
|
|
605
|
+
// flashback - reuse video 5 with B&W + jitter effect
|
|
606
|
+
{ id: 7, video: 5, name: "7. Flashback", start: 54.5, duration: 6.5, flashback: true },
|
|
607
|
+
|
|
608
|
+
// same video used in multiple places
|
|
609
|
+
{ id: 12, video: 13, name: "12. Years Pass", start: 120, duration: 15 },
|
|
610
|
+
{ id: 14, video: 13, name: "14. His Love", start: 155, duration: 10 },
|
|
611
|
+
];
|
|
612
|
+
```
|
|
613
|
+
|
|
614
|
+
### step 4: preview with remotion studio
|
|
615
|
+
|
|
616
|
+
```bash
|
|
617
|
+
bun remotion studio lib/remotion/compositions/YourProject.root.tsx --public-dir=lib/remotion/public
|
|
618
|
+
```
|
|
619
|
+
|
|
620
|
+
studio features:
|
|
621
|
+
- scrub through timeline
|
|
622
|
+
- see all named sequences
|
|
623
|
+
- debug timing issues
|
|
624
|
+
- preview before rendering
|
|
625
|
+
|
|
626
|
+
### step 5: render
|
|
627
|
+
|
|
628
|
+
```bash
|
|
629
|
+
# quick preview (half resolution via --scale=0.5)
|
|
630
|
+
bun remotion render lib/remotion/compositions/YourProject.root.tsx YourProject preview.mp4 --public-dir=lib/remotion/public --scale=0.5
|
|
631
|
+
|
|
632
|
+
# full quality
|
|
633
|
+
bun remotion render lib/remotion/compositions/YourProject.root.tsx YourProject final.mp4 --public-dir=lib/remotion/public
|
|
634
|
+
```
|
|
635
|
+
|
|
636
|
+
### remotion vs ffmpeg
|
|
637
|
+
|
|
638
|
+
| feature | ffmpeg | remotion |
|
|
639
|
+
|---------|--------|----------|
|
|
640
|
+
| basic captions | yes (ASS/SRT) | yes |
|
|
641
|
+
| word-by-word highlight | limited | excellent |
|
|
642
|
+
| flashback effects | complex filters | simple CSS |
|
|
643
|
+
| scene names/debugging | none | Studio UI |
|
|
644
|
+
| reusing videos | manual | easy (video != id) |
|
|
645
|
+
| transitions | complex | built-in |
|
|
646
|
+
| render time | fast | slower |
|
|
647
|
+
|
|
648
|
+
**use ffmpeg** for simple overlays, **use remotion** for polished tiktok-style videos
|
|
649
|
+
|
|
650
|
+
### embedding SRT in composition
|
|
651
|
+
|
|
652
|
+
avoid file loading issues by embedding SRT content directly:
|
|
653
|
+
|
|
654
|
+
```typescript
|
|
655
|
+
// paste SRT content directly in file
|
|
656
|
+
const SRT_CONTENT = `1
|
|
657
|
+
00:00:00,071 --> 00:00:00,291
|
|
658
|
+
How
|
|
659
|
+
|
|
660
|
+
2
|
|
661
|
+
00:00:00,291 --> 00:00:00,351
|
|
662
|
+
a
|
|
663
|
+
...`;
|
|
664
|
+
|
|
665
|
+
// parse once at module level
|
|
666
|
+
const CAPTIONS = parseSRT(SRT_CONTENT);
|
|
667
|
+
```
|
|
668
|
+
|
|
669
|
+
see `lib/remotion/SKILL.md` for the full caption implementation
|