@vargai/sdk 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/.env.example +24 -0
  2. package/CLAUDE.md +118 -0
  3. package/HIGGSFIELD_REWRITE_SUMMARY.md +300 -0
  4. package/README.md +231 -0
  5. package/SKILLS.md +157 -0
  6. package/STRUCTURE.md +92 -0
  7. package/TEST_RESULTS.md +122 -0
  8. package/action/captions/SKILL.md +170 -0
  9. package/action/captions/index.ts +169 -0
  10. package/action/edit/SKILL.md +235 -0
  11. package/action/edit/index.ts +437 -0
  12. package/action/image/SKILL.md +140 -0
  13. package/action/image/index.ts +105 -0
  14. package/action/sync/SKILL.md +136 -0
  15. package/action/sync/index.ts +145 -0
  16. package/action/transcribe/SKILL.md +179 -0
  17. package/action/transcribe/index.ts +210 -0
  18. package/action/video/SKILL.md +116 -0
  19. package/action/video/index.ts +125 -0
  20. package/action/voice/SKILL.md +125 -0
  21. package/action/voice/index.ts +136 -0
  22. package/biome.json +33 -0
  23. package/bun.lock +842 -0
  24. package/cli/commands/find.ts +58 -0
  25. package/cli/commands/help.ts +70 -0
  26. package/cli/commands/list.ts +49 -0
  27. package/cli/commands/run.ts +237 -0
  28. package/cli/commands/which.ts +66 -0
  29. package/cli/discover.ts +66 -0
  30. package/cli/index.ts +33 -0
  31. package/cli/runner.ts +65 -0
  32. package/cli/types.ts +49 -0
  33. package/cli/ui.ts +185 -0
  34. package/index.ts +75 -0
  35. package/lib/README.md +144 -0
  36. package/lib/ai-sdk/fal.ts +106 -0
  37. package/lib/ai-sdk/replicate.ts +107 -0
  38. package/lib/elevenlabs.ts +382 -0
  39. package/lib/fal.ts +467 -0
  40. package/lib/ffmpeg.ts +467 -0
  41. package/lib/fireworks.ts +235 -0
  42. package/lib/groq.ts +246 -0
  43. package/lib/higgsfield/MIGRATION.md +308 -0
  44. package/lib/higgsfield/README.md +273 -0
  45. package/lib/higgsfield/example.ts +228 -0
  46. package/lib/higgsfield/index.ts +241 -0
  47. package/lib/higgsfield/soul.ts +262 -0
  48. package/lib/higgsfield.ts +176 -0
  49. package/lib/remotion/SKILL.md +823 -0
  50. package/lib/remotion/cli.ts +115 -0
  51. package/lib/remotion/functions.ts +283 -0
  52. package/lib/remotion/index.ts +19 -0
  53. package/lib/remotion/templates.ts +73 -0
  54. package/lib/replicate.ts +304 -0
  55. package/output.txt +1 -0
  56. package/package.json +42 -0
  57. package/pipeline/cookbooks/SKILL.md +285 -0
  58. package/pipeline/cookbooks/remotion-video.md +585 -0
  59. package/pipeline/cookbooks/round-video-character.md +337 -0
  60. package/pipeline/cookbooks/talking-character.md +59 -0
  61. package/scripts/produce-menopause-campaign.sh +202 -0
  62. package/service/music/SKILL.md +229 -0
  63. package/service/music/index.ts +296 -0
  64. package/test-import.ts +7 -0
  65. package/test-services.ts +97 -0
  66. package/tsconfig.json +29 -0
  67. package/utilities/s3.ts +147 -0
@@ -0,0 +1,337 @@
1
+ # round video character cookbook
2
+
3
+ create realistic round selfie videos for telegram: front-facing camera POV videos with authentic camera shake, lighting, and audio
4
+
5
+ ## what this does
6
+
7
+ 1. generates 3 first frame options: person in specified setting (conference, station, etc)
8
+ 2. ai picks the best first frame from the 3 options
9
+ 3. generates voiceover from text script
10
+ 4. creates talking video using wan 2.5 with audio sync
11
+
12
+ ## inputs
13
+
14
+ - `text_script`: what the person will say
15
+ - `profile_photo`: photo of the person (e.g., media/friend/katia.jpg)
16
+ - `scene_location`: where they are (from script or default: conference/underground station)
17
+
18
+ ## steps
19
+
20
+ ### step 1: generate first frame options (person in setting)
21
+
22
+ generate 3 variations and let ai pick the best one:
23
+
24
+ ```bash
25
+ # generate 3 SELFIE-STYLE first frame options using nano banana pro
26
+ # CRITICAL: use the proven prompt structure below
27
+ # aspect_ratio "auto" preserves the original photo's aspect ratio (portrait/landscape)
28
+
29
+ # option 1
30
+ bun run lib/fal.ts image_to_image \
31
+ "selfie POV, camera with subtle natural wobble and shake throughout, focus on subject with shallow depth of field, dramatic low-light environment with intense magenta and hot pink lighting creating strong color cast, ambient blue lights scattered in background, dark indoor busy setting, abstract out-of-focus colorful lights creating busy background, location: hackathon space, wear black hoodie without any text on it" \
32
+ media/friend/katia.jpg \
33
+ auto
34
+
35
+ # option 2
36
+ bun run lib/fal.ts image_to_image \
37
+ "selfie POV, camera with subtle natural wobble and shake throughout, focus on subject with shallow depth of field, dramatic low-light environment with intense magenta and hot pink lighting creating strong color cast, ambient blue lights scattered in background, dark indoor busy setting, abstract out-of-focus colorful lights creating busy background, location: hackathon space, wear black hoodie without any text on it" \
38
+ media/friend/katia.jpg \
39
+ auto
40
+
41
+ # option 3
42
+ bun run lib/fal.ts image_to_image \
43
+ "selfie POV, camera with subtle natural wobble and shake throughout, focus on subject with shallow depth of field, dramatic low-light environment with intense magenta and hot pink lighting creating strong color cast, ambient blue lights scattered in background, dark indoor busy setting, abstract out-of-focus colorful lights creating busy background, location: hackathon space, wear black hoodie without any text on it" \
44
+ media/friend/katia.jpg \
45
+ auto
46
+ ```
47
+
48
+ **important prompting for selfie style (image-to-image):**
49
+ - start with "selfie POV" - simple and effective
50
+ - include "camera with subtle natural wobble and shake throughout"
51
+ - specify "focus on subject with shallow depth of field"
52
+ - lighting: "dramatic low-light environment with intense magenta and hot pink lighting creating strong color cast"
53
+ - background: "ambient blue lights scattered in background, dark indoor busy setting, abstract out-of-focus colorful lights"
54
+ - clothing: "wear black hoodie without any text on it" (or specify other clothing)
55
+ - location: flexible - adjust based on script (hackathon space, metro station, office, etc.)
56
+ - aspect ratio "auto" preserves original dimensions - critical for avoiding squashed/stretched video!
57
+
58
+ each command outputs a URL like: `https://v3b.fal.media/files/.../image.jpg`
59
+
60
+ download all 3 options:
61
+ ```bash
62
+ curl -o media/friend/katia/option1.jpg "https://url-from-option-1.jpg"
63
+ curl -o media/friend/katia/option2.jpg "https://url-from-option-2.jpg"
64
+ curl -o media/friend/katia/option3.jpg "https://url-from-option-3.jpg"
65
+ ```
66
+
67
+ **ai should review the 3 options and pick the best one based on:**
68
+ - face quality and recognition
69
+ - natural selfie look
70
+ - lighting and color balance
71
+ - background blur and composition
72
+ - overall authenticity
73
+
74
+ use the selected image (local path or url) for step 3 (wan 2.5)
75
+
76
+ ### step 2: generate voiceover
77
+
78
+ ```bash
79
+ # generate voice from script
80
+ # save to media/friend/[name]/voice.mp3 for organization
81
+ bun run lib/elevenlabs.ts tts \
82
+ "hey everyone! excited to share this update from the conference" \
83
+ rachel \
84
+ media/friend/katia/voice.mp3
85
+ ```
86
+
87
+ the audio is saved to `media/friend/[name]/voice.mp3`. no manual upload is needed: the fal wan command in step 3 accepts local paths and uploads them automatically.
88
+
89
+ ### step 3: generate talking video with wan 2.5 (via fal)
90
+
91
+ **important: audio must be at least 3 seconds long!**
92
+
93
+ fal's wan-25 endpoint requires audio duration of 3+ seconds. if your script is too short, extend it.
94
+
95
+ ```bash
96
+ # use fal's wan-25 endpoint (supports local files and urls)
97
+ # audio and image files will be auto-uploaded if local paths are provided
98
+ # duration MUST be 5 or 10 seconds only
99
+ bun run lib/fal.ts wan \
100
+ media/friend/katia/option2.jpg \
101
+ media/friend/katia/voice.mp3 \
102
+ "front-facing camera selfie POV video, handheld phone directly in front of face with continuous slight wobble and shake, subject in sharp focus with softly blurred background shallow depth of field, dramatic low-light scene with intense magenta hot pink light illuminating face and blue ambient lights in blurred background, dark indoor busy conference setting with abstract out-of-focus lights, conversational audio with muffled background crowd chatter and commotion" \
103
+ 10 \
104
+ 480p
105
+ ```
106
+
107
+ **command structure:**
108
+ ```bash
109
+ bun run lib/fal.ts wan <image_path_or_url> <audio_path_or_url> <prompt> [duration] [resolution]
110
+ ```
111
+
112
+ **parameters:**
113
+ - image: local path or url (auto-uploaded if local)
114
+ - audio: local path or url (auto-uploaded if local, **must be 3+ seconds**)
115
+ - prompt: detailed video style description
116
+ - duration: 5 or 10 (default: 5)
117
+ - resolution: 480p, 720p, or 1080p (default: 480p)
118
+
119
+ **detailed prompt structure for realistic selfie videos:**
120
+
121
+ the prompt should include ALL these elements for maximum authenticity:
122
+
123
+ **camera technique:**
124
+ - "front-facing camera selfie POV video"
125
+ - "handheld phone directly in front of face"
126
+ - "continuous slight wobble and shake"
127
+
128
+ **focus & composition:**
129
+ - "subject in sharp focus"
130
+ - "softly blurred background shallow depth of field"
131
+
132
+ **lighting:**
133
+ - "dramatic low-light scene"
134
+ - "intense magenta hot pink light illuminating face" (or specify your lighting color)
135
+ - "blue ambient lights in blurred background" (optional, for busy settings)
136
+
137
+ **setting:**
138
+ - "dark indoor busy setting with abstract out-of-focus lights" (adjust based on location)
139
+
140
+ **audio characteristics:**
141
+ - "conversational audio with muffled background crowd chatter and commotion"
142
+
143
+ this comprehensive prompting creates videos that look like authentic, quickly-recorded selfie messages with realistic imperfections.
144
+
145
+ this takes 2-4 minutes. the command will wait for completion and output the video url.
146
+
147
+ ### step 4: download result
148
+
149
+ ```bash
150
+ # fal wan-25 returns video url like: https://v3b.fal.media/files/.../video.mp4
151
+ curl -o media/friend/katia/talking-character.mp4 "https://v3b.fal.media/files/.../video.mp4"
152
+ ```
153
+
154
+ ## output
155
+
156
+ - first frame options: 3 variations (jpg) - `media/friend/[name]/option1.jpg`, `option2.jpg`, `option3.jpg`
157
+ - selected first frame: best option chosen by ai
158
+ - voiceover: `media/friend/[name]/voice.mp3`
159
+ - final video: `media/friend/[name]/talking-character.mp4`
160
+
161
+ ## timing
162
+
163
+ - first frame generation: 15-30s (3 options)
164
+ - ai selection: instant
165
+ - voiceover: 5-10s
166
+ - wan 2.5 processing (fal): 2-4min
167
+
168
+ **total: ~3-5 minutes**
169
+
170
+ ## scene context examples
171
+
172
+ choose setting based on script context. always include handheld camera description for authentic look:
173
+
174
+ | script mentions | step 1: first frame prompt | wan 2.5 prompt (detailed style) |
175
+ |----------------|---------------------------|--------------------------------|
176
+ | "at the conference" / "hackathon" | selfie POV, camera with subtle natural wobble and shake throughout, focus on subject with shallow depth of field, dramatic low-light environment with intense magenta and hot pink lighting creating strong color cast, ambient blue lights scattered in background, dark indoor busy setting, abstract out-of-focus colorful lights creating busy background, location: hackathon space, wear black hoodie without any text on it | front-facing camera selfie POV video, handheld phone directly in front of face with continuous slight wobble, subject in sharp focus with softly blurred background, dramatic low-light with intense magenta hot pink light illuminating face and blue ambient lights in blurred background, dark indoor busy conference setting with abstract out-of-focus lights, conversational audio with muffled background crowd chatter |
177
+ | "subway" / "metro" | selfie POV, camera with subtle natural wobble and shake throughout, focus on subject with shallow depth of field, dramatic low-light environment with harsh fluorescent lighting, ambient lights scattered in background, dark underground station setting, abstract out-of-focus lights, location: metro station, wear black hoodie without any text on it | front-facing camera selfie POV video, handheld phone with slight shake, sharp focus on subject with blurred metro background, harsh fluorescent lighting with cool tones, dark underground station with out-of-focus commuters and lights, audio with echoing background noise and distant train sounds |
178
+ | "office" | selfie POV, camera with subtle natural wobble and shake throughout, focus on subject with shallow depth of field, soft indoor office lighting environment, ambient lights in background, modern workspace setting, abstract out-of-focus monitors and lights, location: office, wear black hoodie without any text on it | front-facing camera selfie POV video, handheld phone wobble, sharp subject focus with blurred office background, soft indoor office lighting, modern workspace with blurred monitors and colleagues in background, conversational audio with quiet office ambient noise |
179
+ | "street" | selfie POV, camera with subtle natural wobble and shake throughout, focus on subject with shallow depth of field, natural daylight or street lighting environment, ambient lights in background, urban street setting, abstract out-of-focus pedestrians and lights, location: city street, wear black hoodie without any text on it | front-facing camera selfie POV video, handheld shake, sharp focus with blurred street background, natural daylight or street lighting, urban setting with out-of-focus pedestrians and traffic, audio with street noise and distant traffic sounds |
180
+ | no location | selfie POV, camera with subtle natural wobble and shake throughout, focus on subject with shallow depth of field, dramatic low-light environment with intense magenta and hot pink lighting creating strong color cast, ambient blue lights scattered in background, dark indoor busy setting, abstract out-of-focus colorful lights, wear black hoodie without any text on it | front-facing camera selfie POV video, handheld phone with slight wobble, sharp subject with softly blurred background, natural indoor lighting, casual indoor setting, conversational audio (default) |
181
+
182
+ **key phrases for authentic selfie look:**
183
+
184
+ **step 1 (first frame - image-to-image):**
185
+
186
+ proven prompt structure (adjust location only):
187
+ ```
188
+ selfie POV, camera with subtle natural wobble and shake throughout, focus on subject with shallow depth of field, dramatic low-light environment with intense magenta and hot pink lighting creating strong color cast, ambient blue lights scattered in background, dark indoor busy setting, abstract out-of-focus colorful lights creating busy background, location: [hackathon space/metro station/office/city street], wear black hoodie without any text on it
189
+ ```
190
+
191
+ - start with "selfie POV" - simple, no zoom confusion
192
+ - "camera with subtle natural wobble and shake throughout" - natural movement
193
+ - "focus on subject with shallow depth of field" - proper framing
194
+ - lighting: magenta/hot pink with blue ambient (adjust per setting)
195
+ - location: flexible - change based on script
196
+ - clothing: black hoodie without text (or adjust as needed)
197
+
198
+ **step 3 (wan 2.5) - comprehensive style elements:**
199
+
200
+ *camera technique:*
201
+ - "front-facing camera selfie POV video"
202
+ - "handheld phone directly in front of face"
203
+ - "continuous slight wobble and shake"
204
+
205
+ *focus & depth:*
206
+ - "subject in sharp focus"
207
+ - "softly blurred background"
208
+ - "shallow depth of field"
209
+
210
+ *lighting:*
211
+ - "dramatic low-light scene"
212
+ - "intense magenta hot pink light illuminating face" (adjust color per setting)
213
+ - "blue ambient lights in blurred background" (optional)
214
+
215
+ *setting:*
216
+ - "dark indoor busy setting"
217
+ - "abstract out-of-focus lights"
218
+ - adjust per location (conference/metro/office/street)
219
+
220
+ *audio:*
221
+ - "conversational audio with muffled background crowd chatter and commotion"
222
+ - adjust per setting (metro=echoing/train sounds, office=quiet ambient, street=traffic)
223
+
224
+ ## example: full workflow
225
+
226
+ ```bash
227
+ # scenario: katia sharing conference update
228
+ # script: "hey everyone! i'm so excited to share this amazing update with you from the conference today"
229
+ # photo: media/friend/katia.jpg
230
+ # note: audio must be 3+ seconds long for wan-25!
231
+
232
+ # step 1: generate 3 SELFIE first frame options with nano banana pro
233
+ bun run lib/fal.ts image_to_image \
234
+ "selfie POV, camera with subtle natural wobble and shake throughout, focus on subject with shallow depth of field, dramatic low-light environment with intense magenta and hot pink lighting creating strong color cast, ambient blue lights scattered in background, dark indoor busy setting, abstract out-of-focus colorful lights creating busy background, location: hackathon space, wear black hoodie without any text on it" \
235
+ media/friend/katia.jpg \
236
+ auto
237
+ # output 1: https://v3b.fal.media/files/.../option1.png
238
+
239
+ bun run lib/fal.ts image_to_image \
240
+ "selfie POV, camera with subtle natural wobble and shake throughout, focus on subject with shallow depth of field, dramatic low-light environment with intense magenta and hot pink lighting creating strong color cast, ambient blue lights scattered in background, dark indoor busy setting, abstract out-of-focus colorful lights creating busy background, location: hackathon space, wear black hoodie without any text on it" \
241
+ media/friend/katia.jpg \
242
+ auto
243
+ # output 2: https://v3b.fal.media/files/.../option2.png
244
+
245
+ bun run lib/fal.ts image_to_image \
246
+ "selfie POV, camera with subtle natural wobble and shake throughout, focus on subject with shallow depth of field, dramatic low-light environment with intense magenta and hot pink lighting creating strong color cast, ambient blue lights scattered in background, dark indoor busy setting, abstract out-of-focus colorful lights creating busy background, location: hackathon space, wear black hoodie without any text on it" \
247
+ media/friend/katia.jpg \
248
+ auto
249
+ # output 3: https://v3b.fal.media/files/.../option3.png
250
+
251
+ # download all 3 options
252
+ curl -o media/friend/katia/option1.jpg "https://v3b.fal.media/files/.../option1.png"
253
+ curl -o media/friend/katia/option2.jpg "https://v3b.fal.media/files/.../option2.png"
254
+ curl -o media/friend/katia/option3.jpg "https://v3b.fal.media/files/.../option3.png"
255
+
256
+ # ai reviews the 3 options and picks the best one based on:
257
+ # - face quality and recognition
258
+ # - natural selfie look
259
+ # - lighting and color balance
260
+ # - background blur and composition
261
+ # - overall authenticity
262
+ # selected: option2 (example)
263
+
264
+ # step 2: generate voice (ensure 3+ seconds for wan-25)
265
+ bun run lib/elevenlabs.ts tts \
266
+ "hey everyone! i'm so excited to share this amazing update with you from the conference today" \
267
+ rachel \
268
+ media/friend/katia/voice.mp3
269
+ # output: media/friend/katia/voice.mp3
270
+
271
+ # step 3: run wan-25 (fal) - auto-uploads local files
272
+ bun run lib/fal.ts wan \
273
+ media/friend/katia/option2.jpg \
274
+ media/friend/katia/voice.mp3 \
275
+ "front-facing camera selfie POV video, handheld phone directly in front of face with continuous slight wobble and shake, subject in sharp focus with softly blurred background shallow depth of field, dramatic low-light scene with intense magenta hot pink light illuminating face and blue ambient lights in blurred background, dark indoor busy conference setting with abstract out-of-focus lights, conversational audio with muffled background crowd chatter and commotion" \
276
+ 10 \
277
+ 480p
278
+ # takes 2-4 minutes...
279
+ # output: { "data": { "video": { "url": "https://v3b.fal.media/files/.../video.mp4" } } }
280
+
281
+ # step 4: download
282
+ curl -o media/friend/katia/talking-character.mp4 "https://v3b.fal.media/files/.../video.mp4"
283
+ ```
284
+
285
+ **tested successfully** with katia.jpg and aleks - see media/friend/ for example outputs!
286
+
287
+ ## tips
288
+
289
+ - **selfie perspective**: CRITICAL - always use "selfie POV" in step 1 first frame generation!
290
+ - **audio duration**: CRITICAL - wan-25 requires audio to be at least 3 seconds long. extend short scripts!
291
+ - **duration constraint**: wan-25 only accepts 5 or 10 second videos
292
+ - **script length**: ensure script is at least 3 seconds when spoken, max 10 seconds
293
+ - **aspect ratio preservation**: CRITICAL - always use "auto" aspect ratio in image-to-image to avoid squashed/stretched videos!
294
+ - **nano banana pro**: uses aspect_ratio="auto" to preserve original photo dimensions (portrait/landscape)
295
+ - **local file support**: fal wan command auto-uploads local files - no need for manual upload step!
296
+ - **handheld camera**: always include "handheld phone" + "wobble and shake" in wan-25 prompt for authentic look
297
+ - **first frame quality**: this is the base - make it look natural and selfie-like!
298
+ - **scene matching**: extract location from script when mentioned
299
+ - **voice selection**: rachel (default) is clear and professional
300
+ - **resolution**: 480p is faster (2-3min), 720p/1080p takes longer (4-5min)
301
+ - **save intermediates**: store outputs in media/friend/[name]/ for organization and reuse
302
+ - **using fal instead of replicate**: fal's wan-25 endpoint is faster and more reliable than replicate
303
+
304
+ ## voice options
305
+
306
+ ```bash
307
+ # female voices (american english)
308
+ bun run lib/elevenlabs.ts tts "script" rachel media/friend/[name]/voice.mp3
309
+ bun run lib/elevenlabs.ts tts "script" bella media/friend/[name]/voice.mp3
310
+ bun run lib/elevenlabs.ts tts "script" elli media/friend/[name]/voice.mp3
311
+
312
+ # male voices (american english)
313
+ bun run lib/elevenlabs.ts tts "script" antoni media/friend/[name]/voice.mp3
314
+ bun run lib/elevenlabs.ts tts "script" josh media/friend/[name]/voice.mp3
315
+ ```
316
+
317
+ see all voices: `bun run lib/elevenlabs.ts voices`
318
+
319
+ ## environment setup
320
+
321
+ ```bash
322
+ # required api keys
323
+ export ELEVENLABS_API_KEY="your_key"
324
+ export FAL_KEY="your_key" # for wan-25 and image generation
325
+ ```
326
+
327
+ ## changelog
328
+
329
+ **2024-11-22:**
330
+ - switched to fal's wan-25-preview endpoint (faster, more reliable than replicate)
331
+ - added wan-25 support to lib/fal.ts with auto-upload for local files
332
+ - discovered: audio must be at least 3 seconds long for wan-25 (critical!)
333
+ - simplified workflow: no manual audio upload step needed
334
+ - tested successfully with aleks photo and "give me money" script
335
+ - switched from flux to nano banana pro for image-to-image (better aspect ratio preservation)
336
+ - fixed squashed video issue by using aspect_ratio="auto"
337
+ - clarified duration constraints (5 or 10 seconds only)
@@ -0,0 +1,59 @@
1
+ # talking character pipeline
2
+
3
+ create a talking character video with lipsync and captions
4
+
5
+ ## steps
6
+
7
+ ### 1. create character headshot
8
+ ```bash
9
+ # generate character using higgsfield soul
10
+ bun run service/image.ts soul "professional headshot of a friendly person, studio lighting" true
11
+ ```
12
+
13
+ ### 2. generate voiceover
14
+ ```bash
15
+ # use fal voice synthesis
16
+ bun run lib/fal.ts generate_speech "hello world, this is my voice" true
17
+ ```
18
+
19
+ ### 3. animate character
20
+ ```bash
21
+ # image-to-video with character talking
22
+ bun run service/video.ts from_image "person talking naturally, professional demeanor" <headshot_url> 5 true
23
+ ```
24
+
25
+ ### 4. add lipsync
26
+ ```bash
27
+ # sync lips with voiceover
28
+ bun run service/sync.ts overlay <video_url> <audio_url> output.mp4
29
+ ```
30
+
31
+ ### 5. add captions
32
+ ```bash
33
+ # add auto-generated captions with transcription
34
+ bun run service/captions.ts output.mp4 captioned.mp4 --provider fireworks
35
+ ```
36
+
37
+ ### 6. prepare for social media
38
+ ```bash
39
+ # resize and optimize for tiktok/instagram
40
+ bun run service/edit.ts social captioned.mp4 final-tiktok.mp4 tiktok
41
+ ```
42
+
43
+ ## expected output
44
+ - character headshot (png)
45
+ - voiceover audio (mp3)
46
+ - animated video (mp4)
47
+ - lipsynced video (mp4)
48
+ - captioned video (mp4)
49
+ - final social media ready video (mp4)
50
+
51
+ ## estimated time
52
+ - headshot: 30s
53
+ - voiceover: 10s
54
+ - animation: 2-3min
55
+ - lipsync: 30s
56
+ - captions: 15s (includes transcription)
57
+ - social prep: 5s
58
+
59
+ total: ~4-5min
@@ -0,0 +1,202 @@
1
+ #!/bin/bash
2
+ # Automated production script for 15 Menopause Diet campaign videos
3
+ # Prerequisites:
4
+ # - All character images generated (✅ done)
5
+ # - Voiceover generated (✅ done)
6
+ # - Screencast video file at media/funnel-screencast.mp4
7
+ # - Background music at media/background-music.mp3 (optional)
8
+ # - FAL API credentials configured; bun, jq, curl and ffmpeg available on PATH
9
+
10
+ set -euo pipefail # Exit on error, on unset variables, and when any stage of a pipeline (e.g. jq | tr) fails
11
+
12
+ echo "🎬 Starting Menopause Campaign Video Production"
13
+ echo "==============================================="
14
+
15
+ # Input locations: character metadata (read with jq) and the voiceover (remote URL for lipsync, local copy for transcription)
16
+ ASSETS_FILE="media/menopause-campaign-assets.json"
17
+ VOICE_URL="http://s3.varg.ai/varg/voice/1763782542061-rachel.mp3"
18
+ VOICE_LOCAL="media/voice-1763782540364.mp3"
19
+
20
+ # Create output directories (one per pipeline stage; -p makes re-runs safe)
21
+ mkdir -p media/campaign/{animated,synced,captioned,hooks,final}
22
+
23
+ echo ""
24
+ echo "📋 Step 0: Generate SRT from voiceover"
25
+ echo "--------------------------------------"
26
+ if [ ! -f "media/voice-1763782540364.srt" ]; then
27
+ bun run service/transcribe/index.ts \
28
+ "$VOICE_LOCAL" \
29
+ fireworks \
30
+ media/voice-1763782540364.srt
31
+ echo "✅ SRT file generated"
32
+ else
33
+ echo "⏭️ SRT file already exists"
34
+ fi
35
+
36
+ echo ""
37
+ echo "🎨 Step 1: Animate all 15 character images"
38
+ echo "-------------------------------------------"
39
+ for i in {0..14}; do
40
+ IMAGE_URL=$(jq -r ".characters[$i].imageUrl" "$ASSETS_FILE")
41
+ PROFESSION=$(jq -r ".characters[$i].profession" "$ASSETS_FILE" | tr ' ' '-')
42
+ OUTPUT_NUM=$((i+1))
43
+
44
+ echo "Animating $PROFESSION ($OUTPUT_NUM/15)..."
45
+
46
+ # Run animation; the command's stdout is captured as this character's result JSON
47
+ bun run service/video/index.ts from_image \
48
+ "person talking naturally to camera, professional demeanor" \
49
+ "$IMAGE_URL" \
50
+ 5 \
51
+ true \
52
+ > "media/campaign/animated/result-$OUTPUT_NUM.json"
53
+
54
+ # Extract the .uploaded URL and append it to video-urls.txt (>> keeps lines from earlier runs; jq prints "null" if the field is absent)
55
+ VIDEO_URL=$(jq -r '.uploaded' "media/campaign/animated/result-$OUTPUT_NUM.json")
56
+ echo "$VIDEO_URL" >> media/campaign/animated/video-urls.txt
57
+
58
+ echo "✅ Animated: $PROFESSION"
59
+ done
60
+
61
+ echo ""
62
+ echo "🎤 Step 2: Lipsync videos with voiceover"
63
+ echo "-----------------------------------------"
64
+ # Take each animated video URL from its own Step 1 result file rather than from video-urls.txt: that list is
65
+ # only ever appended to, so a re-run leaves stale/duplicate lines in it; this also keeps the loop's stdin untouched.
66
+ for line_num in {1..15}; do
67
+ video_url=$(jq -r '.uploaded' "media/campaign/animated/result-$line_num.json")
68
+ echo "Lipsyncing video $line_num/15..."
69
+
70
+ bun run service/sync/index.ts wav2lip \
71
+ "$video_url" \
72
+ "$VOICE_URL" \
73
+ > "media/campaign/synced/result-$line_num.json"
74
+ # Download the synced video (-f: fail on HTTP errors instead of saving the error page, -L: follow redirects)
75
+ SYNCED_URL=$(jq -r '.videoUrl' "media/campaign/synced/result-$line_num.json")
76
+ curl -fL -o "media/campaign/synced/video-$line_num.mp4" "$SYNCED_URL"
77
+
78
+ echo "✅ Lipsynced video $line_num"
79
+ done
80
+
81
+ echo ""
82
+ echo "📝 Step 3: Add dynamic captions"
83
+ echo "--------------------------------"
84
+ for i in {1..15}; do
85
+ echo "Adding captions to video $i/15..."
86
+
87
+ bun run service/captions/index.ts \
88
+ "media/campaign/synced/video-$i.mp4" \
89
+ "media/campaign/captioned/video-$i.mp4" \
90
+ --srt media/voice-1763782540364.srt \
91
+ --font "Arial Black" \
92
+ --size 32 \
93
+ --color "&HFFFFFF"
94
+
95
+ echo "✅ Captioned video $i"
96
+ done
97
+
98
+ echo ""
99
+ echo "🏷️ Step 4: Add subtitle and disclaimer overlays"
100
+ echo "------------------------------------------------"
101
+ for i in {1..15}; do
102
+ echo "Adding overlays to video $i/15..."
103
+
104
+ # Add title "My Menopause weight loss" at top
105
+ # Add subtitle "Scientifically designed for women 40+" near bottom
106
+ # Add disclaimer at bottom
107
+ ffmpeg -i "media/campaign/captioned/video-$i.mp4" \
108
+ -vf "[in]drawtext=fontfile=/System/Library/Fonts/Supplemental/Arial.ttf:text='My Menopause weight loss':fontcolor=white:fontsize=42:x=(w-text_w)/2:y=60:box=1:boxcolor=black@0.7:boxborderw=10:borderw=2:bordercolor=white,\
109
+ drawtext=fontfile=/System/Library/Fonts/Supplemental/Arial.ttf:text='Scientifically designed for women 40+':fontcolor=white:fontsize=16:x=(w-text_w)/2:y=h-100:alpha=0.9,\
110
+ drawtext=fontfile=/System/Library/Fonts/Supplemental/Arial.ttf:text='Results may vary. Always consult your doctor before starting any diet program':fontcolor=white:fontsize=12:x=(w-text_w)/2:y=h-60:alpha=0.8[out]" \
111
+ -codec:a copy \
112
+ -y \
113
+ "media/campaign/hooks/hook-$i.mp4"
114
+
115
+ echo "✅ Added overlays to video $i"
116
+ done
117
+
118
+ echo ""
119
+ echo "🎞️ Step 5: Prepare screencast with music"
120
+ echo "-----------------------------------------"
121
+ if [ ! -f "media/funnel-screencast.mp4" ]; then
122
+ echo "❌ ERROR: media/funnel-screencast.mp4 not found!"
123
+ echo " Please provide the menopause funnel screencast."
124
+ exit 1
125
+ fi
126
+
127
+ if [ -f "media/background-music.mp3" ]; then
128
+ echo "Adding music to screencast..."
129
+ ffmpeg -i media/funnel-screencast.mp4 \
130
+ -i media/background-music.mp3 \
131
+ -c:v copy \
132
+ -map 0:v:0 \
133
+ -map 1:a:0 \
134
+ -shortest \
135
+ -y \
136
+ media/campaign/funnel-with-music.mp4
137
+ echo "✅ Music added to screencast"
138
+ else
139
+ echo "⚠️ No background music found, using screencast as-is"
140
+ cp media/funnel-screencast.mp4 media/campaign/funnel-with-music.mp4
141
+ fi
142
+
143
+ echo ""
144
+ echo "🔗 Step 6: Merge hooks with screencast"
145
+ echo "---------------------------------------"
146
+ for i in {1..15}; do
147
+ echo "Merging video $i/15..."
148
+
149
+ # Create concat file; relative entries are resolved from the list file's own directory (media/campaign/)
150
+ echo "file 'hooks/hook-$i.mp4'" > "media/campaign/concat-$i.txt"
151
+ echo "file 'funnel-with-music.mp4'" >> "media/campaign/concat-$i.txt"
152
+
153
+ # Merge hook + screencast without re-encoding (-c copy)
154
+ ffmpeg -f concat -safe 0 -i "media/campaign/concat-$i.txt" \
155
+ -c copy \
156
+ -y \
157
+ "media/campaign/merged-$i.mp4"
158
+
159
+ echo "✅ Merged video $i"
160
+ done
161
+
162
+ echo ""
163
+ echo "📱 Step 7: Optimize for social media (TikTok vertical)"
164
+ echo "-------------------------------------------------------"
165
+ for i in {0..14}; do
166
+ VIDEO_NUM=$((i+1))
167
+ PROFESSION=$(jq -r ".characters[$i].profession" "$ASSETS_FILE" | tr ' ' '-')
168
+
169
+ echo "Optimizing $PROFESSION ($VIDEO_NUM/15)..."
170
+
171
+ bun run service/edit/index.ts social \
172
+ "media/campaign/merged-$VIDEO_NUM.mp4" \
173
+ "media/campaign/final/$PROFESSION-tiktok.mp4" \
174
+ tiktok
175
+
176
+ echo "✅ Final video: $PROFESSION"
177
+ done
178
+
179
+ echo ""
180
+ echo "✨ PRODUCTION COMPLETE!"
181
+ echo "======================"
182
+ echo ""
183
+ echo "📊 Summary:"
184
+ echo " • 15 character images ✅"
185
+ echo " • 1 voiceover audio ✅"
186
+ echo " • 15 animated videos ✅"
187
+ echo " • 15 lipsynced videos ✅"
188
+ echo " • 15 captioned videos ✅"
189
+ echo " • 15 final videos with overlays ✅"
190
+ echo ""
191
+ echo "📁 Final videos location: media/campaign/final/"
192
+ echo ""
193
+ echo "🎯 Next steps:"
194
+ echo " 1. Review all 15 videos"
195
+ echo " 2. Test on target platforms"
196
+ echo " 3. Upload to ad platform"
197
+ echo " 4. Start A/B testing!"
198
+ echo ""
199
+
200
+ # List all final files
201
+ echo "📋 Final video files:"
202
+ ls -lh media/campaign/final/