varg.ai-sdk 0.1.0 → 0.4.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236)
  1. package/.claude/settings.local.json +1 -1
  2. package/.env.example +3 -0
  3. package/.github/workflows/ci.yml +23 -0
  4. package/.husky/README.md +102 -0
  5. package/.husky/commit-msg +6 -0
  6. package/.husky/pre-commit +9 -0
  7. package/.husky/pre-push +6 -0
  8. package/.size-limit.json +8 -0
  9. package/.test-hooks.ts +5 -0
  10. package/CLAUDE.md +10 -3
  11. package/CONTRIBUTING.md +150 -0
  12. package/LICENSE.md +53 -0
  13. package/README.md +56 -209
  14. package/SKILLS.md +26 -10
  15. package/biome.json +7 -1
  16. package/bun.lock +1286 -0
  17. package/commitlint.config.js +22 -0
  18. package/docs/index.html +1130 -0
  19. package/docs/prompting.md +326 -0
  20. package/docs/react.md +834 -0
  21. package/docs/sdk.md +812 -0
  22. package/ffmpeg/CLAUDE.md +68 -0
  23. package/package.json +48 -8
  24. package/pipeline/cookbooks/scripts/animate-frames-parallel.ts +84 -0
  25. package/pipeline/cookbooks/scripts/combine-scenes.sh +53 -0
  26. package/pipeline/cookbooks/scripts/generate-frames-parallel.ts +99 -0
  27. package/pipeline/cookbooks/scripts/still-to-video.sh +37 -0
  28. package/pipeline/cookbooks/text-to-tiktok.md +669 -0
  29. package/pipeline/cookbooks/trendwatching.md +156 -0
  30. package/plan.md +281 -0
  31. package/scripts/.gitkeep +0 -0
  32. package/src/ai-sdk/cache.ts +142 -0
  33. package/src/ai-sdk/examples/cached-generation.ts +53 -0
  34. package/src/ai-sdk/examples/duet-scene-4.ts +53 -0
  35. package/src/ai-sdk/examples/duet-scene-5-audio.ts +32 -0
  36. package/src/ai-sdk/examples/duet-video.ts +56 -0
  37. package/src/ai-sdk/examples/editly-composition.ts +63 -0
  38. package/src/ai-sdk/examples/editly-test.ts +57 -0
  39. package/src/ai-sdk/examples/editly-video-test.ts +52 -0
  40. package/src/ai-sdk/examples/fal-lipsync.ts +43 -0
  41. package/src/ai-sdk/examples/higgsfield-image.ts +61 -0
  42. package/src/ai-sdk/examples/music-generation.ts +19 -0
  43. package/src/ai-sdk/examples/openai-sora.ts +34 -0
  44. package/src/ai-sdk/examples/replicate-bg-removal.ts +52 -0
  45. package/src/ai-sdk/examples/simpsons-scene.ts +61 -0
  46. package/src/ai-sdk/examples/talking-lion.ts +55 -0
  47. package/src/ai-sdk/examples/video-generation.ts +39 -0
  48. package/src/ai-sdk/examples/workflow-animated-girl.ts +104 -0
  49. package/src/ai-sdk/examples/workflow-before-after.ts +114 -0
  50. package/src/ai-sdk/examples/workflow-character-grid.ts +112 -0
  51. package/src/ai-sdk/examples/workflow-slideshow.ts +161 -0
  52. package/src/ai-sdk/file-cache.ts +112 -0
  53. package/src/ai-sdk/file.ts +238 -0
  54. package/src/ai-sdk/generate-element.ts +92 -0
  55. package/src/ai-sdk/generate-music.ts +46 -0
  56. package/src/ai-sdk/generate-video.ts +165 -0
  57. package/src/ai-sdk/index.ts +72 -0
  58. package/src/ai-sdk/music-model.ts +110 -0
  59. package/src/ai-sdk/providers/editly/editly.test.ts +1108 -0
  60. package/src/ai-sdk/providers/editly/ffmpeg.ts +60 -0
  61. package/src/ai-sdk/providers/editly/index.ts +817 -0
  62. package/src/ai-sdk/providers/editly/layers.ts +776 -0
  63. package/src/ai-sdk/providers/editly/plan.md +144 -0
  64. package/src/ai-sdk/providers/editly/types.ts +328 -0
  65. package/src/ai-sdk/providers/elevenlabs-provider.ts +255 -0
  66. package/src/ai-sdk/providers/fal-provider.ts +512 -0
  67. package/src/ai-sdk/providers/higgsfield.ts +379 -0
  68. package/src/ai-sdk/providers/openai.ts +251 -0
  69. package/src/ai-sdk/providers/replicate.ts +16 -0
  70. package/src/ai-sdk/video-model.ts +185 -0
  71. package/src/cli/commands/find.tsx +137 -0
  72. package/src/cli/commands/help.tsx +85 -0
  73. package/src/cli/commands/index.ts +6 -0
  74. package/src/cli/commands/list.tsx +238 -0
  75. package/src/cli/commands/render.tsx +71 -0
  76. package/src/cli/commands/run.tsx +511 -0
  77. package/src/cli/commands/which.tsx +253 -0
  78. package/src/cli/index.ts +114 -0
  79. package/src/cli/quiet.ts +44 -0
  80. package/src/cli/types.ts +32 -0
  81. package/src/cli/ui/components/Badge.tsx +29 -0
  82. package/src/cli/ui/components/DataTable.tsx +51 -0
  83. package/src/cli/ui/components/Header.tsx +23 -0
  84. package/src/cli/ui/components/HelpBlock.tsx +44 -0
  85. package/src/cli/ui/components/KeyValue.tsx +33 -0
  86. package/src/cli/ui/components/OptionRow.tsx +81 -0
  87. package/src/cli/ui/components/Separator.tsx +23 -0
  88. package/src/cli/ui/components/StatusBox.tsx +108 -0
  89. package/src/cli/ui/components/VargBox.tsx +51 -0
  90. package/src/cli/ui/components/VargProgress.tsx +36 -0
  91. package/src/cli/ui/components/VargSpinner.tsx +34 -0
  92. package/src/cli/ui/components/VargText.tsx +56 -0
  93. package/src/cli/ui/components/index.ts +19 -0
  94. package/src/cli/ui/index.ts +12 -0
  95. package/src/cli/ui/render.ts +35 -0
  96. package/src/cli/ui/theme.ts +63 -0
  97. package/src/cli/utils.ts +78 -0
  98. package/src/core/executor/executor.ts +201 -0
  99. package/src/core/executor/index.ts +13 -0
  100. package/src/core/executor/job.ts +214 -0
  101. package/src/core/executor/pipeline.ts +222 -0
  102. package/src/core/index.ts +11 -0
  103. package/src/core/registry/index.ts +9 -0
  104. package/src/core/registry/loader.ts +149 -0
  105. package/src/core/registry/registry.ts +221 -0
  106. package/src/core/registry/resolver.ts +206 -0
  107. package/src/core/schema/helpers.ts +134 -0
  108. package/src/core/schema/index.ts +8 -0
  109. package/src/core/schema/shared.ts +102 -0
  110. package/src/core/schema/types.ts +279 -0
  111. package/src/core/schema/validator.ts +92 -0
  112. package/src/definitions/actions/captions.ts +261 -0
  113. package/src/definitions/actions/edit.ts +298 -0
  114. package/src/definitions/actions/image.ts +125 -0
  115. package/src/definitions/actions/index.ts +114 -0
  116. package/src/definitions/actions/music.ts +205 -0
  117. package/src/definitions/actions/sync.ts +128 -0
  118. package/{action/transcribe/index.ts → src/definitions/actions/transcribe.ts} +63 -90
  119. package/src/definitions/actions/upload.ts +111 -0
  120. package/src/definitions/actions/video.ts +163 -0
  121. package/src/definitions/actions/voice.ts +119 -0
  122. package/src/definitions/index.ts +23 -0
  123. package/src/definitions/models/elevenlabs.ts +50 -0
  124. package/src/definitions/models/flux.ts +56 -0
  125. package/src/definitions/models/index.ts +36 -0
  126. package/src/definitions/models/kling.ts +56 -0
  127. package/src/definitions/models/llama.ts +54 -0
  128. package/src/definitions/models/nano-banana-pro.ts +102 -0
  129. package/src/definitions/models/sonauto.ts +68 -0
  130. package/src/definitions/models/soul.ts +65 -0
  131. package/src/definitions/models/wan.ts +54 -0
  132. package/src/definitions/models/whisper.ts +44 -0
  133. package/src/definitions/skills/index.ts +12 -0
  134. package/src/definitions/skills/talking-character.ts +87 -0
  135. package/src/definitions/skills/text-to-tiktok.ts +97 -0
  136. package/src/index.ts +118 -0
  137. package/src/providers/apify.ts +269 -0
  138. package/src/providers/base.ts +264 -0
  139. package/src/providers/elevenlabs.ts +217 -0
  140. package/src/providers/fal.ts +392 -0
  141. package/src/providers/ffmpeg.ts +544 -0
  142. package/src/providers/fireworks.ts +193 -0
  143. package/src/providers/groq.ts +149 -0
  144. package/src/providers/higgsfield.ts +145 -0
  145. package/src/providers/index.ts +143 -0
  146. package/src/providers/replicate.ts +147 -0
  147. package/src/providers/storage.ts +206 -0
  148. package/src/react/cli.ts +52 -0
  149. package/src/react/elements.ts +146 -0
  150. package/src/react/examples/branching.tsx +66 -0
  151. package/src/react/examples/captions-demo.tsx +37 -0
  152. package/src/react/examples/character-video.tsx +84 -0
  153. package/src/react/examples/grid.tsx +53 -0
  154. package/src/react/examples/layouts-demo.tsx +57 -0
  155. package/src/react/examples/madi.tsx +60 -0
  156. package/src/react/examples/music-test.tsx +35 -0
  157. package/src/react/examples/onlyfans-1m/workflow.tsx +88 -0
  158. package/src/react/examples/orange-portrait.tsx +41 -0
  159. package/src/react/examples/split-element-demo.tsx +60 -0
  160. package/src/react/examples/split-layout-demo.tsx +60 -0
  161. package/src/react/examples/split.tsx +41 -0
  162. package/src/react/examples/video-grid.tsx +46 -0
  163. package/src/react/index.ts +43 -0
  164. package/src/react/layouts/grid.tsx +28 -0
  165. package/src/react/layouts/index.ts +2 -0
  166. package/src/react/layouts/split.tsx +20 -0
  167. package/src/react/react.test.ts +309 -0
  168. package/src/react/render.ts +21 -0
  169. package/src/react/renderers/animate.ts +59 -0
  170. package/src/react/renderers/captions.ts +297 -0
  171. package/src/react/renderers/clip.ts +248 -0
  172. package/src/react/renderers/context.ts +17 -0
  173. package/src/react/renderers/image.ts +109 -0
  174. package/src/react/renderers/index.ts +22 -0
  175. package/src/react/renderers/music.ts +60 -0
  176. package/src/react/renderers/packshot.ts +84 -0
  177. package/src/react/renderers/progress.ts +173 -0
  178. package/src/react/renderers/render.ts +243 -0
  179. package/src/react/renderers/slider.ts +69 -0
  180. package/src/react/renderers/speech.ts +53 -0
  181. package/src/react/renderers/split.ts +91 -0
  182. package/src/react/renderers/subtitle.ts +16 -0
  183. package/src/react/renderers/swipe.ts +75 -0
  184. package/src/react/renderers/title.ts +17 -0
  185. package/src/react/renderers/utils.ts +124 -0
  186. package/src/react/renderers/video.ts +127 -0
  187. package/src/react/runtime/jsx-dev-runtime.ts +43 -0
  188. package/src/react/runtime/jsx-runtime.ts +35 -0
  189. package/src/react/types.ts +232 -0
  190. package/src/studio/index.ts +26 -0
  191. package/src/studio/scanner.ts +102 -0
  192. package/src/studio/server.ts +554 -0
  193. package/src/studio/stages.ts +251 -0
  194. package/src/studio/step-renderer.ts +279 -0
  195. package/src/studio/types.ts +60 -0
  196. package/src/studio/ui/cache.html +303 -0
  197. package/src/studio/ui/index.html +1820 -0
  198. package/src/tests/all.test.ts +509 -0
  199. package/src/tests/index.ts +33 -0
  200. package/src/tests/unit.test.ts +403 -0
  201. package/tsconfig.cli.json +8 -0
  202. package/tsconfig.json +21 -3
  203. package/TEST_RESULTS.md +0 -122
  204. package/action/captions/SKILL.md +0 -170
  205. package/action/captions/index.ts +0 -227
  206. package/action/edit/SKILL.md +0 -235
  207. package/action/edit/index.ts +0 -493
  208. package/action/image/SKILL.md +0 -140
  209. package/action/image/index.ts +0 -112
  210. package/action/sync/SKILL.md +0 -136
  211. package/action/sync/index.ts +0 -187
  212. package/action/transcribe/SKILL.md +0 -179
  213. package/action/video/SKILL.md +0 -116
  214. package/action/video/index.ts +0 -135
  215. package/action/voice/SKILL.md +0 -125
  216. package/action/voice/index.ts +0 -201
  217. package/index.ts +0 -38
  218. package/lib/README.md +0 -144
  219. package/lib/ai-sdk/fal.ts +0 -106
  220. package/lib/ai-sdk/replicate.ts +0 -107
  221. package/lib/elevenlabs.ts +0 -382
  222. package/lib/fal.ts +0 -478
  223. package/lib/ffmpeg.ts +0 -467
  224. package/lib/fireworks.ts +0 -235
  225. package/lib/groq.ts +0 -246
  226. package/lib/higgsfield.ts +0 -176
  227. package/lib/remotion/SKILL.md +0 -823
  228. package/lib/remotion/cli.ts +0 -115
  229. package/lib/remotion/functions.ts +0 -283
  230. package/lib/remotion/index.ts +0 -19
  231. package/lib/remotion/templates.ts +0 -73
  232. package/lib/replicate.ts +0 -304
  233. package/output.txt +0 -1
  234. package/test-import.ts +0 -7
  235. package/test-services.ts +0 -97
  236. package/utilities/s3.ts +0 -147
@@ -1,112 +0,0 @@
1
- #!/usr/bin/env bun
2
- /**
3
- * image generation service combining fal and higgsfield
4
- * usage: bun run service/image.ts <command> <args>
5
- */
6
-
7
- import { generateImage } from "../../lib/fal";
8
- import { generateSoul } from "../../lib/higgsfield";
9
- import { uploadFromUrl } from "../../utilities/s3";
10
-
11
- export interface ImageGenerationResult {
12
- imageUrl: string;
13
- uploaded?: string;
14
- }
15
-
16
- export async function generateWithFal(
17
- prompt: string,
18
- options: { model?: string; upload?: boolean } = {},
19
- ): Promise<ImageGenerationResult> {
20
- console.log("[service/image] generating with fal");
21
-
22
- const result = await generateImage({ prompt, model: options.model });
23
-
24
- const imageUrl = result.data?.images?.[0]?.url;
25
- if (!imageUrl) {
26
- throw new Error("no image url in result");
27
- }
28
-
29
- let uploaded: string | undefined;
30
- if (options.upload) {
31
- const timestamp = Date.now();
32
- const objectKey = `images/fal/${timestamp}.png`;
33
- uploaded = await uploadFromUrl(imageUrl, objectKey);
34
- console.log(`[service/image] uploaded to ${uploaded}`);
35
- }
36
-
37
- return { imageUrl, uploaded };
38
- }
39
-
40
- export async function generateWithSoul(
41
- prompt: string,
42
- options: { styleId?: string; upload?: boolean } = {},
43
- ): Promise<ImageGenerationResult> {
44
- console.log("[service/image] generating with higgsfield soul");
45
-
46
- const result = await generateSoul({
47
- prompt,
48
- styleId: options.styleId,
49
- });
50
-
51
- const imageUrl = result.jobs?.[0]?.results?.raw?.url;
52
- if (!imageUrl) {
53
- throw new Error("no image url in result");
54
- }
55
-
56
- let uploaded: string | undefined;
57
- if (options.upload) {
58
- const timestamp = Date.now();
59
- const objectKey = `images/soul/${timestamp}.png`;
60
- uploaded = await uploadFromUrl(imageUrl, objectKey);
61
- console.log(`[service/image] uploaded to ${uploaded}`);
62
- }
63
-
64
- return { imageUrl, uploaded };
65
- }
66
-
67
- // cli runner
68
- if (import.meta.main) {
69
- const [command, ...args] = process.argv.slice(2);
70
-
71
- switch (command) {
72
- case "fal": {
73
- if (!args[0]) {
74
- console.log(`
75
- usage:
76
- bun run service/image.ts fal <prompt> [model] [upload]
77
- `);
78
- process.exit(1);
79
- }
80
- const falResult = await generateWithFal(args[0], {
81
- model: args[1],
82
- upload: args[2] === "true",
83
- });
84
- console.log(JSON.stringify(falResult, null, 2));
85
- break;
86
- }
87
-
88
- case "soul": {
89
- if (!args[0]) {
90
- console.log(`
91
- usage:
92
- bun run service/image.ts soul <prompt> [styleId] [upload]
93
- `);
94
- process.exit(1);
95
- }
96
- const soulResult = await generateWithSoul(args[0], {
97
- styleId: args[1],
98
- upload: args[2] === "true",
99
- });
100
- console.log(JSON.stringify(soulResult, null, 2));
101
- break;
102
- }
103
-
104
- default:
105
- console.log(`
106
- usage:
107
- bun run service/image.ts fal <prompt> [model] [upload]
108
- bun run service/image.ts soul <prompt> [styleId] [upload]
109
- `);
110
- process.exit(1);
111
- }
112
- }
@@ -1,136 +0,0 @@
1
- ---
2
- name: video-lipsync
3
- description: sync video with audio using wav2lip ai model or simple audio overlay. use when creating talking videos, matching lip movements to audio, or combining video with voiceovers.
4
- allowed-tools: Read, Bash
5
- ---
6
-
7
- # video lipsync
8
-
9
- sync video with audio using ai-powered lipsync or simple overlay.
10
-
11
- ## methods
12
-
13
- ### wav2lip (ai-powered)
14
- - uses replicate wav2lip model
15
- - matches lip movements to audio
16
- - works with url inputs
17
- - processing time: 30-60 seconds
18
- - best for: talking character videos
19
-
20
- ### overlay (simple)
21
- - adds audio track to video using ffmpeg
22
- - no lip movement matching
23
- - works with local files
24
- - processing time: instant
25
- - best for: background music, voiceovers
26
-
27
- ## usage
28
-
29
- ### sync with method selection
30
- ```bash
31
- bun run service/sync.ts sync <videoUrl> <audioUrl> [method] [output]
32
- ```
33
-
34
- **parameters:**
35
- - `videoUrl` (required): video file path or url
36
- - `audioUrl` (required): audio file path or url
37
- - `method` (optional): "wav2lip" or "overlay" (default: overlay)
38
- - `output` (optional): output path (default: output-synced.mp4)
39
-
40
- **example:**
41
- ```bash
42
- bun run service/sync.ts sync video.mp4 audio.mp3 overlay output.mp4
43
- ```
44
-
45
- ### wav2lip direct
46
- ```bash
47
- bun run service/sync.ts wav2lip <videoUrl> <audioUrl>
48
- ```
49
-
50
- **example:**
51
- ```bash
52
- bun run service/sync.ts wav2lip https://example.com/character.mp4 https://example.com/voice.mp3
53
- ```
54
-
55
- ### overlay direct
56
- ```bash
57
- bun run service/sync.ts overlay <videoPath> <audioPath> [output]
58
- ```
59
-
60
- **example:**
61
- ```bash
62
- bun run service/sync.ts overlay character.mp4 narration.mp3 final.mp4
63
- ```
64
-
65
- ## as library
66
-
67
- ```typescript
68
- import { lipsync, lipsyncWav2Lip, lipsyncOverlay } from "./service/sync"
69
-
70
- // flexible sync
71
- const result = await lipsync({
72
- videoUrl: "video.mp4",
73
- audioUrl: "audio.mp3",
74
- method: "wav2lip",
75
- output: "synced.mp4"
76
- })
77
-
78
- // wav2lip specific
79
- const lipsynced = await lipsyncWav2Lip({
80
- videoUrl: "https://example.com/video.mp4",
81
- audioUrl: "https://example.com/audio.mp3"
82
- })
83
-
84
- // overlay specific
85
- const overlayed = await lipsyncOverlay(
86
- "video.mp4",
87
- "audio.mp3",
88
- "output.mp4"
89
- )
90
- ```
91
-
92
- ## when to use each method
93
-
94
- ### use wav2lip when:
95
- - creating talking character videos
96
- - lip movements must match speech
97
- - have urls for video and audio
98
- - quality is more important than speed
99
-
100
- ### use overlay when:
101
- - adding background music
102
- - audio doesn't require lip sync
103
- - working with local files
104
- - need instant processing
105
-
106
- ## typical workflow
107
-
108
- 1. generate character image (image service)
109
- 2. animate character (video service)
110
- 3. generate voiceover (voice service)
111
- 4. sync with wav2lip (this service)
112
- 5. add captions (captions service)
113
-
114
- ## tips
115
-
116
- **for wav2lip:**
117
- - use close-up character shots for best results
118
- - ensure audio is clear and well-paced
119
- - video should show face clearly
120
- - works best with 5-10 second clips
121
-
122
- **for overlay:**
123
- - match audio length to video length
124
- - ffmpeg will loop short audio or trim long audio
125
- - preserves original video quality
126
-
127
- ## environment variables
128
-
129
- required (for wav2lip):
130
- - `REPLICATE_API_TOKEN` - for wav2lip model
131
-
132
- no special requirements for overlay method (ffmpeg must be installed)
133
-
134
- ## error handling
135
-
136
- if wav2lip fails, the service automatically falls back to overlay method with a warning message.
@@ -1,187 +0,0 @@
1
- #!/usr/bin/env bun
2
-
3
- /**
4
- * lipsync service - combines video with audio using various methods
5
- * supports wav2lip, synclabs, and simple audio overlay
6
- */
7
-
8
- import { addAudio } from "../../lib/ffmpeg";
9
- import { runModel } from "../../lib/replicate";
10
-
11
- // types
12
- export interface LipsyncOptions {
13
- videoUrl: string;
14
- audioUrl: string;
15
- method?: "wav2lip" | "synclabs" | "overlay";
16
- output?: string;
17
- }
18
-
19
- export interface Wav2LipOptions {
20
- videoUrl: string;
21
- audioUrl: string;
22
- }
23
-
24
- // core functions
25
- export async function lipsync(options: LipsyncOptions) {
26
- const { videoUrl, audioUrl, method = "overlay", output } = options;
27
-
28
- if (!videoUrl || !audioUrl) {
29
- throw new Error("videoUrl and audioUrl are required");
30
- }
31
-
32
- console.log(`[sync] syncing video with audio using ${method}...`);
33
-
34
- switch (method) {
35
- case "wav2lip":
36
- return await lipsyncWav2Lip({ videoUrl, audioUrl });
37
-
38
- case "synclabs":
39
- console.log(
40
- `[sync] synclabs not yet implemented, falling back to overlay`,
41
- );
42
- return await lipsyncOverlay(videoUrl, audioUrl, output);
43
-
44
- case "overlay":
45
- return await lipsyncOverlay(videoUrl, audioUrl, output);
46
-
47
- default:
48
- throw new Error(`unknown lipsync method: ${method}`);
49
- }
50
- }
51
-
52
- export async function lipsyncWav2Lip(options: Wav2LipOptions) {
53
- const { videoUrl, audioUrl } = options;
54
-
55
- console.log(`[sync] using wav2lip model...`);
56
-
57
- try {
58
- const output = await runModel("devxpy/cog-wav2lip", {
59
- face: videoUrl,
60
- audio: audioUrl,
61
- });
62
-
63
- console.log(`[sync] wav2lip completed`);
64
- return output;
65
- } catch (error) {
66
- console.error(`[sync] wav2lip error:`, error);
67
- throw error;
68
- }
69
- }
70
-
71
- export async function lipsyncOverlay(
72
- videoPath: string,
73
- audioPath: string,
74
- output: string = "output-synced.mp4",
75
- ) {
76
- console.log(`[sync] overlaying audio on video...`);
77
-
78
- try {
79
- const result = await addAudio({
80
- videoPath,
81
- audioPath,
82
- output,
83
- });
84
-
85
- console.log(`[sync] overlay completed`);
86
- return result;
87
- } catch (error) {
88
- console.error(`[sync] overlay error:`, error);
89
- throw error;
90
- }
91
- }
92
-
93
- // cli
94
- async function cli() {
95
- const args = process.argv.slice(2);
96
- const command = args[0];
97
-
98
- if (!command || command === "help") {
99
- console.log(`
100
- usage:
101
- bun run service/sync.ts <command> [args]
102
-
103
- commands:
104
- sync <videoUrl> <audioUrl> [method] [output] sync video with audio
105
- wav2lip <videoUrl> <audioUrl> use wav2lip model
106
- overlay <videoPath> <audioPath> [output] simple audio overlay
107
- help show this help
108
-
109
- methods:
110
- wav2lip - ai-powered lipsync using replicate (url inputs)
111
- overlay - simple audio overlay using ffmpeg (local files)
112
-
113
- examples:
114
- bun run service/sync.ts sync video.mp4 audio.mp3 overlay output.mp4
115
- bun run service/sync.ts wav2lip https://example.com/video.mp4 https://example.com/audio.mp3
116
- bun run service/sync.ts overlay video.mp4 audio.mp3 synced.mp4
117
-
118
- environment:
119
- REPLICATE_API_TOKEN - required for wav2lip method
120
- `);
121
- process.exit(0);
122
- }
123
-
124
- try {
125
- switch (command) {
126
- case "sync": {
127
- const videoUrl = args[1];
128
- const audioUrl = args[2];
129
- const method = (args[3] || "overlay") as "wav2lip" | "overlay";
130
- const output = args[4];
131
-
132
- if (!videoUrl || !audioUrl) {
133
- throw new Error("videoUrl and audioUrl are required");
134
- }
135
-
136
- const result = await lipsync({
137
- videoUrl,
138
- audioUrl,
139
- method,
140
- output,
141
- });
142
-
143
- console.log(`[sync] result:`, result);
144
- break;
145
- }
146
-
147
- case "wav2lip": {
148
- const videoUrl = args[1];
149
- const audioUrl = args[2];
150
-
151
- if (!videoUrl || !audioUrl) {
152
- throw new Error("videoUrl and audioUrl are required");
153
- }
154
-
155
- const result = await lipsyncWav2Lip({ videoUrl, audioUrl });
156
- console.log(`[sync] result:`, result);
157
- break;
158
- }
159
-
160
- case "overlay": {
161
- const videoPath = args[1];
162
- const audioPath = args[2];
163
- const output = args[3];
164
-
165
- if (!videoPath || !audioPath) {
166
- throw new Error("videoPath and audioPath are required");
167
- }
168
-
169
- const result = await lipsyncOverlay(videoPath, audioPath, output);
170
- console.log(`[sync] result:`, result);
171
- break;
172
- }
173
-
174
- default:
175
- console.error(`unknown command: ${command}`);
176
- console.log(`run 'bun run service/sync.ts help' for usage`);
177
- process.exit(1);
178
- }
179
- } catch (error) {
180
- console.error(`[sync] error:`, error);
181
- process.exit(1);
182
- }
183
- }
184
-
185
- if (import.meta.main) {
186
- cli();
187
- }
@@ -1,179 +0,0 @@
1
- ---
2
- name: audio-transcription
3
- description: transcribe audio to text or subtitles using groq whisper or fireworks with srt/vtt support. use when converting speech to text, generating subtitles, or need word-level timestamps for captions.
4
- allowed-tools: Read, Bash
5
- ---
6
-
7
- # audio transcription
8
-
9
- convert audio to text or subtitle files using ai transcription.
10
-
11
- ## providers
12
-
13
- ### groq (ultra-fast)
14
- - uses whisper-large-v3
15
- - fastest transcription (~5-10 seconds)
16
- - plain text output
17
- - sentence-level timing
18
- - best for: quick transcripts, text extraction
19
-
20
- ### fireworks (word-level)
21
- - uses whisper-v3
22
- - word-level timestamps
23
- - outputs srt or vtt format
24
- - precise subtitle timing
25
- - best for: captions, subtitles, timed transcripts
26
-
27
- ## usage
28
-
29
- ### basic transcription
30
- ```bash
31
- bun run service/transcribe.ts <audioUrl> <provider> [outputPath]
32
- ```
33
-
34
- **example:**
35
- ```bash
36
- bun run service/transcribe.ts media/audio.mp3 groq
37
- bun run service/transcribe.ts media/audio.mp3 fireworks output.srt
38
- ```
39
-
40
- ### with output format
41
- ```bash
42
- bun run lib/fireworks.ts <audioPath> <outputPath>
43
- ```
44
-
45
- **example:**
46
- ```bash
47
- bun run lib/fireworks.ts media/audio.mp3 output.srt
48
- ```
49
-
50
- ## as library
51
-
52
- ```typescript
53
- import { transcribe } from "./service/transcribe"
54
-
55
- // groq transcription
56
- const groqResult = await transcribe({
57
- audioUrl: "media/audio.mp3",
58
- provider: "groq",
59
- outputFormat: "text"
60
- })
61
- console.log(groqResult.text)
62
-
63
- // fireworks with srt
64
- const fireworksResult = await transcribe({
65
- audioUrl: "media/audio.mp3",
66
- provider: "fireworks",
67
- outputFormat: "srt",
68
- outputPath: "subtitles.srt"
69
- })
70
- console.log(fireworksResult.text)
71
- console.log(fireworksResult.outputPath) // subtitles.srt
72
- ```
73
-
74
- ## output formats
75
-
76
- ### text (groq default)
77
- ```
78
- This is the transcribed text from the audio file.
79
- All words in plain text format.
80
- ```
81
-
82
- ### srt (subtitle format)
83
- ```
84
- 1
85
- 00:00:00,000 --> 00:00:02,500
86
- This is the first subtitle
87
-
88
- 2
89
- 00:00:02,500 --> 00:00:05,000
90
- This is the second subtitle
91
- ```
92
-
93
- ### vtt (web video text tracks)
94
- ```
95
- WEBVTT
96
-
97
- 00:00:00.000 --> 00:00:02.500
98
- This is the first subtitle
99
-
100
- 00:00:02.500 --> 00:00:05.000
101
- This is the second subtitle
102
- ```
103
-
104
- ## when to use
105
-
106
- use this skill when:
107
- - converting speech to text
108
- - generating subtitles for videos
109
- - creating accessible content
110
- - need word-level timing for captions
111
- - extracting dialogue from media
112
- - preparing transcripts for analysis
113
-
114
- ## provider comparison
115
-
116
- | feature | groq | fireworks |
117
- |---------|------|-----------|
118
- | speed | ultra-fast (5-10s) | moderate (15-30s) |
119
- | output | plain text | srt/vtt with timestamps |
120
- | timing | sentence-level | word-level |
121
- | use case | quick transcripts | precise subtitles |
122
-
123
- ## typical workflows
124
-
125
- ### for captions
126
- 1. record or generate audio (voice service)
127
- 2. transcribe with fireworks (this service)
128
- 3. add captions to video (captions service)
129
-
130
- ### for transcripts
131
- 1. extract audio from video
132
- 2. transcribe with groq (this service)
133
- 3. use text for analysis or documentation
134
-
135
- ## tips
136
-
137
- **provider selection:**
138
- - use **groq** when you just need the text fast
139
- - use **fireworks** when you need subtitle files
140
- - use **fireworks** for captions on social media videos
141
-
142
- **audio quality:**
143
- - clear audio transcribes more accurately
144
- - reduce background noise when possible
145
- - supports mp3, wav, m4a, and most audio formats
146
-
147
- **timing accuracy:**
148
- - fireworks provides word-level timestamps
149
- - perfect for lip-sync verification
150
- - great for precise subtitle placement
151
-
152
- ## integration with other services
153
-
154
- perfect companion for:
155
- - **captions service** - auto-generate video subtitles
156
- - **voice service** - transcribe generated speech
157
- - **sync service** - verify audio timing
158
-
159
- ## environment variables
160
-
161
- required:
162
- - `GROQ_API_KEY` - for groq provider
163
- - `FIREWORKS_API_KEY` - for fireworks provider
164
-
165
- ## processing time
166
-
167
- - **groq**: 5-10 seconds (any audio length)
168
- - **fireworks**: 15-30 seconds (depending on audio length)
169
-
170
- ## supported formats
171
-
172
- input audio:
173
- - mp3, wav, m4a, ogg, flac
174
- - video files (extracts audio automatically)
175
-
176
- output formats:
177
- - text (plain text)
178
- - srt (subtitles)
179
- - vtt (web video text tracks)