varg.ai-sdk 0.1.1 → 0.4.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +1 -1
- package/.env.example +3 -0
- package/.github/workflows/ci.yml +23 -0
- package/.husky/README.md +102 -0
- package/.husky/commit-msg +6 -0
- package/.husky/pre-commit +9 -0
- package/.husky/pre-push +6 -0
- package/.size-limit.json +8 -0
- package/.test-hooks.ts +5 -0
- package/CLAUDE.md +10 -3
- package/CONTRIBUTING.md +150 -0
- package/LICENSE.md +53 -0
- package/README.md +56 -209
- package/SKILLS.md +26 -10
- package/biome.json +7 -1
- package/bun.lock +1286 -0
- package/commitlint.config.js +22 -0
- package/docs/index.html +1130 -0
- package/docs/prompting.md +326 -0
- package/docs/react.md +834 -0
- package/docs/sdk.md +812 -0
- package/ffmpeg/CLAUDE.md +68 -0
- package/package.json +43 -10
- package/pipeline/cookbooks/scripts/animate-frames-parallel.ts +84 -0
- package/pipeline/cookbooks/scripts/combine-scenes.sh +53 -0
- package/pipeline/cookbooks/scripts/generate-frames-parallel.ts +99 -0
- package/pipeline/cookbooks/scripts/still-to-video.sh +37 -0
- package/pipeline/cookbooks/text-to-tiktok.md +669 -0
- package/pipeline/cookbooks/trendwatching.md +156 -0
- package/plan.md +281 -0
- package/scripts/.gitkeep +0 -0
- package/src/ai-sdk/cache.ts +142 -0
- package/src/ai-sdk/examples/cached-generation.ts +53 -0
- package/src/ai-sdk/examples/duet-scene-4.ts +53 -0
- package/src/ai-sdk/examples/duet-scene-5-audio.ts +32 -0
- package/src/ai-sdk/examples/duet-video.ts +56 -0
- package/src/ai-sdk/examples/editly-composition.ts +63 -0
- package/src/ai-sdk/examples/editly-test.ts +57 -0
- package/src/ai-sdk/examples/editly-video-test.ts +52 -0
- package/src/ai-sdk/examples/fal-lipsync.ts +43 -0
- package/src/ai-sdk/examples/higgsfield-image.ts +61 -0
- package/src/ai-sdk/examples/music-generation.ts +19 -0
- package/src/ai-sdk/examples/openai-sora.ts +34 -0
- package/src/ai-sdk/examples/replicate-bg-removal.ts +52 -0
- package/src/ai-sdk/examples/simpsons-scene.ts +61 -0
- package/src/ai-sdk/examples/talking-lion.ts +55 -0
- package/src/ai-sdk/examples/video-generation.ts +39 -0
- package/src/ai-sdk/examples/workflow-animated-girl.ts +104 -0
- package/src/ai-sdk/examples/workflow-before-after.ts +114 -0
- package/src/ai-sdk/examples/workflow-character-grid.ts +112 -0
- package/src/ai-sdk/examples/workflow-slideshow.ts +161 -0
- package/src/ai-sdk/file-cache.ts +112 -0
- package/src/ai-sdk/file.ts +238 -0
- package/src/ai-sdk/generate-element.ts +92 -0
- package/src/ai-sdk/generate-music.ts +46 -0
- package/src/ai-sdk/generate-video.ts +165 -0
- package/src/ai-sdk/index.ts +72 -0
- package/src/ai-sdk/music-model.ts +110 -0
- package/src/ai-sdk/providers/editly/editly.test.ts +1108 -0
- package/src/ai-sdk/providers/editly/ffmpeg.ts +60 -0
- package/src/ai-sdk/providers/editly/index.ts +817 -0
- package/src/ai-sdk/providers/editly/layers.ts +776 -0
- package/src/ai-sdk/providers/editly/plan.md +144 -0
- package/src/ai-sdk/providers/editly/types.ts +328 -0
- package/src/ai-sdk/providers/elevenlabs-provider.ts +255 -0
- package/src/ai-sdk/providers/fal-provider.ts +512 -0
- package/src/ai-sdk/providers/higgsfield.ts +379 -0
- package/src/ai-sdk/providers/openai.ts +251 -0
- package/src/ai-sdk/providers/replicate.ts +16 -0
- package/src/ai-sdk/video-model.ts +185 -0
- package/src/cli/commands/find.tsx +137 -0
- package/src/cli/commands/help.tsx +85 -0
- package/src/cli/commands/index.ts +6 -0
- package/src/cli/commands/list.tsx +238 -0
- package/src/cli/commands/render.tsx +71 -0
- package/src/cli/commands/run.tsx +511 -0
- package/src/cli/commands/which.tsx +253 -0
- package/src/cli/index.ts +114 -0
- package/src/cli/quiet.ts +44 -0
- package/src/cli/types.ts +32 -0
- package/src/cli/ui/components/Badge.tsx +29 -0
- package/src/cli/ui/components/DataTable.tsx +51 -0
- package/src/cli/ui/components/Header.tsx +23 -0
- package/src/cli/ui/components/HelpBlock.tsx +44 -0
- package/src/cli/ui/components/KeyValue.tsx +33 -0
- package/src/cli/ui/components/OptionRow.tsx +81 -0
- package/src/cli/ui/components/Separator.tsx +23 -0
- package/src/cli/ui/components/StatusBox.tsx +108 -0
- package/src/cli/ui/components/VargBox.tsx +51 -0
- package/src/cli/ui/components/VargProgress.tsx +36 -0
- package/src/cli/ui/components/VargSpinner.tsx +34 -0
- package/src/cli/ui/components/VargText.tsx +56 -0
- package/src/cli/ui/components/index.ts +19 -0
- package/src/cli/ui/index.ts +12 -0
- package/src/cli/ui/render.ts +35 -0
- package/src/cli/ui/theme.ts +63 -0
- package/src/cli/utils.ts +78 -0
- package/src/core/executor/executor.ts +201 -0
- package/src/core/executor/index.ts +13 -0
- package/src/core/executor/job.ts +214 -0
- package/src/core/executor/pipeline.ts +222 -0
- package/src/core/index.ts +11 -0
- package/src/core/registry/index.ts +9 -0
- package/src/core/registry/loader.ts +149 -0
- package/src/core/registry/registry.ts +221 -0
- package/src/core/registry/resolver.ts +206 -0
- package/src/core/schema/helpers.ts +134 -0
- package/src/core/schema/index.ts +8 -0
- package/src/core/schema/shared.ts +102 -0
- package/src/core/schema/types.ts +279 -0
- package/src/core/schema/validator.ts +92 -0
- package/src/definitions/actions/captions.ts +261 -0
- package/src/definitions/actions/edit.ts +298 -0
- package/src/definitions/actions/image.ts +125 -0
- package/src/definitions/actions/index.ts +114 -0
- package/src/definitions/actions/music.ts +205 -0
- package/src/definitions/actions/sync.ts +128 -0
- package/{action/transcribe/index.ts → src/definitions/actions/transcribe.ts} +58 -68
- package/src/definitions/actions/upload.ts +111 -0
- package/src/definitions/actions/video.ts +163 -0
- package/src/definitions/actions/voice.ts +119 -0
- package/src/definitions/index.ts +23 -0
- package/src/definitions/models/elevenlabs.ts +50 -0
- package/src/definitions/models/flux.ts +56 -0
- package/src/definitions/models/index.ts +36 -0
- package/src/definitions/models/kling.ts +56 -0
- package/src/definitions/models/llama.ts +54 -0
- package/src/definitions/models/nano-banana-pro.ts +102 -0
- package/src/definitions/models/sonauto.ts +68 -0
- package/src/definitions/models/soul.ts +65 -0
- package/src/definitions/models/wan.ts +54 -0
- package/src/definitions/models/whisper.ts +44 -0
- package/src/definitions/skills/index.ts +12 -0
- package/src/definitions/skills/talking-character.ts +87 -0
- package/src/definitions/skills/text-to-tiktok.ts +97 -0
- package/src/index.ts +118 -0
- package/src/providers/apify.ts +269 -0
- package/src/providers/base.ts +264 -0
- package/src/providers/elevenlabs.ts +217 -0
- package/src/providers/fal.ts +392 -0
- package/src/providers/ffmpeg.ts +544 -0
- package/src/providers/fireworks.ts +193 -0
- package/src/providers/groq.ts +149 -0
- package/src/providers/higgsfield.ts +145 -0
- package/src/providers/index.ts +143 -0
- package/src/providers/replicate.ts +147 -0
- package/src/providers/storage.ts +206 -0
- package/src/react/cli.ts +52 -0
- package/src/react/elements.ts +146 -0
- package/src/react/examples/branching.tsx +66 -0
- package/src/react/examples/captions-demo.tsx +37 -0
- package/src/react/examples/character-video.tsx +84 -0
- package/src/react/examples/grid.tsx +53 -0
- package/src/react/examples/layouts-demo.tsx +57 -0
- package/src/react/examples/madi.tsx +60 -0
- package/src/react/examples/music-test.tsx +35 -0
- package/src/react/examples/onlyfans-1m/workflow.tsx +88 -0
- package/src/react/examples/orange-portrait.tsx +41 -0
- package/src/react/examples/split-element-demo.tsx +60 -0
- package/src/react/examples/split-layout-demo.tsx +60 -0
- package/src/react/examples/split.tsx +41 -0
- package/src/react/examples/video-grid.tsx +46 -0
- package/src/react/index.ts +43 -0
- package/src/react/layouts/grid.tsx +28 -0
- package/src/react/layouts/index.ts +2 -0
- package/src/react/layouts/split.tsx +20 -0
- package/src/react/react.test.ts +309 -0
- package/src/react/render.ts +21 -0
- package/src/react/renderers/animate.ts +59 -0
- package/src/react/renderers/captions.ts +297 -0
- package/src/react/renderers/clip.ts +248 -0
- package/src/react/renderers/context.ts +17 -0
- package/src/react/renderers/image.ts +109 -0
- package/src/react/renderers/index.ts +22 -0
- package/src/react/renderers/music.ts +60 -0
- package/src/react/renderers/packshot.ts +84 -0
- package/src/react/renderers/progress.ts +173 -0
- package/src/react/renderers/render.ts +243 -0
- package/src/react/renderers/slider.ts +69 -0
- package/src/react/renderers/speech.ts +53 -0
- package/src/react/renderers/split.ts +91 -0
- package/src/react/renderers/subtitle.ts +16 -0
- package/src/react/renderers/swipe.ts +75 -0
- package/src/react/renderers/title.ts +17 -0
- package/src/react/renderers/utils.ts +124 -0
- package/src/react/renderers/video.ts +127 -0
- package/src/react/runtime/jsx-dev-runtime.ts +43 -0
- package/src/react/runtime/jsx-runtime.ts +35 -0
- package/src/react/types.ts +232 -0
- package/src/studio/index.ts +26 -0
- package/src/studio/scanner.ts +102 -0
- package/src/studio/server.ts +554 -0
- package/src/studio/stages.ts +251 -0
- package/src/studio/step-renderer.ts +279 -0
- package/src/studio/types.ts +60 -0
- package/src/studio/ui/cache.html +303 -0
- package/src/studio/ui/index.html +1820 -0
- package/src/tests/all.test.ts +509 -0
- package/src/tests/index.ts +33 -0
- package/src/tests/unit.test.ts +403 -0
- package/tsconfig.cli.json +8 -0
- package/tsconfig.json +21 -3
- package/TEST_RESULTS.md +0 -122
- package/action/captions/SKILL.md +0 -170
- package/action/captions/index.ts +0 -169
- package/action/edit/SKILL.md +0 -235
- package/action/edit/index.ts +0 -437
- package/action/image/SKILL.md +0 -140
- package/action/image/index.ts +0 -105
- package/action/sync/SKILL.md +0 -136
- package/action/sync/index.ts +0 -145
- package/action/transcribe/SKILL.md +0 -179
- package/action/video/SKILL.md +0 -116
- package/action/video/index.ts +0 -125
- package/action/voice/SKILL.md +0 -125
- package/action/voice/index.ts +0 -136
- package/cli/commands/find.ts +0 -58
- package/cli/commands/help.ts +0 -70
- package/cli/commands/list.ts +0 -49
- package/cli/commands/run.ts +0 -237
- package/cli/commands/which.ts +0 -66
- package/cli/discover.ts +0 -66
- package/cli/index.ts +0 -33
- package/cli/runner.ts +0 -65
- package/cli/types.ts +0 -49
- package/cli/ui.ts +0 -185
- package/index.ts +0 -75
- package/lib/README.md +0 -144
- package/lib/ai-sdk/fal.ts +0 -106
- package/lib/ai-sdk/replicate.ts +0 -107
- package/lib/elevenlabs.ts +0 -382
- package/lib/fal.ts +0 -467
- package/lib/ffmpeg.ts +0 -467
- package/lib/fireworks.ts +0 -235
- package/lib/groq.ts +0 -246
- package/lib/higgsfield.ts +0 -176
- package/lib/remotion/SKILL.md +0 -823
- package/lib/remotion/cli.ts +0 -115
- package/lib/remotion/functions.ts +0 -283
- package/lib/remotion/index.ts +0 -19
- package/lib/remotion/templates.ts +0 -73
- package/lib/replicate.ts +0 -304
- package/output.txt +0 -1
- package/test-import.ts +0 -7
- package/test-services.ts +0 -97
- package/utilities/s3.ts +0 -147
package/action/sync/SKILL.md
DELETED
|
@@ -1,136 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: video-lipsync
|
|
3
|
-
description: sync video with audio using wav2lip ai model or simple audio overlay. use when creating talking videos, matching lip movements to audio, or combining video with voiceovers.
|
|
4
|
-
allowed-tools: Read, Bash
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
# video lipsync
|
|
8
|
-
|
|
9
|
-
sync video with audio using ai-powered lipsync or simple overlay.
|
|
10
|
-
|
|
11
|
-
## methods
|
|
12
|
-
|
|
13
|
-
### wav2lip (ai-powered)
|
|
14
|
-
- uses replicate wav2lip model
|
|
15
|
-
- matches lip movements to audio
|
|
16
|
-
- works with url inputs
|
|
17
|
-
- processing time: 30-60 seconds
|
|
18
|
-
- best for: talking character videos
|
|
19
|
-
|
|
20
|
-
### overlay (simple)
|
|
21
|
-
- adds audio track to video using ffmpeg
|
|
22
|
-
- no lip movement matching
|
|
23
|
-
- works with local files
|
|
24
|
-
- processing time: instant
|
|
25
|
-
- best for: background music, voiceovers
|
|
26
|
-
|
|
27
|
-
## usage
|
|
28
|
-
|
|
29
|
-
### sync with method selection
|
|
30
|
-
```bash
|
|
31
|
-
bun run service/sync.ts sync <videoUrl> <audioUrl> [method] [output]
|
|
32
|
-
```
|
|
33
|
-
|
|
34
|
-
**parameters:**
|
|
35
|
-
- `videoUrl` (required): video file path or url
|
|
36
|
-
- `audioUrl` (required): audio file path or url
|
|
37
|
-
- `method` (optional): "wav2lip" or "overlay" (default: overlay)
|
|
38
|
-
- `output` (optional): output path (default: output-synced.mp4)
|
|
39
|
-
|
|
40
|
-
**example:**
|
|
41
|
-
```bash
|
|
42
|
-
bun run service/sync.ts sync video.mp4 audio.mp3 overlay output.mp4
|
|
43
|
-
```
|
|
44
|
-
|
|
45
|
-
### wav2lip direct
|
|
46
|
-
```bash
|
|
47
|
-
bun run service/sync.ts wav2lip <videoUrl> <audioUrl>
|
|
48
|
-
```
|
|
49
|
-
|
|
50
|
-
**example:**
|
|
51
|
-
```bash
|
|
52
|
-
bun run service/sync.ts wav2lip https://example.com/character.mp4 https://example.com/voice.mp3
|
|
53
|
-
```
|
|
54
|
-
|
|
55
|
-
### overlay direct
|
|
56
|
-
```bash
|
|
57
|
-
bun run service/sync.ts overlay <videoPath> <audioPath> [output]
|
|
58
|
-
```
|
|
59
|
-
|
|
60
|
-
**example:**
|
|
61
|
-
```bash
|
|
62
|
-
bun run service/sync.ts overlay character.mp4 narration.mp3 final.mp4
|
|
63
|
-
```
|
|
64
|
-
|
|
65
|
-
## as library
|
|
66
|
-
|
|
67
|
-
```typescript
|
|
68
|
-
import { lipsync, lipsyncWav2Lip, lipsyncOverlay } from "./service/sync"
|
|
69
|
-
|
|
70
|
-
// flexible sync
|
|
71
|
-
const result = await lipsync({
|
|
72
|
-
videoUrl: "video.mp4",
|
|
73
|
-
audioUrl: "audio.mp3",
|
|
74
|
-
method: "wav2lip",
|
|
75
|
-
output: "synced.mp4"
|
|
76
|
-
})
|
|
77
|
-
|
|
78
|
-
// wav2lip specific
|
|
79
|
-
const lipsynced = await lipsyncWav2Lip({
|
|
80
|
-
videoUrl: "https://example.com/video.mp4",
|
|
81
|
-
audioUrl: "https://example.com/audio.mp3"
|
|
82
|
-
})
|
|
83
|
-
|
|
84
|
-
// overlay specific
|
|
85
|
-
const overlayed = await lipsyncOverlay(
|
|
86
|
-
"video.mp4",
|
|
87
|
-
"audio.mp3",
|
|
88
|
-
"output.mp4"
|
|
89
|
-
)
|
|
90
|
-
```
|
|
91
|
-
|
|
92
|
-
## when to use each method
|
|
93
|
-
|
|
94
|
-
### use wav2lip when:
|
|
95
|
-
- creating talking character videos
|
|
96
|
-
- lip movements must match speech
|
|
97
|
-
- have urls for video and audio
|
|
98
|
-
- quality is more important than speed
|
|
99
|
-
|
|
100
|
-
### use overlay when:
|
|
101
|
-
- adding background music
|
|
102
|
-
- audio doesn't require lip sync
|
|
103
|
-
- working with local files
|
|
104
|
-
- need instant processing
|
|
105
|
-
|
|
106
|
-
## typical workflow
|
|
107
|
-
|
|
108
|
-
1. generate character image (image service)
|
|
109
|
-
2. animate character (video service)
|
|
110
|
-
3. generate voiceover (voice service)
|
|
111
|
-
4. sync with wav2lip (this service)
|
|
112
|
-
5. add captions (captions service)
|
|
113
|
-
|
|
114
|
-
## tips
|
|
115
|
-
|
|
116
|
-
**for wav2lip:**
|
|
117
|
-
- use close-up character shots for best results
|
|
118
|
-
- ensure audio is clear and well-paced
|
|
119
|
-
- video should show face clearly
|
|
120
|
-
- works best with 5-10 second clips
|
|
121
|
-
|
|
122
|
-
**for overlay:**
|
|
123
|
-
- match audio length to video length
|
|
124
|
-
- ffmpeg will loop short audio or trim long audio
|
|
125
|
-
- preserves original video quality
|
|
126
|
-
|
|
127
|
-
## environment variables
|
|
128
|
-
|
|
129
|
-
required (for wav2lip):
|
|
130
|
-
- `REPLICATE_API_TOKEN` - for wav2lip model
|
|
131
|
-
|
|
132
|
-
no special requirements for overlay method (ffmpeg must be installed)
|
|
133
|
-
|
|
134
|
-
## error handling
|
|
135
|
-
|
|
136
|
-
if wav2lip fails, the service automatically falls back to overlay method with a warning message.
|
package/action/sync/index.ts
DELETED
|
@@ -1,145 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env bun
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* lipsync service - combines video with audio using various methods
|
|
5
|
-
* supports wav2lip, synclabs, and simple audio overlay
|
|
6
|
-
*/
|
|
7
|
-
|
|
8
|
-
import type { ActionMeta } from "../../cli/types";
|
|
9
|
-
import { addAudio } from "../../lib/ffmpeg";
|
|
10
|
-
import { runModel } from "../../lib/replicate";
|
|
11
|
-
|
|
12
|
-
export const meta: ActionMeta = {
|
|
13
|
-
name: "sync",
|
|
14
|
-
type: "action",
|
|
15
|
-
description: "sync audio to video (lipsync)",
|
|
16
|
-
inputType: "video+audio",
|
|
17
|
-
outputType: "video",
|
|
18
|
-
schema: {
|
|
19
|
-
input: {
|
|
20
|
-
type: "object",
|
|
21
|
-
required: ["video", "audio"],
|
|
22
|
-
properties: {
|
|
23
|
-
video: {
|
|
24
|
-
type: "string",
|
|
25
|
-
format: "file-path",
|
|
26
|
-
description: "input video file or url",
|
|
27
|
-
},
|
|
28
|
-
audio: {
|
|
29
|
-
type: "string",
|
|
30
|
-
format: "file-path",
|
|
31
|
-
description: "audio file or url to sync",
|
|
32
|
-
},
|
|
33
|
-
method: {
|
|
34
|
-
type: "string",
|
|
35
|
-
enum: ["wav2lip", "overlay"],
|
|
36
|
-
default: "overlay",
|
|
37
|
-
description: "sync method (wav2lip requires urls)",
|
|
38
|
-
},
|
|
39
|
-
output: {
|
|
40
|
-
type: "string",
|
|
41
|
-
format: "file-path",
|
|
42
|
-
description: "output video path",
|
|
43
|
-
},
|
|
44
|
-
},
|
|
45
|
-
},
|
|
46
|
-
output: { type: "string", format: "file-path", description: "video path" },
|
|
47
|
-
},
|
|
48
|
-
async run(options) {
|
|
49
|
-
const { video, audio, method, output } = options as {
|
|
50
|
-
video: string;
|
|
51
|
-
audio: string;
|
|
52
|
-
method?: "wav2lip" | "overlay";
|
|
53
|
-
output?: string;
|
|
54
|
-
};
|
|
55
|
-
return lipsync({ videoUrl: video, audioUrl: audio, method, output });
|
|
56
|
-
},
|
|
57
|
-
};
|
|
58
|
-
|
|
59
|
-
// types
|
|
60
|
-
export interface LipsyncOptions {
|
|
61
|
-
videoUrl: string;
|
|
62
|
-
audioUrl: string;
|
|
63
|
-
method?: "wav2lip" | "synclabs" | "overlay";
|
|
64
|
-
output?: string;
|
|
65
|
-
}
|
|
66
|
-
|
|
67
|
-
export interface Wav2LipOptions {
|
|
68
|
-
videoUrl: string;
|
|
69
|
-
audioUrl: string;
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
// core functions
|
|
73
|
-
export async function lipsync(options: LipsyncOptions) {
|
|
74
|
-
const { videoUrl, audioUrl, method = "overlay", output } = options;
|
|
75
|
-
|
|
76
|
-
if (!videoUrl || !audioUrl) {
|
|
77
|
-
throw new Error("videoUrl and audioUrl are required");
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
console.log(`[sync] syncing video with audio using ${method}...`);
|
|
81
|
-
|
|
82
|
-
switch (method) {
|
|
83
|
-
case "wav2lip":
|
|
84
|
-
return await lipsyncWav2Lip({ videoUrl, audioUrl });
|
|
85
|
-
|
|
86
|
-
case "synclabs":
|
|
87
|
-
console.log(
|
|
88
|
-
`[sync] synclabs not yet implemented, falling back to overlay`,
|
|
89
|
-
);
|
|
90
|
-
return await lipsyncOverlay(videoUrl, audioUrl, output);
|
|
91
|
-
|
|
92
|
-
case "overlay":
|
|
93
|
-
return await lipsyncOverlay(videoUrl, audioUrl, output);
|
|
94
|
-
|
|
95
|
-
default:
|
|
96
|
-
throw new Error(`unknown lipsync method: ${method}`);
|
|
97
|
-
}
|
|
98
|
-
}
|
|
99
|
-
|
|
100
|
-
export async function lipsyncWav2Lip(options: Wav2LipOptions) {
|
|
101
|
-
const { videoUrl, audioUrl } = options;
|
|
102
|
-
|
|
103
|
-
console.log(`[sync] using wav2lip model...`);
|
|
104
|
-
|
|
105
|
-
try {
|
|
106
|
-
const output = await runModel("devxpy/cog-wav2lip", {
|
|
107
|
-
face: videoUrl,
|
|
108
|
-
audio: audioUrl,
|
|
109
|
-
});
|
|
110
|
-
|
|
111
|
-
console.log(`[sync] wav2lip completed`);
|
|
112
|
-
return output;
|
|
113
|
-
} catch (error) {
|
|
114
|
-
console.error(`[sync] wav2lip error:`, error);
|
|
115
|
-
throw error;
|
|
116
|
-
}
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
export async function lipsyncOverlay(
|
|
120
|
-
videoPath: string,
|
|
121
|
-
audioPath: string,
|
|
122
|
-
output: string = "output-synced.mp4",
|
|
123
|
-
) {
|
|
124
|
-
console.log(`[sync] overlaying audio on video...`);
|
|
125
|
-
|
|
126
|
-
try {
|
|
127
|
-
const result = await addAudio({
|
|
128
|
-
videoPath,
|
|
129
|
-
audioPath,
|
|
130
|
-
output,
|
|
131
|
-
});
|
|
132
|
-
|
|
133
|
-
console.log(`[sync] overlay completed`);
|
|
134
|
-
return result;
|
|
135
|
-
} catch (error) {
|
|
136
|
-
console.error(`[sync] overlay error:`, error);
|
|
137
|
-
throw error;
|
|
138
|
-
}
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
// cli
|
|
142
|
-
if (import.meta.main) {
|
|
143
|
-
const { runCli } = await import("../../cli/runner");
|
|
144
|
-
runCli(meta);
|
|
145
|
-
}
|
|
@@ -1,179 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: audio-transcription
|
|
3
|
-
description: transcribe audio to text or subtitles using groq whisper or fireworks with srt/vtt support. use when converting speech to text, generating subtitles, or need word-level timestamps for captions.
|
|
4
|
-
allowed-tools: Read, Bash
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
# audio transcription
|
|
8
|
-
|
|
9
|
-
convert audio to text or subtitle files using ai transcription.
|
|
10
|
-
|
|
11
|
-
## providers
|
|
12
|
-
|
|
13
|
-
### groq (ultra-fast)
|
|
14
|
-
- uses whisper-large-v3
|
|
15
|
-
- fastest transcription (~5-10 seconds)
|
|
16
|
-
- plain text output
|
|
17
|
-
- sentence-level timing
|
|
18
|
-
- best for: quick transcripts, text extraction
|
|
19
|
-
|
|
20
|
-
### fireworks (word-level)
|
|
21
|
-
- uses whisper-v3
|
|
22
|
-
- word-level timestamps
|
|
23
|
-
- outputs srt or vtt format
|
|
24
|
-
- precise subtitle timing
|
|
25
|
-
- best for: captions, subtitles, timed transcripts
|
|
26
|
-
|
|
27
|
-
## usage
|
|
28
|
-
|
|
29
|
-
### basic transcription
|
|
30
|
-
```bash
|
|
31
|
-
bun run service/transcribe.ts <audioUrl> <provider> [outputPath]
|
|
32
|
-
```
|
|
33
|
-
|
|
34
|
-
**example:**
|
|
35
|
-
```bash
|
|
36
|
-
bun run service/transcribe.ts media/audio.mp3 groq
|
|
37
|
-
bun run service/transcribe.ts media/audio.mp3 fireworks output.srt
|
|
38
|
-
```
|
|
39
|
-
|
|
40
|
-
### with output format
|
|
41
|
-
```bash
|
|
42
|
-
bun run lib/fireworks.ts <audioPath> <outputPath>
|
|
43
|
-
```
|
|
44
|
-
|
|
45
|
-
**example:**
|
|
46
|
-
```bash
|
|
47
|
-
bun run lib/fireworks.ts media/audio.mp3 output.srt
|
|
48
|
-
```
|
|
49
|
-
|
|
50
|
-
## as library
|
|
51
|
-
|
|
52
|
-
```typescript
|
|
53
|
-
import { transcribe } from "./service/transcribe"
|
|
54
|
-
|
|
55
|
-
// groq transcription
|
|
56
|
-
const groqResult = await transcribe({
|
|
57
|
-
audioUrl: "media/audio.mp3",
|
|
58
|
-
provider: "groq",
|
|
59
|
-
outputFormat: "text"
|
|
60
|
-
})
|
|
61
|
-
console.log(groqResult.text)
|
|
62
|
-
|
|
63
|
-
// fireworks with srt
|
|
64
|
-
const fireworksResult = await transcribe({
|
|
65
|
-
audioUrl: "media/audio.mp3",
|
|
66
|
-
provider: "fireworks",
|
|
67
|
-
outputFormat: "srt",
|
|
68
|
-
outputPath: "subtitles.srt"
|
|
69
|
-
})
|
|
70
|
-
console.log(fireworksResult.text)
|
|
71
|
-
console.log(fireworksResult.outputPath) // subtitles.srt
|
|
72
|
-
```
|
|
73
|
-
|
|
74
|
-
## output formats
|
|
75
|
-
|
|
76
|
-
### text (groq default)
|
|
77
|
-
```
|
|
78
|
-
This is the transcribed text from the audio file.
|
|
79
|
-
All words in plain text format.
|
|
80
|
-
```
|
|
81
|
-
|
|
82
|
-
### srt (subtitle format)
|
|
83
|
-
```
|
|
84
|
-
1
|
|
85
|
-
00:00:00,000 --> 00:00:02,500
|
|
86
|
-
This is the first subtitle
|
|
87
|
-
|
|
88
|
-
2
|
|
89
|
-
00:00:02,500 --> 00:00:05,000
|
|
90
|
-
This is the second subtitle
|
|
91
|
-
```
|
|
92
|
-
|
|
93
|
-
### vtt (web video text tracks)
|
|
94
|
-
```
|
|
95
|
-
WEBVTT
|
|
96
|
-
|
|
97
|
-
00:00:00.000 --> 00:00:02.500
|
|
98
|
-
This is the first subtitle
|
|
99
|
-
|
|
100
|
-
00:00:02.500 --> 00:00:05.000
|
|
101
|
-
This is the second subtitle
|
|
102
|
-
```
|
|
103
|
-
|
|
104
|
-
## when to use
|
|
105
|
-
|
|
106
|
-
use this skill when:
|
|
107
|
-
- converting speech to text
|
|
108
|
-
- generating subtitles for videos
|
|
109
|
-
- creating accessible content
|
|
110
|
-
- need word-level timing for captions
|
|
111
|
-
- extracting dialogue from media
|
|
112
|
-
- preparing transcripts for analysis
|
|
113
|
-
|
|
114
|
-
## provider comparison
|
|
115
|
-
|
|
116
|
-
| feature | groq | fireworks |
|
|
117
|
-
|---------|------|-----------|
|
|
118
|
-
| speed | ultra-fast (5-10s) | moderate (15-30s) |
|
|
119
|
-
| output | plain text | srt/vtt with timestamps |
|
|
120
|
-
| timing | sentence-level | word-level |
|
|
121
|
-
| use case | quick transcripts | precise subtitles |
|
|
122
|
-
|
|
123
|
-
## typical workflows
|
|
124
|
-
|
|
125
|
-
### for captions
|
|
126
|
-
1. record or generate audio (voice service)
|
|
127
|
-
2. transcribe with fireworks (this service)
|
|
128
|
-
3. add captions to video (captions service)
|
|
129
|
-
|
|
130
|
-
### for transcripts
|
|
131
|
-
1. extract audio from video
|
|
132
|
-
2. transcribe with groq (this service)
|
|
133
|
-
3. use text for analysis or documentation
|
|
134
|
-
|
|
135
|
-
## tips
|
|
136
|
-
|
|
137
|
-
**provider selection:**
|
|
138
|
-
- use **groq** when you just need the text fast
|
|
139
|
-
- use **fireworks** when you need subtitle files
|
|
140
|
-
- use **fireworks** for captions on social media videos
|
|
141
|
-
|
|
142
|
-
**audio quality:**
|
|
143
|
-
- clear audio transcribes more accurately
|
|
144
|
-
- reduce background noise when possible
|
|
145
|
-
- supports mp3, wav, m4a, and most audio formats
|
|
146
|
-
|
|
147
|
-
**timing accuracy:**
|
|
148
|
-
- fireworks provides word-level timestamps
|
|
149
|
-
- perfect for lip-sync verification
|
|
150
|
-
- great for precise subtitle placement
|
|
151
|
-
|
|
152
|
-
## integration with other services
|
|
153
|
-
|
|
154
|
-
perfect companion for:
|
|
155
|
-
- **captions service** - auto-generate video subtitles
|
|
156
|
-
- **voice service** - transcribe generated speech
|
|
157
|
-
- **sync service** - verify audio timing
|
|
158
|
-
|
|
159
|
-
## environment variables
|
|
160
|
-
|
|
161
|
-
required:
|
|
162
|
-
- `GROQ_API_KEY` - for groq provider
|
|
163
|
-
- `FIREWORKS_API_KEY` - for fireworks provider
|
|
164
|
-
|
|
165
|
-
## processing time
|
|
166
|
-
|
|
167
|
-
- **groq**: 5-10 seconds (any audio length)
|
|
168
|
-
- **fireworks**: 15-30 seconds (depending on audio length)
|
|
169
|
-
|
|
170
|
-
## supported formats
|
|
171
|
-
|
|
172
|
-
input audio:
|
|
173
|
-
- mp3, wav, m4a, ogg, flac
|
|
174
|
-
- video files (extracts audio automatically)
|
|
175
|
-
|
|
176
|
-
output formats:
|
|
177
|
-
- text (plain text)
|
|
178
|
-
- srt (subtitles)
|
|
179
|
-
- vtt (web video text tracks)
|
package/action/video/SKILL.md
DELETED
|
@@ -1,116 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: video-generation
|
|
3
|
-
description: generate videos from images or text prompts using fal.ai. use when user wants to animate images, create videos from text, or needs ai video generation with 5-10 second clips.
|
|
4
|
-
allowed-tools: Read, Bash
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
# video generation
|
|
8
|
-
|
|
9
|
-
generate ai videos from images or text using fal.ai with automatic s3 upload support.
|
|
10
|
-
|
|
11
|
-
## capabilities
|
|
12
|
-
|
|
13
|
-
- **image-to-video**: animate static images with motion prompts
|
|
14
|
-
- **text-to-video**: generate videos directly from text descriptions
|
|
15
|
-
- supports 5 or 10 second duration
|
|
16
|
-
- automatic s3 upload
|
|
17
|
-
|
|
18
|
-
## usage
|
|
19
|
-
|
|
20
|
-
### generate from image
|
|
21
|
-
```bash
|
|
22
|
-
bun run service/video.ts from_image <prompt> <imageUrl> [duration] [upload]
|
|
23
|
-
```
|
|
24
|
-
|
|
25
|
-
**parameters:**
|
|
26
|
-
- `prompt` (required): motion description (e.g., "camera pan left")
|
|
27
|
-
- `imageUrl` (required): url of the source image
|
|
28
|
-
- `duration` (optional): 5 or 10 seconds (default: 5)
|
|
29
|
-
- `upload` (optional): "true" to upload to s3
|
|
30
|
-
|
|
31
|
-
**example:**
|
|
32
|
-
```bash
|
|
33
|
-
bun run service/video.ts from_image "person talking naturally" https://example.com/headshot.jpg 5 true
|
|
34
|
-
```
|
|
35
|
-
|
|
36
|
-
### generate from text
|
|
37
|
-
```bash
|
|
38
|
-
bun run service/video.ts from_text <prompt> [duration] [upload]
|
|
39
|
-
```
|
|
40
|
-
|
|
41
|
-
**parameters:**
|
|
42
|
-
- `prompt` (required): video scene description
|
|
43
|
-
- `duration` (optional): 5 or 10 seconds (default: 5)
|
|
44
|
-
- `upload` (optional): "true" to upload to s3
|
|
45
|
-
|
|
46
|
-
**example:**
|
|
47
|
-
```bash
|
|
48
|
-
bun run service/video.ts from_text "waves crashing on beach at sunset" 10 true
|
|
49
|
-
```
|
|
50
|
-
|
|
51
|
-
## as library
|
|
52
|
-
|
|
53
|
-
```typescript
|
|
54
|
-
import { generateVideoFromImage, generateVideoFromText } from "./service/video"
|
|
55
|
-
|
|
56
|
-
// animate an image
|
|
57
|
-
const videoResult = await generateVideoFromImage(
|
|
58
|
-
"camera zoom in slowly",
|
|
59
|
-
"https://example.com/portrait.jpg",
|
|
60
|
-
{ duration: 5, upload: true }
|
|
61
|
-
)
|
|
62
|
-
console.log(videoResult.videoUrl)
|
|
63
|
-
console.log(videoResult.uploaded) // s3 url if upload=true
|
|
64
|
-
|
|
65
|
-
// generate from text
|
|
66
|
-
const textVideo = await generateVideoFromText(
|
|
67
|
-
"forest path with sunlight filtering through trees",
|
|
68
|
-
{ duration: 10, upload: true }
|
|
69
|
-
)
|
|
70
|
-
```
|
|
71
|
-
|
|
72
|
-
## output
|
|
73
|
-
|
|
74
|
-
returns `VideoGenerationResult`:
|
|
75
|
-
```typescript
|
|
76
|
-
{
|
|
77
|
-
videoUrl: string, // direct video url
|
|
78
|
-
duration?: number, // actual video duration
|
|
79
|
-
uploaded?: string // s3 url if upload requested
|
|
80
|
-
}
|
|
81
|
-
```
|
|
82
|
-
|
|
83
|
-
## when to use
|
|
84
|
-
|
|
85
|
-
use this skill when:
|
|
86
|
-
- animating character headshots or portraits
|
|
87
|
-
- creating motion from static images
|
|
88
|
-
- generating video clips from text descriptions
|
|
89
|
-
- preparing videos for lipsync or editing pipeline
|
|
90
|
-
- need short form video content (5-10s)
|
|
91
|
-
|
|
92
|
-
## tips
|
|
93
|
-
|
|
94
|
-
**for character animation:**
|
|
95
|
-
- use subtle prompts like "person talking naturally" or "slight head movement"
|
|
96
|
-
- keep duration at 5 seconds for character shots
|
|
97
|
-
- combine with lipsync for talking videos
|
|
98
|
-
|
|
99
|
-
**for scene generation:**
|
|
100
|
-
- be descriptive about camera movement and scene dynamics
|
|
101
|
-
- 10 seconds works better for landscape/scene videos
|
|
102
|
-
|
|
103
|
-
## environment variables
|
|
104
|
-
|
|
105
|
-
required:
|
|
106
|
-
- `FAL_API_KEY` - for fal video generation
|
|
107
|
-
|
|
108
|
-
optional (for s3 upload):
|
|
109
|
-
- `CLOUDFLARE_R2_API_URL`
|
|
110
|
-
- `CLOUDFLARE_ACCESS_KEY_ID`
|
|
111
|
-
- `CLOUDFLARE_ACCESS_SECRET`
|
|
112
|
-
- `CLOUDFLARE_R2_BUCKET`
|
|
113
|
-
|
|
114
|
-
## generation time
|
|
115
|
-
|
|
116
|
-
expect 2-3 minutes per video clip
|
package/action/video/index.ts
DELETED
|
@@ -1,125 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env bun
|
|
2
|
-
/**
|
|
3
|
-
* video generation service combining fal and higgsfield
|
|
4
|
-
* usage: bun run service/video.ts <command> <args>
|
|
5
|
-
*/
|
|
6
|
-
|
|
7
|
-
import type { ActionMeta } from "../../cli/types";
|
|
8
|
-
import { imageToVideo, textToVideo } from "../../lib/fal";
|
|
9
|
-
import { uploadFromUrl } from "../../utilities/s3";
|
|
10
|
-
|
|
11
|
-
export const meta: ActionMeta = {
|
|
12
|
-
name: "video",
|
|
13
|
-
type: "action",
|
|
14
|
-
description: "generate video from text or image",
|
|
15
|
-
inputType: "text/image",
|
|
16
|
-
outputType: "video",
|
|
17
|
-
schema: {
|
|
18
|
-
input: {
|
|
19
|
-
type: "object",
|
|
20
|
-
required: ["prompt"],
|
|
21
|
-
properties: {
|
|
22
|
-
prompt: { type: "string", description: "what to generate" },
|
|
23
|
-
image: {
|
|
24
|
-
type: "string",
|
|
25
|
-
format: "file-path",
|
|
26
|
-
description: "input image (enables image-to-video)",
|
|
27
|
-
},
|
|
28
|
-
duration: {
|
|
29
|
-
type: "integer",
|
|
30
|
-
enum: [5, 10],
|
|
31
|
-
default: 5,
|
|
32
|
-
description: "video duration in seconds",
|
|
33
|
-
},
|
|
34
|
-
},
|
|
35
|
-
},
|
|
36
|
-
output: { type: "string", format: "file-path", description: "video path" },
|
|
37
|
-
},
|
|
38
|
-
async run(options) {
|
|
39
|
-
const { prompt, image, duration } = options as {
|
|
40
|
-
prompt: string;
|
|
41
|
-
image?: string;
|
|
42
|
-
duration?: 5 | 10;
|
|
43
|
-
};
|
|
44
|
-
if (image) {
|
|
45
|
-
return generateVideoFromImage(prompt, image, { duration });
|
|
46
|
-
}
|
|
47
|
-
return generateVideoFromText(prompt, { duration });
|
|
48
|
-
},
|
|
49
|
-
};
|
|
50
|
-
|
|
51
|
-
export interface VideoGenerationResult {
|
|
52
|
-
videoUrl: string;
|
|
53
|
-
duration?: number;
|
|
54
|
-
uploaded?: string;
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
export async function generateVideoFromImage(
|
|
58
|
-
prompt: string,
|
|
59
|
-
imageUrl: string,
|
|
60
|
-
options: { duration?: 5 | 10; upload?: boolean } = {},
|
|
61
|
-
): Promise<VideoGenerationResult> {
|
|
62
|
-
console.log("[service/video] generating video from image");
|
|
63
|
-
|
|
64
|
-
const result = await imageToVideo({
|
|
65
|
-
prompt,
|
|
66
|
-
imageUrl,
|
|
67
|
-
duration: options.duration,
|
|
68
|
-
});
|
|
69
|
-
|
|
70
|
-
const videoUrl = result.data?.video?.url;
|
|
71
|
-
if (!videoUrl) {
|
|
72
|
-
throw new Error("no video url in result");
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
let uploaded: string | undefined;
|
|
76
|
-
if (options.upload) {
|
|
77
|
-
const timestamp = Date.now();
|
|
78
|
-
const objectKey = `videos/generated/${timestamp}.mp4`;
|
|
79
|
-
uploaded = await uploadFromUrl(videoUrl, objectKey);
|
|
80
|
-
console.log(`[service/video] uploaded to ${uploaded}`);
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
return {
|
|
84
|
-
videoUrl,
|
|
85
|
-
duration: result.data?.duration,
|
|
86
|
-
uploaded,
|
|
87
|
-
};
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
export async function generateVideoFromText(
|
|
91
|
-
prompt: string,
|
|
92
|
-
options: { duration?: 5 | 10; upload?: boolean } = {},
|
|
93
|
-
): Promise<VideoGenerationResult> {
|
|
94
|
-
console.log("[service/video] generating video from text");
|
|
95
|
-
|
|
96
|
-
const result = await textToVideo({
|
|
97
|
-
prompt,
|
|
98
|
-
duration: options.duration,
|
|
99
|
-
});
|
|
100
|
-
|
|
101
|
-
const videoUrl = result.data?.video?.url;
|
|
102
|
-
if (!videoUrl) {
|
|
103
|
-
throw new Error("no video url in result");
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
let uploaded: string | undefined;
|
|
107
|
-
if (options.upload) {
|
|
108
|
-
const timestamp = Date.now();
|
|
109
|
-
const objectKey = `videos/generated/${timestamp}.mp4`;
|
|
110
|
-
uploaded = await uploadFromUrl(videoUrl, objectKey);
|
|
111
|
-
console.log(`[service/video] uploaded to ${uploaded}`);
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
return {
|
|
115
|
-
videoUrl,
|
|
116
|
-
duration: result.data?.duration,
|
|
117
|
-
uploaded,
|
|
118
|
-
};
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
// cli
|
|
122
|
-
if (import.meta.main) {
|
|
123
|
-
const { runCli } = await import("../../cli/runner");
|
|
124
|
-
runCli(meta);
|
|
125
|
-
}
|