varg.ai-sdk 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +7 -0
- package/.env.example +24 -0
- package/CLAUDE.md +118 -0
- package/README.md +231 -0
- package/SKILLS.md +157 -0
- package/STRUCTURE.md +92 -0
- package/TEST_RESULTS.md +122 -0
- package/action/captions/SKILL.md +170 -0
- package/action/captions/index.ts +227 -0
- package/action/edit/SKILL.md +235 -0
- package/action/edit/index.ts +493 -0
- package/action/image/SKILL.md +140 -0
- package/action/image/index.ts +112 -0
- package/action/sync/SKILL.md +136 -0
- package/action/sync/index.ts +187 -0
- package/action/transcribe/SKILL.md +179 -0
- package/action/transcribe/index.ts +227 -0
- package/action/video/SKILL.md +116 -0
- package/action/video/index.ts +135 -0
- package/action/voice/SKILL.md +125 -0
- package/action/voice/index.ts +201 -0
- package/biome.json +33 -0
- package/index.ts +38 -0
- package/lib/README.md +144 -0
- package/lib/ai-sdk/fal.ts +106 -0
- package/lib/ai-sdk/replicate.ts +107 -0
- package/lib/elevenlabs.ts +382 -0
- package/lib/fal.ts +478 -0
- package/lib/ffmpeg.ts +467 -0
- package/lib/fireworks.ts +235 -0
- package/lib/groq.ts +246 -0
- package/lib/higgsfield.ts +176 -0
- package/lib/remotion/SKILL.md +823 -0
- package/lib/remotion/cli.ts +115 -0
- package/lib/remotion/functions.ts +283 -0
- package/lib/remotion/index.ts +19 -0
- package/lib/remotion/templates.ts +73 -0
- package/lib/replicate.ts +304 -0
- package/output.txt +1 -0
- package/package.json +35 -0
- package/pipeline/cookbooks/SKILL.md +285 -0
- package/pipeline/cookbooks/remotion-video.md +585 -0
- package/pipeline/cookbooks/round-video-character.md +337 -0
- package/pipeline/cookbooks/talking-character.md +59 -0
- package/test-import.ts +7 -0
- package/test-services.ts +97 -0
- package/tsconfig.json +29 -0
- package/utilities/s3.ts +147 -0
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: image-generation
|
|
3
|
+
description: generate ai images using fal (flux models) or higgsfield soul characters. use when user wants to create images, headshots, character portraits, or needs image generation with specific models.
|
|
4
|
+
allowed-tools: Read, Bash
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# image generation
|
|
8
|
+
|
|
9
|
+
generate ai images using multiple providers with automatic s3 upload support.
|
|
10
|
+
|
|
11
|
+
## providers
|
|
12
|
+
|
|
13
|
+
### fal (flux models)
|
|
14
|
+
- high quality image generation
|
|
15
|
+
- supports flux-pro, flux-dev, and other flux models
|
|
16
|
+
- configurable model selection
|
|
17
|
+
- automatic image opening on generation
|
|
18
|
+
|
|
19
|
+
### higgsfield soul
|
|
20
|
+
- character headshot generation
|
|
21
|
+
- consistent character style
|
|
22
|
+
- professional portrait quality
|
|
23
|
+
- custom style references
|
|
24
|
+
|
|
25
|
+
## usage
|
|
26
|
+
|
|
27
|
+
### generate with fal
|
|
28
|
+
```bash
|
|
29
|
+
bun run service/image.ts fal "a beautiful sunset over mountains" [model] [upload]
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
**parameters:**
|
|
33
|
+
- `prompt` (required): text description of the image
|
|
34
|
+
- `model` (optional): fal model to use (default: flux-pro)
|
|
35
|
+
- `upload` (optional): "true" to upload to s3
|
|
36
|
+
|
|
37
|
+
**example:**
|
|
38
|
+
```bash
|
|
39
|
+
bun run service/image.ts fal "professional headshot, studio lighting" true
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### generate with soul
|
|
43
|
+
```bash
|
|
44
|
+
bun run service/image.ts soul "friendly person smiling" [styleId] [upload]
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
**parameters:**
|
|
48
|
+
- `prompt` (required): character description
|
|
49
|
+
- `styleId` (optional): custom higgsfield style reference
|
|
50
|
+
- `upload` (optional): "true" to upload to s3
|
|
51
|
+
|
|
52
|
+
**example:**
|
|
53
|
+
```bash
|
|
54
|
+
bun run service/image.ts soul "professional business woman" true
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## as library
|
|
58
|
+
|
|
59
|
+
```typescript
|
|
60
|
+
import { generateWithFal, generateWithSoul } from "./service/image"
|
|
61
|
+
|
|
62
|
+
// fal generation
|
|
63
|
+
const falResult = await generateWithFal("sunset over ocean", {
|
|
64
|
+
model: "fal-ai/flux-pro/v1.1",
|
|
65
|
+
upload: true
|
|
66
|
+
})
|
|
67
|
+
console.log(falResult.imageUrl)
|
|
68
|
+
console.log(falResult.uploaded) // s3 url if upload=true
|
|
69
|
+
|
|
70
|
+
// soul generation
|
|
71
|
+
const soulResult = await generateWithSoul("friendly character", {
|
|
72
|
+
upload: true
|
|
73
|
+
})
|
|
74
|
+
console.log(soulResult.imageUrl)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## output
|
|
78
|
+
|
|
79
|
+
returns `ImageGenerationResult`:
|
|
80
|
+
```typescript
|
|
81
|
+
{
|
|
82
|
+
imageUrl: string, // direct image url
|
|
83
|
+
uploaded?: string // s3 url if upload requested
|
|
84
|
+
}
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## when to use
|
|
88
|
+
|
|
89
|
+
use this skill when:
|
|
90
|
+
- generating images from text descriptions
|
|
91
|
+
- creating character headshots or portraits
|
|
92
|
+
- need consistent character style (use soul)
|
|
93
|
+
- need high quality photorealistic images (use fal)
|
|
94
|
+
- preparing images for video generation pipeline
|
|
95
|
+
|
|
96
|
+
## nsfw filtering and content moderation
|
|
97
|
+
|
|
98
|
+
fal.ai has content safety filters that may flag images as nsfw:
|
|
99
|
+
|
|
100
|
+
**common triggers:**
|
|
101
|
+
- prompts mentioning "athletic wear", "fitted sportswear", "gym clothes"
|
|
102
|
+
- certain body descriptions even when clothed
|
|
103
|
+
- prompts that could be interpreted as revealing clothing
|
|
104
|
+
|
|
105
|
+
**symptoms:**
|
|
106
|
+
- image generation returns but file is empty (often 7.6KB)
|
|
107
|
+
- no error message, just an unusable file
|
|
108
|
+
- happens inconsistently across similar prompts
|
|
109
|
+
|
|
110
|
+
**solutions:**
|
|
111
|
+
- specify modest, full-coverage clothing explicitly:
|
|
112
|
+
- ✅ "long sleeve athletic top and full length leggings"
|
|
113
|
+
- ✅ "fully covered in modest workout attire"
|
|
114
|
+
- ❌ "athletic wear" (too vague, may trigger filter)
|
|
115
|
+
- ❌ "fitted sportswear" (may trigger filter)
|
|
116
|
+
- add "professional", "modest", "appropriate" to descriptions
|
|
117
|
+
- if multiple images in batch get flagged, adjust prompts to be more explicit about coverage
|
|
118
|
+
- always check output file sizes - empty files (< 10KB) indicate nsfw filtering
|
|
119
|
+
|
|
120
|
+
**example:**
|
|
121
|
+
```bash
|
|
122
|
+
# ❌ may get flagged as nsfw
|
|
123
|
+
bun run service/image.ts fal "woman in athletic wear"
|
|
124
|
+
|
|
125
|
+
# ✅ less likely to trigger filter
|
|
126
|
+
bun run service/image.ts fal "woman wearing long sleeve athletic top and full length leggings"
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## environment variables
|
|
130
|
+
|
|
131
|
+
required:
|
|
132
|
+
- `FAL_API_KEY` - for fal image generation
|
|
133
|
+
- `HIGGSFIELD_API_KEY` - for soul character generation
|
|
134
|
+
- `HIGGSFIELD_SECRET` - for higgsfield authentication
|
|
135
|
+
|
|
136
|
+
optional (for s3 upload):
|
|
137
|
+
- `CLOUDFLARE_R2_API_URL`
|
|
138
|
+
- `CLOUDFLARE_ACCESS_KEY_ID`
|
|
139
|
+
- `CLOUDFLARE_ACCESS_SECRET`
|
|
140
|
+
- `CLOUDFLARE_R2_BUCKET`
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* image generation service combining fal and higgsfield
|
|
4
|
+
* usage: bun run service/image.ts <command> <args>
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { generateImage } from "../../lib/fal";
|
|
8
|
+
import { generateSoul } from "../../lib/higgsfield";
|
|
9
|
+
import { uploadFromUrl } from "../../utilities/s3";
|
|
10
|
+
|
|
11
|
+
/** result of one image generation request, shared by the fal and soul paths. */
export interface ImageGenerationResult {
  // direct url of the generated image, as returned by the provider
  imageUrl: string;
  // s3/r2 url — only set when the caller requested upload
  uploaded?: string;
}
|
|
15
|
+
|
|
16
|
+
export async function generateWithFal(
|
|
17
|
+
prompt: string,
|
|
18
|
+
options: { model?: string; upload?: boolean } = {},
|
|
19
|
+
): Promise<ImageGenerationResult> {
|
|
20
|
+
console.log("[service/image] generating with fal");
|
|
21
|
+
|
|
22
|
+
const result = await generateImage({ prompt, model: options.model });
|
|
23
|
+
|
|
24
|
+
const imageUrl = result.data?.images?.[0]?.url;
|
|
25
|
+
if (!imageUrl) {
|
|
26
|
+
throw new Error("no image url in result");
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
let uploaded: string | undefined;
|
|
30
|
+
if (options.upload) {
|
|
31
|
+
const timestamp = Date.now();
|
|
32
|
+
const objectKey = `images/fal/${timestamp}.png`;
|
|
33
|
+
uploaded = await uploadFromUrl(imageUrl, objectKey);
|
|
34
|
+
console.log(`[service/image] uploaded to ${uploaded}`);
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
return { imageUrl, uploaded };
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
export async function generateWithSoul(
|
|
41
|
+
prompt: string,
|
|
42
|
+
options: { styleId?: string; upload?: boolean } = {},
|
|
43
|
+
): Promise<ImageGenerationResult> {
|
|
44
|
+
console.log("[service/image] generating with higgsfield soul");
|
|
45
|
+
|
|
46
|
+
const result = await generateSoul({
|
|
47
|
+
prompt,
|
|
48
|
+
styleId: options.styleId,
|
|
49
|
+
});
|
|
50
|
+
|
|
51
|
+
const imageUrl = result.jobs?.[0]?.results?.raw?.url;
|
|
52
|
+
if (!imageUrl) {
|
|
53
|
+
throw new Error("no image url in result");
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
let uploaded: string | undefined;
|
|
57
|
+
if (options.upload) {
|
|
58
|
+
const timestamp = Date.now();
|
|
59
|
+
const objectKey = `images/soul/${timestamp}.png`;
|
|
60
|
+
uploaded = await uploadFromUrl(imageUrl, objectKey);
|
|
61
|
+
console.log(`[service/image] uploaded to ${uploaded}`);
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
return { imageUrl, uploaded };
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
// cli runner
|
|
68
|
+
if (import.meta.main) {
|
|
69
|
+
const [command, ...args] = process.argv.slice(2);
|
|
70
|
+
|
|
71
|
+
switch (command) {
|
|
72
|
+
case "fal": {
|
|
73
|
+
if (!args[0]) {
|
|
74
|
+
console.log(`
|
|
75
|
+
usage:
|
|
76
|
+
bun run service/image.ts fal <prompt> [model] [upload]
|
|
77
|
+
`);
|
|
78
|
+
process.exit(1);
|
|
79
|
+
}
|
|
80
|
+
const falResult = await generateWithFal(args[0], {
|
|
81
|
+
model: args[1],
|
|
82
|
+
upload: args[2] === "true",
|
|
83
|
+
});
|
|
84
|
+
console.log(JSON.stringify(falResult, null, 2));
|
|
85
|
+
break;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
case "soul": {
|
|
89
|
+
if (!args[0]) {
|
|
90
|
+
console.log(`
|
|
91
|
+
usage:
|
|
92
|
+
bun run service/image.ts soul <prompt> [styleId] [upload]
|
|
93
|
+
`);
|
|
94
|
+
process.exit(1);
|
|
95
|
+
}
|
|
96
|
+
const soulResult = await generateWithSoul(args[0], {
|
|
97
|
+
styleId: args[1],
|
|
98
|
+
upload: args[2] === "true",
|
|
99
|
+
});
|
|
100
|
+
console.log(JSON.stringify(soulResult, null, 2));
|
|
101
|
+
break;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
default:
|
|
105
|
+
console.log(`
|
|
106
|
+
usage:
|
|
107
|
+
bun run service/image.ts fal <prompt> [model] [upload]
|
|
108
|
+
bun run service/image.ts soul <prompt> [styleId] [upload]
|
|
109
|
+
`);
|
|
110
|
+
process.exit(1);
|
|
111
|
+
}
|
|
112
|
+
}
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: video-lipsync
|
|
3
|
+
description: sync video with audio using wav2lip ai model or simple audio overlay. use when creating talking videos, matching lip movements to audio, or combining video with voiceovers.
|
|
4
|
+
allowed-tools: Read, Bash
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# video lipsync
|
|
8
|
+
|
|
9
|
+
sync video with audio using ai-powered lipsync or simple overlay.
|
|
10
|
+
|
|
11
|
+
## methods
|
|
12
|
+
|
|
13
|
+
### wav2lip (ai-powered)
|
|
14
|
+
- uses replicate wav2lip model
|
|
15
|
+
- matches lip movements to audio
|
|
16
|
+
- works with url inputs
|
|
17
|
+
- processing time: 30-60 seconds
|
|
18
|
+
- best for: talking character videos
|
|
19
|
+
|
|
20
|
+
### overlay (simple)
|
|
21
|
+
- adds audio track to video using ffmpeg
|
|
22
|
+
- no lip movement matching
|
|
23
|
+
- works with local files
|
|
24
|
+
- processing time: instant
|
|
25
|
+
- best for: background music, voiceovers
|
|
26
|
+
|
|
27
|
+
## usage
|
|
28
|
+
|
|
29
|
+
### sync with method selection
|
|
30
|
+
```bash
|
|
31
|
+
bun run service/sync.ts sync <videoUrl> <audioUrl> [method] [output]
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
**parameters:**
|
|
35
|
+
- `videoUrl` (required): video file path or url
|
|
36
|
+
- `audioUrl` (required): audio file path or url
|
|
37
|
+
- `method` (optional): "wav2lip" or "overlay" (default: overlay)
|
|
38
|
+
- `output` (optional): output path (default: output-synced.mp4)
|
|
39
|
+
|
|
40
|
+
**example:**
|
|
41
|
+
```bash
|
|
42
|
+
bun run service/sync.ts sync video.mp4 audio.mp3 overlay output.mp4
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### wav2lip direct
|
|
46
|
+
```bash
|
|
47
|
+
bun run service/sync.ts wav2lip <videoUrl> <audioUrl>
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
**example:**
|
|
51
|
+
```bash
|
|
52
|
+
bun run service/sync.ts wav2lip https://example.com/character.mp4 https://example.com/voice.mp3
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### overlay direct
|
|
56
|
+
```bash
|
|
57
|
+
bun run service/sync.ts overlay <videoPath> <audioPath> [output]
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
**example:**
|
|
61
|
+
```bash
|
|
62
|
+
bun run service/sync.ts overlay character.mp4 narration.mp3 final.mp4
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## as library
|
|
66
|
+
|
|
67
|
+
```typescript
|
|
68
|
+
import { lipsync, lipsyncWav2Lip, lipsyncOverlay } from "./service/sync"
|
|
69
|
+
|
|
70
|
+
// flexible sync
|
|
71
|
+
const result = await lipsync({
|
|
72
|
+
videoUrl: "video.mp4",
|
|
73
|
+
audioUrl: "audio.mp3",
|
|
74
|
+
method: "wav2lip",
|
|
75
|
+
output: "synced.mp4"
|
|
76
|
+
})
|
|
77
|
+
|
|
78
|
+
// wav2lip specific
|
|
79
|
+
const lipsynced = await lipsyncWav2Lip({
|
|
80
|
+
videoUrl: "https://example.com/video.mp4",
|
|
81
|
+
audioUrl: "https://example.com/audio.mp3"
|
|
82
|
+
})
|
|
83
|
+
|
|
84
|
+
// overlay specific
|
|
85
|
+
const overlayed = await lipsyncOverlay(
|
|
86
|
+
"video.mp4",
|
|
87
|
+
"audio.mp3",
|
|
88
|
+
"output.mp4"
|
|
89
|
+
)
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## when to use each method
|
|
93
|
+
|
|
94
|
+
### use wav2lip when:
|
|
95
|
+
- creating talking character videos
|
|
96
|
+
- lip movements must match speech
|
|
97
|
+
- have urls for video and audio
|
|
98
|
+
- quality is more important than speed
|
|
99
|
+
|
|
100
|
+
### use overlay when:
|
|
101
|
+
- adding background music
|
|
102
|
+
- audio doesn't require lip sync
|
|
103
|
+
- working with local files
|
|
104
|
+
- need instant processing
|
|
105
|
+
|
|
106
|
+
## typical workflow
|
|
107
|
+
|
|
108
|
+
1. generate character image (image service)
|
|
109
|
+
2. animate character (video service)
|
|
110
|
+
3. generate voiceover (voice service)
|
|
111
|
+
4. sync with wav2lip (this service)
|
|
112
|
+
5. add captions (captions service)
|
|
113
|
+
|
|
114
|
+
## tips
|
|
115
|
+
|
|
116
|
+
**for wav2lip:**
|
|
117
|
+
- use close-up character shots for best results
|
|
118
|
+
- ensure audio is clear and well-paced
|
|
119
|
+
- video should show face clearly
|
|
120
|
+
- works best with 5-10 second clips
|
|
121
|
+
|
|
122
|
+
**for overlay:**
|
|
123
|
+
- match audio length to video length
|
|
124
|
+
- ffmpeg will loop short audio or trim long audio
|
|
125
|
+
- preserves original video quality
|
|
126
|
+
|
|
127
|
+
## environment variables
|
|
128
|
+
|
|
129
|
+
required (for wav2lip):
|
|
130
|
+
- `REPLICATE_API_TOKEN` - for wav2lip model
|
|
131
|
+
|
|
132
|
+
no api keys required for the overlay method (ffmpeg must be installed locally)
|
|
133
|
+
|
|
134
|
+
## error handling
|
|
135
|
+
|
|
136
|
+
if wav2lip fails, the service automatically falls back to overlay method with a warning message.
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* lipsync service - combines video with audio using various methods
|
|
5
|
+
* supports wav2lip, synclabs, and simple audio overlay
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { addAudio } from "../../lib/ffmpeg";
|
|
9
|
+
import { runModel } from "../../lib/replicate";
|
|
10
|
+
|
|
11
|
+
// types

/** options for the method-dispatching lipsync() entry point. */
export interface LipsyncOptions {
  // video file path or url (overlay accepts local paths; wav2lip expects urls)
  videoUrl: string;
  // audio file path or url
  audioUrl: string;
  // sync strategy; defaults to "overlay" — synclabs is not yet implemented
  method?: "wav2lip" | "synclabs" | "overlay";
  // output path for the overlay method (default handled downstream)
  output?: string;
}

/** options for the wav2lip replicate model (url inputs only). */
export interface Wav2LipOptions {
  videoUrl: string;
  audioUrl: string;
}
|
|
23
|
+
|
|
24
|
+
// core functions
|
|
25
|
+
export async function lipsync(options: LipsyncOptions) {
|
|
26
|
+
const { videoUrl, audioUrl, method = "overlay", output } = options;
|
|
27
|
+
|
|
28
|
+
if (!videoUrl || !audioUrl) {
|
|
29
|
+
throw new Error("videoUrl and audioUrl are required");
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
console.log(`[sync] syncing video with audio using ${method}...`);
|
|
33
|
+
|
|
34
|
+
switch (method) {
|
|
35
|
+
case "wav2lip":
|
|
36
|
+
return await lipsyncWav2Lip({ videoUrl, audioUrl });
|
|
37
|
+
|
|
38
|
+
case "synclabs":
|
|
39
|
+
console.log(
|
|
40
|
+
`[sync] synclabs not yet implemented, falling back to overlay`,
|
|
41
|
+
);
|
|
42
|
+
return await lipsyncOverlay(videoUrl, audioUrl, output);
|
|
43
|
+
|
|
44
|
+
case "overlay":
|
|
45
|
+
return await lipsyncOverlay(videoUrl, audioUrl, output);
|
|
46
|
+
|
|
47
|
+
default:
|
|
48
|
+
throw new Error(`unknown lipsync method: ${method}`);
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
export async function lipsyncWav2Lip(options: Wav2LipOptions) {
|
|
53
|
+
const { videoUrl, audioUrl } = options;
|
|
54
|
+
|
|
55
|
+
console.log(`[sync] using wav2lip model...`);
|
|
56
|
+
|
|
57
|
+
try {
|
|
58
|
+
const output = await runModel("devxpy/cog-wav2lip", {
|
|
59
|
+
face: videoUrl,
|
|
60
|
+
audio: audioUrl,
|
|
61
|
+
});
|
|
62
|
+
|
|
63
|
+
console.log(`[sync] wav2lip completed`);
|
|
64
|
+
return output;
|
|
65
|
+
} catch (error) {
|
|
66
|
+
console.error(`[sync] wav2lip error:`, error);
|
|
67
|
+
throw error;
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
export async function lipsyncOverlay(
|
|
72
|
+
videoPath: string,
|
|
73
|
+
audioPath: string,
|
|
74
|
+
output: string = "output-synced.mp4",
|
|
75
|
+
) {
|
|
76
|
+
console.log(`[sync] overlaying audio on video...`);
|
|
77
|
+
|
|
78
|
+
try {
|
|
79
|
+
const result = await addAudio({
|
|
80
|
+
videoPath,
|
|
81
|
+
audioPath,
|
|
82
|
+
output,
|
|
83
|
+
});
|
|
84
|
+
|
|
85
|
+
console.log(`[sync] overlay completed`);
|
|
86
|
+
return result;
|
|
87
|
+
} catch (error) {
|
|
88
|
+
console.error(`[sync] overlay error:`, error);
|
|
89
|
+
throw error;
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
// cli
|
|
94
|
+
async function cli() {
|
|
95
|
+
const args = process.argv.slice(2);
|
|
96
|
+
const command = args[0];
|
|
97
|
+
|
|
98
|
+
if (!command || command === "help") {
|
|
99
|
+
console.log(`
|
|
100
|
+
usage:
|
|
101
|
+
bun run service/sync.ts <command> [args]
|
|
102
|
+
|
|
103
|
+
commands:
|
|
104
|
+
sync <videoUrl> <audioUrl> [method] [output] sync video with audio
|
|
105
|
+
wav2lip <videoUrl> <audioUrl> use wav2lip model
|
|
106
|
+
overlay <videoPath> <audioPath> [output] simple audio overlay
|
|
107
|
+
help show this help
|
|
108
|
+
|
|
109
|
+
methods:
|
|
110
|
+
wav2lip - ai-powered lipsync using replicate (url inputs)
|
|
111
|
+
overlay - simple audio overlay using ffmpeg (local files)
|
|
112
|
+
|
|
113
|
+
examples:
|
|
114
|
+
bun run service/sync.ts sync video.mp4 audio.mp3 overlay output.mp4
|
|
115
|
+
bun run service/sync.ts wav2lip https://example.com/video.mp4 https://example.com/audio.mp3
|
|
116
|
+
bun run service/sync.ts overlay video.mp4 audio.mp3 synced.mp4
|
|
117
|
+
|
|
118
|
+
environment:
|
|
119
|
+
REPLICATE_API_TOKEN - required for wav2lip method
|
|
120
|
+
`);
|
|
121
|
+
process.exit(0);
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
try {
|
|
125
|
+
switch (command) {
|
|
126
|
+
case "sync": {
|
|
127
|
+
const videoUrl = args[1];
|
|
128
|
+
const audioUrl = args[2];
|
|
129
|
+
const method = (args[3] || "overlay") as "wav2lip" | "overlay";
|
|
130
|
+
const output = args[4];
|
|
131
|
+
|
|
132
|
+
if (!videoUrl || !audioUrl) {
|
|
133
|
+
throw new Error("videoUrl and audioUrl are required");
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
const result = await lipsync({
|
|
137
|
+
videoUrl,
|
|
138
|
+
audioUrl,
|
|
139
|
+
method,
|
|
140
|
+
output,
|
|
141
|
+
});
|
|
142
|
+
|
|
143
|
+
console.log(`[sync] result:`, result);
|
|
144
|
+
break;
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
case "wav2lip": {
|
|
148
|
+
const videoUrl = args[1];
|
|
149
|
+
const audioUrl = args[2];
|
|
150
|
+
|
|
151
|
+
if (!videoUrl || !audioUrl) {
|
|
152
|
+
throw new Error("videoUrl and audioUrl are required");
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
const result = await lipsyncWav2Lip({ videoUrl, audioUrl });
|
|
156
|
+
console.log(`[sync] result:`, result);
|
|
157
|
+
break;
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
case "overlay": {
|
|
161
|
+
const videoPath = args[1];
|
|
162
|
+
const audioPath = args[2];
|
|
163
|
+
const output = args[3];
|
|
164
|
+
|
|
165
|
+
if (!videoPath || !audioPath) {
|
|
166
|
+
throw new Error("videoPath and audioPath are required");
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
const result = await lipsyncOverlay(videoPath, audioPath, output);
|
|
170
|
+
console.log(`[sync] result:`, result);
|
|
171
|
+
break;
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
default:
|
|
175
|
+
console.error(`unknown command: ${command}`);
|
|
176
|
+
console.log(`run 'bun run service/sync.ts help' for usage`);
|
|
177
|
+
process.exit(1);
|
|
178
|
+
}
|
|
179
|
+
} catch (error) {
|
|
180
|
+
console.error(`[sync] error:`, error);
|
|
181
|
+
process.exit(1);
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
if (import.meta.main) {
|
|
186
|
+
cli();
|
|
187
|
+
}
|