varg.ai-sdk 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +7 -0
- package/.env.example +24 -0
- package/CLAUDE.md +118 -0
- package/README.md +231 -0
- package/SKILLS.md +157 -0
- package/STRUCTURE.md +92 -0
- package/TEST_RESULTS.md +122 -0
- package/action/captions/SKILL.md +170 -0
- package/action/captions/index.ts +227 -0
- package/action/edit/SKILL.md +235 -0
- package/action/edit/index.ts +493 -0
- package/action/image/SKILL.md +140 -0
- package/action/image/index.ts +112 -0
- package/action/sync/SKILL.md +136 -0
- package/action/sync/index.ts +187 -0
- package/action/transcribe/SKILL.md +179 -0
- package/action/transcribe/index.ts +227 -0
- package/action/video/SKILL.md +116 -0
- package/action/video/index.ts +135 -0
- package/action/voice/SKILL.md +125 -0
- package/action/voice/index.ts +201 -0
- package/biome.json +33 -0
- package/index.ts +38 -0
- package/lib/README.md +144 -0
- package/lib/ai-sdk/fal.ts +106 -0
- package/lib/ai-sdk/replicate.ts +107 -0
- package/lib/elevenlabs.ts +382 -0
- package/lib/fal.ts +478 -0
- package/lib/ffmpeg.ts +467 -0
- package/lib/fireworks.ts +235 -0
- package/lib/groq.ts +246 -0
- package/lib/higgsfield.ts +176 -0
- package/lib/remotion/SKILL.md +823 -0
- package/lib/remotion/cli.ts +115 -0
- package/lib/remotion/functions.ts +283 -0
- package/lib/remotion/index.ts +19 -0
- package/lib/remotion/templates.ts +73 -0
- package/lib/replicate.ts +304 -0
- package/output.txt +1 -0
- package/package.json +35 -0
- package/pipeline/cookbooks/SKILL.md +285 -0
- package/pipeline/cookbooks/remotion-video.md +585 -0
- package/pipeline/cookbooks/round-video-character.md +337 -0
- package/pipeline/cookbooks/talking-character.md +59 -0
- package/test-import.ts +7 -0
- package/test-services.ts +97 -0
- package/tsconfig.json +29 -0
- package/utilities/s3.ts +147 -0
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: voice-synthesis
|
|
3
|
+
description: generate realistic text-to-speech audio using elevenlabs with multiple voice options. use when user needs voiceovers, narration, character voices, or audio for lipsync videos.
|
|
4
|
+
allowed-tools: Read, Bash
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# voice synthesis
|
|
8
|
+
|
|
9
|
+
generate high-quality text-to-speech audio with elevenlabs.
|
|
10
|
+
|
|
11
|
+
## available voices
|
|
12
|
+
|
|
13
|
+
- **rachel** - clear, professional female voice
|
|
14
|
+
- **domi** - warm, friendly female voice
|
|
15
|
+
- **bella** - energetic female voice
|
|
16
|
+
- **antoni** - friendly male voice
|
|
17
|
+
- **elli** - young, clear female voice
|
|
18
|
+
- **josh** - deep, clear male voice
|
|
19
|
+
- **arnold** - strong, authoritative male voice
|
|
20
|
+
- **adam** - natural, conversational male voice
|
|
21
|
+
- **sam** - raspy, character male voice
|
|
22
|
+
|
|
23
|
+
## usage
|
|
24
|
+
|
|
25
|
+
### generate voice
|
|
26
|
+
```bash
|
|
27
|
+
bun run action/voice/index.ts generate <text> [voice] [provider] [upload]
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
**parameters:**
|
|
31
|
+
- `text` (required): text to convert to speech
|
|
32
|
+
- `voice` (optional): voice name (default: rachel)
|
|
33
|
+
- `provider` (optional): elevenlabs (default)
|
|
34
|
+
- `upload` (optional): "true" to upload to s3
|
|
35
|
+
|
|
36
|
+
**example:**
|
|
37
|
+
```bash
|
|
38
|
+
bun run action/voice/index.ts generate "hello world, this is my voice" rachel elevenlabs true
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### shorthand for elevenlabs
|
|
42
|
+
```bash
|
|
43
|
+
bun run action/voice/index.ts elevenlabs <text> [voice] [upload]
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
**example:**
|
|
47
|
+
```bash
|
|
48
|
+
bun run action/voice/index.ts elevenlabs "welcome to our video" josh true
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## as library
|
|
52
|
+
|
|
53
|
+
```typescript
|
|
54
|
+
import { generateVoice } from "./action/voice"
|
|
55
|
+
|
|
56
|
+
const result = await generateVoice({
|
|
57
|
+
text: "hello world",
|
|
58
|
+
voice: "rachel",
|
|
59
|
+
provider: "elevenlabs",
|
|
60
|
+
upload: true,
|
|
61
|
+
outputPath: "media/voiceover.mp3"
|
|
62
|
+
})
|
|
63
|
+
|
|
64
|
+
console.log(result.provider)
|
|
65
|
+
console.log(result.voiceId)
|
|
66
|
+
console.log(result.uploadUrl)
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## output
|
|
70
|
+
|
|
71
|
+
returns `VoiceResult`:
|
|
72
|
+
```typescript
|
|
73
|
+
{
|
|
74
|
+
audio: Buffer, // raw audio buffer
|
|
75
|
+
provider: string, // "elevenlabs"
|
|
76
|
+
voiceId: string, // actual voice id used
|
|
77
|
+
uploadUrl?: string // s3 url if upload requested
|
|
78
|
+
}
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
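when run from the cli, the audio file is saved to `media/voice-{timestamp}.mp3`. library callers choose the location with `outputPath`, and `upload: true` only takes effect when `outputPath` is set.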
|
|
82
|
+
|
|
83
|
+
## when to use
|
|
84
|
+
|
|
85
|
+
use this skill when:
|
|
86
|
+
- creating voiceovers for videos
|
|
87
|
+
- generating narration or character dialogue
|
|
88
|
+
- preparing audio for lipsync videos
|
|
89
|
+
- need text-to-speech for talking character pipeline
|
|
90
|
+
- testing different voice options
|
|
91
|
+
|
|
92
|
+
## tips
|
|
93
|
+
|
|
94
|
+
**voice selection:**
|
|
95
|
+
- use **rachel** or **josh** for professional narration
|
|
96
|
+
- use **bella** or **antoni** for friendly, casual content
|
|
97
|
+
- use **arnold** for authoritative or dramatic content
|
|
98
|
+
- use **sam** for character or stylized voices
|
|
99
|
+
|
|
100
|
+
**text formatting:**
|
|
101
|
+
- add punctuation for natural pauses
|
|
102
|
+
- use shorter sentences for clearer speech
|
|
103
|
+
- spell out numbers and abbreviations
|
|
104
|
+
|
|
105
|
+
## integration with other services
|
|
106
|
+
|
|
107
|
+
perfect companion for:
|
|
108
|
+
- **lipsync service** - sync generated voice with video
|
|
109
|
+
- **video generation** - create talking character videos
|
|
110
|
+
- **captions service** - auto-generate subtitles from voiceover
|
|
111
|
+
|
|
112
|
+
## environment variables
|
|
113
|
+
|
|
114
|
+
required:
|
|
115
|
+
- `ELEVENLABS_API_KEY` - for voice generation
|
|
116
|
+
|
|
117
|
+
optional (for s3 upload):
|
|
118
|
+
- `CLOUDFLARE_R2_API_URL`
|
|
119
|
+
- `CLOUDFLARE_ACCESS_KEY_ID`
|
|
120
|
+
- `CLOUDFLARE_ACCESS_SECRET`
|
|
121
|
+
- `CLOUDFLARE_R2_BUCKET`
|
|
122
|
+
|
|
123
|
+
## generation time
|
|
124
|
+
|
|
125
|
+
expect 5-15 seconds depending on text length
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* voice service - high-level voice generation combining multiple providers
|
|
5
|
+
* supports elevenlabs and future providers
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { textToSpeech, VOICES } from "../../lib/elevenlabs";
|
|
9
|
+
import { uploadFile } from "../../utilities/s3";
|
|
10
|
+
|
|
11
|
+
// types
|
|
12
|
+
/**
 * options accepted by generateVoice
 */
export interface GenerateVoiceOptions {
  /** text to convert to speech; generateVoice throws when it is empty */
  text: string;
  /**
   * friendly voice name (rachel, domi, bella, antoni, elli, josh, arnold,
   * adam, sam) or a raw provider voice id; defaults to "rachel"
   */
  voice?: string;
  /** speech provider; "elevenlabs" is the only supported value and the default */
  provider?: "elevenlabs";
  /** request an upload of the generated file; only acted on when outputPath is set */
  upload?: boolean;
  /** file path handed to the provider call and, when uploading, to the upload helper */
  outputPath?: string;
}
|
|
19
|
+
|
|
20
|
+
/**
 * value resolved by generateVoice
 */
export interface VoiceResult {
  /** audio returned by the provider's text-to-speech call */
  audio: Buffer;
  /** name of the provider that produced the audio */
  provider: string;
  /** voice id sent to the provider (mapped from the friendly name when one matched) */
  voiceId: string;
  /** url returned by the upload helper; set only when an upload was performed */
  uploadUrl?: string;
}
|
|
26
|
+
|
|
27
|
+
// core functions
|
|
28
|
+
/**
 * generate speech audio from text with the selected provider
 *
 * friendly voice names (rachel, domi, ...) are mapped to provider voice ids;
 * any other value is passed through unchanged as a raw voice id.
 *
 * @param options - text plus optional voice, provider, upload flag and output path
 * @returns the audio buffer, provider name, resolved voice id and, when an
 *   upload was performed, the uploaded file url
 * @throws Error when text is empty or the provider is not supported
 */
export async function generateVoice(
  options: GenerateVoiceOptions,
): Promise<VoiceResult> {
  const {
    text,
    voice = "rachel",
    provider = "elevenlabs",
    upload = false,
    outputPath,
  } = options;

  if (!text) {
    throw new Error("text is required");
  }

  console.log(`[voice] generating with ${provider} (${voice})...`);

  let audio: Buffer;
  let voiceId: string;

  switch (provider) {
    case "elevenlabs": {
      // map friendly names to voice ids
      const voiceMap: Record<string, string> = {
        rachel: VOICES.RACHEL,
        domi: VOICES.DOMI,
        bella: VOICES.BELLA,
        antoni: VOICES.ANTONI,
        elli: VOICES.ELLI,
        josh: VOICES.JOSH,
        arnold: VOICES.ARNOLD,
        adam: VOICES.ADAM,
        sam: VOICES.SAM,
      };

      // own-property lookup only: a plain index would resolve names such as
      // "constructor" or "tostring" to inherited Object.prototype members
      // instead of falling through to the raw voice id
      const friendlyName = voice.toLowerCase();
      const mappedId = Object.prototype.hasOwnProperty.call(
        voiceMap,
        friendlyName,
      )
        ? voiceMap[friendlyName]
        : undefined;

      voiceId = mappedId || voice;

      audio = await textToSpeech({
        text,
        voiceId,
        outputPath,
      });
      break;
    }

    default:
      throw new Error(`unsupported provider: ${provider}`);
  }

  const result: VoiceResult = {
    audio,
    provider,
    voiceId,
  };

  // upload to s3 if requested
  if (upload) {
    if (outputPath) {
      const objectKey = `voice/${Date.now()}-${voice}.mp3`;
      const uploadUrl = await uploadFile(outputPath, objectKey);
      result.uploadUrl = uploadUrl;
      console.log(`[voice] uploaded to ${uploadUrl}`);
    } else {
      // the upload helper works from a file path, so there is nothing to
      // upload without one; say so instead of silently ignoring the request
      console.warn(
        "[voice] upload requested but no outputPath was provided; skipping upload",
      );
    }
  }

  return result;
}
|
|
93
|
+
|
|
94
|
+
// cli
|
|
95
|
+
/**
 * print cli usage, examples, available voices and required environment
 */
function printHelp() {
  console.log(`
usage:
  bun run action/voice/index.ts <command> [args]

commands:
  generate <text> [voice] [provider] [upload]   generate voice from text
  elevenlabs <text> [voice] [upload]            generate with elevenlabs
  help                                          show this help

examples:
  bun run action/voice/index.ts generate "hello world" rachel elevenlabs false
  bun run action/voice/index.ts elevenlabs "hello world" josh true
  bun run action/voice/index.ts generate "welcome to ai" bella

available voices:
  rachel, domi, bella, antoni, elli, josh, arnold, adam, sam

providers:
  elevenlabs (default)

environment:
  ELEVENLABS_API_KEY - required for elevenlabs
  CLOUDFLARE_* - required for upload
`);
}

/**
 * shared body of the "generate" and "elevenlabs" commands: synthesize the
 * text into media/voice-{timestamp}.mp3 and print a summary of the result
 *
 * @throws Error when text is missing
 */
async function runGenerate(
  text: string | undefined,
  voice: string | undefined,
  provider: "elevenlabs",
  upload: boolean,
) {
  if (!text) {
    throw new Error("text is required");
  }

  const outputPath = `media/voice-${Date.now()}.mp3`;

  const result = await generateVoice({
    text,
    voice,
    provider,
    upload,
    outputPath,
  });

  console.log(`[voice] result:`, {
    provider: result.provider,
    voiceId: result.voiceId,
    audioSize: result.audio.length,
    outputPath,
    uploadUrl: result.uploadUrl,
  });
}

/**
 * command-line entry point
 *
 * exits with 0 after printing help, and with 1 on an unknown command or on
 * any error raised while generating
 */
async function cli() {
  const args = process.argv.slice(2);
  const command = args[0];

  if (!command || command === "help") {
    printHelp();
    process.exit(0);
  }

  try {
    switch (command) {
      case "generate":
        // the provider string is not validated here; generateVoice rejects
        // anything it does not support
        await runGenerate(
          args[1],
          args[2],
          (args[3] || "elevenlabs") as "elevenlabs",
          args[4] === "true",
        );
        break;

      case "elevenlabs":
        await runGenerate(args[1], args[2], "elevenlabs", args[3] === "true");
        break;

      default:
        console.error(`unknown command: ${command}`);
        console.log(`run 'bun run action/voice/index.ts help' for usage`);
        process.exit(1);
    }
  } catch (error) {
    console.error(`[voice] error:`, error);
    process.exit(1);
  }
}

// run the cli only when this file is the program entry point, not on import
if (import.meta.main) {
  cli();
}
|
package/biome.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://biomejs.dev/schemas/2.3.7/schema.json",
|
|
3
|
+
"vcs": {
|
|
4
|
+
"enabled": true,
|
|
5
|
+
"clientKind": "git",
|
|
6
|
+
"useIgnoreFile": true
|
|
7
|
+
},
|
|
8
|
+
"files": {
|
|
9
|
+
"ignoreUnknown": false
|
|
10
|
+
},
|
|
11
|
+
"formatter": {
|
|
12
|
+
"enabled": true,
|
|
13
|
+
"indentStyle": "space"
|
|
14
|
+
},
|
|
15
|
+
"linter": {
|
|
16
|
+
"enabled": true,
|
|
17
|
+
"rules": {
|
|
18
|
+
"recommended": true
|
|
19
|
+
}
|
|
20
|
+
},
|
|
21
|
+
"assist": {
|
|
22
|
+
"actions": {
|
|
23
|
+
"source": {
|
|
24
|
+
"organizeImports": "on"
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
},
|
|
28
|
+
"javascript": {
|
|
29
|
+
"formatter": {
|
|
30
|
+
"quoteStyle": "double"
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
}
|
package/index.ts
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* varg.ai sdk
|
|
3
|
+
* video generation and editing tools
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
// re-export external clients
|
|
7
|
+
export { fal } from "@ai-sdk/fal";
export { replicate } from "@ai-sdk/replicate";
export { fal as falClient } from "@fal-ai/client";
export { HiggsfieldClient } from "@higgsfield/client";
// lib exports - ai-sdk/fal (provider)
export * as aiSdkFal from "./lib/ai-sdk/fal";
// lib exports - ai-sdk/replicate (provider)
export * as aiSdkReplicate from "./lib/ai-sdk/replicate";
// lib exports - elevenlabs
export * from "./lib/elevenlabs";
// lib exports - fal (client)
export * from "./lib/fal";
// lib exports - ffmpeg
export * from "./lib/ffmpeg";
// lib exports - fireworks
export * from "./lib/fireworks";
// lib exports - groq
export * from "./lib/groq";
// lib exports - higgsfield
export * from "./lib/higgsfield";
// lib exports - replicate
export * from "./lib/replicate";
// action exports
// the package ships these modules as action/<name>/index.ts; there is no
// service/ directory, so "./service/<name>" specifiers cannot be resolved
export * from "./action/captions";
export * from "./action/edit";
export * from "./action/image";
export * from "./action/sync";
export * from "./action/transcribe";
export * from "./action/video";
export * from "./action/voice";
// utilities exports
export * from "./utilities/s3";
|
package/lib/README.md
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
# lib/ modules
|
|
2
|
+
|
|
3
|
+
## two fal implementations
|
|
4
|
+
|
|
5
|
+
### lib/ai-sdk/fal.ts - ai-sdk provider (recommended for images)
|
|
6
|
+
|
|
7
|
+
uses `@ai-sdk/fal` with the vercel ai sdk's `experimental_generateImage`
|
|
8
|
+
|
|
9
|
+
**benefits:**
|
|
10
|
+
- clean, typed api via vercel ai sdk
|
|
11
|
+
- automatic image format handling (uint8array)
|
|
12
|
+
- consistent interface with other ai providers
|
|
13
|
+
- built-in aspect ratio support
|
|
14
|
+
- better for standard image generation
|
|
15
|
+
|
|
16
|
+
**example:**
|
|
17
|
+
```bash
|
|
18
|
+
bun run lib/ai-sdk/fal.ts generate_image "cyberpunk city" "fal-ai/flux/dev" "16:9"
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
**code:**
|
|
22
|
+
```typescript
|
|
23
|
+
import { fal } from "@ai-sdk/fal"
|
|
24
|
+
import { experimental_generateImage as generateImage } from "ai"
|
|
25
|
+
|
|
26
|
+
const { image, providerMetadata } = await generateImage({
|
|
27
|
+
model: fal.image("fal-ai/flux/dev"),
|
|
28
|
+
prompt: "beautiful sunset",
|
|
29
|
+
aspectRatio: "16:9",
|
|
30
|
+
})
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### lib/fal.ts - fal client direct (for video & advanced features)
|
|
34
|
+
|
|
35
|
+
uses `@fal-ai/client` directly with the raw fal api
|
|
36
|
+
|
|
37
|
+
**benefits:**
|
|
38
|
+
- access to all fal features (video, advanced params)
|
|
39
|
+
- streaming/queue updates
|
|
40
|
+
- full control over api parameters
|
|
41
|
+
- required for video generation (no ai-sdk support yet)
|
|
42
|
+
- **supports local images** - automatically uploads local files to fal storage
|
|
43
|
+
|
|
44
|
+
**examples:**
|
|
45
|
+
```bash
|
|
46
|
+
# image generation
|
|
47
|
+
bun run lib/fal.ts generate_image "aurora borealis" "fal-ai/flux-pro/v1.1"
|
|
48
|
+
|
|
49
|
+
# video from url
|
|
50
|
+
bun run lib/fal.ts image_to_video "person talking" "https://image.url" 5
|
|
51
|
+
|
|
52
|
+
# video from local file (auto-uploads)
|
|
53
|
+
bun run lib/fal.ts image_to_video "ocean waves" "./media/beach.jpg" 10
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
**code:**
|
|
57
|
+
```typescript
|
|
58
|
+
import { imageToVideo } from "./lib/fal"
|
|
59
|
+
|
|
60
|
+
// works with both urls and local files
|
|
61
|
+
const result = await imageToVideo({
|
|
62
|
+
prompt: "person talking",
|
|
63
|
+
imageUrl: "./local/image.jpg", // or "https://..."
|
|
64
|
+
duration: 5,
|
|
65
|
+
})
|
|
66
|
+
|
|
67
|
+
// local files are automatically uploaded to fal storage
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## when to use which?
|
|
71
|
+
|
|
72
|
+
| use case | approach |
|
|
73
|
+
|----------|----------|
|
|
74
|
+
| standard image generation | ai-sdk provider ✓ |
|
|
75
|
+
| video generation | fal client direct ✓ |
|
|
76
|
+
| advanced fal features | fal client direct ✓ |
|
|
77
|
+
| multi-provider app | ai-sdk provider ✓ |
|
|
78
|
+
| custom queue handling | fal client direct ✓ |
|
|
79
|
+
|
|
80
|
+
## higgsfield.ts
|
|
81
|
+
|
|
82
|
+
uses `@higgsfield/client` for soul character generation
|
|
83
|
+
|
|
84
|
+
**features:**
|
|
85
|
+
- generate soul images with custom styles
|
|
86
|
+
- create and manage character references
|
|
87
|
+
- list available soul styles
|
|
88
|
+
- poll for job completion
|
|
89
|
+
|
|
90
|
+
**example:**
|
|
91
|
+
```bash
|
|
92
|
+
HF_API_KEY=xxx HF_API_SECRET=xxx bun run lib/higgsfield.ts generate_soul "professional headshot"
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## elevenlabs.ts
|
|
96
|
+
|
|
97
|
+
uses `@elevenlabs/elevenlabs-js` for voice, music, and sound effects generation
|
|
98
|
+
|
|
99
|
+
**features:**
|
|
100
|
+
- text-to-speech with multiple voices
|
|
101
|
+
- music generation from text prompts
|
|
102
|
+
- sound effects generation
|
|
103
|
+
- voice management
|
|
104
|
+
|
|
105
|
+
**examples:**
|
|
106
|
+
```bash
|
|
107
|
+
# text-to-speech
|
|
108
|
+
bun run lib/elevenlabs.ts tts "hello world" rachel output.mp3
|
|
109
|
+
|
|
110
|
+
# music generation
|
|
111
|
+
bun run lib/elevenlabs.ts music "upbeat electronic dance music" 30000 music.mp3
|
|
112
|
+
|
|
113
|
+
# sound effects
|
|
114
|
+
bun run lib/elevenlabs.ts sfx "ocean waves crashing" 5 waves.mp3
|
|
115
|
+
|
|
116
|
+
# list voices
|
|
117
|
+
bun run lib/elevenlabs.ts voices
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
**code:**
|
|
121
|
+
```typescript
|
|
122
|
+
import { textToSpeech, generateMusic, generateSoundEffect } from "./lib/elevenlabs"
|
|
123
|
+
|
|
124
|
+
// voice
|
|
125
|
+
const audio = await textToSpeech({
|
|
126
|
+
text: "hello world",
|
|
127
|
+
voiceId: "rachel",
|
|
128
|
+
outputPath: "output.mp3"
|
|
129
|
+
})
|
|
130
|
+
|
|
131
|
+
// music
|
|
132
|
+
const music = await generateMusic({
|
|
133
|
+
prompt: "epic orchestral music",
|
|
134
|
+
musicLengthMs: 60000,
|
|
135
|
+
outputPath: "music.mp3"
|
|
136
|
+
})
|
|
137
|
+
|
|
138
|
+
// sound effects
|
|
139
|
+
const sfx = await generateSoundEffect({
|
|
140
|
+
text: "thunder and rain",
|
|
141
|
+
durationSeconds: 10,
|
|
142
|
+
outputPath: "sfx.mp3"
|
|
143
|
+
})
|
|
144
|
+
```
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
/**
|
|
3
|
+
* fal.ai wrapper using @ai-sdk/fal provider
|
|
4
|
+
* recommended for standard image generation with vercel ai sdk
|
|
5
|
+
*
|
|
6
|
+
* usage: bun run lib/ai-sdk/fal.ts <command> <args>
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { fal } from "@ai-sdk/fal";
|
|
10
|
+
import { experimental_generateImage as generateImageAI } from "ai";
|
|
11
|
+
|
|
12
|
+
/**
 * generate one image through the @ai-sdk/fal provider
 *
 * @param args.prompt - text prompt for the image
 * @param args.model - fal model id; falls back to "fal-ai/flux/dev" when omitted or empty
 * @param args.aspectRatio - optional aspect ratio forwarded to the provider
 * @returns the image as a base64 data url (when base64 data is present) and
 *   as a uint8array, plus the fal entry of the provider metadata
 * @throws rethrows any error from the underlying call after logging it
 */
export async function generateImage(args: {
  prompt: string;
  model?: string;
  aspectRatio?: "1:1" | "16:9" | "9:16" | "4:3" | "3:4";
}) {
  const selectedModel = args.model || "fal-ai/flux/dev";
  const { prompt, aspectRatio } = args;

  console.log(`[ai-sdk/fal] generating image with ${selectedModel}`);
  console.log(`[ai-sdk/fal] prompt: ${prompt}`);
  if (aspectRatio) {
    console.log(`[ai-sdk/fal] aspect ratio: ${aspectRatio}`);
  }

  try {
    const { image, providerMetadata } = await generateImageAI({
      model: fal.image(selectedModel),
      prompt,
      aspectRatio,
    });

    console.log("[ai-sdk/fal] completed!");

    // the data url is labelled image/png regardless of the model used
    const dataUrl = image.base64
      ? `data:image/png;base64,${image.base64}`
      : undefined;

    // return in consistent format
    return {
      image: {
        url: dataUrl,
        uint8Array: image.uint8Array,
      },
      metadata: providerMetadata?.fal,
    };
  } catch (error) {
    console.error("[ai-sdk/fal] error:", error);
    throw error;
  }
}
|
|
47
|
+
|
|
48
|
+
// cli runner
|
|
49
|
+
// runs only when this file is executed directly: dispatches the
// generate_image command, saves the image under /tmp and prints the metadata
if (import.meta.main) {
  const [command, ...args] = process.argv.slice(2);

  switch (command) {
    case "generate_image": {
      if (!args[0]) {
        console.log(`
usage:
  bun run lib/ai-sdk/fal.ts generate_image <prompt> [model] [aspectRatio]

examples:
  bun run lib/ai-sdk/fal.ts generate_image "sunset over ocean" "fal-ai/flux/dev" "16:9"
  bun run lib/ai-sdk/fal.ts generate_image "portrait photo" "fal-ai/flux-pro/v1.1" "9:16"

available models:
  - fal-ai/flux/dev (default, fast)
  - fal-ai/flux-pro/v1.1 (high quality)
  - fal-ai/flux/schnell (very fast)
  - fal-ai/ideogram/character (character consistency)
`);
        process.exit(1);
      }

      // the aspect ratio argument is passed through unvalidated
      const result = await generateImage({
        prompt: args[0],
        model: args[1],
        aspectRatio: args[2] as
          | "1:1"
          | "16:9"
          | "9:16"
          | "4:3"
          | "3:4"
          | undefined,
      });

      // save image to file
      if (result.image.uint8Array) {
        const filename = `/tmp/fal-ai-sdk-${Date.now()}.png`;
        await Bun.write(filename, result.image.uint8Array);
        console.log(`\nimage saved to: ${filename}`);

        // open image: "open" only exists on macos, so pick the launcher by
        // platform and treat a failed launch as non-fatal - the file is
        // already saved and the metadata below should still be printed
        const opener = process.platform === "darwin" ? "open" : "xdg-open";
        try {
          // fire-and-forget: the viewer process is not waited on
          Bun.spawn([opener, filename]);
        } catch (error) {
          console.warn(`[ai-sdk/fal] could not open image with ${opener}:`, error);
        }
      }

      console.log("\nmetadata:");
      console.log(JSON.stringify(result.metadata, null, 2));
      break;
    }

    default:
      console.log(`
usage:
  bun run lib/ai-sdk/fal.ts generate_image <prompt> [model] [aspectRatio]
`);
      process.exit(1);
  }
}
|