@vargai/sdk 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. package/.env.example +24 -0
  2. package/CLAUDE.md +118 -0
  3. package/HIGGSFIELD_REWRITE_SUMMARY.md +300 -0
  4. package/README.md +231 -0
  5. package/SKILLS.md +157 -0
  6. package/STRUCTURE.md +92 -0
  7. package/TEST_RESULTS.md +122 -0
  8. package/action/captions/SKILL.md +170 -0
  9. package/action/captions/index.ts +169 -0
  10. package/action/edit/SKILL.md +235 -0
  11. package/action/edit/index.ts +437 -0
  12. package/action/image/SKILL.md +140 -0
  13. package/action/image/index.ts +105 -0
  14. package/action/sync/SKILL.md +136 -0
  15. package/action/sync/index.ts +145 -0
  16. package/action/transcribe/SKILL.md +179 -0
  17. package/action/transcribe/index.ts +210 -0
  18. package/action/video/SKILL.md +116 -0
  19. package/action/video/index.ts +125 -0
  20. package/action/voice/SKILL.md +125 -0
  21. package/action/voice/index.ts +136 -0
  22. package/biome.json +33 -0
  23. package/bun.lock +842 -0
  24. package/cli/commands/find.ts +58 -0
  25. package/cli/commands/help.ts +70 -0
  26. package/cli/commands/list.ts +49 -0
  27. package/cli/commands/run.ts +237 -0
  28. package/cli/commands/which.ts +66 -0
  29. package/cli/discover.ts +66 -0
  30. package/cli/index.ts +33 -0
  31. package/cli/runner.ts +65 -0
  32. package/cli/types.ts +49 -0
  33. package/cli/ui.ts +185 -0
  34. package/index.ts +75 -0
  35. package/lib/README.md +144 -0
  36. package/lib/ai-sdk/fal.ts +106 -0
  37. package/lib/ai-sdk/replicate.ts +107 -0
  38. package/lib/elevenlabs.ts +382 -0
  39. package/lib/fal.ts +467 -0
  40. package/lib/ffmpeg.ts +467 -0
  41. package/lib/fireworks.ts +235 -0
  42. package/lib/groq.ts +246 -0
  43. package/lib/higgsfield/MIGRATION.md +308 -0
  44. package/lib/higgsfield/README.md +273 -0
  45. package/lib/higgsfield/example.ts +228 -0
  46. package/lib/higgsfield/index.ts +241 -0
  47. package/lib/higgsfield/soul.ts +262 -0
  48. package/lib/higgsfield.ts +176 -0
  49. package/lib/remotion/SKILL.md +823 -0
  50. package/lib/remotion/cli.ts +115 -0
  51. package/lib/remotion/functions.ts +283 -0
  52. package/lib/remotion/index.ts +19 -0
  53. package/lib/remotion/templates.ts +73 -0
  54. package/lib/replicate.ts +304 -0
  55. package/output.txt +1 -0
  56. package/package.json +42 -0
  57. package/pipeline/cookbooks/SKILL.md +285 -0
  58. package/pipeline/cookbooks/remotion-video.md +585 -0
  59. package/pipeline/cookbooks/round-video-character.md +337 -0
  60. package/pipeline/cookbooks/talking-character.md +59 -0
  61. package/scripts/produce-menopause-campaign.sh +202 -0
  62. package/service/music/SKILL.md +229 -0
  63. package/service/music/index.ts +296 -0
  64. package/test-import.ts +7 -0
  65. package/test-services.ts +97 -0
  66. package/tsconfig.json +29 -0
  67. package/utilities/s3.ts +147 -0
@@ -0,0 +1,210 @@
1
+ #!/usr/bin/env bun
2
+
3
+ /**
4
+ * audio transcription service
5
+ * supports groq whisper, fireworks api, and future providers
6
+ */
7
+
8
+ import { writeFileSync } from "node:fs";
9
+ import { toFile } from "groq-sdk/uploads";
10
+ import type { ActionMeta } from "../../cli/types";
11
+ import {
12
+ convertFireworksToSRT,
13
+ transcribeWithFireworks as fireworksTranscribe,
14
+ } from "../../lib/fireworks";
15
+ import { GROQ_MODELS, transcribeAudio as groqTranscribe } from "../../lib/groq";
16
+
17
+ export const meta: ActionMeta = {
18
+ name: "transcribe",
19
+ type: "action",
20
+ description: "speech to text transcription",
21
+ inputType: "audio",
22
+ outputType: "text",
23
+ schema: {
24
+ input: {
25
+ type: "object",
26
+ required: ["audio"],
27
+ properties: {
28
+ audio: {
29
+ type: "string",
30
+ format: "file-path",
31
+ description: "audio/video file to transcribe",
32
+ },
33
+ provider: {
34
+ type: "string",
35
+ enum: ["groq", "fireworks"],
36
+ default: "groq",
37
+ description: "transcription provider",
38
+ },
39
+ output: {
40
+ type: "string",
41
+ format: "file-path",
42
+ description: "output file path",
43
+ },
44
+ },
45
+ },
46
+ output: { type: "string", description: "transcribed text" },
47
+ },
48
+ async run(options) {
49
+ const { audio, provider, output } = options as {
50
+ audio: string;
51
+ provider?: "groq" | "fireworks";
52
+ output?: string;
53
+ };
54
+ return transcribe({ audioUrl: audio, provider, outputPath: output });
55
+ },
56
+ };
57
+
58
+ // types
59
+ export interface TranscribeOptions {
60
+ audioUrl: string; // url or local file path
61
+ provider?: "groq" | "fireworks";
62
+ model?: string;
63
+ language?: string;
64
+ outputFormat?: "text" | "srt";
65
+ outputPath?: string;
66
+ }
67
+
68
+ export interface TranscribeResult {
69
+ success: boolean;
70
+ text?: string;
71
+ srt?: string;
72
+ error?: string;
73
+ }
74
+
75
+ // groq transcription
76
+ async function transcribeWithGroq(
77
+ audioUrl: string,
78
+ options: {
79
+ model?: string;
80
+ language?: string;
81
+ outputFormat?: "text" | "srt";
82
+ },
83
+ ): Promise<TranscribeResult> {
84
+ try {
85
+ console.log("[transcribe] using groq whisper...");
86
+
87
+ // load audio file (local or remote)
88
+ let audioBuffer: ArrayBuffer;
89
+ let fileName = "audio.mp3";
90
+
91
+ if (audioUrl.startsWith("http://") || audioUrl.startsWith("https://")) {
92
+ // fetch remote file
93
+ const audioResponse = await fetch(audioUrl);
94
+ audioBuffer = await audioResponse.arrayBuffer();
95
+ } else {
96
+ // read local file with bun
97
+ const file = Bun.file(audioUrl);
98
+ audioBuffer = await file.arrayBuffer();
99
+ fileName = audioUrl.split("/").pop() || "audio.mp3";
100
+ }
101
+
102
+ const audioFile = await toFile(audioBuffer, fileName);
103
+
104
+ // transcribe with groq
105
+ const text = await groqTranscribe({
106
+ file: audioFile,
107
+ model: options.model || GROQ_MODELS.WHISPER_LARGE,
108
+ language: options.language,
109
+ });
110
+
111
+ console.log("[transcribe] groq transcription complete");
112
+
113
+ if (options.outputFormat === "srt") {
114
+ // groq returns plain text, so we need to convert to srt
115
+ // for now just return text with warning
116
+ console.warn(
117
+ "[transcribe] groq returns plain text, use fireworks for srt format",
118
+ );
119
+ return { success: true, text, srt: text };
120
+ }
121
+
122
+ return { success: true, text };
123
+ } catch (error) {
124
+ console.error("[transcribe] groq error:", error);
125
+ return {
126
+ success: false,
127
+ error:
128
+ error instanceof Error ? error.message : "groq transcription failed",
129
+ };
130
+ }
131
+ }
132
+
133
+ // fireworks transcription (with srt support)
134
+ async function transcribeWithFireworks(
135
+ audioUrl: string,
136
+ ): Promise<TranscribeResult> {
137
+ try {
138
+ console.log("[transcribe] using fireworks api...");
139
+
140
+ const data = await fireworksTranscribe({
141
+ audioPath: audioUrl,
142
+ });
143
+
144
+ const srtText = convertFireworksToSRT(data.words || []);
145
+ console.log("[transcribe] fireworks transcription complete");
146
+
147
+ return { success: true, srt: srtText, text: data.text };
148
+ } catch (error) {
149
+ console.error("[transcribe] fireworks error:", error);
150
+ return {
151
+ success: false,
152
+ error:
153
+ error instanceof Error
154
+ ? error.message
155
+ : "fireworks transcription failed",
156
+ };
157
+ }
158
+ }
159
+
160
+ // main transcription function
161
+ export async function transcribe(
162
+ options: TranscribeOptions,
163
+ ): Promise<TranscribeResult> {
164
+ const {
165
+ audioUrl,
166
+ provider = "groq",
167
+ model,
168
+ language,
169
+ outputFormat = "text",
170
+ outputPath,
171
+ } = options;
172
+
173
+ if (!audioUrl) {
174
+ throw new Error("audioUrl is required");
175
+ }
176
+
177
+ console.log(`[transcribe] transcribing ${audioUrl} with ${provider}...`);
178
+
179
+ let result: TranscribeResult;
180
+
181
+ // choose provider
182
+ if (provider === "groq") {
183
+ result = await transcribeWithGroq(audioUrl, {
184
+ model,
185
+ language,
186
+ outputFormat,
187
+ });
188
+ } else if (provider === "fireworks") {
189
+ result = await transcribeWithFireworks(audioUrl);
190
+ } else {
191
+ throw new Error(`unknown provider: ${provider}`);
192
+ }
193
+
194
+ // save to file if requested
195
+ if (result.success && outputPath) {
196
+ const content = outputFormat === "srt" ? result.srt : result.text;
197
+ if (content) {
198
+ writeFileSync(outputPath, content);
199
+ console.log(`[transcribe] saved to ${outputPath}`);
200
+ }
201
+ }
202
+
203
+ return result;
204
+ }
205
+
206
+ // cli
207
+ if (import.meta.main) {
208
+ const { runCli } = await import("../../cli/runner");
209
+ runCli(meta);
210
+ }
@@ -0,0 +1,116 @@
1
+ ---
2
+ name: video-generation
3
+ description: generate videos from images or text prompts using fal.ai. use when user wants to animate images, create videos from text, or needs ai video generation with 5-10 second clips.
4
+ allowed-tools: Read, Bash
5
+ ---
6
+
7
+ # video generation
8
+
9
+ generate ai videos from images or text using fal.ai with automatic s3 upload support.
10
+
11
+ ## capabilities
12
+
13
+ - **image-to-video**: animate static images with motion prompts
14
+ - **text-to-video**: generate videos directly from text descriptions
15
+ - supports 5 or 10 second duration
16
+ - automatic s3 upload
17
+
18
+ ## usage
19
+
20
+ ### generate from image
21
+ ```bash
22
+ bun run service/video.ts from_image <prompt> <imageUrl> [duration] [upload]
23
+ ```
24
+
25
+ **parameters:**
26
+ - `prompt` (required): motion description (e.g., "camera pan left")
27
+ - `imageUrl` (required): url of the source image
28
+ - `duration` (optional): 5 or 10 seconds (default: 5)
29
+ - `upload` (optional): "true" to upload to s3
30
+
31
+ **example:**
32
+ ```bash
33
+ bun run service/video.ts from_image "person talking naturally" https://example.com/headshot.jpg 5 true
34
+ ```
35
+
36
+ ### generate from text
37
+ ```bash
38
+ bun run service/video.ts from_text <prompt> [duration] [upload]
39
+ ```
40
+
41
+ **parameters:**
42
+ - `prompt` (required): video scene description
43
+ - `duration` (optional): 5 or 10 seconds (default: 5)
44
+ - `upload` (optional): "true" to upload to s3
45
+
46
+ **example:**
47
+ ```bash
48
+ bun run service/video.ts from_text "waves crashing on beach at sunset" 10 true
49
+ ```
50
+
51
+ ## as library
52
+
53
+ ```typescript
54
+ import { generateVideoFromImage, generateVideoFromText } from "./service/video"
55
+
56
+ // animate an image
57
+ const videoResult = await generateVideoFromImage(
58
+ "camera zoom in slowly",
59
+ "https://example.com/portrait.jpg",
60
+ { duration: 5, upload: true }
61
+ )
62
+ console.log(videoResult.videoUrl)
63
+ console.log(videoResult.uploaded) // s3 url if upload=true
64
+
65
+ // generate from text
66
+ const textVideo = await generateVideoFromText(
67
+ "forest path with sunlight filtering through trees",
68
+ { duration: 10, upload: true }
69
+ )
70
+ ```
71
+
72
+ ## output
73
+
74
+ returns `VideoGenerationResult`:
75
+ ```typescript
76
+ {
77
+ videoUrl: string, // direct video url
78
+ duration?: number, // actual video duration
79
+ uploaded?: string // s3 url if upload requested
80
+ }
81
+ ```
82
+
83
+ ## when to use
84
+
85
+ use this skill when:
86
+ - animating character headshots or portraits
87
+ - creating motion from static images
88
+ - generating video clips from text descriptions
89
+ - preparing videos for lipsync or editing pipeline
90
+ - need short form video content (5-10s)
91
+
92
+ ## tips
93
+
94
+ **for character animation:**
95
+ - use subtle prompts like "person talking naturally" or "slight head movement"
96
+ - keep duration at 5 seconds for character shots
97
+ - combine with lipsync for talking videos
98
+
99
+ **for scene generation:**
100
+ - be descriptive about camera movement and scene dynamics
101
+ - 10 seconds works better for landscape/scene videos
102
+
103
+ ## environment variables
104
+
105
+ required:
106
+ - `FAL_API_KEY` - for fal video generation
107
+
108
+ optional (for s3 upload):
109
+ - `CLOUDFLARE_R2_API_URL`
110
+ - `CLOUDFLARE_ACCESS_KEY_ID`
111
+ - `CLOUDFLARE_ACCESS_SECRET`
112
+ - `CLOUDFLARE_R2_BUCKET`
113
+
114
+ ## generation time
115
+
116
+ expect 2-3 minutes per video clip
@@ -0,0 +1,125 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * video generation service combining fal and higgsfield
4
+ * usage: bun run service/video.ts <command> <args>
5
+ */
6
+
7
+ import type { ActionMeta } from "../../cli/types";
8
+ import { imageToVideo, textToVideo } from "../../lib/fal";
9
+ import { uploadFromUrl } from "../../utilities/s3";
10
+
11
+ export const meta: ActionMeta = {
12
+ name: "video",
13
+ type: "action",
14
+ description: "generate video from text or image",
15
+ inputType: "text/image",
16
+ outputType: "video",
17
+ schema: {
18
+ input: {
19
+ type: "object",
20
+ required: ["prompt"],
21
+ properties: {
22
+ prompt: { type: "string", description: "what to generate" },
23
+ image: {
24
+ type: "string",
25
+ format: "file-path",
26
+ description: "input image (enables image-to-video)",
27
+ },
28
+ duration: {
29
+ type: "integer",
30
+ enum: [5, 10],
31
+ default: 5,
32
+ description: "video duration in seconds",
33
+ },
34
+ },
35
+ },
36
+ output: { type: "string", format: "file-path", description: "video path" },
37
+ },
38
+ async run(options) {
39
+ const { prompt, image, duration } = options as {
40
+ prompt: string;
41
+ image?: string;
42
+ duration?: 5 | 10;
43
+ };
44
+ if (image) {
45
+ return generateVideoFromImage(prompt, image, { duration });
46
+ }
47
+ return generateVideoFromText(prompt, { duration });
48
+ },
49
+ };
50
+
51
+ export interface VideoGenerationResult {
52
+ videoUrl: string;
53
+ duration?: number;
54
+ uploaded?: string;
55
+ }
56
+
57
+ export async function generateVideoFromImage(
58
+ prompt: string,
59
+ imageUrl: string,
60
+ options: { duration?: 5 | 10; upload?: boolean } = {},
61
+ ): Promise<VideoGenerationResult> {
62
+ console.log("[service/video] generating video from image");
63
+
64
+ const result = await imageToVideo({
65
+ prompt,
66
+ imageUrl,
67
+ duration: options.duration,
68
+ });
69
+
70
+ const videoUrl = result.data?.video?.url;
71
+ if (!videoUrl) {
72
+ throw new Error("no video url in result");
73
+ }
74
+
75
+ let uploaded: string | undefined;
76
+ if (options.upload) {
77
+ const timestamp = Date.now();
78
+ const objectKey = `videos/generated/${timestamp}.mp4`;
79
+ uploaded = await uploadFromUrl(videoUrl, objectKey);
80
+ console.log(`[service/video] uploaded to ${uploaded}`);
81
+ }
82
+
83
+ return {
84
+ videoUrl,
85
+ duration: result.data?.duration,
86
+ uploaded,
87
+ };
88
+ }
89
+
90
+ export async function generateVideoFromText(
91
+ prompt: string,
92
+ options: { duration?: 5 | 10; upload?: boolean } = {},
93
+ ): Promise<VideoGenerationResult> {
94
+ console.log("[service/video] generating video from text");
95
+
96
+ const result = await textToVideo({
97
+ prompt,
98
+ duration: options.duration,
99
+ });
100
+
101
+ const videoUrl = result.data?.video?.url;
102
+ if (!videoUrl) {
103
+ throw new Error("no video url in result");
104
+ }
105
+
106
+ let uploaded: string | undefined;
107
+ if (options.upload) {
108
+ const timestamp = Date.now();
109
+ const objectKey = `videos/generated/${timestamp}.mp4`;
110
+ uploaded = await uploadFromUrl(videoUrl, objectKey);
111
+ console.log(`[service/video] uploaded to ${uploaded}`);
112
+ }
113
+
114
+ return {
115
+ videoUrl,
116
+ duration: result.data?.duration,
117
+ uploaded,
118
+ };
119
+ }
120
+
121
+ // cli
122
+ if (import.meta.main) {
123
+ const { runCli } = await import("../../cli/runner");
124
+ runCli(meta);
125
+ }
@@ -0,0 +1,125 @@
1
+ ---
2
+ name: voice-synthesis
3
+ description: generate realistic text-to-speech audio using elevenlabs with multiple voice options. use when user needs voiceovers, narration, character voices, or audio for lipsync videos.
4
+ allowed-tools: Read, Bash
5
+ ---
6
+
7
+ # voice synthesis
8
+
9
+ generate high-quality text-to-speech audio with elevenlabs.
10
+
11
+ ## available voices
12
+
13
+ - **rachel** - clear, professional female voice
14
+ - **domi** - warm, friendly female voice
15
+ - **bella** - energetic female voice
16
+ - **antoni** - friendly male voice
17
+ - **elli** - young, clear female voice
18
+ - **josh** - deep, clear male voice
19
+ - **arnold** - strong, authoritative male voice
20
+ - **adam** - natural, conversational male voice
21
+ - **sam** - raspy, character male voice
22
+
23
+ ## usage
24
+
25
+ ### generate voice
26
+ ```bash
27
+ bun run service/voice.ts generate <text> [voice] [provider] [upload]
28
+ ```
29
+
30
+ **parameters:**
31
+ - `text` (required): text to convert to speech
32
+ - `voice` (optional): voice name (default: rachel)
33
+ - `provider` (optional): elevenlabs (default)
34
+ - `upload` (optional): "true" to upload to s3
35
+
36
+ **example:**
37
+ ```bash
38
+ bun run service/voice.ts generate "hello world, this is my voice" rachel elevenlabs true
39
+ ```
40
+
41
+ ### shorthand for elevenlabs
42
+ ```bash
43
+ bun run service/voice.ts elevenlabs <text> [voice] [upload]
44
+ ```
45
+
46
+ **example:**
47
+ ```bash
48
+ bun run service/voice.ts elevenlabs "welcome to our video" josh true
49
+ ```
50
+
51
+ ## as library
52
+
53
+ ```typescript
54
+ import { generateVoice } from "./service/voice"
55
+
56
+ const result = await generateVoice({
57
+ text: "hello world",
58
+ voice: "rachel",
59
+ provider: "elevenlabs",
60
+ upload: true,
61
+ outputPath: "media/voiceover.mp3"
62
+ })
63
+
64
+ console.log(result.provider)
65
+ console.log(result.voiceId)
66
+ console.log(result.uploadUrl)
67
+ ```
68
+
69
+ ## output
70
+
71
+ returns `VoiceResult`:
72
+ ```typescript
73
+ {
74
+ audio: Buffer, // raw audio buffer
75
+ provider: string, // "elevenlabs"
76
+ voiceId: string, // actual voice id used
77
+ uploadUrl?: string // s3 url if upload requested
78
+ }
79
+ ```
80
+
81
+ saves audio file to `media/voice-{timestamp}.mp3`
82
+
83
+ ## when to use
84
+
85
+ use this skill when:
86
+ - creating voiceovers for videos
87
+ - generating narration or character dialogue
88
+ - preparing audio for lipsync videos
89
+ - need text-to-speech for talking character pipeline
90
+ - testing different voice options
91
+
92
+ ## tips
93
+
94
+ **voice selection:**
95
+ - use **rachel** or **josh** for professional narration
96
+ - use **bella** or **antoni** for friendly, casual content
97
+ - use **arnold** for authoritative or dramatic content
98
+ - use **sam** for character or stylized voices
99
+
100
+ **text formatting:**
101
+ - add punctuation for natural pauses
102
+ - use shorter sentences for clearer speech
103
+ - spell out numbers and abbreviations
104
+
105
+ ## integration with other services
106
+
107
+ perfect companion for:
108
+ - **lipsync service** - sync generated voice with video
109
+ - **video generation** - create talking character videos
110
+ - **captions service** - auto-generate subtitles from voiceover
111
+
112
+ ## environment variables
113
+
114
+ required:
115
+ - `ELEVENLABS_API_KEY` - for voice generation
116
+
117
+ optional (for s3 upload):
118
+ - `CLOUDFLARE_R2_API_URL`
119
+ - `CLOUDFLARE_ACCESS_KEY_ID`
120
+ - `CLOUDFLARE_ACCESS_SECRET`
121
+ - `CLOUDFLARE_R2_BUCKET`
122
+
123
+ ## generation time
124
+
125
+ expect 5-15 seconds depending on text length
@@ -0,0 +1,136 @@
1
+ #!/usr/bin/env bun
2
+
3
+ /**
4
+ * voice service - high-level voice generation combining multiple providers
5
+ * supports elevenlabs and future providers
6
+ */
7
+
8
+ import type { ActionMeta } from "../../cli/types";
9
+ import { textToSpeech, VOICES } from "../../lib/elevenlabs";
10
+ import { uploadFile } from "../../utilities/s3";
11
+
12
+ export const meta: ActionMeta = {
13
+ name: "voice",
14
+ type: "action",
15
+ description: "text to speech generation",
16
+ inputType: "text",
17
+ outputType: "audio",
18
+ schema: {
19
+ input: {
20
+ type: "object",
21
+ required: ["text"],
22
+ properties: {
23
+ text: { type: "string", description: "text to convert to speech" },
24
+ voice: {
25
+ type: "string",
26
+ enum: ["rachel", "domi", "bella", "antoni", "josh", "adam", "sam"],
27
+ default: "rachel",
28
+ description: "voice to use",
29
+ },
30
+ output: {
31
+ type: "string",
32
+ format: "file-path",
33
+ description: "output file path",
34
+ },
35
+ },
36
+ },
37
+ output: { type: "string", format: "file-path", description: "audio path" },
38
+ },
39
+ async run(options) {
40
+ const { text, voice, output } = options as {
41
+ text: string;
42
+ voice?: string;
43
+ output?: string;
44
+ };
45
+ return generateVoice({ text, voice, outputPath: output });
46
+ },
47
+ };
48
+
49
+ // types
50
+ export interface GenerateVoiceOptions {
51
+ text: string;
52
+ voice?: string;
53
+ provider?: "elevenlabs";
54
+ upload?: boolean;
55
+ outputPath?: string;
56
+ }
57
+
58
+ export interface VoiceResult {
59
+ audio: Buffer;
60
+ provider: string;
61
+ voiceId: string;
62
+ uploadUrl?: string;
63
+ }
64
+
65
+ // core functions
66
+ export async function generateVoice(
67
+ options: GenerateVoiceOptions,
68
+ ): Promise<VoiceResult> {
69
+ const {
70
+ text,
71
+ voice = "rachel",
72
+ provider = "elevenlabs",
73
+ upload = false,
74
+ outputPath,
75
+ } = options;
76
+
77
+ if (!text) {
78
+ throw new Error("text is required");
79
+ }
80
+
81
+ console.log(`[voice] generating with ${provider} (${voice})...`);
82
+
83
+ let audio: Buffer;
84
+ let voiceId: string;
85
+
86
+ switch (provider) {
87
+ case "elevenlabs": {
88
+ // map friendly names to voice ids
89
+ const voiceMap: Record<string, string> = {
90
+ rachel: VOICES.RACHEL,
91
+ domi: VOICES.DOMI,
92
+ bella: VOICES.BELLA,
93
+ antoni: VOICES.ANTONI,
94
+ elli: VOICES.ELLI,
95
+ josh: VOICES.JOSH,
96
+ arnold: VOICES.ARNOLD,
97
+ adam: VOICES.ADAM,
98
+ sam: VOICES.SAM,
99
+ };
100
+
101
+ voiceId = voiceMap[voice.toLowerCase()] || voice;
102
+
103
+ audio = await textToSpeech({
104
+ text,
105
+ voiceId,
106
+ outputPath,
107
+ });
108
+ break;
109
+ }
110
+
111
+ default:
112
+ throw new Error(`unsupported provider: ${provider}`);
113
+ }
114
+
115
+ const result: VoiceResult = {
116
+ audio,
117
+ provider,
118
+ voiceId,
119
+ };
120
+
121
+ // upload to s3 if requested
122
+ if (upload && outputPath) {
123
+ const objectKey = `voice/${Date.now()}-${voice}.mp3`;
124
+ const uploadUrl = await uploadFile(outputPath, objectKey);
125
+ result.uploadUrl = uploadUrl;
126
+ console.log(`[voice] uploaded to ${uploadUrl}`);
127
+ }
128
+
129
+ return result;
130
+ }
131
+
132
+ // cli
133
+ if (import.meta.main) {
134
+ const { runCli } = await import("../../cli/runner");
135
+ runCli(meta);
136
+ }