@vibeframe/cli 0.27.0 → 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/agent/adapters/index.d.ts +1 -0
- package/dist/agent/adapters/index.d.ts.map +1 -1
- package/dist/agent/adapters/index.js +5 -0
- package/dist/agent/adapters/index.js.map +1 -1
- package/dist/agent/adapters/openrouter.d.ts +16 -0
- package/dist/agent/adapters/openrouter.d.ts.map +1 -0
- package/dist/agent/adapters/openrouter.js +100 -0
- package/dist/agent/adapters/openrouter.js.map +1 -0
- package/dist/agent/types.d.ts +1 -1
- package/dist/agent/types.d.ts.map +1 -1
- package/dist/commands/agent.d.ts.map +1 -1
- package/dist/commands/agent.js +3 -1
- package/dist/commands/agent.js.map +1 -1
- package/dist/commands/setup.js +5 -2
- package/dist/commands/setup.js.map +1 -1
- package/dist/config/schema.d.ts +2 -1
- package/dist/config/schema.d.ts.map +1 -1
- package/dist/config/schema.js +2 -0
- package/dist/config/schema.js.map +1 -1
- package/dist/index.js +0 -0
- package/package.json +16 -12
- package/.turbo/turbo-build.log +0 -4
- package/.turbo/turbo-lint.log +0 -21
- package/.turbo/turbo-test.log +0 -689
- package/src/agent/adapters/claude.ts +0 -143
- package/src/agent/adapters/gemini.ts +0 -159
- package/src/agent/adapters/index.ts +0 -61
- package/src/agent/adapters/ollama.ts +0 -231
- package/src/agent/adapters/openai.ts +0 -116
- package/src/agent/adapters/xai.ts +0 -119
- package/src/agent/index.ts +0 -251
- package/src/agent/memory/index.ts +0 -151
- package/src/agent/prompts/system.ts +0 -106
- package/src/agent/tools/ai-editing.ts +0 -845
- package/src/agent/tools/ai-generation.ts +0 -1073
- package/src/agent/tools/ai-pipeline.ts +0 -1055
- package/src/agent/tools/ai.ts +0 -21
- package/src/agent/tools/batch.ts +0 -429
- package/src/agent/tools/e2e.test.ts +0 -545
- package/src/agent/tools/export.ts +0 -184
- package/src/agent/tools/filesystem.ts +0 -237
- package/src/agent/tools/index.ts +0 -150
- package/src/agent/tools/integration.test.ts +0 -775
- package/src/agent/tools/media.ts +0 -697
- package/src/agent/tools/project.ts +0 -313
- package/src/agent/tools/timeline.ts +0 -951
- package/src/agent/types.ts +0 -68
- package/src/commands/agent.ts +0 -340
- package/src/commands/ai-analyze.ts +0 -429
- package/src/commands/ai-animated-caption.ts +0 -390
- package/src/commands/ai-audio.ts +0 -941
- package/src/commands/ai-broll.ts +0 -490
- package/src/commands/ai-edit-cli.ts +0 -658
- package/src/commands/ai-edit.ts +0 -1542
- package/src/commands/ai-fill-gaps.ts +0 -566
- package/src/commands/ai-helpers.ts +0 -65
- package/src/commands/ai-highlights.ts +0 -1303
- package/src/commands/ai-image.ts +0 -761
- package/src/commands/ai-motion.ts +0 -347
- package/src/commands/ai-narrate.ts +0 -451
- package/src/commands/ai-review.ts +0 -309
- package/src/commands/ai-script-pipeline-cli.ts +0 -1710
- package/src/commands/ai-script-pipeline.ts +0 -1365
- package/src/commands/ai-suggest-edit.ts +0 -264
- package/src/commands/ai-video-fx.ts +0 -445
- package/src/commands/ai-video.ts +0 -915
- package/src/commands/ai-viral.ts +0 -595
- package/src/commands/ai-visual-fx.ts +0 -601
- package/src/commands/ai.test.ts +0 -627
- package/src/commands/ai.ts +0 -307
- package/src/commands/analyze.ts +0 -282
- package/src/commands/audio.ts +0 -644
- package/src/commands/batch.test.ts +0 -279
- package/src/commands/batch.ts +0 -440
- package/src/commands/detect.ts +0 -329
- package/src/commands/doctor.ts +0 -237
- package/src/commands/edit-cmd.ts +0 -1014
- package/src/commands/export.ts +0 -918
- package/src/commands/generate.ts +0 -2146
- package/src/commands/media.ts +0 -177
- package/src/commands/output.ts +0 -142
- package/src/commands/pipeline.ts +0 -398
- package/src/commands/project.test.ts +0 -127
- package/src/commands/project.ts +0 -149
- package/src/commands/sanitize.ts +0 -60
- package/src/commands/schema.ts +0 -130
- package/src/commands/setup.ts +0 -509
- package/src/commands/timeline.test.ts +0 -499
- package/src/commands/timeline.ts +0 -529
- package/src/commands/validate.ts +0 -77
- package/src/config/config.test.ts +0 -197
- package/src/config/index.ts +0 -125
- package/src/config/schema.ts +0 -82
- package/src/engine/index.ts +0 -2
- package/src/engine/project.test.ts +0 -702
- package/src/engine/project.ts +0 -439
- package/src/index.ts +0 -146
- package/src/utils/api-key.test.ts +0 -41
- package/src/utils/api-key.ts +0 -247
- package/src/utils/audio.ts +0 -83
- package/src/utils/exec-safe.ts +0 -75
- package/src/utils/first-run.ts +0 -52
- package/src/utils/provider-resolver.ts +0 -56
- package/src/utils/remotion.ts +0 -951
- package/src/utils/subtitle.test.ts +0 -227
- package/src/utils/subtitle.ts +0 -169
- package/src/utils/tty.ts +0 -196
- package/tsconfig.json +0 -20
package/src/commands/audio.ts
DELETED
|
@@ -1,644 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @module audio
|
|
3
|
-
*
|
|
4
|
-
* Top-level `vibe audio` command group for audio operations.
|
|
5
|
-
*
|
|
6
|
-
* Commands:
|
|
7
|
-
* audio transcribe - Transcribe audio using Whisper
|
|
8
|
-
* audio voices - List available ElevenLabs voices
|
|
9
|
-
* audio isolate - Isolate vocals from audio (ElevenLabs)
|
|
10
|
-
* audio voice-clone - Clone a voice from audio samples (ElevenLabs)
|
|
11
|
-
* audio dub - Dub audio/video to another language (Whisper + Claude + ElevenLabs)
|
|
12
|
-
* audio duck - Auto-duck background music when voice is present (FFmpeg)
|
|
13
|
-
*
|
|
14
|
-
* @dependencies Whisper (OpenAI), ElevenLabs, Claude (Anthropic), FFmpeg
|
|
15
|
-
*/
|
|
16
|
-
|
|
17
|
-
import { Command } from "commander";
|
|
18
|
-
import { resolve, dirname, basename, extname } from "node:path";
|
|
19
|
-
import { readFile, writeFile } from "node:fs/promises";
|
|
20
|
-
import { existsSync } from "node:fs";
|
|
21
|
-
import chalk from "chalk";
|
|
22
|
-
import ora from "ora";
|
|
23
|
-
import {
|
|
24
|
-
WhisperProvider,
|
|
25
|
-
ElevenLabsProvider,
|
|
26
|
-
ClaudeProvider,
|
|
27
|
-
} from "@vibeframe/ai-providers";
|
|
28
|
-
import { getApiKey, requireApiKey } from "../utils/api-key.js";
|
|
29
|
-
import { execSafe, commandExists, execSafeSync } from "../utils/exec-safe.js";
|
|
30
|
-
import { detectFormat, formatTranscript } from "../utils/subtitle.js";
|
|
31
|
-
import { formatTime } from "./ai-helpers.js";
|
|
32
|
-
import { isJsonMode, outputResult } from "./output.js";
|
|
33
|
-
import { rejectControlChars } from "./validate.js";
|
|
34
|
-
|
|
35
|
-
/**
 * Root of the `vibe audio` command group (alias `au`).
 *
 * Sub-commands attach themselves to this instance; the text registered here
 * is appended after the generated help output.
 */
export const audioCommand = new Command("audio");

audioCommand.alias("au");
audioCommand.description("Audio operations (transcribe, TTS, voice clone, ducking)");

// One entry per help line; joined with newlines so the rendered text starts
// and ends with a line break, exactly like a multi-line template literal.
audioCommand.addHelpText(
  "after",
  [
    "",
    "Examples:",
    "$ vibe audio transcribe interview.mp3 -o transcript.srt -f srt",
    "$ vibe audio transcribe video.mp4 -l ko # Specify language",
    "$ vibe audio voices # List available voices",
    "$ vibe audio isolate song.mp3 -o vocals.mp3",
    "$ vibe audio voice-clone sample.mp3 -n \"my-voice\"",
    "$ vibe audio dub video.mp4 -l ko -o dubbed.mp4",
    "$ vibe audio duck music.mp3 --voice narration.mp3 -o ducked.mp3",
    "",
    "API Keys:",
    "OPENAI_API_KEY transcribe (Whisper)",
    "ELEVENLABS_API_KEY voices, isolate, voice-clone",
    "OPENAI_API_KEY + ANTHROPIC_API_KEY + ELEVENLABS_API_KEY dub (full pipeline)",
    "No key needed duck (FFmpeg only)",
    "",
    "Run 'vibe schema audio.<command>' for structured parameter info.",
    "",
  ].join("\n")
);
|
|
59
|
-
|
|
60
|
-
// ── audio transcribe ───────────────────────────────────────────────────
|
|
61
|
-
|
|
62
|
-
/**
 * `vibe audio transcribe <audio>` — transcribe an audio file with Whisper.
 *
 * Flow: resolve the OpenAI key, read the file into a Blob, call
 * `whisper.transcribe`, then
 *   1. write the transcript to `--output` (format from `detectFormat`) if requested,
 *   2. emit either one structured JSON result or a human-readable listing.
 *
 * The process exits with status 1 when the provider reports a failed status
 * or when any step throws.
 */
audioCommand
  .command("transcribe")
  .description("Transcribe audio using Whisper")
  .argument("<audio>", "Audio file path")
  .option("-k, --api-key <key>", "OpenAI API key (or set OPENAI_API_KEY env)")
  .option("-l, --language <lang>", "Language code (e.g., en, ko)")
  .option("-o, --output <path>", "Output file path")
  .option("-f, --format <format>", "Output format: json, srt, vtt (auto-detected from extension)")
  .action(async (audioPath: string, options) => {
    // Declared outside the try block so the catch handler can stop a spinner
    // that is still animating when an exception interrupts the pipeline.
    let spinner: ReturnType<typeof ora> | undefined;

    try {
      const apiKey = await requireApiKey("OPENAI_API_KEY", "OpenAI", options.apiKey);

      spinner = ora("Initializing Whisper...").start();

      const whisper = new WhisperProvider();
      await whisper.initialize({ apiKey });

      spinner.text = "Reading audio file...";
      const absPath = resolve(process.cwd(), audioPath);
      const audioBuffer = await readFile(absPath);
      const audioBlob = new Blob([audioBuffer]);

      spinner.text = "Transcribing...";
      const result = await whisper.transcribe(audioBlob, options.language);

      if (result.status === "failed") {
        spinner.fail(chalk.red(`Transcription failed: ${result.error}`));
        process.exit(1);
      }

      spinner.succeed(chalk.green("Transcription complete"));

      // Write the transcript BEFORE branching on the output mode: JSON mode
      // reports `outputPath`, so the file it points at has to exist.
      let savedPath: string | undefined;
      let savedFormatLabel = "";
      if (options.output) {
        const outputPath = resolve(process.cwd(), options.output);
        const format = detectFormat(options.output, options.format);
        const content = formatTranscript(result, format);
        await writeFile(outputPath, content, "utf-8");
        savedPath = outputPath;
        savedFormatLabel = format.toUpperCase();
      }

      if (isJsonMode()) {
        outputResult({
          success: true,
          fullText: result.fullText,
          segments: result.segments,
          language: result.detectedLanguage,
          outputPath: savedPath,
        });
        return;
      }

      console.log();
      console.log(chalk.bold.cyan("Transcript"));
      console.log(chalk.dim("─".repeat(60)));
      console.log(result.fullText);
      console.log();

      if (result.segments && result.segments.length > 0) {
        console.log(chalk.bold.cyan("Segments"));
        console.log(chalk.dim("─".repeat(60)));
        for (const seg of result.segments) {
          const time = `[${formatTime(seg.startTime)} - ${formatTime(seg.endTime)}]`;
          console.log(`${chalk.dim(time)} ${seg.text}`);
        }
        console.log();
      }

      if (savedPath) {
        console.log(chalk.green(`Saved ${savedFormatLabel} to: ${savedPath}`));
      }
    } catch (error) {
      // Stopping an already finished spinner is a no-op; this only matters
      // when the exception happened while it was still spinning.
      spinner?.stop();
      console.error(chalk.red("Transcription failed"));
      console.error(error);
      process.exit(1);
    }
  });
|
|
128
|
-
|
|
129
|
-
// ── audio voices ───────────────────────────────────────────────────────
|
|
130
|
-
|
|
131
|
-
/**
 * `vibe audio voices` — fetch the ElevenLabs voice catalogue and list it.
 *
 * JSON mode emits `{ success, voices: [{ name, voiceId, category, labels }] }`;
 * otherwise each voice is printed with its id and category.
 * Any thrown error is reported and the process exits with status 1.
 */
audioCommand
  .command("voices")
  .description("List available ElevenLabs voices")
  .option("-k, --api-key <key>", "ElevenLabs API key (or set ELEVENLABS_API_KEY env)")
  .action(async (options) => {
    try {
      const apiKey = await requireApiKey("ELEVENLABS_API_KEY", "ElevenLabs", options.apiKey);

      const progress = ora("Fetching voices...").start();
      const provider = new ElevenLabsProvider();
      await provider.initialize({ apiKey });

      const catalogue = await provider.getVoices();
      progress.succeed(chalk.green(`Found ${catalogue.length} voices`));

      if (isJsonMode()) {
        // Structured consumers get camelCase `voiceId` instead of the raw API field.
        const voices = catalogue.map(({ name, voice_id, category, labels }) => ({
          name,
          voiceId: voice_id,
          category,
          labels,
        }));
        outputResult({ success: true, voices });
        return;
      }

      const divider = chalk.dim("─".repeat(60));
      console.log();
      console.log(chalk.bold.cyan("Available Voices"));
      console.log(divider);

      for (const { name, voice_id: voiceId, category } of catalogue) {
        const idTag = chalk.dim(`(${voiceId})`);
        console.log();
        console.log(`${chalk.bold(name)} ${idTag}`);
        console.log(` Category: ${category}`);
      }
      console.log();
    } catch (error) {
      console.error(chalk.red("Failed to fetch voices"));
      console.error(error);
      process.exit(1);
    }
  });
|
|
167
|
-
|
|
168
|
-
// ── audio isolate ──────────────────────────────────────────────────────
|
|
169
|
-
|
|
170
|
-
/**
 * `vibe audio isolate <audio>` — send an audio file to ElevenLabs vocal
 * isolation and write the returned audio to `--output` (default `vocals.mp3`).
 *
 * `--dry-run` only echoes the parameters. A provider failure, or a response
 * without audio data, ends the process with status 1.
 */
audioCommand
  .command("isolate")
  .description("Isolate vocals from audio using ElevenLabs")
  .argument("<audio>", "Input audio file path")
  .option("-k, --api-key <key>", "ElevenLabs API key (or set ELEVENLABS_API_KEY env)")
  .option("-o, --output <path>", "Output audio file path", "vocals.mp3")
  .option("--dry-run", "Preview parameters without executing")
  .action(async (audioPath: string, options) => {
    try {
      if (options.dryRun) {
        outputResult({ dryRun: true, command: "audio isolate", audioPath });
        return;
      }

      const apiKey = await requireApiKey("ELEVENLABS_API_KEY", "ElevenLabs", options.apiKey);

      const progress = ora("Reading audio file...").start();
      const sourceBytes = await readFile(resolve(process.cwd(), audioPath));

      progress.text = "Isolating vocals...";
      const provider = new ElevenLabsProvider();
      await provider.initialize({ apiKey });
      const isolation = await provider.isolateVocals(sourceBytes);

      // Only a successful response that actually carries audio is usable.
      const vocals = isolation.success ? isolation.audioBuffer : undefined;
      if (!vocals) {
        progress.fail(chalk.red(isolation.error || "Audio isolation failed"));
        process.exit(1);
      }

      const outputPath = resolve(process.cwd(), options.output);
      await writeFile(outputPath, vocals);
      progress.succeed(chalk.green("Vocals isolated"));

      if (!isJsonMode()) {
        console.log(chalk.green(`Saved to: ${outputPath}`));
        console.log();
        return;
      }
      outputResult({ success: true, outputPath });
    } catch (error) {
      console.error(chalk.red("Audio isolation failed"));
      console.error(error);
      process.exit(1);
    }
  });
|
|
221
|
-
|
|
222
|
-
// ── audio voice-clone ──────────────────────────────────────────────────
|
|
223
|
-
|
|
224
|
-
/**
 * `vibe audio voice-clone [samples...]` — create an ElevenLabs voice from
 * local audio samples, or list the existing voices with `--list`.
 *
 * Clone mode requires `--name` and at least one sample file. `--labels` must
 * be a JSON object and is validated before any sample is read, so a typo in
 * the flag is reported as such instead of as a generic cloning failure.
 * `--dry-run` only echoes the parameters.
 *
 * The process exits with status 1 on invalid input, a missing sample file,
 * a provider-reported failure, or any thrown error.
 */
audioCommand
  .command("voice-clone")
  .description("Clone a voice from audio samples using ElevenLabs")
  .argument("[samples...]", "Audio sample files (1-25 files)")
  .option("-k, --api-key <key>", "ElevenLabs API key (or set ELEVENLABS_API_KEY env)")
  .option("-n, --name <name>", "Voice name (required)")
  .option("-d, --description <desc>", "Voice description")
  .option("--labels <json>", "Labels as JSON (e.g., '{\"accent\": \"american\"}')")
  .option("--remove-noise", "Remove background noise from samples")
  .option("--list", "List all available voices")
  .option("--dry-run", "Preview parameters without executing")
  .action(async (samples: string[], options) => {
    // Tracks whichever spinner is currently running so the catch handler can
    // stop it before printing the error.
    let activeSpinner: ReturnType<typeof ora> | undefined;

    try {
      if (options.dryRun) {
        outputResult({ dryRun: true, command: "audio voice-clone", samples: samples?.length, name: options.name });
        return;
      }

      const apiKey = await requireApiKey("ELEVENLABS_API_KEY", "ElevenLabs", options.apiKey);

      const elevenlabs = new ElevenLabsProvider();
      await elevenlabs.initialize({ apiKey });

      // List voices mode
      if (options.list) {
        const listSpinner = ora("Fetching voices...").start();
        activeSpinner = listSpinner;
        const voices = await elevenlabs.getVoices();
        listSpinner.succeed(chalk.green(`Found ${voices.length} voices`));

        // Same structured shape as `vibe audio voices`, so scripts can use
        // either entry point interchangeably.
        if (isJsonMode()) {
          outputResult({ success: true, voices: voices.map(v => ({ name: v.name, voiceId: v.voice_id, category: v.category, labels: v.labels })) });
          return;
        }

        console.log();
        console.log(chalk.bold.cyan("Available Voices"));
        console.log(chalk.dim("─".repeat(60)));

        for (const voice of voices) {
          const category = chalk.dim(`(${voice.category})`);
          console.log(`${chalk.bold(voice.name)} ${category}`);
          console.log(` ${chalk.dim("ID:")} ${voice.voice_id}`);
          if (voice.labels && Object.keys(voice.labels).length > 0) {
            console.log(` ${chalk.dim("Labels:")} ${JSON.stringify(voice.labels)}`);
          }
          console.log();
        }
        return;
      }

      // Clone voice mode
      if (!options.name) {
        console.error(chalk.red("Voice name is required. Use --name <name>"));
        process.exit(1);
      }

      rejectControlChars(options.name);

      if (!samples || samples.length === 0) {
        console.error(chalk.red("At least one audio sample is required"));
        process.exit(1);
      }

      // Validate --labels up front: malformed JSON is a usage error and should
      // not surface only after every sample has been read from disk.
      let labels;
      if (options.labels) {
        try {
          labels = JSON.parse(options.labels);
        } catch {
          console.error(chalk.red("Invalid --labels value: expected JSON such as '{\"accent\": \"american\"}'"));
          process.exit(1);
        }
        if (labels === null || typeof labels !== "object" || Array.isArray(labels)) {
          console.error(chalk.red("Invalid --labels value: expected a JSON object"));
          process.exit(1);
        }
      }

      const spinner = ora("Reading audio samples...").start();
      activeSpinner = spinner;

      const audioBuffers: Buffer[] = [];
      for (const samplePath of samples) {
        const absPath = resolve(process.cwd(), samplePath);
        if (!existsSync(absPath)) {
          spinner.fail(chalk.red(`File not found: ${samplePath}`));
          process.exit(1);
        }
        const buffer = await readFile(absPath);
        audioBuffers.push(buffer);
      }

      spinner.text = `Cloning voice from ${audioBuffers.length} sample(s)...`;

      const result = await elevenlabs.cloneVoice(audioBuffers, {
        name: options.name,
        description: options.description,
        labels,
        removeBackgroundNoise: options.removeNoise,
      });

      if (!result.success) {
        spinner.fail(chalk.red(result.error || "Voice cloning failed"));
        process.exit(1);
      }

      spinner.succeed(chalk.green("Voice cloned successfully"));

      if (isJsonMode()) {
        outputResult({ success: true, name: options.name, voiceId: result.voiceId });
        return;
      }

      console.log();
      console.log(chalk.bold.cyan("Voice Details"));
      console.log(chalk.dim("─".repeat(60)));
      console.log(`Name: ${chalk.bold(options.name)}`);
      console.log(`Voice ID: ${chalk.bold(result.voiceId)}`);
      console.log();
      console.log(chalk.dim("Use this voice ID with:"));
      console.log(chalk.dim(` pnpm vibe audio tts "Hello world" -v ${result.voiceId}`));
      console.log();
    } catch (error) {
      // No-op when the spinner already finished; otherwise stops the animation
      // so it does not interleave with the error output.
      activeSpinner?.stop();
      console.error(chalk.red("Voice cloning failed"));
      console.error(error);
      process.exit(1);
    }
  });
|
|
333
|
-
|
|
334
|
-
// ── audio dub ──────────────────────────────────────────────────────────
|
|
335
|
-
|
|
336
|
-
/**
 * `vibe audio dub <media>` — dub an audio or video file into another language.
 *
 * Pipeline:
 *   1. For video inputs, extract the audio track to a temporary MP3 with FFmpeg.
 *   2. Transcribe the audio with Whisper.
 *   3. Ask Claude for a translation of every segment (falls back to the
 *      original text when the call fails or returns nothing).
 *   4. Unless `--analyze-only`, synthesize each translated segment with
 *      ElevenLabs and write the concatenated result.
 *
 * `--analyze-only` prints the timing analysis and, with `--output`, saves it
 * as JSON. `--dry-run` only echoes the parameters.
 *
 * The temporary audio file is removed on every exit path: success, JSON
 * output, analyze-only, provider failure and thrown errors.
 *
 * NOTE(review): the synthesized segments are concatenated back to back; the
 * recorded `startTime` of each segment is not used to re-align the output
 * with the source timeline.
 */
audioCommand
  .command("dub")
  .description("Dub audio/video to another language (transcribe, translate, TTS)")
  .argument("<media>", "Input media file (video or audio)")
  .option("-l, --language <lang>", "Target language code (e.g., es, ko, ja) (required)")
  .option("--source <lang>", "Source language code (default: auto-detect)")
  .option("-v, --voice <id>", "ElevenLabs voice ID for output")
  .option("--analyze-only", "Only analyze and show timing, don't generate audio")
  .option("-o, --output <path>", "Output file path")
  .option("--dry-run", "Preview parameters without executing")
  .action(async (mediaPath: string, options) => {
    // Path of the audio extracted from a video input, if any.
    let tempAudioPath: string | undefined;
    let activeSpinner: ReturnType<typeof ora> | undefined;

    // Best-effort removal of the extracted audio. Must be awaited explicitly
    // before process.exit(), because exit does not run `finally` blocks.
    const removeTempAudio = async (): Promise<void> => {
      const stale = tempAudioPath;
      if (!stale) return;
      tempAudioPath = undefined;
      try {
        const { unlink } = await import("node:fs/promises");
        await unlink(stale);
      } catch {
        // Ignore cleanup errors: the file may never have been created.
      }
    };

    try {
      if (options.dryRun) {
        outputResult({ dryRun: true, command: "audio dub", mediaPath, targetLanguage: options.language, sourceLanguage: options.source, voice: options.voice });
        return;
      }

      if (!options.language) {
        console.error(chalk.red("Target language is required. Use -l or --language"));
        process.exit(1);
      }

      const absPath = resolve(process.cwd(), mediaPath);
      if (!existsSync(absPath)) {
        console.error(chalk.red(`File not found: ${mediaPath}`));
        process.exit(1);
      }

      // Check required API keys. ElevenLabs is optional for --analyze-only
      // because no speech is synthesized in that mode.
      const openaiKey = await requireApiKey("OPENAI_API_KEY", "OpenAI");
      const anthropicKey = await requireApiKey("ANTHROPIC_API_KEY", "Anthropic");
      const elevenlabsKey = options.analyzeOnly
        ? await getApiKey("ELEVENLABS_API_KEY", "ElevenLabs", undefined)
        : await requireApiKey("ELEVENLABS_API_KEY", "ElevenLabs");

      const spinner = ora("Extracting audio...").start();
      activeSpinner = spinner;

      // Check if input is video
      const ext = extname(absPath).toLowerCase();
      const isVideo = [".mp4", ".mov", ".avi", ".mkv", ".webm"].includes(ext);

      // Step 1: Extract audio if video
      let audioPath = absPath;
      if (isVideo) {
        const extractedPath = resolve(dirname(absPath), `temp-audio-${Date.now()}.mp3`);
        // Registered before FFmpeg runs so a partially written file is removed too.
        tempAudioPath = extractedPath;
        try {
          execSafeSync("ffmpeg", ["-i", absPath, "-vn", "-acodec", "mp3", "-y", extractedPath]);
          audioPath = extractedPath;
        } catch {
          spinner.fail(chalk.red("Failed to extract audio from video"));
          await removeTempAudio();
          process.exit(1);
        }
      }

      // Step 2: Transcribe with Whisper
      spinner.text = "Transcribing audio...";
      const whisper = new WhisperProvider();
      await whisper.initialize({ apiKey: openaiKey });

      const audioBuffer = await readFile(audioPath);
      const audioBlob = new Blob([audioBuffer]);

      const transcriptResult = await whisper.transcribe(audioBlob, options.source);

      if (transcriptResult.status === "failed" || !transcriptResult.segments) {
        spinner.fail(chalk.red(`Transcription failed: ${transcriptResult.error}`));
        await removeTempAudio();
        process.exit(1);
      }

      // Step 3: Translate with Claude
      spinner.text = "Translating...";
      const claude = new ClaudeProvider();
      await claude.initialize({ apiKey: anthropicKey });

      // Build translation prompt
      const segments = transcriptResult.segments;
      const segmentTexts = segments.map((s, i) => `[${i}] ${s.text}`).join("\n");

      // Language names for better translation context
      const languageNames: Record<string, string> = {
        en: "English", es: "Spanish", fr: "French", de: "German",
        it: "Italian", pt: "Portuguese", ja: "Japanese", ko: "Korean",
        zh: "Chinese", ar: "Arabic", ru: "Russian", hi: "Hindi",
      };
      const targetLangName = languageNames[options.language] || options.language;

      // Start from the untranslated text so every fallback path shares one default.
      let translatedTexts: string[] = segments.map((s) => s.text);
      let translationFailed = false;

      try {
        const storyboard = await claude.analyzeContent(
          `TRANSLATE to ${targetLangName}. Return the translated text only, preserving segment numbers:\n\n${segmentTexts}`,
          segments[segments.length - 1]?.endTime || 60
        );

        if (storyboard && storyboard.length > 0) {
          // Entry i of the response is taken as the translation of segment i;
          // missing entries keep the original text.
          translatedTexts = segments.map((s, i) => storyboard[i]?.description || s.text);
        }
      } catch {
        // Keep going with the original text, but tell the user below instead
        // of silently producing a "dub" in the source language.
        translationFailed = true;
      }

      const translatedSegments: Array<{ index: number; text: string; startTime: number; endTime: number }> =
        segments.map((s, i) => ({
          index: i,
          text: translatedTexts[i] || s.text,
          startTime: s.startTime,
          endTime: s.endTime,
        }));

      if (translationFailed) {
        spinner.warn(chalk.yellow("Transcription complete; translation failed, using the original text"));
      } else {
        spinner.succeed(chalk.green("Transcription and translation complete"));
      }

      // Display timing analysis
      console.log();
      console.log(chalk.bold.cyan("Dubbing Analysis"));
      console.log(chalk.dim("─".repeat(60)));
      console.log(`Source language: ${transcriptResult.detectedLanguage || options.source || "auto"}`);
      console.log(`Target language: ${targetLangName}`);
      console.log(`Segments: ${segments.length}`);
      console.log();

      console.log(chalk.bold("Segment Timing:"));
      for (let i = 0; i < Math.min(5, segments.length); i++) {
        const seg = segments[i];
        const time = `[${formatTime(seg.startTime)} - ${formatTime(seg.endTime)}]`;
        console.log(`${chalk.dim(time)} ${seg.text}`);
        console.log(`${chalk.dim(" →")} ${chalk.green(translatedSegments[i]?.text || seg.text)}`);
        console.log();
      }

      if (segments.length > 5) {
        console.log(chalk.dim(`... and ${segments.length - 5} more segments`));
      }

      if (options.analyzeOnly) {
        console.log();
        console.log(chalk.dim("Use without --analyze-only to generate dubbed audio"));

        if (options.output) {
          const timingPath = resolve(process.cwd(), options.output);
          const timingData = {
            sourcePath: absPath,
            sourceLanguage: transcriptResult.detectedLanguage || options.source || "auto",
            targetLanguage: options.language,
            segments: segments.map((s, i) => ({
              index: i,
              startTime: s.startTime,
              endTime: s.endTime,
              original: s.text,
              translated: translatedSegments[i]?.text || s.text,
            })),
          };
          await writeFile(timingPath, JSON.stringify(timingData, null, 2));
          console.log(`Timing saved to: ${chalk.bold(timingPath)}`);
        }
        return;
      }

      // Step 4: Generate TTS for each segment
      spinner.start("Generating dubbed audio...");
      const elevenlabs = new ElevenLabsProvider();
      await elevenlabs.initialize({ apiKey: elevenlabsKey! });

      const dubbedAudioBuffers: Array<{ buffer: Buffer; startTime: number }> = [];
      // 1-based numbers of the segments ElevenLabs could not synthesize.
      const failedSegments: number[] = [];

      for (let i = 0; i < translatedSegments.length; i++) {
        spinner.text = `Generating audio segment ${i + 1}/${translatedSegments.length}...`;
        const seg = translatedSegments[i];

        const ttsResult = await elevenlabs.textToSpeech(seg.text, {
          voiceId: options.voice,
        });

        if (ttsResult.success && ttsResult.audioBuffer) {
          dubbedAudioBuffers.push({
            buffer: ttsResult.audioBuffer,
            startTime: seg.startTime,
          });
        } else {
          failedSegments.push(i + 1);
        }
      }

      // Writing an empty file and reporting success would hide a total failure.
      if (dubbedAudioBuffers.length === 0) {
        spinner.fail(chalk.red("Dubbing failed: no audio segment could be generated"));
        await removeTempAudio();
        process.exit(1);
      }

      // Step 5: Combine and save
      spinner.text = "Combining audio...";

      const combinedBuffer = Buffer.concat(dubbedAudioBuffers.map((a) => a.buffer));

      const outputExt = isVideo ? ".mp3" : extname(absPath);
      const defaultOutputPath = resolve(
        dirname(absPath),
        `${basename(absPath, extname(absPath))}-${options.language}${outputExt}`
      );
      const finalOutputPath = resolve(process.cwd(), options.output || defaultOutputPath);

      await writeFile(finalOutputPath, combinedBuffer);

      spinner.succeed(chalk.green("Dubbing complete"));

      if (failedSegments.length > 0) {
        // stderr, so structured output on stdout stays untouched.
        console.error(chalk.yellow(`Warning: ${failedSegments.length} segment(s) could not be synthesized and were skipped: ${failedSegments.join(", ")}`));
      }

      if (isJsonMode()) {
        outputResult({ success: true, sourceLanguage: transcriptResult.detectedLanguage || options.source || "auto", targetLanguage: options.language, segmentCount: translatedSegments.length, outputPath: finalOutputPath });
        return;
      }

      console.log();
      console.log(`Saved to: ${chalk.bold(finalOutputPath)}`);
      console.log();
    } catch (error) {
      activeSpinner?.stop();
      console.error(chalk.red("Dubbing failed"));
      console.error(error);
      await removeTempAudio();
      process.exit(1);
    } finally {
      // Covers every `return` above (dry run, analyze-only, JSON mode, success).
      await removeTempAudio();
    }
  });
|
|
565
|
-
|
|
566
|
-
// ── audio duck ─────────────────────────────────────────────────────────
|
|
567
|
-
|
|
568
|
-
/**
 * `vibe audio duck <music>` — lower background music while a voice track is
 * present, using FFmpeg's `sidechaincompress` filter.
 *
 * The music file is input 0 and the `--voice` file is input 1 (the sidechain).
 * `--threshold` is given in dB and converted to the linear amplitude the
 * filter expects; `--ratio`, `--attack` and `--release` are passed through.
 * Without `--output` the result is written next to the music file with a
 * `-ducked` suffix before the extension.
 *
 * `--dry-run` only echoes the parsed parameters. The process exits with
 * status 1 on invalid options, missing inputs, a missing FFmpeg binary, or
 * an FFmpeg failure.
 */
audioCommand
  .command("duck")
  .description("Auto-duck background music when voice is present (FFmpeg)")
  .argument("<music>", "Background music file path")
  .option("-v, --voice <path>", "Voice/narration track (required)")
  .option("-o, --output <path>", "Output audio file path")
  .option("-t, --threshold <dB>", "Sidechain threshold in dB", "-30")
  .option("-r, --ratio <ratio>", "Compression ratio", "3")
  .option("-a, --attack <ms>", "Attack time in ms", "20")
  .option("-l, --release <ms>", "Release time in ms", "200")
  .option("--dry-run", "Preview parameters without executing")
  .action(async (musicPath: string, options) => {
    let spinner: ReturnType<typeof ora> | undefined;

    try {
      const thresholdDb = parseFloat(options.threshold);
      const ratio = parseFloat(options.ratio);
      const attack = parseFloat(options.attack);
      const release = parseFloat(options.release);

      if (options.dryRun) {
        outputResult({ dryRun: true, command: "audio duck", musicPath, voicePath: options.voice, threshold: thresholdDb, ratio, attack, release });
        return;
      }

      if (!options.voice) {
        console.error(chalk.red("Voice track required. Use --voice <path>"));
        process.exit(1);
      }

      // A non-numeric value parses to NaN and would otherwise be interpolated
      // into the filter string, producing an opaque FFmpeg error.
      const numericOptions: Array<[string, number]> = [
        ["--threshold", thresholdDb],
        ["--ratio", ratio],
        ["--attack", attack],
        ["--release", release],
      ];
      const invalidFlags = numericOptions
        .filter(([, value]) => !Number.isFinite(value))
        .map(([flag]) => flag);
      if (invalidFlags.length > 0) {
        console.error(chalk.red(`Invalid numeric value for: ${invalidFlags.join(", ")}`));
        process.exit(1);
      }

      // Check FFmpeg availability
      if (!commandExists("ffmpeg")) {
        console.error(chalk.red("FFmpeg not found. Please install FFmpeg."));
        process.exit(1);
      }

      const absMusic = resolve(process.cwd(), musicPath);
      const absVoice = resolve(process.cwd(), options.voice);

      for (const [given, absolute] of [[musicPath, absMusic], [options.voice, absVoice]]) {
        if (!existsSync(absolute)) {
          console.error(chalk.red(`File not found: ${given}`));
          process.exit(1);
        }
      }

      // Build the default name from path components rather than a regex: a
      // regex on the whole path leaves extension-less names unchanged (output
      // would equal input) and can match a dot inside a directory name.
      const musicExt = extname(absMusic);
      const outputPath = options.output
        ? resolve(process.cwd(), options.output)
        : resolve(dirname(absMusic), `${basename(absMusic, musicExt)}-ducked${musicExt}`);

      spinner = ora("Processing audio ducking...").start();

      // Convert threshold from dB to linear (0-1 scale)
      const thresholdLinear = Math.pow(10, thresholdDb / 20);

      // FFmpeg sidechain compress filter
      const filterComplex = `[0:a][1:a]sidechaincompress=threshold=${thresholdLinear}:ratio=${ratio}:attack=${attack}:release=${release}[out]`;

      await execSafe("ffmpeg", ["-i", absMusic, "-i", absVoice, "-filter_complex", filterComplex, "-map", "[out]", outputPath, "-y"]);

      spinner.succeed(chalk.green("Audio ducking complete"));

      if (isJsonMode()) {
        outputResult({ success: true, musicPath: absMusic, voicePath: options.voice, threshold: thresholdDb, ratio, outputPath });
        return;
      }

      console.log();
      console.log(chalk.dim("─".repeat(60)));
      console.log(`Music: ${musicPath}`);
      console.log(`Voice: ${options.voice}`);
      console.log(`Threshold: ${thresholdDb}dB`);
      console.log(`Ratio: ${ratio}:1`);
      console.log(`Attack/Release: ${attack}ms / ${release}ms`);
      console.log();
      console.log(chalk.green(`Output: ${outputPath}`));
      console.log();
    } catch (error) {
      // Stop the animation so it does not interleave with the error output.
      spinner?.stop();
      console.error(chalk.red("Audio ducking failed"));
      console.error(error);
      process.exit(1);
    }
  });
|