@vibeframe/cli 0.27.0 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. package/LICENSE +21 -0
  2. package/dist/agent/adapters/index.d.ts +1 -0
  3. package/dist/agent/adapters/index.d.ts.map +1 -1
  4. package/dist/agent/adapters/index.js +5 -0
  5. package/dist/agent/adapters/index.js.map +1 -1
  6. package/dist/agent/adapters/openrouter.d.ts +16 -0
  7. package/dist/agent/adapters/openrouter.d.ts.map +1 -0
  8. package/dist/agent/adapters/openrouter.js +100 -0
  9. package/dist/agent/adapters/openrouter.js.map +1 -0
  10. package/dist/agent/types.d.ts +1 -1
  11. package/dist/agent/types.d.ts.map +1 -1
  12. package/dist/commands/agent.d.ts.map +1 -1
  13. package/dist/commands/agent.js +3 -1
  14. package/dist/commands/agent.js.map +1 -1
  15. package/dist/commands/setup.js +5 -2
  16. package/dist/commands/setup.js.map +1 -1
  17. package/dist/config/schema.d.ts +2 -1
  18. package/dist/config/schema.d.ts.map +1 -1
  19. package/dist/config/schema.js +2 -0
  20. package/dist/config/schema.js.map +1 -1
  21. package/dist/index.js +0 -0
  22. package/package.json +16 -12
  23. package/.turbo/turbo-build.log +0 -4
  24. package/.turbo/turbo-lint.log +0 -21
  25. package/.turbo/turbo-test.log +0 -689
  26. package/src/agent/adapters/claude.ts +0 -143
  27. package/src/agent/adapters/gemini.ts +0 -159
  28. package/src/agent/adapters/index.ts +0 -61
  29. package/src/agent/adapters/ollama.ts +0 -231
  30. package/src/agent/adapters/openai.ts +0 -116
  31. package/src/agent/adapters/xai.ts +0 -119
  32. package/src/agent/index.ts +0 -251
  33. package/src/agent/memory/index.ts +0 -151
  34. package/src/agent/prompts/system.ts +0 -106
  35. package/src/agent/tools/ai-editing.ts +0 -845
  36. package/src/agent/tools/ai-generation.ts +0 -1073
  37. package/src/agent/tools/ai-pipeline.ts +0 -1055
  38. package/src/agent/tools/ai.ts +0 -21
  39. package/src/agent/tools/batch.ts +0 -429
  40. package/src/agent/tools/e2e.test.ts +0 -545
  41. package/src/agent/tools/export.ts +0 -184
  42. package/src/agent/tools/filesystem.ts +0 -237
  43. package/src/agent/tools/index.ts +0 -150
  44. package/src/agent/tools/integration.test.ts +0 -775
  45. package/src/agent/tools/media.ts +0 -697
  46. package/src/agent/tools/project.ts +0 -313
  47. package/src/agent/tools/timeline.ts +0 -951
  48. package/src/agent/types.ts +0 -68
  49. package/src/commands/agent.ts +0 -340
  50. package/src/commands/ai-analyze.ts +0 -429
  51. package/src/commands/ai-animated-caption.ts +0 -390
  52. package/src/commands/ai-audio.ts +0 -941
  53. package/src/commands/ai-broll.ts +0 -490
  54. package/src/commands/ai-edit-cli.ts +0 -658
  55. package/src/commands/ai-edit.ts +0 -1542
  56. package/src/commands/ai-fill-gaps.ts +0 -566
  57. package/src/commands/ai-helpers.ts +0 -65
  58. package/src/commands/ai-highlights.ts +0 -1303
  59. package/src/commands/ai-image.ts +0 -761
  60. package/src/commands/ai-motion.ts +0 -347
  61. package/src/commands/ai-narrate.ts +0 -451
  62. package/src/commands/ai-review.ts +0 -309
  63. package/src/commands/ai-script-pipeline-cli.ts +0 -1710
  64. package/src/commands/ai-script-pipeline.ts +0 -1365
  65. package/src/commands/ai-suggest-edit.ts +0 -264
  66. package/src/commands/ai-video-fx.ts +0 -445
  67. package/src/commands/ai-video.ts +0 -915
  68. package/src/commands/ai-viral.ts +0 -595
  69. package/src/commands/ai-visual-fx.ts +0 -601
  70. package/src/commands/ai.test.ts +0 -627
  71. package/src/commands/ai.ts +0 -307
  72. package/src/commands/analyze.ts +0 -282
  73. package/src/commands/audio.ts +0 -644
  74. package/src/commands/batch.test.ts +0 -279
  75. package/src/commands/batch.ts +0 -440
  76. package/src/commands/detect.ts +0 -329
  77. package/src/commands/doctor.ts +0 -237
  78. package/src/commands/edit-cmd.ts +0 -1014
  79. package/src/commands/export.ts +0 -918
  80. package/src/commands/generate.ts +0 -2146
  81. package/src/commands/media.ts +0 -177
  82. package/src/commands/output.ts +0 -142
  83. package/src/commands/pipeline.ts +0 -398
  84. package/src/commands/project.test.ts +0 -127
  85. package/src/commands/project.ts +0 -149
  86. package/src/commands/sanitize.ts +0 -60
  87. package/src/commands/schema.ts +0 -130
  88. package/src/commands/setup.ts +0 -509
  89. package/src/commands/timeline.test.ts +0 -499
  90. package/src/commands/timeline.ts +0 -529
  91. package/src/commands/validate.ts +0 -77
  92. package/src/config/config.test.ts +0 -197
  93. package/src/config/index.ts +0 -125
  94. package/src/config/schema.ts +0 -82
  95. package/src/engine/index.ts +0 -2
  96. package/src/engine/project.test.ts +0 -702
  97. package/src/engine/project.ts +0 -439
  98. package/src/index.ts +0 -146
  99. package/src/utils/api-key.test.ts +0 -41
  100. package/src/utils/api-key.ts +0 -247
  101. package/src/utils/audio.ts +0 -83
  102. package/src/utils/exec-safe.ts +0 -75
  103. package/src/utils/first-run.ts +0 -52
  104. package/src/utils/provider-resolver.ts +0 -56
  105. package/src/utils/remotion.ts +0 -951
  106. package/src/utils/subtitle.test.ts +0 -227
  107. package/src/utils/subtitle.ts +0 -169
  108. package/src/utils/tty.ts +0 -196
  109. package/tsconfig.json +0 -20
@@ -1,644 +0,0 @@
1
- /**
2
- * @module audio
3
- *
4
- * Top-level `vibe audio` command group for audio operations.
5
- *
6
- * Commands:
7
- * audio transcribe - Transcribe audio using Whisper
8
- * audio voices - List available ElevenLabs voices
9
- * audio isolate - Isolate vocals from audio (ElevenLabs)
10
- * audio voice-clone - Clone a voice from audio samples (ElevenLabs)
11
- * audio dub - Dub audio/video to another language (Whisper + Claude + ElevenLabs)
12
- * audio duck - Auto-duck background music when voice is present (FFmpeg)
13
- *
14
- * @dependencies Whisper (OpenAI), ElevenLabs, Claude (Anthropic), FFmpeg
15
- */
16
-
17
- import { Command } from "commander";
18
- import { resolve, dirname, basename, extname } from "node:path";
19
- import { readFile, writeFile } from "node:fs/promises";
20
- import { existsSync } from "node:fs";
21
- import chalk from "chalk";
22
- import ora from "ora";
23
- import {
24
- WhisperProvider,
25
- ElevenLabsProvider,
26
- ClaudeProvider,
27
- } from "@vibeframe/ai-providers";
28
- import { getApiKey, requireApiKey } from "../utils/api-key.js";
29
- import { execSafe, commandExists, execSafeSync } from "../utils/exec-safe.js";
30
- import { detectFormat, formatTranscript } from "../utils/subtitle.js";
31
- import { formatTime } from "./ai-helpers.js";
32
- import { isJsonMode, outputResult } from "./output.js";
33
- import { rejectControlChars } from "./validate.js";
34
-
35
/**
 * Extra help shown after the auto-generated usage of `vibe audio`:
 * one example per subcommand plus the API keys each subcommand needs.
 */
const audioHelpText = `
Examples:
$ vibe audio transcribe interview.mp3 -o transcript.srt -f srt
$ vibe audio transcribe video.mp4 -l ko # Specify language
$ vibe audio voices # List available voices
$ vibe audio isolate song.mp3 -o vocals.mp3
$ vibe audio voice-clone sample.mp3 -n "my-voice"
$ vibe audio dub video.mp4 -l ko -o dubbed.mp4
$ vibe audio duck music.mp3 --voice narration.mp3 -o ducked.mp3

API Keys:
OPENAI_API_KEY transcribe (Whisper)
ELEVENLABS_API_KEY voices, isolate, voice-clone
OPENAI_API_KEY + ANTHROPIC_API_KEY + ELEVENLABS_API_KEY dub (full pipeline)
No key needed duck (FFmpeg only)

Run 'vibe schema audio.<command>' for structured parameter info.
`;

/**
 * Root `vibe audio` command group (alias `au`).
 * The individual subcommands are registered on this instance further down.
 */
export const audioCommand = new Command("audio");
audioCommand.alias("au");
audioCommand.description("Audio operations (transcribe, TTS, voice clone, ducking)");
audioCommand.addHelpText("after", audioHelpText);
59
-
60
- // ── audio transcribe ───────────────────────────────────────────────────
61
-
62
/**
 * `vibe audio transcribe <audio>` — transcribe an audio file with Whisper.
 *
 * Options:
 *   -k, --api-key   OpenAI API key (falls back to OPENAI_API_KEY)
 *   -l, --language  Language code forwarded to the transcription call
 *   -o, --output    File to write the transcript to
 *   -f, --format    Output format; otherwise derived from the output extension
 *
 * The transcript file is written BEFORE any result is reported, so that
 * `--output` is honoured in JSON mode too (the JSON result advertises
 * `outputPath`, which previously pointed at a file that was never created).
 * Exits with code 1 on any failure.
 */
audioCommand
  .command("transcribe")
  .description("Transcribe audio using Whisper")
  .argument("<audio>", "Audio file path")
  .option("-k, --api-key <key>", "OpenAI API key (or set OPENAI_API_KEY env)")
  .option("-l, --language <lang>", "Language code (e.g., en, ko)")
  .option("-o, --output <path>", "Output file path")
  .option("-f, --format <format>", "Output format: json, srt, vtt (auto-detected from extension)")
  .action(async (audioPath: string, options) => {
    try {
      const apiKey = await requireApiKey("OPENAI_API_KEY", "OpenAI", options.apiKey);

      const spinner = ora("Initializing Whisper...").start();

      const whisper = new WhisperProvider();
      await whisper.initialize({ apiKey });

      spinner.text = "Reading audio file...";
      const absPath = resolve(process.cwd(), audioPath);
      const audioBuffer = await readFile(absPath);
      const audioBlob = new Blob([audioBuffer]);

      spinner.text = "Transcribing...";
      const result = await whisper.transcribe(audioBlob, options.language);

      if (result.status === "failed") {
        spinner.fail(chalk.red(`Transcription failed: ${result.error}`));
        process.exit(1);
      }

      // Persist the transcript first: both the JSON and the human-readable
      // paths below report the output location, so the file must exist.
      let outputPath: string | undefined;
      let savedMessage: string | undefined;
      if (options.output) {
        spinner.text = "Saving transcript...";
        outputPath = resolve(process.cwd(), options.output);
        const format = detectFormat(options.output, options.format);
        const content = formatTranscript(result, format);
        await writeFile(outputPath, content, "utf-8");
        savedMessage = `Saved ${format.toUpperCase()} to: ${outputPath}`;
      }

      spinner.succeed(chalk.green("Transcription complete"));

      if (isJsonMode()) {
        outputResult({
          success: true,
          fullText: result.fullText,
          segments: result.segments,
          language: result.detectedLanguage,
          outputPath,
        });
        return;
      }

      console.log();
      console.log(chalk.bold.cyan("Transcript"));
      console.log(chalk.dim("─".repeat(60)));
      console.log(result.fullText);
      console.log();

      if (result.segments && result.segments.length > 0) {
        console.log(chalk.bold.cyan("Segments"));
        console.log(chalk.dim("─".repeat(60)));
        for (const seg of result.segments) {
          const time = `[${formatTime(seg.startTime)} - ${formatTime(seg.endTime)}]`;
          console.log(`${chalk.dim(time)} ${seg.text}`);
        }
        console.log();
      }

      if (savedMessage) {
        console.log(chalk.green(savedMessage));
      }
    } catch (error) {
      console.error(chalk.red("Transcription failed"));
      console.error(error);
      process.exit(1);
    }
  });
128
-
129
- // ── audio voices ───────────────────────────────────────────────────────
130
-
131
/**
 * `vibe audio voices` — list the voices returned by the ElevenLabs provider.
 *
 * In JSON mode emits `{ success, voices: [{ name, voiceId, category, labels }] }`;
 * otherwise prints each voice's name, id and category. Exits with code 1 on
 * any error.
 */
audioCommand
  .command("voices")
  .description("List available ElevenLabs voices")
  .option("-k, --api-key <key>", "ElevenLabs API key (or set ELEVENLABS_API_KEY env)")
  .action(async (options) => {
    try {
      const apiKey = await requireApiKey("ELEVENLABS_API_KEY", "ElevenLabs", options.apiKey);

      const progress = ora("Fetching voices...").start();
      const provider = new ElevenLabsProvider();
      await provider.initialize({ apiKey });

      const voiceList = await provider.getVoices();
      progress.succeed(chalk.green(`Found ${voiceList.length} voices`));

      if (isJsonMode()) {
        // Machine-readable listing: expose the provider's snake_case id as `voiceId`.
        const summary = voiceList.map((entry) => {
          return {
            name: entry.name,
            voiceId: entry.voice_id,
            category: entry.category,
            labels: entry.labels,
          };
        });
        outputResult({ success: true, voices: summary });
        return;
      }

      const divider = chalk.dim("─".repeat(60));
      console.log();
      console.log(chalk.bold.cyan("Available Voices"));
      console.log(divider);

      voiceList.forEach((entry) => {
        const heading = chalk.bold(entry.name);
        const idSuffix = chalk.dim(`(${entry.voice_id})`);
        console.log();
        console.log(`${heading} ${idSuffix}`);
        console.log(` Category: ${entry.category}`);
      });
      console.log();
    } catch (error) {
      console.error(chalk.red("Failed to fetch voices"));
      console.error(error);
      process.exit(1);
    }
  });
167
-
168
- // ── audio isolate ──────────────────────────────────────────────────────
169
-
170
/**
 * `vibe audio isolate <audio>` — send an audio file to the ElevenLabs
 * provider's `isolateVocals` call and save the returned audio.
 *
 * Options:
 *   -k, --api-key  ElevenLabs API key (falls back to ELEVENLABS_API_KEY)
 *   -o, --output   Destination file (default: vocals.mp3)
 *   --dry-run      Report the parameters and return without doing any work
 *
 * Exits with code 1 on any failure.
 */
audioCommand
  .command("isolate")
  .description("Isolate vocals from audio using ElevenLabs")
  .argument("<audio>", "Input audio file path")
  .option("-k, --api-key <key>", "ElevenLabs API key (or set ELEVENLABS_API_KEY env)")
  .option("-o, --output <path>", "Output audio file path", "vocals.mp3")
  .option("--dry-run", "Preview parameters without executing")
  .action(async (audioPath: string, options) => {
    try {
      // Dry run: echo the parameters only; no key lookup, no file access.
      if (options.dryRun) {
        outputResult({ dryRun: true, command: "audio isolate", audioPath });
        return;
      }

      const apiKey = await requireApiKey("ELEVENLABS_API_KEY", "ElevenLabs", options.apiKey);

      const progress = ora("Reading audio file...").start();

      const inputFile = resolve(process.cwd(), audioPath);
      const inputAudio = await readFile(inputFile);

      progress.text = "Isolating vocals...";

      const provider = new ElevenLabsProvider();
      await provider.initialize({ apiKey });

      const isolation = await provider.isolateVocals(inputAudio);

      if (!isolation.success || !isolation.audioBuffer) {
        const reason = isolation.error || "Audio isolation failed";
        progress.fail(chalk.red(reason));
        process.exit(1);
      }

      const destination = resolve(process.cwd(), options.output);
      await writeFile(destination, isolation.audioBuffer);

      progress.succeed(chalk.green("Vocals isolated"));

      if (isJsonMode()) {
        outputResult({ success: true, outputPath: destination });
        return;
      }

      console.log(chalk.green(`Saved to: ${destination}`));
      console.log();
    } catch (error) {
      console.error(chalk.red("Audio isolation failed"));
      console.error(error);
      process.exit(1);
    }
  });
221
-
222
- // ── audio voice-clone ──────────────────────────────────────────────────
223
-
224
/**
 * `vibe audio voice-clone [samples...]` — clone a voice from audio samples
 * using the ElevenLabs provider, or list existing voices with `--list`.
 *
 * Options:
 *   -k, --api-key      ElevenLabs API key (falls back to ELEVENLABS_API_KEY)
 *   -n, --name         Voice name (required when cloning)
 *   -d, --description  Voice description
 *   --labels           Labels as a JSON object
 *   --remove-noise     Ask the provider to remove background noise
 *   --list             List voices instead of cloning
 *   --dry-run          Report the parameters and return without doing any work
 *
 * All user input (`--name`, sample list, `--labels`) is validated before any
 * sample is read, so a malformed `--labels` value yields a clear message
 * instead of a raw JSON SyntaxError after the files were already loaded.
 * `--list` honours JSON mode with the same shape as `vibe audio voices`.
 * Exits with code 1 on any failure.
 */
audioCommand
  .command("voice-clone")
  .description("Clone a voice from audio samples using ElevenLabs")
  .argument("[samples...]", "Audio sample files (1-25 files)")
  .option("-k, --api-key <key>", "ElevenLabs API key (or set ELEVENLABS_API_KEY env)")
  .option("-n, --name <name>", "Voice name (required)")
  .option("-d, --description <desc>", "Voice description")
  .option("--labels <json>", "Labels as JSON (e.g., '{\"accent\": \"american\"}')")
  .option("--remove-noise", "Remove background noise from samples")
  .option("--list", "List all available voices")
  .option("--dry-run", "Preview parameters without executing")
  .action(async (samples: string[], options) => {
    try {
      if (options.dryRun) {
        outputResult({ dryRun: true, command: "audio voice-clone", samples: samples?.length, name: options.name });
        return;
      }

      const apiKey = await requireApiKey("ELEVENLABS_API_KEY", "ElevenLabs", options.apiKey);

      const elevenlabs = new ElevenLabsProvider();
      await elevenlabs.initialize({ apiKey });

      // List voices mode
      if (options.list) {
        const spinner = ora("Fetching voices...").start();
        const voices = await elevenlabs.getVoices();
        spinner.succeed(chalk.green(`Found ${voices.length} voices`));

        // Same machine-readable shape as `vibe audio voices`.
        if (isJsonMode()) {
          outputResult({
            success: true,
            voices: voices.map((v) => ({ name: v.name, voiceId: v.voice_id, category: v.category, labels: v.labels })),
          });
          return;
        }

        console.log();
        console.log(chalk.bold.cyan("Available Voices"));
        console.log(chalk.dim("─".repeat(60)));

        for (const voice of voices) {
          const category = chalk.dim(`(${voice.category})`);
          console.log(`${chalk.bold(voice.name)} ${category}`);
          console.log(` ${chalk.dim("ID:")} ${voice.voice_id}`);
          if (voice.labels && Object.keys(voice.labels).length > 0) {
            console.log(` ${chalk.dim("Labels:")} ${JSON.stringify(voice.labels)}`);
          }
          console.log();
        }
        return;
      }

      // Clone voice mode
      if (!options.name) {
        console.error(chalk.red("Voice name is required. Use --name <name>"));
        process.exit(1);
      }

      rejectControlChars(options.name);

      if (!samples || samples.length === 0) {
        console.error(chalk.red("At least one audio sample is required"));
        process.exit(1);
      }

      // Parse --labels before touching the disk or starting the spinner so a
      // typo fails fast with an actionable message.
      let labels: Record<string, string> | undefined;
      if (options.labels) {
        try {
          const parsed: unknown = JSON.parse(options.labels);
          if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
            throw new Error("labels must be a JSON object");
          }
          labels = parsed as Record<string, string>;
        } catch {
          console.error(chalk.red("Invalid --labels value. Expected a JSON object, e.g. '{\"accent\": \"american\"}'"));
          process.exit(1);
        }
      }

      const spinner = ora("Reading audio samples...").start();

      const audioBuffers: Buffer[] = [];
      for (const samplePath of samples) {
        const absPath = resolve(process.cwd(), samplePath);
        if (!existsSync(absPath)) {
          spinner.fail(chalk.red(`File not found: ${samplePath}`));
          process.exit(1);
        }
        const buffer = await readFile(absPath);
        audioBuffers.push(buffer);
      }

      spinner.text = `Cloning voice from ${audioBuffers.length} sample(s)...`;

      const result = await elevenlabs.cloneVoice(audioBuffers, {
        name: options.name,
        description: options.description,
        labels,
        removeBackgroundNoise: options.removeNoise,
      });

      if (!result.success) {
        spinner.fail(chalk.red(result.error || "Voice cloning failed"));
        process.exit(1);
      }

      spinner.succeed(chalk.green("Voice cloned successfully"));

      if (isJsonMode()) {
        outputResult({ success: true, name: options.name, voiceId: result.voiceId });
        return;
      }

      console.log();
      console.log(chalk.bold.cyan("Voice Details"));
      console.log(chalk.dim("─".repeat(60)));
      console.log(`Name: ${chalk.bold(options.name)}`);
      console.log(`Voice ID: ${chalk.bold(result.voiceId)}`);
      console.log();
      console.log(chalk.dim("Use this voice ID with:"));
      console.log(chalk.dim(` pnpm vibe audio tts "Hello world" -v ${result.voiceId}`));
      console.log();
    } catch (error) {
      console.error(chalk.red("Voice cloning failed"));
      console.error(error);
      process.exit(1);
    }
  });
333
-
334
- // ── audio dub ──────────────────────────────────────────────────────────
335
-
336
/**
 * `vibe audio dub <media>` — dub an audio/video file into another language.
 *
 * Pipeline:
 *   1. For video inputs, extract the audio track to a temporary MP3 via FFmpeg.
 *   2. Transcribe it with Whisper.
 *   3. Translate the segments through the Claude provider (best effort: if
 *      the call fails or returns nothing, the original text is kept).
 *   4. Generate speech per segment with ElevenLabs.
 *   5. Concatenate the generated clips and write the output file.
 *
 * Options:
 *   -l, --language   Target language code (required)
 *   --source         Source language code (otherwise left to the transcriber)
 *   -v, --voice      ElevenLabs voice id
 *   --analyze-only   Stop after step 3; with -o, save the timing data as JSON
 *   -o, --output     Output path
 *   --dry-run        Report the parameters and return without doing any work
 *
 * The temporary audio file is removed on every exit path (success, JSON-mode
 * return, `--analyze-only`, and failures); previously it was only removed at
 * the very end of the non-JSON success path. The command also fails instead
 * of writing an empty file when no segment could be synthesised.
 * Exits with code 1 on any failure.
 */
audioCommand
  .command("dub")
  .description("Dub audio/video to another language (transcribe, translate, TTS)")
  .argument("<media>", "Input media file (video or audio)")
  .option("-l, --language <lang>", "Target language code (e.g., es, ko, ja) (required)")
  .option("--source <lang>", "Source language code (default: auto-detect)")
  .option("-v, --voice <id>", "ElevenLabs voice ID for output")
  .option("--analyze-only", "Only analyze and show timing, don't generate audio")
  .option("-o, --output <path>", "Output file path")
  .option("--dry-run", "Preview parameters without executing")
  .action(async (mediaPath: string, options) => {
    // Temporary audio extracted from a video input; undefined when nothing
    // needs cleaning up.
    let tempAudioPath: string | undefined;

    // Best-effort, idempotent removal of the temporary audio file. It must be
    // called explicitly before process.exit(), which does not run `finally`.
    const removeTempAudio = async (): Promise<void> => {
      if (!tempAudioPath) {
        return;
      }
      const stalePath = tempAudioPath;
      tempAudioPath = undefined;
      try {
        const { unlink } = await import("node:fs/promises");
        await unlink(stalePath);
      } catch {
        // Ignore cleanup errors
      }
    };

    try {
      if (options.dryRun) {
        outputResult({ dryRun: true, command: "audio dub", mediaPath, targetLanguage: options.language, sourceLanguage: options.source, voice: options.voice });
        return;
      }

      if (!options.language) {
        console.error(chalk.red("Target language is required. Use -l or --language"));
        process.exit(1);
      }

      const absPath = resolve(process.cwd(), mediaPath);
      if (!existsSync(absPath)) {
        console.error(chalk.red(`File not found: ${mediaPath}`));
        process.exit(1);
      }

      // Check required API keys (ElevenLabs is optional when only analyzing)
      const openaiKey = await requireApiKey("OPENAI_API_KEY", "OpenAI");
      const anthropicKey = await requireApiKey("ANTHROPIC_API_KEY", "Anthropic");
      const elevenlabsKey = options.analyzeOnly
        ? await getApiKey("ELEVENLABS_API_KEY", "ElevenLabs", undefined)
        : await requireApiKey("ELEVENLABS_API_KEY", "ElevenLabs");

      const spinner = ora("Extracting audio...").start();

      // Check if input is video
      const ext = extname(absPath).toLowerCase();
      const isVideo = [".mp4", ".mov", ".avi", ".mkv", ".webm"].includes(ext);

      // Step 1: Extract audio if video
      let audioPath = absPath;
      if (isVideo) {
        const extractedPath = resolve(dirname(absPath), `temp-audio-${Date.now()}.mp3`);
        // Register the path before running FFmpeg so a partially written file
        // is cleaned up as well.
        tempAudioPath = extractedPath;
        try {
          execSafeSync("ffmpeg", ["-i", absPath, "-vn", "-acodec", "mp3", "-y", extractedPath]);
          audioPath = extractedPath;
        } catch {
          spinner.fail(chalk.red("Failed to extract audio from video"));
          await removeTempAudio();
          process.exit(1);
        }
      }

      // Step 2: Transcribe with Whisper
      spinner.text = "Transcribing audio...";
      const whisper = new WhisperProvider();
      await whisper.initialize({ apiKey: openaiKey });

      const audioBuffer = await readFile(audioPath);
      const audioBlob = new Blob([audioBuffer]);

      const transcriptResult = await whisper.transcribe(audioBlob, options.source);

      if (transcriptResult.status === "failed" || !transcriptResult.segments) {
        spinner.fail(chalk.red(`Transcription failed: ${transcriptResult.error}`));
        await removeTempAudio();
        process.exit(1);
      }

      // Step 3: Translate with Claude
      spinner.text = "Translating...";
      const claude = new ClaudeProvider();
      await claude.initialize({ apiKey: anthropicKey });

      // Build translation prompt
      const segments = transcriptResult.segments;
      const segmentTexts = segments.map((s, i) => `[${i}] ${s.text}`).join("\n");

      // Language names for better translation context
      const languageNames: Record<string, string> = {
        en: "English", es: "Spanish", fr: "French", de: "German",
        it: "Italian", pt: "Portuguese", ja: "Japanese", ko: "Korean",
        zh: "Chinese", ar: "Arabic", ru: "Russian", hi: "Hindi",
      };
      const targetLangName = languageNames[options.language] || options.language;

      // Translated text per segment index; holes mean "keep the original".
      const translatedTexts: string[] = [];
      try {
        const storyboard = await claude.analyzeContent(
          `TRANSLATE to ${targetLangName}. Return the translated text only, preserving segment numbers:\n\n${segmentTexts}`,
          segments[segments.length - 1]?.endTime || 60
        );

        if (storyboard && storyboard.length > 0) {
          segments.forEach((_, i) => {
            const text = storyboard[i]?.description;
            if (text) {
              translatedTexts[i] = text;
            }
          });
        }
      } catch {
        // Translation is best effort: fall back to the original text below.
      }

      const translatedSegments: Array<{ index: number; text: string; startTime: number; endTime: number }> =
        segments.map((s, i) => ({
          index: i,
          text: translatedTexts[i] || s.text,
          startTime: s.startTime,
          endTime: s.endTime,
        }));

      spinner.succeed(chalk.green("Transcription and translation complete"));

      // Display timing analysis
      console.log();
      console.log(chalk.bold.cyan("Dubbing Analysis"));
      console.log(chalk.dim("─".repeat(60)));
      console.log(`Source language: ${transcriptResult.detectedLanguage || options.source || "auto"}`);
      console.log(`Target language: ${targetLangName}`);
      console.log(`Segments: ${segments.length}`);
      console.log();

      console.log(chalk.bold("Segment Timing:"));
      for (let i = 0; i < Math.min(5, segments.length); i++) {
        const seg = segments[i];
        const time = `[${formatTime(seg.startTime)} - ${formatTime(seg.endTime)}]`;
        console.log(`${chalk.dim(time)} ${seg.text}`);
        console.log(`${chalk.dim(" →")} ${chalk.green(translatedSegments[i]?.text || seg.text)}`);
        console.log();
      }

      if (segments.length > 5) {
        console.log(chalk.dim(`... and ${segments.length - 5} more segments`));
      }

      if (options.analyzeOnly) {
        console.log();
        console.log(chalk.dim("Use without --analyze-only to generate dubbed audio"));

        if (options.output) {
          const timingPath = resolve(process.cwd(), options.output);
          const timingData = {
            sourcePath: absPath,
            sourceLanguage: transcriptResult.detectedLanguage || options.source || "auto",
            targetLanguage: options.language,
            segments: segments.map((s, i) => ({
              index: i,
              startTime: s.startTime,
              endTime: s.endTime,
              original: s.text,
              translated: translatedSegments[i]?.text || s.text,
            })),
          };
          await writeFile(timingPath, JSON.stringify(timingData, null, 2));
          console.log(`Timing saved to: ${chalk.bold(timingPath)}`);
        }
        return;
      }

      // Step 4: Generate TTS for each segment
      spinner.start("Generating dubbed audio...");
      const elevenlabs = new ElevenLabsProvider();
      await elevenlabs.initialize({ apiKey: elevenlabsKey! });

      const dubbedAudioBuffers: Array<{ buffer: Buffer; startTime: number }> = [];

      for (let i = 0; i < translatedSegments.length; i++) {
        spinner.text = `Generating audio segment ${i + 1}/${translatedSegments.length}...`;
        const seg = translatedSegments[i];

        const ttsResult = await elevenlabs.textToSpeech(seg.text, {
          voiceId: options.voice,
        });

        if (ttsResult.success && ttsResult.audioBuffer) {
          dubbedAudioBuffers.push({
            buffer: ttsResult.audioBuffer,
            startTime: seg.startTime,
          });
        }
      }

      // Refuse to report success for an empty output file.
      if (dubbedAudioBuffers.length === 0) {
        spinner.fail(chalk.red("Dubbing failed: no audio could be generated for any segment"));
        await removeTempAudio();
        process.exit(1);
      }
      const skippedSegments = translatedSegments.length - dubbedAudioBuffers.length;

      // Step 5: Combine and save
      spinner.text = "Combining audio...";

      // NOTE: clips are concatenated back-to-back; `startTime` is recorded
      // per clip but is not used to pad silence between them.
      const combinedBuffer = Buffer.concat(dubbedAudioBuffers.map((a) => a.buffer));

      const outputExt = isVideo ? ".mp3" : extname(absPath);
      const defaultOutputPath = resolve(
        dirname(absPath),
        `${basename(absPath, extname(absPath))}-${options.language}${outputExt}`
      );
      const finalOutputPath = resolve(process.cwd(), options.output || defaultOutputPath);

      await writeFile(finalOutputPath, combinedBuffer);

      spinner.succeed(chalk.green("Dubbing complete"));

      if (isJsonMode()) {
        outputResult({ success: true, sourceLanguage: transcriptResult.detectedLanguage || options.source || "auto", targetLanguage: options.language, segmentCount: translatedSegments.length, outputPath: finalOutputPath });
        return;
      }

      if (skippedSegments > 0) {
        console.log(chalk.yellow(`Warning: ${skippedSegments} segment(s) could not be synthesised and were skipped`));
      }

      console.log();
      console.log(`Saved to: ${chalk.bold(finalOutputPath)}`);
      console.log();
    } catch (error) {
      // process.exit() below skips `finally`, so clean up here first.
      await removeTempAudio();
      console.error(chalk.red("Dubbing failed"));
      console.error(error);
      process.exit(1);
    } finally {
      // Covers every `return` path (dry-run, --analyze-only, JSON mode, success).
      await removeTempAudio();
    }
  });
565
-
566
- // ── audio duck ─────────────────────────────────────────────────────────
567
-
568
/**
 * `vibe audio duck <music>` — lower background music while a voice track is
 * present, using FFmpeg's `sidechaincompress` filter.
 *
 * Options:
 *   -v, --voice      Voice/narration track used as the sidechain (required)
 *   -o, --output     Output path (default: `<music name>-ducked<ext>` next to the music file)
 *   -t, --threshold  Sidechain threshold in dB (default -30), converted to a linear value
 *   -r, --ratio      Compression ratio (default 3)
 *   -a, --attack     Attack time in ms (default 20)
 *   -l, --release    Release time in ms (default 200)
 *   --dry-run        Report the parsed parameters and return without doing any work
 *
 * Numeric options are parsed once and rejected when they are not finite
 * numbers (previously `NaN` was interpolated into the filter string). Input
 * files are checked for existence, and the default output path is derived
 * with `path` helpers so it can never equal the input path.
 * Exits with code 1 on any failure.
 */
audioCommand
  .command("duck")
  .description("Auto-duck background music when voice is present (FFmpeg)")
  .argument("<music>", "Background music file path")
  .option("-v, --voice <path>", "Voice/narration track (required)")
  .option("-o, --output <path>", "Output audio file path")
  .option("-t, --threshold <dB>", "Sidechain threshold in dB", "-30")
  .option("-r, --ratio <ratio>", "Compression ratio", "3")
  .option("-a, --attack <ms>", "Attack time in ms", "20")
  .option("-l, --release <ms>", "Release time in ms", "200")
  .option("--dry-run", "Preview parameters without executing")
  .action(async (musicPath: string, options) => {
    try {
      const thresholdDb = parseFloat(options.threshold);
      const ratio = parseFloat(options.ratio);
      const attack = parseFloat(options.attack);
      const release = parseFloat(options.release);

      if (options.dryRun) {
        outputResult({ dryRun: true, command: "audio duck", musicPath, voicePath: options.voice, threshold: thresholdDb, ratio, attack, release });
        return;
      }

      if (!options.voice) {
        console.error(chalk.red("Voice track required. Use --voice <path>"));
        process.exit(1);
      }

      // Reject non-numeric values before they reach the FFmpeg filter string.
      const numericOptions: Array<[string, number, string]> = [
        ["threshold", thresholdDb, options.threshold],
        ["ratio", ratio, options.ratio],
        ["attack", attack, options.attack],
        ["release", release, options.release],
      ];
      for (const [optionName, parsed, raw] of numericOptions) {
        if (!Number.isFinite(parsed)) {
          console.error(chalk.red(`Invalid --${optionName} value: ${raw}. Expected a number.`));
          process.exit(1);
        }
      }

      // Check FFmpeg availability
      if (!commandExists("ffmpeg")) {
        console.error(chalk.red("FFmpeg not found. Please install FFmpeg."));
        process.exit(1);
      }

      const absMusic = resolve(process.cwd(), musicPath);
      const absVoice = resolve(process.cwd(), options.voice);

      if (!existsSync(absMusic)) {
        console.error(chalk.red(`File not found: ${musicPath}`));
        process.exit(1);
      }
      if (!existsSync(absVoice)) {
        console.error(chalk.red(`File not found: ${options.voice}`));
        process.exit(1);
      }

      // Default output: "<name>-ducked<ext>" beside the music file. Built from
      // path components so files without an extension, or directories that
      // contain a dot, are handled correctly.
      const musicExt = extname(absMusic);
      const outputPath = options.output
        ? resolve(process.cwd(), options.output)
        : resolve(dirname(absMusic), `${basename(absMusic, musicExt)}-ducked${musicExt}`);

      const spinner = ora("Processing audio ducking...").start();

      // Convert threshold from dB to linear (0-1 scale)
      const thresholdLinear = Math.pow(10, thresholdDb / 20);

      // FFmpeg sidechain compress filter: input 0 (music) is compressed,
      // keyed by input 1 (voice).
      const filterComplex = `[0:a][1:a]sidechaincompress=threshold=${thresholdLinear}:ratio=${ratio}:attack=${attack}:release=${release}[out]`;

      // `-y` is a global option, so it is passed first as FFmpeg's usage recommends.
      await execSafe("ffmpeg", ["-y", "-i", absMusic, "-i", absVoice, "-filter_complex", filterComplex, "-map", "[out]", outputPath]);

      spinner.succeed(chalk.green("Audio ducking complete"));

      if (isJsonMode()) {
        outputResult({ success: true, musicPath: absMusic, voicePath: options.voice, threshold: thresholdDb, ratio, outputPath });
        return;
      }

      console.log();
      console.log(chalk.dim("─".repeat(60)));
      console.log(`Music: ${musicPath}`);
      console.log(`Voice: ${options.voice}`);
      console.log(`Threshold: ${thresholdDb}dB`);
      console.log(`Ratio: ${ratio}:1`);
      console.log(`Attack/Release: ${attack}ms / ${release}ms`);
      console.log();
      console.log(chalk.green(`Output: ${outputPath}`));
      console.log();
    } catch (error) {
      console.error(chalk.red("Audio ducking failed"));
      console.error(error);
      process.exit(1);
    }
  });